| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277 |
- // Copyright (C) 2024-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include "mupdf/pdf.h"
- static const char *
- tag_or_text(fz_xml *x, const char *find)
- {
- const char *text;
- const char *f = strchr(find, ':');
- /* If we find a : we have a namespace. Search for both with and
- * without the namespace. */
- if (f)
- f++;
- text = fz_xml_att(x, find);
- if (text == NULL && f)
- text = fz_xml_att(x, f);
- if (text == NULL)
- text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, find)));
- if (text == NULL && f)
- text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, f)));
- return text;
- }
- static enum pdf_zugferd_profile
- do_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version, char **fname)
- {
- pdf_obj *metadata = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Metadata), NULL);
- fz_buffer *buf;
- fz_xml *xml = NULL;
- fz_xml *x;
- enum pdf_zugferd_profile ret = PDF_NOT_ZUGFERD;
- if (version)
- *version = 0;
- if (fname)
- *fname = NULL;
- if (metadata == NULL)
- return PDF_NOT_ZUGFERD;
- buf = pdf_load_stream(ctx, metadata);
- fz_var(xml);
- fz_try(ctx)
- {
- xml = fz_parse_xml(ctx, buf, 0);
- /* Version 1. */
- x = fz_xml_find_dfs(xml, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#");
- if (x)
- {
- while (x)
- {
- /* The Version tag in the document appears to always be 1.0 */
- const char *v = tag_or_text(x, "zf:Version");
- const char *cl = tag_or_text(x, "zf:ConformanceLevel");
- const char *df = tag_or_text(x, "zf:DocumentFileName");
- const char *dt = tag_or_text(x, "zf:DocumentType");
- if (v && dt && !strcmp(dt, "INVOICE"))
- {
- if (!cl)
- fz_warn(ctx, "No conformance level specified");
- else if (!strcmp(cl, "COMFORT"))
- ret = PDF_ZUGFERD_COMFORT;
- else if (!strcmp(cl, "BASIC"))
- ret = PDF_ZUGFERD_BASIC;
- else if (!strcmp(cl, "EXTENDED"))
- ret = PDF_ZUGFERD_EXTENDED;
- if (version)
- *version = fz_atof(v);
- if (!df)
- fz_warn(ctx, "ZUGFeRD doc is missing filename");
- else if (strcmp(df, "ZUGFeRD-invoice.xml"))
- fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
- if (fname && df)
- *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
- break;
- }
- x = fz_xml_find_next_dfs(x, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#");
- }
- break;
- }
- /* Version 2. */
- x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#");
- if (x)
- {
- while (x)
- {
- const char *v = tag_or_text(x, "fx:Version");
- const char *cl = tag_or_text(x, "fx:ConformanceLevel");
- const char *df = tag_or_text(x, "fx:DocumentFileName");
- const char *dt = tag_or_text(x, "fx:DocumentType");
- if (v && dt && !strcmp(dt, "INVOICE"))
- {
- if (!cl)
- fz_warn(ctx, "No conformance level specified");
- else if (!strcmp(cl, "EN 16931"))
- ret = PDF_ZUGFERD_COMFORT;
- else if (!strcmp(cl, "BASIC"))
- ret = PDF_ZUGFERD_BASIC;
- else if (!strcmp(cl, "EXTENDED"))
- ret = PDF_ZUGFERD_EXTENDED;
- else if (!strcmp(cl, "BASIC WL"))
- ret = PDF_ZUGFERD_BASIC_WL;
- else if (!strcmp(cl, "MINIMUM"))
- ret = PDF_ZUGFERD_MINIMUM;
- if (version)
- *version = fz_atof(v);
- if (!df)
- fz_warn(ctx, "ZUGFeRD doc is missing filename");
- else if (strcmp(df, "zugferd-invoice.xml"))
- fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
- if (fname && df)
- *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
- break;
- }
- x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#");
- }
- break;
- }
- /* Version 2.1 + 2.11 */
- x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#");
- if (x)
- {
- while (x)
- {
- const char *v = tag_or_text(x, "fx:Version");
- const char *cl = tag_or_text(x, "fx:ConformanceLevel");
- const char *df = tag_or_text(x, "fx:DocumentFileName");
- const char *dt = tag_or_text(x, "fx:DocumentType");
- if (v && dt && !strcmp(dt, "INVOICE"))
- {
- if (!cl)
- fz_warn(ctx, "No conformance level specified");
- else if (!strcmp(cl, "EN 16931"))
- ret = PDF_ZUGFERD_COMFORT;
- else if (!strcmp(cl, "BASIC"))
- ret = PDF_ZUGFERD_BASIC;
- else if (!strcmp(cl, "EXTENDED"))
- ret = PDF_ZUGFERD_EXTENDED;
- else if (!strcmp(cl, "BASIC WL"))
- ret = PDF_ZUGFERD_BASIC_WL;
- else if (!strcmp(cl, "MINIMUM"))
- ret = PDF_ZUGFERD_MINIMUM;
- else if (!strcmp(cl, "XRECHNUNG"))
- ret = PDF_ZUGFERD_XRECHNUNG;
- if (version)
- *version = fz_atof(v);
- if (!df)
- fz_warn(ctx, "ZUGFeRD doc is missing filename");
- else if (ret == PDF_ZUGFERD_XRECHNUNG && strcmp(df, "xrechnung.xml"))
- fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
- else if (ret != PDF_ZUGFERD_XRECHNUNG && strcmp(df, "factur-x.xml"))
- fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
- if (fname && df)
- *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
- break;
- }
- x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#");
- }
- break;
- }
- }
- fz_always(ctx)
- {
- fz_drop_xml(ctx, xml);
- fz_drop_buffer(ctx, buf);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- return ret;
- }
- enum pdf_zugferd_profile pdf_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version)
- {
- return do_zugferd_profile(ctx, doc, version, NULL);
- }
- fz_buffer *pdf_zugferd_xml(fz_context *ctx, pdf_document *doc)
- {
- char *fname;
- float version;
- enum pdf_zugferd_profile p = do_zugferd_profile(ctx, doc, &version, &fname);
- int count, i;
- fz_buffer *buf = NULL;
- if (p == PDF_NOT_ZUGFERD)
- {
- fz_free(ctx, fname);
- return NULL;
- }
- fz_try(ctx)
- {
- count = pdf_count_document_associated_files(ctx, doc);
- for (i = 0; i < count; i++)
- {
- pdf_obj *fs = pdf_document_associated_file(ctx, doc, i);
- pdf_filespec_params params;
- pdf_get_filespec_params(ctx, fs, ¶ms);
- if (!strcmp(fname, params.filename))
- {
- if (!pdf_is_embedded_file(ctx, fs))
- fz_throw(ctx, FZ_ERROR_FORMAT, "ZUGFeRD XML was not embedded");
- buf = pdf_load_embedded_file_contents(ctx, fs);
- break;
- }
- }
- }
- fz_always(ctx)
- fz_free(ctx, fname);
- fz_catch(ctx)
- fz_rethrow(ctx);
- return buf;
- }
- const char *
- pdf_zugferd_profile_to_string(fz_context *ctx, enum pdf_zugferd_profile profile)
- {
- static const char *strings[] =
- {
- "NOT ZUGFERD",
- "COMFORT",
- "BASIC",
- "EXTENDED",
- "BASIC WL",
- "MINIMUM",
- "XRECHNUNG",
- "UNKNOWN"
- };
- if (profile < PDF_NOT_ZUGFERD || profile > PDF_ZUGFERD_UNKNOWN)
- profile = PDF_ZUGFERD_UNKNOWN;
- return strings[profile];
- }
|