pdf-zugferd.c 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. // Copyright (C) 2024-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "mupdf/pdf.h"
  24. static const char *
  25. tag_or_text(fz_xml *x, const char *find)
  26. {
  27. const char *text;
  28. const char *f = strchr(find, ':');
  29. /* If we find a : we have a namespace. Search for both with and
  30. * without the namespace. */
  31. if (f)
  32. f++;
  33. text = fz_xml_att(x, find);
  34. if (text == NULL && f)
  35. text = fz_xml_att(x, f);
  36. if (text == NULL)
  37. text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, find)));
  38. if (text == NULL && f)
  39. text = fz_xml_text(fz_xml_down(fz_xml_find_down(x, f)));
  40. return text;
  41. }
  42. static enum pdf_zugferd_profile
  43. do_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version, char **fname)
  44. {
  45. pdf_obj *metadata = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME(Root), PDF_NAME(Metadata), NULL);
  46. fz_buffer *buf;
  47. fz_xml *xml = NULL;
  48. fz_xml *x;
  49. enum pdf_zugferd_profile ret = PDF_NOT_ZUGFERD;
  50. if (version)
  51. *version = 0;
  52. if (fname)
  53. *fname = NULL;
  54. if (metadata == NULL)
  55. return PDF_NOT_ZUGFERD;
  56. buf = pdf_load_stream(ctx, metadata);
  57. fz_var(xml);
  58. fz_try(ctx)
  59. {
  60. xml = fz_parse_xml(ctx, buf, 0);
  61. /* Version 1. */
  62. x = fz_xml_find_dfs(xml, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#");
  63. if (x)
  64. {
  65. while (x)
  66. {
  67. /* The Version tag in the document appears to always be 1.0 */
  68. const char *v = tag_or_text(x, "zf:Version");
  69. const char *cl = tag_or_text(x, "zf:ConformanceLevel");
  70. const char *df = tag_or_text(x, "zf:DocumentFileName");
  71. const char *dt = tag_or_text(x, "zf:DocumentType");
  72. if (v && dt && !strcmp(dt, "INVOICE"))
  73. {
  74. if (!cl)
  75. fz_warn(ctx, "No conformance level specified");
  76. else if (!strcmp(cl, "COMFORT"))
  77. ret = PDF_ZUGFERD_COMFORT;
  78. else if (!strcmp(cl, "BASIC"))
  79. ret = PDF_ZUGFERD_BASIC;
  80. else if (!strcmp(cl, "EXTENDED"))
  81. ret = PDF_ZUGFERD_EXTENDED;
  82. if (version)
  83. *version = fz_atof(v);
  84. if (!df)
  85. fz_warn(ctx, "ZUGFeRD doc is missing filename");
  86. else if (strcmp(df, "ZUGFeRD-invoice.xml"))
  87. fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
  88. if (fname && df)
  89. *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
  90. break;
  91. }
  92. x = fz_xml_find_next_dfs(x, "Description", "xmlns:zf", "urn:ferd:pdfa:CrossIndustryDocument:invoice:1p0#");
  93. }
  94. break;
  95. }
  96. /* Version 2. */
  97. x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#");
  98. if (x)
  99. {
  100. while (x)
  101. {
  102. const char *v = tag_or_text(x, "fx:Version");
  103. const char *cl = tag_or_text(x, "fx:ConformanceLevel");
  104. const char *df = tag_or_text(x, "fx:DocumentFileName");
  105. const char *dt = tag_or_text(x, "fx:DocumentType");
  106. if (v && dt && !strcmp(dt, "INVOICE"))
  107. {
  108. if (!cl)
  109. fz_warn(ctx, "No conformance level specified");
  110. else if (!strcmp(cl, "EN 16931"))
  111. ret = PDF_ZUGFERD_COMFORT;
  112. else if (!strcmp(cl, "BASIC"))
  113. ret = PDF_ZUGFERD_BASIC;
  114. else if (!strcmp(cl, "EXTENDED"))
  115. ret = PDF_ZUGFERD_EXTENDED;
  116. else if (!strcmp(cl, "BASIC WL"))
  117. ret = PDF_ZUGFERD_BASIC_WL;
  118. else if (!strcmp(cl, "MINIMUM"))
  119. ret = PDF_ZUGFERD_MINIMUM;
  120. if (version)
  121. *version = fz_atof(v);
  122. if (!df)
  123. fz_warn(ctx, "ZUGFeRD doc is missing filename");
  124. else if (strcmp(df, "zugferd-invoice.xml"))
  125. fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
  126. if (fname && df)
  127. *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
  128. break;
  129. }
  130. x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:zugferd:pdfa:CrossIndustryDocument:invoice:2p0#");
  131. }
  132. break;
  133. }
  134. /* Version 2.1 + 2.11 */
  135. x = fz_xml_find_dfs(xml, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#");
  136. if (x)
  137. {
  138. while (x)
  139. {
  140. const char *v = tag_or_text(x, "fx:Version");
  141. const char *cl = tag_or_text(x, "fx:ConformanceLevel");
  142. const char *df = tag_or_text(x, "fx:DocumentFileName");
  143. const char *dt = tag_or_text(x, "fx:DocumentType");
  144. if (v && dt && !strcmp(dt, "INVOICE"))
  145. {
  146. if (!cl)
  147. fz_warn(ctx, "No conformance level specified");
  148. else if (!strcmp(cl, "EN 16931"))
  149. ret = PDF_ZUGFERD_COMFORT;
  150. else if (!strcmp(cl, "BASIC"))
  151. ret = PDF_ZUGFERD_BASIC;
  152. else if (!strcmp(cl, "EXTENDED"))
  153. ret = PDF_ZUGFERD_EXTENDED;
  154. else if (!strcmp(cl, "BASIC WL"))
  155. ret = PDF_ZUGFERD_BASIC_WL;
  156. else if (!strcmp(cl, "MINIMUM"))
  157. ret = PDF_ZUGFERD_MINIMUM;
  158. else if (!strcmp(cl, "XRECHNUNG"))
  159. ret = PDF_ZUGFERD_XRECHNUNG;
  160. if (version)
  161. *version = fz_atof(v);
  162. if (!df)
  163. fz_warn(ctx, "ZUGFeRD doc is missing filename");
  164. else if (ret == PDF_ZUGFERD_XRECHNUNG && strcmp(df, "xrechnung.xml"))
  165. fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
  166. else if (ret != PDF_ZUGFERD_XRECHNUNG && strcmp(df, "factur-x.xml"))
  167. fz_warn(ctx, "ZUGFeRD doc has non-standard filename");
  168. if (fname && df)
  169. *fname = fz_strdup(ctx, df); /* Nothing can throw after this */
  170. break;
  171. }
  172. x = fz_xml_find_next_dfs(x, "Description", "xmlns:fx", "urn:factur-x:pdfa:CrossIndustryDocument:invoice:1p0#");
  173. }
  174. break;
  175. }
  176. }
  177. fz_always(ctx)
  178. {
  179. fz_drop_xml(ctx, xml);
  180. fz_drop_buffer(ctx, buf);
  181. }
  182. fz_catch(ctx)
  183. fz_rethrow(ctx);
  184. return ret;
  185. }
  186. enum pdf_zugferd_profile pdf_zugferd_profile(fz_context *ctx, pdf_document *doc, float *version)
  187. {
  188. return do_zugferd_profile(ctx, doc, version, NULL);
  189. }
  190. fz_buffer *pdf_zugferd_xml(fz_context *ctx, pdf_document *doc)
  191. {
  192. char *fname;
  193. float version;
  194. enum pdf_zugferd_profile p = do_zugferd_profile(ctx, doc, &version, &fname);
  195. int count, i;
  196. fz_buffer *buf = NULL;
  197. if (p == PDF_NOT_ZUGFERD)
  198. {
  199. fz_free(ctx, fname);
  200. return NULL;
  201. }
  202. fz_try(ctx)
  203. {
  204. count = pdf_count_document_associated_files(ctx, doc);
  205. for (i = 0; i < count; i++)
  206. {
  207. pdf_obj *fs = pdf_document_associated_file(ctx, doc, i);
  208. pdf_filespec_params params;
  209. pdf_get_filespec_params(ctx, fs, &params);
  210. if (!strcmp(fname, params.filename))
  211. {
  212. if (!pdf_is_embedded_file(ctx, fs))
  213. fz_throw(ctx, FZ_ERROR_FORMAT, "ZUGFeRD XML was not embedded");
  214. buf = pdf_load_embedded_file_contents(ctx, fs);
  215. break;
  216. }
  217. }
  218. }
  219. fz_always(ctx)
  220. fz_free(ctx, fname);
  221. fz_catch(ctx)
  222. fz_rethrow(ctx);
  223. return buf;
  224. }
  225. const char *
  226. pdf_zugferd_profile_to_string(fz_context *ctx, enum pdf_zugferd_profile profile)
  227. {
  228. static const char *strings[] =
  229. {
  230. "NOT ZUGFERD",
  231. "COMFORT",
  232. "BASIC",
  233. "EXTENDED",
  234. "BASIC WL",
  235. "MINIMUM",
  236. "XRECHNUNG",
  237. "UNKNOWN"
  238. };
  239. if (profile < PDF_NOT_ZUGFERD || profile > PDF_ZUGFERD_UNKNOWN)
  240. profile = PDF_ZUGFERD_UNKNOWN;
  241. return strings[profile];
  242. }