xml-write.c 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  1. // Copyright (C) 2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "xml-imp.h"
  23. void fz_save_xml(fz_context *ctx, fz_xml *root, const char *path, int indented)
  24. {
  25. fz_output *out = fz_new_output_with_path(ctx, path, 0);
  26. fz_try(ctx)
  27. {
  28. fz_write_xml(ctx, root, out, indented);
  29. fz_close_output(ctx, out);
  30. }
  31. fz_always(ctx)
  32. fz_drop_output(ctx, out);
  33. fz_catch(ctx)
  34. fz_rethrow(ctx);
  35. }
  36. static void
  37. xml_escape_tag(fz_context *ctx, fz_output *out, const char *s)
  38. {
  39. while (1)
  40. {
  41. int c;
  42. size_t len = fz_chartorune(&c, s);
  43. size_t i;
  44. if (c == 0)
  45. break;
  46. if (c == '<')
  47. fz_write_string(ctx, out, "&lt;");
  48. else if (c == '>')
  49. fz_write_string(ctx, out, "&gt;");
  50. else if (c == '&')
  51. fz_write_string(ctx, out, "&amp;");
  52. else
  53. for (i = 0; i < len; i++)
  54. {
  55. char d = s[i];
  56. if (d < 32 || d >= 127)
  57. {
  58. fz_write_string(ctx, out, "&#x");
  59. fz_write_byte(ctx, out, "0123456789abcdef"[(d>>4)&15]);
  60. fz_write_byte(ctx, out, "0123456789abcdef"[d&15]);
  61. fz_write_byte(ctx, out, ';');
  62. }
  63. else
  64. fz_write_byte(ctx, out, d);
  65. }
  66. s += len;
  67. }
  68. }
  69. static void
  70. xml_escape_string(fz_context *ctx, fz_output *out, const char *s)
  71. {
  72. while (1)
  73. {
  74. int c;
  75. size_t len = fz_chartorune(&c, s);
  76. size_t i;
  77. if (c == 0)
  78. break;
  79. if (c == '<')
  80. fz_write_string(ctx, out, "&lt;");
  81. else if (c == '>')
  82. fz_write_string(ctx, out, "&gt;");
  83. else if (c == '&')
  84. fz_write_string(ctx, out, "&amp;");
  85. else if (c == '\"')
  86. {
  87. fz_write_string(ctx, out, "&quot;");
  88. }
  89. else
  90. for (i = 0; i < len; i++)
  91. {
  92. char d = s[i];
  93. if (d < 32 || d >= 127)
  94. {
  95. fz_write_string(ctx, out, "&#x");
  96. fz_write_byte(ctx, out, "0123456789abcdef"[(d>>4)&15]);
  97. fz_write_byte(ctx, out, "0123456789abcdef"[d&15]);
  98. fz_write_byte(ctx, out, ';');
  99. }
  100. else
  101. fz_write_byte(ctx, out, d);
  102. }
  103. s += len;
  104. }
  105. }
  106. static void
  107. indent(fz_context *ctx, fz_output *out, int depth)
  108. {
  109. fz_write_byte(ctx, out, '\n');
  110. while (depth-- > 0)
  111. {
  112. fz_write_byte(ctx, out, ' ');
  113. }
  114. }
  115. static int
  116. do_write(fz_context *ctx, fz_xml *node, fz_output *out, int depth)
  117. {
  118. const char *tag;
  119. fz_xml *down;
  120. int last_was_text = 0;
  121. for (; node != NULL; node = fz_xml_next(node))
  122. {
  123. struct attribute *att;
  124. tag = fz_xml_tag(node);
  125. if (!tag)
  126. {
  127. /* Text node. */
  128. char *text = fz_xml_text(node);
  129. if (text)
  130. xml_escape_tag(ctx, out, text);
  131. last_was_text = 1;
  132. continue;
  133. }
  134. last_was_text = 0;
  135. if (depth >= 0)
  136. indent(ctx, out, depth);
  137. fz_write_byte(ctx, out, '<');
  138. xml_escape_tag(ctx, out, tag);
  139. for (att = node->u.node.u.d.atts; att; att = att->next)
  140. {
  141. fz_write_byte(ctx, out, ' ');
  142. xml_escape_tag(ctx, out, att->name);
  143. fz_write_string(ctx, out, "=\"");
  144. xml_escape_string(ctx, out, att->value);
  145. fz_write_byte(ctx, out, '\"');
  146. }
  147. down = fz_xml_down(node);
  148. if (down)
  149. {
  150. fz_write_byte(ctx, out, '>');
  151. if (!do_write(ctx, down, out, depth >= 0 ? depth+1 : -1))
  152. indent(ctx, out, depth);
  153. fz_write_string(ctx, out, "</");
  154. xml_escape_tag(ctx, out, tag);
  155. fz_write_byte(ctx, out, '>');
  156. }
  157. else
  158. {
  159. fz_write_string(ctx, out, "/>");
  160. }
  161. }
  162. return depth >= 0 ? last_was_text : 1;
  163. }
  164. void
  165. fz_write_xml(fz_context *ctx, fz_xml *root, fz_output *out, int indented)
  166. {
  167. if (root == NULL)
  168. return;
  169. fz_write_string(ctx, out, "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>");
  170. /* Skip over the document object, if we're handed that. */
  171. if (root->up == NULL)
  172. root = root->down;
  173. if (!do_write(ctx, root, out, indented ? 0 : -1))
  174. fz_write_byte(ctx, out, '\n');
  175. }