xps-util.c 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "xps-imp.h"
  /*
   * Locale-independent lowercase conversion for a single character code.
   *
   * Only the ASCII range 'A'..'Z' is mapped (by adding 32, the distance
   * between the upper and lower case ASCII letters); every other value,
   * including negative ones, is returned untouched.
   */
  static inline int xps_tolower(int c)
  {
      return (c >= 'A' && c <= 'Z') ? c + 32 : c;
  }
  /*
   * Compare two NUL-terminated strings, ignoring ASCII case.
   *
   * Returns 0 when the strings are equal after folding 'A'..'Z' to
   * 'a'..'z'; otherwise returns the difference between the first pair of
   * folded characters that do not match (negative if a sorts before b,
   * positive if it sorts after).
   */
  int
  xps_strcasecmp(char *a, char *b)
  {
      for (;; a++, b++)
      {
          int ca = xps_tolower(*a);
          int cb = xps_tolower(*b);

          if (ca != cb)
              return ca - cb;

          /* Both strings ended at the same position: they are equal. */
          if (*a == 0)
              return 0;
      }
  }
  41. /* A URL is defined as consisting of a:
  42. * SCHEME (e.g. http:)
  43. * AUTHORITY (username, password, hostname, port, eg //test:passwd@mupdf.com:999)
  44. * PATH (e.g. /download)
  45. * QUERY (e.g. ?view=page)
  46. * FRAGMENT (e.g. #fred) (not strictly part of the URL)
  47. */
  /*
   * Step over a leading URL scheme, if there is one.
   *
   * A scheme is: alpha *(alpha | digit | "+" | "-" | ".") followed by ':'.
   *
   * Returns a pointer to the character just after the ':' when path starts
   * with a well-formed scheme, and path itself otherwise (including when
   * path is empty or the candidate scheme is never closed by a ':').
   */
  static char *
  skip_scheme(char *path)
  {
      char *cursor = path;
      char ch = *cursor;
      int is_alpha = (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');

      /* A scheme has to open with a letter. */
      if (!is_alpha)
          return path;

      for (cursor++; *cursor != 0; cursor++)
      {
          int is_scheme_char;

          ch = *cursor;
          if (ch == ':')
              return cursor + 1;

          is_scheme_char =
              (ch >= 'a' && ch <= 'z') ||
              (ch >= 'A' && ch <= 'Z') ||
              (ch >= '0' && ch <= '9') ||
              ch == '+' || ch == '-' || ch == '.';

          /* Any other character means this was not a scheme after all. */
          if (!is_scheme_char)
              break;
      }

      return path;
  }
  /*
   * Step over a leading URL authority section, if there is one.
   *
   * An authority must begin with "//" and runs until the end of the string
   * or the first '/' or '?' that follows.
   *
   * Returns a pointer to that terminating character (or to the NUL), or
   * path itself when it does not begin with "//".
   */
  static char *
  skip_authority(char *path)
  {
      char *end;

      if (path[0] == '/' && path[1] == '/')
      {
          for (end = path + 2; *end != 0; end++)
          {
              if (*end == '/' || *end == '?')
                  break;
          }
          return end;
      }

      return path;
  }
  /* True when x ends a path element: a slash or the string terminator. */
  #define SEP(x) ((x)=='/' || (x) == 0)

  /*
   * Normalise the path part of a URL in place and return name.
   *
   * Any leading scheme and authority are left untouched. In the remainder:
   *   - empty elements (repeated slashes) and "." elements are dropped;
   *   - ".." removes the element written before it, where there is one;
   *   - ".." that cannot backtrack is dropped for a rooted path ("/.." is
   *     "/") and kept for a relative one ("./../" is "..");
   *   - a relative path that collapses to nothing becomes ".".
   *
   * The result is never longer than the input, so it is written over it.
   */
  static char *
  clean_path(char *name)
  {
      char *src, *dst, *floor, *start;
      int rooted;

      start = skip_authority(skip_scheme(name));
      rooted = (start[0] == '/');

      /*
       * invariants:
       *	src points at the beginning of the element being considered.
       *	dst points just past the last element written (no slash).
       *	floor points just past the point where ".." cannot backtrack
       *	any further (no slash).
       */
      src = dst = floor = start + rooted;

      while (*src != 0)
      {
          /* Null element: just a separator. */
          if (src[0] == '/')
          {
              src++;
              continue;
          }

          /* "." element: skip the dot only, the separator may be the NUL. */
          if (src[0] == '.' && SEP(src[1]))
          {
              src += 1;
              continue;
          }

          /* ".." element. */
          if (src[0] == '.' && src[1] == '.' && SEP(src[2]))
          {
              src += 2;
              if (dst > floor)
              {
                  /* Can backtrack: drop the element written last. */
                  do
                      dst--;
                  while (dst > floor && *dst != '/');
              }
              else if (!rooted)
              {
                  /* Relative path with nothing to drop: keep the "..". */
                  if (dst != start)
                      *dst++ = '/';
                  dst[0] = '.';
                  dst[1] = '.';
                  dst += 2;
                  floor = dst;
              }
              continue;
          }

          /* Real path element: add a separator if needed, then copy it. */
          if (dst != start + rooted)
              *dst++ = '/';
          for (;;)
          {
              char ch = *src;

              /* The terminating '/' or NUL is stored but not stepped over. */
              *dst = ch;
              if (ch == '/' || ch == 0)
                  break;
              src++;
              dst++;
          }
      }

      /* Protect against 'blah:' input, where start = dst = the terminator.
       * We must not overrun it. */
      if (dst == start && *dst != 0) /* empty string is really "." */
          *dst++ = '.';
      *dst = '\0';

      return name;
  }
  151. void
  152. xps_resolve_url(fz_context *ctx, xps_document *doc, char *output, char *base_uri, char *path, int output_size)
  153. {
  154. char *p = skip_authority(skip_scheme(path));
  155. if (p != path || path[0] == '/')
  156. {
  157. fz_strlcpy(output, path, output_size);
  158. }
  159. else
  160. {
  161. size_t len = fz_strlcpy(output, base_uri, output_size);
  162. if (len == 0 || output[len-1] != '/')
  163. fz_strlcat(output, "/", output_size);
  164. fz_strlcat(output, path, output_size);
  165. }
  166. clean_path(output);
  167. }