pdf-cmap.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "mupdf/pdf.h"
  24. #include <assert.h>
  25. #include <string.h>
  26. #undef CHECK_SPLAY
  27. #undef DUMP_SPLAY
  28. /*
  29. * Allocate, destroy and simple parameters.
  30. */
  31. void
  32. pdf_drop_cmap_imp(fz_context *ctx, fz_storable *cmap_)
  33. {
  34. pdf_cmap *cmap = (pdf_cmap *)cmap_;
  35. pdf_drop_cmap(ctx, cmap->usecmap);
  36. fz_free(ctx, cmap->ranges);
  37. fz_free(ctx, cmap->xranges);
  38. fz_free(ctx, cmap->mranges);
  39. fz_free(ctx, cmap->dict);
  40. fz_free(ctx, cmap->tree);
  41. fz_free(ctx, cmap);
  42. }
  43. pdf_cmap *
  44. pdf_new_cmap(fz_context *ctx)
  45. {
  46. pdf_cmap *cmap = fz_malloc_struct(ctx, pdf_cmap);
  47. FZ_INIT_STORABLE(cmap, 1, pdf_drop_cmap_imp);
  48. return cmap;
  49. }
  50. /* Could be a macro for speed */
  51. pdf_cmap *
  52. pdf_keep_cmap(fz_context *ctx, pdf_cmap *cmap)
  53. {
  54. return fz_keep_storable(ctx, &cmap->storable);
  55. }
  56. /* Could be a macro for speed */
  57. void
  58. pdf_drop_cmap(fz_context *ctx, pdf_cmap *cmap)
  59. {
  60. fz_drop_storable(ctx, &cmap->storable);
  61. }
  62. void
  63. pdf_set_usecmap(fz_context *ctx, pdf_cmap *cmap, pdf_cmap *usecmap)
  64. {
  65. int i;
  66. pdf_drop_cmap(ctx, cmap->usecmap);
  67. cmap->usecmap = pdf_keep_cmap(ctx, usecmap);
  68. if (cmap->codespace_len == 0)
  69. {
  70. cmap->codespace_len = usecmap->codespace_len;
  71. for (i = 0; i < usecmap->codespace_len; i++)
  72. cmap->codespace[i] = usecmap->codespace[i];
  73. }
  74. }
  75. int
  76. pdf_cmap_wmode(fz_context *ctx, pdf_cmap *cmap)
  77. {
  78. return cmap->wmode;
  79. }
  80. void
  81. pdf_set_cmap_wmode(fz_context *ctx, pdf_cmap *cmap, int wmode)
  82. {
  83. cmap->wmode = wmode;
  84. }
  85. void
  86. pdf_add_codespace(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, size_t n)
  87. {
  88. if (cmap->codespace_len + 1 == nelem(cmap->codespace))
  89. {
  90. fz_warn(ctx, "assert: too many code space ranges");
  91. return;
  92. }
  93. if ((uint32_t)n != n)
  94. {
  95. fz_warn(ctx, "assert: code space range too large");
  96. return;
  97. }
  98. cmap->codespace[cmap->codespace_len].n = (int)n;
  99. cmap->codespace[cmap->codespace_len].low = low;
  100. cmap->codespace[cmap->codespace_len].high = high;
  101. cmap->codespace_len ++;
  102. }
  103. struct cmap_splay {
  104. unsigned int low;
  105. unsigned int high;
  106. unsigned int out;
  107. unsigned int left;
  108. unsigned int right;
  109. unsigned int parent : 31;
  110. unsigned int many : 1;
  111. };
  112. #define EMPTY ((unsigned int)0x40000000)
  113. /*
  114. The splaying steps used:
  115. Case 1: | z x
  116. | y D => A y
  117. | x C B z
  118. | A B C D
  119. Case 2: | z x
  120. | y D => y z
  121. | A x A B C D
  122. | B C
  123. Case 3: | y x
  124. | x C => A y
  125. | A B B C
  126. */
  127. static void
  128. move_to_root(cmap_splay *tree, unsigned int x)
  129. {
  130. if (x == EMPTY)
  131. return;
  132. do
  133. {
  134. unsigned int z, zp;
  135. unsigned int y = tree[x].parent;
  136. if (y == EMPTY)
  137. break;
  138. z = tree[y].parent;
  139. if (z == EMPTY)
  140. {
  141. /* Case 3 */
  142. tree[x].parent = EMPTY;
  143. tree[y].parent = x;
  144. if (tree[y].left == x)
  145. {
  146. /* Case 3 */
  147. tree[y].left = tree[x].right;
  148. if (tree[y].left != EMPTY)
  149. tree[tree[y].left].parent = y;
  150. tree[x].right = y;
  151. }
  152. else
  153. {
  154. /* Case 3 - reflected */
  155. assert(tree[y].right == x);
  156. tree[y].right = tree[x].left;
  157. if (tree[y].right != EMPTY)
  158. tree[tree[y].right].parent = y;
  159. tree[x].left = y;
  160. }
  161. break;
  162. }
  163. zp = tree[z].parent;
  164. tree[x].parent = zp;
  165. if (zp != EMPTY) {
  166. if (tree[zp].left == z)
  167. tree[zp].left = x;
  168. else
  169. {
  170. assert(tree[zp].right == z);
  171. tree[zp].right = x;
  172. }
  173. }
  174. tree[y].parent = x;
  175. if (tree[y].left == x)
  176. {
  177. tree[y].left = tree[x].right;
  178. if (tree[y].left != EMPTY)
  179. tree[tree[y].left].parent = y;
  180. tree[x].right = y;
  181. if (tree[z].left == y)
  182. {
  183. /* Case 1 */
  184. tree[z].parent = y;
  185. tree[z].left = tree[y].right;
  186. if (tree[z].left != EMPTY)
  187. tree[tree[z].left].parent = z;
  188. tree[y].right = z;
  189. }
  190. else
  191. {
  192. /* Case 2 - reflected */
  193. assert(tree[z].right == y);
  194. tree[z].parent = x;
  195. tree[z].right = tree[x].left;
  196. if (tree[z].right != EMPTY)
  197. tree[tree[z].right].parent = z;
  198. tree[x].left = z;
  199. }
  200. }
  201. else
  202. {
  203. assert(tree[y].right == x);
  204. tree[y].right = tree[x].left;
  205. if (tree[y].right != EMPTY)
  206. tree[tree[y].right].parent = y;
  207. tree[x].left = y;
  208. if (tree[z].left == y)
  209. {
  210. /* Case 2 */
  211. tree[z].parent = x;
  212. tree[z].left = tree[x].right;
  213. if (tree[z].left != EMPTY)
  214. tree[tree[z].left].parent = z;
  215. tree[x].right = z;
  216. }
  217. else
  218. {
  219. /* Case 1 - reflected */
  220. assert(tree[z].right == y);
  221. tree[z].parent = y;
  222. tree[z].right = tree[y].left;
  223. if (tree[z].right != EMPTY)
  224. tree[tree[z].right].parent = z;
  225. tree[y].left = z;
  226. }
  227. }
  228. } while (1);
  229. }
  230. static unsigned int delete_node(pdf_cmap *cmap, unsigned int current)
  231. {
  232. cmap_splay *tree = cmap->tree;
  233. unsigned int parent;
  234. unsigned int replacement;
  235. assert(current != EMPTY);
  236. parent = tree[current].parent;
  237. if (tree[current].right == EMPTY)
  238. {
  239. if (parent == EMPTY)
  240. {
  241. replacement = cmap->ttop = tree[current].left;
  242. }
  243. else if (tree[parent].left == current)
  244. {
  245. replacement = tree[parent].left = tree[current].left;
  246. }
  247. else
  248. {
  249. assert(tree[parent].right == current);
  250. replacement = tree[parent].right = tree[current].left;
  251. }
  252. if (replacement != EMPTY)
  253. tree[replacement].parent = parent;
  254. else
  255. replacement = parent;
  256. }
  257. else if (tree[current].left == EMPTY)
  258. {
  259. if (parent == EMPTY)
  260. {
  261. replacement = cmap->ttop = tree[current].right;
  262. }
  263. else if (tree[parent].left == current)
  264. {
  265. replacement = tree[parent].left = tree[current].right;
  266. }
  267. else
  268. {
  269. assert(tree[parent].right == current);
  270. replacement = tree[parent].right = tree[current].right;
  271. }
  272. if (replacement != EMPTY)
  273. tree[replacement].parent = parent;
  274. else
  275. replacement = parent;
  276. }
  277. else
  278. {
  279. /* Hard case, find the in-order predecessor of current */
  280. unsigned int amputee = current;
  281. replacement = tree[current].left;
  282. while (tree[replacement].right != EMPTY) {
  283. amputee = replacement;
  284. replacement = tree[replacement].right;
  285. }
  286. /* Remove replacement from the tree */
  287. if (amputee == current)
  288. {
  289. tree[amputee].left = tree[replacement].left;
  290. if (tree[amputee].left != EMPTY)
  291. tree[tree[amputee].left].parent = amputee;
  292. }
  293. else
  294. {
  295. tree[amputee].right = tree[replacement].left;
  296. if (tree[amputee].right != EMPTY)
  297. tree[tree[amputee].right].parent = amputee;
  298. }
  299. /* Insert replacement in place of current */
  300. tree[replacement].parent = parent;
  301. if (parent == EMPTY)
  302. {
  303. tree[replacement].parent = EMPTY;
  304. cmap->ttop = replacement;
  305. }
  306. else if (tree[parent].left == current)
  307. tree[parent].left = replacement;
  308. else
  309. {
  310. assert(tree[parent].right == current);
  311. tree[parent].right = replacement;
  312. }
  313. tree[replacement].left = tree[current].left;
  314. if (tree[replacement].left != EMPTY)
  315. tree[tree[replacement].left].parent = replacement;
  316. tree[replacement].right = tree[current].right;
  317. if (tree[replacement].right != EMPTY)
  318. tree[tree[replacement].right].parent = replacement;
  319. }
  320. /* current is now unlinked. We need to remove it from our array. */
  321. cmap->tlen--;
  322. if (current != (unsigned int) cmap->tlen)
  323. {
  324. if (replacement == (unsigned int) cmap->tlen)
  325. replacement = current;
  326. tree[current] = tree[cmap->tlen];
  327. parent = tree[current].parent;
  328. if (parent == EMPTY)
  329. cmap->ttop = current;
  330. else if (tree[parent].left == (unsigned int) cmap->tlen)
  331. tree[parent].left = current;
  332. else
  333. {
  334. assert(tree[parent].right == (unsigned int) cmap->tlen);
  335. tree[parent].right = current;
  336. }
  337. if (tree[current].left != EMPTY)
  338. {
  339. assert(tree[tree[current].left].parent == (unsigned int) cmap->tlen);
  340. tree[tree[current].left].parent = current;
  341. }
  342. if (tree[current].right != EMPTY)
  343. {
  344. assert(tree[tree[current].right].parent == (unsigned int) cmap->tlen);
  345. tree[tree[current].right].parent = current;
  346. }
  347. }
  348. /* Return the node that we should continue searching from */
  349. return replacement;
  350. }
  351. #ifdef DUMP_SPLAY
  352. static void
  353. dump_splay(cmap_splay *tree, unsigned int node, int depth, const char *pre)
  354. {
  355. int i;
  356. if (tree == NULL || node == EMPTY)
  357. return;
  358. for (i = 0; i < depth; i++)
  359. fprintf(stderr, " ");
  360. fprintf(stderr, "%s%d:", pre, node);
  361. if (tree[node].parent == EMPTY)
  362. fprintf(stderr, "^EMPTY");
  363. else
  364. fprintf(stderr, "^%d", tree[node].parent);
  365. if (tree[node].left == EMPTY)
  366. fprintf(stderr, "<EMPTY");
  367. else
  368. fprintf(stderr, "<%d", tree[node].left);
  369. if (tree[node].right == EMPTY)
  370. fprintf(stderr, ">EMPTY");
  371. else
  372. fprintf(stderr, ">%d", tree[node].right);
  373. fprintf(stderr, "(%x,%x,%x,%d)\n", tree[node].low, tree[node].high, tree[node].out, tree[node].many);
  374. assert(tree[node].parent == EMPTY || depth);
  375. assert(tree[node].left == EMPTY || tree[tree[node].left].parent == node);
  376. assert(tree[node].right == EMPTY || tree[tree[node].right].parent == node);
  377. dump_splay(tree, tree[node].left, depth+1, "L");
  378. dump_splay(tree, tree[node].right, depth+1, "R");
  379. }
  380. #endif
  381. enum
  382. {
  383. TOP = 0,
  384. LEFT = 1,
  385. RIGHT = 2
  386. };
  387. static void walk_splay(cmap_splay *tree, unsigned int node, void (*fn)(cmap_splay *, void *), void *arg)
  388. {
  389. int from = TOP;
  390. while (node != EMPTY)
  391. {
  392. switch (from)
  393. {
  394. case TOP:
  395. if (tree[node].left != EMPTY)
  396. {
  397. node = tree[node].left;
  398. from = TOP;
  399. break;
  400. }
  401. /* fallthrough */
  402. case LEFT:
  403. fn(&tree[node], arg);
  404. if (tree[node].right != EMPTY)
  405. {
  406. node = tree[node].right;
  407. from = TOP;
  408. break;
  409. }
  410. /* fallthrough */
  411. case RIGHT:
  412. {
  413. unsigned int parent = tree[node].parent;
  414. if (parent == EMPTY)
  415. return;
  416. if (tree[parent].left == node)
  417. from = LEFT;
  418. else
  419. {
  420. assert(tree[parent].right == node);
  421. from = RIGHT;
  422. }
  423. node = parent;
  424. }
  425. }
  426. }
  427. }
  428. #ifdef CHECK_SPLAY
  429. static int
  430. tree_has_overlap(cmap_splay *tree, int node, int low, int high)
  431. {
  432. if (tree[node].left != EMPTY)
  433. if (tree_has_overlap(tree, tree[node].left, low, high))
  434. return 1;
  435. if (tree[node].right != EMPTY)
  436. if (tree_has_overlap(tree, tree[node].right, low, high))
  437. return 1;
  438. return (tree[node].low < low && low < tree[node].high) || (tree[node].low < high && high < tree[node].high);
  439. }
  440. static void
  441. do_check(cmap_splay *node, void *arg)
  442. {
  443. cmap_splay *tree = arg;
  444. unsigned int num = node - tree;
  445. assert(!node->many || node->low == node->high);
  446. assert(node->low <= node->high);
  447. assert((node->left == EMPTY) || (tree[node->left].parent == num &&
  448. tree[node->left].high < node->low));
  449. assert(node->right == EMPTY || (tree[node->right].parent == num &&
  450. node->high < tree[node->right].low));
  451. assert(!tree_has_overlap(tree, num, node->low, node->high));
  452. }
  453. static void
  454. check_splay(cmap_splay *tree, unsigned int node, int depth)
  455. {
  456. if (node == EMPTY)
  457. return;
  458. assert(tree[node].parent == EMPTY);
  459. walk_splay(tree, node, do_check, tree);
  460. }
  461. #endif
  462. /*
  463. * Add a range.
  464. */
  465. static void
  466. add_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, unsigned int out, int check_for_overlap, int many)
  467. {
  468. int current;
  469. cmap_splay *tree;
  470. if (low > high)
  471. {
  472. fz_warn(ctx, "range limits out of range in cmap %s", cmap->cmap_name);
  473. return;
  474. }
  475. if (cmap->codespace_len == 0)
  476. {
  477. fz_warn(ctx, "CMap is missing codespace range");
  478. pdf_add_codespace(ctx, cmap, 0, 65535, 2);
  479. }
  480. tree = cmap->tree;
  481. if (cmap->tlen)
  482. {
  483. unsigned int move = cmap->ttop;
  484. unsigned int gt = EMPTY;
  485. unsigned int lt = EMPTY;
  486. if (check_for_overlap)
  487. {
  488. /* Check for collision with the current node */
  489. do
  490. {
  491. current = move;
  492. /* Cases we might meet:
  493. * tree[i]: <----->
  494. * case 0: <->
  495. * case 1: <------->
  496. * case 2: <------------->
  497. * case 3: <->
  498. * case 4: <------->
  499. * case 5: <->
  500. */
  501. if (low <= tree[current].low && tree[current].low <= high)
  502. {
  503. /* case 1, reduces to case 0 */
  504. /* or case 2, deleting the node */
  505. tree[current].out += high + 1 - tree[current].low;
  506. tree[current].low = high + 1;
  507. if (tree[current].low > tree[current].high)
  508. {
  509. /* update lt/gt references that will be moved/stale after deleting current */
  510. if (gt == (unsigned int) cmap->tlen - 1)
  511. gt = current;
  512. if (lt == (unsigned int) cmap->tlen - 1)
  513. lt = current;
  514. /* delete_node() moves the element at cmap->tlen-1 into current */
  515. move = delete_node(cmap, current);
  516. current = EMPTY;
  517. continue;
  518. }
  519. }
  520. else if (low <= tree[current].high && tree[current].high <= high)
  521. {
  522. /* case 4, reduces to case 5 */
  523. tree[current].high = low - 1;
  524. assert(tree[current].low <= tree[current].high);
  525. }
  526. else if (tree[current].low < low && high < tree[current].high)
  527. {
  528. /* case 3, reduces to case 5 */
  529. int new_high = tree[current].high;
  530. tree[current].high = low-1;
  531. add_range(ctx, cmap, high+1, new_high, tree[current].out + high + 1 - tree[current].low, 0, tree[current].many);
  532. tree = cmap->tree;
  533. }
  534. /* Now look for where to move to next (left for case 0, right for case 5) */
  535. if (tree[current].low > high) {
  536. move = tree[current].left;
  537. gt = current;
  538. }
  539. else
  540. {
  541. move = tree[current].right;
  542. lt = current;
  543. }
  544. }
  545. while (move != EMPTY);
  546. }
  547. else
  548. {
  549. do
  550. {
  551. current = move;
  552. if (tree[current].low > high)
  553. {
  554. move = tree[current].left;
  555. gt = current;
  556. }
  557. else
  558. {
  559. move = tree[current].right;
  560. lt = current;
  561. }
  562. } while (move != EMPTY);
  563. }
  564. /* current is now the node to which we would be adding the new node */
  565. /* lt is the last node we traversed which is lt the new node. */
  566. /* gt is the last node we traversed which is gt the new node. */
  567. if (!many)
  568. {
  569. /* Check for the 'merge' cases. */
  570. if (lt != EMPTY && !tree[lt].many && tree[lt].high == low-1 && tree[lt].out - tree[lt].low == out - low)
  571. {
  572. tree[lt].high = high;
  573. if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
  574. {
  575. tree[lt].high = tree[gt].high;
  576. delete_node(cmap, gt);
  577. }
  578. goto exit;
  579. }
  580. if (gt != EMPTY && !tree[gt].many && tree[gt].low == high+1 && tree[gt].out - tree[gt].low == out - low)
  581. {
  582. tree[gt].low = low;
  583. tree[gt].out = out;
  584. goto exit;
  585. }
  586. }
  587. }
  588. else
  589. current = EMPTY;
  590. if (cmap->tlen == cmap->tcap)
  591. {
  592. int new_cap = cmap->tcap ? cmap->tcap * 2 : 256;
  593. tree = cmap->tree = fz_realloc_array(ctx, cmap->tree, new_cap, cmap_splay);
  594. cmap->tcap = new_cap;
  595. }
  596. tree[cmap->tlen].low = low;
  597. tree[cmap->tlen].high = high;
  598. tree[cmap->tlen].out = out;
  599. tree[cmap->tlen].parent = current;
  600. tree[cmap->tlen].left = EMPTY;
  601. tree[cmap->tlen].right = EMPTY;
  602. tree[cmap->tlen].many = many;
  603. cmap->tlen++;
  604. if (current == EMPTY)
  605. cmap->ttop = 0;
  606. else if (tree[current].low > high)
  607. tree[current].left = cmap->tlen-1;
  608. else
  609. {
  610. assert(tree[current].high < low);
  611. tree[current].right = cmap->tlen-1;
  612. }
  613. move_to_root(tree, cmap->tlen-1);
  614. cmap->ttop = cmap->tlen-1;
  615. exit:
  616. {}
  617. #ifdef CHECK_SPLAY
  618. check_splay(cmap->tree, cmap->ttop, 0);
  619. #endif
  620. #ifdef DUMP_SPLAY
  621. dump_splay(cmap->tree, cmap->ttop, 0, "");
  622. #endif
  623. }
  624. /*
  625. * Add a one-to-many mapping.
  626. */
  627. static void
  628. add_mrange(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *out, int len)
  629. {
  630. int out_pos;
  631. if (cmap->dlen + len + 1 > cmap->dcap)
  632. {
  633. int new_cap = cmap->dcap ? cmap->dcap * 2 : 256;
  634. cmap->dict = fz_realloc_array(ctx, cmap->dict, new_cap, int);
  635. cmap->dcap = new_cap;
  636. }
  637. out_pos = cmap->dlen;
  638. cmap->dict[out_pos] = len;
  639. memcpy(&cmap->dict[out_pos+1], out, sizeof(int)*len);
  640. cmap->dlen += len + 1;
  641. add_range(ctx, cmap, low, low, out_pos, 1, 1);
  642. }
  643. void
  644. pdf_map_range_to_range(fz_context *ctx, pdf_cmap *cmap, unsigned int low, unsigned int high, int out)
  645. {
  646. add_range(ctx, cmap, low, high, out, 1, 0);
  647. }
  648. void
  649. pdf_map_one_to_many(fz_context *ctx, pdf_cmap *cmap, unsigned int low, int *values, size_t len)
  650. {
  651. int *ovalues = values;
  652. /* len is always restricted to <= 256 by the callers. */
  653. int local[256];
  654. assert(len <= 256);
  655. /* Decode unicode surrogate pairs. */
  656. /* Only the *-UCS2 CMaps use one-to-many mappings, so assuming unicode should be safe. */
  657. if (len >= 2)
  658. {
  659. size_t i, j;
  660. /* Look for mranges with either multiple surrogate pairs in, or surrogate pairs
  661. * with other chars. See bug 706131. */
  662. for (i = 0, j = 0; i < len; i++, j++)
  663. {
  664. int hi = ovalues[i];
  665. if (hi >= 0xd800 && hi < 0xdc00 && i < len-1)
  666. {
  667. int lo = ovalues[i+1];
  668. if (lo >= 0xdc00 && lo < 0xe000)
  669. {
  670. hi = ((hi - 0xD800) << 10) + (lo - 0xDC00) + 0x10000;
  671. i++;
  672. }
  673. }
  674. if (values != local)
  675. {
  676. /* We can't change the callers data, so copy stuff in. */
  677. if (j)
  678. memcpy(local, values, sizeof(local[0]) * (j-1));
  679. values = local;
  680. }
  681. values[j] = hi;
  682. }
  683. len = j;
  684. }
  685. if (len == 1)
  686. {
  687. add_range(ctx, cmap, low, low, values[0], 1, 0);
  688. return;
  689. }
  690. if (len > PDF_MRANGE_CAP)
  691. {
  692. fz_warn(ctx, "ignoring one to many mapping in cmap %s", cmap->cmap_name);
  693. return;
  694. }
  695. add_mrange(ctx, cmap, low, values, (int)len);
  696. }
  697. static void
  698. count_node_types(cmap_splay *node, void *arg)
  699. {
  700. int *counts = (int *)arg;
  701. if (node->many)
  702. counts[2]++;
  703. else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
  704. counts[0]++;
  705. else
  706. counts[1]++;
  707. }
  708. static void
  709. copy_node_types(cmap_splay *node, void *arg)
  710. {
  711. pdf_cmap *cmap = (pdf_cmap *)arg;
  712. if (node->many)
  713. {
  714. assert(node->low == node->high);
  715. cmap->mranges[cmap->mlen].low = node->low;
  716. cmap->mranges[cmap->mlen].out = node->out;
  717. cmap->mlen++;
  718. }
  719. else if (node->low <= 0xffff && node->high <= 0xFFFF && node->out <= 0xFFFF)
  720. {
  721. cmap->ranges[cmap->rlen].low = node->low;
  722. cmap->ranges[cmap->rlen].high = node->high;
  723. cmap->ranges[cmap->rlen].out = node->out;
  724. cmap->rlen++;
  725. }
  726. else
  727. {
  728. cmap->xranges[cmap->xlen].low = node->low;
  729. cmap->xranges[cmap->xlen].high = node->high;
  730. cmap->xranges[cmap->xlen].out = node->out;
  731. cmap->xlen++;
  732. }
  733. }
  734. void
  735. pdf_sort_cmap(fz_context *ctx, pdf_cmap *cmap)
  736. {
  737. int counts[3];
  738. if (cmap->tree == NULL)
  739. return;
  740. counts[0] = 0;
  741. counts[1] = 0;
  742. counts[2] = 0;
  743. walk_splay(cmap->tree, cmap->ttop, count_node_types, &counts);
  744. cmap->ranges = Memento_label(fz_malloc_array(ctx, counts[0], pdf_range), "cmap_range");
  745. cmap->rcap = counts[0];
  746. cmap->xranges = Memento_label(fz_malloc_array(ctx, counts[1], pdf_xrange), "cmap_xrange");
  747. cmap->xcap = counts[1];
  748. cmap->mranges = Memento_label(fz_malloc_array(ctx, counts[2], pdf_mrange), "cmap_mrange");
  749. cmap->mcap = counts[2];
  750. walk_splay(cmap->tree, cmap->ttop, copy_node_types, cmap);
  751. fz_free(ctx, cmap->tree);
  752. cmap->tree = NULL;
  753. }
  754. int
  755. pdf_lookup_cmap(pdf_cmap *cmap, unsigned int cpt)
  756. {
  757. pdf_range *ranges = cmap->ranges;
  758. pdf_xrange *xranges = cmap->xranges;
  759. int l, r, m;
  760. l = 0;
  761. r = cmap->rlen - 1;
  762. while (l <= r)
  763. {
  764. m = (l + r) >> 1;
  765. if (cpt < ranges[m].low)
  766. r = m - 1;
  767. else if (cpt > ranges[m].high)
  768. l = m + 1;
  769. else
  770. return cpt - ranges[m].low + ranges[m].out;
  771. }
  772. l = 0;
  773. r = cmap->xlen - 1;
  774. while (l <= r)
  775. {
  776. m = (l + r) >> 1;
  777. if (cpt < xranges[m].low)
  778. r = m - 1;
  779. else if (cpt > xranges[m].high)
  780. l = m + 1;
  781. else
  782. return cpt - xranges[m].low + xranges[m].out;
  783. }
  784. if (cmap->usecmap)
  785. return pdf_lookup_cmap(cmap->usecmap, cpt);
  786. return -1;
  787. }
  788. int
  789. pdf_lookup_cmap_full(pdf_cmap *cmap, unsigned int cpt, int *out)
  790. {
  791. pdf_range *ranges = cmap->ranges;
  792. pdf_xrange *xranges = cmap->xranges;
  793. pdf_mrange *mranges = cmap->mranges;
  794. unsigned int i;
  795. int l, r, m;
  796. l = 0;
  797. r = cmap->rlen - 1;
  798. while (l <= r)
  799. {
  800. m = (l + r) >> 1;
  801. if (cpt < ranges[m].low)
  802. r = m - 1;
  803. else if (cpt > ranges[m].high)
  804. l = m + 1;
  805. else
  806. {
  807. out[0] = cpt - ranges[m].low + ranges[m].out;
  808. return 1;
  809. }
  810. }
  811. l = 0;
  812. r = cmap->xlen - 1;
  813. while (l <= r)
  814. {
  815. m = (l + r) >> 1;
  816. if (cpt < xranges[m].low)
  817. r = m - 1;
  818. else if (cpt > xranges[m].high)
  819. l = m + 1;
  820. else
  821. {
  822. out[0] = cpt - xranges[m].low + xranges[m].out;
  823. return 1;
  824. }
  825. }
  826. l = 0;
  827. r = cmap->mlen - 1;
  828. while (l <= r)
  829. {
  830. m = (l + r) >> 1;
  831. if (cpt < mranges[m].low)
  832. r = m - 1;
  833. else if (cpt > mranges[m].low)
  834. l = m + 1;
  835. else
  836. {
  837. int *ptr = &cmap->dict[cmap->mranges[m].out];
  838. unsigned int len = (unsigned int)*ptr++;
  839. for (i = 0; i < len; ++i)
  840. out[i] = *ptr++;
  841. return len;
  842. }
  843. }
  844. if (cmap->usecmap)
  845. return pdf_lookup_cmap_full(cmap->usecmap, cpt, out);
  846. return 0;
  847. }
  848. int
  849. pdf_decode_cmap(pdf_cmap *cmap, unsigned char *buf, unsigned char *end, unsigned int *cpt)
  850. {
  851. unsigned int c;
  852. int k, n;
  853. int len = end - buf;
  854. if (len > 4)
  855. len = 4;
  856. c = 0;
  857. for (n = 0; n < len; n++)
  858. {
  859. c = (c << 8) | buf[n];
  860. for (k = 0; k < cmap->codespace_len; k++)
  861. {
  862. if (cmap->codespace[k].n == n + 1)
  863. {
  864. if (c >= cmap->codespace[k].low && c <= cmap->codespace[k].high)
  865. {
  866. *cpt = c;
  867. return n + 1;
  868. }
  869. }
  870. }
  871. }
  872. *cpt = 0;
  873. return 1;
  874. }
  875. size_t
  876. pdf_cmap_size(fz_context *ctx, pdf_cmap *cmap)
  877. {
  878. if (cmap == NULL)
  879. return 0;
  880. if (cmap->storable.refs < 0)
  881. return 0;
  882. return pdf_cmap_size(ctx, cmap->usecmap) +
  883. cmap->rcap * sizeof *cmap->ranges +
  884. cmap->xcap * sizeof *cmap->xranges +
  885. cmap->mcap * sizeof *cmap->mranges +
  886. cmap->tcap * sizeof *cmap->tree +
  887. sizeof(*cmap);
  888. }