// stext-search.c
  1. // Copyright (C) 2004-2024 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <string.h>
  24. #include <limits.h>
  25. #include <assert.h>
  26. /* Enumerate marked selection */
  27. static float hdist(fz_point *dir, fz_point *a, fz_point *b)
  28. {
  29. float dx = b->x - a->x;
  30. float dy = b->y - a->y;
  31. return fz_abs(dx * dir->x - dy * dir->y);
  32. }
  33. static float vdist(fz_point *dir, fz_point *a, fz_point *b)
  34. {
  35. float dx = b->x - a->x;
  36. float dy = b->y - a->y;
  37. return fz_abs(dx * dir->y - dy * dir->x);
  38. }
  39. static float vecdot(fz_point a, fz_point b)
  40. {
  41. return a.x * b.x + a.y * b.y;
  42. }
  43. static float linedist(fz_point origin, fz_point dir, fz_point q)
  44. {
  45. return vecdot(dir, fz_make_point(q.x - origin.x, q.y - origin.y));
  46. }
  47. static int line_length(fz_stext_line *line)
  48. {
  49. fz_stext_char *ch;
  50. int n = 0;
  51. for (ch = line->first_char; ch; ch = ch->next)
  52. ++n;
  53. return n;
  54. }
  55. static float largest_size_in_line(fz_stext_line *line)
  56. {
  57. fz_stext_char *ch;
  58. float size = 0;
  59. for (ch = line->first_char; ch; ch = ch->next)
  60. if (ch->size > size)
  61. size = ch->size;
  62. return size;
  63. }
  64. static int find_closest_in_line(fz_stext_line *line, int idx, fz_point q)
  65. {
  66. fz_stext_char *ch;
  67. float closest_dist = 1e30f;
  68. int closest_idx = idx;
  69. float d1, d2;
  70. float hsize = largest_size_in_line(line) / 2;
  71. fz_point vdir = fz_make_point(-line->dir.y, line->dir.x);
  72. fz_point hdir = line->dir;
  73. // Compute mid-line from quads!
  74. fz_point p1 = fz_make_point(
  75. (line->first_char->quad.ll.x + line->first_char->quad.ul.x) / 2,
  76. (line->first_char->quad.ll.y + line->first_char->quad.ul.y) / 2
  77. );
  78. // Signed distance perpendicular mid-line (positive is below)
  79. float vdist = linedist(p1, vdir, q);
  80. if (vdist < -hsize)
  81. return idx;
  82. if (vdist > hsize)
  83. return idx + line_length(line);
  84. for (ch = line->first_char; ch; ch = ch->next)
  85. {
  86. if (ch->bidi & 1)
  87. {
  88. d1 = fz_abs(linedist(ch->quad.lr, hdir, q));
  89. d2 = fz_abs(linedist(ch->quad.ll, hdir, q));
  90. }
  91. else
  92. {
  93. d1 = fz_abs(linedist(ch->quad.ll, hdir, q));
  94. d2 = fz_abs(linedist(ch->quad.lr, hdir, q));
  95. }
  96. if (d1 < closest_dist)
  97. {
  98. closest_dist = d1;
  99. closest_idx = idx;
  100. }
  101. if (d2 < closest_dist)
  102. {
  103. closest_dist = d2;
  104. closest_idx = idx + 1;
  105. }
  106. ++idx;
  107. }
  108. return closest_idx;
  109. }
  110. static int find_closest_in_page(fz_stext_page *page, fz_point q)
  111. {
  112. fz_stext_block *block;
  113. fz_stext_line *line;
  114. fz_stext_line *closest_line = NULL;
  115. int closest_idx = 0;
  116. float closest_vdist = 1e30f;
  117. float closest_hdist = 1e30f;
  118. int idx = 0;
  119. for (block = page->first_block; block; block = block->next)
  120. {
  121. if (block->type != FZ_STEXT_BLOCK_TEXT)
  122. continue;
  123. for (line = block->u.t.first_line; line; line = line->next)
  124. {
  125. float hsize = largest_size_in_line(line) / 2;
  126. fz_point hdir = line->dir;
  127. fz_point vdir = fz_make_point(-line->dir.y, line->dir.x);
  128. // Compute mid-line from quads!
  129. fz_point p1 = fz_make_point(
  130. (line->first_char->quad.ll.x + line->first_char->quad.ul.x) / 2,
  131. (line->first_char->quad.ll.y + line->first_char->quad.ul.y) / 2
  132. );
  133. fz_point p2 = fz_make_point(
  134. (line->last_char->quad.lr.x + line->last_char->quad.ur.x) / 2,
  135. (line->last_char->quad.lr.y + line->last_char->quad.ur.y) / 2
  136. );
  137. // Signed distance perpendicular mid-line (positive is below)
  138. float vdist = linedist(p1, vdir, q);
  139. // Signed distance tangent to mid-line from end points (positive is to end)
  140. float hdist1 = linedist(p1, hdir, q);
  141. float hdist2 = linedist(p2, hdir, q);
  142. // Within the line itself!
  143. if (vdist >= -hsize && vdist <= hsize && (hdist1 > 0) != (hdist2 > 0))
  144. {
  145. closest_vdist = 0;
  146. closest_hdist = 0;
  147. closest_line = line;
  148. closest_idx = idx;
  149. }
  150. else
  151. {
  152. // Vertical distance from mid-line.
  153. float avdist = fz_abs(vdist);
  154. // Horizontal distance from closest end-point
  155. float ahdist = fz_min(fz_abs(hdist1), fz_abs(hdist2));
  156. if (avdist < hsize)
  157. {
  158. // Within extended line
  159. if (ahdist <= closest_hdist)
  160. {
  161. closest_vdist = 0;
  162. closest_hdist = ahdist;
  163. closest_line = line;
  164. closest_idx = idx;
  165. }
  166. }
  167. else
  168. {
  169. // Outside line
  170. // TODO: closest column?
  171. if (avdist <= closest_vdist)
  172. {
  173. closest_vdist = avdist;
  174. closest_line = line;
  175. closest_idx = idx;
  176. }
  177. }
  178. }
  179. idx += line_length(line);
  180. }
  181. }
  182. if (closest_line)
  183. return find_closest_in_line(closest_line, closest_idx, q);
  184. return 0;
  185. }
  186. struct callbacks
  187. {
  188. void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch);
  189. void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln);
  190. void *arg;
  191. };
  192. static void
  193. fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb)
  194. {
  195. fz_stext_block *block;
  196. fz_stext_line *line;
  197. fz_stext_char *ch;
  198. int idx, start, end;
  199. int inside;
  200. start = find_closest_in_page(page, a);
  201. end = find_closest_in_page(page, b);
  202. if (start > end)
  203. idx = start, start = end, end = idx;
  204. if (start == end)
  205. return;
  206. inside = 0;
  207. idx = 0;
  208. for (block = page->first_block; block; block = block->next)
  209. {
  210. if (block->type != FZ_STEXT_BLOCK_TEXT)
  211. continue;
  212. for (line = block->u.t.first_line; line; line = line->next)
  213. {
  214. for (ch = line->first_char; ch; ch = ch->next)
  215. {
  216. if (!inside)
  217. if (idx == start)
  218. inside = 1;
  219. if (inside)
  220. cb->on_char(ctx, cb->arg, line, ch);
  221. if (++idx == end)
  222. return;
  223. }
  224. if (inside)
  225. cb->on_line(ctx, cb->arg, line);
  226. }
  227. }
  228. }
  229. fz_quad
  230. fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode)
  231. {
  232. fz_stext_block *block;
  233. fz_stext_line *line;
  234. fz_stext_char *ch;
  235. fz_quad handles;
  236. int idx, start, end;
  237. int pc;
  238. start = find_closest_in_page(page, *a);
  239. end = find_closest_in_page(page, *b);
  240. if (start > end)
  241. idx = start, start = end, end = idx;
  242. handles.ll = handles.ul = *a;
  243. handles.lr = handles.ur = *b;
  244. idx = 0;
  245. for (block = page->first_block; block; block = block->next)
  246. {
  247. if (block->type != FZ_STEXT_BLOCK_TEXT)
  248. continue;
  249. for (line = block->u.t.first_line; line; line = line->next)
  250. {
  251. pc = '\n';
  252. for (ch = line->first_char; ch; ch = ch->next)
  253. {
  254. if (idx <= start)
  255. {
  256. if (mode == FZ_SELECT_CHARS
  257. || (mode == FZ_SELECT_WORDS && (pc == ' ' || pc == '\n'))
  258. || (mode == FZ_SELECT_LINES && (pc == '\n')))
  259. {
  260. handles.ll = ch->quad.ll;
  261. handles.ul = ch->quad.ul;
  262. *a = ch->origin;
  263. }
  264. }
  265. if (idx >= end)
  266. {
  267. if (mode == FZ_SELECT_CHARS
  268. || (mode == FZ_SELECT_WORDS && (ch->c == ' ')))
  269. {
  270. handles.lr = ch->quad.ll;
  271. handles.ur = ch->quad.ul;
  272. *b = ch->origin;
  273. return handles;
  274. }
  275. if (!ch->next)
  276. {
  277. handles.lr = ch->quad.lr;
  278. handles.ur = ch->quad.ur;
  279. *b = ch->quad.lr;
  280. return handles;
  281. }
  282. }
  283. pc = ch->c;
  284. ++idx;
  285. }
  286. }
  287. }
  288. return handles;
  289. }
  290. /* Highlight selection */
  291. struct highlight
  292. {
  293. int len, cap;
  294. fz_quad *box;
  295. float hfuzz, vfuzz;
  296. };
  297. int same_point(fz_point a, fz_point b)
  298. {
  299. int dx = fz_abs(a.x - b.x);
  300. int dy = fz_abs(a.y - b.y);
  301. return (dx < 0.1 && dy < 0.1);
  302. }
  303. int is_near(float hfuzz, float vfuzz, fz_point hdir, fz_point end, fz_point p1, fz_point p2)
  304. {
  305. float v = fz_abs(linedist(end, fz_make_point(-hdir.y, hdir.x), p1));
  306. float d1 = fz_abs(linedist(end, hdir, p1));
  307. float d2 = fz_abs(linedist(end, hdir, p2));
  308. return (v < vfuzz && d1 < hfuzz && d1 < d2);
  309. }
  310. static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
  311. {
  312. struct highlight *hits = arg;
  313. float vfuzz = hits->vfuzz * ch->size;
  314. float hfuzz = hits->hfuzz * ch->size;
  315. fz_point dir = line->dir;
  316. // Skip zero-extent quads
  317. if (same_point(ch->quad.ll, ch->quad.lr))
  318. return;
  319. if (hits->len > 0)
  320. {
  321. fz_quad *end = &hits->box[hits->len-1];
  322. if (is_near(hfuzz, vfuzz, dir, end->lr, ch->quad.ll, ch->quad.lr) &&
  323. is_near(hfuzz, vfuzz, dir, end->ur, ch->quad.ul, ch->quad.ur))
  324. {
  325. end->ur = ch->quad.ur;
  326. end->lr = ch->quad.lr;
  327. return;
  328. }
  329. if (is_near(hfuzz, vfuzz, dir, end->ll, ch->quad.lr, ch->quad.ll) &&
  330. is_near(hfuzz, vfuzz, dir, end->ul, ch->quad.ur, ch->quad.ul))
  331. {
  332. end->ul = ch->quad.ul;
  333. end->ll = ch->quad.ll;
  334. return;
  335. }
  336. }
  337. if (hits->len < hits->cap)
  338. hits->box[hits->len++] = ch->quad;
  339. }
  340. static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line)
  341. {
  342. }
  343. int
  344. fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads)
  345. {
  346. struct callbacks cb;
  347. struct highlight hits;
  348. hits.len = 0;
  349. hits.cap = max_quads;
  350. hits.box = quads;
  351. hits.hfuzz = 0.5f; /* merge large gaps */
  352. hits.vfuzz = 0.1f;
  353. cb.on_char = on_highlight_char;
  354. cb.on_line = on_highlight_line;
  355. cb.arg = &hits;
  356. fz_enumerate_selection(ctx, page, a, b, &cb);
  357. return hits.len;
  358. }
  359. /* Copy selection */
  360. static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
  361. {
  362. fz_buffer *buffer = arg;
  363. int c = ch->c;
  364. if (c < 32)
  365. c = FZ_REPLACEMENT_CHARACTER;
  366. fz_append_rune(ctx, buffer, c);
  367. }
  368. static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line)
  369. {
  370. fz_buffer *buffer = arg;
  371. fz_append_byte(ctx, buffer, '\r');
  372. fz_append_byte(ctx, buffer, '\n');
  373. }
  374. static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line)
  375. {
  376. fz_buffer *buffer = arg;
  377. fz_append_byte(ctx, buffer, '\n');
  378. }
  379. char *
  380. fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf)
  381. {
  382. struct callbacks cb;
  383. fz_buffer *buffer;
  384. unsigned char *s;
  385. buffer = fz_new_buffer(ctx, 1024);
  386. fz_try(ctx)
  387. {
  388. cb.on_char = on_copy_char;
  389. cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf;
  390. cb.arg = buffer;
  391. fz_enumerate_selection(ctx, page, a, b, &cb);
  392. fz_terminate_buffer(ctx, buffer);
  393. }
  394. fz_catch(ctx)
  395. {
  396. fz_drop_buffer(ctx, buffer);
  397. fz_rethrow(ctx);
  398. }
  399. fz_buffer_extract(ctx, buffer, &s); /* take over the data */
  400. fz_drop_buffer(ctx, buffer);
  401. return (char*)s;
  402. }
  403. char *
  404. fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf)
  405. {
  406. fz_stext_block *block;
  407. fz_stext_line *line;
  408. fz_stext_char *ch;
  409. fz_buffer *buffer;
  410. unsigned char *s;
  411. int need_new_line = 0;
  412. buffer = fz_new_buffer(ctx, 1024);
  413. fz_try(ctx)
  414. {
  415. for (block = page->first_block; block; block = block->next)
  416. {
  417. if (block->type != FZ_STEXT_BLOCK_TEXT)
  418. continue;
  419. for (line = block->u.t.first_line; line; line = line->next)
  420. {
  421. int line_had_text = 0;
  422. for (ch = line->first_char; ch; ch = ch->next)
  423. {
  424. fz_rect r = fz_rect_from_quad(ch->quad);
  425. if (!fz_is_empty_rect(fz_intersect_rect(r, area)))
  426. {
  427. line_had_text = 1;
  428. if (need_new_line)
  429. {
  430. fz_append_string(ctx, buffer, crlf ? "\r\n" : "\n");
  431. need_new_line = 0;
  432. }
  433. fz_append_rune(ctx, buffer, ch->c < 32 ? FZ_REPLACEMENT_CHARACTER : ch->c);
  434. }
  435. }
  436. if (line_had_text)
  437. need_new_line = 1;
  438. }
  439. }
  440. fz_terminate_buffer(ctx, buffer);
  441. }
  442. fz_catch(ctx)
  443. {
  444. fz_drop_buffer(ctx, buffer);
  445. fz_rethrow(ctx);
  446. }
  447. fz_buffer_extract(ctx, buffer, &s); /* take over the data */
  448. fz_drop_buffer(ctx, buffer);
  449. return (char*)s;
  450. }
  451. /* String search */
  /*
   * Canonicalise a code point for matching:
   *  - full-width ASCII forms U+FF01..U+FF5E become U+0021..U+007E,
   *  - U+00A0, U+2028, U+2029, CR, LF and TAB become a plain space,
   *  - everything else is passed through fz_toupper().
   */
  static inline int canon(int c)
  {
      if (c >= 0xFF01 && c <= 0xFF5E)
          c -= 0xFF01 - 0x21;

      switch (c)
      {
      case 0xA0:
      case 0x2028:
      case 0x2029:
      case '\r':
      case '\n':
      case '\t':
          return ' ';
      default:
          return fz_toupper(c);
      }
  }
  /*
   * Decode one rune from s with fz_chartorune(), store its canonical form
   * in *c, and return the value fz_chartorune() returned (used by callers
   * to advance s).
   */
  static inline int chartocanon(int *c, const char *s)
  {
      int rune;
      int used = fz_chartorune(&rune, s);

      *c = canon(rune);
      return used;
  }
  470. static const char *match_string(const char *h, const char *n)
  471. {
  472. int hc, nc;
  473. const char *e = h;
  474. h += chartocanon(&hc, h);
  475. n += chartocanon(&nc, n);
  476. while (hc == nc)
  477. {
  478. e = h;
  479. if (hc == ' ')
  480. do
  481. h += chartocanon(&hc, h);
  482. while (hc == ' ');
  483. else
  484. h += chartocanon(&hc, h);
  485. if (nc == ' ')
  486. do
  487. n += chartocanon(&nc, n);
  488. while (nc == ' ');
  489. else
  490. n += chartocanon(&nc, n);
  491. }
  492. return nc == 0 ? e : NULL;
  493. }
  494. static const char *find_string(const char *s, const char *needle, const char **endp)
  495. {
  496. const char *end;
  497. while (*s)
  498. {
  499. end = match_string(s, needle);
  500. if (end)
  501. return *endp = end, s;
  502. ++s;
  503. }
  504. return *endp = NULL, NULL;
  505. }
  506. struct search_data
  507. {
  508. /* Number of hits so far.*/
  509. int count_quads;
  510. int count_hits;
  511. int max_quads;
  512. int quad_fill;
  513. fz_quad locals[32];
  514. fz_quad *quads;
  515. float hfuzz, vfuzz;
  516. fz_search_callback_fn *cb;
  517. void *opaque;
  518. };
  519. static int hit_char(fz_context *ctx, struct search_data *hits, fz_stext_line *line, fz_stext_char *ch, int is_at_start)
  520. {
  521. float vfuzz = ch->size * hits->vfuzz;
  522. float hfuzz = ch->size * hits->hfuzz;
  523. /* Can we continue an existing quad? */
  524. if (hits->quad_fill > 0 && !is_at_start)
  525. {
  526. fz_quad *end = &hits->quads[hits->quad_fill-1];
  527. if (hdist(&line->dir, &end->lr, &ch->quad.ll) < hfuzz
  528. && vdist(&line->dir, &end->lr, &ch->quad.ll) < vfuzz
  529. && hdist(&line->dir, &end->ur, &ch->quad.ul) < hfuzz
  530. && vdist(&line->dir, &end->ur, &ch->quad.ul) < vfuzz)
  531. {
  532. /* Yes */
  533. end->ur = ch->quad.ur;
  534. end->lr = ch->quad.lr;
  535. return 0;
  536. }
  537. }
  538. if (is_at_start && hits->quad_fill > 0)
  539. {
  540. /* Send the quads we have queued. */
  541. hits->count_hits++;
  542. if (hits->cb && hits->cb(ctx, hits->opaque, hits->quad_fill, hits->quads))
  543. return 1;
  544. hits->quad_fill = 0;
  545. }
  546. if (hits->quad_fill == hits->max_quads)
  547. {
  548. int newmax = hits->max_quads * 2;
  549. if (hits->quads == hits->locals)
  550. {
  551. hits->quads = fz_malloc(ctx, sizeof(hits->quads[0]) * newmax);
  552. memcpy(hits->quads, hits->locals, sizeof(hits->quads[0]) * nelem(hits->locals));
  553. }
  554. else
  555. {
  556. hits->quads = fz_realloc(ctx, hits->quads, sizeof(hits->quads[0]) * newmax);
  557. }
  558. hits->max_quads = newmax;
  559. }
  560. hits->quads[hits->quad_fill++] = ch->quad;
  561. hits->count_quads++;
  562. return 0;
  563. }
  564. int
  565. fz_search_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_search_callback_fn *cb, void *opaque)
  566. {
  567. struct search_data hits;
  568. fz_stext_block *block;
  569. fz_stext_line *line;
  570. fz_stext_char *ch;
  571. fz_buffer *buffer;
  572. const char *haystack, *begin, *end;
  573. int c, inside;
  574. if (strlen(needle) == 0)
  575. return 0;
  576. hits.count_quads = 0;
  577. hits.count_hits = 0;
  578. hits.quad_fill = 0;
  579. hits.max_quads = nelem(hits.locals);
  580. hits.quads = hits.locals;
  581. hits.hfuzz = 0.2f; /* merge kerns but not large gaps */
  582. hits.vfuzz = 0.1f;
  583. hits.cb = cb;
  584. hits.opaque = opaque;
  585. buffer = fz_new_buffer_from_stext_page(ctx, page);
  586. fz_try(ctx)
  587. {
  588. haystack = fz_string_from_buffer(ctx, buffer);
  589. begin = find_string(haystack, needle, &end);
  590. if (!begin)
  591. goto no_more_matches;
  592. inside = 0;
  593. for (block = page->first_block; block; block = block->next)
  594. {
  595. if (block->type != FZ_STEXT_BLOCK_TEXT)
  596. continue;
  597. for (line = block->u.t.first_line; line; line = line->next)
  598. {
  599. for (ch = line->first_char; ch; ch = ch->next)
  600. {
  601. try_new_match:
  602. if (!inside)
  603. {
  604. if (haystack >= begin)
  605. inside = 1;
  606. }
  607. if (inside)
  608. {
  609. if (haystack < end)
  610. {
  611. if (hit_char(ctx, &hits, line, ch, haystack == begin))
  612. goto no_more_matches;
  613. }
  614. else
  615. {
  616. inside = 0;
  617. begin = find_string(haystack, needle, &end);
  618. if (!begin)
  619. goto no_more_matches;
  620. else
  621. goto try_new_match;
  622. }
  623. }
  624. haystack += fz_chartorune(&c, haystack);
  625. }
  626. assert(*haystack == '\n');
  627. ++haystack;
  628. }
  629. assert(*haystack == '\n');
  630. ++haystack;
  631. }
  632. no_more_matches:
  633. /* Send the quads we have queued. */
  634. if (hits.quad_fill)
  635. {
  636. hits.count_hits++;
  637. if (hits.cb)
  638. (void)hits.cb(ctx, hits.opaque, hits.quad_fill, hits.quads);
  639. }
  640. }
  641. fz_always(ctx)
  642. {
  643. fz_drop_buffer(ctx, buffer);
  644. if (hits.quads != hits.locals)
  645. fz_free(ctx, hits.quads);
  646. }
  647. fz_catch(ctx)
  648. fz_rethrow(ctx);
  649. return hits.count_hits;
  650. }
  /* State for oldsearch_cb(), which adapts the callback search to the
   * array-based fz_search_stext_page() interface. */
  typedef struct
  {
      /* Optional output: hit number for each stored quad (may be NULL). */
      int *hit_mark;
      /* Output array for the quads. */
      fz_quad *quads;
      /* Capacity of quads (and of hit_mark, when given). */
      int max_quads;
      /* Number of quads stored so far. */
      int fill;
      /* Number of hits seen so far; used to number the next hit. */
      int hit;
  } oldsearch_data;
  659. static int
  660. oldsearch_cb(fz_context *ctx, void *opaque, int num_quads, fz_quad *quads)
  661. {
  662. oldsearch_data *data = (oldsearch_data *)opaque;
  663. int i;
  664. int hit = data->hit++;
  665. for (i = 0; i < num_quads; i++)
  666. {
  667. if (data->fill == data->max_quads)
  668. break;
  669. if (data->hit_mark)
  670. data->hit_mark[data->fill] = hit;
  671. data->quads[data->fill] = quads[i];
  672. data->fill++;
  673. }
  674. /* We never return 1 here, even if we fill up the buffer, as we
  675. * want the old API to get the correct total number of quads. */
  676. return 0;
  677. }
  678. int
  679. fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, int *hit_mark, fz_quad *quads, int max_quads)
  680. {
  681. oldsearch_data data;
  682. data.hit_mark = hit_mark;
  683. data.quads = quads;
  684. data.max_quads = max_quads;
  685. data.fill = 0;
  686. data.hit = 0;
  687. (void)fz_search_stext_page_cb(ctx, page, needle, oldsearch_cb, &data);
  688. return data.fill; /* Return the number of quads we have read */
  689. }