| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782 |
- // Copyright (C) 2004-2024 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- #include "mupdf/fitz.h"
- #include <string.h>
- #include <limits.h>
- #include <assert.h>
- /* Enumerate marked selection */
- static float hdist(fz_point *dir, fz_point *a, fz_point *b)
- {
- float dx = b->x - a->x;
- float dy = b->y - a->y;
- return fz_abs(dx * dir->x - dy * dir->y);
- }
- static float vdist(fz_point *dir, fz_point *a, fz_point *b)
- {
- float dx = b->x - a->x;
- float dy = b->y - a->y;
- return fz_abs(dx * dir->y - dy * dir->x);
- }
- static float vecdot(fz_point a, fz_point b)
- {
- return a.x * b.x + a.y * b.y;
- }
- static float linedist(fz_point origin, fz_point dir, fz_point q)
- {
- return vecdot(dir, fz_make_point(q.x - origin.x, q.y - origin.y));
- }
- static int line_length(fz_stext_line *line)
- {
- fz_stext_char *ch;
- int n = 0;
- for (ch = line->first_char; ch; ch = ch->next)
- ++n;
- return n;
- }
- static float largest_size_in_line(fz_stext_line *line)
- {
- fz_stext_char *ch;
- float size = 0;
- for (ch = line->first_char; ch; ch = ch->next)
- if (ch->size > size)
- size = ch->size;
- return size;
- }
- static int find_closest_in_line(fz_stext_line *line, int idx, fz_point q)
- {
- fz_stext_char *ch;
- float closest_dist = 1e30f;
- int closest_idx = idx;
- float d1, d2;
- float hsize = largest_size_in_line(line) / 2;
- fz_point vdir = fz_make_point(-line->dir.y, line->dir.x);
- fz_point hdir = line->dir;
- // Compute mid-line from quads!
- fz_point p1 = fz_make_point(
- (line->first_char->quad.ll.x + line->first_char->quad.ul.x) / 2,
- (line->first_char->quad.ll.y + line->first_char->quad.ul.y) / 2
- );
- // Signed distance perpendicular mid-line (positive is below)
- float vdist = linedist(p1, vdir, q);
- if (vdist < -hsize)
- return idx;
- if (vdist > hsize)
- return idx + line_length(line);
- for (ch = line->first_char; ch; ch = ch->next)
- {
- if (ch->bidi & 1)
- {
- d1 = fz_abs(linedist(ch->quad.lr, hdir, q));
- d2 = fz_abs(linedist(ch->quad.ll, hdir, q));
- }
- else
- {
- d1 = fz_abs(linedist(ch->quad.ll, hdir, q));
- d2 = fz_abs(linedist(ch->quad.lr, hdir, q));
- }
- if (d1 < closest_dist)
- {
- closest_dist = d1;
- closest_idx = idx;
- }
- if (d2 < closest_dist)
- {
- closest_dist = d2;
- closest_idx = idx + 1;
- }
- ++idx;
- }
- return closest_idx;
- }
- static int find_closest_in_page(fz_stext_page *page, fz_point q)
- {
- fz_stext_block *block;
- fz_stext_line *line;
- fz_stext_line *closest_line = NULL;
- int closest_idx = 0;
- float closest_vdist = 1e30f;
- float closest_hdist = 1e30f;
- int idx = 0;
- for (block = page->first_block; block; block = block->next)
- {
- if (block->type != FZ_STEXT_BLOCK_TEXT)
- continue;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- float hsize = largest_size_in_line(line) / 2;
- fz_point hdir = line->dir;
- fz_point vdir = fz_make_point(-line->dir.y, line->dir.x);
- // Compute mid-line from quads!
- fz_point p1 = fz_make_point(
- (line->first_char->quad.ll.x + line->first_char->quad.ul.x) / 2,
- (line->first_char->quad.ll.y + line->first_char->quad.ul.y) / 2
- );
- fz_point p2 = fz_make_point(
- (line->last_char->quad.lr.x + line->last_char->quad.ur.x) / 2,
- (line->last_char->quad.lr.y + line->last_char->quad.ur.y) / 2
- );
- // Signed distance perpendicular mid-line (positive is below)
- float vdist = linedist(p1, vdir, q);
- // Signed distance tangent to mid-line from end points (positive is to end)
- float hdist1 = linedist(p1, hdir, q);
- float hdist2 = linedist(p2, hdir, q);
- // Within the line itself!
- if (vdist >= -hsize && vdist <= hsize && (hdist1 > 0) != (hdist2 > 0))
- {
- closest_vdist = 0;
- closest_hdist = 0;
- closest_line = line;
- closest_idx = idx;
- }
- else
- {
- // Vertical distance from mid-line.
- float avdist = fz_abs(vdist);
- // Horizontal distance from closest end-point
- float ahdist = fz_min(fz_abs(hdist1), fz_abs(hdist2));
- if (avdist < hsize)
- {
- // Within extended line
- if (ahdist <= closest_hdist)
- {
- closest_vdist = 0;
- closest_hdist = ahdist;
- closest_line = line;
- closest_idx = idx;
- }
- }
- else
- {
- // Outside line
- // TODO: closest column?
- if (avdist <= closest_vdist)
- {
- closest_vdist = avdist;
- closest_line = line;
- closest_idx = idx;
- }
- }
- }
- idx += line_length(line);
- }
- }
- if (closest_line)
- return find_closest_in_line(closest_line, closest_idx, q);
- return 0;
- }
- struct callbacks
- {
- void (*on_char)(fz_context *ctx, void *arg, fz_stext_line *ln, fz_stext_char *ch);
- void (*on_line)(fz_context *ctx, void *arg, fz_stext_line *ln);
- void *arg;
- };
- static void
- fz_enumerate_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, struct callbacks *cb)
- {
- fz_stext_block *block;
- fz_stext_line *line;
- fz_stext_char *ch;
- int idx, start, end;
- int inside;
- start = find_closest_in_page(page, a);
- end = find_closest_in_page(page, b);
- if (start > end)
- idx = start, start = end, end = idx;
- if (start == end)
- return;
- inside = 0;
- idx = 0;
- for (block = page->first_block; block; block = block->next)
- {
- if (block->type != FZ_STEXT_BLOCK_TEXT)
- continue;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- for (ch = line->first_char; ch; ch = ch->next)
- {
- if (!inside)
- if (idx == start)
- inside = 1;
- if (inside)
- cb->on_char(ctx, cb->arg, line, ch);
- if (++idx == end)
- return;
- }
- if (inside)
- cb->on_line(ctx, cb->arg, line);
- }
- }
- }
- fz_quad
- fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *a, fz_point *b, int mode)
- {
- fz_stext_block *block;
- fz_stext_line *line;
- fz_stext_char *ch;
- fz_quad handles;
- int idx, start, end;
- int pc;
- start = find_closest_in_page(page, *a);
- end = find_closest_in_page(page, *b);
- if (start > end)
- idx = start, start = end, end = idx;
- handles.ll = handles.ul = *a;
- handles.lr = handles.ur = *b;
- idx = 0;
- for (block = page->first_block; block; block = block->next)
- {
- if (block->type != FZ_STEXT_BLOCK_TEXT)
- continue;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- pc = '\n';
- for (ch = line->first_char; ch; ch = ch->next)
- {
- if (idx <= start)
- {
- if (mode == FZ_SELECT_CHARS
- || (mode == FZ_SELECT_WORDS && (pc == ' ' || pc == '\n'))
- || (mode == FZ_SELECT_LINES && (pc == '\n')))
- {
- handles.ll = ch->quad.ll;
- handles.ul = ch->quad.ul;
- *a = ch->origin;
- }
- }
- if (idx >= end)
- {
- if (mode == FZ_SELECT_CHARS
- || (mode == FZ_SELECT_WORDS && (ch->c == ' ')))
- {
- handles.lr = ch->quad.ll;
- handles.ur = ch->quad.ul;
- *b = ch->origin;
- return handles;
- }
- if (!ch->next)
- {
- handles.lr = ch->quad.lr;
- handles.ur = ch->quad.ur;
- *b = ch->quad.lr;
- return handles;
- }
- }
- pc = ch->c;
- ++idx;
- }
- }
- }
- return handles;
- }
- /* Highlight selection */
- struct highlight
- {
- int len, cap;
- fz_quad *box;
- float hfuzz, vfuzz;
- };
- int same_point(fz_point a, fz_point b)
- {
- int dx = fz_abs(a.x - b.x);
- int dy = fz_abs(a.y - b.y);
- return (dx < 0.1 && dy < 0.1);
- }
- int is_near(float hfuzz, float vfuzz, fz_point hdir, fz_point end, fz_point p1, fz_point p2)
- {
- float v = fz_abs(linedist(end, fz_make_point(-hdir.y, hdir.x), p1));
- float d1 = fz_abs(linedist(end, hdir, p1));
- float d2 = fz_abs(linedist(end, hdir, p2));
- return (v < vfuzz && d1 < hfuzz && d1 < d2);
- }
- static void on_highlight_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
- {
- struct highlight *hits = arg;
- float vfuzz = hits->vfuzz * ch->size;
- float hfuzz = hits->hfuzz * ch->size;
- fz_point dir = line->dir;
- // Skip zero-extent quads
- if (same_point(ch->quad.ll, ch->quad.lr))
- return;
- if (hits->len > 0)
- {
- fz_quad *end = &hits->box[hits->len-1];
- if (is_near(hfuzz, vfuzz, dir, end->lr, ch->quad.ll, ch->quad.lr) &&
- is_near(hfuzz, vfuzz, dir, end->ur, ch->quad.ul, ch->quad.ur))
- {
- end->ur = ch->quad.ur;
- end->lr = ch->quad.lr;
- return;
- }
- if (is_near(hfuzz, vfuzz, dir, end->ll, ch->quad.lr, ch->quad.ll) &&
- is_near(hfuzz, vfuzz, dir, end->ul, ch->quad.ur, ch->quad.ul))
- {
- end->ul = ch->quad.ul;
- end->ll = ch->quad.ll;
- return;
- }
- }
- if (hits->len < hits->cap)
- hits->box[hits->len++] = ch->quad;
- }
- static void on_highlight_line(fz_context *ctx, void *arg, fz_stext_line *line)
- {
- }
- int
- fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads)
- {
- struct callbacks cb;
- struct highlight hits;
- hits.len = 0;
- hits.cap = max_quads;
- hits.box = quads;
- hits.hfuzz = 0.5f; /* merge large gaps */
- hits.vfuzz = 0.1f;
- cb.on_char = on_highlight_char;
- cb.on_line = on_highlight_line;
- cb.arg = &hits;
- fz_enumerate_selection(ctx, page, a, b, &cb);
- return hits.len;
- }
- /* Copy selection */
- static void on_copy_char(fz_context *ctx, void *arg, fz_stext_line *line, fz_stext_char *ch)
- {
- fz_buffer *buffer = arg;
- int c = ch->c;
- if (c < 32)
- c = FZ_REPLACEMENT_CHARACTER;
- fz_append_rune(ctx, buffer, c);
- }
- static void on_copy_line_crlf(fz_context *ctx, void *arg, fz_stext_line *line)
- {
- fz_buffer *buffer = arg;
- fz_append_byte(ctx, buffer, '\r');
- fz_append_byte(ctx, buffer, '\n');
- }
- static void on_copy_line_lf(fz_context *ctx, void *arg, fz_stext_line *line)
- {
- fz_buffer *buffer = arg;
- fz_append_byte(ctx, buffer, '\n');
- }
- char *
- fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf)
- {
- struct callbacks cb;
- fz_buffer *buffer;
- unsigned char *s;
- buffer = fz_new_buffer(ctx, 1024);
- fz_try(ctx)
- {
- cb.on_char = on_copy_char;
- cb.on_line = crlf ? on_copy_line_crlf : on_copy_line_lf;
- cb.arg = buffer;
- fz_enumerate_selection(ctx, page, a, b, &cb);
- fz_terminate_buffer(ctx, buffer);
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, buffer);
- fz_rethrow(ctx);
- }
- fz_buffer_extract(ctx, buffer, &s); /* take over the data */
- fz_drop_buffer(ctx, buffer);
- return (char*)s;
- }
- char *
- fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf)
- {
- fz_stext_block *block;
- fz_stext_line *line;
- fz_stext_char *ch;
- fz_buffer *buffer;
- unsigned char *s;
- int need_new_line = 0;
- buffer = fz_new_buffer(ctx, 1024);
- fz_try(ctx)
- {
- for (block = page->first_block; block; block = block->next)
- {
- if (block->type != FZ_STEXT_BLOCK_TEXT)
- continue;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- int line_had_text = 0;
- for (ch = line->first_char; ch; ch = ch->next)
- {
- fz_rect r = fz_rect_from_quad(ch->quad);
- if (!fz_is_empty_rect(fz_intersect_rect(r, area)))
- {
- line_had_text = 1;
- if (need_new_line)
- {
- fz_append_string(ctx, buffer, crlf ? "\r\n" : "\n");
- need_new_line = 0;
- }
- fz_append_rune(ctx, buffer, ch->c < 32 ? FZ_REPLACEMENT_CHARACTER : ch->c);
- }
- }
- if (line_had_text)
- need_new_line = 1;
- }
- }
- fz_terminate_buffer(ctx, buffer);
- }
- fz_catch(ctx)
- {
- fz_drop_buffer(ctx, buffer);
- fz_rethrow(ctx);
- }
- fz_buffer_extract(ctx, buffer, &s); /* take over the data */
- fz_drop_buffer(ctx, buffer);
- return (char*)s;
- }
- /* String search */
- static inline int canon(int c)
- {
- // Map full-width ASCII forms to ASCII:
- // U+FF01 .. U+FF5E => U+0021 .. U+007E
- if (c >= 0xFF01 && c <= 0xFF5E)
- c = c - 0xFF01 + 0x21;
- if (c == 0xA0 || c == 0x2028 || c == 0x2029)
- return ' ';
- if (c == '\r' || c == '\n' || c == '\t')
- return ' ';
- return fz_toupper(c);
- }
- static inline int chartocanon(int *c, const char *s)
- {
- int n = fz_chartorune(c, s);
- *c = canon(*c);
- return n;
- }
- static const char *match_string(const char *h, const char *n)
- {
- int hc, nc;
- const char *e = h;
- h += chartocanon(&hc, h);
- n += chartocanon(&nc, n);
- while (hc == nc)
- {
- e = h;
- if (hc == ' ')
- do
- h += chartocanon(&hc, h);
- while (hc == ' ');
- else
- h += chartocanon(&hc, h);
- if (nc == ' ')
- do
- n += chartocanon(&nc, n);
- while (nc == ' ');
- else
- n += chartocanon(&nc, n);
- }
- return nc == 0 ? e : NULL;
- }
- static const char *find_string(const char *s, const char *needle, const char **endp)
- {
- const char *end;
- while (*s)
- {
- end = match_string(s, needle);
- if (end)
- return *endp = end, s;
- ++s;
- }
- return *endp = NULL, NULL;
- }
- struct search_data
- {
- /* Number of hits so far.*/
- int count_quads;
- int count_hits;
- int max_quads;
- int quad_fill;
- fz_quad locals[32];
- fz_quad *quads;
- float hfuzz, vfuzz;
- fz_search_callback_fn *cb;
- void *opaque;
- };
- static int hit_char(fz_context *ctx, struct search_data *hits, fz_stext_line *line, fz_stext_char *ch, int is_at_start)
- {
- float vfuzz = ch->size * hits->vfuzz;
- float hfuzz = ch->size * hits->hfuzz;
- /* Can we continue an existing quad? */
- if (hits->quad_fill > 0 && !is_at_start)
- {
- fz_quad *end = &hits->quads[hits->quad_fill-1];
- if (hdist(&line->dir, &end->lr, &ch->quad.ll) < hfuzz
- && vdist(&line->dir, &end->lr, &ch->quad.ll) < vfuzz
- && hdist(&line->dir, &end->ur, &ch->quad.ul) < hfuzz
- && vdist(&line->dir, &end->ur, &ch->quad.ul) < vfuzz)
- {
- /* Yes */
- end->ur = ch->quad.ur;
- end->lr = ch->quad.lr;
- return 0;
- }
- }
- if (is_at_start && hits->quad_fill > 0)
- {
- /* Send the quads we have queued. */
- hits->count_hits++;
- if (hits->cb && hits->cb(ctx, hits->opaque, hits->quad_fill, hits->quads))
- return 1;
- hits->quad_fill = 0;
- }
- if (hits->quad_fill == hits->max_quads)
- {
- int newmax = hits->max_quads * 2;
- if (hits->quads == hits->locals)
- {
- hits->quads = fz_malloc(ctx, sizeof(hits->quads[0]) * newmax);
- memcpy(hits->quads, hits->locals, sizeof(hits->quads[0]) * nelem(hits->locals));
- }
- else
- {
- hits->quads = fz_realloc(ctx, hits->quads, sizeof(hits->quads[0]) * newmax);
- }
- hits->max_quads = newmax;
- }
- hits->quads[hits->quad_fill++] = ch->quad;
- hits->count_quads++;
- return 0;
- }
- int
- fz_search_stext_page_cb(fz_context *ctx, fz_stext_page *page, const char *needle, fz_search_callback_fn *cb, void *opaque)
- {
- struct search_data hits;
- fz_stext_block *block;
- fz_stext_line *line;
- fz_stext_char *ch;
- fz_buffer *buffer;
- const char *haystack, *begin, *end;
- int c, inside;
- if (strlen(needle) == 0)
- return 0;
- hits.count_quads = 0;
- hits.count_hits = 0;
- hits.quad_fill = 0;
- hits.max_quads = nelem(hits.locals);
- hits.quads = hits.locals;
- hits.hfuzz = 0.2f; /* merge kerns but not large gaps */
- hits.vfuzz = 0.1f;
- hits.cb = cb;
- hits.opaque = opaque;
- buffer = fz_new_buffer_from_stext_page(ctx, page);
- fz_try(ctx)
- {
- haystack = fz_string_from_buffer(ctx, buffer);
- begin = find_string(haystack, needle, &end);
- if (!begin)
- goto no_more_matches;
- inside = 0;
- for (block = page->first_block; block; block = block->next)
- {
- if (block->type != FZ_STEXT_BLOCK_TEXT)
- continue;
- for (line = block->u.t.first_line; line; line = line->next)
- {
- for (ch = line->first_char; ch; ch = ch->next)
- {
- try_new_match:
- if (!inside)
- {
- if (haystack >= begin)
- inside = 1;
- }
- if (inside)
- {
- if (haystack < end)
- {
- if (hit_char(ctx, &hits, line, ch, haystack == begin))
- goto no_more_matches;
- }
- else
- {
- inside = 0;
- begin = find_string(haystack, needle, &end);
- if (!begin)
- goto no_more_matches;
- else
- goto try_new_match;
- }
- }
- haystack += fz_chartorune(&c, haystack);
- }
- assert(*haystack == '\n');
- ++haystack;
- }
- assert(*haystack == '\n');
- ++haystack;
- }
- no_more_matches:
- /* Send the quads we have queued. */
- if (hits.quad_fill)
- {
- hits.count_hits++;
- if (hits.cb)
- (void)hits.cb(ctx, hits.opaque, hits.quad_fill, hits.quads);
- }
- }
- fz_always(ctx)
- {
- fz_drop_buffer(ctx, buffer);
- if (hits.quads != hits.locals)
- fz_free(ctx, hits.quads);
- }
- fz_catch(ctx)
- fz_rethrow(ctx);
- return hits.count_hits;
- }
- typedef struct
- {
- int *hit_mark;
- fz_quad *quads;
- int max_quads;
- int fill;
- int hit;
- } oldsearch_data;
- static int
- oldsearch_cb(fz_context *ctx, void *opaque, int num_quads, fz_quad *quads)
- {
- oldsearch_data *data = (oldsearch_data *)opaque;
- int i;
- int hit = data->hit++;
- for (i = 0; i < num_quads; i++)
- {
- if (data->fill == data->max_quads)
- break;
- if (data->hit_mark)
- data->hit_mark[data->fill] = hit;
- data->quads[data->fill] = quads[i];
- data->fill++;
- }
- /* We never return 1 here, even if we fill up the buffer, as we
- * want the old API to get the correct total number of quads. */
- return 0;
- }
- int
- fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, int *hit_mark, fz_quad *quads, int max_quads)
- {
- oldsearch_data data;
- data.hit_mark = hit_mark;
- data.quads = quads;
- data.max_quads = max_quads;
- data.fill = 0;
- data.hit = 0;
- (void)fz_search_stext_page_cb(ctx, page, needle, oldsearch_cb, &data);
- return data.fill; /* Return the number of quads we have read */
- }
|