- // Copyright (C) 2004-2025 Artifex Software, Inc.
- //
- // This file is part of MuPDF.
- //
- // MuPDF is free software: you can redistribute it and/or modify it under the
- // terms of the GNU Affero General Public License as published by the Free
- // Software Foundation, either version 3 of the License, or (at your option)
- // any later version.
- //
- // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
- // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
- // details.
- //
- // You should have received a copy of the GNU Affero General Public License
- // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
- //
- // Alternative licensing terms are available from the licensor.
- // For commercial licensing, see <https://www.artifex.com/> or contact
- // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
- // CA 94129, USA, for further information.
- /*
- This code does smooth scaling of a pixmap.
- This function returns a new pixmap representing the area starting at (0,0)
- given by taking the source pixmap src, scaling it to width w and height h,
- and then positioning it at (frac(x),frac(y)).
- This is a cut-down version of draw_scale.c that only copes with filters
- that return values strictly in the 0..1 range, and uses bytes for
- intermediate results rather than ints.
- */
- #include "mupdf/fitz.h"
- #include "draw-imp.h"
- #include "pixmap-imp.h"
- #include <math.h>
- #include <string.h>
- #include <assert.h>
- #include <limits.h>
- /* Do we special case handling of single pixel high/wide images? The
- * 'purest' handling is given by not special casing them, but certain
- * files that use such images 'stack' them to give full images. Not
- special casing them results in them being fainter and giving noticeable
- * rounding errors.
- */
- #define SINGLE_PIXEL_SPECIALS
- /*
- Consider a row of source samples, src, of width src_w, positioned at x,
- scaled to width dst_w.
- src[i] is centred at: x + (i + 0.5)*dst_w/src_w
- Therefore the distance between the centre of the jth output pixel and
- the centre of the ith source sample is:
- dist[j,i] = j + 0.5 - (x + (i + 0.5)*dst_w/src_w)
- When scaling up, therefore:
- dst[j] = SUM(filter(dist[j,i]) * src[i])
- (for all ints i)
- This can be simplified by noticing that filters are only non-zero within
- a given filter width (henceforth called W). So:
- dst[j] = SUM(filter(dist[j,i]) * src[i])
- (for ints i, s.t. (j*src_w/dst_w)-W < i < (j*src_w/dst_w)+W)
- When scaling down, each filtered source sample is stretched to be wider
- to avoid aliasing issues. This effectively reduces the distance between
- centres.
- dst[j] = SUM(filter(dist[j,i] * F) * F * src[i])
- (where F = dst_w/src_w)
- (for ints i, s.t. (j-W)/F < i < (j+W)/F)
- */
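- /*
- Worked example (illustrative, not from a real run): scale src_w=2 up
- to dst_w=4 at x=0 with the triangle filter (W=1). Scaling up, so the
- distances are scaled by G = src_w/dst_w = 0.5 before filtering. For
- dst pixel j=0 and source sample i=0:
- dist[0,0] = 0.5 - (0.5)*4/2 = -0.5, |dist|*G = 0.25,
- triangle(0.25) = 0.75
- so src[0] contributes with weight 0.75 (stored as round(256*0.75) =
- 192 in fixed point, later topped up to 256 by check_weights). For
- i=1, |dist|*G = 1.25 >= 1, so the weight is 0.
- */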
- typedef struct fz_scale_filter
- {
- int width;
- float (*fn)(struct fz_scale_filter *, float);
- } fz_scale_filter;
- /* Image scale filters */
- static float
- triangle(fz_scale_filter *filter, float f)
- {
- if (f >= 1)
- return 0;
- return 1-f;
- }
- static float
- box(fz_scale_filter *filter, float f)
- {
- if (f >= 0.5f)
- return 0;
- return 1;
- }
- static float
- simple(fz_scale_filter *filter, float x)
- {
- if (x >= 1)
- return 0;
- return 1 + (2*x - 3)*x*x;
- }
- fz_scale_filter fz_scale_filter_box = { 1, box };
- fz_scale_filter fz_scale_filter_triangle = { 1, triangle };
- fz_scale_filter fz_scale_filter_simple = { 1, simple };
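- /* Note: simple(x) = 1 + (2x - 3)x^2 = 1 - (3x^2 - 2x^3), i.e. the
-  * complement of the Hermite smoothstep: it falls smoothly from 1 at
-  * x=0 to 0 at x=1 with zero gradient at both ends. */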
- /*
- We build ourselves a set of tables to contain the precalculated weights
- for a given set of scale settings.
- The first dst_w entries in index are the indexes into index of the
- sets of weights for each destination pixel.
- Each of the sets of weights is a set of values consisting of:
- the minimum source pixel index used for this destination pixel
- the number of weights used for this destination pixel
- the weights themselves
- So to calculate dst[i] we do the following:
- weights = &index[index[i]];
- min = *weights++;
- len = *weights++;
- dst[i] = 0;
- while (len-- > 0)
- dst[i] += src[min++] * *weights++
- In addition, we guarantee that at the end of this process weights will
- now point to the weights values for dst pixel i+1.
- In the simplest version of this algorithm, we would scale the whole image
- horizontally first into a temporary buffer, then scale that temporary
- buffer again vertically to give us our result. Using such a simple
- algorithm would mean that we could use the same style of weights for both
- horizontal and vertical scaling.
- Unfortunately, this would also require a large temporary buffer,
- particularly in the case where we are scaling up.
- We therefore modify the algorithm as follows; we scale scanlines from the
- source image horizontally into a temporary buffer, until we have all the
- contributors for a given output scanline. We then produce that output
- scanline from the temporary buffer. In this way we restrict the height
- of the temporary buffer to a small fraction of the final size.
- Unfortunately, this means that the pseudo code for recombining a
- scanline of fully scaled pixels is as follows:
- weights = &index[index[y]];
- min = *weights++;
- len = *weights++;
- for (x=0 to dst_w)
- min2 = min
- len2 = len
- weights2 = weights
- dst[x] = 0;
- while (len2-- > 0)
- dst[x] += temp[x][(min2++) % tmp_buf_height] * *weights2++
- i.e. it requires a % operation for every source pixel - this is typically
- expensive.
- To avoid this, we alter the order in which vertical weights are stored,
- so that they are ordered in the same order as the temporary buffer lines
- would appear. This simplifies the algorithm to:
- weights = &index[index[y]];
- min = *weights++;
- len = *weights++;
- for (x=0 to dst_w)
- min2 = 0
- len2 = len
- weights2 = weights
- dst[x] = 0;
- while (len2-- > 0)
- dst[x] += temp[x][min2++] * *weights2++
- This means that len may be larger than it needs to be (due to the
- possible inclusion of a zero weight row or two), but in practice this
- is only an increase of 1 or 2 at worst.
- We implement this by generating the weights as normal (but ensuring we
- leave enough space) and then reordering afterwards.
- */
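- /*
- Illustrative layout (assumed values): for a patch of 2 destination
- pixels, the index array might read:
- index[0] = 2 -> weights for dst[0] start at index[2]
- index[1] = 6 -> weights for dst[1] start at index[6]
- index[2] = 3 (min), index[3] = 2 (len),
- index[4] = 100, index[5] = 156 (two weights summing to 256)
- index[6] = 4 (min), index[7] = 1 (len), index[8] = 256
- so dst[0] = (100*src[3] + 156*src[4] + 128) >> 8.
- */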
- /* This structure is accessed from ARM code - bear this in mind before
- * altering it! */
- typedef struct
- {
- int flip; /* true if outputting reversed */
- int count; /* number of output pixels we have records for in this table */
- int max_len; /* Maximum number of weights for any one output pixel */
- int n; /* number of components (src->n) */
- int new_line; /* True if no weights for the current output pixel */
- int patch_l; /* How many output pixels we skip over */
- int index[FZ_FLEXIBLE_ARRAY];
- } fz_weights;
- struct fz_scale_cache
- {
- int src_w;
- float x;
- float dst_w;
- fz_scale_filter *filter;
- int vertical;
- int dst_w_int;
- int patch_l;
- int patch_r;
- int n;
- int flip;
- fz_weights *weights;
- };
- static fz_weights *
- new_weights(fz_context *ctx, fz_scale_filter *filter, int src_w, float dst_w, int patch_w, int n, int flip, int patch_l)
- {
- int max_len;
- fz_weights *weights;
- if (src_w > dst_w)
- {
- /* Scaling down, so there will be a maximum of
- * 2*filterwidth*src_w/dst_w src pixels
- * contributing to each dst pixel. */
- max_len = (int)ceilf((2 * filter->width * src_w)/dst_w);
- if (max_len > src_w)
- max_len = src_w;
- }
- else
- {
- /* Scaling up, so there will be a maximum of
- * 2*filterwidth src pixels contributing to each dst pixel.
- */
- max_len = 2 * filter->width;
- }
- /* We need the size of the struct,
- * plus patch_w*sizeof(int) for the index
- * plus (2+max_len)*sizeof(int) for the weights
- * plus room for an extra set of weights for reordering.
- */
- weights = fz_malloc_flexible(ctx, fz_weights, index, (max_len+3) * (patch_w+1));
- if (!weights)
- return NULL;
- weights->count = -1;
- weights->max_len = max_len;
- weights->index[0] = patch_w;
- weights->n = n;
- weights->patch_l = patch_l;
- weights->flip = flip;
- return weights;
- }
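- /* Example (illustrative): downscaling src_w=300 to dst_w=100 with a
-  * width 1 filter gives max_len = ceil(2*1*300/100) = 6 contributing
-  * source pixels per destination pixel; when scaling up the cap is
-  * simply 2*filter->width = 2. */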
- /* j is destination pixel in the patch_l..patch_l+patch_w range */
- static void
- init_weights(fz_weights *weights, int j)
- {
- int index;
- j -= weights->patch_l;
- assert(weights->count == j-1);
- weights->count++;
- weights->new_line = 1;
- if (j == 0)
- index = weights->index[0];
- else
- {
- index = weights->index[j-1];
- index += 2 + weights->index[index+1];
- }
- weights->index[j] = index; /* row pointer */
- weights->index[index] = 0; /* min */
- weights->index[index+1] = 0; /* len */
- }
- static void
- insert_weight(fz_weights *weights, int j, int i, int weight)
- {
- int min, len, index;
- /* Move j from patch_l...patch_l+patch_w range to 0..patch_w range */
- j -= weights->patch_l;
- if (weights->new_line)
- {
- /* New line */
- weights->new_line = 0;
- index = weights->index[j]; /* row pointer */
- weights->index[index] = i; /* min */
- weights->index[index+1] = 0; /* len */
- }
- index = weights->index[j];
- min = weights->index[index++];
- len = weights->index[index++];
- while (i < min)
- {
- /* This only happens in rare cases, but we need to insert
- * one earlier. In exceedingly rare cases we may need to
- * insert more than one earlier. */
- int k;
- for (k = len; k > 0; k--)
- {
- weights->index[index+k] = weights->index[index+k-1];
- }
- weights->index[index] = 0;
- min--;
- len++;
- weights->index[index-2] = min;
- weights->index[index-1] = len;
- }
- if (i-min >= len)
- {
- /* The usual case */
- while (i-min >= ++len)
- {
- weights->index[index+len-1] = 0;
- }
- assert(len-1 == i-min);
- weights->index[index+i-min] = weight;
- weights->index[index-1] = len;
- assert(len <= weights->max_len);
- }
- else
- {
- /* Infrequent case */
- weights->index[index+i-min] += weight;
- }
- }
- static void
- add_weight(fz_weights *weights, int j, int i, fz_scale_filter *filter,
- float x, float F, float G, int src_w, float dst_w)
- {
- float dist = j - x + 0.5f - ((i + 0.5f)*dst_w/src_w);
- float f;
- int weight;
- dist *= G;
- if (dist < 0)
- dist = -dist;
- f = filter->fn(filter, dist)*F;
- weight = (int)(256*f+0.5f);
- /* Ensure i is in range */
- if (i < 0 || i >= src_w)
- return;
- if (weight != 0)
- insert_weight(weights, j, i, weight);
- }
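- /* Weights are 8.8 fixed point: a filter value f in 0..1 is stored as
-  * round(256*f), so the weights for a fully covered destination pixel
-  * should sum to 256 (enforced later by check_weights). */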
- static void
- reorder_weights(fz_weights *weights, int j, int src_w)
- {
- int idx = weights->index[j - weights->patch_l];
- int min = weights->index[idx++];
- int len = weights->index[idx++];
- int max = weights->max_len;
- int tmp = idx+max;
- int i, off;
- /* Copy into the temporary area */
- memcpy(&weights->index[tmp], &weights->index[idx], sizeof(int)*len);
- /* Pad out if required */
- assert(len <= max);
- assert(min+len <= src_w);
- off = 0;
- if (len < max)
- {
- memset(&weights->index[tmp+len], 0, sizeof(int)*(max-len));
- len = max;
- if (min + len > src_w)
- {
- off = min + len - src_w;
- min = src_w - len;
- weights->index[idx-2] = min;
- }
- weights->index[idx-1] = len;
- }
- /* Copy back into the proper places */
- for (i = 0; i < len; i++)
- {
- weights->index[idx+((min+i+off) % max)] = weights->index[tmp+i];
- }
- }
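- /*
- Illustrative reorder (assumed values): with max_len = 4, min = 5 and
- weights {w0,w1,w2}, the padded set {w0,w1,w2,0} is written back at
- positions (5+i) % 4, giving {0,w0,w1,w2}. The vertical scaler can
- then walk the temporary buffer rows in (row % max_len) order with no
- per-pixel % in its inner loop.
- */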
- /* Due to rounding and edge effects, the sums for the weights sometimes don't
- * add up to 256. This causes visible rendering effects. Therefore, we take
- * pains to ensure that they 1) never exceed 256, and 2) add up to exactly
- * 256 for all pixels that are completely covered. See bug #691629. */
- static void
- check_weights(fz_weights *weights, int j, int w, float x, float wf)
- {
- int idx, len;
- int sum = 0;
- int max = -256;
- int maxidx = 0;
- int i;
- idx = weights->index[j - weights->patch_l];
- idx++; /* min */
- len = weights->index[idx++];
- for(i=0; i < len; i++)
- {
- int v = weights->index[idx++];
- sum += v;
- if (v > max)
- {
- max = v;
- maxidx = idx;
- }
- }
- /* If we aren't the first or last pixel, OR if the sum is too big
- * then adjust it. */
- if (((j != 0) && (j != w-1)) || (sum > 256))
- weights->index[maxidx-1] += 256-sum;
- /* Otherwise, if we are the first pixel, and it's fully covered, then
- * adjust it. */
- else if ((j == 0) && (x < 0.0001f) && (sum != 256))
- weights->index[maxidx-1] += 256-sum;
- /* Finally, if we are the last pixel, and it's fully covered, then
- * adjust it. */
- else if ((j == w-1) && (w - wf < 0.0001f) && (sum != 256))
- weights->index[maxidx-1] += 256-sum;
- }
- static int
- window_fix(int l, int *rp, float window, float centre)
- {
- int r = *rp;
- while (centre - l > window)
- l++;
- while (r - centre > window)
- r--;
- *rp = r;
- return l;
- }
- static fz_weights *
- make_weights(fz_context *ctx, int src_w, float x, float dst_w, fz_scale_filter *filter, int vertical, int dst_w_int, int patch_l, int patch_r, int n, int flip, fz_scale_cache *cache)
- {
- fz_weights *weights;
- float F, G;
- float window;
- int j;
- if (cache)
- {
- if (cache->src_w == src_w && cache->x == x && cache->dst_w == dst_w &&
- cache->filter == filter && cache->vertical == vertical &&
- cache->dst_w_int == dst_w_int &&
- cache->patch_l == patch_l && cache->patch_r == patch_r &&
- cache->n == n && cache->flip == flip)
- {
- return cache->weights;
- }
- cache->src_w = src_w;
- cache->x = x;
- cache->dst_w = dst_w;
- cache->filter = filter;
- cache->vertical = vertical;
- cache->dst_w_int = dst_w_int;
- cache->patch_l = patch_l;
- cache->patch_r = patch_r;
- cache->n = n;
- cache->flip = flip;
- fz_free(ctx, cache->weights);
- cache->weights = NULL;
- }
- if (dst_w < src_w)
- {
- /* Scaling down */
- F = dst_w / src_w;
- G = 1;
- }
- else
- {
- /* Scaling up */
- F = 1;
- G = src_w / dst_w;
- }
- window = filter->width / F;
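- /* Example (illustrative): halving an image (src_w=200, dst_w=100)
-  * gives F = 0.5, G = 1, so window = filter->width/0.5 = 2*width
-  * source samples either side of each destination centre; doubling it
-  * gives F = 1, G = 0.5 and window = filter->width. */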
- weights = new_weights(ctx, filter, src_w, dst_w, patch_r-patch_l, n, flip, patch_l);
- if (!weights)
- return NULL;
- for (j = patch_l; j < patch_r; j++)
- {
- /* find the position of the centre of dst[j] in src space */
- float centre = (j - x + 0.5f)*src_w/dst_w - 0.5f;
- int l, r;
- l = ceilf(centre - window);
- r = floorf(centre + window);
- /* Now, due to the vagaries of floating point, if centre is large, l
- * and r can actually end up further than 2*window apart. All we care
- * about in this case is that we don't crash! We want a cheap correction
- * that avoids the assert and doesn't cost too much in the normal case.
- * This should do. */
- if (r - l > 2 * window)
- l = window_fix(l, &r, window, centre);
- init_weights(weights, j);
- for (; l <= r; l++)
- {
- add_weight(weights, j, l, filter, x, F, G, src_w, dst_w);
- }
- if (weights->new_line)
- {
- /* In very rare cases (bug 706764) we might not actually
- * have generated any non-zero weights for this destination
- * pixel. Just use the central pixel. */
- int src_x = floorf(centre);
- if (src_x >= src_w)
- src_x = src_w-1;
- if (src_x < 0)
- src_x = 0;
- insert_weight(weights, j, src_x, 1);
- }
- check_weights(weights, j, dst_w_int, x, dst_w);
- if (vertical)
- {
- reorder_weights(weights, j, src_w);
- }
- }
- weights->count++; /* weights->count = dst_w_int now */
- if (cache)
- {
- cache->weights = weights;
- }
- return weights;
- }
- static void
- scale_row_to_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int len, i, j, n;
- const unsigned char *min;
- int tmp[FZ_MAX_COLORS];
- int *t = tmp;
- n = weights->n;
- for (j = 0; j < n; j++)
- tmp[j] = 128;
- if (weights->flip)
- {
- dst += (weights->count-1)*n;
- for (i=weights->count; i > 0; i--)
- {
- min = &src[n * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- for (j = n; j > 0; j--)
- *t++ += *min++ * *contrib;
- t -= n;
- contrib++;
- }
- for (j = n; j > 0; j--)
- {
- *dst++ = (unsigned char)(*t>>8);
- *t++ = 128;
- }
- t -= n;
- dst -= n*2;
- }
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- min = &src[n * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- for (j = n; j > 0; j--)
- *t++ += *min++ * *contrib;
- t -= n;
- contrib++;
- }
- for (j = n; j > 0; j--)
- {
- *dst++ = (unsigned char)(*t>>8);
- *t++ = 128;
- }
- t -= n;
- }
- }
- }
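- /* The accumulators above start at 128, not 0: with 8.8 fixed point
-  * weights summing to 256, adding 128 before the final >>8 rounds the
-  * result to nearest rather than truncating. */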
- #ifdef ARCH_ARM
- static void
- scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- __attribute__((naked));
- static void
- scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- __attribute__((naked));
- static void
- scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- __attribute__((naked));
- static void
- scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- __attribute__((naked));
- static void
- scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
- __attribute__((naked));
- static void
- scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
- __attribute__((naked));
- static void
- scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- asm volatile(
- ENTER_ARM
- ".syntax unified\n"
- "stmfd r13!,{r4-r7,r9,r14} \n"
- "@ r0 = dst \n"
- "@ r1 = src \n"
- "@ r2 = weights \n"
- "ldr r12,[r2],#4 @ r12= flip \n"
- "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
- "ldr r4, [r2] @ r4 = index[0] \n"
- "cmp r12,#0 @ if (flip) \n"
- "beq 5f @ { \n"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "add r0, r0, r3 @ dst += count \n"
- "1: \n"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r5, #128 @ r5 = a = 128 \n"
- "add r4, r1, r4 @ r4 = min = &src[r4] \n"
- "subs r9, r9, #1 @ len-- \n"
- "blt 3f @ while (len >= 0) \n"
- "2: @ { \n"
- "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
- "ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
- "ldr r12,[r2], #4 @ r12 = *contrib++ \n"
- "ldrb r14,[r4], #1 @ r14 = *min++ \n"
- "mlagt r5, r6, r7, r5 @ g += r6 * r7 \n"
- "subs r9, r9, #2 @ r9 = len -= 2 \n"
- "mla r5, r12,r14,r5 @ g += r14 * r12 \n"
- "bge 2b @ } \n"
- "3: \n"
- "mov r5, r5, lsr #8 @ g >>= 8 \n"
- "strb r5,[r0, #-1]! @ *--dst=a \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 1b @ \n"
- "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
- "5:"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "6:"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r5, #128 @ r5 = a = 128 \n"
- "add r4, r1, r4 @ r4 = min = &src[r4] \n"
- "subs r9, r9, #1 @ len-- \n"
- "blt 9f @ while (len > 0) \n"
- "7: @ { \n"
- "ldrgt r6, [r2], #4 @ r6 = *contrib++ \n"
- "ldrbgt r7, [r4], #1 @ r7 = *min++ \n"
- "ldr r12,[r2], #4 @ r12 = *contrib++ \n"
- "ldrb r14,[r4], #1 @ r14 = *min++ \n"
- "mlagt r5, r6,r7,r5 @ a += r6 * r7 \n"
- "subs r9, r9, #2 @ r9 = len -= 2 \n"
- "mla r5, r12,r14,r5 @ a += r14 * r12 \n"
- "bge 7b @ } \n"
- "9: \n"
- "mov r5, r5, LSR #8 @ a >>= 8 \n"
- "strb r5, [r0], #1 @ *dst++=a \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 6b @ \n"
- "ldmfd r13!,{r4-r7,r9,PC} @ pop, return to thumb \n"
- ENTER_THUMB
- );
- }
- static void
- scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- asm volatile(
- ENTER_ARM
- "stmfd r13!,{r4-r6,r9-r11,r14} \n"
- "@ r0 = dst \n"
- "@ r1 = src \n"
- "@ r2 = weights \n"
- "ldr r12,[r2],#4 @ r12= flip \n"
- "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
- "ldr r4, [r2] @ r4 = index[0] \n"
- "cmp r12,#0 @ if (flip) \n"
- "beq 4f @ { \n"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "add r0, r0, r3, LSL #1 @ dst += 2*count \n"
- "1: \n"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r5, #128 @ r5 = g = 128 \n"
- "mov r6, #128 @ r6 = a = 128 \n"
- "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
- "cmp r9, #0 @ while (len-- > 0) \n"
- "beq 3f @ { \n"
- "2: \n"
- "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
- "ldrb r11,[r4], #1 @ r11 = *min++ \n"
- "ldrb r12,[r4], #1 @ r12 = *min++ \n"
- "subs r9, r9, #1 @ r9 = len-- \n"
- "mla r5, r14,r11,r5 @ g += r11 * r14 \n"
- "mla r6, r14,r12,r6 @ a += r12 * r14 \n"
- "bgt 2b @ } \n"
- "3: \n"
- "mov r5, r5, lsr #8 @ g >>= 8 \n"
- "mov r6, r6, lsr #8 @ a >>= 8 \n"
- "strb r5, [r0, #-2]! @ *--dst=a \n"
- "strb r6, [r0, #1] @ *--dst=g \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 1b @ \n"
- "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
- "4:"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "5:"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r5, #128 @ r5 = g = 128 \n"
- "mov r6, #128 @ r6 = a = 128 \n"
- "add r4, r1, r4, LSL #1 @ r4 = min = &src[2*r4] \n"
- "cmp r9, #0 @ while (len-- > 0) \n"
- "beq 7f @ { \n"
- "6: \n"
- "ldr r14,[r2], #4 @ r10 = *contrib++ \n"
- "ldrb r11,[r4], #1 @ r11 = *min++ \n"
- "ldrb r12,[r4], #1 @ r12 = *min++ \n"
- "subs r9, r9, #1 @ r9 = len-- \n"
- "mla r5, r14,r11,r5 @ g += r11 * r14 \n"
- "mla r6, r14,r12,r6 @ a += r12 * r14 \n"
- "bgt 6b @ } \n"
- "7: \n"
- "mov r5, r5, lsr #8 @ g >>= 8 \n"
- "mov r6, r6, lsr #8 @ a >>= 8 \n"
- "strb r5, [r0], #1 @ *dst++=g \n"
- "strb r6, [r0], #1 @ *dst++=a \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 5b @ \n"
- "ldmfd r13!,{r4-r6,r9-r11,PC} @ pop, return to thumb \n"
- ENTER_THUMB
- );
- }
- static void
- scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- asm volatile(
- ENTER_ARM
- "stmfd r13!,{r4-r11,r14} \n"
- "@ r0 = dst \n"
- "@ r1 = src \n"
- "@ r2 = weights \n"
- "ldr r12,[r2],#4 @ r12= flip \n"
- "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
- "ldr r4, [r2] @ r4 = index[0] \n"
- "cmp r12,#0 @ if (flip) \n"
- "beq 4f @ { \n"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "add r0, r0, r3, LSL #1 @ \n"
- "add r0, r0, r3 @ dst += 3*count \n"
- "1: \n"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r5, #128 @ r5 = r = 128 \n"
- "mov r6, #128 @ r6 = g = 128 \n"
- "add r7, r1, r4, LSL #1 @ \n"
- "add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
- "mov r7, #128 @ r7 = b = 128 \n"
- "cmp r9, #0 @ while (len-- > 0) \n"
- "beq 3f @ { \n"
- "2: \n"
- "ldr r14,[r2], #4 @ r14 = *contrib++ \n"
- "ldrb r8, [r4], #1 @ r8 = *min++ \n"
- "ldrb r11,[r4], #1 @ r11 = *min++ \n"
- "ldrb r12,[r4], #1 @ r12 = *min++ \n"
- "subs r9, r9, #1 @ r9 = len-- \n"
- "mla r5, r14,r8, r5 @ r += r8 * r14 \n"
- "mla r6, r14,r11,r6 @ g += r11 * r14 \n"
- "mla r7, r14,r12,r7 @ b += r12 * r14 \n"
- "bgt 2b @ } \n"
- "3: \n"
- "mov r5, r5, lsr #8 @ r >>= 8 \n"
- "mov r6, r6, lsr #8 @ g >>= 8 \n"
- "mov r7, r7, lsr #8 @ b >>= 8 \n"
- "strb r5, [r0, #-3]! @ *--dst=r \n"
- "strb r6, [r0, #1] @ *--dst=g \n"
- "strb r7, [r0, #2] @ *--dst=b \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 1b @ \n"
- "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
- "4:"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "5:"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r5, #128 @ r5 = r = 128 \n"
- "mov r6, #128 @ r6 = g = 128 \n"
- "add r7, r1, r4, LSL #1 @ r7 = min = &src[2*r4] \n"
- "add r4, r7, r4 @ r4 = min = &src[3*r4] \n"
- "mov r7, #128 @ r7 = b = 128 \n"
- "cmp r9, #0 @ while (len-- > 0) \n"
- "beq 7f @ { \n"
- "6: \n"
- "ldr r14,[r2], #4 @ r10 = *contrib++ \n"
- "ldrb r8, [r4], #1 @ r8 = *min++ \n"
- "ldrb r11,[r4], #1 @ r11 = *min++ \n"
- "ldrb r12,[r4], #1 @ r12 = *min++ \n"
- "subs r9, r9, #1 @ r9 = len-- \n"
- "mla r5, r14,r8, r5 @ r += r8 * r14 \n"
- "mla r6, r14,r11,r6 @ g += r11 * r14 \n"
- "mla r7, r14,r12,r7 @ b += r12 * r14 \n"
- "bgt 6b @ } \n"
- "7: \n"
- "mov r5, r5, lsr #8 @ r >>= 8 \n"
- "mov r6, r6, lsr #8 @ g >>= 8 \n"
- "mov r7, r7, lsr #8 @ b >>= 8 \n"
- "strb r5, [r0], #1 @ *dst++=r \n"
- "strb r6, [r0], #1 @ *dst++=g \n"
- "strb r7, [r0], #1 @ *dst++=b \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 5b @ \n"
- "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
- ENTER_THUMB
- );
- }
- static void
- scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- asm volatile(
- ENTER_ARM
- "stmfd r13!,{r4-r11,r14} \n"
- "@ r0 = dst \n"
- "@ r1 = src \n"
- "@ r2 = weights \n"
- "ldr r12,[r2],#4 @ r12= flip \n"
- "ldr r3, [r2],#20 @ r3 = count r2 = &index\n"
- "ldr r4, [r2] @ r4 = index[0] \n"
- "ldr r5,=0x00800080 @ r5 = rounding \n"
- "ldr r6,=0x00FF00FF @ r7 = 0x00FF00FF \n"
- "cmp r12,#0 @ if (flip) \n"
- "beq 4f @ { \n"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "add r0, r0, r3, LSL #2 @ dst += 4*count \n"
- "1: \n"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r7, r5 @ r7 = b = rounding \n"
- "mov r8, r5 @ r8 = a = rounding \n"
- "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
- "cmp r9, #0 @ while (len-- > 0) \n"
- "beq 3f @ { \n"
- "2: \n"
- "ldr r11,[r4], #4 @ r11 = *min++ \n"
- "ldr r10,[r2], #4 @ r10 = *contrib++ \n"
- "subs r9, r9, #1 @ r9 = len-- \n"
- "and r12,r6, r11 @ r12 = __22__00 \n"
- "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
- "mla r7, r10,r12,r7 @ b += r14 * r10 \n"
- "mla r8, r10,r11,r8 @ a += r11 * r10 \n"
- "bgt 2b @ } \n"
- "3: \n"
- "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
- "bic r8, r8, r6 @ r8 = 33__11__ \n"
- "orr r7, r7, r8 @ r7 = 33221100 \n"
- "str r7, [r0, #-4]! @ *--dst=r \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 1b @ \n"
- "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
- "4: \n"
- "add r2, r2, r4, LSL #2 @ r2 = &index[index[0]] \n"
- "5: \n"
- "ldr r4, [r2], #4 @ r4 = *contrib++ \n"
- "ldr r9, [r2], #4 @ r9 = len = *contrib++ \n"
- "mov r7, r5 @ r7 = b = rounding \n"
- "mov r8, r5 @ r8 = a = rounding \n"
- "add r4, r1, r4, LSL #2 @ r4 = min = &src[4*r4] \n"
- "cmp r9, #0 @ while (len-- > 0) \n"
- "beq 7f @ { \n"
- "6: \n"
- "ldr r11,[r4], #4 @ r11 = *min++ \n"
- "ldr r10,[r2], #4 @ r10 = *contrib++ \n"
- "subs r9, r9, #1 @ r9 = len-- \n"
- "and r12,r6, r11 @ r12 = __22__00 \n"
- "and r11,r6, r11,LSR #8 @ r11 = __33__11 \n"
- "mla r7, r10,r12,r7 @ b += r14 * r10 \n"
- "mla r8, r10,r11,r8 @ a += r11 * r10 \n"
- "bgt 6b @ } \n"
- "7: \n"
- "and r7, r6, r7, lsr #8 @ r7 = __22__00 \n"
- "bic r8, r8, r6 @ r8 = 33__11__ \n"
- "orr r7, r7, r8 @ r7 = 33221100 \n"
- "str r7, [r0], #4 @ *dst++=r \n"
- "subs r3, r3, #1 @ i-- \n"
- "bgt 5b @ \n"
- "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
- ENTER_THUMB
- );
- }
- static void
- scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
- {
- asm volatile(
- ENTER_ARM
- "stmfd r13!,{r4-r11,r14} \n"
- "@ r0 = dst \n"
- "@ r1 = src \n"
- "@ r2 = &weights->index[0] \n"
- "@ r3 = width \n"
- "@ r12= row \n"
- "ldr r14,[r13,#4*9] @ r14= n \n"
- "ldr r12,[r13,#4*10] @ r12= row \n"
- "add r2, r2, #24 @ r2 = weights->index \n"
- "mul r3, r14, r3 @ r3 = width *= n \n"
- "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
- "add r2, r2, #4 @ r2 = &index[1] \n"
- "subs r6, r3, #4 @ r6 = x = width-4 \n"
- "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
- " @ r14= len = *contrib \n"
- "blt 4f @ while (x >= 0) { \n"
- #ifndef ARCH_UNALIGNED_OK
- "tst r3, #3 @ if ((r3 & 3) \n"
- "tsteq r1, #3 @ || (r1 & 3)) \n"
- "bne 4f @ can't do fast code \n"
- #endif
- "ldr r9, =0x00FF00FF @ r9 = 0x00FF00FF \n"
- "1: \n"
- "ldr r7, =0x00800080 @ r5 = val0 = round \n"
- "stmfd r13!,{r1,r2,r7} @ stash r1,r2,r5 \n"
- " @ r1 = min = src \n"
- " @ r2 = contrib2-4 \n"
- "movs r8, r14 @ r8 = len2 = len \n"
- "mov r5, r7 @ r7 = val1 = round \n"
- "ble 3f @ while (len2-- > 0) { \n"
- "2: \n"
- "ldr r12,[r1], r3 @ r12 = *min r5 = min += width\n"
- "ldr r10,[r2, #4]! @ r10 = *contrib2++ \n"
- "subs r8, r8, #1 @ len2-- \n"
- "and r11,r9, r12 @ r11= __22__00 \n"
- "and r12,r9, r12,LSR #8 @ r12= __33__11 \n"
- "mla r5, r10,r11,r5 @ r5 = val0 += r11 * r10\n"
- "mla r7, r10,r12,r7 @ r7 = val1 += r12 * r10\n"
- "bgt 2b @ } \n"
- "and r5, r9, r5, LSR #8 @ r5 = __22__00 \n"
- "and r7, r7, r9, LSL #8 @ r7 = 33__11__ \n"
- "orr r5, r5, r7 @ r5 = 33221100 \n"
- "3: \n"
- "ldmfd r13!,{r1,r2,r7} @ restore r1,r2,r7 \n"
- "subs r6, r6, #4 @ x-- \n"
- "add r1, r1, #4 @ src++ \n"
- "str r5, [r0], #4 @ *dst++ = val \n"
- "bge 1b @ \n"
- "4: @ } (Less than 4 to go) \n"
- "adds r6, r6, #4 @ r6 = x += 4 \n"
- "beq 8f @ if (x == 0) done \n"
- "5: \n"
- "mov r5, r1 @ r5 = min = src \n"
- "mov r7, #128 @ r7 = val = 128 \n"
- "movs r8, r14 @ r8 = len2 = len \n"
- "add r9, r2, #4 @ r9 = contrib2 \n"
- "ble 7f @ while (len2-- > 0) { \n"
- "6: \n"
- "ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
- "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
- "subs r8, r8, #1 @ len2-- \n"
- "@ stall r12 \n"
- "mla r7, r10,r12,r7 @ val += r12 * r10 \n"
- "bgt 6b @ } \n"
- "7: \n"
- "mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
- "subs r6, r6, #1 @ x-- \n"
- "add r1, r1, #1 @ src++ \n"
- "strb r7, [r0], #1 @ *dst++ = val \n"
- "bgt 5b @ \n"
- "8: \n"
- "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
- ".ltorg \n"
- ENTER_THUMB
- );
- }
- static void
- scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int width, int n, int row)
- {
- asm volatile(
- ENTER_ARM
- "stmfd r13!,{r4-r11,r14} \n"
- "mov r11,#255 @ r11= 255 \n"
- "ldr r12,[r13,#4*10] @ r12= row \n"
- "@ r0 = dst \n"
- "@ r1 = src \n"
- "@ r2 = &weights->index[0] \n"
- "@ r3 = width \n"
- "@ r11= 255 \n"
- "@ r12= row \n"
- "add r2, r2, #24 @ r2 = weights->index \n"
- "ldr r4, [r2, r12, LSL #2] @ r4 = index[row] \n"
- "add r2, r2, #4 @ r2 = &index[1] \n"
- "mov r6, r3 @ r6 = x = width \n"
- "ldr r14,[r2, r4, LSL #2]! @ r2 = contrib = index[index[row]+1]\n"
- " @ r14= len = *contrib \n"
- "5: \n"
- "ldr r4,[r13,#4*9] @ r10= nn = n \n"
- "1: \n"
- "mov r5, r1 @ r5 = min = src \n"
- "mov r7, #128 @ r7 = val = 128 \n"
- "movs r8, r14 @ r8 = len2 = len \n"
- "add r9, r2, #4 @ r9 = contrib2 \n"
- "ble 7f @ while (len2-- > 0) { \n"
- "6: \n"
- "ldr r10,[r9], #4 @ r10 = *contrib2++ \n"
- "ldrb r12,[r5], r3 @ r12 = *min r5 = min += width\n"
- "subs r8, r8, #1 @ len2-- \n"
- "@ stall r12 \n"
- "mla r7, r10,r12,r7 @ val += r12 * r10 \n"
- "bgt 6b @ } \n"
- "7: \n"
- "mov r7, r7, asr #8 @ r7 = val >>= 8 \n"
- "subs r4, r4, #1 @ r4 = nn-- \n"
- "add r1, r1, #1 @ src++ \n"
- "strb r7, [r0], #1 @ *dst++ = val \n"
- "bgt 1b @ \n"
- "subs r6, r6, #1 @ x-- \n"
- "strb r11,[r0], #1 @ *dst++ = 255 \n"
- "bgt 5b @ \n"
- "ldmfd r13!,{r4-r11,PC} @ pop, return to thumb \n"
- ".ltorg \n"
- ENTER_THUMB
- );
- }
- #else
- static void
- scale_row_to_temp1(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int len, i;
- const unsigned char *min;
- assert(weights->n == 1);
- if (weights->flip)
- {
- dst += weights->count;
- for (i=weights->count; i > 0; i--)
- {
- int val = 128;
- min = &src[*contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- val += *min++ * *contrib++;
- }
- *--dst = (unsigned char)(val>>8);
- }
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- int val = 128;
- min = &src[*contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- val += *min++ * *contrib++;
- }
- *dst++ = (unsigned char)(val>>8);
- }
- }
- }
- static void
- scale_row_to_temp2(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int len, i;
- const unsigned char *min;
- assert(weights->n == 2);
- if (weights->flip)
- {
- dst += 2*weights->count;
- for (i=weights->count; i > 0; i--)
- {
- int c1 = 128;
- int c2 = 128;
- min = &src[2 * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- c1 += *min++ * *contrib;
- c2 += *min++ * *contrib++;
- }
- *--dst = (unsigned char)(c2>>8);
- *--dst = (unsigned char)(c1>>8);
- }
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- int c1 = 128;
- int c2 = 128;
- min = &src[2 * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- c1 += *min++ * *contrib;
- c2 += *min++ * *contrib++;
- }
- *dst++ = (unsigned char)(c1>>8);
- *dst++ = (unsigned char)(c2>>8);
- }
- }
- }
- static void
- scale_row_to_temp3(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int len, i;
- const unsigned char *min;
- assert(weights->n == 3);
- if (weights->flip)
- {
- dst += 3*weights->count;
- for (i=weights->count; i > 0; i--)
- {
- int c1 = 128;
- int c2 = 128;
- int c3 = 128;
- min = &src[3 * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- int c = *contrib++;
- c1 += *min++ * c;
- c2 += *min++ * c;
- c3 += *min++ * c;
- }
- *--dst = (unsigned char)(c3>>8);
- *--dst = (unsigned char)(c2>>8);
- *--dst = (unsigned char)(c1>>8);
- }
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- int c1 = 128;
- int c2 = 128;
- int c3 = 128;
- min = &src[3 * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- int c = *contrib++;
- c1 += *min++ * c;
- c2 += *min++ * c;
- c3 += *min++ * c;
- }
- *dst++ = (unsigned char)(c1>>8);
- *dst++ = (unsigned char)(c2>>8);
- *dst++ = (unsigned char)(c3>>8);
- }
- }
- }
- static void
- scale_row_to_temp4(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int len, i;
- const unsigned char *min;
- assert(weights->n == 4);
- if (weights->flip)
- {
- dst += 4*weights->count;
- for (i=weights->count; i > 0; i--)
- {
- int r = 128;
- int g = 128;
- int b = 128;
- int a = 128;
- min = &src[4 * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- r += *min++ * *contrib;
- g += *min++ * *contrib;
- b += *min++ * *contrib;
- a += *min++ * *contrib++;
- }
- *--dst = (unsigned char)(a>>8);
- *--dst = (unsigned char)(b>>8);
- *--dst = (unsigned char)(g>>8);
- *--dst = (unsigned char)(r>>8);
- }
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- int r = 128;
- int g = 128;
- int b = 128;
- int a = 128;
- min = &src[4 * *contrib++];
- len = *contrib++;
- while (len-- > 0)
- {
- r += *min++ * *contrib;
- g += *min++ * *contrib;
- b += *min++ * *contrib;
- a += *min++ * *contrib++;
- }
- *dst++ = (unsigned char)(r>>8);
- *dst++ = (unsigned char)(g>>8);
- *dst++ = (unsigned char)(b>>8);
- *dst++ = (unsigned char)(a>>8);
- }
- }
- }
- static void
- scale_row_from_temp(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
- {
- const int *contrib = &weights->index[weights->index[row]];
- int len, x;
- int width = w * n;
- contrib++; /* Skip min */
- len = *contrib++;
- for (x=width; x > 0; x--)
- {
- const unsigned char *min = src;
- int val = 128;
- int len2 = len;
- const int *contrib2 = contrib;
- while (len2-- > 0)
- {
- val += *min * *contrib2++;
- min += width;
- }
- *dst++ = (unsigned char)(val>>8);
- src++;
- }
- }
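- /* Note: src here is the temporary buffer of up to max_len
-  * horizontally-scaled rows, stored contiguously, so `min += width`
-  * steps down one row of that buffer. The vertical weights were
-  * reordered (see reorder_weights) to match the (source_row % max_len)
-  * order in which the rows occupy the buffer. */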
- static void
- scale_row_from_temp_alpha(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row)
- {
- const int *contrib = &weights->index[weights->index[row]];
- int len, x;
- int width = w * n;
- contrib++; /* Skip min */
- len = *contrib++;
- for (x=w; x > 0; x--)
- {
- int nn;
- for (nn = n; nn > 0; nn--)
- {
- const unsigned char *min = src;
- int val = 128;
- int len2 = len;
- const int *contrib2 = contrib;
- while (len2-- > 0)
- {
- val += *min * *contrib2++;
- min += width;
- }
- *dst++ = (unsigned char)(val>>8);
- src++;
- }
- *dst++ = 255;
- }
- }
- #endif
- #ifdef SINGLE_PIXEL_SPECIALS
- static void
- duplicate_single_pixel(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, int n, int forcealpha, int w, int h, int stride)
- {
- int i;
- for (i = n; i > 0; i--)
- *dst++ = *src++;
- if (forcealpha)
- *dst++ = 255;
- n += forcealpha;
- for (i = w-1; i > 0; i--)
- {
- memcpy(dst, dst-n, n);
- dst += n;
- }
- w *= n;
- dst -= w;
- h--;
- while (h--)
- {
- memcpy(dst+stride, dst, w);
- dst += stride;
- }
- }
- static void
- scale_single_row(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int src_w, int h, int forcealpha)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int min, len, i, j, n, nf;
- int tmp[FZ_MAX_COLORS];
- n = weights->n;
- nf = n + forcealpha;
- /* Scale a single row */
- for (j = 0; j < nf; j++)
- tmp[j] = 128;
- if (weights->flip)
- {
- dst += (weights->count-1)*nf;
- for (i=weights->count; i > 0; i--)
- {
- min = *contrib++;
- len = *contrib++;
- min *= n;
- while (len-- > 0)
- {
- int c = *contrib++;
- for (j = 0; j < n; j++)
- tmp[j] += src[min++] * c;
- if (forcealpha)
- tmp[j] += 255 * c;
- }
- for (j = 0; j < nf; j++)
- {
- *dst++ = (unsigned char)(tmp[j]>>8);
- tmp[j] = 128;
- }
- dst -= 2*nf;
- }
- dst += nf + dstride;
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- min = *contrib++;
- len = *contrib++;
- min *= n;
- while (len-- > 0)
- {
- int c = *contrib++;
- for (j = 0; j < n; j++)
- tmp[j] += src[min++] * c;
- if (forcealpha)
- tmp[j] += 255 * c;
- }
- for (j = 0; j < nf; j++)
- {
- *dst++ = (unsigned char)(tmp[j]>>8);
- tmp[j] = 128;
- }
- }
- dst += dstride - weights->count * nf;
- }
- /* And then duplicate it h times */
- nf *= weights->count;
- while (--h > 0)
- {
- memcpy(dst, dst-dstride, nf);
- dst += dstride;
- }
- }
- static void
- scale_single_col(unsigned char * FZ_RESTRICT dst, int dstride, const unsigned char * FZ_RESTRICT src, int sstride, const fz_weights * FZ_RESTRICT weights, int src_w, int n, int w, int forcealpha)
- {
- const int *contrib = &weights->index[weights->index[0]];
- int min, len, i, j;
- int tmp[FZ_MAX_COLORS];
- int nf = n + forcealpha;
- for (j = 0; j < nf; j++)
- tmp[j] = 128;
- if (weights->flip)
- {
- src_w = (src_w-1)*sstride;
- for (i=weights->count; i > 0; i--)
- {
- /* Scale the next pixel in the column */
- min = *contrib++;
- len = *contrib++;
- min = src_w-min*sstride;
- while (len-- > 0)
- {
- int c = *contrib++;
- for (j = 0; j < n; j++)
- tmp[j] += src[min+j] * c;
- if (forcealpha)
- tmp[j] += 255 * c;
- min -= sstride;
- }
- for (j = 0; j < nf; j++)
- {
- *dst++ = (unsigned char)(tmp[j]>>8);
- tmp[j] = 128;
- }
- /* And then duplicate it across the row */
- for (j = (w-1)*nf; j > 0; j--)
- {
- *dst = dst[-nf];
- dst++;
- }
- dst += dstride - w*nf;
- }
- }
- else
- {
- for (i=weights->count; i > 0; i--)
- {
- /* Scale the next pixel in the column */
- min = *contrib++;
- len = *contrib++;
- min *= sstride;
- while (len-- > 0)
- {
- int c = *contrib++;
- for (j = 0; j < n; j++)
- tmp[j] += src[min+j] * c;
- if (forcealpha)
- tmp[j] += 255 * c;
- min += sstride;
- }
- for (j = 0; j < nf; j++)
- {
- *dst++ = (unsigned char)(tmp[j]>>8);
- tmp[j] = 128;
- }
- /* And then duplicate it across the row */
- for (j = (w-1)*nf; j > 0; j--)
- {
- *dst = dst[-nf];
- dst++;
- }
- dst += dstride - w*nf;
- }
- }
- }
- #endif /* SINGLE_PIXEL_SPECIALS */
- static void
- get_alpha_edge_values(const fz_weights * FZ_RESTRICT rows, int * FZ_RESTRICT tp, int * FZ_RESTRICT bp)
- {
- const int *contrib = &rows->index[rows->index[0]];
- int len, i, t, b;
- /* Calculate the edge alpha values */
- contrib++; /* Skip min */
- len = *contrib++;
- t = 0;
- while (len--)
- t += *contrib++;
- for (i=rows->count-2; i > 0; i--)
- {
- contrib++; /* Skip min */
- len = *contrib++;
- contrib += len;
- }
- b = 0;
- if (i == 0)
- {
- contrib++;
- len = *contrib++;
- while (len--)
- b += *contrib++;
- }
- if (rows->flip && i == 0)
- {
- *tp = b;
- *bp = t;
- }
- else
- {
- *tp = t;
- *bp = b;
- }
- }
- static void
- adjust_alpha_edges(fz_pixmap * FZ_RESTRICT pix, const fz_weights * FZ_RESTRICT rows, const fz_weights * FZ_RESTRICT cols)
- {
- int t, l, r, b, tl, tr, bl, br, x, y;
- unsigned char *dp = pix->samples;
- int w = pix->w;
- int n = pix->n;
- int span = w >= 2 ? (w-1)*n : 0;
- int stride = pix->stride;
- get_alpha_edge_values(rows, &t, &b);
- get_alpha_edge_values(cols, &l, &r);
- l = (255 * l + 128)>>8;
- r = (255 * r + 128)>>8;
- tl = (l * t + 128)>>8;
- tr = (r * t + 128)>>8;
- bl = (l * b + 128)>>8;
- br = (r * b + 128)>>8;
- t = (255 * t + 128)>>8;
- b = (255 * b + 128)>>8;
- dp += n-1;
- *dp = tl;
- dp += n;
- for (x = w-2; x > 0; x--)
- {
- *dp = t;
- dp += n;
- }
- if (x == 0)
- {
- *dp = tr;
- dp += n;
- }
- dp += stride - w*n;
- for (y = pix->h-2; y > 0; y--)
- {
- dp[span] = r;
- *dp = l;
- dp += stride;
- }
- if (y == 0)
- {
- *dp = bl;
- dp += n;
- for (x = w-2; x > 0; x--)
- {
- *dp = b;
- dp += n;
- }
- if (x == 0)
- {
- *dp = br;
- }
- }
- }
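- /* Example (illustrative): if the top row is half covered and the left
-  * column is half covered (edge weight sums of 128 out of 256), then
-  * l = (255*128+128)>>8 = 128 and tl = (128*128+128)>>8 = 64, so the
-  * corner alpha is roughly the product of the two edge coverages. */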
- fz_pixmap *
- fz_scale_pixmap(fz_context *ctx, fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip)
- {
- return fz_scale_pixmap_cached(ctx, src, x, y, w, h, clip, NULL, NULL);
- }
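- /*
- Minimal usage sketch (hypothetical caller; error handling elided):
- fz_pixmap *scaled = fz_scale_pixmap(ctx, src, 0, 0, 100, 100, NULL);
- if (scaled)
- {
- ... use scaled ...
- fz_drop_pixmap(ctx, scaled);
- }
- A NULL return means the request was degenerate (fully clipped, or an
- extreme scale factor); allocation failures are thrown as exceptions
- via fz_try/fz_catch instead.
- */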
- fz_pixmap *
- fz_scale_pixmap_cached(fz_context *ctx, const fz_pixmap *src, float x, float y, float w, float h, const fz_irect *clip, fz_scale_cache *cache_x, fz_scale_cache *cache_y)
- {
- fz_scale_filter *filter = &fz_scale_filter_simple;
- fz_weights *contrib_rows = NULL;
- fz_weights *contrib_cols = NULL;
- fz_pixmap *output = NULL;
- unsigned char *temp = NULL;
- int max_row, temp_span, temp_rows, row;
- int dst_w_int, dst_h_int, dst_x_int, dst_y_int;
- int flip_x, flip_y, forcealpha;
- fz_rect patch;
- fz_var(contrib_cols);
- fz_var(contrib_rows);
- /* Avoid extreme scales where overflows become problematic. */
- if (w > (1<<24) || h > (1<<24) || w < -(1<<24) || h < -(1<<24))
- return NULL;
- if (x > (1<<24) || y > (1<<24) || x < -(1<<24) || y < -(1<<24))
- return NULL;
- /* Clamp small ranges of w and h */
- if (w <= -1)
- {
- /* Large negative range. Don't clamp */
- }
- else if (w < 0)
- {
- w = -1;
- }
- else if (w < 1)
- {
- w = 1;
- }
- if (h <= -1)
- {
- /* Large negative range. Don't clamp */
- }
- else if (h < 0)
- {
- h = -1;
- }
- else if (h < 1)
- {
- h = 1;
- }
- /* If the src has an alpha, we'll make the dst have an alpha automatically.
- * We also need to force the dst to have an alpha if x/y/w/h aren't ints. */
- forcealpha = !src->alpha && (x != (float)(int)x || y != (float)(int)y || w != (float)(int)w || h != (float)(int)h);
- /* Find the destination bbox, width/height, and sub pixel offset,
- * allowing for whether we're flipping or not. */
- /* The (x,y) position given describes where the top left corner
- * of the source image should be mapped to (i.e. where (0,0) in image
- * space ends up). Also there are differences in the way we scale
- * horizontally and vertically. When scaling rows horizontally, we
- * always read forwards through the source, and store either forwards
- * or in reverse as required. When scaling vertically, we always store
- * out forwards, but may feed in the source rows in a different order.
- *
- * Consider the image rectangle 'r' to which the image is mapped,
- * and the (possibly) larger rectangle 'R', given by expanding 'r' to
- * complete pixels.
- *
- * x can either be r.xmin-R.xmin or R.xmax-r.xmax depending on whether
- * the image is x flipped or not. Whatever happens 0 <= x < 1.
- * y is always R.ymax - r.ymax.
- */
- /* dst_x_int is calculated to be the left of the scaled image, and
- * x (the sub pixel offset) is the distance in from either the left
- * or right pixel expanded edge. */
- flip_x = (w < 0);
- if (flip_x)
- {
- float tmp;
- w = -w;
- dst_x_int = floorf(x-w);
- tmp = ceilf(x);
- dst_w_int = (int)tmp;
- x = tmp - x;
- dst_w_int -= dst_x_int;
- }
- else
- {
- dst_x_int = floorf(x);
- x -= dst_x_int;
- dst_w_int = (int)ceilf(x + w);
- }
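- /* Example (illustrative): x = 3.25, w = 2.5 with no flip gives
-  * dst_x_int = 3, sub-pixel offset x = 0.25, and
-  * dst_w_int = ceil(0.25 + 2.5) = 3 destination pixels. */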
- /* dst_y_int is calculated to be the top of the scaled image, and
- * y (the sub pixel offset) is the distance in from either the top
- * or bottom pixel expanded edge.
- */
- flip_y = (h < 0);
- if (flip_y)
- {
- float tmp;
- h = -h;
- dst_y_int = floorf(y-h);
- tmp = ceilf(y);
- dst_h_int = (int)tmp;
- y = tmp - y;
- dst_h_int -= dst_y_int;
- }
- else
- {
- dst_y_int = floorf(y);
- y -= dst_y_int;
- dst_h_int = (int)ceilf(y + h);
- }
- fz_valgrind_pixmap(src);
- /* Step 0: Calculate the patch */
- patch.x0 = 0;
- patch.y0 = 0;
- patch.x1 = dst_w_int;
- patch.y1 = dst_h_int;
- if (clip)
- {
- if (flip_x)
- {
- if (dst_x_int + dst_w_int > clip->x1)
- patch.x0 = dst_x_int + dst_w_int - clip->x1;
- if (clip->x0 > dst_x_int)
- {
- patch.x1 = dst_w_int - (clip->x0 - dst_x_int);
- dst_x_int = clip->x0;
- }
- }
- else
- {
- if (dst_x_int + dst_w_int > clip->x1)
- patch.x1 = clip->x1 - dst_x_int;
- if (clip->x0 > dst_x_int)
- {
- patch.x0 = clip->x0 - dst_x_int;
- dst_x_int += patch.x0;
- }
- }
- if (flip_y)
- {
- if (dst_y_int + dst_h_int > clip->y1)
- patch.y1 = clip->y1 - dst_y_int;
- if (clip->y0 > dst_y_int)
- {
- patch.y0 = clip->y0 - dst_y_int;
- dst_y_int = clip->y0;
- }
- }
- else
- {
- if (dst_y_int + dst_h_int > clip->y1)
- patch.y1 = clip->y1 - dst_y_int;
- if (clip->y0 > dst_y_int)
- {
- patch.y0 = clip->y0 - dst_y_int;
- dst_y_int += patch.y0;
- }
- }
- }
- if (patch.x0 >= patch.x1 || patch.y0 >= patch.y1)
- return NULL;
- fz_try(ctx)
- {
- /* Step 1: Calculate the weights for columns and rows */
- #ifdef SINGLE_PIXEL_SPECIALS
- if (src->w == 1)
- contrib_cols = NULL;
- else
- #endif /* SINGLE_PIXEL_SPECIALS */
- contrib_cols = Memento_label(make_weights(ctx, src->w, x, w, filter, 0, dst_w_int, patch.x0, patch.x1, src->n, flip_x, cache_x), "contrib_cols");
- #ifdef SINGLE_PIXEL_SPECIALS
- if (src->h == 1)
- contrib_rows = NULL;
- else
- #endif /* SINGLE_PIXEL_SPECIALS */
- contrib_rows = Memento_label(make_weights(ctx, src->h, y, h, filter, 1, dst_h_int, patch.y0, patch.y1, src->n, flip_y, cache_y), "contrib_rows");
- output = fz_new_pixmap(ctx, src->colorspace, patch.x1 - patch.x0, patch.y1 - patch.y0, src->seps, src->alpha || forcealpha);
- }
- fz_catch(ctx)
- {
- if (!cache_x)
- fz_free(ctx, contrib_cols);
- if (!cache_y)
- fz_free(ctx, contrib_rows);
- fz_rethrow(ctx);
- }
- output->x = dst_x_int;
- output->y = dst_y_int;
- /* Step 2: Apply the weights */
- #ifdef SINGLE_PIXEL_SPECIALS
- if (!contrib_rows)
- {
- /* Only 1 source pixel high. */
- if (!contrib_cols)
- {
- /* Only 1 pixel in the entire image! */
- duplicate_single_pixel(output->samples, src->samples, src->n, forcealpha, patch.x1-patch.x0, patch.y1-patch.y0, output->stride);
- fz_valgrind_pixmap(output);
- }
- else
- {
- /* Scale the row once, then copy it. */
- scale_single_row(output->samples, output->stride, src->samples, contrib_cols, src->w, patch.y1-patch.y0, forcealpha);
- fz_valgrind_pixmap(output);
- }
- }
- else if (!contrib_cols)
- {
- /* Only 1 source pixel wide. Scale the col and duplicate. */
- scale_single_col(output->samples, output->stride, src->samples, src->stride, contrib_rows, src->h, src->n, patch.x1-patch.x0, forcealpha);
- fz_valgrind_pixmap(output);
- }
- else
- #endif /* SINGLE_PIXEL_SPECIALS */
- {
- void (*row_scale_in)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights);
- void (*row_scale_out)(unsigned char * FZ_RESTRICT dst, const unsigned char * FZ_RESTRICT src, const fz_weights * FZ_RESTRICT weights, int w, int n, int row);
- temp_span = contrib_cols->count * src->n;
- temp_rows = contrib_rows->max_len;
- if (temp_span <= 0 || temp_rows > INT_MAX / temp_span)
- goto cleanup;
- fz_try(ctx)
- {
- temp = fz_calloc(ctx, (size_t)temp_span*temp_rows, sizeof(unsigned char));
- }
- fz_catch(ctx)
- {
- fz_drop_pixmap(ctx, output);
- if (!cache_x)
- fz_free(ctx, contrib_cols);
- if (!cache_y)
- fz_free(ctx, contrib_rows);
- fz_rethrow(ctx);
- }
- switch (src->n)
- {
- default:
- row_scale_in = scale_row_to_temp;
- break;
- case 1: /* Image mask case or Greyscale case */
- row_scale_in = scale_row_to_temp1;
- break;
- case 2: /* Greyscale with alpha case */
- row_scale_in = scale_row_to_temp2;
- break;
- case 3: /* RGB case */
- row_scale_in = scale_row_to_temp3;
- break;
- case 4: /* RGBA or CMYK case */
- row_scale_in = scale_row_to_temp4;
- break;
- }
- row_scale_out = forcealpha ? scale_row_from_temp_alpha : scale_row_from_temp;
- max_row = contrib_rows->index[contrib_rows->index[0]];
- for (row = 0; row < contrib_rows->count; row++)
- {
- /*
- Which source rows do we need to have scaled into the
- temporary buffer in order to be able to do the final
- scale?
- */
- int row_index = contrib_rows->index[row];
- int row_min = contrib_rows->index[row_index++];
- int row_len = contrib_rows->index[row_index];
- while (max_row < row_min+row_len)
- {
- /* Scale another row */
- assert(max_row < src->h);
- (*row_scale_in)(&temp[temp_span*(max_row % temp_rows)], &src->samples[(flip_y ? (src->h-1-max_row): max_row)*src->stride], contrib_cols);
- max_row++;
- }
- (*row_scale_out)(&output->samples[row*output->stride], temp, contrib_rows, contrib_cols->count, src->n, row);
- }
- fz_free(ctx, temp);
- if (forcealpha)
- adjust_alpha_edges(output, contrib_rows, contrib_cols);
- fz_valgrind_pixmap(output);
- }
- cleanup:
- if (!cache_y)
- fz_free(ctx, contrib_rows);
- if (!cache_x)
- fz_free(ctx, contrib_cols);
- return output;
- }
- void
- fz_drop_scale_cache(fz_context *ctx, fz_scale_cache *sc)
- {
- if (!sc)
- return;
- fz_free(ctx, sc->weights);
- fz_free(ctx, sc);
- }
- fz_scale_cache *
- fz_new_scale_cache(fz_context *ctx)
- {
- return fz_malloc_struct(ctx, fz_scale_cache);
- }