| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798 |
- #include "jsi.h"
- #include "utf.h"
- #include "regexp.h"
- static int js_doregexec(js_State *J, Reprog *prog, const char *string, Resub *sub, int eflags)
- {
- int result = js_regexec(prog, string, sub, eflags);
- if (result < 0)
- js_error(J, "regexec failed");
- return result;
- }
- static const char *checkstring(js_State *J, int idx)
- {
- if (!js_iscoercible(J, idx))
- js_typeerror(J, "string function called on null or undefined");
- return js_tostring(J, idx);
- }
- int js_runeat(js_State *J, const char *s, int i)
- {
- Rune rune = EOF;
- while (i >= 0) {
- rune = *(unsigned char*)s;
- if (rune < Runeself) {
- if (rune == 0)
- return EOF;
- ++s;
- --i;
- } else {
- s += chartorune(&rune, s);
- if (rune >= 0x10000)
- i -= 2;
- else
- --i;
- }
- }
- if (rune >= 0x10000) {
- /* high surrogate */
- if (i == -2)
- return 0xd800 + ((rune - 0x10000) >> 10);
- /* low surrogate */
- else
- return 0xdc00 + ((rune - 0x10000) & 0x3ff);
- }
- return rune;
- }
- int js_utflen(const char *s)
- {
- int c;
- int n;
- Rune rune;
- n = 0;
- for(;;) {
- c = *(unsigned char *)s;
- if (c < Runeself) {
- if (c == 0)
- return n;
- s++;
- n++;
- } else {
- s += chartorune(&rune, s);
- if (rune >= 0x10000)
- n += 2;
- else
- n++;
- }
- }
- }
- int js_utfptrtoidx(const char *s, const char *p)
- {
- Rune rune;
- int i = 0;
- while (s < p) {
- if (*(unsigned char *)s < Runeself)
- ++s;
- else
- s += chartorune(&rune, s);
- if (rune >= 0x10000)
- i += 2;
- else
- i += 1;
- }
- return i;
- }
- static void jsB_new_String(js_State *J)
- {
- js_newstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
- }
- static void jsB_String(js_State *J)
- {
- js_pushstring(J, js_gettop(J) > 1 ? js_tostring(J, 1) : "");
- }
- static void Sp_toString(js_State *J)
- {
- js_Object *self = js_toobject(J, 0);
- if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
- js_pushstring(J, self->u.s.string);
- }
- static void Sp_valueOf(js_State *J)
- {
- js_Object *self = js_toobject(J, 0);
- if (self->type != JS_CSTRING) js_typeerror(J, "not a string");
- js_pushstring(J, self->u.s.string);
- }
- static void Sp_charAt(js_State *J)
- {
- char buf[UTFmax + 1];
- const char *s = checkstring(J, 0);
- int pos = js_tointeger(J, 1);
- Rune rune = js_runeat(J, s, pos);
- if (rune >= 0) {
- buf[runetochar(buf, &rune)] = 0;
- js_pushstring(J, buf);
- } else {
- js_pushliteral(J, "");
- }
- }
- static void Sp_charCodeAt(js_State *J)
- {
- const char *s = checkstring(J, 0);
- int pos = js_tointeger(J, 1);
- Rune rune = js_runeat(J, s, pos);
- if (rune >= 0)
- js_pushnumber(J, rune);
- else
- js_pushnumber(J, NAN);
- }
- static void Sp_concat(js_State *J)
- {
- int i, top = js_gettop(J);
- int n;
- char * volatile out = NULL;
- const char *s;
- if (top == 1)
- return;
- s = checkstring(J, 0);
- n = 1 + strlen(s);
- if (js_try(J)) {
- js_free(J, out);
- js_throw(J);
- }
- if (n > JS_STRLIMIT)
- js_rangeerror(J, "invalid string length");
- out = js_malloc(J, n);
- strcpy(out, s);
- for (i = 1; i < top; ++i) {
- s = js_tostring(J, i);
- n += strlen(s);
- if (n > JS_STRLIMIT)
- js_rangeerror(J, "invalid string length");
- out = js_realloc(J, out, n);
- strcat(out, s);
- }
- js_pushstring(J, out);
- js_endtry(J);
- js_free(J, out);
- }
- static void Sp_indexOf(js_State *J)
- {
- const char *haystack = checkstring(J, 0);
- const char *needle = js_tostring(J, 1);
- int pos = js_tointeger(J, 2);
- int len = strlen(needle);
- int k = 0;
- Rune rune;
- while (*haystack) {
- if (k >= pos && !strncmp(haystack, needle, len)) {
- js_pushnumber(J, k);
- return;
- }
- haystack += chartorune(&rune, haystack);
- ++k;
- }
- js_pushnumber(J, -1);
- }
- static void Sp_lastIndexOf(js_State *J)
- {
- const char *haystack = checkstring(J, 0);
- const char *needle = js_tostring(J, 1);
- int pos = js_isdefined(J, 2) ? js_tointeger(J, 2) : (int)strlen(haystack);
- int len = strlen(needle);
- int k = 0, last = -1;
- Rune rune;
- while (*haystack && k <= pos) {
- if (!strncmp(haystack, needle, len))
- last = k;
- haystack += chartorune(&rune, haystack);
- ++k;
- }
- js_pushnumber(J, last);
- }
- static void Sp_localeCompare(js_State *J)
- {
- const char *a = checkstring(J, 0);
- const char *b = js_tostring(J, 1);
- js_pushnumber(J, strcmp(a, b));
- }
- static void Sp_substring_imp(js_State *J, const char *s, int a, int n)
- {
- Rune head_rune = 0, tail_rune = 0;
- const char *head, *tail;
- char *p;
- int i, k, head_len, tail_len;
- /* find start of substring */
- head = s;
- for (i = 0; i < a; ++i) {
- head += chartorune(&head_rune, head);
- if (head_rune >= 0x10000)
- ++i;
- }
- /* find end of substring */
- tail = head;
- for (k = i - a; k < n; ++k) {
- tail += chartorune(&tail_rune, tail);
- if (tail_rune >= 0x10000)
- ++k;
- }
- /* no surrogate pair splits! */
- if (i == a && k == n) {
- js_pushlstring(J, head, tail - head);
- return;
- }
- if (js_try(J)) {
- js_free(J, p);
- js_throw(J);
- }
- p = js_malloc(J, UTFmax + (tail - head));
- /* substring starts with low surrogate (head is just after character) */
- if (i > a) {
- head_rune = 0xdc00 + ((head_rune - 0x10000) & 0x3ff);
- head_len = runetochar(p, &head_rune);
- memcpy(p + head_len, head, tail - head);
- js_pushlstring(J, p, head_len + (tail - head));
- }
- /* substring ends with high surrogate (tail is just after character) */
- if (k > n) {
- tail -= runelen(tail_rune);
- memcpy(p, head, tail - head);
- tail_rune = 0xd800 + ((tail_rune - 0x10000) >> 10);
- tail_len = runetochar(p + (tail - head), &tail_rune);
- js_pushlstring(J, p, (tail - head) + tail_len);
- }
- js_endtry(J);
- js_free(J, p);
- }
- static void Sp_slice(js_State *J)
- {
- const char *str = checkstring(J, 0);
- int len = js_utflen(str);
- int s = js_tointeger(J, 1);
- int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
- s = s < 0 ? s + len : s;
- e = e < 0 ? e + len : e;
- s = s < 0 ? 0 : s > len ? len : s;
- e = e < 0 ? 0 : e > len ? len : e;
- if (s < e)
- Sp_substring_imp(J, str, s, e - s);
- else
- Sp_substring_imp(J, str, e, s - e);
- }
- static void Sp_substring(js_State *J)
- {
- const char *str = checkstring(J, 0);
- int len = js_utflen(str);
- int s = js_tointeger(J, 1);
- int e = js_isdefined(J, 2) ? js_tointeger(J, 2) : len;
- s = s < 0 ? 0 : s > len ? len : s;
- e = e < 0 ? 0 : e > len ? len : e;
- if (s < e)
- Sp_substring_imp(J, str, s, e - s);
- else
- Sp_substring_imp(J, str, e, s - e);
- }
- static void Sp_toLowerCase(js_State *J)
- {
- const char *s = checkstring(J, 0);
- char * volatile dst = NULL;
- char *d;
- Rune rune;
- if (js_try(J)) {
- js_free(J, dst);
- js_throw(J);
- }
- d = dst = js_malloc(J, UTFmax * strlen(s) + 1);
- while (*s) {
- s += chartorune(&rune, s);
- rune = tolowerrune(rune);
- d += runetochar(d, &rune);
- }
- *d = 0;
- js_pushstring(J, dst);
- js_endtry(J);
- js_free(J, dst);
- }
- static void Sp_toUpperCase(js_State *J)
- {
- const char *s = checkstring(J, 0);
- char * volatile dst = NULL;
- char *d;
- Rune rune;
- if (js_try(J)) {
- js_free(J, dst);
- js_throw(J);
- }
- d = dst = js_malloc(J, UTFmax * strlen(s) + 1);
- while (*s) {
- s += chartorune(&rune, s);
- rune = toupperrune(rune);
- d += runetochar(d, &rune);
- }
- *d = 0;
- js_pushstring(J, dst);
- js_endtry(J);
- js_free(J, dst);
- }
/*
 * Return 1 when 'c' is one of the code points removed by trim():
 * TAB, VT, FF, SPACE, U+00A0, U+FEFF, LF, CR, U+2028 or U+2029;
 * otherwise 0.
 */
static int istrim(int c)
{
	switch (c) {
	case 0x9:
	case 0xA:
	case 0xB:
	case 0xC:
	case 0xD:
	case 0x20:
	case 0xA0:
	case 0x2028:
	case 0x2029:
	case 0xFEFF:
		return 1;
	default:
		return 0;
	}
}
- static void Sp_trim(js_State *J)
- {
- const char *s, *e;
- s = checkstring(J, 0);
- while (istrim(*s))
- ++s;
- e = s + strlen(s);
- while (e > s && istrim(e[-1]))
- --e;
- js_pushlstring(J, s, e - s);
- }
- static void S_fromCharCode(js_State *J)
- {
- int i, top = js_gettop(J);
- char * volatile s = NULL;
- char *p;
- Rune c;
- if (js_try(J)) {
- js_free(J, s);
- js_throw(J);
- }
- s = p = js_malloc(J, (top-1) * UTFmax + 1);
- for (i = 1; i < top; ++i) {
- c = js_touint32(J, i);
- p += runetochar(p, &c);
- }
- *p = 0;
- js_pushstring(J, s);
- js_endtry(J);
- js_free(J, s);
- }
- static void Sp_match(js_State *J)
- {
- js_Regexp *re;
- const char *text;
- int len;
- const char *a, *b, *c, *e;
- Resub m;
- text = checkstring(J, 0);
- if (js_isregexp(J, 1))
- js_copy(J, 1);
- else if (js_isundefined(J, 1))
- js_newregexp(J, "", 0);
- else
- js_newregexp(J, js_tostring(J, 1), 0);
- re = js_toregexp(J, -1);
- if (!(re->flags & JS_REGEXP_G)) {
- js_RegExp_prototype_exec(J, re, text);
- return;
- }
- re->last = 0;
- js_newarray(J);
- len = 0;
- a = text;
- e = text + strlen(text);
- while (a <= e) {
- if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
- break;
- b = m.sub[0].sp;
- c = m.sub[0].ep;
- js_pushlstring(J, b, c - b);
- js_setindex(J, -2, len++);
- a = c;
- if (c - b == 0)
- ++a;
- }
- if (len == 0) {
- js_pop(J, 1);
- js_pushnull(J);
- }
- }
- static void Sp_search(js_State *J)
- {
- js_Regexp *re;
- const char *text;
- Resub m;
- text = checkstring(J, 0);
- if (js_isregexp(J, 1))
- js_copy(J, 1);
- else if (js_isundefined(J, 1))
- js_newregexp(J, "", 0);
- else
- js_newregexp(J, js_tostring(J, 1), 0);
- re = js_toregexp(J, -1);
- if (!js_doregexec(J, re->prog, text, &m, 0))
- js_pushnumber(J, js_utfptrtoidx(text, m.sub[0].sp));
- else
- js_pushnumber(J, -1);
- }
- static void Sp_replace_regexp(js_State *J)
- {
- js_Regexp *re;
- const char *source, *s, *r;
- js_Buffer *sb = NULL;
- int n, x;
- Resub m;
- source = checkstring(J, 0);
- re = js_toregexp(J, 1);
- if (js_doregexec(J, re->prog, source, &m, 0)) {
- js_copy(J, 0);
- return;
- }
- re->last = 0;
- loop:
- s = m.sub[0].sp;
- n = m.sub[0].ep - m.sub[0].sp;
- if (js_iscallable(J, 2)) {
- js_copy(J, 2);
- js_pushundefined(J);
- for (x = 0; m.sub[x].sp; ++x) /* arg 0..x: substring and subexps that matched */
- js_pushlstring(J, m.sub[x].sp, m.sub[x].ep - m.sub[x].sp);
- js_pushnumber(J, s - source); /* arg x+2: offset within search string */
- js_copy(J, 0); /* arg x+3: search string */
- js_call(J, 2 + x);
- r = js_tostring(J, -1);
- js_putm(J, &sb, source, s);
- js_puts(J, &sb, r);
- js_pop(J, 1);
- } else {
- r = js_tostring(J, 2);
- js_putm(J, &sb, source, s);
- while (*r) {
- if (*r == '$') {
- switch (*(++r)) {
- case 0: --r; /* end of string; back up */
- /* fallthrough */
- case '$': js_putc(J, &sb, '$'); break;
- case '`': js_putm(J, &sb, source, s); break;
- case '\'': js_puts(J, &sb, s + n); break;
- case '&':
- js_putm(J, &sb, s, s + n);
- break;
- case '0': case '1': case '2': case '3': case '4':
- case '5': case '6': case '7': case '8': case '9':
- x = *r - '0';
- if (r[1] >= '0' && r[1] <= '9')
- x = x * 10 + *(++r) - '0';
- if (x > 0 && x < m.nsub) {
- js_putm(J, &sb, m.sub[x].sp, m.sub[x].ep);
- } else {
- js_putc(J, &sb, '$');
- if (x > 10) {
- js_putc(J, &sb, '0' + x / 10);
- js_putc(J, &sb, '0' + x % 10);
- } else {
- js_putc(J, &sb, '0' + x);
- }
- }
- break;
- default:
- js_putc(J, &sb, '$');
- js_putc(J, &sb, *r);
- break;
- }
- ++r;
- } else {
- js_putc(J, &sb, *r++);
- }
- }
- }
- if (re->flags & JS_REGEXP_G) {
- source = m.sub[0].ep;
- if (n == 0) {
- if (*source)
- js_putc(J, &sb, *source++);
- else
- goto end;
- }
- if (!js_doregexec(J, re->prog, source, &m, REG_NOTBOL))
- goto loop;
- }
- end:
- js_puts(J, &sb, s + n);
- js_putc(J, &sb, 0);
- if (js_try(J)) {
- js_free(J, sb);
- js_throw(J);
- }
- js_pushstring(J, sb ? sb->s : "");
- js_endtry(J);
- js_free(J, sb);
- }
- static void Sp_replace_string(js_State *J)
- {
- const char *source, *needle, *s, *r;
- js_Buffer *sb = NULL;
- int n;
- source = checkstring(J, 0);
- needle = js_tostring(J, 1);
- s = strstr(source, needle);
- if (!s) {
- js_copy(J, 0);
- return;
- }
- n = strlen(needle);
- if (js_iscallable(J, 2)) {
- js_copy(J, 2);
- js_pushundefined(J);
- js_pushlstring(J, s, n); /* arg 1: substring that matched */
- js_pushnumber(J, s - source); /* arg 2: offset within search string */
- js_copy(J, 0); /* arg 3: search string */
- js_call(J, 3);
- r = js_tostring(J, -1);
- js_putm(J, &sb, source, s);
- js_puts(J, &sb, r);
- js_puts(J, &sb, s + n);
- js_putc(J, &sb, 0);
- js_pop(J, 1);
- } else {
- r = js_tostring(J, 2);
- js_putm(J, &sb, source, s);
- while (*r) {
- if (*r == '$') {
- switch (*(++r)) {
- case 0: --r; /* end of string; back up */
- /* fallthrough */
- case '$': js_putc(J, &sb, '$'); break;
- case '&': js_putm(J, &sb, s, s + n); break;
- case '`': js_putm(J, &sb, source, s); break;
- case '\'': js_puts(J, &sb, s + n); break;
- default: js_putc(J, &sb, '$'); js_putc(J, &sb, *r); break;
- }
- ++r;
- } else {
- js_putc(J, &sb, *r++);
- }
- }
- js_puts(J, &sb, s + n);
- js_putc(J, &sb, 0);
- }
- if (js_try(J)) {
- js_free(J, sb);
- js_throw(J);
- }
- js_pushstring(J, sb ? sb->s : "");
- js_endtry(J);
- js_free(J, sb);
- }
- static void Sp_replace(js_State *J)
- {
- if (js_isregexp(J, 1))
- Sp_replace_regexp(J);
- else
- Sp_replace_string(J);
- }
- static void Sp_split_regexp(js_State *J)
- {
- js_Regexp *re;
- const char *text;
- int limit, len, k;
- const char *p, *a, *b, *c, *e;
- Resub m;
- text = checkstring(J, 0);
- re = js_toregexp(J, 1);
- limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
- js_newarray(J);
- len = 0;
- if (limit == 0)
- return;
- e = text + strlen(text);
- /* splitting the empty string */
- if (e == text) {
- if (js_doregexec(J, re->prog, text, &m, 0)) {
- js_pushliteral(J, "");
- js_setindex(J, -2, 0);
- }
- return;
- }
- p = a = text;
- while (a < e) {
- if (js_doregexec(J, re->prog, a, &m, a > text ? REG_NOTBOL : 0))
- break; /* no match */
- b = m.sub[0].sp;
- c = m.sub[0].ep;
- /* empty string at end of last match */
- if (b == c && b == p) {
- ++a;
- continue;
- }
- if (len == limit) return;
- js_pushlstring(J, p, b - p);
- js_setindex(J, -2, len++);
- for (k = 1; k < m.nsub; ++k) {
- if (len == limit) return;
- js_pushlstring(J, m.sub[k].sp, m.sub[k].ep - m.sub[k].sp);
- js_setindex(J, -2, len++);
- }
- a = p = c;
- }
- if (len == limit) return;
- js_pushstring(J, p);
- js_setindex(J, -2, len);
- }
- static void Sp_split_string(js_State *J)
- {
- const char *str = checkstring(J, 0);
- const char *sep = js_tostring(J, 1);
- int limit = js_isdefined(J, 2) ? js_tointeger(J, 2) : 1 << 30;
- int i, n;
- js_newarray(J);
- if (limit == 0)
- return;
- n = strlen(sep);
- /* empty string */
- if (n == 0) {
- Rune rune;
- for (i = 0; *str && i < limit; ++i) {
- n = chartorune(&rune, str);
- js_pushlstring(J, str, n);
- js_setindex(J, -2, i);
- str += n;
- }
- return;
- }
- for (i = 0; str && i < limit; ++i) {
- const char *s = strstr(str, sep);
- if (s) {
- js_pushlstring(J, str, s-str);
- js_setindex(J, -2, i);
- str = s + n;
- } else {
- js_pushstring(J, str);
- js_setindex(J, -2, i);
- str = NULL;
- }
- }
- }
- static void Sp_split(js_State *J)
- {
- if (js_isundefined(J, 1)) {
- js_newarray(J);
- js_pushstring(J, js_tostring(J, 0));
- js_setindex(J, -2, 0);
- } else if (js_isregexp(J, 1)) {
- Sp_split_regexp(J);
- } else {
- Sp_split_string(J);
- }
- }
- void jsB_initstring(js_State *J)
- {
- J->String_prototype->u.s.shrstr[0] = 0;
- J->String_prototype->u.s.string = J->String_prototype->u.s.shrstr;
- J->String_prototype->u.s.length = 0;
- js_pushobject(J, J->String_prototype);
- {
- jsB_propf(J, "String.prototype.toString", Sp_toString, 0);
- jsB_propf(J, "String.prototype.valueOf", Sp_valueOf, 0);
- jsB_propf(J, "String.prototype.charAt", Sp_charAt, 1);
- jsB_propf(J, "String.prototype.charCodeAt", Sp_charCodeAt, 1);
- jsB_propf(J, "String.prototype.concat", Sp_concat, 0); /* 1 */
- jsB_propf(J, "String.prototype.indexOf", Sp_indexOf, 1);
- jsB_propf(J, "String.prototype.lastIndexOf", Sp_lastIndexOf, 1);
- jsB_propf(J, "String.prototype.localeCompare", Sp_localeCompare, 1);
- jsB_propf(J, "String.prototype.match", Sp_match, 1);
- jsB_propf(J, "String.prototype.replace", Sp_replace, 2);
- jsB_propf(J, "String.prototype.search", Sp_search, 1);
- jsB_propf(J, "String.prototype.slice", Sp_slice, 2);
- jsB_propf(J, "String.prototype.split", Sp_split, 2);
- jsB_propf(J, "String.prototype.substring", Sp_substring, 2);
- jsB_propf(J, "String.prototype.toLowerCase", Sp_toLowerCase, 0);
- jsB_propf(J, "String.prototype.toLocaleLowerCase", Sp_toLowerCase, 0);
- jsB_propf(J, "String.prototype.toUpperCase", Sp_toUpperCase, 0);
- jsB_propf(J, "String.prototype.toLocaleUpperCase", Sp_toUpperCase, 0);
- /* ES5 */
- jsB_propf(J, "String.prototype.trim", Sp_trim, 0);
- }
- js_newcconstructor(J, jsB_String, jsB_new_String, "String", 0); /* 1 */
- {
- jsB_propf(J, "String.fromCharCode", S_fromCharCode, 0); /* 1 */
- }
- js_defglobal(J, "String", JS_DONTENUM);
- }
|