/* jslex.c -- lexer for JavaScript source text and JSON text. */
  1. #include "jsi.h"
  2. #include "utf.h"
  3. JS_NORETURN static void jsY_error(js_State *J, const char *fmt, ...) JS_PRINTFLIKE(2,3);
  4. static void jsY_error(js_State *J, const char *fmt, ...)
  5. {
  6. va_list ap;
  7. char buf[512];
  8. char msgbuf[256];
  9. va_start(ap, fmt);
  10. vsnprintf(msgbuf, 256, fmt, ap);
  11. va_end(ap);
  12. snprintf(buf, 256, "%s:%d: ", J->filename, J->lexline);
  13. strcat(buf, msgbuf);
  14. js_newsyntaxerror(J, buf);
  15. js_throw(J);
  16. }
  17. static const char *tokenstring[] = {
  18. "(end-of-file)",
  19. "'\\x01'", "'\\x02'", "'\\x03'", "'\\x04'", "'\\x05'", "'\\x06'", "'\\x07'",
  20. "'\\x08'", "'\\x09'", "'\\x0A'", "'\\x0B'", "'\\x0C'", "'\\x0D'", "'\\x0E'", "'\\x0F'",
  21. "'\\x10'", "'\\x11'", "'\\x12'", "'\\x13'", "'\\x14'", "'\\x15'", "'\\x16'", "'\\x17'",
  22. "'\\x18'", "'\\x19'", "'\\x1A'", "'\\x1B'", "'\\x1C'", "'\\x1D'", "'\\x1E'", "'\\x1F'",
  23. "' '", "'!'", "'\"'", "'#'", "'$'", "'%'", "'&'", "'\\''",
  24. "'('", "')'", "'*'", "'+'", "','", "'-'", "'.'", "'/'",
  25. "'0'", "'1'", "'2'", "'3'", "'4'", "'5'", "'6'", "'7'",
  26. "'8'", "'9'", "':'", "';'", "'<'", "'='", "'>'", "'?'",
  27. "'@'", "'A'", "'B'", "'C'", "'D'", "'E'", "'F'", "'G'",
  28. "'H'", "'I'", "'J'", "'K'", "'L'", "'M'", "'N'", "'O'",
  29. "'P'", "'Q'", "'R'", "'S'", "'T'", "'U'", "'V'", "'W'",
  30. "'X'", "'Y'", "'Z'", "'['", "'\'", "']'", "'^'", "'_'",
  31. "'`'", "'a'", "'b'", "'c'", "'d'", "'e'", "'f'", "'g'",
  32. "'h'", "'i'", "'j'", "'k'", "'l'", "'m'", "'n'", "'o'",
  33. "'p'", "'q'", "'r'", "'s'", "'t'", "'u'", "'v'", "'w'",
  34. "'x'", "'y'", "'z'", "'{'", "'|'", "'}'", "'~'", "'\\x7F'",
  35. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  36. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  37. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  38. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  39. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  40. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  41. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  42. 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0,
  43. "(identifier)", "(number)", "(string)", "(regexp)",
  44. "'<='", "'>='", "'=='", "'!='", "'==='", "'!=='",
  45. "'<<'", "'>>'", "'>>>'", "'&&'", "'||'",
  46. "'+='", "'-='", "'*='", "'/='", "'%='",
  47. "'<<='", "'>>='", "'>>>='", "'&='", "'|='", "'^='",
  48. "'++'", "'--'",
  49. "'break'", "'case'", "'catch'", "'continue'", "'debugger'",
  50. "'default'", "'delete'", "'do'", "'else'", "'false'", "'finally'", "'for'",
  51. "'function'", "'if'", "'in'", "'instanceof'", "'new'", "'null'", "'return'",
  52. "'switch'", "'this'", "'throw'", "'true'", "'try'", "'typeof'", "'var'",
  53. "'void'", "'while'", "'with'",
  54. };
  55. const char *jsY_tokenstring(int token)
  56. {
  57. if (token >= 0 && token < (int)nelem(tokenstring))
  58. if (tokenstring[token])
  59. return tokenstring[token];
  60. return "<unknown>";
  61. }
  62. static const char *keywords[] = {
  63. "break", "case", "catch", "continue", "debugger", "default", "delete",
  64. "do", "else", "false", "finally", "for", "function", "if", "in",
  65. "instanceof", "new", "null", "return", "switch", "this", "throw",
  66. "true", "try", "typeof", "var", "void", "while", "with",
  67. };
  68. int jsY_findword(const char *s, const char **list, int num)
  69. {
  70. int l = 0;
  71. int r = num - 1;
  72. while (l <= r) {
  73. int m = (l + r) >> 1;
  74. int c = strcmp(s, list[m]);
  75. if (c < 0)
  76. r = m - 1;
  77. else if (c > 0)
  78. l = m + 1;
  79. else
  80. return m;
  81. }
  82. return -1;
  83. }
  84. static int jsY_findkeyword(js_State *J, const char *s)
  85. {
  86. int i = jsY_findword(s, keywords, nelem(keywords));
  87. if (i >= 0) {
  88. J->text = keywords[i];
  89. return TK_BREAK + i; /* first keyword + i */
  90. }
  91. J->text = js_intern(J, s);
  92. return TK_IDENTIFIER;
  93. }
  94. int jsY_iswhite(int c)
  95. {
  96. return c == 0x9 || c == 0xB || c == 0xC || c == 0x20 || c == 0xA0 || c == 0xFEFF;
  97. }
  98. int jsY_isnewline(int c)
  99. {
  100. return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
  101. }
  102. #ifndef isalpha
  103. #define isalpha(c) ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
  104. #endif
  105. #ifndef isdigit
  106. #define isdigit(c) (c >= '0' && c <= '9')
  107. #endif
  108. #ifndef ishex
  109. #define ishex(c) ((c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))
  110. #endif
  111. static int jsY_isidentifierstart(int c)
  112. {
  113. return isalpha(c) || c == '$' || c == '_' || isalpharune(c);
  114. }
  115. static int jsY_isidentifierpart(int c)
  116. {
  117. return isdigit(c) || isalpha(c) || c == '$' || c == '_' || isalpharune(c);
  118. }
  119. static int jsY_isdec(int c)
  120. {
  121. return isdigit(c);
  122. }
  123. int jsY_ishex(int c)
  124. {
  125. return isdigit(c) || ishex(c);
  126. }
  127. int jsY_tohex(int c)
  128. {
  129. if (c >= '0' && c <= '9') return c - '0';
  130. if (c >= 'a' && c <= 'f') return c - 'a' + 0xA;
  131. if (c >= 'A' && c <= 'F') return c - 'A' + 0xA;
  132. return 0;
  133. }
  134. static void jsY_next(js_State *J)
  135. {
  136. Rune c;
  137. if (*J->source == 0) {
  138. J->lexchar = EOF;
  139. return;
  140. }
  141. J->source += chartorune(&c, J->source);
  142. /* consume CR LF as one unit */
  143. if (c == '\r' && *J->source == '\n')
  144. ++J->source;
  145. if (jsY_isnewline(c)) {
  146. J->line++;
  147. c = '\n';
  148. }
  149. J->lexchar = c;
  150. }
  151. #define jsY_accept(J, x) (J->lexchar == x ? (jsY_next(J), 1) : 0)
  152. #define jsY_expect(J, x) if (!jsY_accept(J, x)) jsY_error(J, "expected '%c'", x)
  153. static void jsY_unescape(js_State *J)
  154. {
  155. if (jsY_accept(J, '\\')) {
  156. if (jsY_accept(J, 'u')) {
  157. int x = 0;
  158. if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 12; jsY_next(J);
  159. if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 8; jsY_next(J);
  160. if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar) << 4; jsY_next(J);
  161. if (!jsY_ishex(J->lexchar)) { goto error; } x |= jsY_tohex(J->lexchar);
  162. J->lexchar = x;
  163. return;
  164. }
  165. error:
  166. jsY_error(J, "unexpected escape sequence");
  167. }
  168. }
  169. static void textinit(js_State *J)
  170. {
  171. if (!J->lexbuf.text) {
  172. J->lexbuf.cap = 4096;
  173. J->lexbuf.text = js_malloc(J, J->lexbuf.cap);
  174. }
  175. J->lexbuf.len = 0;
  176. }
  177. static void textpush(js_State *J, Rune c)
  178. {
  179. int n;
  180. if (c == EOF)
  181. n = 1;
  182. else
  183. n = runelen(c);
  184. if (J->lexbuf.len + n > J->lexbuf.cap) {
  185. J->lexbuf.cap = J->lexbuf.cap * 2;
  186. J->lexbuf.text = js_realloc(J, J->lexbuf.text, J->lexbuf.cap);
  187. }
  188. if (c == EOF)
  189. J->lexbuf.text[J->lexbuf.len++] = 0;
  190. else
  191. J->lexbuf.len += runetochar(J->lexbuf.text + J->lexbuf.len, &c);
  192. }
  193. static char *textend(js_State *J)
  194. {
  195. textpush(J, EOF);
  196. return J->lexbuf.text;
  197. }
  198. static void lexlinecomment(js_State *J)
  199. {
  200. while (J->lexchar != EOF && J->lexchar != '\n')
  201. jsY_next(J);
  202. }
  203. static int lexcomment(js_State *J)
  204. {
  205. /* already consumed initial '/' '*' sequence */
  206. while (J->lexchar != EOF) {
  207. if (jsY_accept(J, '*')) {
  208. while (J->lexchar == '*')
  209. jsY_next(J);
  210. if (jsY_accept(J, '/'))
  211. return 0;
  212. }
  213. else
  214. jsY_next(J);
  215. }
  216. return -1;
  217. }
  218. static double lexhex(js_State *J)
  219. {
  220. double n = 0;
  221. if (!jsY_ishex(J->lexchar))
  222. jsY_error(J, "malformed hexadecimal number");
  223. while (jsY_ishex(J->lexchar)) {
  224. n = n * 16 + jsY_tohex(J->lexchar);
  225. jsY_next(J);
  226. }
  227. return n;
  228. }
  229. #if 0
  230. static double lexinteger(js_State *J)
  231. {
  232. double n = 0;
  233. if (!jsY_isdec(J->lexchar))
  234. jsY_error(J, "malformed number");
  235. while (jsY_isdec(J->lexchar)) {
  236. n = n * 10 + (J->lexchar - '0');
  237. jsY_next(J);
  238. }
  239. return n;
  240. }
  241. static double lexfraction(js_State *J)
  242. {
  243. double n = 0;
  244. double d = 1;
  245. while (jsY_isdec(J->lexchar)) {
  246. n = n * 10 + (J->lexchar - '0');
  247. d = d * 10;
  248. jsY_next(J);
  249. }
  250. return n / d;
  251. }
  252. static double lexexponent(js_State *J)
  253. {
  254. double sign;
  255. if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
  256. if (jsY_accept(J, '-')) sign = -1;
  257. else if (jsY_accept(J, '+')) sign = 1;
  258. else sign = 1;
  259. return sign * lexinteger(J);
  260. }
  261. return 0;
  262. }
  263. static int lexnumber(js_State *J)
  264. {
  265. double n;
  266. double e;
  267. if (jsY_accept(J, '0')) {
  268. if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
  269. J->number = lexhex(J);
  270. return TK_NUMBER;
  271. }
  272. if (jsY_isdec(J->lexchar))
  273. jsY_error(J, "number with leading zero");
  274. n = 0;
  275. if (jsY_accept(J, '.'))
  276. n += lexfraction(J);
  277. } else if (jsY_accept(J, '.')) {
  278. if (!jsY_isdec(J->lexchar))
  279. return '.';
  280. n = lexfraction(J);
  281. } else {
  282. n = lexinteger(J);
  283. if (jsY_accept(J, '.'))
  284. n += lexfraction(J);
  285. }
  286. e = lexexponent(J);
  287. if (e < 0)
  288. n /= pow(10, -e);
  289. else if (e > 0)
  290. n *= pow(10, e);
  291. if (jsY_isidentifierstart(J->lexchar))
  292. jsY_error(J, "number with letter suffix");
  293. J->number = n;
  294. return TK_NUMBER;
  295. }
  296. #else
  297. static int lexnumber(js_State *J)
  298. {
  299. const char *s = J->source - 1;
  300. if (jsY_accept(J, '0')) {
  301. if (jsY_accept(J, 'x') || jsY_accept(J, 'X')) {
  302. J->number = lexhex(J);
  303. return TK_NUMBER;
  304. }
  305. if (jsY_isdec(J->lexchar))
  306. jsY_error(J, "number with leading zero");
  307. if (jsY_accept(J, '.')) {
  308. while (jsY_isdec(J->lexchar))
  309. jsY_next(J);
  310. }
  311. } else if (jsY_accept(J, '.')) {
  312. if (!jsY_isdec(J->lexchar))
  313. return '.';
  314. while (jsY_isdec(J->lexchar))
  315. jsY_next(J);
  316. } else {
  317. while (jsY_isdec(J->lexchar))
  318. jsY_next(J);
  319. if (jsY_accept(J, '.')) {
  320. while (jsY_isdec(J->lexchar))
  321. jsY_next(J);
  322. }
  323. }
  324. if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
  325. if (J->lexchar == '-' || J->lexchar == '+')
  326. jsY_next(J);
  327. if (jsY_isdec(J->lexchar))
  328. while (jsY_isdec(J->lexchar))
  329. jsY_next(J);
  330. else
  331. jsY_error(J, "missing exponent");
  332. }
  333. if (jsY_isidentifierstart(J->lexchar))
  334. jsY_error(J, "number with letter suffix");
  335. J->number = js_strtod(s, NULL);
  336. return TK_NUMBER;
  337. }
  338. #endif
  339. static int lexescape(js_State *J)
  340. {
  341. int x = 0;
  342. /* already consumed '\' */
  343. if (jsY_accept(J, '\n'))
  344. return 0;
  345. switch (J->lexchar) {
  346. case EOF: jsY_error(J, "unterminated escape sequence");
  347. case 'u':
  348. jsY_next(J);
  349. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
  350. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
  351. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
  352. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
  353. textpush(J, x);
  354. break;
  355. case 'x':
  356. jsY_next(J);
  357. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
  358. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
  359. textpush(J, x);
  360. break;
  361. case '0': textpush(J, 0); jsY_next(J); break;
  362. case '\\': textpush(J, '\\'); jsY_next(J); break;
  363. case '\'': textpush(J, '\''); jsY_next(J); break;
  364. case '"': textpush(J, '"'); jsY_next(J); break;
  365. case 'b': textpush(J, '\b'); jsY_next(J); break;
  366. case 'f': textpush(J, '\f'); jsY_next(J); break;
  367. case 'n': textpush(J, '\n'); jsY_next(J); break;
  368. case 'r': textpush(J, '\r'); jsY_next(J); break;
  369. case 't': textpush(J, '\t'); jsY_next(J); break;
  370. case 'v': textpush(J, '\v'); jsY_next(J); break;
  371. default: textpush(J, J->lexchar); jsY_next(J); break;
  372. }
  373. return 0;
  374. }
  375. static int lexstring(js_State *J)
  376. {
  377. const char *s;
  378. int q = J->lexchar;
  379. jsY_next(J);
  380. textinit(J);
  381. while (J->lexchar != q) {
  382. if (J->lexchar == EOF || J->lexchar == '\n')
  383. jsY_error(J, "string not terminated");
  384. if (jsY_accept(J, '\\')) {
  385. if (lexescape(J))
  386. jsY_error(J, "malformed escape sequence");
  387. } else {
  388. textpush(J, J->lexchar);
  389. jsY_next(J);
  390. }
  391. }
  392. jsY_expect(J, q);
  393. s = textend(J);
  394. J->text = js_intern(J, s);
  395. return TK_STRING;
  396. }
  397. /* the ugliest language wart ever... */
  398. static int isregexpcontext(int last)
  399. {
  400. switch (last) {
  401. case ']':
  402. case ')':
  403. case '}':
  404. case TK_IDENTIFIER:
  405. case TK_NUMBER:
  406. case TK_STRING:
  407. case TK_FALSE:
  408. case TK_NULL:
  409. case TK_THIS:
  410. case TK_TRUE:
  411. return 0;
  412. default:
  413. return 1;
  414. }
  415. }
  416. static int lexregexp(js_State *J)
  417. {
  418. const char *s;
  419. int g, m, i;
  420. int inclass = 0;
  421. /* already consumed initial '/' */
  422. textinit(J);
  423. /* regexp body */
  424. while (J->lexchar != '/' || inclass) {
  425. if (J->lexchar == EOF || J->lexchar == '\n') {
  426. jsY_error(J, "regular expression not terminated");
  427. } else if (jsY_accept(J, '\\')) {
  428. if (jsY_accept(J, '/')) {
  429. textpush(J, '/');
  430. } else {
  431. textpush(J, '\\');
  432. if (J->lexchar == EOF || J->lexchar == '\n')
  433. jsY_error(J, "regular expression not terminated");
  434. textpush(J, J->lexchar);
  435. jsY_next(J);
  436. }
  437. } else {
  438. if (J->lexchar == '[' && !inclass)
  439. inclass = 1;
  440. if (J->lexchar == ']' && inclass)
  441. inclass = 0;
  442. textpush(J, J->lexchar);
  443. jsY_next(J);
  444. }
  445. }
  446. jsY_expect(J, '/');
  447. s = textend(J);
  448. /* regexp flags */
  449. g = i = m = 0;
  450. while (jsY_isidentifierpart(J->lexchar)) {
  451. if (jsY_accept(J, 'g')) ++g;
  452. else if (jsY_accept(J, 'i')) ++i;
  453. else if (jsY_accept(J, 'm')) ++m;
  454. else jsY_error(J, "illegal flag in regular expression: %c", J->lexchar);
  455. }
  456. if (g > 1 || i > 1 || m > 1)
  457. jsY_error(J, "duplicated flag in regular expression");
  458. J->text = js_intern(J, s);
  459. J->number = 0;
  460. if (g) J->number += JS_REGEXP_G;
  461. if (i) J->number += JS_REGEXP_I;
  462. if (m) J->number += JS_REGEXP_M;
  463. return TK_REGEXP;
  464. }
  465. /* simple "return [no Line Terminator here] ..." contexts */
  466. static int isnlthcontext(int last)
  467. {
  468. switch (last) {
  469. case TK_BREAK:
  470. case TK_CONTINUE:
  471. case TK_RETURN:
  472. case TK_THROW:
  473. return 1;
  474. default:
  475. return 0;
  476. }
  477. }
  478. static int jsY_lexx(js_State *J)
  479. {
  480. J->newline = 0;
  481. while (1) {
  482. J->lexline = J->line; /* save location of beginning of token */
  483. while (jsY_iswhite(J->lexchar))
  484. jsY_next(J);
  485. if (jsY_accept(J, '\n')) {
  486. J->newline = 1;
  487. if (isnlthcontext(J->lasttoken))
  488. return ';';
  489. continue;
  490. }
  491. if (jsY_accept(J, '/')) {
  492. if (jsY_accept(J, '/')) {
  493. lexlinecomment(J);
  494. continue;
  495. } else if (jsY_accept(J, '*')) {
  496. if (lexcomment(J))
  497. jsY_error(J, "multi-line comment not terminated");
  498. continue;
  499. } else if (isregexpcontext(J->lasttoken)) {
  500. return lexregexp(J);
  501. } else if (jsY_accept(J, '=')) {
  502. return TK_DIV_ASS;
  503. } else {
  504. return '/';
  505. }
  506. }
  507. if (J->lexchar >= '0' && J->lexchar <= '9') {
  508. return lexnumber(J);
  509. }
  510. switch (J->lexchar) {
  511. case '(': jsY_next(J); return '(';
  512. case ')': jsY_next(J); return ')';
  513. case ',': jsY_next(J); return ',';
  514. case ':': jsY_next(J); return ':';
  515. case ';': jsY_next(J); return ';';
  516. case '?': jsY_next(J); return '?';
  517. case '[': jsY_next(J); return '[';
  518. case ']': jsY_next(J); return ']';
  519. case '{': jsY_next(J); return '{';
  520. case '}': jsY_next(J); return '}';
  521. case '~': jsY_next(J); return '~';
  522. case '\'':
  523. case '"':
  524. return lexstring(J);
  525. case '.':
  526. return lexnumber(J);
  527. case '<':
  528. jsY_next(J);
  529. if (jsY_accept(J, '<')) {
  530. if (jsY_accept(J, '='))
  531. return TK_SHL_ASS;
  532. return TK_SHL;
  533. }
  534. if (jsY_accept(J, '='))
  535. return TK_LE;
  536. return '<';
  537. case '>':
  538. jsY_next(J);
  539. if (jsY_accept(J, '>')) {
  540. if (jsY_accept(J, '>')) {
  541. if (jsY_accept(J, '='))
  542. return TK_USHR_ASS;
  543. return TK_USHR;
  544. }
  545. if (jsY_accept(J, '='))
  546. return TK_SHR_ASS;
  547. return TK_SHR;
  548. }
  549. if (jsY_accept(J, '='))
  550. return TK_GE;
  551. return '>';
  552. case '=':
  553. jsY_next(J);
  554. if (jsY_accept(J, '=')) {
  555. if (jsY_accept(J, '='))
  556. return TK_STRICTEQ;
  557. return TK_EQ;
  558. }
  559. return '=';
  560. case '!':
  561. jsY_next(J);
  562. if (jsY_accept(J, '=')) {
  563. if (jsY_accept(J, '='))
  564. return TK_STRICTNE;
  565. return TK_NE;
  566. }
  567. return '!';
  568. case '+':
  569. jsY_next(J);
  570. if (jsY_accept(J, '+'))
  571. return TK_INC;
  572. if (jsY_accept(J, '='))
  573. return TK_ADD_ASS;
  574. return '+';
  575. case '-':
  576. jsY_next(J);
  577. if (jsY_accept(J, '-'))
  578. return TK_DEC;
  579. if (jsY_accept(J, '='))
  580. return TK_SUB_ASS;
  581. return '-';
  582. case '*':
  583. jsY_next(J);
  584. if (jsY_accept(J, '='))
  585. return TK_MUL_ASS;
  586. return '*';
  587. case '%':
  588. jsY_next(J);
  589. if (jsY_accept(J, '='))
  590. return TK_MOD_ASS;
  591. return '%';
  592. case '&':
  593. jsY_next(J);
  594. if (jsY_accept(J, '&'))
  595. return TK_AND;
  596. if (jsY_accept(J, '='))
  597. return TK_AND_ASS;
  598. return '&';
  599. case '|':
  600. jsY_next(J);
  601. if (jsY_accept(J, '|'))
  602. return TK_OR;
  603. if (jsY_accept(J, '='))
  604. return TK_OR_ASS;
  605. return '|';
  606. case '^':
  607. jsY_next(J);
  608. if (jsY_accept(J, '='))
  609. return TK_XOR_ASS;
  610. return '^';
  611. case EOF:
  612. return 0; /* EOF */
  613. }
  614. /* Handle \uXXXX escapes in identifiers */
  615. jsY_unescape(J);
  616. if (jsY_isidentifierstart(J->lexchar)) {
  617. textinit(J);
  618. textpush(J, J->lexchar);
  619. jsY_next(J);
  620. jsY_unescape(J);
  621. while (jsY_isidentifierpart(J->lexchar)) {
  622. textpush(J, J->lexchar);
  623. jsY_next(J);
  624. jsY_unescape(J);
  625. }
  626. textend(J);
  627. return jsY_findkeyword(J, J->lexbuf.text);
  628. }
  629. if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
  630. jsY_error(J, "unexpected character: '%c'", J->lexchar);
  631. jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
  632. }
  633. }
  634. void jsY_initlex(js_State *J, const char *filename, const char *source)
  635. {
  636. J->filename = filename;
  637. J->source = source;
  638. J->line = 1;
  639. J->lasttoken = 0;
  640. jsY_next(J); /* load first lookahead character */
  641. }
  642. int jsY_lex(js_State *J)
  643. {
  644. return J->lasttoken = jsY_lexx(J);
  645. }
  646. static int lexjsonnumber(js_State *J)
  647. {
  648. const char *s = J->source - 1;
  649. if (J->lexchar == '-')
  650. jsY_next(J);
  651. if (J->lexchar == '0')
  652. jsY_next(J);
  653. else if (J->lexchar >= '1' && J->lexchar <= '9')
  654. while (isdigit(J->lexchar))
  655. jsY_next(J);
  656. else
  657. jsY_error(J, "unexpected non-digit");
  658. if (jsY_accept(J, '.')) {
  659. if (isdigit(J->lexchar))
  660. while (isdigit(J->lexchar))
  661. jsY_next(J);
  662. else
  663. jsY_error(J, "missing digits after decimal point");
  664. }
  665. if (jsY_accept(J, 'e') || jsY_accept(J, 'E')) {
  666. if (J->lexchar == '-' || J->lexchar == '+')
  667. jsY_next(J);
  668. if (isdigit(J->lexchar))
  669. while (isdigit(J->lexchar))
  670. jsY_next(J);
  671. else
  672. jsY_error(J, "missing digits after exponent indicator");
  673. }
  674. J->number = js_strtod(s, NULL);
  675. return TK_NUMBER;
  676. }
  677. static int lexjsonescape(js_State *J)
  678. {
  679. int x = 0;
  680. /* already consumed '\' */
  681. switch (J->lexchar) {
  682. default: jsY_error(J, "invalid escape sequence");
  683. case 'u':
  684. jsY_next(J);
  685. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 12; jsY_next(J); }
  686. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 8; jsY_next(J); }
  687. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar) << 4; jsY_next(J); }
  688. if (!jsY_ishex(J->lexchar)) return 1; else { x |= jsY_tohex(J->lexchar); jsY_next(J); }
  689. textpush(J, x);
  690. break;
  691. case '"': textpush(J, '"'); jsY_next(J); break;
  692. case '\\': textpush(J, '\\'); jsY_next(J); break;
  693. case '/': textpush(J, '/'); jsY_next(J); break;
  694. case 'b': textpush(J, '\b'); jsY_next(J); break;
  695. case 'f': textpush(J, '\f'); jsY_next(J); break;
  696. case 'n': textpush(J, '\n'); jsY_next(J); break;
  697. case 'r': textpush(J, '\r'); jsY_next(J); break;
  698. case 't': textpush(J, '\t'); jsY_next(J); break;
  699. }
  700. return 0;
  701. }
  702. static int lexjsonstring(js_State *J)
  703. {
  704. const char *s;
  705. textinit(J);
  706. while (J->lexchar != '"') {
  707. if (J->lexchar == EOF)
  708. jsY_error(J, "unterminated string");
  709. else if (J->lexchar < 32)
  710. jsY_error(J, "invalid control character in string");
  711. else if (jsY_accept(J, '\\'))
  712. lexjsonescape(J);
  713. else {
  714. textpush(J, J->lexchar);
  715. jsY_next(J);
  716. }
  717. }
  718. jsY_expect(J, '"');
  719. s = textend(J);
  720. J->text = js_intern(J, s);
  721. return TK_STRING;
  722. }
  723. int jsY_lexjson(js_State *J)
  724. {
  725. while (1) {
  726. J->lexline = J->line; /* save location of beginning of token */
  727. while (jsY_iswhite(J->lexchar) || J->lexchar == '\n')
  728. jsY_next(J);
  729. if ((J->lexchar >= '0' && J->lexchar <= '9') || J->lexchar == '-')
  730. return lexjsonnumber(J);
  731. switch (J->lexchar) {
  732. case ',': jsY_next(J); return ',';
  733. case ':': jsY_next(J); return ':';
  734. case '[': jsY_next(J); return '[';
  735. case ']': jsY_next(J); return ']';
  736. case '{': jsY_next(J); return '{';
  737. case '}': jsY_next(J); return '}';
  738. case '"':
  739. jsY_next(J);
  740. return lexjsonstring(J);
  741. case 'f':
  742. jsY_next(J); jsY_expect(J, 'a'); jsY_expect(J, 'l'); jsY_expect(J, 's'); jsY_expect(J, 'e');
  743. return TK_FALSE;
  744. case 'n':
  745. jsY_next(J); jsY_expect(J, 'u'); jsY_expect(J, 'l'); jsY_expect(J, 'l');
  746. return TK_NULL;
  747. case 't':
  748. jsY_next(J); jsY_expect(J, 'r'); jsY_expect(J, 'u'); jsY_expect(J, 'e');
  749. return TK_TRUE;
  750. case EOF:
  751. return 0; /* EOF */
  752. }
  753. if (J->lexchar >= 0x20 && J->lexchar <= 0x7E)
  754. jsY_error(J, "unexpected character: '%c'", J->lexchar);
  755. jsY_error(J, "unexpected character: \\u%04X", J->lexchar);
  756. }
  757. }