// css-parse.c
  1. // Copyright (C) 2004-2025 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include "html-imp.h"
  24. #include <string.h>
  25. #include "css-properties.h"
  26. struct lexbuf
  27. {
  28. fz_context *ctx;
  29. fz_pool *pool;
  30. const unsigned char *start;
  31. const unsigned char *s;
  32. const char *file;
  33. int line;
  34. int lookahead;
  35. int c;
  36. int string_len;
  37. char string[1024];
  38. };
  39. static fz_css_value *parse_expr(struct lexbuf *buf);
  40. static fz_css_selector *parse_selector(struct lexbuf *buf);
  41. FZ_NORETURN static void fz_css_error(struct lexbuf *buf, const char *msg)
  42. {
  43. #define PRE_POST_SIZE 30
  44. unsigned char text[PRE_POST_SIZE * 2 + 4];
  45. unsigned char *d = text;
  46. const unsigned char *s = buf->start;
  47. int n;
  48. /* We want to make a helpful fragment for the error message.
  49. * We want err_pos to be the point at which we just tripped
  50. * the error. err_pos needs to be at least 1 byte behind
  51. * our read pointer, as we've read that char. */
  52. const unsigned char *err_pos = buf->s;
  53. n = 1;
  54. /* And if we're using lookahead, it's further behind. */
  55. if (buf->lookahead >= CSS_KEYWORD)
  56. n += buf->string_len;
  57. else if (buf->lookahead != EOF)
  58. n += 1;
  59. /* But it can't be before the start of the buffer */
  60. n = fz_mini(n, err_pos - buf->start);
  61. err_pos -= n;
  62. /* We're going to try to output:
  63. * <section prior to the error> ">" <the char that tripped> "<" <section after the error>
  64. */
  65. /* Is the section prior to the error too long? If so, truncate it with an ellipsis. */
  66. n = sizeof(text)-1;
  67. if (err_pos - s > n-PRE_POST_SIZE - 3)
  68. {
  69. *d++ = '.';
  70. *d++ = '.';
  71. *d++ = '.';
  72. n -= 3;
  73. s = err_pos - (n-PRE_POST_SIZE - 3);
  74. }
  75. /* Copy the prefix (if there is one) */
  76. if (err_pos > s)
  77. {
  78. n = err_pos - s;
  79. while (n)
  80. {
  81. unsigned char c = *s++;
  82. *d++ = (c < 32 || c > 127) ? ' ' : c;
  83. n--;
  84. }
  85. }
  86. /* Marker, char, end marker */
  87. *d++ = '>', n--;
  88. if (*err_pos)
  89. *d++ = *err_pos++, n--;
  90. *d++ = '<', n--;
  91. /* Postfix */
  92. n = (int)strlen((const char *)err_pos);
  93. if (n <= PRE_POST_SIZE)
  94. {
  95. while (n > 0)
  96. {
  97. unsigned char c = *err_pos++;
  98. *d++ = (c < 32 || c > 127) ? ' ' : c;
  99. n--;
  100. }
  101. }
  102. else
  103. {
  104. for (n = PRE_POST_SIZE-3; n > 0; n--)
  105. {
  106. unsigned char c = *err_pos++;
  107. *d++ = (c < 32 || c > 127) ? ' ' : c;
  108. }
  109. *d++ = '.';
  110. *d++ = '.';
  111. *d++ = '.';
  112. }
  113. *d = 0;
  114. fz_throw(buf->ctx, FZ_ERROR_SYNTAX, "css syntax error: %s (%s:%d) (%s)", msg, buf->file, buf->line, text);
  115. }
  116. fz_css *fz_new_css(fz_context *ctx)
  117. {
  118. fz_pool *pool = fz_new_pool(ctx);
  119. fz_css *css = NULL;
  120. fz_try(ctx)
  121. {
  122. css = fz_pool_alloc(ctx, pool, sizeof *css);
  123. css->pool = pool;
  124. css->rule = NULL;
  125. }
  126. fz_catch(ctx)
  127. {
  128. fz_drop_pool(ctx, pool);
  129. fz_rethrow(ctx);
  130. }
  131. return css;
  132. }
  133. void fz_drop_css(fz_context *ctx, fz_css *css)
  134. {
  135. if (css)
  136. fz_drop_pool(ctx, css->pool);
  137. }
  138. static fz_css_rule *fz_new_css_rule(fz_context *ctx, fz_pool *pool, fz_css_selector *selector, fz_css_property *declaration)
  139. {
  140. fz_css_rule *rule = fz_pool_alloc(ctx, pool, sizeof *rule);
  141. rule->selector = selector;
  142. rule->declaration = declaration;
  143. rule->next = NULL;
  144. return rule;
  145. }
  146. static fz_css_selector *fz_new_css_selector(fz_context *ctx, fz_pool *pool, const char *name)
  147. {
  148. fz_css_selector *sel = fz_pool_alloc(ctx, pool, sizeof *sel);
  149. sel->name = name ? fz_pool_strdup(ctx, pool, name) : NULL;
  150. sel->combine = 0;
  151. sel->cond = NULL;
  152. sel->left = NULL;
  153. sel->right = NULL;
  154. sel->next = NULL;
  155. return sel;
  156. }
  157. static fz_css_condition *fz_new_css_condition(fz_context *ctx, fz_pool *pool, int type, const char *key, const char *val)
  158. {
  159. fz_css_condition *cond = fz_pool_alloc(ctx, pool, sizeof *cond);
  160. cond->type = type;
  161. cond->key = key ? fz_pool_strdup(ctx, pool, key) : NULL;
  162. cond->val = val ? fz_pool_strdup(ctx, pool, val) : NULL;
  163. cond->next = NULL;
  164. return cond;
  165. }
  166. static fz_css_property *fz_new_css_property(fz_context *ctx, fz_pool *pool, const char *name, fz_css_value *value, int spec)
  167. {
  168. struct css_property_info *info = css_property_lookup(name, strlen(name));
  169. if (info)
  170. {
  171. fz_css_property *prop = fz_pool_alloc(ctx, pool, sizeof *prop);
  172. prop->name = info->key;
  173. prop->value = value;
  174. prop->spec = spec;
  175. prop->important = 0;
  176. prop->next = NULL;
  177. return prop;
  178. }
  179. return NULL;
  180. }
  181. static fz_css_value *fz_new_css_value_x(fz_context *ctx, fz_pool *pool, int type)
  182. {
  183. fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val);
  184. val->type = type;
  185. val->data = NULL;
  186. val->args = NULL;
  187. val->next = NULL;
  188. return val;
  189. }
  190. static fz_css_value *fz_new_css_value(fz_context *ctx, fz_pool *pool, int type, const char *data)
  191. {
  192. fz_css_value *val = fz_pool_alloc(ctx, pool, sizeof *val);
  193. val->type = type;
  194. val->data = fz_pool_strdup(ctx, pool, data);
  195. val->args = NULL;
  196. val->next = NULL;
  197. return val;
  198. }
  199. static void css_lex_next(struct lexbuf *buf)
  200. {
  201. if (buf->c == 0)
  202. return;
  203. buf->s += fz_chartorune(&buf->c, (const char *)buf->s);
  204. if (buf->c == '\n')
  205. ++buf->line;
  206. buf->lookahead = EOF;
  207. }
  208. static void css_lex_init(fz_context *ctx, struct lexbuf *buf, fz_pool *pool, const char *s, const char *file)
  209. {
  210. buf->ctx = ctx;
  211. buf->pool = pool;
  212. buf->s = (const unsigned char *)s;
  213. buf->lookahead = EOF;
  214. buf->start = buf->s;
  215. buf->c = -1;
  216. buf->file = file;
  217. buf->line = 1;
  218. css_lex_next(buf);
  219. buf->string_len = 0;
  220. }
  221. static inline int iswhite(int c)
  222. {
  223. return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f';
  224. }
  225. static int isnmstart(int c)
  226. {
  227. return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
  228. (c >= 128 && c <= UCS_MAX);
  229. }
  230. static int isnmchar(int c)
  231. {
  232. return c == '\\' || c == '_' || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') ||
  233. (c >= '0' && c <= '9') || c == '-' || (c >= 128 && c <= UCS_MAX);
  234. }
  235. static void css_push_char(struct lexbuf *buf, int c)
  236. {
  237. char out[4];
  238. int n = fz_runetochar(out, c);
  239. if (buf->string_len + n >= (int)nelem(buf->string))
  240. fz_css_error(buf, "token too long");
  241. memcpy(buf->string + buf->string_len, out, n);
  242. buf->string_len += n;
  243. }
  244. static void css_push_zero(struct lexbuf *buf)
  245. {
  246. if (buf->string_len + 1 >= (int)nelem(buf->string))
  247. fz_css_error(buf, "token too long");
  248. buf->string[buf->string_len] = 0;
  249. buf->string_len += 1;
  250. }
  251. static int css_lex_accept(struct lexbuf *buf, int t)
  252. {
  253. if (buf->c == t)
  254. {
  255. css_lex_next(buf);
  256. return 1;
  257. }
  258. return 0;
  259. }
  260. static void css_lex_expect(struct lexbuf *buf, int t)
  261. {
  262. if (!css_lex_accept(buf, t))
  263. fz_css_error(buf, "unexpected character");
  264. }
  265. static int css_lex_number(struct lexbuf *buf)
  266. {
  267. while (buf->c >= '0' && buf->c <= '9')
  268. {
  269. css_push_char(buf, buf->c);
  270. css_lex_next(buf);
  271. }
  272. if (css_lex_accept(buf, '.'))
  273. {
  274. css_push_char(buf, '.');
  275. while (buf->c >= '0' && buf->c <= '9')
  276. {
  277. css_push_char(buf, buf->c);
  278. css_lex_next(buf);
  279. }
  280. }
  281. if (css_lex_accept(buf, '%'))
  282. {
  283. css_push_char(buf, '%');
  284. css_push_zero(buf);
  285. return CSS_PERCENT;
  286. }
  287. if (isnmstart(buf->c))
  288. {
  289. css_push_char(buf, buf->c);
  290. css_lex_next(buf);
  291. while (isnmchar(buf->c))
  292. {
  293. css_push_char(buf, buf->c);
  294. css_lex_next(buf);
  295. }
  296. css_push_zero(buf);
  297. return CSS_LENGTH;
  298. }
  299. css_push_zero(buf);
  300. return CSS_NUMBER;
  301. }
  302. static int css_lex_keyword(struct lexbuf *buf)
  303. {
  304. while (isnmchar(buf->c))
  305. {
  306. css_push_char(buf, buf->c);
  307. css_lex_next(buf);
  308. }
  309. css_push_zero(buf);
  310. return CSS_KEYWORD;
  311. }
  312. static int css_lex_hash(struct lexbuf *buf)
  313. {
  314. while (isnmchar(buf->c))
  315. {
  316. css_push_char(buf, buf->c);
  317. css_lex_next(buf);
  318. }
  319. css_push_zero(buf);
  320. return CSS_HASH;
  321. }
  322. static int css_lex_string(struct lexbuf *buf, int q)
  323. {
  324. while (buf->c && buf->c != q)
  325. {
  326. if (css_lex_accept(buf, '\\'))
  327. {
  328. if (css_lex_accept(buf, 'n'))
  329. css_push_char(buf, '\n');
  330. else if (css_lex_accept(buf, 'r'))
  331. css_push_char(buf, '\r');
  332. else if (css_lex_accept(buf, 'f'))
  333. css_push_char(buf, '\f');
  334. else if (css_lex_accept(buf, '\f'))
  335. /* line continuation */ ;
  336. else if (css_lex_accept(buf, '\n'))
  337. /* line continuation */ ;
  338. else if (css_lex_accept(buf, '\r'))
  339. css_lex_accept(buf, '\n');
  340. else
  341. {
  342. css_push_char(buf, buf->c);
  343. css_lex_next(buf);
  344. }
  345. }
  346. else
  347. {
  348. css_push_char(buf, buf->c);
  349. css_lex_next(buf);
  350. }
  351. }
  352. css_lex_expect(buf, q);
  353. css_push_zero(buf);
  354. return CSS_STRING;
  355. }
  356. static void css_lex_uri(struct lexbuf *buf)
  357. {
  358. while (buf->c && buf->c != ')' && !iswhite(buf->c))
  359. {
  360. if (css_lex_accept(buf, '\\'))
  361. {
  362. if (css_lex_accept(buf, 'n'))
  363. css_push_char(buf, '\n');
  364. else if (css_lex_accept(buf, 'r'))
  365. css_push_char(buf, '\r');
  366. else if (css_lex_accept(buf, 'f'))
  367. css_push_char(buf, '\f');
  368. else
  369. {
  370. css_push_char(buf, buf->c);
  371. css_lex_next(buf);
  372. }
  373. }
  374. else if (buf->c == '!' || buf->c == '#' || buf->c == '$' || buf->c == '%' || buf->c == '&' ||
  375. (buf->c >= '*' && buf->c <= '[') ||
  376. (buf->c >= ']' && buf->c <= '~') ||
  377. buf->c > 159)
  378. {
  379. css_push_char(buf, buf->c);
  380. css_lex_next(buf);
  381. }
  382. else
  383. fz_css_error(buf, "unexpected character in url");
  384. }
  385. css_push_zero(buf);
  386. }
  387. static int css_lex(struct lexbuf *buf)
  388. {
  389. int t;
  390. // TODO: keyword escape sequences
  391. buf->string_len = 0;
  392. restart:
  393. if (buf->c == 0)
  394. return EOF;
  395. if (iswhite(buf->c))
  396. {
  397. while (iswhite(buf->c))
  398. css_lex_next(buf);
  399. return ' ';
  400. }
  401. if (css_lex_accept(buf, '/'))
  402. {
  403. if (css_lex_accept(buf, '*'))
  404. {
  405. while (buf->c)
  406. {
  407. if (css_lex_accept(buf, '*'))
  408. {
  409. while (buf->c == '*')
  410. css_lex_next(buf);
  411. if (css_lex_accept(buf, '/'))
  412. goto restart;
  413. }
  414. css_lex_next(buf);
  415. }
  416. fz_css_error(buf, "unterminated comment");
  417. }
  418. return '/';
  419. }
  420. if (css_lex_accept(buf, '<'))
  421. {
  422. if (css_lex_accept(buf, '!'))
  423. {
  424. css_lex_expect(buf, '-');
  425. css_lex_expect(buf, '-');
  426. goto restart; /* ignore CDO */
  427. }
  428. return '<';
  429. }
  430. if (css_lex_accept(buf, '-'))
  431. {
  432. if (css_lex_accept(buf, '-'))
  433. {
  434. if (css_lex_accept(buf, '>'))
  435. goto restart; /* ignore CDC */
  436. }
  437. if (isnmstart(buf->c))
  438. {
  439. css_push_char(buf, '-');
  440. return css_lex_keyword(buf);
  441. }
  442. return '-';
  443. }
  444. if (css_lex_accept(buf, '.'))
  445. {
  446. if (buf->c >= '0' && buf->c <= '9')
  447. {
  448. css_push_char(buf, '.');
  449. return css_lex_number(buf);
  450. }
  451. return '.';
  452. }
  453. if (css_lex_accept(buf, '#'))
  454. {
  455. if (isnmchar(buf->c))
  456. return css_lex_hash(buf);
  457. return '#';
  458. }
  459. if (css_lex_accept(buf, '"'))
  460. return css_lex_string(buf, '"');
  461. if (css_lex_accept(buf, '\''))
  462. return css_lex_string(buf, '\'');
  463. if (buf->c >= '0' && buf->c <= '9')
  464. return css_lex_number(buf);
  465. if (css_lex_accept(buf, 'u'))
  466. {
  467. if (css_lex_accept(buf, 'r'))
  468. {
  469. if (css_lex_accept(buf, 'l'))
  470. {
  471. if (css_lex_accept(buf, '('))
  472. {
  473. while (iswhite(buf->c))
  474. css_lex_next(buf);
  475. if (css_lex_accept(buf, '"'))
  476. css_lex_string(buf, '"');
  477. else if (css_lex_accept(buf, '\''))
  478. css_lex_string(buf, '\'');
  479. else
  480. css_lex_uri(buf);
  481. while (iswhite(buf->c))
  482. css_lex_next(buf);
  483. css_lex_expect(buf, ')');
  484. return CSS_URI;
  485. }
  486. css_push_char(buf, 'u');
  487. css_push_char(buf, 'r');
  488. css_push_char(buf, 'l');
  489. return css_lex_keyword(buf);
  490. }
  491. css_push_char(buf, 'u');
  492. css_push_char(buf, 'r');
  493. return css_lex_keyword(buf);
  494. }
  495. css_push_char(buf, 'u');
  496. return css_lex_keyword(buf);
  497. }
  498. if (isnmstart(buf->c))
  499. {
  500. css_push_char(buf, buf->c);
  501. css_lex_next(buf);
  502. return css_lex_keyword(buf);
  503. }
  504. t = buf->c;
  505. css_lex_next(buf);
  506. return t;
  507. }
  508. static void next(struct lexbuf *buf)
  509. {
  510. buf->lookahead = css_lex(buf);
  511. }
  512. static int accept(struct lexbuf *buf, int t)
  513. {
  514. if (buf->lookahead == t)
  515. {
  516. next(buf);
  517. return 1;
  518. }
  519. return 0;
  520. }
  521. static void expect(struct lexbuf *buf, int t)
  522. {
  523. if (accept(buf, t))
  524. return;
  525. fz_css_error(buf, "unexpected token");
  526. }
  527. static void white(struct lexbuf *buf)
  528. {
  529. while (buf->lookahead == ' ')
  530. next(buf);
  531. }
  532. static int iscond(int t)
  533. {
  534. return t == ':' || t == '.' || t == '[' || t == CSS_HASH;
  535. }
  536. static fz_css_value *parse_term(struct lexbuf *buf)
  537. {
  538. fz_css_value *v;
  539. if (buf->lookahead == '+' || buf->lookahead == '-')
  540. {
  541. float sign = buf->lookahead == '-' ? -1 : 1;
  542. next(buf);
  543. if (buf->lookahead != CSS_NUMBER && buf->lookahead != CSS_LENGTH && buf->lookahead != CSS_PERCENT)
  544. fz_css_error(buf, "expected number");
  545. if (sign < 0)
  546. {
  547. v = fz_new_css_value_x(buf->ctx, buf->pool, buf->lookahead);
  548. v->data = fz_pool_alloc(buf->ctx, buf->pool, strlen(buf->string) + 2);
  549. v->data[0] = '-';
  550. strcpy(v->data + 1, buf->string);
  551. }
  552. else
  553. {
  554. v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
  555. }
  556. next(buf);
  557. white(buf);
  558. return v;
  559. }
  560. if (buf->lookahead == CSS_KEYWORD)
  561. {
  562. v = fz_new_css_value(buf->ctx, buf->pool, CSS_KEYWORD, buf->string);
  563. next(buf);
  564. if (accept(buf, '('))
  565. {
  566. white(buf);
  567. v->type = '(';
  568. v->args = parse_expr(buf);
  569. expect(buf, ')');
  570. }
  571. white(buf);
  572. return v;
  573. }
  574. switch (buf->lookahead)
  575. {
  576. case CSS_HASH:
  577. case CSS_STRING:
  578. case CSS_URI:
  579. case CSS_NUMBER:
  580. case CSS_LENGTH:
  581. case CSS_PERCENT:
  582. v = fz_new_css_value(buf->ctx, buf->pool, buf->lookahead, buf->string);
  583. next(buf);
  584. white(buf);
  585. return v;
  586. }
  587. fz_css_error(buf, "expected value");
  588. }
  589. static fz_css_value *parse_expr(struct lexbuf *buf)
  590. {
  591. fz_css_value *head, *tail;
  592. head = tail = parse_term(buf);
  593. while (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != '!' &&
  594. buf->lookahead != ')' && buf->lookahead != EOF)
  595. {
  596. if (accept(buf, ','))
  597. {
  598. white(buf);
  599. if (buf->lookahead != ';')
  600. {
  601. tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, ',', ",");
  602. tail = tail->next = parse_term(buf);
  603. }
  604. }
  605. else if (accept(buf, '/'))
  606. {
  607. white(buf);
  608. tail = tail->next = fz_new_css_value(buf->ctx, buf->pool, '/', "/");
  609. tail = tail->next = parse_term(buf);
  610. }
  611. else
  612. {
  613. tail = tail->next = parse_term(buf);
  614. }
  615. }
  616. return head;
  617. }
  618. static fz_css_property *parse_declaration(struct lexbuf *buf)
  619. {
  620. fz_css_property *p;
  621. if (buf->lookahead != CSS_KEYWORD)
  622. fz_css_error(buf, "expected keyword in property");
  623. p = fz_new_css_property(buf->ctx, buf->pool, buf->string, NULL, 0);
  624. next(buf);
  625. white(buf);
  626. expect(buf, ':');
  627. white(buf);
  628. if (p)
  629. p->value = parse_expr(buf);
  630. else
  631. (void) parse_expr(buf);
  632. /* !important */
  633. if (accept(buf, '!'))
  634. {
  635. white(buf);
  636. if (buf->lookahead != CSS_KEYWORD || strcmp(buf->string, "important"))
  637. fz_css_error(buf, "expected keyword 'important' after '!'");
  638. if (p)
  639. p->important = 1;
  640. next(buf);
  641. white(buf);
  642. }
  643. return p;
  644. }
  645. static fz_css_property *parse_declaration_list(struct lexbuf *buf)
  646. {
  647. fz_css_property *head, *tail = NULL, *p;
  648. white(buf);
  649. if (buf->lookahead == '}' || buf->lookahead == EOF)
  650. return NULL;
  651. p = parse_declaration(buf);
  652. if (p)
  653. tail = p;
  654. head = tail;
  655. while (accept(buf, ';'))
  656. {
  657. white(buf);
  658. if (buf->lookahead != '}' && buf->lookahead != ';' && buf->lookahead != EOF)
  659. {
  660. p = parse_declaration(buf);
  661. if (p)
  662. {
  663. if (!head)
  664. head = tail = p;
  665. else
  666. tail = tail->next = p;
  667. }
  668. }
  669. }
  670. return head;
  671. }
  672. static char *parse_attrib_value(struct lexbuf *buf)
  673. {
  674. char *s;
  675. if (buf->lookahead == CSS_KEYWORD || buf->lookahead == CSS_STRING)
  676. {
  677. s = fz_pool_strdup(buf->ctx, buf->pool, buf->string);
  678. next(buf);
  679. white(buf);
  680. return s;
  681. }
  682. fz_css_error(buf, "expected attribute value");
  683. }
  684. static fz_css_condition *parse_condition(struct lexbuf *buf)
  685. {
  686. fz_css_condition *c;
  687. if (accept(buf, ':'))
  688. {
  689. (void)accept(buf, ':'); /* swallow css3 :: syntax and pretend it's a normal pseudo-class */
  690. if (buf->lookahead != CSS_KEYWORD)
  691. fz_css_error(buf, "expected keyword after ':'");
  692. c = fz_new_css_condition(buf->ctx, buf->pool, ':', "pseudo", buf->string);
  693. next(buf);
  694. if (accept(buf, '('))
  695. {
  696. white(buf);
  697. if (accept(buf, CSS_KEYWORD))
  698. white(buf);
  699. expect(buf, ')');
  700. }
  701. return c;
  702. }
  703. if (accept(buf, '.'))
  704. {
  705. if (buf->lookahead != CSS_KEYWORD)
  706. fz_css_error(buf, "expected keyword after '.'");
  707. c = fz_new_css_condition(buf->ctx, buf->pool, '.', "class", buf->string);
  708. next(buf);
  709. return c;
  710. }
  711. if (accept(buf, '['))
  712. {
  713. white(buf);
  714. if (buf->lookahead != CSS_KEYWORD)
  715. fz_css_error(buf, "expected keyword after '['");
  716. c = fz_new_css_condition(buf->ctx, buf->pool, '[', buf->string, NULL);
  717. next(buf);
  718. white(buf);
  719. if (accept(buf, '='))
  720. {
  721. c->type = '=';
  722. c->val = parse_attrib_value(buf);
  723. }
  724. else if (accept(buf, '|'))
  725. {
  726. expect(buf, '=');
  727. c->type = '|';
  728. c->val = parse_attrib_value(buf);
  729. }
  730. else if (accept(buf, '~'))
  731. {
  732. expect(buf, '=');
  733. c->type = '~';
  734. c->val = parse_attrib_value(buf);
  735. }
  736. expect(buf, ']');
  737. return c;
  738. }
  739. if (buf->lookahead == CSS_HASH)
  740. {
  741. c = fz_new_css_condition(buf->ctx, buf->pool, '#', "id", buf->string);
  742. next(buf);
  743. return c;
  744. }
  745. fz_css_error(buf, "expected condition");
  746. }
  747. static fz_css_condition *parse_condition_list(struct lexbuf *buf)
  748. {
  749. fz_css_condition *head, *tail;
  750. head = tail = parse_condition(buf);
  751. while (iscond(buf->lookahead))
  752. {
  753. tail = tail->next = parse_condition(buf);
  754. }
  755. return head;
  756. }
  757. static fz_css_selector *parse_simple_selector(struct lexbuf *buf)
  758. {
  759. fz_css_selector *s;
  760. if (accept(buf, '*'))
  761. {
  762. s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
  763. if (iscond(buf->lookahead))
  764. s->cond = parse_condition_list(buf);
  765. return s;
  766. }
  767. else if (buf->lookahead == CSS_KEYWORD)
  768. {
  769. s = fz_new_css_selector(buf->ctx, buf->pool, buf->string);
  770. next(buf);
  771. if (iscond(buf->lookahead))
  772. s->cond = parse_condition_list(buf);
  773. return s;
  774. }
  775. else if (iscond(buf->lookahead))
  776. {
  777. s = fz_new_css_selector(buf->ctx, buf->pool, NULL);
  778. s->cond = parse_condition_list(buf);
  779. return s;
  780. }
  781. fz_css_error(buf, "expected selector");
  782. }
  783. static fz_css_selector *parse_combinator(struct lexbuf *buf, int c, fz_css_selector *a)
  784. {
  785. fz_css_selector *sel, *b;
  786. white(buf);
  787. b = parse_simple_selector(buf);
  788. sel = fz_new_css_selector(buf->ctx, buf->pool, NULL);
  789. sel->combine = c;
  790. sel->left = a;
  791. sel->right = b;
  792. return sel;
  793. }
  794. static fz_css_selector *parse_selector(struct lexbuf *buf)
  795. {
  796. fz_css_selector *sel = parse_simple_selector(buf);
  797. for (;;)
  798. {
  799. if (accept(buf, ' '))
  800. {
  801. white(buf);
  802. if (accept(buf, '+'))
  803. sel = parse_combinator(buf, '+', sel);
  804. else if (accept(buf, '>'))
  805. sel = parse_combinator(buf, '>', sel);
  806. else if (buf->lookahead != ',' && buf->lookahead != '{' && buf->lookahead != EOF)
  807. sel = parse_combinator(buf, ' ', sel);
  808. else
  809. break;
  810. }
  811. else if (accept(buf, '+'))
  812. sel = parse_combinator(buf, '+', sel);
  813. else if (accept(buf, '>'))
  814. sel = parse_combinator(buf, '>', sel);
  815. else
  816. break;
  817. }
  818. return sel;
  819. }
  820. static fz_css_selector *parse_selector_list(struct lexbuf *buf)
  821. {
  822. fz_css_selector *head, *tail;
  823. head = tail = parse_selector(buf);
  824. while (accept(buf, ','))
  825. {
  826. white(buf);
  827. tail = tail->next = parse_selector(buf);
  828. }
  829. return head;
  830. }
  831. static fz_css_rule *parse_ruleset(struct lexbuf *buf)
  832. {
  833. fz_css_selector *s = NULL;
  834. fz_css_property *p = NULL;
  835. fz_try(buf->ctx)
  836. {
  837. s = parse_selector_list(buf);
  838. expect(buf, '{');
  839. p = parse_declaration_list(buf);
  840. expect(buf, '}');
  841. white(buf);
  842. }
  843. fz_catch(buf->ctx)
  844. {
  845. fz_rethrow_unless(buf->ctx, FZ_ERROR_SYNTAX);
  846. fz_report_error(buf->ctx);
  847. while (buf->lookahead != EOF)
  848. {
  849. if (accept(buf, '}'))
  850. {
  851. white(buf);
  852. break;
  853. }
  854. next(buf);
  855. }
  856. return NULL;
  857. }
  858. return fz_new_css_rule(buf->ctx, buf->pool, s, p);
  859. }
  860. static fz_css_rule *parse_at_page(struct lexbuf *buf)
  861. {
  862. fz_css_selector *s = NULL;
  863. fz_css_property *p = NULL;
  864. white(buf);
  865. if (accept(buf, ':'))
  866. {
  867. expect(buf, CSS_KEYWORD);
  868. white(buf);
  869. }
  870. expect(buf, '{');
  871. p = parse_declaration_list(buf);
  872. expect(buf, '}');
  873. white(buf);
  874. s = fz_new_css_selector(buf->ctx, buf->pool, "@page");
  875. return fz_new_css_rule(buf->ctx, buf->pool, s, p);
  876. }
  877. static fz_css_rule *parse_at_font_face(struct lexbuf *buf)
  878. {
  879. fz_css_selector *s = NULL;
  880. fz_css_property *p = NULL;
  881. white(buf);
  882. expect(buf, '{');
  883. p = parse_declaration_list(buf);
  884. expect(buf, '}');
  885. white(buf);
  886. s = fz_new_css_selector(buf->ctx, buf->pool, "@font-face");
  887. return fz_new_css_rule(buf->ctx, buf->pool, s, p);
  888. }
  889. static void parse_at_rule(struct lexbuf *buf)
  890. {
  891. expect(buf, CSS_KEYWORD);
  892. /* skip until '{' or ';' */
  893. while (buf->lookahead != EOF)
  894. {
  895. if (accept(buf, ';'))
  896. {
  897. white(buf);
  898. return;
  899. }
  900. if (accept(buf, '{'))
  901. {
  902. int depth = 1;
  903. while (buf->lookahead != EOF && depth > 0)
  904. {
  905. if (accept(buf, '{'))
  906. ++depth;
  907. else if (accept(buf, '}'))
  908. --depth;
  909. else
  910. next(buf);
  911. }
  912. white(buf);
  913. return;
  914. }
  915. next(buf);
  916. }
  917. }
  918. static fz_css_rule *parse_stylesheet(struct lexbuf *buf, fz_css_rule *chain)
  919. {
  920. fz_css_rule *rule, **nextp, *tail;
  921. tail = chain;
  922. if (tail)
  923. {
  924. while (tail->next)
  925. tail = tail->next;
  926. nextp = &tail->next;
  927. }
  928. else
  929. {
  930. nextp = &tail;
  931. }
  932. white(buf);
  933. while (buf->lookahead != EOF)
  934. {
  935. if (accept(buf, '@'))
  936. {
  937. if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "page"))
  938. {
  939. next(buf);
  940. rule = *nextp = parse_at_page(buf);
  941. nextp = &rule->next;
  942. }
  943. else if (buf->lookahead == CSS_KEYWORD && !strcmp(buf->string, "font-face"))
  944. {
  945. next(buf);
  946. rule = *nextp = parse_at_font_face(buf);
  947. nextp = &rule->next;
  948. }
  949. else
  950. {
  951. parse_at_rule(buf);
  952. }
  953. }
  954. else
  955. {
  956. fz_css_rule *x = parse_ruleset(buf);
  957. if (x)
  958. {
  959. rule = *nextp = x;
  960. nextp = &rule->next;
  961. }
  962. }
  963. white(buf);
  964. }
  965. return chain ? chain : tail;
  966. }
  967. const char *fz_css_property_name(int key)
  968. {
  969. const char *name = "unknown";
  970. size_t i;
  971. for (i = 0; i < nelem(css_property_list); ++i)
  972. if (*css_property_list[i].name && css_property_list[i].key == key)
  973. name = css_property_list[i].name;
  974. return name;
  975. }
  976. fz_css_property *fz_parse_css_properties(fz_context *ctx, fz_pool *pool, const char *source)
  977. {
  978. struct lexbuf buf;
  979. css_lex_init(ctx, &buf, pool, source, "<inline>");
  980. next(&buf);
  981. return parse_declaration_list(&buf);
  982. }
  983. void fz_parse_css(fz_context *ctx, fz_css *css, const char *source, const char *file)
  984. {
  985. struct lexbuf buf;
  986. css_lex_init(ctx, &buf, css->pool, source, file);
  987. next(&buf);
  988. css->rule = parse_stylesheet(&buf, css->rule);
  989. }