strtof.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481
  1. // Copyright (C) 2004-2021 Artifex Software, Inc.
  2. //
  3. // This file is part of MuPDF.
  4. //
  5. // MuPDF is free software: you can redistribute it and/or modify it under the
  6. // terms of the GNU Affero General Public License as published by the Free
  7. // Software Foundation, either version 3 of the License, or (at your option)
  8. // any later version.
  9. //
  10. // MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
  11. // WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  12. // FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
  13. // details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
  17. //
  18. // Alternative licensing terms are available from the licensor.
  19. // For commercial licensing, see <https://www.artifex.com/> or contact
  20. // Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
  21. // CA 94129, USA, for further information.
  22. #include "mupdf/fitz.h"
  23. #include <assert.h>
  24. #include <errno.h>
  25. #include <float.h>
  26. #ifndef INFINITY
  27. #define INFINITY (DBL_MAX+DBL_MAX)
  28. #endif
  29. #ifndef NAN
  30. #define NAN (INFINITY-INFINITY)
  31. #endif
  32. /*
  33. We use "Algorithm D" from "Contributions to a Proposed Standard for Binary
  34. Floating-Point Arithmetic" by Jerome Coonen (1984).
  35. The implementation uses a self-made floating point type, 'strtof_fp_t', with
  36. a 32-bit significand. The steps of the algorithm are
  37. INPUT: Up to 9 decimal digits d1, ... d9 and an exponent dexp.
  38. OUTPUT: A float corresponding to the number d1 ... d9 * 10^dexp.
  39. 1) Convert the integer d1 ... d9 to an strtof_fp_t x.
  40. 2) Lookup the strtof_fp_t power = 10 ^ |dexp|.
  41. 3) If dexp is positive set x = x * power, else set x = x / power. Use rounding mode 'round to odd'.
  42. 4) Round x to a float using rounding mode 'to even'.
  43. Step 1) is always lossless as the strtof_fp_t's significand can hold a 9-digit integer.
  44. In the case |dexp| <= 13 the cached power is exact and the algorithm returns
  45. the exactly rounded result (with rounding mode 'to even').
  46. There is no double-rounding in 3), 4) as the multiply/divide uses 'round to odd'.
  47. For |dexp| > 13 the maximum error is bounded by (1/2 + 1/256) ulp.
  48. This is small enough to ensure that binary to decimal to binary conversion
  49. is the identity if the decimal format uses 9 correctly rounded significant digits.
  50. */
  51. typedef struct strtof_fp_t
  52. {
  53. uint32_t f;
  54. int e;
  55. } strtof_fp_t;
  56. /* Multiply/Divide x by y with 'round to odd'. Assume that x and y are normalized. */
  57. static strtof_fp_t
  58. strtof_multiply(strtof_fp_t x, strtof_fp_t y)
  59. {
  60. uint64_t tmp;
  61. strtof_fp_t res;
  62. assert(x.f & y.f & 0x80000000);
  63. res.e = x.e + y.e + 32;
  64. tmp = (uint64_t) x.f * y.f;
  65. /* Normalize. */
  66. if ((tmp < ((uint64_t) 1 << 63)))
  67. {
  68. tmp <<= 1;
  69. --res.e;
  70. }
  71. res.f = tmp >> 32;
  72. /* Set the last bit of the significand to 1 if the result is
  73. inexact. */
  74. if (tmp & 0xffffffff)
  75. res.f |= 1;
  76. return res;
  77. }
  78. static strtof_fp_t
  79. divide(strtof_fp_t x, strtof_fp_t y)
  80. {
  81. uint64_t product, quotient;
  82. uint32_t remainder;
  83. strtof_fp_t res;
  84. res.e = x.e - y.e - 32;
  85. product = (uint64_t) x.f << 32;
  86. quotient = product / y.f;
  87. remainder = product % y.f;
  88. /* 2^31 <= quotient <= 2^33 - 2. */
  89. if (quotient <= 0xffffffff)
  90. res.f = quotient;
  91. else
  92. {
  93. ++res.e;
  94. /* If quotient % 2 != 0 we have remainder != 0. */
  95. res.f = quotient >> 1;
  96. }
  97. if (remainder)
  98. res.f |= 1;
  99. return res;
  100. }
  101. /* From 10^0 to 10^54. Generated with GNU MPFR. */
  102. static const uint32_t strtof_powers_ten[55] = {
  103. 0x80000000, 0xa0000000, 0xc8000000, 0xfa000000, 0x9c400000, 0xc3500000,
  104. 0xf4240000, 0x98968000, 0xbebc2000, 0xee6b2800, 0x9502f900, 0xba43b740,
  105. 0xe8d4a510, 0x9184e72a, 0xb5e620f4, 0xe35fa932, 0x8e1bc9bf, 0xb1a2bc2f,
  106. 0xde0b6b3a, 0x8ac72305, 0xad78ebc6, 0xd8d726b7, 0x87867832, 0xa968163f,
  107. 0xd3c21bcf, 0x84595161, 0xa56fa5ba, 0xcecb8f28, 0x813f3979, 0xa18f07d7,
  108. 0xc9f2c9cd, 0xfc6f7c40, 0x9dc5ada8, 0xc5371912, 0xf684df57, 0x9a130b96,
  109. 0xc097ce7c, 0xf0bdc21b, 0x96769951, 0xbc143fa5, 0xeb194f8e, 0x92efd1b9,
  110. 0xb7abc627, 0xe596b7b1, 0x8f7e32ce, 0xb35dbf82, 0xe0352f63, 0x8c213d9e,
  111. 0xaf298d05, 0xdaf3f046, 0x88d8762c, 0xab0e93b7, 0xd5d238a5, 0x85a36367,
  112. 0xa70c3c41
  113. };
  114. static const int strtof_powers_ten_e[55] = {
  115. -31, -28, -25, -22, -18, -15, -12, -8, -5, -2,
  116. 2, 5, 8, 12, 15, 18, 22, 25, 28, 32, 35, 38, 42, 45, 48, 52, 55, 58, 62, 65,
  117. 68, 71, 75, 78, 81, 85, 88, 91, 95, 98, 101, 105, 108, 111, 115, 118, 121,
  118. 125, 128, 131, 135, 138, 141, 145, 148
  119. };
  120. static strtof_fp_t
  121. strtof_cached_power(int i)
  122. {
  123. strtof_fp_t result;
  124. assert (i >= 0 && i <= 54);
  125. result.f = strtof_powers_ten[i];
  126. result.e = strtof_powers_ten_e[i];
  127. return result;
  128. }
  129. /* Find number of leading zero bits in an uint32_t. Derived from the
  130. "Bit Twiddling Hacks" at graphics.stanford.edu/~seander/bithacks.html. */
  131. static unsigned char clz_table[256] = {
  132. 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
  133. # define sixteen_times(N) N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
  134. sixteen_times (3) sixteen_times (2) sixteen_times (2)
  135. sixteen_times (1) sixteen_times (1) sixteen_times (1) sixteen_times (1)
  136. /* Zero for the rest. */
  137. };
  138. static unsigned
  139. leading_zeros (uint32_t x)
  140. {
  141. unsigned tmp1, tmp2;
  142. tmp1 = x >> 16;
  143. if (tmp1)
  144. {
  145. tmp2 = tmp1 >> 8;
  146. if (tmp2)
  147. return clz_table[tmp2];
  148. else
  149. return 8 + clz_table[tmp1];
  150. }
  151. else
  152. {
  153. tmp1 = x >> 8;
  154. if (tmp1)
  155. return 16 + clz_table[tmp1];
  156. else
  157. return 24 + clz_table[x];
  158. }
  159. }
  160. static strtof_fp_t
  161. uint32_to_diy (uint32_t x)
  162. {
  163. strtof_fp_t result = {x, 0};
  164. unsigned shift = leading_zeros(x);
  165. result.f <<= shift;
  166. result.e -= shift;
  167. return result;
  168. }
  169. #define SP_SIGNIFICAND_SIZE 23
  170. #define SP_EXPONENT_BIAS (127 + SP_SIGNIFICAND_SIZE)
  171. #define SP_MIN_EXPONENT (-SP_EXPONENT_BIAS)
  172. #define SP_EXPONENT_MASK 0x7f800000
  173. #define SP_SIGNIFICAND_MASK 0x7fffff
  174. #define SP_HIDDEN_BIT 0x800000 /* 2^23 */
  175. /* Convert normalized strtof_fp_t to IEEE-754 single with 'round to even'.
  176. See "Implementing IEEE 754-2008 Rounding" in the
  177. "Handbook of Floating-Point Arithmetik".
  178. */
  179. static float
  180. diy_to_float(strtof_fp_t x, int negative)
  181. {
  182. uint32_t result;
  183. union
  184. {
  185. float f;
  186. uint32_t n;
  187. } tmp;
  188. assert(x.f & 0x80000000);
  189. /* We have 2^32 - 2^7 = 0xffffff80. */
  190. if (x.e > 96 || (x.e == 96 && x.f >= 0xffffff80))
  191. {
  192. /* Overflow. Set result to infinity. */
  193. errno = ERANGE;
  194. result = 0xff << SP_SIGNIFICAND_SIZE;
  195. }
  196. /* We have 2^32 - 2^8 = 0xffffff00. */
  197. else if (x.e > -158)
  198. {
  199. /* x is greater or equal to FLT_MAX. So we get a normalized number. */
  200. result = (uint32_t) (x.e + 158) << SP_SIGNIFICAND_SIZE;
  201. result |= (x.f >> 8) & SP_SIGNIFICAND_MASK;
  202. if (x.f & 0x80)
  203. {
  204. /* Round-bit is set. */
  205. if (x.f & 0x7f)
  206. /* Sticky-bit is set. */
  207. ++result;
  208. else if (x.f & 0x100)
  209. /* Significand is odd. */
  210. ++result;
  211. }
  212. }
  213. else if (x.e == -158 && x.f >= 0xffffff00)
  214. {
  215. /* x is in the range (2^32, 2^32 - 2^8] * 2^-158, so its smaller than
  216. FLT_MIN but still rounds to it. */
  217. result = 1U << SP_SIGNIFICAND_SIZE;
  218. }
  219. else if (x.e > -181)
  220. {
  221. /* Non-zero Denormal. */
  222. int shift = -149 - x.e; /* 9 <= shift <= 31. */
  223. result = x.f >> shift;
  224. if (x.f & (1U << (shift - 1)))
  225. /* Round-bit is set. */
  226. {
  227. if (x.f & ((1U << (shift - 1)) - 1))
  228. /* Sticky-bit is set. */
  229. ++result;
  230. else if (x.f & 1U << shift)
  231. /* Significand is odd. */
  232. ++result;
  233. }
  234. }
  235. else if (x.e == -181 && x.f > 0x80000000)
  236. {
  237. /* x is in the range (0.5,1) * 2^-149 so it rounds to the smallest
  238. denormal. Can't handle this in the previous case as shifting a
  239. uint32_t 32 bits to the right is undefined behaviour. */
  240. result = 1;
  241. }
  242. else
  243. {
  244. /* Underflow. */
  245. errno = ERANGE;
  246. result = 0;
  247. }
  248. if (negative)
  249. result |= 0x80000000;
  250. tmp.n = result;
  251. return tmp.f;
  252. }
  253. static float
  254. scale_integer_to_float(uint32_t M, int N, int negative)
  255. {
  256. strtof_fp_t result, x, power;
  257. if (M == 0)
  258. return negative ? -0.f : 0.f;
  259. if (N > 38)
  260. {
  261. /* Overflow. */
  262. errno = ERANGE;
  263. return negative ? -INFINITY : INFINITY;
  264. }
  265. if (N < -54)
  266. {
  267. /* Underflow. */
  268. errno = ERANGE;
  269. return negative ? -0.f : 0.f;
  270. }
  271. /* If N is in the range {-13, ..., 13} the conversion is exact.
  272. Try to scale N into this region. */
  273. while (N > 13 && M <= 0xffffffff / 10)
  274. {
  275. M *= 10;
  276. --N;
  277. }
  278. while (N < -13 && M % 10 == 0)
  279. {
  280. M /= 10;
  281. ++N;
  282. }
  283. x = uint32_to_diy (M);
  284. if (N >= 0)
  285. {
  286. power = strtof_cached_power(N);
  287. result = strtof_multiply(x, power);
  288. }
  289. else
  290. {
  291. power = strtof_cached_power(-N);
  292. result = divide(x, power);
  293. }
  294. return diy_to_float(result, negative);
  295. }
  296. /* Return non-zero if *s starts with string (must be uppercase), ignoring case,
  297. and increment *s by its length. */
  298. static int
  299. starts_with(const char **s, const char *string)
  300. {
  301. const char *x = *s, *y = string;
  302. while (*x && *y && (*x == *y || *x == *y + 32))
  303. ++x, ++y;
  304. if (*y == 0)
  305. {
  306. /* Match. */
  307. *s = x;
  308. return 1;
  309. }
  310. else
  311. return 0;
  312. }
  313. #define SET_TAILPTR(tailptr, s) \
  314. do \
  315. if (tailptr) \
  316. *tailptr = (char *) s; \
  317. while (0)
  318. float
  319. fz_strtof(const char *string, char **tailptr)
  320. {
  321. /* FIXME: error (1/2 + 1/256) ulp */
  322. const char *s;
  323. uint32_t M = 0;
  324. int N = 0;
  325. /* If decimal_digits gets 9 we truncate all following digits. */
  326. int decimal_digits = 0;
  327. int negative = 0;
  328. const char *number_start = 0;
  329. /* Skip leading whitespace (isspace in "C" locale). */
  330. s = string;
  331. while (*s == ' ' || *s == '\f' || *s == '\n' || *s == '\r' || *s == '\t' || *s == '\v')
  332. ++s;
  333. /* Parse sign. */
  334. if (*s == '+')
  335. ++s;
  336. if (*s == '-')
  337. {
  338. negative = 1;
  339. ++s;
  340. }
  341. number_start = s;
  342. /* Parse digits before decimal point. */
  343. while (*s >= '0' && *s <= '9')
  344. {
  345. if (decimal_digits)
  346. {
  347. if (decimal_digits < 9)
  348. {
  349. ++decimal_digits;
  350. M = M * 10 + *s - '0';
  351. }
  352. /* Really arcane strings might overflow N. */
  353. else if (N < 1000)
  354. ++N;
  355. }
  356. else if (*s > '0')
  357. {
  358. M = *s - '0';
  359. ++decimal_digits;
  360. }
  361. ++s;
  362. }
  363. /* Parse decimal point. */
  364. if (*s == '.')
  365. ++s;
  366. /* Parse digits after decimal point. */
  367. while (*s >= '0' && *s <= '9')
  368. {
  369. if (decimal_digits < 9)
  370. {
  371. if (decimal_digits || *s > '0')
  372. {
  373. ++decimal_digits;
  374. M = M * 10 + *s - '0';
  375. }
  376. --N;
  377. }
  378. ++s;
  379. }
  380. if ((s == number_start + 1 && *number_start == '.') || number_start == s)
  381. {
  382. /* No Number. Check for INF and NAN strings. */
  383. s = number_start;
  384. if (starts_with(&s, "INFINITY") || starts_with(&s, "INF"))
  385. {
  386. errno = ERANGE;
  387. SET_TAILPTR(tailptr, s);
  388. return negative ? -INFINITY : +INFINITY;
  389. }
  390. else if (starts_with(&s, "NAN"))
  391. {
  392. SET_TAILPTR(tailptr, s);
  393. return (float)NAN;
  394. }
  395. else
  396. {
  397. SET_TAILPTR(tailptr, string);
  398. return 0.f;
  399. }
  400. }
  401. /* Parse exponent. */
  402. if (*s == 'e' || *s == 'E')
  403. {
  404. int exp_negative = 0;
  405. int exp = 0;
  406. const char *int_start;
  407. const char *exp_start = s;
  408. ++s;
  409. if (*s == '+')
  410. ++s;
  411. else if (*s == '-')
  412. {
  413. ++s;
  414. exp_negative = 1;
  415. }
  416. int_start = s;
  417. /* Parse integer. */
  418. while (*s >= '0' && *s <= '9')
  419. {
  420. /* Make sure exp does not get overflowed. */
  421. if (exp < 100)
  422. exp = exp * 10 + *s - '0';
  423. ++s;
  424. }
  425. if (exp_negative)
  426. exp = -exp;
  427. if (s == int_start)
  428. /* No Number. */
  429. s = exp_start;
  430. else
  431. N += exp;
  432. }
  433. SET_TAILPTR(tailptr, s);
  434. return scale_integer_to_float(M, N, negative);
  435. }