/* printf-parse.c (21 KB) */


  /* Formatted output to strings.
     Copyright (C) 1999-2000, 2002-2003, 2006-2021 Free Software Foundation, Inc.

     This file is free software: you can redistribute it and/or modify
     it under the terms of the GNU Lesser General Public License as
     published by the Free Software Foundation; either version 2.1 of the
     License, or (at your option) any later version.

     This file is distributed in the hope that it will be useful,
     but WITHOUT ANY WARRANTY; without even the implied warranty of
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     GNU Lesser General Public License for more details.

     You should have received a copy of the GNU Lesser General Public License
     along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
  /* This file can be parametrized with the following macros:
       CHAR_T             The element type of the format string.
       CHAR_T_ONLY_ASCII  Set to 1 to enable verification that all characters
                          in the format string are ASCII.
       DIRECTIVE          Structure denoting a format directive.
                          Depends on CHAR_T.
       DIRECTIVES         Structure denoting the set of format directives of a
                          format string.  Depends on CHAR_T.
       PRINTF_PARSE       Function that parses a format string.
                          Depends on CHAR_T.
       STATIC             Set to 'static' to declare the function static.
       ENABLE_UNISTDIO    Set to 1 to enable the unistdio extensions.  */
  25. #ifndef PRINTF_PARSE
  26. # include <config.h>
  27. #endif
  28. /* Specification. */
  29. #ifndef PRINTF_PARSE
  30. # include "printf-parse.h"
  31. #endif
  32. /* Default parameters. */
  33. #ifndef PRINTF_PARSE
  34. # define PRINTF_PARSE printf_parse
  35. # define CHAR_T char
  36. # define DIRECTIVE char_directive
  37. # define DIRECTIVES char_directives
  38. #endif
  39. /* Get size_t, NULL. */
  40. #include <stddef.h>
  41. /* Get intmax_t. */
  42. #include <stdint.h>
  43. /* malloc(), realloc(), free(). */
  44. #include <stdlib.h>
  45. /* memcpy(). */
  46. #include <string.h>
  47. /* errno. */
  48. #include <errno.h>
  49. /* Checked size_t computations. */
  50. #include "xsize.h"
  51. #if CHAR_T_ONLY_ASCII
  52. /* c_isascii(). */
  53. # include "c-ctype.h"
  54. #endif
  55. #ifdef STATIC
  56. STATIC
  57. #endif
  58. int
  59. PRINTF_PARSE (const CHAR_T *format, DIRECTIVES *d, arguments *a)
  60. {
  61. const CHAR_T *cp = format; /* pointer into format */
  62. size_t arg_posn = 0; /* number of regular arguments consumed */
  63. size_t d_allocated; /* allocated elements of d->dir */
  64. size_t a_allocated; /* allocated elements of a->arg */
  65. size_t max_width_length = 0;
  66. size_t max_precision_length = 0;
  67. d->count = 0;
  68. d_allocated = N_DIRECT_ALLOC_DIRECTIVES;
  69. d->dir = d->direct_alloc_dir;
  70. a->count = 0;
  71. a_allocated = N_DIRECT_ALLOC_ARGUMENTS;
  72. a->arg = a->direct_alloc_arg;
  73. #define REGISTER_ARG(_index_,_type_) \
  74. { \
  75. size_t n = (_index_); \
  76. if (n >= a_allocated) \
  77. { \
  78. size_t memory_size; \
  79. argument *memory; \
  80. \
  81. a_allocated = xtimes (a_allocated, 2); \
  82. if (a_allocated <= n) \
  83. a_allocated = xsum (n, 1); \
  84. memory_size = xtimes (a_allocated, sizeof (argument)); \
  85. if (size_overflow_p (memory_size)) \
  86. /* Overflow, would lead to out of memory. */ \
  87. goto out_of_memory; \
  88. memory = (argument *) (a->arg != a->direct_alloc_arg \
  89. ? realloc (a->arg, memory_size) \
  90. : malloc (memory_size)); \
  91. if (memory == NULL) \
  92. /* Out of memory. */ \
  93. goto out_of_memory; \
  94. if (a->arg == a->direct_alloc_arg) \
  95. memcpy (memory, a->arg, a->count * sizeof (argument)); \
  96. a->arg = memory; \
  97. } \
  98. while (a->count <= n) \
  99. a->arg[a->count++].type = TYPE_NONE; \
  100. if (a->arg[n].type == TYPE_NONE) \
  101. a->arg[n].type = (_type_); \
  102. else if (a->arg[n].type != (_type_)) \
  103. /* Ambiguous type for positional argument. */ \
  104. goto error; \
  105. }
  106. while (*cp != '\0')
  107. {
  108. CHAR_T c = *cp++;
  109. if (c == '%')
  110. {
  111. size_t arg_index = ARG_NONE;
  112. DIRECTIVE *dp = &d->dir[d->count]; /* pointer to next directive */
  113. /* Initialize the next directive. */
  114. dp->dir_start = cp - 1;
  115. dp->flags = 0;
  116. dp->width_start = NULL;
  117. dp->width_end = NULL;
  118. dp->width_arg_index = ARG_NONE;
  119. dp->precision_start = NULL;
  120. dp->precision_end = NULL;
  121. dp->precision_arg_index = ARG_NONE;
  122. dp->arg_index = ARG_NONE;
  123. /* Test for positional argument. */
  124. if (*cp >= '0' && *cp <= '9')
  125. {
  126. const CHAR_T *np;
  127. for (np = cp; *np >= '0' && *np <= '9'; np++)
  128. ;
  129. if (*np == '$')
  130. {
  131. size_t n = 0;
  132. for (np = cp; *np >= '0' && *np <= '9'; np++)
  133. n = xsum (xtimes (n, 10), *np - '0');
  134. if (n == 0)
  135. /* Positional argument 0. */
  136. goto error;
  137. if (size_overflow_p (n))
  138. /* n too large, would lead to out of memory later. */
  139. goto error;
  140. arg_index = n - 1;
  141. cp = np + 1;
  142. }
  143. }
  144. /* Read the flags. */
  145. for (;;)
  146. {
  147. if (*cp == '\'')
  148. {
  149. dp->flags |= FLAG_GROUP;
  150. cp++;
  151. }
  152. else if (*cp == '-')
  153. {
  154. dp->flags |= FLAG_LEFT;
  155. cp++;
  156. }
  157. else if (*cp == '+')
  158. {
  159. dp->flags |= FLAG_SHOWSIGN;
  160. cp++;
  161. }
  162. else if (*cp == ' ')
  163. {
  164. dp->flags |= FLAG_SPACE;
  165. cp++;
  166. }
  167. else if (*cp == '#')
  168. {
  169. dp->flags |= FLAG_ALT;
  170. cp++;
  171. }
  172. else if (*cp == '0')
  173. {
  174. dp->flags |= FLAG_ZERO;
  175. cp++;
  176. }
  177. #if __GLIBC__ >= 2 && !defined __UCLIBC__
  178. else if (*cp == 'I')
  179. {
  180. dp->flags |= FLAG_LOCALIZED;
  181. cp++;
  182. }
  183. #endif
  184. else
  185. break;
  186. }
  187. /* Parse the field width. */
  188. if (*cp == '*')
  189. {
  190. dp->width_start = cp;
  191. cp++;
  192. dp->width_end = cp;
  193. if (max_width_length < 1)
  194. max_width_length = 1;
  195. /* Test for positional argument. */
  196. if (*cp >= '0' && *cp <= '9')
  197. {
  198. const CHAR_T *np;
  199. for (np = cp; *np >= '0' && *np <= '9'; np++)
  200. ;
  201. if (*np == '$')
  202. {
  203. size_t n = 0;
  204. for (np = cp; *np >= '0' && *np <= '9'; np++)
  205. n = xsum (xtimes (n, 10), *np - '0');
  206. if (n == 0)
  207. /* Positional argument 0. */
  208. goto error;
  209. if (size_overflow_p (n))
  210. /* n too large, would lead to out of memory later. */
  211. goto error;
  212. dp->width_arg_index = n - 1;
  213. cp = np + 1;
  214. }
  215. }
  216. if (dp->width_arg_index == ARG_NONE)
  217. {
  218. dp->width_arg_index = arg_posn++;
  219. if (dp->width_arg_index == ARG_NONE)
  220. /* arg_posn wrapped around. */
  221. goto error;
  222. }
  223. REGISTER_ARG (dp->width_arg_index, TYPE_INT);
  224. }
  225. else if (*cp >= '0' && *cp <= '9')
  226. {
  227. size_t width_length;
  228. dp->width_start = cp;
  229. for (; *cp >= '0' && *cp <= '9'; cp++)
  230. ;
  231. dp->width_end = cp;
  232. width_length = dp->width_end - dp->width_start;
  233. if (max_width_length < width_length)
  234. max_width_length = width_length;
  235. }
  236. /* Parse the precision. */
  237. if (*cp == '.')
  238. {
  239. cp++;
  240. if (*cp == '*')
  241. {
  242. dp->precision_start = cp - 1;
  243. cp++;
  244. dp->precision_end = cp;
  245. if (max_precision_length < 2)
  246. max_precision_length = 2;
  247. /* Test for positional argument. */
  248. if (*cp >= '0' && *cp <= '9')
  249. {
  250. const CHAR_T *np;
  251. for (np = cp; *np >= '0' && *np <= '9'; np++)
  252. ;
  253. if (*np == '$')
  254. {
  255. size_t n = 0;
  256. for (np = cp; *np >= '0' && *np <= '9'; np++)
  257. n = xsum (xtimes (n, 10), *np - '0');
  258. if (n == 0)
  259. /* Positional argument 0. */
  260. goto error;
  261. if (size_overflow_p (n))
  262. /* n too large, would lead to out of memory
  263. later. */
  264. goto error;
  265. dp->precision_arg_index = n - 1;
  266. cp = np + 1;
  267. }
  268. }
  269. if (dp->precision_arg_index == ARG_NONE)
  270. {
  271. dp->precision_arg_index = arg_posn++;
  272. if (dp->precision_arg_index == ARG_NONE)
  273. /* arg_posn wrapped around. */
  274. goto error;
  275. }
  276. REGISTER_ARG (dp->precision_arg_index, TYPE_INT);
  277. }
  278. else
  279. {
  280. size_t precision_length;
  281. dp->precision_start = cp - 1;
  282. for (; *cp >= '0' && *cp <= '9'; cp++)
  283. ;
  284. dp->precision_end = cp;
  285. precision_length = dp->precision_end - dp->precision_start;
  286. if (max_precision_length < precision_length)
  287. max_precision_length = precision_length;
  288. }
  289. }
  290. {
  291. arg_type type;
  292. /* Parse argument type/size specifiers. */
  293. {
  294. int flags = 0;
  295. for (;;)
  296. {
  297. if (*cp == 'h')
  298. {
  299. flags |= (1 << (flags & 1));
  300. cp++;
  301. }
  302. else if (*cp == 'L')
  303. {
  304. flags |= 4;
  305. cp++;
  306. }
  307. else if (*cp == 'l')
  308. {
  309. flags += 8;
  310. cp++;
  311. }
  312. else if (*cp == 'j')
  313. {
  314. if (sizeof (intmax_t) > sizeof (long))
  315. {
  316. /* intmax_t = long long */
  317. flags += 16;
  318. }
  319. else if (sizeof (intmax_t) > sizeof (int))
  320. {
  321. /* intmax_t = long */
  322. flags += 8;
  323. }
  324. cp++;
  325. }
  326. else if (*cp == 'z' || *cp == 'Z')
  327. {
  328. /* 'z' is standardized in ISO C 99, but glibc uses 'Z'
  329. because the warning facility in gcc-2.95.2 understands
  330. only 'Z' (see gcc-2.95.2/gcc/c-common.c:1784). */
  331. if (sizeof (size_t) > sizeof (long))
  332. {
  333. /* size_t = long long */
  334. flags += 16;
  335. }
  336. else if (sizeof (size_t) > sizeof (int))
  337. {
  338. /* size_t = long */
  339. flags += 8;
  340. }
  341. cp++;
  342. }
  343. else if (*cp == 't')
  344. {
  345. if (sizeof (ptrdiff_t) > sizeof (long))
  346. {
  347. /* ptrdiff_t = long long */
  348. flags += 16;
  349. }
  350. else if (sizeof (ptrdiff_t) > sizeof (int))
  351. {
  352. /* ptrdiff_t = long */
  353. flags += 8;
  354. }
  355. cp++;
  356. }
  357. #if defined __APPLE__ && defined __MACH__
  358. /* On Mac OS X 10.3, PRIdMAX is defined as "qd".
  359. We cannot change it to "lld" because PRIdMAX must also
  360. be understood by the system's printf routines. */
  361. else if (*cp == 'q')
  362. {
  363. if (64 / 8 > sizeof (long))
  364. {
  365. /* int64_t = long long */
  366. flags += 16;
  367. }
  368. else
  369. {
  370. /* int64_t = long */
  371. flags += 8;
  372. }
  373. cp++;
  374. }
  375. #endif
  376. #if defined _WIN32 && ! defined __CYGWIN__
  377. /* On native Windows, PRIdMAX is defined as "I64d".
  378. We cannot change it to "lld" because PRIdMAX must also
  379. be understood by the system's printf routines. */
  380. else if (*cp == 'I' && cp[1] == '6' && cp[2] == '4')
  381. {
  382. if (64 / 8 > sizeof (long))
  383. {
  384. /* __int64 = long long */
  385. flags += 16;
  386. }
  387. else
  388. {
  389. /* __int64 = long */
  390. flags += 8;
  391. }
  392. cp += 3;
  393. }
  394. #endif
  395. else
  396. break;
  397. }
  398. /* Read the conversion character. */
  399. c = *cp++;
  400. switch (c)
  401. {
  402. case 'd': case 'i':
  403. /* If 'long long' is larger than 'long': */
  404. if (flags >= 16 || (flags & 4))
  405. type = TYPE_LONGLONGINT;
  406. else
  407. /* If 'long long' is the same as 'long', we parse "lld" into
  408. TYPE_LONGINT. */
  409. if (flags >= 8)
  410. type = TYPE_LONGINT;
  411. else if (flags & 2)
  412. type = TYPE_SCHAR;
  413. else if (flags & 1)
  414. type = TYPE_SHORT;
  415. else
  416. type = TYPE_INT;
  417. break;
  418. case 'o': case 'u': case 'x': case 'X':
  419. /* If 'unsigned long long' is larger than 'unsigned long': */
  420. if (flags >= 16 || (flags & 4))
  421. type = TYPE_ULONGLONGINT;
  422. else
  423. /* If 'unsigned long long' is the same as 'unsigned long', we
  424. parse "llu" into TYPE_ULONGINT. */
  425. if (flags >= 8)
  426. type = TYPE_ULONGINT;
  427. else if (flags & 2)
  428. type = TYPE_UCHAR;
  429. else if (flags & 1)
  430. type = TYPE_USHORT;
  431. else
  432. type = TYPE_UINT;
  433. break;
  434. case 'f': case 'F': case 'e': case 'E': case 'g': case 'G':
  435. case 'a': case 'A':
  436. if (flags >= 16 || (flags & 4))
  437. type = TYPE_LONGDOUBLE;
  438. else
  439. type = TYPE_DOUBLE;
  440. break;
  441. case 'c':
  442. if (flags >= 8)
  443. #if HAVE_WINT_T
  444. type = TYPE_WIDE_CHAR;
  445. #else
  446. goto error;
  447. #endif
  448. else
  449. type = TYPE_CHAR;
  450. break;
  451. #if HAVE_WINT_T
  452. case 'C':
  453. type = TYPE_WIDE_CHAR;
  454. c = 'c';
  455. break;
  456. #endif
  457. case 's':
  458. if (flags >= 8)
  459. #if HAVE_WCHAR_T
  460. type = TYPE_WIDE_STRING;
  461. #else
  462. goto error;
  463. #endif
  464. else
  465. type = TYPE_STRING;
  466. break;
  467. #if HAVE_WCHAR_T
  468. case 'S':
  469. type = TYPE_WIDE_STRING;
  470. c = 's';
  471. break;
  472. #endif
  473. case 'p':
  474. type = TYPE_POINTER;
  475. break;
  476. case 'n':
  477. /* If 'long long' is larger than 'long': */
  478. if (flags >= 16 || (flags & 4))
  479. type = TYPE_COUNT_LONGLONGINT_POINTER;
  480. else
  481. /* If 'long long' is the same as 'long', we parse "lln" into
  482. TYPE_COUNT_LONGINT_POINTER. */
  483. if (flags >= 8)
  484. type = TYPE_COUNT_LONGINT_POINTER;
  485. else if (flags & 2)
  486. type = TYPE_COUNT_SCHAR_POINTER;
  487. else if (flags & 1)
  488. type = TYPE_COUNT_SHORT_POINTER;
  489. else
  490. type = TYPE_COUNT_INT_POINTER;
  491. break;
  492. #if ENABLE_UNISTDIO
  493. /* The unistdio extensions. */
  494. case 'U':
  495. if (flags >= 16)
  496. type = TYPE_U32_STRING;
  497. else if (flags >= 8)
  498. type = TYPE_U16_STRING;
  499. else
  500. type = TYPE_U8_STRING;
  501. break;
  502. #endif
  503. case '%':
  504. type = TYPE_NONE;
  505. break;
  506. default:
  507. /* Unknown conversion character. */
  508. goto error;
  509. }
  510. }
  511. if (type != TYPE_NONE)
  512. {
  513. dp->arg_index = arg_index;
  514. if (dp->arg_index == ARG_NONE)
  515. {
  516. dp->arg_index = arg_posn++;
  517. if (dp->arg_index == ARG_NONE)
  518. /* arg_posn wrapped around. */
  519. goto error;
  520. }
  521. REGISTER_ARG (dp->arg_index, type);
  522. }
  523. dp->conversion = c;
  524. dp->dir_end = cp;
  525. }
  526. d->count++;
  527. if (d->count >= d_allocated)
  528. {
  529. size_t memory_size;
  530. DIRECTIVE *memory;
  531. d_allocated = xtimes (d_allocated, 2);
  532. memory_size = xtimes (d_allocated, sizeof (DIRECTIVE));
  533. if (size_overflow_p (memory_size))
  534. /* Overflow, would lead to out of memory. */
  535. goto out_of_memory;
  536. memory = (DIRECTIVE *) (d->dir != d->direct_alloc_dir
  537. ? realloc (d->dir, memory_size)
  538. : malloc (memory_size));
  539. if (memory == NULL)
  540. /* Out of memory. */
  541. goto out_of_memory;
  542. if (d->dir == d->direct_alloc_dir)
  543. memcpy (memory, d->dir, d->count * sizeof (DIRECTIVE));
  544. d->dir = memory;
  545. }
  546. }
  547. #if CHAR_T_ONLY_ASCII
  548. else if (!c_isascii (c))
  549. {
  550. /* Non-ASCII character. Not supported. */
  551. goto error;
  552. }
  553. #endif
  554. }
  555. d->dir[d->count].dir_start = cp;
  556. d->max_width_length = max_width_length;
  557. d->max_precision_length = max_precision_length;
  558. return 0;
  559. error:
  560. if (a->arg != a->direct_alloc_arg)
  561. free (a->arg);
  562. if (d->dir != d->direct_alloc_dir)
  563. free (d->dir);
  564. errno = EINVAL;
  565. return -1;
  566. out_of_memory:
  567. if (a->arg != a->direct_alloc_arg)
  568. free (a->arg);
  569. if (d->dir != d->direct_alloc_dir)
  570. free (d->dir);
  571. errno = ENOMEM;
  572. return -1;
  573. }
  574. #undef PRINTF_PARSE
  575. #undef DIRECTIVES
  576. #undef DIRECTIVE
  577. #undef CHAR_T_ONLY_ASCII
  578. #undef CHAR_T