// parsing.cc
// -*- C++ -*-
// Boost general library 'format' ---------------------------
// See http://www.boost.org for updates, documentation, and revision history.
// (C) Samuel Krempp 2001
// krempp@crans.ens-cachan.fr
// Permission to copy, use, modify, sell and
// distribute this software is granted provided this copyright notice appears
// in all copies. This software is provided "as is" without express or implied
// warranty, and with no claim as to its suitability for any purpose.
// ideas taken from Rudiger Loos's format class
// and Karl Nelson's ofstream (also took its parsing code as basis for printf parsing)
// ------------------------------------------------------------------------------
// parsing.hpp : implementation of the parsing member functions
// ( parse, parse_printf_directive)
// ------------------------------------------------------------------------------
  16. #ifndef BOOST_FORMAT_PARSING_HPP
  17. #define BOOST_FORMAT_PARSING_HPP
  18. #include <boost/format.hpp>
  19. #include <boost/throw_exception.hpp>
  20. #include <boost/assert.hpp>
  21. namespace boost {
  22. namespace io {
  23. namespace detail {
  24. template<class Stream> inline
  25. bool wrap_isdigit(char c, Stream &os)
  26. {
  27. #ifndef BOOST_NO_LOCALE_ISIDIGIT
  28. return std::isdigit(c, os.rdbuf()->getloc() );
  29. # else
  30. using namespace std;
  31. return isdigit(c);
  32. #endif
  33. } //end- wrap_isdigit(..)
  34. template<class Res> inline
  35. Res str2int(const std::string& s,
  36. std::string::size_type start,
  37. BOOST_IO_STD ios &os,
  38. const Res = Res(0) )
  39. // Input : char string, with starting index
  40. // a basic_ios& merely to call its widen/narrow member function in the desired locale.
  41. // Effects : reads s[start:] and converts digits into an integral n, of type Res
  42. // Returns : n
  43. {
  44. Res n = 0;
  45. while(start<s.size() && wrap_isdigit(s[start], os) ) {
  46. char cur_ch = s[start];
  47. BOOST_ASSERT(cur_ch != 0 ); // since we called isdigit, this should not happen.
  48. n *= 10;
  49. n += cur_ch - '0'; // 22.2.1.1.2 of the C++ standard
  50. ++start;
  51. }
  52. return n;
  53. }
  54. void skip_asterisk(const std::string & buf,
  55. std::string::size_type * pos_p,
  56. BOOST_IO_STD ios &os)
  57. // skip printf's "asterisk-fields" directives in the format-string buf
  58. // Input : char string, with starting index *pos_p
  59. // a basic_ios& merely to call its widen/narrow member function in the desired locale.
  60. // Effects : advance *pos_p by skipping printf's asterisk fields.
  61. // Returns : nothing
  62. {
  63. using namespace std;
  64. BOOST_ASSERT( pos_p != 0);
  65. if(*pos_p >= buf.size() ) return;
  66. if(buf[ *pos_p]=='*') {
  67. ++ (*pos_p);
  68. while (*pos_p < buf.size() && wrap_isdigit(buf[*pos_p],os)) ++(*pos_p);
  69. if(buf[*pos_p]=='$') ++(*pos_p);
  70. }
  71. }
  72. inline void maybe_throw_exception( unsigned char exceptions)
  73. // auxiliary func called by parse_printf_directive
  74. // for centralising error handling
  75. // it either throws if user sets the corresponding flag, or does nothing.
  76. {
  77. if(exceptions & io::bad_format_string_bit)
  78. boost::throw_exception(io::bad_format_string());
  79. }
  80. bool parse_printf_directive(const std::string & buf,
  81. std::string::size_type * pos_p,
  82. detail::format_item * fpar,
  83. BOOST_IO_STD ios &os,
  84. unsigned char exceptions)
  85. // Input : a 'printf-directive' in the format-string, starting at buf[ *pos_p ]
  86. // a basic_ios& merely to call its widen/narrow member function in the desired locale.
  87. // a bitset'excpetions' telling whether to throw exceptions on errors.
  88. // Returns : true if parse somehow succeeded (possibly ignoring errors if exceptions disabled)
  89. // false if it failed so bad that the directive should be printed verbatim
  90. // Effects : - *pos_p is incremented so that buf[*pos_p] is the first char after the directive
  91. // - *fpar is set with the parameters read in the directive
  92. {
  93. typedef format_item format_item_t;
  94. BOOST_ASSERT( pos_p != 0);
  95. std::string::size_type &i1 = *pos_p,
  96. i0;
  97. fpar->argN_ = format_item_t::argN_no_posit; // if no positional-directive
  98. bool in_brackets=false;
  99. if(buf[i1]=='|')
  100. {
  101. in_brackets=true;
  102. if( ++i1 >= buf.size() ) {
  103. maybe_throw_exception(exceptions);
  104. return false;
  105. }
  106. }
  107. // the flag '0' would be picked as a digit for argument order, but here it's a flag :
  108. if(buf[i1]=='0')
  109. goto parse_flags;
  110. // handle argument order (%2$d) or possibly width specification: %2d
  111. i0 = i1; // save position before digits
  112. while (i1 < buf.size() && wrap_isdigit(buf[i1], os))
  113. ++i1;
  114. if (i1!=i0)
  115. {
  116. if( i1 >= buf.size() ) {
  117. maybe_throw_exception(exceptions);
  118. return false;
  119. }
  120. int n=str2int(buf,i0, os, int(0) );
  121. // %N% case : this is already the end of the directive
  122. if( buf[i1] == '%' )
  123. {
  124. fpar->argN_ = n-1;
  125. ++i1;
  126. if( in_brackets)
  127. maybe_throw_exception(exceptions);
  128. // but don't return. maybe "%" was used in lieu of '$', so we go on.
  129. else return true;
  130. }
  131. if ( buf[i1]=='$' )
  132. {
  133. fpar->argN_ = n-1;
  134. ++i1;
  135. }
  136. else
  137. {
  138. // non-positionnal directive
  139. fpar->ref_state_.width_ = n;
  140. fpar->argN_ = format_item_t::argN_no_posit;
  141. goto parse_precision;
  142. }
  143. }
  144. parse_flags:
  145. // handle flags
  146. while ( i1 <buf.size()) // as long as char is one of + - = # 0 l h or ' '
  147. {
  148. // misc switches
  149. switch (buf[i1])
  150. {
  151. case '\'' : break; // no effect yet. (painful to implement)
  152. case 'l':
  153. case 'h': // short/long modifier : for printf-comaptibility (no action needed)
  154. break;
  155. case '-':
  156. fpar->ref_state_.flags_ |= std::ios::left;
  157. break;
  158. case '=':
  159. fpar->pad_scheme_ |= format_item_t::centered;
  160. break;
  161. case ' ':
  162. fpar->pad_scheme_ |= format_item_t::spacepad;
  163. break;
  164. case '+':
  165. fpar->ref_state_.flags_ |= std::ios::showpos;
  166. break;
  167. case '0':
  168. fpar->pad_scheme_ |= format_item_t::zeropad;
  169. // need to know alignment before really setting flags,
  170. // so just add 'zeropad' flag for now, it will be processed later.
  171. break;
  172. case '#':
  173. fpar->ref_state_.flags_ |= std::ios::showpoint | std::ios::showbase;
  174. break;
  175. default:
  176. goto parse_width;
  177. }
  178. ++i1;
  179. } // loop on flag.
  180. if( i1>=buf.size()) {
  181. maybe_throw_exception(exceptions);
  182. return true;
  183. }
  184. parse_width:
  185. // handle width spec
  186. skip_asterisk(buf, &i1, os); // skips 'asterisk fields' : *, or *N$
  187. i0 = i1; // save position before digits
  188. while (i1<buf.size() && wrap_isdigit(buf[i1], os))
  189. i1++;
  190. if (i1!=i0)
  191. { fpar->ref_state_.width_ = str2int( buf,i0, os, std::streamsize(0) ); }
  192. parse_precision:
  193. if( i1>=buf.size()) {
  194. maybe_throw_exception(exceptions);
  195. return true;
  196. }
  197. // handle precision spec
  198. if (buf[i1]=='.')
  199. {
  200. ++i1;
  201. skip_asterisk(buf, &i1, os);
  202. i0 = i1; // save position before digits
  203. while (i1<buf.size() && wrap_isdigit(buf[i1], os))
  204. ++i1;
  205. if(i1==i0)
  206. fpar->ref_state_.precision_ = 0;
  207. else
  208. fpar->ref_state_.precision_ = str2int(buf,i0, os, std::streamsize(0) );
  209. }
  210. // handle formatting-type flags :
  211. while( i1<buf.size() &&
  212. ( buf[i1]=='l' || buf[i1]=='L' || buf[i1]=='h') )
  213. ++i1;
  214. if( i1>=buf.size()) {
  215. maybe_throw_exception(exceptions);
  216. return true;
  217. }
  218. if( in_brackets && buf[i1]=='|' )
  219. {
  220. ++i1;
  221. return true;
  222. }
  223. switch (buf[i1])
  224. {
  225. case 'X':
  226. fpar->ref_state_.flags_ |= std::ios::uppercase;
  227. case 'p': // pointer => set hex.
  228. case 'x':
  229. fpar->ref_state_.flags_ &= ~std::ios::basefield;
  230. fpar->ref_state_.flags_ |= std::ios::hex;
  231. break;
  232. case 'o':
  233. fpar->ref_state_.flags_ &= ~std::ios::basefield;
  234. fpar->ref_state_.flags_ |= std::ios::oct;
  235. break;
  236. case 'E':
  237. fpar->ref_state_.flags_ |= std::ios::uppercase;
  238. case 'e':
  239. fpar->ref_state_.flags_ &= ~std::ios::floatfield;
  240. fpar->ref_state_.flags_ |= std::ios::scientific;
  241. fpar->ref_state_.flags_ &= ~std::ios::basefield;
  242. fpar->ref_state_.flags_ |= std::ios::dec;
  243. break;
  244. case 'f':
  245. fpar->ref_state_.flags_ &= ~std::ios::floatfield;
  246. fpar->ref_state_.flags_ |= std::ios::fixed;
  247. case 'u':
  248. case 'd':
  249. case 'i':
  250. fpar->ref_state_.flags_ &= ~std::ios::basefield;
  251. fpar->ref_state_.flags_ |= std::ios::dec;
  252. break;
  253. case 'T':
  254. ++i1;
  255. if( i1 >= buf.size())
  256. maybe_throw_exception(exceptions);
  257. else
  258. fpar->ref_state_.fill_ = buf[i1];
  259. fpar->pad_scheme_ |= format_item_t::tabulation;
  260. fpar->argN_ = format_item_t::argN_tabulation;
  261. break;
  262. case 't':
  263. fpar->ref_state_.fill_ = ' ';
  264. fpar->pad_scheme_ |= format_item_t::tabulation;
  265. fpar->argN_ = format_item_t::argN_tabulation;
  266. break;
  267. case 'G':
  268. fpar->ref_state_.flags_ |= std::ios::uppercase;
  269. break;
  270. case 'g': // 'g' conversion is default for floats.
  271. fpar->ref_state_.flags_ &= ~std::ios::basefield;
  272. fpar->ref_state_.flags_ |= std::ios::dec;
  273. // CLEAR all floatield flags, so stream will CHOOSE
  274. fpar->ref_state_.flags_ &= ~std::ios::floatfield;
  275. break;
  276. case 'C':
  277. case 'c':
  278. fpar->truncate_ = 1;
  279. break;
  280. case 'S':
  281. case 's':
  282. fpar->truncate_ = fpar->ref_state_.precision_;
  283. fpar->ref_state_.precision_ = -1;
  284. break;
  285. case 'n' :
  286. fpar->argN_ = format_item_t::argN_ignored;
  287. break;
  288. default:
  289. maybe_throw_exception(exceptions);
  290. }
  291. ++i1;
  292. if( in_brackets )
  293. {
  294. if( i1<buf.size() && buf[i1]=='|' )
  295. {
  296. ++i1;
  297. return true;
  298. }
  299. else maybe_throw_exception(exceptions);
  300. }
  301. return true;
  302. }
  303. } // detail namespace
  304. } // io namespace
  305. // -----------------------------------------------
  306. // format :: parse(..)
  307. void basic_format::parse(const string_t & buf)
  308. // parse the format-string
  309. {
  310. using namespace std;
  311. const char arg_mark = '%';
  312. bool ordered_args=true;
  313. int max_argN=-1;
  314. string_t::size_type i1=0;
  315. int num_items=0;
  316. // A: find upper_bound on num_items and allocates arrays
  317. i1=0;
  318. while( (i1=buf.find(arg_mark,i1)) != string::npos )
  319. {
  320. if( i1+1 >= buf.size() ) {
  321. if(exceptions() & io::bad_format_string_bit)
  322. boost::throw_exception(io::bad_format_string()); // must not end in "bla bla %"
  323. else break; // stop there, ignore last '%'
  324. }
  325. if(buf[i1+1] == buf[i1] ) { i1+=2; continue; } // escaped "%%" / "##"
  326. ++i1;
  327. // in case of %N% directives, dont count it double (wastes allocations..) :
  328. while(i1 < buf.size() && io::detail::wrap_isdigit(buf[i1],oss_)) ++i1;
  329. if( i1 < buf.size() && buf[i1] == arg_mark ) ++ i1;
  330. ++num_items;
  331. }
  332. items_.assign( num_items, format_item_t() );
  333. // B: Now the real parsing of the format string :
  334. num_items=0;
  335. i1 = 0;
  336. string_t::size_type i0 = i1;
  337. bool special_things=false;
  338. int cur_it=0;
  339. while( (i1=buf.find(arg_mark,i1)) != string::npos )
  340. {
  341. string_t & piece = (cur_it==0) ? prefix_ : items_[cur_it-1].appendix_;
  342. if( buf[i1+1] == buf[i1] ) // escaped mark, '%%'
  343. {
  344. piece += buf.substr(i0, i1-i0) + buf[i1];
  345. i1+=2; i0=i1;
  346. continue;
  347. }
  348. BOOST_ASSERT( static_cast<unsigned int>(cur_it) < items_.size() || cur_it==0);
  349. if(i1!=i0) piece += buf.substr(i0, i1-i0);
  350. ++i1;
  351. bool parse_ok;
  352. parse_ok = io::detail::parse_printf_directive(buf, &i1, &items_[cur_it], oss_, exceptions());
  353. if( ! parse_ok ) continue; // the directive will be printed verbatim
  354. i0=i1;
  355. items_[cur_it].compute_states(); // process complex options, like zeropad, into stream params.
  356. int argN=items_[cur_it].argN_;
  357. if(argN == format_item_t::argN_ignored)
  358. continue;
  359. if(argN ==format_item_t::argN_no_posit)
  360. ordered_args=false;
  361. else if(argN == format_item_t::argN_tabulation) special_things=true;
  362. else if(argN > max_argN) max_argN = argN;
  363. ++num_items;
  364. ++cur_it;
  365. } // loop on %'s
  366. BOOST_ASSERT(cur_it == num_items);
  367. // store the final piece of string
  368. string_t & piece = (cur_it==0) ? prefix_ : items_[cur_it-1].appendix_;
  369. piece += buf.substr(i0);
  370. if( !ordered_args)
  371. {
  372. if(max_argN >= 0 ) // dont mix positional with non-positionnal directives
  373. {
  374. if(exceptions() & io::bad_format_string_bit)
  375. boost::throw_exception(io::bad_format_string());
  376. // else do nothing. => positionnal arguments are processed as non-positionnal
  377. }
  378. // set things like it would have been with positional directives :
  379. int non_ordered_items = 0;
  380. for(int i=0; i< num_items; ++i)
  381. if(items_[i].argN_ == format_item_t::argN_no_posit)
  382. {
  383. items_[i].argN_ = non_ordered_items;
  384. ++non_ordered_items;
  385. }
  386. max_argN = non_ordered_items-1;
  387. }
  388. // C: set some member data :
  389. items_.resize(num_items);
  390. if(special_things) style_ |= special_needs;
  391. num_args_ = max_argN + 1;
  392. if(ordered_args) style_ |= ordered;
  393. else style_ &= ~ordered;
  394. }
  395. } // namespace boost
  396. #endif // BOOST_FORMAT_PARSING_HPP