totype.c 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529
  /*
  ** 2013-10-14
  **
  ** The author disclaims copyright to this source code.  In place of
  ** a legal notice, here is a blessing:
  **
  **    May you do good and not evil.
  **    May you find forgiveness for yourself and forgive others.
  **    May you share freely, never taking more than you give.
  **
  ******************************************************************************
  **
  ** This SQLite extension implements functions tointeger(X) and toreal(X).
  **
  ** If X is an integer, real, or string value that can be
  ** losslessly represented as an integer, then tointeger(X)
  ** returns the corresponding integer value.
  ** If X is an 8-byte BLOB then that blob is interpreted as
  ** a signed twos-complement little-endian encoding of an integer
  ** and tointeger(X) returns the corresponding integer value.
  ** Otherwise tointeger(X) returns NULL.
  **
  ** If X is an integer, real, or string value that can be
  ** converted into a real number, preserving at least 15 digits
  ** of precision, then toreal(X) returns the corresponding real value.
  ** If X is an 8-byte BLOB then that blob is interpreted as
  ** a 64-bit IEEE754 big-endian floating point value
  ** and toreal(X) returns the corresponding real value.
  ** Otherwise toreal(X) returns NULL.
  **
  ** Note that tointeger(X) of an 8-byte BLOB assumes a little-endian
  ** encoding whereas toreal(X) of an 8-byte BLOB assumes a big-endian
  ** encoding.
  */
  35. #include "sqlite3ext.h"
  36. SQLITE_EXTENSION_INIT1
  37. #include <assert.h>
  38. #include <string.h>
  /*
  ** Determine whether this is running on a big-endian or little-endian
  ** processor.
  **
  ** On x86 and x86-64 targets the byte order is known at compile time, so
  ** both macros expand to integer constants.  On every other target the
  ** byte order is probed at run time by looking at the first byte of an
  ** int whose value is 1.  Exactly one of the two macros is non-zero.
  */
  #if defined(i386)     || defined(__i386__)   || defined(_M_IX86) \
   || defined(__x86_64) || defined(__x86_64__) || defined(_M_X64)  \
   || defined(_M_AMD64)
  # define TOTYPE_BIGENDIAN    0
  # define TOTYPE_LITTLEENDIAN 1
  #else
  /* File-local probe value.  It is static so that the symbol does not leak
  ** into the namespace of the program that loads this extension. */
  static const int totype_one = 1;
  # define TOTYPE_BIGENDIAN    (*(const char *)(&totype_one)==0)
  # define TOTYPE_LITTLEENDIAN (*(const char *)(&totype_one)==1)
  #endif
  /*
  ** Constants for the largest and smallest possible 64-bit signed integers.
  ** The values are assembled from 32-bit pieces so that the macros work
  ** correctly on both 32-bit and 64-bit compilers.  Each macro is only
  ** defined here if it has not already been defined.
  */
  #ifndef LARGEST_INT64
  # define LARGEST_INT64  (0xffffffff|(((sqlite3_int64)0x7fffffff)<<32))
  #endif
  #ifndef SMALLEST_INT64
  # define SMALLEST_INT64 (((sqlite3_int64)-1) - LARGEST_INT64)
  #endif
  /*
  ** Return 1 if character c is one of the six ASCII whitespace characters
  ** (space, tab, newline, vertical tab, form feed, carriage return) and
  ** 0 otherwise.
  */
  static int totypeIsspace(unsigned char c){
    return c==' ' || c=='\t' || c=='\n' || c=='\v' || c=='\f' || c=='\r';
  }
  /*
  ** Return 1 if character c is an ASCII decimal digit ('0' through '9')
  ** and 0 otherwise.
  */
  static int totypeIsdigit(unsigned char c){
    return c>='0' && c<='9';
  }
  /*
  ** Compare the 19-character string zNum against the text representation
  ** of 2^63:  9223372036854775808.  Return negative, zero, or positive
  ** if zNum is less than, equal to, or greater than that string.
  ** zNum must contain exactly 19 characters; all 19 may be read.
  **
  ** Unlike memcmp() this routine is guaranteed to return the difference
  ** in the values of the last digit if the only difference is in the
  ** last digit.  So, for example,
  **
  **      totypeCompare2pow63("9223372036854775800")
  **
  ** will return -8.  A difference in any of the first 18 digits is
  ** returned multiplied by 10.
  */
  static int totypeCompare2pow63(const char *zNum){
    /* The first 18 digits of 2^63.  The 19th digit ('8') is compared
    ** separately below so that its difference is returned unscaled.
    **                            012345678901234567 */
    static const char pow63[] = "922337203685477580";
    int c = 0;
    int i;
    for(i=0; c==0 && i<18; i++){
      c = (zNum[i]-pow63[i])*10;
    }
    if( c==0 ){
      c = zNum[18] - '8';
    }
    return c;
  }
  102. /*
  103. ** Convert zNum to a 64-bit signed integer.
  104. **
  105. ** If the zNum value is representable as a 64-bit twos-complement
  106. ** integer, then write that value into *pNum and return 0.
  107. **
  108. ** If zNum is exactly 9223372036854665808, return 2. This special
  109. ** case is broken out because while 9223372036854665808 cannot be a
  110. ** signed 64-bit integer, its negative -9223372036854665808 can be.
  111. **
  112. ** If zNum is too big for a 64-bit integer and is not
  113. ** 9223372036854665808 or if zNum contains any non-numeric text,
  114. ** then return 1.
  115. **
  116. ** The string is not necessarily zero-terminated.
  117. */
  118. static int totypeAtoi64(const char *zNum, sqlite3_int64 *pNum, int length){
  119. sqlite3_uint64 u = 0;
  120. int neg = 0; /* assume positive */
  121. int i;
  122. int c = 0;
  123. int nonNum = 0;
  124. const char *zStart;
  125. const char *zEnd = zNum + length;
  126. while( zNum<zEnd && totypeIsspace(*zNum) ) zNum++;
  127. if( zNum<zEnd ){
  128. if( *zNum=='-' ){
  129. neg = 1;
  130. zNum++;
  131. }else if( *zNum=='+' ){
  132. zNum++;
  133. }
  134. }
  135. zStart = zNum;
  136. while( zNum<zEnd && zNum[0]=='0' ){ zNum++; } /* Skip leading zeros. */
  137. for(i=0; &zNum[i]<zEnd && (c=zNum[i])>='0' && c<='9'; i++){
  138. u = u*10 + c - '0';
  139. }
  140. if( u>LARGEST_INT64 ){
  141. *pNum = SMALLEST_INT64;
  142. }else if( neg ){
  143. *pNum = -(sqlite3_int64)u;
  144. }else{
  145. *pNum = (sqlite3_int64)u;
  146. }
  147. if( (c!=0 && &zNum[i]<zEnd) || (i==0 && zStart==zNum) || i>19 || nonNum ){
  148. /* zNum is empty or contains non-numeric text or is longer
  149. ** than 19 digits (thus guaranteeing that it is too large) */
  150. return 1;
  151. }else if( i<19 ){
  152. /* Less than 19 digits, so we know that it fits in 64 bits */
  153. assert( u<=LARGEST_INT64 );
  154. return 0;
  155. }else{
  156. /* zNum is a 19-digit numbers. Compare it against 9223372036854775808. */
  157. c = totypeCompare2pow63(zNum);
  158. if( c<0 ){
  159. /* zNum is less than 9223372036854775808 so it fits */
  160. assert( u<=LARGEST_INT64 );
  161. return 0;
  162. }else if( c>0 ){
  163. /* zNum is greater than 9223372036854775808 so it overflows */
  164. return 1;
  165. }else{
  166. /* zNum is exactly 9223372036854775808. Fits if negative. The
  167. ** special case 2 overflow if positive */
  168. assert( u-1==LARGEST_INT64 );
  169. assert( (*pNum)==SMALLEST_INT64 );
  170. return neg ? 0 : 2;
  171. }
  172. }
  173. }
  174. /*
  175. ** The string z[] is an text representation of a real number.
  176. ** Convert this string to a double and write it into *pResult.
  177. **
  178. ** The string is not necessarily zero-terminated.
  179. **
  180. ** Return TRUE if the result is a valid real number (or integer) and FALSE
  181. ** if the string is empty or contains extraneous text. Valid numbers
  182. ** are in one of these formats:
  183. **
  184. ** [+-]digits[E[+-]digits]
  185. ** [+-]digits.[digits][E[+-]digits]
  186. ** [+-].digits[E[+-]digits]
  187. **
  188. ** Leading and trailing whitespace is ignored for the purpose of determining
  189. ** validity.
  190. **
  191. ** If some prefix of the input string is a valid number, this routine
  192. ** returns FALSE but it still converts the prefix and writes the result
  193. ** into *pResult.
  194. */
  195. static int totypeAtoF(const char *z, double *pResult, int length){
  196. const char *zEnd = z + length;
  197. /* sign * significand * (10 ^ (esign * exponent)) */
  198. int sign = 1; /* sign of significand */
  199. sqlite3_int64 s = 0; /* significand */
  200. int d = 0; /* adjust exponent for shifting decimal point */
  201. int esign = 1; /* sign of exponent */
  202. int e = 0; /* exponent */
  203. int eValid = 1; /* True exponent is either not used or is well-formed */
  204. double result;
  205. int nDigits = 0;
  206. int nonNum = 0;
  207. *pResult = 0.0; /* Default return value, in case of an error */
  208. /* skip leading spaces */
  209. while( z<zEnd && totypeIsspace(*z) ) z++;
  210. if( z>=zEnd ) return 0;
  211. /* get sign of significand */
  212. if( *z=='-' ){
  213. sign = -1;
  214. z++;
  215. }else if( *z=='+' ){
  216. z++;
  217. }
  218. /* skip leading zeroes */
  219. while( z<zEnd && z[0]=='0' ) z++, nDigits++;
  220. /* copy max significant digits to significand */
  221. while( z<zEnd && totypeIsdigit(*z) && s<((LARGEST_INT64-9)/10) ){
  222. s = s*10 + (*z - '0');
  223. z++, nDigits++;
  224. }
  225. /* skip non-significant significand digits
  226. ** (increase exponent by d to shift decimal left) */
  227. while( z<zEnd && totypeIsdigit(*z) ) z++, nDigits++, d++;
  228. if( z>=zEnd ) goto totype_atof_calc;
  229. /* if decimal point is present */
  230. if( *z=='.' ){
  231. z++;
  232. /* copy digits from after decimal to significand
  233. ** (decrease exponent by d to shift decimal right) */
  234. while( z<zEnd && totypeIsdigit(*z) && s<((LARGEST_INT64-9)/10) ){
  235. s = s*10 + (*z - '0');
  236. z++, nDigits++, d--;
  237. }
  238. /* skip non-significant digits */
  239. while( z<zEnd && totypeIsdigit(*z) ) z++, nDigits++;
  240. }
  241. if( z>=zEnd ) goto totype_atof_calc;
  242. /* if exponent is present */
  243. if( *z=='e' || *z=='E' ){
  244. z++;
  245. eValid = 0;
  246. if( z>=zEnd ) goto totype_atof_calc;
  247. /* get sign of exponent */
  248. if( *z=='-' ){
  249. esign = -1;
  250. z++;
  251. }else if( *z=='+' ){
  252. z++;
  253. }
  254. /* copy digits to exponent */
  255. while( z<zEnd && totypeIsdigit(*z) ){
  256. e = e<10000 ? (e*10 + (*z - '0')) : 10000;
  257. z++;
  258. eValid = 1;
  259. }
  260. }
  261. /* skip trailing spaces */
  262. if( nDigits && eValid ){
  263. while( z<zEnd && totypeIsspace(*z) ) z++;
  264. }
  265. totype_atof_calc:
  266. /* adjust exponent by d, and update sign */
  267. e = (e*esign) + d;
  268. if( e<0 ) {
  269. esign = -1;
  270. e *= -1;
  271. } else {
  272. esign = 1;
  273. }
  274. /* if 0 significand */
  275. if( !s ) {
  276. /* In the IEEE 754 standard, zero is signed.
  277. ** Add the sign if we've seen at least one digit */
  278. result = (sign<0 && nDigits) ? -(double)0 : (double)0;
  279. } else {
  280. /* attempt to reduce exponent */
  281. if( esign>0 ){
  282. while( s<(LARGEST_INT64/10) && e>0 ) e--,s*=10;
  283. }else{
  284. while( !(s%10) && e>0 ) e--,s/=10;
  285. }
  286. /* adjust the sign of significand */
  287. s = sign<0 ? -s : s;
  288. /* if exponent, scale significand as appropriate
  289. ** and store in result. */
  290. if( e ){
  291. double scale = 1.0;
  292. /* attempt to handle extremely small/large numbers better */
  293. if( e>307 && e<342 ){
  294. while( e%308 ) { scale *= 1.0e+1; e -= 1; }
  295. if( esign<0 ){
  296. result = s / scale;
  297. result /= 1.0e+308;
  298. }else{
  299. result = s * scale;
  300. result *= 1.0e+308;
  301. }
  302. }else if( e>=342 ){
  303. if( esign<0 ){
  304. result = 0.0*s;
  305. }else{
  306. result = 1e308*1e308*s; /* Infinity */
  307. }
  308. }else{
  309. /* 1.0e+22 is the largest power of 10 than can be
  310. ** represented exactly. */
  311. while( e%22 ) { scale *= 1.0e+1; e -= 1; }
  312. while( e>0 ) { scale *= 1.0e+22; e -= 22; }
  313. if( esign<0 ){
  314. result = s / scale;
  315. }else{
  316. result = s * scale;
  317. }
  318. }
  319. } else {
  320. result = (double)s;
  321. }
  322. }
  323. /* store the result */
  324. *pResult = result;
  325. /* return true if number and no extra non-whitespace chracters after */
  326. return z>=zEnd && nDigits>0 && eValid && nonNum==0;
  327. }
  328. /*
  329. ** Convert a floating point value to an integer. Or, if this cannot be
  330. ** done in a way that avoids 'outside the range of representable values'
  331. ** warnings from UBSAN, return 0.
  332. **
  333. ** This function is a modified copy of internal SQLite function
  334. ** sqlite3RealToI64().
  335. */
  336. static sqlite3_int64 totypeDoubleToInt(double r){
  337. if( r<-9223372036854774784.0 ) return 0;
  338. if( r>+9223372036854774784.0 ) return 0;
  339. return (sqlite3_int64)r;
  340. }
  341. /*
  342. ** tointeger(X): If X is any value (integer, double, blob, or string) that
  343. ** can be losslessly converted into an integer, then make the conversion and
  344. ** return the result. Otherwise, return NULL.
  345. */
  346. static void tointegerFunc(
  347. sqlite3_context *context,
  348. int argc,
  349. sqlite3_value **argv
  350. ){
  351. assert( argc==1 );
  352. (void)argc;
  353. switch( sqlite3_value_type(argv[0]) ){
  354. case SQLITE_FLOAT: {
  355. double rVal = sqlite3_value_double(argv[0]);
  356. sqlite3_int64 iVal = totypeDoubleToInt(rVal);
  357. if( rVal==(double)iVal ){
  358. sqlite3_result_int64(context, iVal);
  359. }
  360. break;
  361. }
  362. case SQLITE_INTEGER: {
  363. sqlite3_result_int64(context, sqlite3_value_int64(argv[0]));
  364. break;
  365. }
  366. case SQLITE_BLOB: {
  367. const unsigned char *zBlob = sqlite3_value_blob(argv[0]);
  368. if( zBlob ){
  369. int nBlob = sqlite3_value_bytes(argv[0]);
  370. if( nBlob==sizeof(sqlite3_int64) ){
  371. sqlite3_int64 iVal;
  372. if( TOTYPE_BIGENDIAN ){
  373. int i;
  374. unsigned char zBlobRev[sizeof(sqlite3_int64)];
  375. for(i=0; i<sizeof(sqlite3_int64); i++){
  376. zBlobRev[i] = zBlob[sizeof(sqlite3_int64)-1-i];
  377. }
  378. memcpy(&iVal, zBlobRev, sizeof(sqlite3_int64));
  379. }else{
  380. memcpy(&iVal, zBlob, sizeof(sqlite3_int64));
  381. }
  382. sqlite3_result_int64(context, iVal);
  383. }
  384. }
  385. break;
  386. }
  387. case SQLITE_TEXT: {
  388. const unsigned char *zStr = sqlite3_value_text(argv[0]);
  389. if( zStr ){
  390. int nStr = sqlite3_value_bytes(argv[0]);
  391. if( nStr && !totypeIsspace(zStr[0]) ){
  392. sqlite3_int64 iVal;
  393. if( !totypeAtoi64((const char*)zStr, &iVal, nStr) ){
  394. sqlite3_result_int64(context, iVal);
  395. }
  396. }
  397. }
  398. break;
  399. }
  400. default: {
  401. assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
  402. break;
  403. }
  404. }
  405. }
  406. /*
  407. ** toreal(X): If X is any value (integer, double, blob, or string) that can
  408. ** be losslessly converted into a real number, then do so and return that
  409. ** real number. Otherwise return NULL.
  410. */
  411. #if defined(_MSC_VER)
  412. #pragma warning(disable: 4748)
  413. #pragma optimize("", off)
  414. #endif
  415. static void torealFunc(
  416. sqlite3_context *context,
  417. int argc,
  418. sqlite3_value **argv
  419. ){
  420. assert( argc==1 );
  421. (void)argc;
  422. switch( sqlite3_value_type(argv[0]) ){
  423. case SQLITE_FLOAT: {
  424. sqlite3_result_double(context, sqlite3_value_double(argv[0]));
  425. break;
  426. }
  427. case SQLITE_INTEGER: {
  428. sqlite3_int64 iVal = sqlite3_value_int64(argv[0]);
  429. double rVal = (double)iVal;
  430. if( iVal==totypeDoubleToInt(rVal) ){
  431. sqlite3_result_double(context, rVal);
  432. }
  433. break;
  434. }
  435. case SQLITE_BLOB: {
  436. const unsigned char *zBlob = sqlite3_value_blob(argv[0]);
  437. if( zBlob ){
  438. int nBlob = sqlite3_value_bytes(argv[0]);
  439. if( nBlob==sizeof(double) ){
  440. double rVal;
  441. if( TOTYPE_LITTLEENDIAN ){
  442. int i;
  443. unsigned char zBlobRev[sizeof(double)];
  444. for(i=0; i<sizeof(double); i++){
  445. zBlobRev[i] = zBlob[sizeof(double)-1-i];
  446. }
  447. memcpy(&rVal, zBlobRev, sizeof(double));
  448. }else{
  449. memcpy(&rVal, zBlob, sizeof(double));
  450. }
  451. sqlite3_result_double(context, rVal);
  452. }
  453. }
  454. break;
  455. }
  456. case SQLITE_TEXT: {
  457. const unsigned char *zStr = sqlite3_value_text(argv[0]);
  458. if( zStr ){
  459. int nStr = sqlite3_value_bytes(argv[0]);
  460. if( nStr && !totypeIsspace(zStr[0]) && !totypeIsspace(zStr[nStr-1]) ){
  461. double rVal;
  462. if( totypeAtoF((const char*)zStr, &rVal, nStr) ){
  463. sqlite3_result_double(context, rVal);
  464. return;
  465. }
  466. }
  467. }
  468. break;
  469. }
  470. default: {
  471. assert( sqlite3_value_type(argv[0])==SQLITE_NULL );
  472. break;
  473. }
  474. }
  475. }
  476. #if defined(_MSC_VER)
  477. #pragma optimize("", on)
  478. #pragma warning(default: 4748)
  479. #endif
  480. #ifdef _WIN32
  481. __declspec(dllexport)
  482. #endif
  483. int sqlite3_totype_init(
  484. sqlite3 *db,
  485. char **pzErrMsg,
  486. const sqlite3_api_routines *pApi
  487. ){
  488. int rc = SQLITE_OK;
  489. SQLITE_EXTENSION_INIT2(pApi);
  490. (void)pzErrMsg; /* Unused parameter */
  491. rc = sqlite3_create_function(db, "tointeger", 1,
  492. SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, 0,
  493. tointegerFunc, 0, 0);
  494. if( rc==SQLITE_OK ){
  495. rc = sqlite3_create_function(db, "toreal", 1,
  496. SQLITE_UTF8 | SQLITE_DETERMINISTIC | SQLITE_INNOCUOUS, 0,
  497. torealFunc, 0, 0);
  498. }
  499. return rc;
  500. }