dbhash.c 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. /*
  2. ** 2016-06-07
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. *************************************************************************
  12. **
  13. ** This is a utility program that computes an SHA1 hash on the content
  14. ** of an SQLite database.
  15. **
  16. ** The hash is computed over just the content of the database. Free
  17. ** space inside of the database file, and alternative on-disk representations
  18. ** of the same content (ex: UTF8 vs UTF16) do not affect the hash. So,
  19. ** for example, the database file page size, encoding, and auto_vacuum setting
  20. ** can all be changed without changing the hash.
  21. */
  22. #include <stdio.h>
  23. #include <stdlib.h>
  24. #include <stdarg.h>
  25. #include <ctype.h>
  26. #include <string.h>
  27. #include <assert.h>
  28. #include "sqlite3.h"
  29. /* Context for the SHA1 hash */
  30. typedef struct SHA1Context SHA1Context;
  31. struct SHA1Context {
  32. unsigned int state[5];
  33. unsigned int count[2];
  34. unsigned char buffer[64];
  35. };
  36. /*
  37. ** All global variables are gathered into the "g" singleton.
  38. */
  39. struct GlobalVars {
  40. const char *zArgv0; /* Name of program */
  41. unsigned fDebug; /* Debug flags */
  42. sqlite3 *db; /* The database connection */
  43. SHA1Context cx; /* SHA1 hash context */
  44. } g;
  45. /*
  46. ** Debugging flags
  47. */
  48. #define DEBUG_FULLTRACE 0x00000001 /* Trace hash to stderr */
  49. /******************************************************************************
  50. ** The Hash Engine
  51. **
  52. ** Modify these routines (and appropriate state fields in global variable 'g')
  53. ** in order to compute a different (better?) hash of the database.
  54. */
  55. /*
  56. * blk0() and blk() perform the initial expand.
  57. * I got the idea of expanding during the round function from SSLeay
  58. *
  59. * blk0le() for little-endian and blk0be() for big-endian.
  60. */
  61. #define SHA_ROT(x,l,r) ((x) << (l) | (x) >> (r))
  62. #define rol(x,k) SHA_ROT(x,k,32-(k))
  63. #define ror(x,k) SHA_ROT(x,32-(k),k)
  64. #define blk0le(i) (block[i] = (ror(block[i],8)&0xFF00FF00) \
  65. |(rol(block[i],8)&0x00FF00FF))
  66. #define blk0be(i) block[i]
  67. #define blk(i) (block[i&15] = rol(block[(i+13)&15]^block[(i+8)&15] \
  68. ^block[(i+2)&15]^block[i&15],1))
  69. /*
  70. * (R0+R1), R2, R3, R4 are the different operations (rounds) used in SHA1
  71. *
  72. * Rl0() for little-endian and Rb0() for big-endian. Endianness is
  73. * determined at run-time.
  74. */
  75. #define Rl0(v,w,x,y,z,i) \
  76. z+=((w&(x^y))^y)+blk0le(i)+0x5A827999+rol(v,5);w=ror(w,2);
  77. #define Rb0(v,w,x,y,z,i) \
  78. z+=((w&(x^y))^y)+blk0be(i)+0x5A827999+rol(v,5);w=ror(w,2);
  79. #define R1(v,w,x,y,z,i) \
  80. z+=((w&(x^y))^y)+blk(i)+0x5A827999+rol(v,5);w=ror(w,2);
  81. #define R2(v,w,x,y,z,i) \
  82. z+=(w^x^y)+blk(i)+0x6ED9EBA1+rol(v,5);w=ror(w,2);
  83. #define R3(v,w,x,y,z,i) \
  84. z+=(((w|x)&y)|(w&x))+blk(i)+0x8F1BBCDC+rol(v,5);w=ror(w,2);
  85. #define R4(v,w,x,y,z,i) \
  86. z+=(w^x^y)+blk(i)+0xCA62C1D6+rol(v,5);w=ror(w,2);
  87. /*
  88. * Hash a single 512-bit block. This is the core of the algorithm.
  89. */
  90. #define a qq[0]
  91. #define b qq[1]
  92. #define c qq[2]
  93. #define d qq[3]
  94. #define e qq[4]
  95. void SHA1Transform(unsigned int state[5], const unsigned char buffer[64]){
  96. unsigned int qq[5]; /* a, b, c, d, e; */
  97. static int one = 1;
  98. unsigned int block[16];
  99. memcpy(block, buffer, 64);
  100. memcpy(qq,state,5*sizeof(unsigned int));
  101. /* Copy g.cx.state[] to working vars */
  102. /*
  103. a = state[0];
  104. b = state[1];
  105. c = state[2];
  106. d = state[3];
  107. e = state[4];
  108. */
  109. /* 4 rounds of 20 operations each. Loop unrolled. */
  110. if( 1 == *(unsigned char*)&one ){
  111. Rl0(a,b,c,d,e, 0); Rl0(e,a,b,c,d, 1); Rl0(d,e,a,b,c, 2); Rl0(c,d,e,a,b, 3);
  112. Rl0(b,c,d,e,a, 4); Rl0(a,b,c,d,e, 5); Rl0(e,a,b,c,d, 6); Rl0(d,e,a,b,c, 7);
  113. Rl0(c,d,e,a,b, 8); Rl0(b,c,d,e,a, 9); Rl0(a,b,c,d,e,10); Rl0(e,a,b,c,d,11);
  114. Rl0(d,e,a,b,c,12); Rl0(c,d,e,a,b,13); Rl0(b,c,d,e,a,14); Rl0(a,b,c,d,e,15);
  115. }else{
  116. Rb0(a,b,c,d,e, 0); Rb0(e,a,b,c,d, 1); Rb0(d,e,a,b,c, 2); Rb0(c,d,e,a,b, 3);
  117. Rb0(b,c,d,e,a, 4); Rb0(a,b,c,d,e, 5); Rb0(e,a,b,c,d, 6); Rb0(d,e,a,b,c, 7);
  118. Rb0(c,d,e,a,b, 8); Rb0(b,c,d,e,a, 9); Rb0(a,b,c,d,e,10); Rb0(e,a,b,c,d,11);
  119. Rb0(d,e,a,b,c,12); Rb0(c,d,e,a,b,13); Rb0(b,c,d,e,a,14); Rb0(a,b,c,d,e,15);
  120. }
  121. R1(e,a,b,c,d,16); R1(d,e,a,b,c,17); R1(c,d,e,a,b,18); R1(b,c,d,e,a,19);
  122. R2(a,b,c,d,e,20); R2(e,a,b,c,d,21); R2(d,e,a,b,c,22); R2(c,d,e,a,b,23);
  123. R2(b,c,d,e,a,24); R2(a,b,c,d,e,25); R2(e,a,b,c,d,26); R2(d,e,a,b,c,27);
  124. R2(c,d,e,a,b,28); R2(b,c,d,e,a,29); R2(a,b,c,d,e,30); R2(e,a,b,c,d,31);
  125. R2(d,e,a,b,c,32); R2(c,d,e,a,b,33); R2(b,c,d,e,a,34); R2(a,b,c,d,e,35);
  126. R2(e,a,b,c,d,36); R2(d,e,a,b,c,37); R2(c,d,e,a,b,38); R2(b,c,d,e,a,39);
  127. R3(a,b,c,d,e,40); R3(e,a,b,c,d,41); R3(d,e,a,b,c,42); R3(c,d,e,a,b,43);
  128. R3(b,c,d,e,a,44); R3(a,b,c,d,e,45); R3(e,a,b,c,d,46); R3(d,e,a,b,c,47);
  129. R3(c,d,e,a,b,48); R3(b,c,d,e,a,49); R3(a,b,c,d,e,50); R3(e,a,b,c,d,51);
  130. R3(d,e,a,b,c,52); R3(c,d,e,a,b,53); R3(b,c,d,e,a,54); R3(a,b,c,d,e,55);
  131. R3(e,a,b,c,d,56); R3(d,e,a,b,c,57); R3(c,d,e,a,b,58); R3(b,c,d,e,a,59);
  132. R4(a,b,c,d,e,60); R4(e,a,b,c,d,61); R4(d,e,a,b,c,62); R4(c,d,e,a,b,63);
  133. R4(b,c,d,e,a,64); R4(a,b,c,d,e,65); R4(e,a,b,c,d,66); R4(d,e,a,b,c,67);
  134. R4(c,d,e,a,b,68); R4(b,c,d,e,a,69); R4(a,b,c,d,e,70); R4(e,a,b,c,d,71);
  135. R4(d,e,a,b,c,72); R4(c,d,e,a,b,73); R4(b,c,d,e,a,74); R4(a,b,c,d,e,75);
  136. R4(e,a,b,c,d,76); R4(d,e,a,b,c,77); R4(c,d,e,a,b,78); R4(b,c,d,e,a,79);
  137. /* Add the working vars back into context.state[] */
  138. state[0] += a;
  139. state[1] += b;
  140. state[2] += c;
  141. state[3] += d;
  142. state[4] += e;
  143. }
  144. /* Initialize the SHA1 hash */
  145. static void hash_init(void){
  146. /* SHA1 initialization constants */
  147. g.cx.state[0] = 0x67452301;
  148. g.cx.state[1] = 0xEFCDAB89;
  149. g.cx.state[2] = 0x98BADCFE;
  150. g.cx.state[3] = 0x10325476;
  151. g.cx.state[4] = 0xC3D2E1F0;
  152. g.cx.count[0] = g.cx.count[1] = 0;
  153. }
  154. /* Add new content to the SHA1 hash */
  155. static void hash_step(const unsigned char *data, unsigned int len){
  156. unsigned int i, j;
  157. j = g.cx.count[0];
  158. if( (g.cx.count[0] += len << 3) < j ){
  159. g.cx.count[1] += (len>>29)+1;
  160. }
  161. j = (j >> 3) & 63;
  162. if( (j + len) > 63 ){
  163. (void)memcpy(&g.cx.buffer[j], data, (i = 64-j));
  164. SHA1Transform(g.cx.state, g.cx.buffer);
  165. for(; i + 63 < len; i += 64){
  166. SHA1Transform(g.cx.state, &data[i]);
  167. }
  168. j = 0;
  169. }else{
  170. i = 0;
  171. }
  172. (void)memcpy(&g.cx.buffer[j], &data[i], len - i);
  173. }
  174. /* Add padding and compute and output the message digest. */
  175. static void hash_finish(const char *zName){
  176. unsigned int i;
  177. unsigned char finalcount[8];
  178. unsigned char digest[20];
  179. static const char zEncode[] = "0123456789abcdef";
  180. char zOut[41];
  181. for (i = 0; i < 8; i++){
  182. finalcount[i] = (unsigned char)((g.cx.count[(i >= 4 ? 0 : 1)]
  183. >> ((3-(i & 3)) * 8) ) & 255); /* Endian independent */
  184. }
  185. hash_step((const unsigned char *)"\200", 1);
  186. while ((g.cx.count[0] & 504) != 448){
  187. hash_step((const unsigned char *)"\0", 1);
  188. }
  189. hash_step(finalcount, 8); /* Should cause a SHA1Transform() */
  190. for (i = 0; i < 20; i++){
  191. digest[i] = (unsigned char)((g.cx.state[i>>2] >> ((3-(i & 3)) * 8) ) & 255);
  192. }
  193. for(i=0; i<20; i++){
  194. zOut[i*2] = zEncode[(digest[i]>>4)&0xf];
  195. zOut[i*2+1] = zEncode[digest[i] & 0xf];
  196. }
  197. zOut[i*2]= 0;
  198. printf("%s %s\n", zOut, zName);
  199. }
  200. /* End of the hashing logic
  201. *******************************************************************************/
  202. /*
  203. ** Print an error resulting from faulting command-line arguments and
  204. ** abort the program.
  205. */
  206. static void cmdlineError(const char *zFormat, ...){
  207. va_list ap;
  208. fprintf(stderr, "%s: ", g.zArgv0);
  209. va_start(ap, zFormat);
  210. vfprintf(stderr, zFormat, ap);
  211. va_end(ap);
  212. fprintf(stderr, "\n\"%s --help\" for more help\n", g.zArgv0);
  213. exit(1);
  214. }
  215. /*
  216. ** Print an error message for an error that occurs at runtime, then
  217. ** abort the program.
  218. */
  219. static void runtimeError(const char *zFormat, ...){
  220. va_list ap;
  221. fprintf(stderr, "%s: ", g.zArgv0);
  222. va_start(ap, zFormat);
  223. vfprintf(stderr, zFormat, ap);
  224. va_end(ap);
  225. fprintf(stderr, "\n");
  226. exit(1);
  227. }
  228. /*
  229. ** Prepare a new SQL statement. Print an error and abort if anything
  230. ** goes wrong.
  231. */
  232. static sqlite3_stmt *db_vprepare(const char *zFormat, va_list ap){
  233. char *zSql;
  234. int rc;
  235. sqlite3_stmt *pStmt;
  236. zSql = sqlite3_vmprintf(zFormat, ap);
  237. if( zSql==0 ) runtimeError("out of memory");
  238. rc = sqlite3_prepare_v2(g.db, zSql, -1, &pStmt, 0);
  239. if( rc ){
  240. runtimeError("SQL statement error: %s\n\"%s\"", sqlite3_errmsg(g.db),
  241. zSql);
  242. }
  243. sqlite3_free(zSql);
  244. return pStmt;
  245. }
  246. static sqlite3_stmt *db_prepare(const char *zFormat, ...){
  247. va_list ap;
  248. sqlite3_stmt *pStmt;
  249. va_start(ap, zFormat);
  250. pStmt = db_vprepare(zFormat, ap);
  251. va_end(ap);
  252. return pStmt;
  253. }
  254. /*
  255. ** Compute the hash for all rows of the query formed from the printf-style
  256. ** zFormat and its argument.
  257. */
  258. static void hash_one_query(const char *zFormat, ...){
  259. va_list ap;
  260. sqlite3_stmt *pStmt; /* The query defined by zFormat and "..." */
  261. int nCol; /* Number of columns in the result set */
  262. int i; /* Loop counter */
  263. /* Prepare the query defined by zFormat and "..." */
  264. va_start(ap, zFormat);
  265. pStmt = db_vprepare(zFormat, ap);
  266. va_end(ap);
  267. nCol = sqlite3_column_count(pStmt);
  268. /* Compute a hash over the result of the query */
  269. while( SQLITE_ROW==sqlite3_step(pStmt) ){
  270. for(i=0; i<nCol; i++){
  271. switch( sqlite3_column_type(pStmt,i) ){
  272. case SQLITE_NULL: {
  273. hash_step((const unsigned char*)"0",1);
  274. if( g.fDebug & DEBUG_FULLTRACE ) fprintf(stderr, "NULL\n");
  275. break;
  276. }
  277. case SQLITE_INTEGER: {
  278. sqlite3_uint64 u;
  279. int j;
  280. unsigned char x[8];
  281. sqlite3_int64 v = sqlite3_column_int64(pStmt,i);
  282. memcpy(&u, &v, 8);
  283. for(j=7; j>=0; j--){
  284. x[j] = u & 0xff;
  285. u >>= 8;
  286. }
  287. hash_step((const unsigned char*)"1",1);
  288. hash_step(x,8);
  289. if( g.fDebug & DEBUG_FULLTRACE ){
  290. fprintf(stderr, "INT %s\n", sqlite3_column_text(pStmt,i));
  291. }
  292. break;
  293. }
  294. case SQLITE_FLOAT: {
  295. sqlite3_uint64 u;
  296. int j;
  297. unsigned char x[8];
  298. double r = sqlite3_column_double(pStmt,i);
  299. memcpy(&u, &r, 8);
  300. for(j=7; j>=0; j--){
  301. x[j] = u & 0xff;
  302. u >>= 8;
  303. }
  304. hash_step((const unsigned char*)"2",1);
  305. hash_step(x,8);
  306. if( g.fDebug & DEBUG_FULLTRACE ){
  307. fprintf(stderr, "FLOAT %s\n", sqlite3_column_text(pStmt,i));
  308. }
  309. break;
  310. }
  311. case SQLITE_TEXT: {
  312. int n = sqlite3_column_bytes(pStmt, i);
  313. const unsigned char *z = sqlite3_column_text(pStmt, i);
  314. hash_step((const unsigned char*)"3", 1);
  315. hash_step(z, n);
  316. if( g.fDebug & DEBUG_FULLTRACE ){
  317. fprintf(stderr, "TEXT '%s'\n", sqlite3_column_text(pStmt,i));
  318. }
  319. break;
  320. }
  321. case SQLITE_BLOB: {
  322. int n = sqlite3_column_bytes(pStmt, i);
  323. const unsigned char *z = sqlite3_column_blob(pStmt, i);
  324. hash_step((const unsigned char*)"4", 1);
  325. hash_step(z, n);
  326. if( g.fDebug & DEBUG_FULLTRACE ){
  327. fprintf(stderr, "BLOB (%d bytes)\n", n);
  328. }
  329. break;
  330. }
  331. }
  332. }
  333. }
  334. sqlite3_finalize(pStmt);
  335. }
  336. /*
  337. ** Print sketchy documentation for this utility program
  338. */
  339. static void showHelp(void){
  340. printf("Usage: %s [options] FILE ...\n", g.zArgv0);
  341. printf(
  342. "Compute a SHA1 hash on the content of database FILE. System tables such as\n"
  343. "sqlite_stat1, sqlite_stat4, and sqlite_sequence are omitted from the hash.\n"
  344. "Options:\n"
  345. " --debug N Set debugging flags to N (experts only)\n"
  346. " --like PATTERN Only hash tables whose name is LIKE the pattern\n"
  347. " --schema-only Only hash the schema - omit table content\n"
  348. " --without-schema Only hash table content - omit the schema\n"
  349. );
  350. }
  351. int main(int argc, char **argv){
  352. const char *zDb = 0; /* Name of the database currently being hashed */
  353. int i; /* Loop counter */
  354. int rc; /* Subroutine return code */
  355. char *zErrMsg; /* Error message when opening database */
  356. sqlite3_stmt *pStmt; /* An SQLite query */
  357. const char *zLike = 0; /* LIKE pattern of tables to hash */
  358. int omitSchema = 0; /* True to compute hash on content only */
  359. int omitContent = 0; /* True to compute hash on schema only */
  360. int nFile = 0; /* Number of input filenames seen */
  361. g.zArgv0 = argv[0];
  362. sqlite3_config(SQLITE_CONFIG_SINGLETHREAD);
  363. for(i=1; i<argc; i++){
  364. const char *z = argv[i];
  365. if( z[0]=='-' ){
  366. z++;
  367. if( z[0]=='-' ) z++;
  368. if( strcmp(z,"debug")==0 ){
  369. if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
  370. g.fDebug = strtol(argv[++i], 0, 0);
  371. }else
  372. if( strcmp(z,"help")==0 ){
  373. showHelp();
  374. return 0;
  375. }else
  376. if( strcmp(z,"like")==0 ){
  377. if( i==argc-1 ) cmdlineError("missing argument to %s", argv[i]);
  378. if( zLike!=0 ) cmdlineError("only one --like allowed");
  379. zLike = argv[++i];
  380. }else
  381. if( strcmp(z,"schema-only")==0 ){
  382. omitContent = 1;
  383. }else
  384. if( strcmp(z,"without-schema")==0 ){
  385. omitSchema = 1;
  386. }else
  387. {
  388. cmdlineError("unknown option: %s", argv[i]);
  389. }
  390. }else{
  391. nFile++;
  392. if( nFile<i ) argv[nFile] = argv[i];
  393. }
  394. }
  395. if( nFile==0 ){
  396. cmdlineError("no input files specified - nothing to do");
  397. }
  398. if( omitSchema && omitContent ){
  399. cmdlineError("only one of --without-schema and --omit-schema allowed");
  400. }
  401. if( zLike==0 ) zLike = "%";
  402. for(i=1; i<=nFile; i++){
  403. static const int openFlags =
  404. SQLITE_OPEN_READWRITE | /* Read/write so hot journals can recover */
  405. SQLITE_OPEN_URI
  406. ;
  407. zDb = argv[i];
  408. rc = sqlite3_open_v2(zDb, &g.db, openFlags, 0);
  409. if( rc ){
  410. fprintf(stderr, "cannot open database file '%s'\n", zDb);
  411. continue;
  412. }
  413. rc = sqlite3_exec(g.db, "SELECT * FROM sqlite_schema", 0, 0, &zErrMsg);
  414. if( rc || zErrMsg ){
  415. sqlite3_close(g.db);
  416. g.db = 0;
  417. fprintf(stderr, "'%s' is not a valid SQLite database\n", zDb);
  418. continue;
  419. }
  420. /* Start the hash */
  421. hash_init();
  422. /* Hash table content */
  423. if( !omitContent ){
  424. pStmt = db_prepare(
  425. "SELECT name FROM sqlite_schema\n"
  426. " WHERE type='table' AND sql NOT LIKE 'CREATE VIRTUAL%%'\n"
  427. " AND name NOT LIKE 'sqlite_%%'\n"
  428. " AND name LIKE '%q'\n"
  429. " ORDER BY name COLLATE nocase;\n",
  430. zLike
  431. );
  432. while( SQLITE_ROW==sqlite3_step(pStmt) ){
  433. /* We want rows of the table to be hashed in PRIMARY KEY order.
  434. ** Technically, an ORDER BY clause is required to guarantee that
  435. ** order. However, though not guaranteed by the documentation, every
  436. ** historical version of SQLite has always output rows in PRIMARY KEY
  437. ** order when there is no WHERE or GROUP BY clause, so the ORDER BY
  438. ** can be safely omitted. */
  439. hash_one_query("SELECT * FROM \"%w\"", sqlite3_column_text(pStmt,0));
  440. }
  441. sqlite3_finalize(pStmt);
  442. }
  443. /* Hash the database schema */
  444. if( !omitSchema ){
  445. hash_one_query(
  446. "SELECT type, name, tbl_name, sql FROM sqlite_schema\n"
  447. " WHERE tbl_name LIKE '%q'\n"
  448. " ORDER BY name COLLATE nocase;\n",
  449. zLike
  450. );
  451. }
  452. /* Finish and output the hash and close the database connection. */
  453. hash_finish(zDb);
  454. sqlite3_close(g.db);
  455. }
  456. return 0;
  457. }