fts5_test_mi.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. /*
  2. ** 2015 Aug 04
  3. **
  4. ** The author disclaims copyright to this source code. In place of
  5. ** a legal notice, here is a blessing:
  6. **
  7. ** May you do good and not evil.
  8. ** May you find forgiveness for yourself and forgive others.
  9. ** May you share freely, never taking more than you give.
  10. **
  11. ******************************************************************************
  12. **
  13. ** This file contains test code only, it is not included in release
  14. ** versions of FTS5. It contains the implementation of an FTS5 auxiliary
  15. ** function very similar to the FTS4 function matchinfo():
  16. **
  17. ** https://www.sqlite.org/fts3.html#matchinfo
  18. **
  19. ** Known differences are that:
  20. **
  21. ** 1) this function uses the FTS5 definition of "matchable phrase", which
  22. ** excludes any phrases that are part of an expression sub-tree that
  23. ** does not match the current row. This comes up for MATCH queries
  24. ** such as:
  25. **
  26. ** "a OR (b AND c)"
  27. **
  28. ** In FTS4, if a single row contains instances of tokens "a" and "c",
  29. ** but not "b", all instances of "c" are considered matches. In FTS5,
  30. ** they are not (as the "b AND c" sub-tree does not match the current
  31. ** row).
  32. **
  33. ** 2) For the values returned by 'x' that apply to all rows of the table,
  34. ** NEAR constraints are not considered. But for the number of hits in
  35. ** the current row, they are.
  36. **
  37. ** This file exports a single function that may be called to register the
  38. ** matchinfo() implementation with a database handle:
  39. **
  40. ** int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db);
  41. */
  42. #ifdef SQLITE_ENABLE_FTS5
  43. #include "fts5.h"
  44. #include <assert.h>
  45. #include <string.h>
  /* Short name for the per-query matchinfo() state declared below. */
  typedef struct Fts5MatchinfoCtx Fts5MatchinfoCtx;

  /* u32 is only declared here when SQLITE_AMALGAMATION is not defined;
  ** presumably the amalgamation build already provides it. */
  #ifndef SQLITE_AMALGAMATION
  typedef unsigned int u32;
  #endif

  /*
  ** State shared by the calls to matchinfo() made for a single query.
  **
  ** fts5MatchinfoNew() creates the object as one allocation: the aRet[]
  ** array is placed directly after the structure and the zArg string
  ** directly after aRet[], so a single sqlite3_free() releases everything.
  */
  struct Fts5MatchinfoCtx {
    int nCol;                       /* Number of columns in the FTS5 table */
    int nPhrase;                    /* Number of phrases in the FTS5 query */
    char *zArg;                     /* Nul-terminated copy of the flag string */
    int nRet;                       /* Number of elements in aRet[] */
    u32 *aRet;                      /* 32-bit unsigned values to return */
  };
  57. /*
  58. ** Return a pointer to the fts5_api pointer for database connection db.
  59. ** If an error occurs, return NULL and leave an error in the database
  60. ** handle (accessible using sqlite3_errcode()/errmsg()).
  61. */
  62. static int fts5_api_from_db(sqlite3 *db, fts5_api **ppApi){
  63. sqlite3_stmt *pStmt = 0;
  64. int rc;
  65. *ppApi = 0;
  66. rc = sqlite3_prepare(db, "SELECT fts5(?1)", -1, &pStmt, 0);
  67. if( rc==SQLITE_OK ){
  68. sqlite3_bind_pointer(pStmt, 1, (void*)ppApi, "fts5_api_ptr", 0);
  69. (void)sqlite3_step(pStmt);
  70. rc = sqlite3_finalize(pStmt);
  71. }
  72. return rc;
  73. }
  /*
  ** Return the number of 32-bit integers that matchinfo flag f contributes
  ** to the output array for a table with nCol columns and a query with
  ** nPhrase phrases. If f is not one of the recognised flag characters
  ** ('p', 'c', 'n', 'a', 'l', 's', 'y', 'x' or 'b'), return -1.
  */
  static int fts5MatchinfoFlagsize(int nCol, int nPhrase, char f){
    switch( f ){
      /* Flags that always produce a single value */
      case 'p':
      case 'c':
      case 'n':
        return 1;

      /* Flags that produce one value per column */
      case 'a':
      case 'l':
      case 's':
        return nCol;

      /* One value per (phrase, column) pair */
      case 'y':
        return nCol * nPhrase;

      /* Three values per (phrase, column) pair */
      case 'x':
        return 3 * nCol * nPhrase;

      /* One bit per column, packed into 32-bit words, for each phrase */
      case 'b':
        return ((nCol + 31) / 32) * nPhrase;

      default:
        break;
    }
    return -1;
  }
  96. static int fts5MatchinfoIter(
  97. const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
  98. Fts5Context *pFts, /* First arg to pass to pApi functions */
  99. Fts5MatchinfoCtx *p,
  100. int(*x)(const Fts5ExtensionApi*,Fts5Context*,Fts5MatchinfoCtx*,char,u32*)
  101. ){
  102. int i;
  103. int n = 0;
  104. int rc = SQLITE_OK;
  105. char f;
  106. for(i=0; (f = p->zArg[i]); i++){
  107. rc = x(pApi, pFts, p, f, &p->aRet[n]);
  108. if( rc!=SQLITE_OK ) break;
  109. n += fts5MatchinfoFlagsize(p->nCol, p->nPhrase, f);
  110. }
  111. return rc;
  112. }
  113. static int fts5MatchinfoXCb(
  114. const Fts5ExtensionApi *pApi,
  115. Fts5Context *pFts,
  116. void *pUserData
  117. ){
  118. Fts5PhraseIter iter;
  119. int iCol, iOff;
  120. u32 *aOut = (u32*)pUserData;
  121. int iPrev = -1;
  122. for(pApi->xPhraseFirst(pFts, 0, &iter, &iCol, &iOff);
  123. iCol>=0;
  124. pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
  125. ){
  126. aOut[iCol*3+1]++;
  127. if( iCol!=iPrev ) aOut[iCol*3 + 2]++;
  128. iPrev = iCol;
  129. }
  130. return SQLITE_OK;
  131. }
  132. static int fts5MatchinfoGlobalCb(
  133. const Fts5ExtensionApi *pApi,
  134. Fts5Context *pFts,
  135. Fts5MatchinfoCtx *p,
  136. char f,
  137. u32 *aOut
  138. ){
  139. int rc = SQLITE_OK;
  140. switch( f ){
  141. case 'p':
  142. aOut[0] = p->nPhrase;
  143. break;
  144. case 'c':
  145. aOut[0] = p->nCol;
  146. break;
  147. case 'x': {
  148. int i;
  149. for(i=0; i<p->nPhrase && rc==SQLITE_OK; i++){
  150. void *pPtr = (void*)&aOut[i * p->nCol * 3];
  151. rc = pApi->xQueryPhrase(pFts, i, pPtr, fts5MatchinfoXCb);
  152. }
  153. break;
  154. }
  155. case 'n': {
  156. sqlite3_int64 nRow;
  157. rc = pApi->xRowCount(pFts, &nRow);
  158. aOut[0] = (u32)nRow;
  159. break;
  160. }
  161. case 'a': {
  162. sqlite3_int64 nRow = 0;
  163. rc = pApi->xRowCount(pFts, &nRow);
  164. if( nRow==0 ){
  165. memset(aOut, 0, sizeof(u32) * p->nCol);
  166. }else{
  167. int i;
  168. for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
  169. sqlite3_int64 nToken;
  170. rc = pApi->xColumnTotalSize(pFts, i, &nToken);
  171. if( rc==SQLITE_OK){
  172. aOut[i] = (u32)((2*nToken + nRow) / (2*nRow));
  173. }
  174. }
  175. }
  176. break;
  177. }
  178. }
  179. return rc;
  180. }
  181. static int fts5MatchinfoLocalCb(
  182. const Fts5ExtensionApi *pApi,
  183. Fts5Context *pFts,
  184. Fts5MatchinfoCtx *p,
  185. char f,
  186. u32 *aOut
  187. ){
  188. int i;
  189. int rc = SQLITE_OK;
  190. switch( f ){
  191. case 'b': {
  192. int iPhrase;
  193. int nInt = ((p->nCol + 31) / 32) * p->nPhrase;
  194. for(i=0; i<nInt; i++) aOut[i] = 0;
  195. for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
  196. Fts5PhraseIter iter;
  197. int iCol;
  198. for(pApi->xPhraseFirstColumn(pFts, iPhrase, &iter, &iCol);
  199. iCol>=0;
  200. pApi->xPhraseNextColumn(pFts, &iter, &iCol)
  201. ){
  202. aOut[iPhrase * ((p->nCol+31)/32) + iCol/32] |= ((u32)1 << iCol%32);
  203. }
  204. }
  205. break;
  206. }
  207. case 'x':
  208. case 'y': {
  209. int nMul = (f=='x' ? 3 : 1);
  210. int iPhrase;
  211. for(i=0; i<(p->nCol*p->nPhrase); i++) aOut[i*nMul] = 0;
  212. for(iPhrase=0; iPhrase<p->nPhrase; iPhrase++){
  213. Fts5PhraseIter iter;
  214. int iOff, iCol;
  215. for(pApi->xPhraseFirst(pFts, iPhrase, &iter, &iCol, &iOff);
  216. iOff>=0;
  217. pApi->xPhraseNext(pFts, &iter, &iCol, &iOff)
  218. ){
  219. aOut[nMul * (iCol + iPhrase * p->nCol)]++;
  220. }
  221. }
  222. break;
  223. }
  224. case 'l': {
  225. for(i=0; rc==SQLITE_OK && i<p->nCol; i++){
  226. int nToken;
  227. rc = pApi->xColumnSize(pFts, i, &nToken);
  228. aOut[i] = (u32)nToken;
  229. }
  230. break;
  231. }
  232. case 's': {
  233. int nInst;
  234. memset(aOut, 0, sizeof(u32) * p->nCol);
  235. rc = pApi->xInstCount(pFts, &nInst);
  236. for(i=0; rc==SQLITE_OK && i<nInst; i++){
  237. int iPhrase, iOff, iCol = 0;
  238. int iNextPhrase;
  239. int iNextOff;
  240. u32 nSeq = 1;
  241. int j;
  242. rc = pApi->xInst(pFts, i, &iPhrase, &iCol, &iOff);
  243. iNextPhrase = iPhrase+1;
  244. iNextOff = iOff+pApi->xPhraseSize(pFts, 0);
  245. for(j=i+1; rc==SQLITE_OK && j<nInst; j++){
  246. int ip, ic, io;
  247. rc = pApi->xInst(pFts, j, &ip, &ic, &io);
  248. if( ic!=iCol || io>iNextOff ) break;
  249. if( ip==iNextPhrase && io==iNextOff ){
  250. nSeq++;
  251. iNextPhrase = ip+1;
  252. iNextOff = io + pApi->xPhraseSize(pFts, ip);
  253. }
  254. }
  255. if( nSeq>aOut[iCol] ) aOut[iCol] = nSeq;
  256. }
  257. break;
  258. }
  259. }
  260. return rc;
  261. }
  262. static Fts5MatchinfoCtx *fts5MatchinfoNew(
  263. const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
  264. Fts5Context *pFts, /* First arg to pass to pApi functions */
  265. sqlite3_context *pCtx, /* Context for returning error message */
  266. const char *zArg /* Matchinfo flag string */
  267. ){
  268. Fts5MatchinfoCtx *p;
  269. int nCol;
  270. int nPhrase;
  271. int i;
  272. int nInt;
  273. sqlite3_int64 nByte;
  274. int rc;
  275. nCol = pApi->xColumnCount(pFts);
  276. nPhrase = pApi->xPhraseCount(pFts);
  277. nInt = 0;
  278. for(i=0; zArg[i]; i++){
  279. int n = fts5MatchinfoFlagsize(nCol, nPhrase, zArg[i]);
  280. if( n<0 ){
  281. char *zErr = sqlite3_mprintf("unrecognized matchinfo flag: %c", zArg[i]);
  282. sqlite3_result_error(pCtx, zErr, -1);
  283. sqlite3_free(zErr);
  284. return 0;
  285. }
  286. nInt += n;
  287. }
  288. nByte = sizeof(Fts5MatchinfoCtx) /* The struct itself */
  289. + sizeof(u32) * nInt /* The p->aRet[] array */
  290. + (i+1); /* The p->zArg string */
  291. p = (Fts5MatchinfoCtx*)sqlite3_malloc64(nByte);
  292. if( p==0 ){
  293. sqlite3_result_error_nomem(pCtx);
  294. return 0;
  295. }
  296. memset(p, 0, nByte);
  297. p->nCol = nCol;
  298. p->nPhrase = nPhrase;
  299. p->aRet = (u32*)&p[1];
  300. p->nRet = nInt;
  301. p->zArg = (char*)&p->aRet[nInt];
  302. memcpy(p->zArg, zArg, i);
  303. rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoGlobalCb);
  304. if( rc!=SQLITE_OK ){
  305. sqlite3_result_error_code(pCtx, rc);
  306. sqlite3_free(p);
  307. p = 0;
  308. }
  309. return p;
  310. }
  311. static void fts5MatchinfoFunc(
  312. const Fts5ExtensionApi *pApi, /* API offered by current FTS version */
  313. Fts5Context *pFts, /* First arg to pass to pApi functions */
  314. sqlite3_context *pCtx, /* Context for returning result/error */
  315. int nVal, /* Number of values in apVal[] array */
  316. sqlite3_value **apVal /* Array of trailing arguments */
  317. ){
  318. const char *zArg;
  319. Fts5MatchinfoCtx *p;
  320. int rc = SQLITE_OK;
  321. if( nVal>0 ){
  322. zArg = (const char*)sqlite3_value_text(apVal[0]);
  323. }else{
  324. zArg = "pcx";
  325. }
  326. p = (Fts5MatchinfoCtx*)pApi->xGetAuxdata(pFts, 0);
  327. if( p==0 || sqlite3_stricmp(zArg, p->zArg) ){
  328. p = fts5MatchinfoNew(pApi, pFts, pCtx, zArg);
  329. if( p==0 ){
  330. rc = SQLITE_NOMEM;
  331. }else{
  332. rc = pApi->xSetAuxdata(pFts, p, sqlite3_free);
  333. }
  334. }
  335. if( rc==SQLITE_OK ){
  336. rc = fts5MatchinfoIter(pApi, pFts, p, fts5MatchinfoLocalCb);
  337. }
  338. if( rc!=SQLITE_OK ){
  339. sqlite3_result_error_code(pCtx, rc);
  340. }else{
  341. /* No errors has occured, so return a copy of the array of integers. */
  342. int nByte = p->nRet * sizeof(u32);
  343. sqlite3_result_blob(pCtx, (void*)p->aRet, nByte, SQLITE_TRANSIENT);
  344. }
  345. }
  346. int sqlite3Fts5TestRegisterMatchinfo(sqlite3 *db){
  347. int rc; /* Return code */
  348. fts5_api *pApi; /* FTS5 API functions */
  349. /* Extract the FTS5 API pointer from the database handle. The
  350. ** fts5_api_from_db() function above is copied verbatim from the
  351. ** FTS5 documentation. Refer there for details. */
  352. rc = fts5_api_from_db(db, &pApi);
  353. if( rc!=SQLITE_OK ) return rc;
  354. /* If fts5_api_from_db() returns NULL, then either FTS5 is not registered
  355. ** with this database handle, or an error (OOM perhaps?) has occurred.
  356. **
  357. ** Also check that the fts5_api object is version 2 or newer.
  358. */
  359. if( pApi==0 || pApi->iVersion<2 ){
  360. return SQLITE_ERROR;
  361. }
  362. /* Register the implementation of matchinfo() */
  363. rc = pApi->xCreateFunction(pApi, "matchinfo", 0, fts5MatchinfoFunc, 0);
  364. return rc;
  365. }
  366. #endif /* SQLITE_ENABLE_FTS5 */