registry.el 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484
  1. ;;; registry.el --- Track and remember data items by various fields
  2. ;; Copyright (C) 2011-2012 Free Software Foundation, Inc.
  3. ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
  4. ;; Keywords: data
  5. ;; This file is part of GNU Emacs.
  6. ;; GNU Emacs is free software: you can redistribute it and/or modify
  7. ;; it under the terms of the GNU General Public License as published by
  8. ;; the Free Software Foundation, either version 3 of the License, or
  9. ;; (at your option) any later version.
  10. ;; GNU Emacs is distributed in the hope that it will be useful,
  11. ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. ;; GNU General Public License for more details.
  14. ;; You should have received a copy of the GNU General Public License
  15. ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
  16. ;;; Commentary:
  17. ;; This library provides a general-purpose EIEIO-based registry
  18. ;; database with persistence, initialized with these fields:
  19. ;; version: a float, 0.1 currently (don't change it)
  20. ;; max-hard: an integer, default 5000000
;; max-soft: an integer, default 50000
;; prune-factor: a float, default 0.1
  22. ;; precious: a list of symbols
  23. ;; tracked: a list of symbols
  24. ;; tracker: a hashtable tuned for 100 symbols to track (you should
;; only access this with `registry-lookup-secondary' and
;; `registry-lookup-secondary-value')
  27. ;; data: a hashtable with default size 10K and resize threshold 2.0
  28. ;; (this reflects the expected usage so override it if you know better)
  29. ;; ...plus methods to do all the work: `registry-search',
  30. ;; `registry-lookup', `registry-lookup-secondary',
  31. ;; `registry-lookup-secondary-value', `registry-insert',
  32. ;; `registry-delete', `registry-prune', `registry-size' which see
  33. ;; and with the following properties:
  34. ;; Every piece of data has a unique ID and some general-purpose fields
  35. ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
  36. ;; ((F1 D1) (F2 D2) (F3 a b c))
  37. ;; Note that whether a field has one or many pieces of data, the data
  38. ;; is always a list of values.
  39. ;; The user decides which fields are "precious", F2 for example. At
;; PRUNE TIME (when `registry-prune' is called), the registry will
  41. ;; trim any entries without the F2 field until the size is :max-soft
  42. ;; or less. No entries with the F2 field will be removed at PRUNE
  43. ;; TIME.
  44. ;; When an entry is inserted, the registry will reject new entries
  45. ;; if they bring it over the max-hard limit, even if they have the F2
  46. ;; field.
  47. ;; The user decides which fields are "tracked", F1 for example. Any
  48. ;; new entry is then indexed by all the tracked fields so it can be
  49. ;; quickly looked up that way. The data is always a list (see example
  50. ;; above) and each list element is indexed.
  51. ;; Precious and tracked field names must be symbols. All other
  52. ;; fields can be any other Emacs Lisp types.
  53. ;;; Code:
  54. (eval-when-compile (require 'cl))
;; Fallback for environments without ERT: when `ert' cannot be loaded,
;; define a stub `ert-deftest' macro with an empty body, so that every
;; test definition further down in this file expands to nil.
(eval-when-compile
  (when (null (ignore-errors (require 'ert)))
    (defmacro* ert-deftest (name () &body docstring-keys-and-body))))
;; Load ERT when it is available; a failure to load it is ignored.
(ignore-errors
  (require 'ert))
;; Load EIEIO, trying two locations in order and signalling an error
;; only when both attempts fail.
(eval-and-compile
  (or
   ;; First attempt: `eieio' and `eieio-base' from the normal `load-path'.
   (ignore-errors (progn
                    (require 'eieio)
                    (require 'eieio-base)))
   ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
   ;; Second attempt: temporarily put the "gnus-fallback-lib/eieio"
   ;; directory next to the gnus library at the front of `load-path'.
   ;; Any error raised here (for example when gnus cannot be located)
   ;; is swallowed by `ignore-errors', which makes this branch nil.
   (ignore-errors
     (let ((load-path (cons (expand-file-name
                             "gnus-fallback-lib/eieio"
                             (file-name-directory (locate-library "gnus")))
                            load-path)))
       (require 'eieio)
       (require 'eieio-base)))
   ;; Neither attempt worked.
   (error
    "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))
;; The registry database class.  It derives from `eieio-persistent'.
;; The `tracker' and `data' slots have no :initform here; the
;; `initialize-instance' :AFTER method in this file fills them with
;; fresh hash tables when the matching initarg was not supplied.
(defclass registry-db (eieio-persistent)
  ((version :initarg :version
            :initform 0.1
            :type float
            :custom float
            :documentation "The registry version.")
   ;; Compared against the entry count by `registry-full'.
   (max-hard :initarg :max-hard
             :initform 5000000
             :type integer
             :custom integer
             :documentation "Never accept more than this many elements.")
   ;; Returned as the limit by `registry-prune-soft-candidates'.
   (max-soft :initarg :max-soft
             :initform 50000
             :type integer
             :custom integer
             :documentation "Prune as much as possible to get to this size.")
   ;; Multiplied by the current size in `registry-prune-hard-candidates'.
   (prune-factor
    :initarg :prune-factor
    :initform 0.1
    :type float
    :custom float
    :documentation "At the max-hard limit, prune size * this entries.")
   ;; Declared with type t, so the "list of symbols" shape is not
   ;; enforced by the class itself.
   (tracked :initarg :tracked
            :initform nil
            :type t
            :documentation "The tracked (indexed) fields, a list of symbols.")
   (precious :initarg :precious
             :initform nil
             :type t
             :documentation "The precious fields, a list of symbols.")
   ;; Maps a tracked field symbol to that field's own index hash table
   ;; (see `registry-lookup-secondary').
   (tracker :initarg :tracker
            :type hash-table
            :documentation "The field tracking hashtable.")
   ;; Maps an entry key to the entry itself.
   (data :initarg :data
         :type hash-table
         :documentation "The data hashtable.")))
  110. (eval-and-compile
  111. (defmethod initialize-instance :AFTER ((this registry-db) slots)
  112. "Set value of data slot of THIS after initialization."
  113. (with-slots (data tracker) this
  114. (unless (member :data slots)
  115. (setq data
  116. (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
  117. (unless (member :tracker slots)
  118. (setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))
  119. (defmethod registry-lookup ((db registry-db) keys)
  120. "Search for KEYS in the registry-db THIS.
  121. Returns an alist of the key followed by the entry in a list, not a cons cell."
  122. (let ((data (oref db :data)))
  123. (delq nil
  124. (mapcar
  125. (lambda (k)
  126. (when (gethash k data)
  127. (list k (gethash k data))))
  128. keys))))
;; NOTE(review): judging by its name and by the `lexical-binding' guard
;; around its calls in `registry-usage-test', this `loop'-based twin of
;; `registry-lookup' appears to be kept on purpose -- confirm before
;; merging the two.
(defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
  "Search for KEYS in the registry-db DB.
Returns an alist of the key followed by the entry in a list, not a cons cell.
Keys whose stored entry is nil or missing are left out."
  (let ((data (oref db :data)))
    (delq nil
          (loop for key in keys
                when (gethash key data)
                collect (list key (gethash key data))))))
  137. (defmethod registry-lookup-secondary ((db registry-db) tracksym
  138. &optional create)
  139. "Search for TRACKSYM in the registry-db THIS.
  140. When CREATE is not nil, create the secondary index hashtable if needed."
  141. (let ((h (gethash tracksym (oref db :tracker))))
  142. (if h
  143. h
  144. (when create
  145. (puthash tracksym
  146. (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
  147. (oref db :tracker))
  148. (gethash tracksym (oref db :tracker))))))
  149. (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
  150. &optional set)
  151. "Search for TRACKSYM with value VAL in the registry-db THIS.
  152. When SET is not nil, set it for VAL (use t for an empty list)."
  153. ;; either we're asked for creation or there should be an existing index
  154. (when (or set (registry-lookup-secondary db tracksym))
  155. ;; set the entry if requested,
  156. (when set
  157. (puthash val (if (eq t set) '() set)
  158. (registry-lookup-secondary db tracksym t)))
  159. (gethash val (registry-lookup-secondary db tracksym)))))
  160. (defun registry--match (mode entry check-list)
  161. ;; for all members
  162. (when check-list
  163. (let ((key (nth 0 (nth 0 check-list)))
  164. (vals (cdr-safe (nth 0 check-list)))
  165. found)
  166. (while (and key vals (not found))
  167. (setq found (case mode
  168. (:member
  169. (member (car-safe vals) (cdr-safe (assoc key entry))))
  170. (:regex
  171. (string-match (car vals)
  172. (mapconcat
  173. 'prin1-to-string
  174. (cdr-safe (assoc key entry))
  175. "\0"))))
  176. vals (cdr-safe vals)))
  177. (or found
  178. (registry--match mode entry (cdr-safe check-list))))))
  179. (eval-and-compile
  180. (defmethod registry-search ((db registry-db) &rest spec)
  181. "Search for SPEC across the registry-db THIS.
  182. For example calling with :member '(a 1 2) will match entry '((a 3 1)).
  183. Calling with :all t (any non-nil value) will match all.
  184. Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
  185. The test order is to check :all first, then :member, then :regex."
  186. (when db
  187. (let ((all (plist-get spec :all))
  188. (member (plist-get spec :member))
  189. (regex (plist-get spec :regex)))
  190. (loop for k being the hash-keys of (oref db :data)
  191. using (hash-values v)
  192. when (or
  193. ;; :all non-nil returns all
  194. all
  195. ;; member matching
  196. (and member (registry--match :member v member))
  197. ;; regex matching
  198. (and regex (registry--match :regex v regex)))
  199. collect k))))
  200. (defmethod registry-delete ((db registry-db) keys assert &rest spec)
  201. "Delete KEYS from the registry-db THIS.
  202. If KEYS is nil, use SPEC to do a search.
  203. Updates the secondary ('tracked') indices as well.
  204. With assert non-nil, errors out if the key does not exist already."
  205. (let* ((data (oref db :data))
  206. (keys (or keys
  207. (apply 'registry-search db spec)))
  208. (tracked (oref db :tracked)))
  209. (dolist (key keys)
  210. (let ((entry (gethash key data)))
  211. (when assert
  212. (assert entry nil
  213. "Key %s does not exists in database" key))
  214. ;; clean entry from the secondary indices
  215. (dolist (tr tracked)
  216. ;; is this tracked symbol indexed?
  217. (when (registry-lookup-secondary db tr)
  218. ;; for every value in the entry under that key...
  219. (dolist (val (cdr-safe (assq tr entry)))
  220. (let* ((value-keys (registry-lookup-secondary-value
  221. db tr val)))
  222. (when (member key value-keys)
  223. ;; override the previous value
  224. (registry-lookup-secondary-value
  225. db tr val
  226. ;; with the indexed keys MINUS the current key
  227. ;; (we pass t when the list is empty)
  228. (or (delete key value-keys) t)))))))
  229. (remhash key data)))
  230. keys))
  231. (defmethod registry-full ((db registry-db))
  232. "Checks if registry-db THIS is full."
  233. (>= (registry-size db)
  234. (oref db :max-hard)))
  235. (defmethod registry-insert ((db registry-db) key entry)
  236. "Insert ENTRY under KEY into the registry-db THIS.
  237. Updates the secondary ('tracked') indices as well.
  238. Errors out if the key exists already."
  239. (assert (not (gethash key (oref db :data))) nil
  240. "Key already exists in database")
  241. (assert (not (registry-full db))
  242. nil
  243. "registry max-hard size limit reached")
  244. ;; store the entry
  245. (puthash key entry (oref db :data))
  246. ;; store the secondary indices
  247. (dolist (tr (oref db :tracked))
  248. ;; for every value in the entry under that key...
  249. (dolist (val (cdr-safe (assq tr entry)))
  250. (let* ((value-keys (registry-lookup-secondary-value db tr val)))
  251. (pushnew key value-keys :test 'equal)
  252. (registry-lookup-secondary-value db tr val value-keys))))
  253. entry)
  254. (defmethod registry-reindex ((db registry-db))
  255. "Rebuild the secondary indices of registry-db THIS."
  256. (let ((count 0)
  257. (expected (* (length (oref db :tracked)) (registry-size db))))
  258. (dolist (tr (oref db :tracked))
  259. (let (values)
  260. (maphash
  261. (lambda (key v)
  262. (incf count)
  263. (when (and (< 0 expected)
  264. (= 0 (mod count 1000)))
  265. (message "reindexing: %d of %d (%.2f%%)"
  266. count expected (/ (* 100 count) expected)))
  267. (dolist (val (cdr-safe (assq tr v)))
  268. (let* ((value-keys (registry-lookup-secondary-value db tr val)))
  269. (push key value-keys)
  270. (registry-lookup-secondary-value db tr val value-keys))))
  271. (oref db :data))))))
  272. (defmethod registry-size ((db registry-db))
  273. "Returns the size of the registry-db object THIS.
  274. This is the key count of the :data slot."
  275. (hash-table-count (oref db :data)))
  276. (defmethod registry-prune ((db registry-db) &optional sortfun)
  277. "Prunes the registry-db object THIS.
  278. Removes only entries without the :precious keys if it can,
  279. then removes oldest entries first.
  280. Returns the number of deleted entries.
  281. If SORTFUN is given, tries to keep entries that sort *higher*.
  282. SORTFUN is passed only the two keys so it must look them up directly."
  283. (dolist (collector '(registry-prune-soft-candidates
  284. registry-prune-hard-candidates))
  285. (let* ((size (registry-size db))
  286. (collected (funcall collector db))
  287. (limit (nth 0 collected))
  288. (candidates (nth 1 collected))
  289. ;; sort the candidates if SORTFUN was given
  290. (candidates (if sortfun (sort candidates sortfun) candidates))
  291. (candidates-count (length candidates))
  292. ;; are we over max-soft?
  293. (prune-needed (> size limit)))
  294. ;; while we have more candidates than we need to remove...
  295. (while (and (> candidates-count (- size limit)) candidates)
  296. (decf candidates-count)
  297. (setq candidates (cdr candidates)))
  298. (registry-delete db candidates nil)
  299. (length candidates))))
  300. (defmethod registry-prune-soft-candidates ((db registry-db))
  301. "Collects pruning candidates from the registry-db object THIS.
  302. Proposes only entries without the :precious keys."
  303. (let* ((precious (oref db :precious))
  304. (precious-p (lambda (entry-key)
  305. (cdr (memq (car entry-key) precious))))
  306. (data (oref db :data))
  307. (limit (oref db :max-soft))
  308. (candidates (loop for k being the hash-keys of data
  309. using (hash-values v)
  310. when (notany precious-p v)
  311. collect k)))
  312. (list limit candidates)))
  313. (defmethod registry-prune-hard-candidates ((db registry-db))
  314. "Collects pruning candidates from the registry-db object THIS.
  315. Proposes any entries over the max-hard limit minus size * prune-factor."
  316. (let* ((data (oref db :data))
  317. ;; prune to (size * prune-factor) below the max-hard limit so
  318. ;; we're not pruning all the time
  319. (limit (max 0 (- (oref db :max-hard)
  320. (* (registry-size db) (oref db :prune-factor)))))
  321. (candidates (loop for k being the hash-keys of data
  322. collect k)))
  323. (list limit candidates))))
  324. (ert-deftest registry-instantiation-test ()
  325. (should (registry-db "Testing")))
  326. (ert-deftest registry-match-test ()
  327. (let ((entry '((hello "goodbye" "bye") (blank))))
  328. (message "Testing :regex matching")
  329. (should (registry--match :regex entry '((hello "nye" "bye"))))
  330. (should (registry--match :regex entry '((hello "good"))))
  331. (should-not (registry--match :regex entry '((hello "nye"))))
  332. (should-not (registry--match :regex entry '((hello))))
  333. (message "Testing :member matching")
  334. (should (registry--match :member entry '((hello "bye"))))
  335. (should (registry--match :member entry '((hello "goodbye"))))
  336. (should-not (registry--match :member entry '((hello "good"))))
  337. (should-not (registry--match :member entry '((hello "nye"))))
  338. (should-not (registry--match :member entry '((hello)))))
  339. (message "Done with matching testing."))
  340. (defun registry-make-testable-db (n &optional name file)
  341. (let* ((db (registry-db
  342. (or name "Testing")
  343. :file (or file "unused")
  344. :max-hard n
  345. :max-soft 0 ; keep nothing not precious
  346. :precious '(extra more-extra)
  347. :tracked '(sender subject groups))))
  348. (dotimes (i n)
  349. (registry-insert db i `((sender "me")
  350. (subject "about you")
  351. (more-extra) ; empty data key should be pruned
  352. ;; first 5 entries will NOT have this extra data
  353. ,@(when (< 5 i) (list (list 'extra "more data")))
  354. (groups ,(number-to-string i)))))
  355. db))
;; End-to-end exercise of a populated registry.  The assertions depend
;; on their order: the delete step changes the database and decrements
;; N before the later size checks run.
(ert-deftest registry-usage-test ()
  (let* ((n 100)
         (db (registry-make-testable-db n)))
    (message "size %d" n)
    (should (= n (registry-size db)))
    ;; The test database uses N as :max-hard, so it is already full.
    (message "max-hard test")
    (should-error (registry-insert db "new" '()))
    ;; `registry-lookup' returns (KEY ENTRY) lists in the order of the
    ;; requested keys, so `caadr' is the key of the second result.
    (message "Individual lookup")
    (should (= 58 (caadr (registry-lookup db '(1 58 99)))))
    (message "Grouped individual lookup")
    (should (= 3 (length (registry-lookup db '(1 58 99)))))
    ;; The `loop'-based variant is only exercised when the running
    ;; Emacs knows about `lexical-binding'.
    (when (boundp 'lexical-binding)
      (message "Individual lookup (breaks before lexbind)")
      (should (= 58
                 (caadr (registry-lookup-breaks-before-lexbind db '(1 58 99)))))
      (message "Grouped individual lookup (breaks before lexbind)")
      (should (= 3
                 (length (registry-lookup-breaks-before-lexbind db
                                                                '(1 58 99))))))
    (message "Search")
    (should (= n (length (registry-search db :all t))))
    (should (= n (length (registry-search db :member '((sender "me"))))))
    ;; Every entry has sender "me"; each entry has its own groups value.
    (message "Secondary index search")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    (should (equal '(74) (registry-lookup-secondary-value db 'groups "74")))
    (message "Delete")
    (should (registry-delete db '(1) t))
    (decf n)
    (message "Search after delete")
    (should (= n (length (registry-search db :all t))))
    (message "Secondary search after delete")
    (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
    ;; Disabled pruning check, kept for reference.
    ;; NOTE(review): it takes the `length' of the value returned by
    ;; `registry-prune', i.e. it expects a list -- confirm against the
    ;; documented return value before re-enabling.
    ;; (message "Pruning")
    ;; (let* ((tokeep (registry-search db :member '((extra "more data"))))
    ;; (count (- n (length tokeep)))
    ;; (pruned (registry-prune db))
    ;; (prune-count (length pruned)))
    ;; (message "Expecting to prune %d entries and pruned %d"
    ;; count prune-count)
    ;; (should (and (= count 5)
    ;; (= count prune-count))))
    (message "Done with usage testing.")))
  398. (ert-deftest registry-persistence-test ()
  399. (let* ((n 100)
  400. (tempfile (make-temp-file "registry-persistence-"))
  401. (name "persistence tester")
  402. (db (registry-make-testable-db n name tempfile))
  403. size back)
  404. (message "Saving to %s" tempfile)
  405. (eieio-persistent-save db)
  406. (setq size (nth 7 (file-attributes tempfile)))
  407. (message "Saved to %s: size %d" tempfile size)
  408. (should (< 0 size))
  409. (with-temp-buffer
  410. (insert-file-contents-literally tempfile)
  411. (should (looking-at (concat ";; Object "
  412. name
  413. "\n;; EIEIO PERSISTENT OBJECT"))))
  414. (message "Reading object back")
  415. (setq back (eieio-persistent-read tempfile))
  416. (should back)
  417. (message "Read object back: %d keys, expected %d==%d"
  418. (registry-size back) n (registry-size db))
  419. (should (= (registry-size back) n))
  420. (should (= (registry-size back) (registry-size db)))
  421. (delete-file tempfile))
  422. (message "Done with persistence testing."))
  423. (provide 'registry)
  424. ;;; registry.el ends here