123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484 |
- ;;; registry.el --- Track and remember data items by various fields
- ;; Copyright (C) 2011-2012 Free Software Foundation, Inc.
- ;; Author: Teodor Zlatanov <tzz@lifelogs.com>
- ;; Keywords: data
- ;; This file is part of GNU Emacs.
- ;; GNU Emacs is free software: you can redistribute it and/or modify
- ;; it under the terms of the GNU General Public License as published by
- ;; the Free Software Foundation, either version 3 of the License, or
- ;; (at your option) any later version.
- ;; GNU Emacs is distributed in the hope that it will be useful,
- ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
- ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- ;; GNU General Public License for more details.
- ;; You should have received a copy of the GNU General Public License
- ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
- ;;; Commentary:
- ;; This library provides a general-purpose EIEIO-based registry
- ;; database with persistence, initialized with these fields:
- ;; version: a float, 0.1 currently (don't change it)
- ;; max-hard: an integer, default 5000000
- ;; max-soft: an integer, default 50000
- ;; precious: a list of symbols
- ;; tracked: a list of symbols
- ;; tracker: a hashtable tuned for 100 symbols to track (you should
- ;; only access this with the :lookup2-function and the
- ;; :lookup2+-function)
- ;; data: a hashtable with default size 10K and resize threshold 2.0
- ;; (this reflects the expected usage so override it if you know better)
- ;; ...plus methods to do all the work: `registry-search',
- ;; `registry-lookup', `registry-lookup-secondary',
- ;; `registry-lookup-secondary-value', `registry-insert',
- ;; `registry-delete', `registry-prune', `registry-size' which see
- ;; and with the following properties:
- ;; Every piece of data has a unique ID and some general-purpose fields
- ;; (F1=D1, F2=D2, F3=(a b c)...) expressed as an alist, e.g.
- ;; ((F1 D1) (F2 D2) (F3 a b c))
- ;; Note that whether a field has one or many pieces of data, the data
- ;; is always a list of values.
- ;; The user decides which fields are "precious", F2 for example. At
- ;; PRUNE TIME (when the :prune-function is called), the registry will
- ;; trim any entries without the F2 field until the size is :max-soft
- ;; or less. No entries with the F2 field will be removed at PRUNE
- ;; TIME.
- ;; When an entry is inserted, the registry will reject new entries
- ;; if they bring it over the max-hard limit, even if they have the F2
- ;; field.
- ;; The user decides which fields are "tracked", F1 for example. Any
- ;; new entry is then indexed by all the tracked fields so it can be
- ;; quickly looked up that way. The data is always a list (see example
- ;; above) and each list element is indexed.
- ;; Precious and tracked field names must be symbols. All other
- ;; fields can be any other Emacs Lisp types.
- ;;; Code:
- (eval-when-compile (require 'cl))
- (eval-when-compile
- (when (null (ignore-errors (require 'ert)))
- (defmacro* ert-deftest (name () &body docstring-keys-and-body))))
- (ignore-errors
- (require 'ert))
- (eval-and-compile
- (or (ignore-errors (progn
- (require 'eieio)
- (require 'eieio-base)))
- ;; gnus-fallback-lib/ from gnus/lisp/gnus-fallback-lib
- (ignore-errors
- (let ((load-path (cons (expand-file-name
- "gnus-fallback-lib/eieio"
- (file-name-directory (locate-library "gnus")))
- load-path)))
- (require 'eieio)
- (require 'eieio-base)))
- (error
- "eieio not found in `load-path' or gnus-fallback-lib/ directory.")))
- (defclass registry-db (eieio-persistent)
- ((version :initarg :version
- :initform 0.1
- :type float
- :custom float
- :documentation "The registry version.")
- (max-hard :initarg :max-hard
- :initform 5000000
- :type integer
- :custom integer
- :documentation "Never accept more than this many elements.")
- (max-soft :initarg :max-soft
- :initform 50000
- :type integer
- :custom integer
- :documentation "Prune as much as possible to get to this size.")
- (prune-factor
- :initarg :prune-factor
- :initform 0.1
- :type float
- :custom float
- :documentation "At the max-hard limit, prune size * this entries.")
- (tracked :initarg :tracked
- :initform nil
- :type t
- :documentation "The tracked (indexed) fields, a list of symbols.")
- (precious :initarg :precious
- :initform nil
- :type t
- :documentation "The precious fields, a list of symbols.")
- (tracker :initarg :tracker
- :type hash-table
- :documentation "The field tracking hashtable.")
- (data :initarg :data
- :type hash-table
- :documentation "The data hashtable.")))
- (eval-and-compile
- (defmethod initialize-instance :AFTER ((this registry-db) slots)
- "Set value of data slot of THIS after initialization."
- (with-slots (data tracker) this
- (unless (member :data slots)
- (setq data
- (make-hash-table :size 10000 :rehash-size 2.0 :test 'equal)))
- (unless (member :tracker slots)
- (setq tracker (make-hash-table :size 100 :rehash-size 2.0)))))
- (defmethod registry-lookup ((db registry-db) keys)
- "Search for KEYS in the registry-db THIS.
- Returns an alist of the key followed by the entry in a list, not a cons cell."
- (let ((data (oref db :data)))
- (delq nil
- (mapcar
- (lambda (k)
- (when (gethash k data)
- (list k (gethash k data))))
- keys))))
- (defmethod registry-lookup-breaks-before-lexbind ((db registry-db) keys)
- "Search for KEYS in the registry-db THIS.
- Returns an alist of the key followed by the entry in a list, not a cons cell."
- (let ((data (oref db :data)))
- (delq nil
- (loop for key in keys
- when (gethash key data)
- collect (list key (gethash key data))))))
- (defmethod registry-lookup-secondary ((db registry-db) tracksym
- &optional create)
- "Search for TRACKSYM in the registry-db THIS.
- When CREATE is not nil, create the secondary index hashtable if needed."
- (let ((h (gethash tracksym (oref db :tracker))))
- (if h
- h
- (when create
- (puthash tracksym
- (make-hash-table :size 800 :rehash-size 2.0 :test 'equal)
- (oref db :tracker))
- (gethash tracksym (oref db :tracker))))))
- (defmethod registry-lookup-secondary-value ((db registry-db) tracksym val
- &optional set)
- "Search for TRACKSYM with value VAL in the registry-db THIS.
- When SET is not nil, set it for VAL (use t for an empty list)."
- ;; either we're asked for creation or there should be an existing index
- (when (or set (registry-lookup-secondary db tracksym))
- ;; set the entry if requested,
- (when set
- (puthash val (if (eq t set) '() set)
- (registry-lookup-secondary db tracksym t)))
- (gethash val (registry-lookup-secondary db tracksym)))))
- (defun registry--match (mode entry check-list)
- ;; for all members
- (when check-list
- (let ((key (nth 0 (nth 0 check-list)))
- (vals (cdr-safe (nth 0 check-list)))
- found)
- (while (and key vals (not found))
- (setq found (case mode
- (:member
- (member (car-safe vals) (cdr-safe (assoc key entry))))
- (:regex
- (string-match (car vals)
- (mapconcat
- 'prin1-to-string
- (cdr-safe (assoc key entry))
- "\0"))))
- vals (cdr-safe vals)))
- (or found
- (registry--match mode entry (cdr-safe check-list))))))
- (eval-and-compile
- (defmethod registry-search ((db registry-db) &rest spec)
- "Search for SPEC across the registry-db THIS.
- For example calling with :member '(a 1 2) will match entry '((a 3 1)).
- Calling with :all t (any non-nil value) will match all.
- Calling with :regex '\(a \"h.llo\") will match entry '((a \"hullo\" \"bye\").
- The test order is to check :all first, then :member, then :regex."
- (when db
- (let ((all (plist-get spec :all))
- (member (plist-get spec :member))
- (regex (plist-get spec :regex)))
- (loop for k being the hash-keys of (oref db :data)
- using (hash-values v)
- when (or
- ;; :all non-nil returns all
- all
- ;; member matching
- (and member (registry--match :member v member))
- ;; regex matching
- (and regex (registry--match :regex v regex)))
- collect k))))
- (defmethod registry-delete ((db registry-db) keys assert &rest spec)
- "Delete KEYS from the registry-db THIS.
- If KEYS is nil, use SPEC to do a search.
- Updates the secondary ('tracked') indices as well.
- With assert non-nil, errors out if the key does not exist already."
- (let* ((data (oref db :data))
- (keys (or keys
- (apply 'registry-search db spec)))
- (tracked (oref db :tracked)))
- (dolist (key keys)
- (let ((entry (gethash key data)))
- (when assert
- (assert entry nil
- "Key %s does not exists in database" key))
- ;; clean entry from the secondary indices
- (dolist (tr tracked)
- ;; is this tracked symbol indexed?
- (when (registry-lookup-secondary db tr)
- ;; for every value in the entry under that key...
- (dolist (val (cdr-safe (assq tr entry)))
- (let* ((value-keys (registry-lookup-secondary-value
- db tr val)))
- (when (member key value-keys)
- ;; override the previous value
- (registry-lookup-secondary-value
- db tr val
- ;; with the indexed keys MINUS the current key
- ;; (we pass t when the list is empty)
- (or (delete key value-keys) t)))))))
- (remhash key data)))
- keys))
- (defmethod registry-full ((db registry-db))
- "Checks if registry-db THIS is full."
- (>= (registry-size db)
- (oref db :max-hard)))
- (defmethod registry-insert ((db registry-db) key entry)
- "Insert ENTRY under KEY into the registry-db THIS.
- Updates the secondary ('tracked') indices as well.
- Errors out if the key exists already."
- (assert (not (gethash key (oref db :data))) nil
- "Key already exists in database")
- (assert (not (registry-full db))
- nil
- "registry max-hard size limit reached")
- ;; store the entry
- (puthash key entry (oref db :data))
- ;; store the secondary indices
- (dolist (tr (oref db :tracked))
- ;; for every value in the entry under that key...
- (dolist (val (cdr-safe (assq tr entry)))
- (let* ((value-keys (registry-lookup-secondary-value db tr val)))
- (pushnew key value-keys :test 'equal)
- (registry-lookup-secondary-value db tr val value-keys))))
- entry)
- (defmethod registry-reindex ((db registry-db))
- "Rebuild the secondary indices of registry-db THIS."
- (let ((count 0)
- (expected (* (length (oref db :tracked)) (registry-size db))))
- (dolist (tr (oref db :tracked))
- (let (values)
- (maphash
- (lambda (key v)
- (incf count)
- (when (and (< 0 expected)
- (= 0 (mod count 1000)))
- (message "reindexing: %d of %d (%.2f%%)"
- count expected (/ (* 100 count) expected)))
- (dolist (val (cdr-safe (assq tr v)))
- (let* ((value-keys (registry-lookup-secondary-value db tr val)))
- (push key value-keys)
- (registry-lookup-secondary-value db tr val value-keys))))
- (oref db :data))))))
- (defmethod registry-size ((db registry-db))
- "Returns the size of the registry-db object THIS.
- This is the key count of the :data slot."
- (hash-table-count (oref db :data)))
- (defmethod registry-prune ((db registry-db) &optional sortfun)
- "Prunes the registry-db object THIS.
- Removes only entries without the :precious keys if it can,
- then removes oldest entries first.
- Returns the number of deleted entries.
- If SORTFUN is given, tries to keep entries that sort *higher*.
- SORTFUN is passed only the two keys so it must look them up directly."
- (dolist (collector '(registry-prune-soft-candidates
- registry-prune-hard-candidates))
- (let* ((size (registry-size db))
- (collected (funcall collector db))
- (limit (nth 0 collected))
- (candidates (nth 1 collected))
- ;; sort the candidates if SORTFUN was given
- (candidates (if sortfun (sort candidates sortfun) candidates))
- (candidates-count (length candidates))
- ;; are we over max-soft?
- (prune-needed (> size limit)))
- ;; while we have more candidates than we need to remove...
- (while (and (> candidates-count (- size limit)) candidates)
- (decf candidates-count)
- (setq candidates (cdr candidates)))
- (registry-delete db candidates nil)
- (length candidates))))
- (defmethod registry-prune-soft-candidates ((db registry-db))
- "Collects pruning candidates from the registry-db object THIS.
- Proposes only entries without the :precious keys."
- (let* ((precious (oref db :precious))
- (precious-p (lambda (entry-key)
- (cdr (memq (car entry-key) precious))))
- (data (oref db :data))
- (limit (oref db :max-soft))
- (candidates (loop for k being the hash-keys of data
- using (hash-values v)
- when (notany precious-p v)
- collect k)))
- (list limit candidates)))
- (defmethod registry-prune-hard-candidates ((db registry-db))
- "Collects pruning candidates from the registry-db object THIS.
- Proposes any entries over the max-hard limit minus size * prune-factor."
- (let* ((data (oref db :data))
- ;; prune to (size * prune-factor) below the max-hard limit so
- ;; we're not pruning all the time
- (limit (max 0 (- (oref db :max-hard)
- (* (registry-size db) (oref db :prune-factor)))))
- (candidates (loop for k being the hash-keys of data
- collect k)))
- (list limit candidates))))
- (ert-deftest registry-instantiation-test ()
- (should (registry-db "Testing")))
- (ert-deftest registry-match-test ()
- (let ((entry '((hello "goodbye" "bye") (blank))))
- (message "Testing :regex matching")
- (should (registry--match :regex entry '((hello "nye" "bye"))))
- (should (registry--match :regex entry '((hello "good"))))
- (should-not (registry--match :regex entry '((hello "nye"))))
- (should-not (registry--match :regex entry '((hello))))
- (message "Testing :member matching")
- (should (registry--match :member entry '((hello "bye"))))
- (should (registry--match :member entry '((hello "goodbye"))))
- (should-not (registry--match :member entry '((hello "good"))))
- (should-not (registry--match :member entry '((hello "nye"))))
- (should-not (registry--match :member entry '((hello)))))
- (message "Done with matching testing."))
- (defun registry-make-testable-db (n &optional name file)
- (let* ((db (registry-db
- (or name "Testing")
- :file (or file "unused")
- :max-hard n
- :max-soft 0 ; keep nothing not precious
- :precious '(extra more-extra)
- :tracked '(sender subject groups))))
- (dotimes (i n)
- (registry-insert db i `((sender "me")
- (subject "about you")
- (more-extra) ; empty data key should be pruned
- ;; first 5 entries will NOT have this extra data
- ,@(when (< 5 i) (list (list 'extra "more data")))
- (groups ,(number-to-string i)))))
- db))
- (ert-deftest registry-usage-test ()
- (let* ((n 100)
- (db (registry-make-testable-db n)))
- (message "size %d" n)
- (should (= n (registry-size db)))
- (message "max-hard test")
- (should-error (registry-insert db "new" '()))
- (message "Individual lookup")
- (should (= 58 (caadr (registry-lookup db '(1 58 99)))))
- (message "Grouped individual lookup")
- (should (= 3 (length (registry-lookup db '(1 58 99)))))
- (when (boundp 'lexical-binding)
- (message "Individual lookup (breaks before lexbind)")
- (should (= 58
- (caadr (registry-lookup-breaks-before-lexbind db '(1 58 99)))))
- (message "Grouped individual lookup (breaks before lexbind)")
- (should (= 3
- (length (registry-lookup-breaks-before-lexbind db
- '(1 58 99))))))
- (message "Search")
- (should (= n (length (registry-search db :all t))))
- (should (= n (length (registry-search db :member '((sender "me"))))))
- (message "Secondary index search")
- (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
- (should (equal '(74) (registry-lookup-secondary-value db 'groups "74")))
- (message "Delete")
- (should (registry-delete db '(1) t))
- (decf n)
- (message "Search after delete")
- (should (= n (length (registry-search db :all t))))
- (message "Secondary search after delete")
- (should (= n (length (registry-lookup-secondary-value db 'sender "me"))))
- ;; (message "Pruning")
- ;; (let* ((tokeep (registry-search db :member '((extra "more data"))))
- ;; (count (- n (length tokeep)))
- ;; (pruned (registry-prune db))
- ;; (prune-count (length pruned)))
- ;; (message "Expecting to prune %d entries and pruned %d"
- ;; count prune-count)
- ;; (should (and (= count 5)
- ;; (= count prune-count))))
- (message "Done with usage testing.")))
- (ert-deftest registry-persistence-test ()
- (let* ((n 100)
- (tempfile (make-temp-file "registry-persistence-"))
- (name "persistence tester")
- (db (registry-make-testable-db n name tempfile))
- size back)
- (message "Saving to %s" tempfile)
- (eieio-persistent-save db)
- (setq size (nth 7 (file-attributes tempfile)))
- (message "Saved to %s: size %d" tempfile size)
- (should (< 0 size))
- (with-temp-buffer
- (insert-file-contents-literally tempfile)
- (should (looking-at (concat ";; Object "
- name
- "\n;; EIEIO PERSISTENT OBJECT"))))
- (message "Reading object back")
- (setq back (eieio-persistent-read tempfile))
- (should back)
- (message "Read object back: %d keys, expected %d==%d"
- (registry-size back) n (registry-size db))
- (should (= (registry-size back) n))
- (should (= (registry-size back) (registry-size db)))
- (delete-file tempfile))
- (message "Done with persistence testing."))
- (provide 'registry)
- ;;; registry.el ends here
|