# pylru.py — LRU cache with a dictionary interface (~16 KB)
# Cache implementation with a Least Recently Used (LRU) replacement policy and
# a basic dictionary interface.

# Copyright (C) 2006, 2009, 2010, 2011 Jay Hutchinson

# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.

# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.

# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., 51
# Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.

# The cache is implemented using a combination of a python dictionary (hash
# table) and a circular doubly linked list. Items in the cache are stored in
# nodes. These nodes make up the linked list. The list is used to efficiently
# maintain the order that the items have been used in. The front or head of
# the list contains the most recently used item, the tail of the list
# contains the least recently used item. When an item is used it can easily
# (in a constant amount of time) be moved to the front of the list, thus
# updating its position in the ordering. These nodes are also placed in the
# hash table under their associated key. The hash table allows efficient
# lookup of values by key.
  26. class _dlnode(object):
  27. def __init__(self):
  28. self.empty = True
  29. class lrucache(object):
  30. def __init__(self, size, callback=None):
  31. self.callback = callback
  32. # Create an empty hash table.
  33. self.table = {}
  34. # Initialize the doubly linked list with one empty node. This is an
  35. # invariant. The cache size must always be greater than zero. Each
  36. # node has a 'prev' and 'next' variable to hold the node that comes
  37. # before it and after it respectively. Initially the two variables
  38. # each point to the head node itself, creating a circular doubly
  39. # linked list of size one. Then the size() method is used to adjust
  40. # the list to the desired size.
  41. self.head = _dlnode()
  42. self.head.next = self.head
  43. self.head.prev = self.head
  44. self.listSize = 1
  45. # Adjust the size
  46. self.size(size)
  47. def __len__(self):
  48. return len(self.table)
  49. def clear(self):
  50. for node in self.dli():
  51. node.empty = True
  52. node.key = None
  53. node.value = None
  54. self.table.clear()
  55. def __contains__(self, key):
  56. return key in self.table
  57. # Looks up a value in the cache without affecting cache order.
  58. def peek(self, key):
  59. # Look up the node
  60. node = self.table[key]
  61. return node.value
  62. def __getitem__(self, key):
  63. # Look up the node
  64. node = self.table[key]
  65. # Update the list ordering. Move this node so that is directly
  66. # proceeds the head node. Then set the 'head' variable to it. This
  67. # makes it the new head of the list.
  68. self.mtf(node)
  69. self.head = node
  70. # Return the value.
  71. return node.value
  72. def get(self, key, default=None):
  73. """Get an item - return default (None) if not present"""
  74. try:
  75. return self[key]
  76. except KeyError:
  77. return default
  78. def __setitem__(self, key, value):
  79. # First, see if any value is stored under 'key' in the cache already.
  80. # If so we are going to replace that value with the new one.
  81. if key in self.table:
  82. # Lookup the node
  83. node = self.table[key]
  84. # Replace the value.
  85. node.value = value
  86. # Update the list ordering.
  87. self.mtf(node)
  88. self.head = node
  89. return
  90. # Ok, no value is currently stored under 'key' in the cache. We need
  91. # to choose a node to place the new item in. There are two cases. If
  92. # the cache is full some item will have to be pushed out of the
  93. # cache. We want to choose the node with the least recently used
  94. # item. This is the node at the tail of the list. If the cache is not
  95. # full we want to choose a node that is empty. Because of the way the
  96. # list is managed, the empty nodes are always together at the tail
  97. # end of the list. Thus, in either case, by chooseing the node at the
  98. # tail of the list our conditions are satisfied.
  99. # Since the list is circular, the tail node directly preceeds the
  100. # 'head' node.
  101. node = self.head.prev
  102. # If the node already contains something we need to remove the old
  103. # key from the dictionary.
  104. if not node.empty:
  105. if self.callback is not None:
  106. self.callback(node.key, node.value)
  107. del self.table[node.key]
  108. # Place the new key and value in the node
  109. node.empty = False
  110. node.key = key
  111. node.value = value
  112. # Add the node to the dictionary under the new key.
  113. self.table[key] = node
  114. # We need to move the node to the head of the list. The node is the
  115. # tail node, so it directly preceeds the head node due to the list
  116. # being circular. Therefore, the ordering is already correct, we just
  117. # need to adjust the 'head' variable.
  118. self.head = node
  119. def __delitem__(self, key):
  120. # Lookup the node, then remove it from the hash table.
  121. node = self.table[key]
  122. del self.table[key]
  123. node.empty = True
  124. # Not strictly necessary.
  125. node.key = None
  126. node.value = None
  127. # Because this node is now empty we want to reuse it before any
  128. # non-empty node. To do that we want to move it to the tail of the
  129. # list. We move it so that it directly preceeds the 'head' node. This
  130. # makes it the tail node. The 'head' is then adjusted. This
  131. # adjustment ensures correctness even for the case where the 'node'
  132. # is the 'head' node.
  133. self.mtf(node)
  134. self.head = node.next
  135. def __iter__(self):
  136. # Return an iterator that returns the keys in the cache in order from
  137. # the most recently to least recently used. Does not modify the cache
  138. # order.
  139. for node in self.dli():
  140. yield node.key
  141. def items(self):
  142. # Return an iterator that returns the (key, value) pairs in the cache
  143. # in order from the most recently to least recently used. Does not
  144. # modify the cache order.
  145. for node in self.dli():
  146. yield (node.key, node.value)
  147. def keys(self):
  148. # Return an iterator that returns the keys in the cache in order from
  149. # the most recently to least recently used. Does not modify the cache
  150. # order.
  151. for node in self.dli():
  152. yield node.key
  153. def values(self):
  154. # Return an iterator that returns the values in the cache in order
  155. # from the most recently to least recently used. Does not modify the
  156. # cache order.
  157. for node in self.dli():
  158. yield node.value
  159. def size(self, size=None):
  160. if size is not None:
  161. assert size > 0
  162. if size > self.listSize:
  163. self.addTailNode(size - self.listSize)
  164. elif size < self.listSize:
  165. self.removeTailNode(self.listSize - size)
  166. return self.listSize
  167. # Increases the size of the cache by inserting n empty nodes at the tail
  168. # of the list.
  169. def addTailNode(self, n):
  170. for i in range(n):
  171. node = _dlnode()
  172. node.next = self.head
  173. node.prev = self.head.prev
  174. self.head.prev.next = node
  175. self.head.prev = node
  176. self.listSize += n
  177. # Decreases the size of the list by removing n nodes from the tail of the
  178. # list.
  179. def removeTailNode(self, n):
  180. assert self.listSize > n
  181. for i in range(n):
  182. node = self.head.prev
  183. if not node.empty:
  184. if self.callback is not None:
  185. self.callback(node.key, node.value)
  186. del self.table[node.key]
  187. # Splice the tail node out of the list
  188. self.head.prev = node.prev
  189. node.prev.next = self.head
  190. # The next four lines are not strictly necessary.
  191. node.prev = None
  192. node.next = None
  193. node.key = None
  194. node.value = None
  195. self.listSize -= n
  196. # This method adjusts the ordering of the doubly linked list so that
  197. # 'node' directly precedes the 'head' node. Because of the order of
  198. # operations, if 'node' already directly precedes the 'head' node or if
  199. # 'node' is the 'head' node the order of the list will be unchanged.
  200. def mtf(self, node):
  201. node.prev.next = node.next
  202. node.next.prev = node.prev
  203. node.prev = self.head.prev
  204. node.next = self.head.prev.next
  205. node.next.prev = node
  206. node.prev.next = node
  207. # This method returns an iterator that iterates over the non-empty nodes
  208. # in the doubly linked list in order from the most recently to the least
  209. # recently used.
  210. def dli(self):
  211. node = self.head
  212. for i in range(len(self.table)):
  213. yield node
  214. node = node.next
  215. class WriteThroughCacheManager(object):
  216. def __init__(self, store, size):
  217. self.store = store
  218. self.cache = lrucache(size)
  219. def __len__(self):
  220. return len(self.store)
  221. # Returns/sets the size of the managed cache.
  222. def size(self, size=None):
  223. return self.cache.size(size)
  224. def clear(self):
  225. self.cache.clear()
  226. self.store.clear()
  227. def __contains__(self, key):
  228. # Check the cache first. If it is there we can return quickly.
  229. if key in self.cache:
  230. return True
  231. # Not in the cache. Might be in the underlying store.
  232. if key in self.store:
  233. return True
  234. return False
  235. def __getitem__(self, key):
  236. # First we try the cache. If successful we just return the value. If
  237. # not we catch KeyError and ignore it since that just means the key
  238. # was not in the cache.
  239. try:
  240. return self.cache[key]
  241. except KeyError:
  242. pass
  243. # It wasn't in the cache. Look it up in the store, add the entry to
  244. # the cache, and return the value.
  245. value = self.store[key]
  246. self.cache[key] = value
  247. return value
  248. def get(self, key, default=None):
  249. """Get an item - return default (None) if not present"""
  250. try:
  251. return self[key]
  252. except KeyError:
  253. return default
  254. def __setitem__(self, key, value):
  255. # Add the key/value pair to the cache and store.
  256. self.cache[key] = value
  257. self.store[key] = value
  258. def __delitem__(self, key):
  259. # Write-through behavior cache and store should be consistent. Delete
  260. # it from the store.
  261. del self.store[key]
  262. try:
  263. # Ok, delete from the store was successful. It might also be in
  264. # the cache, try and delete it. If not we catch the KeyError and
  265. # ignore it.
  266. del self.cache[key]
  267. except KeyError:
  268. pass
  269. def __iter__(self):
  270. return self.keys()
  271. def keys(self):
  272. return self.store.keys()
  273. def values(self):
  274. return self.store.values()
  275. def items(self):
  276. return self.store.items()
  277. class WriteBackCacheManager(object):
  278. def __init__(self, store, size):
  279. self.store = store
  280. # Create a set to hold the dirty keys.
  281. self.dirty = set()
  282. # Define a callback function to be called by the cache when a
  283. # key/value pair is about to be ejected. This callback will check to
  284. # see if the key is in the dirty set. If so, then it will update the
  285. # store object and remove the key from the dirty set.
  286. def callback(key, value):
  287. if key in self.dirty:
  288. self.store[key] = value
  289. self.dirty.remove(key)
  290. # Create a cache and give it the callback function.
  291. self.cache = lrucache(size, callback)
  292. # Returns/sets the size of the managed cache.
  293. def size(self, size=None):
  294. return self.cache.size(size)
  295. def clear(self):
  296. self.cache.clear()
  297. self.dirty.clear()
  298. self.store.clear()
  299. def __contains__(self, key):
  300. # Check the cache first, since if it is there we can return quickly.
  301. if key in self.cache:
  302. return True
  303. # Not in the cache. Might be in the underlying store.
  304. if key in self.store:
  305. return True
  306. return False
  307. def __getitem__(self, key):
  308. # First we try the cache. If successful we just return the value. If
  309. # not we catch KeyError and ignore it since that just means the key
  310. # was not in the cache.
  311. try:
  312. return self.cache[key]
  313. except KeyError:
  314. pass
  315. # It wasn't in the cache. Look it up in the store, add the entry to
  316. # the cache, and return the value.
  317. value = self.store[key]
  318. self.cache[key] = value
  319. return value
  320. def get(self, key, default=None):
  321. """Get an item - return default (None) if not present"""
  322. try:
  323. return self[key]
  324. except KeyError:
  325. return default
  326. def __setitem__(self, key, value):
  327. # Add the key/value pair to the cache.
  328. self.cache[key] = value
  329. self.dirty.add(key)
  330. def __delitem__(self, key):
  331. found = False
  332. try:
  333. del self.cache[key]
  334. found = True
  335. self.dirty.remove(key)
  336. except KeyError:
  337. pass
  338. try:
  339. del self.store[key]
  340. found = True
  341. except KeyError:
  342. pass
  343. if not found: # If not found in cache or store, raise error.
  344. raise KeyError
  345. def __iter__(self):
  346. return self.keys()
  347. def keys(self):
  348. for key in self.store.keys():
  349. if key not in self.dirty:
  350. yield key
  351. for key in self.dirty:
  352. yield key
  353. def values(self):
  354. for key, value in self.items():
  355. yield value
  356. def items(self):
  357. for key, value in self.store.items():
  358. if key not in self.dirty:
  359. yield (key, value)
  360. for key in self.dirty:
  361. value = self.cache.peek(key)
  362. yield (key, value)
  363. def sync(self):
  364. # For each dirty key, peek at its value in the cache and update the
  365. # store. Doesn't change the cache's order.
  366. for key in self.dirty:
  367. self.store[key] = self.cache.peek(key)
  368. # There are no dirty keys now.
  369. self.dirty.clear()
  370. def flush(self):
  371. self.sync()
  372. self.cache.clear()
  373. def __enter__(self):
  374. return self
  375. def __exit__(self, exc_type, exc_val, exc_tb):
  376. self.sync()
  377. return False
  378. class FunctionCacheManager(object):
  379. def __init__(self, func, size):
  380. self.func = func
  381. self.cache = lrucache(size)
  382. def size(self, size=None):
  383. return self.cache.size(size)
  384. def clear(self):
  385. self.cache.clear()
  386. def __call__(self, *args, **kwargs):
  387. kwtuple = tuple((key, kwargs[key]) for key in sorted(kwargs.keys()))
  388. key = (args, kwtuple)
  389. try:
  390. return self.cache[key]
  391. except KeyError:
  392. pass
  393. value = self.func(*args, **kwargs)
  394. self.cache[key] = value
  395. return value
  396. def lruwrap(store, size, writeback=False):
  397. if writeback:
  398. return WriteBackCacheManager(store, size)
  399. else:
  400. return WriteThroughCacheManager(store, size)
  401. import functools
  402. class lrudecorator(object):
  403. def __init__(self, size):
  404. self.cache = lrucache(size)
  405. def __call__(self, func):
  406. def wrapper(*args, **kwargs):
  407. kwtuple = tuple((key, kwargs[key]) for key in sorted(kwargs.keys()))
  408. key = (args, kwtuple)
  409. try:
  410. return self.cache[key]
  411. except KeyError:
  412. pass
  413. value = func(*args, **kwargs)
  414. self.cache[key] = value
  415. return value
  416. wrapper.cache = self.cache
  417. wrapper.size = self.cache.size
  418. wrapper.clear = self.cache.clear
  419. return functools.update_wrapper(wrapper, func)