cache.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240
  1. """Caches are used for multiple things:
  2. - To speed up asset building. Filter operations every step
  3. of the way can be cached, so that individual parts of a
  4. build that haven't changed can be reused.
  5. - Bundle definitions are cached when a bundle is built so we
  6. can determine whether they have changed and whether a rebuild
  7. is required.
  8. This data is not all stored in the same cache necessarily. The
  9. classes in this module provide the "environment.cache" object, but
  10. also serve in other places.
  11. """
  12. import os
  13. from os import path
  14. import errno
  15. import tempfile
  16. import warnings
  17. from webassets import six
  18. from webassets.merge import BaseHunk
  19. from webassets.filter import Filter, freezedicts
  20. from webassets.utils import md5_constructor, pickle
  21. import types
  22. __all__ = ('FilesystemCache', 'MemoryCache', 'get_cache',)
  23. def make_hashable(data):
  24. """Ensures ``data`` can be hashed().
  25. Mostly needs to support dict. The other special types we use
  26. as hash keys (Hunks, Filters) already have a proper hash() method.
  27. See also ``make_md5``.
  28. Note that we do not actually hash the data for the memory cache.
  29. """
  30. return freezedicts(data)
  31. def make_md5(*data):
  32. """Make a md5 hash based on``data``.
  33. Specifically, this knows about ``Hunk`` objects, and makes sure
  34. the actual content is hashed.
  35. This is very conservative, and raises an exception if there are
  36. data types that it does not explicitly support. This is because
  37. we had in the past some debugging headaches with the cache not
  38. working for this very reason.
  39. MD5 is faster than sha, and we don't care so much about collisions.
  40. We care enough however not to use hash().
  41. """
  42. def walk(obj):
  43. if isinstance(obj, (tuple, list, frozenset)):
  44. for item in obj:
  45. for d in walk(item): yield d
  46. elif isinstance(obj, (dict)):
  47. for k in sorted(obj.keys()):
  48. for d in walk(k): yield d
  49. for d in walk(obj[k]): yield d
  50. elif isinstance(obj, BaseHunk):
  51. yield obj.data().encode('utf-8')
  52. elif isinstance(obj, int):
  53. yield str(obj).encode('utf-8')
  54. elif isinstance(obj, six.text_type):
  55. yield obj.encode('utf-8')
  56. elif isinstance(obj, six.binary_type):
  57. yield obj
  58. elif hasattr(obj, "id"):
  59. for i in walk(obj.id()):
  60. yield i
  61. elif obj is None:
  62. yield "None".encode('utf-8')
  63. elif isinstance(obj, types.FunctionType):
  64. yield str(hash(obj)).encode('utf-8')
  65. else:
  66. raise ValueError('Cannot MD5 type %s' % type(obj))
  67. md5 = md5_constructor()
  68. for d in walk(data):
  69. md5.update(d)
  70. return md5.hexdigest()
  71. def safe_unpickle(string):
  72. """Unpickle the string, or return ``None`` if that fails."""
  73. try:
  74. return pickle.loads(string)
  75. except:
  76. return None
  77. class BaseCache(object):
  78. """Abstract base class.
  79. The cache key must be something that is supported by the Python hash()
  80. function. The cache value may be a string, or anything that can be pickled.
  81. Since the cache is used for multiple purposes, all webassets-internal code
  82. should always tag its keys with an id, like so:
  83. key = ("tag", actual_key)
  84. One cache instance can only be used safely with a single Environment.
  85. """
  86. def get(self, key):
  87. """Should return the cache contents, or False.
  88. """
  89. raise NotImplementedError()
  90. def set(self, key, value):
  91. raise NotImplementedError()
  92. class MemoryCache(BaseCache):
  93. """Caches stuff in the process memory.
  94. WARNING: Do NOT use this in a production environment, where you
  95. are likely going to have multiple processes serving the same app!
  96. Note that the keys are used as-is, not passed through hash() (which is
  97. a difference: http://stackoverflow.com/a/9022664/15677). However, the
  98. reason we don't is because the original value is nicer to debug.
  99. """
  100. def __init__(self, capacity):
  101. self.capacity = capacity
  102. self.keys = []
  103. self.cache = {}
  104. def __eq__(self, other):
  105. """Return equality with the config values that instantiate
  106. this instance.
  107. """
  108. return False == other or \
  109. None == other or \
  110. id(self) == id(other)
  111. def get(self, key):
  112. key = make_md5(make_hashable(key))
  113. return self.cache.get(key, None)
  114. def set(self, key, value):
  115. key = make_md5(make_hashable(key))
  116. self.cache[key] = value
  117. try:
  118. self.keys.remove(key)
  119. except ValueError:
  120. pass
  121. self.keys.append(key)
  122. # limit cache to the given capacity
  123. to_delete = self.keys[0:max(0, len(self.keys)-self.capacity)]
  124. self.keys = self.keys[len(to_delete):]
  125. for item in to_delete:
  126. del self.cache[item]
  127. class FilesystemCache(BaseCache):
  128. """Uses a temporary directory on the disk.
  129. """
  130. V = 2 # We have changed the cache format once
  131. def __init__(self, directory, new_file_mode=None):
  132. self.directory = directory
  133. self.new_file_mode = new_file_mode
  134. def __eq__(self, other):
  135. """Return equality with the config values
  136. that instantiate this instance.
  137. """
  138. return True == other or \
  139. self.directory == other or \
  140. id(self) == id(other)
  141. def get(self, key):
  142. filename = path.join(self.directory, '%s' % make_md5(self.V, key))
  143. try:
  144. f = open(filename, 'rb')
  145. except IOError as e:
  146. if e.errno != errno.ENOENT:
  147. raise
  148. return None
  149. try:
  150. result = f.read()
  151. finally:
  152. f.close()
  153. unpickled = safe_unpickle(result)
  154. if unpickled is None:
  155. warnings.warn('Ignoring corrupted cache file %s' % filename)
  156. return unpickled
  157. def set(self, key, data):
  158. md5 = '%s' % make_md5(self.V, key)
  159. filename = path.join(self.directory, md5)
  160. fd, temp_filename = tempfile.mkstemp(prefix='.' + md5,
  161. dir=self.directory)
  162. try:
  163. with os.fdopen(fd, 'wb') as f:
  164. pickle.dump(data, f)
  165. f.flush()
  166. # If a non default mode is specified, then chmod the file to
  167. # it before renaming it into place
  168. if self.new_file_mode is not None:
  169. os.chmod(temp_filename, self.new_file_mode)
  170. if os.path.isfile(filename):
  171. os.unlink(filename)
  172. os.rename(temp_filename, filename)
  173. except:
  174. os.unlink(temp_filename)
  175. raise
  176. def get_cache(option, ctx):
  177. """Return a cache instance based on ``option``.
  178. """
  179. if not option:
  180. return None
  181. if isinstance(option, BaseCache):
  182. return option
  183. elif isinstance(option, type) and issubclass(option, BaseCache):
  184. return option()
  185. if option is True:
  186. directory = path.join(ctx.directory, '.webassets-cache')
  187. # Auto-create the default directory
  188. if not path.exists(directory):
  189. os.makedirs(directory)
  190. else:
  191. directory = option
  192. return FilesystemCache(directory, ctx.cache_file_mode)