version.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. """This module defines the Version classes, and the related Manifest
  2. implementations.
  3. """
  4. from __future__ import with_statement
  5. import os
  6. import pickle
  7. from webassets import six
  8. from webassets.merge import FileHunk
  9. from webassets.utils import md5_constructor, RegistryMetaclass, is_url
# Names exported by a star-import of this module.
# NOTE(review): ``JsonManifest``, ``CacheManifest`` and ``SymlinkManifest``
# are defined in this module but are not listed here -- confirm whether
# that omission is intentional.
__all__ = ('get_versioner', 'VersionIndeterminableError',
           'Version', 'TimestampVersion',
           'get_manifest', 'HashVersion', 'Manifest', 'FileManifest',)
  13. class VersionIndeterminableError(Exception):
  14. pass
  15. class Version(six.with_metaclass(RegistryMetaclass(
  16. clazz=lambda: Version, attribute='determine_version',
  17. desc='a version implementation'))):
  18. """A Version class that can be assigned to the ``Environment.versioner``
  19. attribute.
  20. Given a bundle, this must determine its "version". This version can then
  21. be used in the output filename of the bundle, or appended to the url as a
  22. query string, in order to expire cached assets.
  23. A version could be a timestamp, a content hash, or a git revision etc.
  24. As a user, all you need to care about, in most cases, is whether you want
  25. to set the ``Environment.versioner`` attribute to ``hash`` or ``timestamp``.
  26. A single instance can be used with different environments.
  27. """
  28. def determine_version(self, bundle, ctx, hunk=None):
  29. """Return a string that represents the current version of the given
  30. bundle.
  31. This method is called on two separate occasions:
  32. 1) After a bundle has been built and is about to be saved. If the
  33. output filename contains a placeholder, this method is asked for the
  34. version. This mode is indicated by the ``hunk`` argument being
  35. available.
  36. 2) When a version is required for an already built file, either
  37. because:
  38. *) An URL needs to be constructed.
  39. *) It needs to be determined if a bundle needs an update.
  40. *This will only occur* if *no manifest* is used. If there is a
  41. manifest, it would be used to determine the version instead.
  42. Support for option (2) is optional. If not supported, then in those
  43. cases a manifest needs to be configured. ``VersionIndeterminableError``
  44. should be raised with a message why.
  45. """
  46. raise NotImplementedError()
  47. def set_version(self, bundle, ctx, filename, version):
  48. """Hook called after a bundle has been built. Some version classes
  49. may need this.
  50. """
# Module-level shortcut for ``Version.resolve``. ``resolve`` is not defined
# in this file; presumably it is supplied by ``RegistryMetaclass`` --
# verify in ``webassets.utils``.
get_versioner = Version.resolve
  52. class TimestampVersion(Version):
  53. """Uses the most recent 'last modified' timestamp of all source files
  54. as the version.
  55. Uses second-precision.
  56. """
  57. id = 'timestamp'
  58. def determine_version(self, bundle, ctx, hunk=None):
  59. # Only look at an existing output file if we are not about to
  60. # overwrite it with a new version. But if we can, simply using the
  61. # timestamp of the final file is the fastest way to do this.
  62. # Note that this works because of our ``save_done`` hook.
  63. if not hunk:
  64. from webassets.bundle import has_placeholder
  65. if not has_placeholder(bundle.output):
  66. return self.get_timestamp(bundle.resolve_output(ctx))
  67. # If we need the timestamp for the file we just built (hunk!=None),
  68. # or if we need the timestamp for a bundle with a placeholder,
  69. # the way to get it is by looking at the source files.
  70. try:
  71. return self.find_recent_most_timestamp(bundle, ctx)
  72. except OSError:
  73. # Source files are missing. Under these circumstances, we cannot
  74. # return a proper version.
  75. assert hunk is None
  76. raise VersionIndeterminableError(
  77. 'source files are missing and output target has a '
  78. 'placeholder')
  79. def set_version(self, bundle, ctx, filename, version):
  80. # Update the mtime of the newly created file with the version
  81. os.utime(filename, (-1, version))
  82. @classmethod
  83. def get_timestamp(cls, filename):
  84. return int(os.stat(filename).st_mtime) # Let OSError pass
  85. @classmethod
  86. def find_recent_most_timestamp(cls, bundle, ctx):
  87. from webassets.bundle import get_all_bundle_files
  88. # Recurse through the bundle hierarchy. Check the timestamp of all
  89. # the bundle source files, as well as any additional
  90. # dependencies that we are supposed to watch.
  91. most_recent = None
  92. for filename in get_all_bundle_files(bundle, ctx):
  93. if is_url(filename):
  94. continue
  95. timestamp = cls.get_timestamp(filename)
  96. if most_recent is None or timestamp > most_recent:
  97. most_recent = timestamp
  98. return most_recent
  99. class HashVersion(Version):
  100. """Uses the MD5 hash of the content as the version.
  101. By default, only the first 8 characters of the hash are used, which
  102. should be sufficient. This can be changed by passing the appropriate
  103. ``length`` value to ``__init__`` (or ``None`` to use the full hash).
  104. You can also customize the hash used by passing the ``hash`` argument.
  105. All constructors from ``hashlib`` are supported.
  106. """
  107. id = 'hash'
  108. @classmethod
  109. def make(cls, length=None):
  110. args = [int(length)] if length else []
  111. return cls(*args)
  112. def __init__(self, length=8, hash=md5_constructor):
  113. self.length = length
  114. self.hasher = hash
  115. def determine_version(self, bundle, ctx, hunk=None):
  116. if not hunk:
  117. from webassets.bundle import has_placeholder
  118. if not has_placeholder(bundle.output):
  119. hunk = FileHunk(bundle.resolve_output(ctx))
  120. else:
  121. # Can cannot determine the version of placeholder files.
  122. raise VersionIndeterminableError(
  123. 'output target has a placeholder')
  124. hasher = self.hasher()
  125. hasher.update(hunk.data().encode('utf-8'))
  126. return hasher.hexdigest()[:self.length]
  127. class Manifest(six.with_metaclass(RegistryMetaclass(
  128. clazz=lambda: Manifest, desc='a manifest implementation'))):
  129. """Persists information about the versions bundles are at.
  130. The Manifest plays a role only if you insert the bundle version in your
  131. output filenames, or append the version as a querystring to the url (via
  132. the url_expire option). It serves two purposes:
  133. - Without a manifest, it may be impossible to determine the version
  134. at runtime. In a deployed app, the media files may be stored on
  135. a different server entirely, and be inaccessible from the application
  136. code. The manifest, if shipped with your application, is what still
  137. allows to construct the proper URLs.
  138. - Even if it were possible to determine the version at runtime without
  139. a manifest, it may be a costly process, and using a manifest may
  140. give you better performance. If you use a hash-based version for
  141. example, this hash would need to be recalculated every time a new
  142. process is started. (*)
  143. (*) It needs to happen only once per process, because Bundles are smart
  144. enough to cache their own version in memory.
  145. A special case is the ``Environment.auto_build`` option. A manifest
  146. implementation should re-read its data from its out-of-process data
  147. source on every request, if ``auto_build`` is enabled. Otherwise, if your
  148. application is served by multiple processes, then after an automatic
  149. rebuild in one process all other processes would continue to serve an old
  150. version of the file (or attach an old version to the query string).
  151. A manifest instance is currently not guaranteed to function correctly
  152. with multiple Environment instances.
  153. """
  154. def remember(self, bundle, ctx, version):
  155. raise NotImplementedError()
  156. def query(self, bundle, ctx):
  157. raise NotImplementedError()
# Module-level shortcut for ``Manifest.resolve``. ``resolve`` is not defined
# in this file; presumably it is supplied by ``RegistryMetaclass`` --
# verify in ``webassets.utils``.
get_manifest = Manifest.resolve
  159. class FileManifest(Manifest):
  160. """Stores version data in a single file.
  161. Uses Python's pickle module to store a dict data structure. You should
  162. only use this when the manifest is read-only in production, since it is
  163. not multi-process safe. If you use ``auto_build`` in production, use
  164. ``CacheManifest`` instead.
  165. By default, the file is named ".webassets-manifest" and stored in
  166. ``Environment.directory``.
  167. """
  168. id = 'file'
  169. @classmethod
  170. def make(cls, ctx, filename=None):
  171. if not filename:
  172. filename = '.webassets-manifest'
  173. return cls(os.path.join(ctx.directory, filename))
  174. def __init__(self, filename):
  175. self.filename = filename
  176. self._load_manifest()
  177. def remember(self, bundle, ctx, version):
  178. self.manifest[bundle.output] = version
  179. self._save_manifest()
  180. def query(self, bundle, ctx):
  181. if ctx.auto_build:
  182. self._load_manifest()
  183. return self.manifest.get(bundle.output, None)
  184. def _load_manifest(self):
  185. if os.path.exists(self.filename):
  186. with open(self.filename, 'rb') as f:
  187. self.manifest = pickle.load(f)
  188. else:
  189. self.manifest = {}
  190. def _save_manifest(self):
  191. with open(self.filename, 'wb') as f:
  192. pickle.dump(self.manifest, f, protocol=2)
  193. class JsonManifest(FileManifest):
  194. """Same as ``FileManifest``, but uses JSON instead of pickle."""
  195. id = 'json'
  196. def __init__(self, *a, **kw):
  197. try:
  198. import json
  199. except ImportError:
  200. import simplejson as json
  201. self.json = json
  202. super(JsonManifest, self).__init__(*a, **kw)
  203. def _load_manifest(self):
  204. if os.path.exists(self.filename):
  205. with open(self.filename, 'r') as f:
  206. self.manifest = self.json.load(f)
  207. else:
  208. self.manifest = {}
  209. def _save_manifest(self):
  210. with open(self.filename, 'w') as f:
  211. self.json.dump(self.manifest, f, indent=4, sort_keys=True)
  212. class CacheManifest(Manifest):
  213. """Stores version data in the webassets cache.
  214. Since this has bad portability (you hardly want to copy your cache between
  215. machines), this only makes sense when you are building on the same machine
  216. where you're application code runs.
  217. When you are using ``auto_build`` in production, this is exactly what you
  218. want to use, since it is multi-process safe.
  219. """
  220. id = 'cache'
  221. def _check(self, ctx):
  222. if not ctx.cache:
  223. raise EnvironmentError(
  224. 'You are using the cache manifest, but have not '
  225. 'enabled the cache.')
  226. def remember(self, bundle, ctx, version):
  227. self._check(ctx)
  228. ctx.cache.set(('manifest', bundle.output), version)
  229. def query(self, bundle, ctx):
  230. self._check(ctx)
  231. return ctx.cache.get(('manifest', bundle.output))
  232. class SymlinkManifest(Manifest):
  233. """Creates a symlink to the actual file.
  234. E.g. compressed-current.js -> compressed-1ebcdc5.js
  235. """
  236. # Implementation notes: Would presumably be Linux only initially,
  237. # could clean up after itself, may be hard to implement and maybe
  238. # shouldn't, would only we usable to resolve placeholders in filenames.
  239. def __init__(self):
  240. raise NotImplementedError() # TODO