"""This module defines the Version classes, and the related Manifest
implementations.
"""
- from __future__ import with_statement
- import os
- import pickle
- from webassets import six
- from webassets.merge import FileHunk
- from webassets.utils import md5_constructor, RegistryMetaclass, is_url
# Public names exported by a star-import of this module.
__all__ = (
    'get_versioner',
    'VersionIndeterminableError',
    'Version',
    'TimestampVersion',
    'get_manifest',
    'HashVersion',
    'Manifest',
    'FileManifest',
)
class VersionIndeterminableError(Exception):
    """Raised by a versioner that cannot work out a bundle's version.

    The exception message should explain why the version could not be
    determined.
    """
class Version(six.with_metaclass(RegistryMetaclass(
        clazz=lambda: Version,
        attribute='determine_version',
        desc='a version implementation'))):
    """Base class for objects assignable to ``Environment.versioner``.

    A versioner is handed a bundle and has to come up with its "version".
    That version may then be inserted into the bundle's output filename, or
    appended to the url as a query string, so that cached copies of an
    asset expire when it changes.

    What constitutes a version is up to the implementation: a timestamp,
    a content hash, a git revision, etc.

    Most users only have to decide whether ``Environment.versioner`` should
    be set to ``hash`` or ``timestamp``.

    One instance may be shared between different environments.
    """

    def determine_version(self, bundle, ctx, hunk=None):
        """Return a string representing the current version of ``bundle``.

        There are two situations in which this method is called:

        1) A bundle has just been built and is about to be saved. If the
           output filename contains a placeholder, the version is requested
           here. In this mode the ``hunk`` argument is provided.

        2) The version of an already built file is needed, because either:

             *) A URL needs to be constructed.
             *) It needs to be determined if a bundle needs an update.

           *This will only occur* if *no manifest* is used. With a manifest
           in place, the manifest is asked for the version instead.

        Supporting (2) is optional. An implementation that does not
        support it requires a manifest to be configured for those cases,
        and should raise ``VersionIndeterminableError`` with a message
        explaining why.
        """
        raise NotImplementedError()

    def set_version(self, bundle, ctx, filename, version):
        """Hook invoked after a bundle has been built.

        The default implementation does nothing; version classes that need
        to act on the freshly written ``filename`` may override it.
        """


# Public entry point for resolving a versioner through the class registry.
get_versioner = Version.resolve
class TimestampVersion(Version):
    """Uses the most recent 'last modified' timestamp of all source files
    as the version.

    Uses second-precision.
    """

    id = 'timestamp'

    def determine_version(self, bundle, ctx, hunk=None):
        """Return the timestamp version of ``bundle`` as an integer.

        Without a ``hunk`` and without a placeholder in the output name,
        the mtime of the existing output file is returned. Otherwise the
        newest mtime among the bundle's source files is used.

        Raises ``VersionIndeterminableError`` if the source files cannot be
        stat'ed. An ``OSError`` from stat'ing the output file is passed
        through unchanged.
        """
        # Only look at an existing output file if we are not about to
        # overwrite it with a new version. When possible, the timestamp of
        # the final file is the fastest way to get the version. This works
        # because ``set_version`` stamps the output file's mtime with the
        # version after a build.
        if not hunk:
            from webassets.bundle import has_placeholder
            if not has_placeholder(bundle.output):
                return self.get_timestamp(bundle.resolve_output(ctx))

        # If we need the timestamp for the file we just built (hunk!=None),
        # or if we need the timestamp for a bundle with a placeholder,
        # the way to get it is by looking at the source files.
        try:
            return self.find_recent_most_timestamp(bundle, ctx)
        except OSError:
            # Source files are missing. Under these circumstances, we cannot
            # return a proper version.
            assert hunk is None
            raise VersionIndeterminableError(
                'source files are missing and output target has a '
                'placeholder')

    def set_version(self, bundle, ctx, filename, version):
        """Set the mtime of the newly created ``filename`` to ``version``.

        The file's current access time is kept. ``os.utime`` has no "leave
        unchanged" sentinel, so passing a dummy value such as ``-1`` would
        move the access time to just before the epoch, and may fail on
        platforms that reject negative timestamps.
        """
        current_atime = os.stat(filename).st_atime
        os.utime(filename, (current_atime, version))

    @classmethod
    def get_timestamp(cls, filename):
        """Return the mtime of ``filename``, truncated to whole seconds."""
        return int(os.stat(filename).st_mtime)  # Let OSError pass

    @classmethod
    def find_recent_most_timestamp(cls, bundle, ctx):
        """Return the newest mtime among the files of ``bundle``.

        URLs are skipped. Returns ``None`` if there is no local file to
        look at. An ``OSError`` from a missing file is passed through.
        """
        from webassets.bundle import get_all_bundle_files
        # Recurse through the bundle hierarchy. Check the timestamp of all
        # the bundle source files, as well as any additional
        # dependencies that we are supposed to watch.
        timestamps = [
            cls.get_timestamp(filename)
            for filename in get_all_bundle_files(bundle, ctx)
            if not is_url(filename)
        ]
        return max(timestamps) if timestamps else None
class HashVersion(Version):
    """Uses the MD5 hash of the content as the version.

    Only the first 8 characters of the hex digest are used by default,
    which should be sufficient. Pass a different ``length`` to ``__init__``
    to change this (``None`` keeps the full hash).

    The hash function itself can be swapped through the ``hash`` argument.
    All constructors from ``hashlib`` are supported.
    """

    id = 'hash'

    @classmethod
    def make(cls, length=None):
        """Build an instance, converting a truthy ``length`` to an int.

        A falsy ``length`` leaves the constructor default in place.
        """
        if length:
            return cls(int(length))
        return cls()

    def __init__(self, length=8, hash=md5_constructor):
        # ``hash`` is called without arguments to create a fresh hasher for
        # every version that is computed.
        self.hasher = hash
        self.length = length

    def determine_version(self, bundle, ctx, hunk=None):
        """Return the (possibly truncated) hex digest of the bundle content.

        If no ``hunk`` is given, the existing output file is hashed.
        Raises ``VersionIndeterminableError`` if that output name contains
        a placeholder.
        """
        if not hunk:
            from webassets.bundle import has_placeholder
            if has_placeholder(bundle.output):
                # We cannot determine the version of placeholder files.
                raise VersionIndeterminableError(
                    'output target has a placeholder')
            hunk = FileHunk(bundle.resolve_output(ctx))

        digest = self.hasher()
        digest.update(hunk.data().encode('utf-8'))
        hexdigest = digest.hexdigest()
        return hexdigest[:self.length]
class Manifest(six.with_metaclass(RegistryMetaclass(
        clazz=lambda: Manifest,
        desc='a manifest implementation'))):
    """Persists information about the versions bundles are at.

    A manifest only matters if the bundle version is inserted into the
    output filenames, or appended to the url as a querystring (via the
    url_expire option). It serves two purposes:

        - Without a manifest, it may be impossible to determine the
          version at runtime. In a deployed app, the media files may live
          on a different server entirely and be inaccessible from the
          application code. A manifest shipped with the application still
          allows the proper URLs to be constructed.

        - Even where the version could be determined at runtime without a
          manifest, doing so may be costly, and a manifest may give better
          performance. A hash-based version, for example, would have to be
          recalculated every time a new process is started. (*)

    (*) It needs to happen only once per process, because Bundles are smart
        enough to cache their own version in memory.

    A special case is the ``Environment.auto_build`` option. With
    ``auto_build`` enabled, a manifest implementation should re-read its
    data from its out-of-process data source on every request. Otherwise,
    if the application is served by multiple processes, then after an
    automatic rebuild in one process all other processes would continue to
    serve an old version of the file (or attach an old version to the
    query string).

    A manifest instance is currently not guaranteed to function correctly
    with multiple Environment instances.
    """

    def remember(self, bundle, ctx, version):
        """Store ``version`` for ``bundle``; implemented by subclasses."""
        raise NotImplementedError()

    def query(self, bundle, ctx):
        """Return the stored version of ``bundle``; implemented by
        subclasses.
        """
        raise NotImplementedError()


# Public entry point for resolving a manifest through the class registry.
get_manifest = Manifest.resolve
class FileManifest(Manifest):
    """Stores version data in a single file.

    The data is a dict, serialized with Python's pickle module. Because
    this is not multi-process safe, it should only be used when the
    manifest is read-only in production. If you use ``auto_build`` in
    production, use ``CacheManifest`` instead.

    By default, the file is named ".webassets-manifest" and stored in
    ``Environment.directory``.
    """

    id = 'file'

    @classmethod
    def make(cls, ctx, filename=None):
        """Create a manifest stored inside ``ctx.directory``.

        A falsy ``filename`` selects the default ".webassets-manifest".
        """
        name = filename or '.webassets-manifest'
        return cls(os.path.join(ctx.directory, name))

    def __init__(self, filename):
        self.filename = filename
        # Populate ``self.manifest`` right away.
        self._load_manifest()

    def remember(self, bundle, ctx, version):
        """Record ``version`` under the bundle's output name and write the
        manifest file.
        """
        self.manifest[bundle.output] = version
        self._save_manifest()

    def query(self, bundle, ctx):
        """Return the version stored for the bundle's output name, or
        ``None`` if there is none.
        """
        # With auto_build the file is re-read on every query, so changes
        # written to the file since the last load are picked up.
        if ctx.auto_build:
            self._load_manifest()
        return self.manifest.get(bundle.output)

    def _load_manifest(self):
        """Read the manifest dict from disk, or start with an empty one if
        the file does not exist.
        """
        if not os.path.exists(self.filename):
            self.manifest = {}
            return
        # NOTE(review): unpickling can execute arbitrary code; the manifest
        # file must only ever come from a trusted location.
        with open(self.filename, 'rb') as stream:
            self.manifest = pickle.load(stream)

    def _save_manifest(self):
        """Write the manifest dict to disk using pickle protocol 2."""
        with open(self.filename, 'wb') as stream:
            pickle.dump(self.manifest, stream, protocol=2)
class JsonManifest(FileManifest):
    """Same as ``FileManifest``, but uses JSON instead of pickle."""

    id = 'json'

    def __init__(self, *args, **kwargs):
        try:
            import json
        except ImportError:
            import simplejson as json
        # Must be bound before the parent initializer runs, because that
        # one immediately calls ``_load_manifest``, which needs the module.
        self.json = json
        super(JsonManifest, self).__init__(*args, **kwargs)

    def _load_manifest(self):
        """Read the manifest from the JSON file, or start with an empty
        dict if the file does not exist.
        """
        if not os.path.exists(self.filename):
            self.manifest = {}
            return
        with open(self.filename, 'r') as stream:
            self.manifest = self.json.load(stream)

    def _save_manifest(self):
        """Write the manifest as indented JSON with sorted keys."""
        with open(self.filename, 'w') as stream:
            self.json.dump(self.manifest, stream, indent=4, sort_keys=True)
class CacheManifest(Manifest):
    """Stores version data in the webassets cache.

    Since this has bad portability (you hardly want to copy your cache
    between machines), this only makes sense when you are building on the
    same machine where your application code runs.

    When you are using ``auto_build`` in production, this is exactly what
    you want to use, since it is multi-process safe.
    """

    id = 'cache'

    def _check(self, ctx):
        """Raise ``EnvironmentError`` unless ``ctx.cache`` is truthy."""
        if ctx.cache:
            return
        raise EnvironmentError(
            'You are using the cache manifest, but have not '
            'enabled the cache.')

    def remember(self, bundle, ctx, version):
        """Store ``version`` in the cache, keyed by the bundle's output."""
        self._check(ctx)
        cache_key = ('manifest', bundle.output)
        ctx.cache.set(cache_key, version)

    def query(self, bundle, ctx):
        """Return whatever the cache holds for the bundle's output."""
        self._check(ctx)
        cache_key = ('manifest', bundle.output)
        return ctx.cache.get(cache_key)
class SymlinkManifest(Manifest):
    """Creates a symlink to the actual file.

    E.g. compressed-current.js -> compressed-1ebcdc5.js
    """

    # Implementation notes: Would presumably be Linux only initially,
    # could clean up after itself, may be hard to implement and maybe
    # shouldn't, would only be usable to resolve placeholders in filenames.

    def __init__(self):
        # Not implemented yet; instantiating always fails.
        raise NotImplementedError()  # TODO
|