123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379 |
- # Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0
- # For details: https://bitbucket.org/ned/coveragepy/src/default/NOTICE.txt
- """File wrangling."""
- import fnmatch
- import ntpath
- import os
- import os.path
- import posixpath
- import re
- import sys
- from coverage import env
- from coverage.backward import unicode_class
- from coverage.misc import contract, CoverageException, join_regex, isolate_module
- os = isolate_module(os)
def set_relative_directory():
    """Record the current directory as the base for `relative_filename`.

    Rebinds the module globals `RELATIVE_DIR` and `CANONICAL_FILENAME_CACHE`.

    """
    global RELATIVE_DIR, CANONICAL_FILENAME_CACHE

    # Absolute form of the current directory, with a trailing separator so
    # that prefix tests only match whole path components.
    here = abs_file(os.curdir)
    RELATIVE_DIR = os.path.normcase(here + os.sep)

    # Start over with an empty memo for canonical_filename() results.
    CANONICAL_FILENAME_CACHE = {}
def relative_directory():
    """Return the directory that `relative_filename` is relative to.

    This is the module-level `RELATIVE_DIR` value, as last assigned by
    `set_relative_directory`.

    """
    return RELATIVE_DIR
@contract(returns='unicode')
def relative_filename(filename):
    """Return `filename` relative to the recorded base directory.

    The base is `RELATIVE_DIR`, the directory that was current when
    `set_relative_directory` was called.  The comparison is made on the
    case-normalized name; a file name that is not under the base directory
    is returned without trimming.

    """
    base = RELATIVE_DIR
    is_under_base = os.path.normcase(filename).startswith(base)
    trimmed = filename[len(base):] if is_under_base else filename
    return unicode_filename(trimmed)
@contract(returns='unicode')
def canonical_filename(filename):
    """Return a canonical file name for `filename`.

    The result is `abs_file` applied to the located file.  A relative
    `filename` is first looked for in the current directory and then in
    each `sys.path` entry; the first existing candidate is used.  If none
    exists, `filename` itself is made absolute.

    Results are memoized in `CANONICAL_FILENAME_CACHE`, keyed by the
    `filename` argument exactly as the caller passed it.

    """
    try:
        return CANONICAL_FILENAME_CACHE[filename]
    except KeyError:
        pass

    # Keep the caller's `filename` untouched: it is the cache key.  Storing
    # the result under the located path instead would mean a relative name
    # never hits the cache and is searched for again on every call.
    located = filename
    if not os.path.isabs(filename):
        for path in [os.curdir] + sys.path:
            if path is None:
                continue
            candidate = os.path.join(path, filename)
            if os.path.exists(candidate):
                located = candidate
                break

    canonical = abs_file(located)
    CANONICAL_FILENAME_CACHE[filename] = canonical
    return canonical
def flat_rootname(filename):
    """Return a flattened base name that stands for `filename`.

    Handy when files describing source code are all written into a single
    directory, yet same-named sources from different directories must stay
    distinct.  Any Windows drive prefix is dropped, and each slash,
    backslash, dot, and colon becomes an underscore.

    For example, the file a/b/c.py will return 'a_b_c_py'

    """
    _drive, remainder = ntpath.splitdrive(filename)
    return re.sub(r"[\\/.:]", "_", remainder)
if env.WINDOWS:

    # Memo of path -> actual path, and of directory -> its listing.
    _ACTUAL_PATH_CACHE = {}
    _ACTUAL_PATH_LIST_CACHE = {}

    def actual_path(path):
        """Get the actual path of `path`, including the correct case.

        Each component is compared, case-normalized, against the listing
        of its parent directory, and the spelling found there is used.  A
        component with no match in the listing is kept as given.

        """
        if env.PY2 and isinstance(path, unicode_class):
            path = path.encode(sys.getfilesystemencoding())
        try:
            return _ACTUAL_PATH_CACHE[path]
        except KeyError:
            pass

        parent, leaf = os.path.split(path)
        if not leaf:
            # Nothing after the last separator: `parent` is the drive
            # spec, so normalize it.
            resolved = parent.upper()
        elif not parent:
            resolved = leaf
        else:
            parent = actual_path(parent)
            try:
                entries = _ACTUAL_PATH_LIST_CACHE[parent]
            except KeyError:
                try:
                    entries = os.listdir(parent)
                except OSError:
                    # An unreadable directory is treated as empty.
                    entries = []
                _ACTUAL_PATH_LIST_CACHE[parent] = entries
            wanted = os.path.normcase(leaf)
            for entry in entries:
                if os.path.normcase(entry) == wanted:
                    leaf = entry
                    break
            resolved = os.path.join(parent, leaf)

        _ACTUAL_PATH_CACHE[path] = resolved
        return resolved

else:
    def actual_path(filename):
        """The actual path for non-Windows platforms: `filename` as given."""
        return filename
if env.PY2:
    @contract(returns='unicode')
    def unicode_filename(filename):
        """Return a Unicode version of `filename`.

        A byte string is decoded with the file system encoding, or the
        default encoding if there is none, replacing undecodable bytes.
        Anything else is returned as it is.

        """
        if not isinstance(filename, str):
            return filename
        codec = sys.getfilesystemencoding() or sys.getdefaultencoding()
        return filename.decode(codec, "replace")
else:
    @contract(filename='unicode', returns='unicode')
    def unicode_filename(filename):
        """Return a Unicode version of `filename`: the argument itself."""
        return filename
@contract(returns='unicode')
def abs_file(filename):
    """Return the absolute normalized form of `filename`.

    In order: `~` and environment variables are expanded, the path is
    resolved with `os.path.realpath` and made absolute, then it is passed
    through `actual_path` and `unicode_filename`.

    """
    expanded = os.path.expanduser(filename)
    expanded = os.path.expandvars(expanded)
    resolved = os.path.realpath(expanded)
    absolute = os.path.abspath(resolved)
    return unicode_filename(actual_path(absolute))
# Module-level state.  Both names are bound to None first so that they
# exist, then set_relative_directory() gives them their real values at
# import time.
RELATIVE_DIR = CANONICAL_FILENAME_CACHE = None
set_relative_directory()
def isabs_anywhere(filename):
    """Is `filename` an absolute path under Windows or POSIX rules?"""
    return any(flavor.isabs(filename) for flavor in (ntpath, posixpath))
def prep_patterns(patterns):
    """Prepare the file patterns for use in a `FnmatchMatcher`.

    A pattern that begins with a wildcard (`*` or `?`) is kept exactly as
    written.  Any other pattern is made absolute with `abs_file`.

    If `patterns` is None, an empty list is returned.

    """
    wildcards = ("*", "?")
    return [
        pat if pat.startswith(wildcards) else abs_file(pat)
        for pat in patterns or []
    ]
class TreeMatcher(object):
    """A matcher for files that live in one of a set of directory trees."""

    def __init__(self, directories):
        # Keep our own list so later changes to the argument don't affect us.
        self.dirs = []
        self.dirs.extend(directories)

    def __repr__(self):
        return "<TreeMatcher %r>" % (self.dirs,)

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.dirs

    def match(self, fpath):
        """Does `fpath` indicate a file in one of our trees?

        True if `fpath` equals one of the directories, or starts with one
        of them followed immediately by `os.sep`.

        """
        for root in self.dirs:
            if not fpath.startswith(root):
                continue
            # With a shared prefix, equal length means the very same path;
            # otherwise the next character must be a separator, so that
            # "/a/bc" is not taken to be inside "/a/b".
            if len(fpath) == len(root) or fpath[len(root)] == os.sep:
                return True
        return False
class ModuleMatcher(object):
    """A matcher for modules that belong to one of a set of packages."""

    def __init__(self, module_names):
        # Keep our own list so later changes to the argument don't affect us.
        self.modules = []
        self.modules.extend(module_names)

    def __repr__(self):
        return "<ModuleMatcher %r>" % (self.modules,)

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.modules

    def match(self, module_name):
        """Does `module_name` indicate a module in one of our packages?

        True if `module_name` equals one of the names, or starts with one
        of them followed immediately by a dot.  An empty or None
        `module_name` never matches.

        """
        if not module_name:
            return False

        for package in self.modules:
            if not module_name.startswith(package):
                continue
            # With a shared prefix, equal length means the same module;
            # otherwise a dot must follow, so "pkgx" is not inside "pkg".
            same_length = len(module_name) == len(package)
            if same_length or module_name[len(package)] == '.':
                return True
        return False
class FnmatchMatcher(object):
    """A matcher for files by file name pattern.

    `pats` is a sequence of `fnmatch`-style patterns.  They are translated
    to regexes, combined with `join_regex`, and compiled once into
    `self.re`.

    """

    def __init__(self, pats):
        self.pats = pats[:]
        # fnmatch is platform-specific. On Windows, it does the Windows thing
        # of treating / and \ as equivalent. But on other platforms, we need to
        # take care of that ourselves.
        # NOTE(review): this relies on the translated regex spelling a slash
        # as `\/`; where `re.escape` leaves "/" unescaped this is a no-op.
        fnpats = (fnmatch.translate(p) for p in pats)
        fnpats = (p.replace(r"\/", r"[\\/]") for p in fnpats)

        # Windows is also case-insensitive.  Ask for that with a compile
        # flag rather than by appending "(?i)" to each pattern: an inline
        # global flag that is not at the very start of the expression is
        # deprecated, and newer versions of `re` reject it outright.
        flags = 0
        if env.WINDOWS:
            flags |= re.IGNORECASE
        self.re = re.compile(join_regex(fnpats), flags)

    def __repr__(self):
        return "<FnmatchMatcher %r>" % self.pats

    def info(self):
        """A list of strings for displaying when dumping state."""
        return self.pats

    def match(self, fpath):
        """Does `fpath` match one of our file name patterns?"""
        return self.re.match(fpath) is not None
def sep(s):
    """Return the first path separator found in `s`, or os.sep if none."""
    found = re.search(r"[\\/]", s)
    return found.group(0) if found else os.sep
class PathAliases(object):
    """A collection of aliases for paths.

    When combining data files from remote machines, often the paths to source
    code are different, for example, due to OS differences, or because of
    serialized checkouts on continuous integration machines.

    A `PathAliases` object tracks a list of pattern/result pairs, and can
    map a path through those aliases to produce a unified path.

    """

    def __init__(self):
        # A list of (regex, result, pattern_sep, result_sep) tuples, in the
        # order they were added.
        self.aliases = []

    def add(self, pattern, result):
        """Add the `pattern`/`result` pair to the list of aliases.

        `pattern` is an `fnmatch`-style pattern.  `result` is a simple
        string.  When mapping paths, if a path starts with a match against
        `pattern`, then that match is replaced with `result`.  This models
        isomorphic source trees being rooted at different places on two
        different machines.

        `pattern` can't end with a wildcard component, since that would
        match an entire tree, and not just its root.  A `CoverageException`
        is raised if it does.

        """
        # The pattern can't end with a wildcard component.
        pattern = pattern.rstrip(r"\/")
        if pattern.endswith("*"):
            raise CoverageException("Pattern must not end with wildcards.")
        pattern_sep = sep(pattern)

        # The pattern is meant to match a filepath.  Let's make it absolute
        # unless it already is, or is meant to match any prefix.
        if not pattern.startswith('*') and not isabs_anywhere(pattern):
            pattern = abs_file(pattern)
        pattern += pattern_sep

        # Make a regex from the pattern.  fnmatch anchors its regex to match
        # the whole string, which we don't want: we only match a prefix.
        # Depending on the Python version the anchor is `\Z` followed by a
        # flags group, or a trailing `\Z` (or `\z`), so remove it in either
        # position.  Only an anchor before "(" or at the very end is
        # touched, which leaves a literal "\Z" inside the pattern alone.
        regex_pat = fnmatch.translate(pattern)
        regex_pat = re.sub(r"\\[Zz](\(|$)", r"\1", regex_pat)

        # We want */a/b.py to match on Windows too, so change slash to match
        # either separator.
        # NOTE(review): this relies on the translated regex spelling a slash
        # as `\/`; where `re.escape` leaves "/" unescaped this is a no-op.
        regex_pat = regex_pat.replace(r"\/", r"[\\/]")

        # We want case-insensitive matching, so add that flag.
        regex = re.compile(regex_pat, re.IGNORECASE)

        # Normalize the result: it must end with a path separator.
        result_sep = sep(result)
        result = result.rstrip(r"\/") + result_sep
        self.aliases.append((regex, result, pattern_sep, result_sep))

    def map(self, path):
        """Map `path` through the aliases.

        `path` is checked against all of the patterns.  The first pattern to
        match is used to replace the root of the path with the result root.
        Only one pattern is ever used.  If no patterns match, `path` is
        returned unchanged.

        The separator style in the result is made to match that of the result
        in the alias.

        Returns the mapped path.  If a mapping has happened, this is a
        canonical path.  If no mapping has happened, it is the original value
        of `path` unchanged.

        """
        for regex, result, _pattern_sep, result_sep in self.aliases:
            m = regex.match(path)
            if not m:
                continue

            # Swap only the matched root.  A plain str.replace() of the
            # matched text would also rewrite any later part of the path
            # that happens to repeat it.
            new = result + path[m.end():]

            # Convert the separators the path actually uses.  The pattern's
            # separator can differ from the path's, for example when "*/a"
            # has matched a backslash-separated path.
            path_sep = sep(path)
            if path_sep != result_sep:
                new = new.replace(path_sep, result_sep)
            return canonical_filename(new)
        return path
def find_python_files(dirname):
    """Yield all of the importable Python files in `dirname`, recursively.

    A sub-directory only counts as importable if it holds a __init__.py;
    one without it is skipped along with everything beneath it.  `dirname`
    itself is exempt from that check: it was named directly, so the user
    is assumed to know best.

    """
    # We're only interested in files that look like reasonable Python
    # files: Must end with .py or .pyw, and must not have certain funny
    # characters that probably mean they are editor junk.
    looks_like_python = re.compile(r"^[^.#~!$@%^&*()+=,]+\.pyw?$").match

    for visit, (here, subdirs, names) in enumerate(os.walk(dirname)):
        is_top = (visit == 0)
        if not is_top and '__init__.py' not in names:
            # Not a package: empty the list in place so os.walk doesn't
            # descend any further, and take no files from here.
            subdirs[:] = []
            continue
        for name in names:
            if looks_like_python(name):
                yield os.path.join(here, name)
|