link_morgue 3.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137
  1. #! /usr/bin/env python3
  2. #
  3. # Copyright (C) 2019, Ansgar Burchardt <ansgar@debian.org>
  4. # License: GPL-2+
  5. #
  6. # This program is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 2 of the License, or
  9. # (at your option) any later version.
  10. #
  11. # This program is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with this program. If not, see <https://www.gnu.org/licenses/>.
  18. import argparse
  19. import gzip
  20. import hashlib
  21. import os
  22. import signal
  23. import sys
  24. from contextlib import contextmanager
  25. from typing import BinaryIO, Set
# A set of file digests. Entries are compared against the values
# returned by `hash_fh`/`hash_file`, i.e. hexadecimal SHA-1 digests
# encoded as ASCII bytes.
Hashes = Set[bytes]
  27. def hash_fh(fh: BinaryIO) -> bytes:
  28. h = hashlib.sha1()
  29. buf = b"dummy"
  30. while len(buf) > 0:
  31. buf = fh.read(32768)
  32. h.update(buf)
  33. return h.hexdigest().encode('ascii')
  34. def hash_file(filename: bytes) -> bytes:
  35. with open(filename, 'rb') as fh:
  36. return hash_fh(fh)
  37. def load_hashes(path) -> Hashes:
  38. with gzip.open(path, 'rb') as fh:
  39. return set(h.strip() for h in fh)
  40. @contextmanager
  41. def IgnoreSignals():
  42. handlers = [
  43. (sig, signal.signal(sig, signal.SIG_IGN))
  44. for sig in (signal.SIGHUP, signal.SIGINT, signal.SIGTERM)
  45. ]
  46. yield
  47. for sig, handler in handlers:
  48. if handler is None:
  49. handler = signal.SIG_DFL
  50. signal.signal(sig, handler)
  51. def replace_file(path: bytes, hash: bytes, base: bytes) -> None:
  52. target = os.path.join(base, hash[0:2], hash[2:4], hash)
  53. with IgnoreSignals():
  54. os.unlink(path)
  55. os.symlink(target, path)
  56. def keep_file(path: bytes) -> None:
  57. target = path + b".nosnapshot"
  58. with open(target, 'x') as fh:
  59. pass
  60. def process_file(path: bytes, known_hashes: Hashes, base: bytes) -> None:
  61. """
  62. Replace file `path` with a symlink below `base` if the file is
  63. known, otherwise create `{path}.nosnapshot` to avoid checking the file
  64. again later.
  65. """
  66. h = hash_file(path)
  67. if h in known_hashes:
  68. replace_file(path, h, base)
  69. else:
  70. keep_file(path)
  71. def scan_directory(path: bytes):
  72. """
  73. Returns paths to regular files in `path` and subdirectories,
  74. skipping `*.nosnapshot` and files `fn` for which `{fn}.nosnapshot`
  75. exists.
  76. """
  77. directories = []
  78. filenames = []
  79. # We do not use `os.walk` as `os.scandir` allows us to skip
  80. # symlinks without an extra `stat()` call.
  81. for entry in os.scandir(path):
  82. if entry.is_dir(follow_symlinks=False):
  83. directories.append(entry.path)
  84. elif entry.is_file(follow_symlinks=False):
  85. filenames.append(entry.path)
  86. yield from (fn for fn in filenames
  87. if fn + b".nosnapshot" not in filenames
  88. and not fn.endswith(b".nosnapshot")
  89. and not fn.endswith(b"/.nobackup"))
  90. for path in directories:
  91. yield from scan_directory(path)
  92. def process_directory(path: bytes, known_hashes: Hashes, base: bytes) -> None:
  93. os.chdir(path)
  94. for fn in scan_directory(b"."):
  95. process_file(fn, known_hashes, base)
  96. def run(config):
  97. known_hashes = load_hashes(config.known_hashes)
  98. process_directory(config.morguedir.encode(), known_hashes, config.farmdir.encode())
  99. def main(argv=sys.argv[1:]):
  100. parser = argparse.ArgumentParser(
  101. description="replace files in morgue with symlinks to snapshot.d.o"
  102. )
  103. parser.add_argument("--known-hashes", type=str, required=True)
  104. parser.add_argument("--farmdir", type=str, required=True)
  105. parser.add_argument("--morguedir", type=str, required=True)
  106. config = parser.parse_args(argv)
  107. run(config)
# Run `main` only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()