#!/usr/bin/python2
# -*- coding: utf-8 -*-
#
# Author: Helge Jensen <hej@actua.dk>
"""
snapbtr is a small utility that keeps snapshots of btrfs filesystems.

You can run it regularly (for example in a small script in
cron.hourly), or once in a while, to maintain an "interesting" (see
below) set of snapshots (backups). You may manually add or remove
snapshots as you like; use 'snapbtr.DATE_FORMAT' (in GMT) as
snapshot-name.

It will keep at most --target-backups snapshots and ensure that
--target-freespace is available on the file-system by selecting
snapshots to remove.

Using --keep-backups, you can ensure that at least some backups are
kept, even if --target-freespace cannot be satisfied.

snapbtr will keep backups with exponentially increasing distance as
you go back in time. It does this by selecting snapshots to remove as
follows.

The snapshot to remove is selected by "scoring" each space between
snapshots, (newer,older). snapbtr will remove the older of the two
snapshots in the space that has the lowest score.

The scoring mechanism integrates e^x from (now-newer) to (now-older)
so, new pairs will have high value, even if they are tightly packed,
while older pairs will have high value if they are far apart.

The mechanism is completely self-contained and you can delete any
snapshot manually or any files in the snapshots.

I personally install snapbtr.py into /var/backups/snapbtr which is
only accessible to root and run it from a small script in
cron.hourly. I also install the kernel-nfs-server package and
nfs-export /var/backups/snapbtr in /etc/exports:

  /var/backups/snapbtr/ 127.0.0.1(ro,no_root_squash,async,no_subtree_check)

and mount it at /mnt/restore, in /etc/fstab:

  localhost:/var/backups/snapbtr/ /mnt/restore nfs _netdev,nosuid,rsize=8192,hard,intr,ro 0 2

That way, all users can use the backups in /mnt/restore, but cannot
exploit security-bugs or tamper with the content.
"""
import calendar
import math
import os
import os.path
import re
import shutil
import stat
import statvfs
import sys
import time
  39. DATE_FORMAT = '%Y%m%d-%H%M%S' # date format used for directories to clean
  40. DEFAULT_KEEP_BACKUPS = 2
  41. # find TIME_SCALE: t < 2**32 => e**(t/c) < 2**32
  42. TIME_SCALE = math.ceil(float((2**32)/math.log(2**32)))
  43. def timef(x):
  44. """make value inverse exponential in the time passed"""
  45. try:
  46. v = math.exp(
  47. time.mktime(
  48. time.strptime(
  49. os.path.split(x)[1],
  50. DATE_FORMAT))
  51. /TIME_SCALE)
  52. except:
  53. v = None
  54. return v
  55. def sorted_value(dirs):
  56. if len(dirs) <= 0:
  57. return dirs
  58. else:
  59. return _sorted_value(dirs)
  60. def _sorted_value(dirs):
  61. """Iterate dirs, sorted by their relative value when deleted"""
  62. def poles(items):
  63. """Yield (items[0], items[1]), (items[1], items[2]), ... (items[n-1], items[n])"""
  64. rest = iter(items)
  65. last = rest.next()
  66. for next in rest:
  67. yield (last, next)
  68. last = next
  69. def all_but_last(items):
  70. """Yield items[0], ..., items[n-1]"""
  71. rest = iter(items)
  72. last = rest.next()
  73. for x in rest:
  74. yield last
  75. last = x
  76. # Remaining candidates for yield,
  77. # except the "max" one (latest)
  78. candidates = dict(
  79. all_but_last((x, xf)
  80. for xf, x
  81. in sorted((timef(y), y) for y in dirs)
  82. if xf))
  83. # Keep going as long as there is anything to remove
  84. while len(candidates) > 1:
  85. # Get candidates ordered by timestamp (as v is monitonic in timestamp)
  86. remain = sorted((v,k) for k,v in candidates.iteritems())
  87. # Find the "amount of information we loose by deleting the
  88. # latest of the pair"
  89. diffs = list((to_tf - frm_tf, frm, to)
  90. for ((frm_tf, frm), (to_tf, to))
  91. in poles(remain))
  92. # Select the least important one
  93. mdiff, mfrm, mto = min(diffs)
  94. del candidates[mto] # That's not a candidate any longer, it's gonna go
  95. yield mto
  96. # Auch, we must delete the last entry
  97. yield candidates.iterkeys().next()
  98. def freespace(path):
  99. st = os.statvfs(path)
  100. return st[statvfs.F_BFREE] * st[statvfs.F_FRSIZE]
  101. class Operations:
  102. def __init__(self, path, trace = None):
  103. self.tracef = trace
  104. self.path = path
  105. def check_call(self, args):
  106. cmd_str = " ".join(args)
  107. self.trace(cmd_str)
  108. import subprocess
  109. p = subprocess.Popen(
  110. args,
  111. stdout=subprocess.PIPE)
  112. stdout = p.communicate()[0]
  113. self.trace(stdout)
  114. if p.returncode != 0:
  115. raise Exception("failed %s" % cmd_str)
  116. return p.returncode
  117. def unsnap(self, dir):
  118. import subprocess
  119. args = ["btrfs", "subvolume", "delete",
  120. os.path.join(self.path, dir)]
  121. self.check_call(args)
  122. def freespace(self):
  123. return freespace(self.path)
  124. def listdir(self):
  125. return [d for d in os.listdir(self.path)
  126. if timef(d)]
  127. def snap(self, path):
  128. import subprocess
  129. args = ["btrfs", "subvolume", "snapshot",
  130. path,
  131. os.path.join(self.path, self.datestamp())]
  132. self.check_call(args)
  133. def datestamp(self):
  134. return time.strftime(DATE_FORMAT, time.gmtime(None))
  135. def trace(self, *args, **kwargs):
  136. f = self.tracef
  137. if f:
  138. f(*args, **kwargs)
  139. class FakeOperations(Operations):
  140. def __init__(self,
  141. path,
  142. trace = None,
  143. dirs = None,
  144. space = None,
  145. snap_space = None):
  146. Operations.__init__(self, path = path, trace = trace)
  147. if dirs is None:
  148. dirs = {}
  149. if space is None:
  150. space = 0
  151. self.dirs = dirs
  152. self.space = space
  153. if snap_space is None:
  154. snap_space = 1
  155. self.snap_space = snap_space
  156. def snap(self, path):
  157. self.trace("snap(%s)", path)
  158. self.dirs[self.datestamp()] = self.snap_space
  159. def unsnap(self, dir):
  160. self.trace("unsnap(%s)", dir)
  161. v = self.dirs[dir]
  162. self.space += v
  163. del self.dirs[dir]
  164. def listdir(self):
  165. self.trace("listdir() = %s", self.dirs.keys())
  166. return self.dirs.iterkeys()
  167. def freespace(self):
  168. self.trace("freespace() = %s", self.space)
  169. return self.space
  170. def cleandir(operations, targets):
  171. """Perform actual cleanup using 'operations' until 'targets' are met"""
  172. trace = operations.trace
  173. keep_backups = targets.keep_backups
  174. target_fsp = targets.target_freespace
  175. target_backups = targets.target_backups
  176. was_above_target_freespace = None
  177. was_above_target_backups = None
  178. last_dirs = []
  179. def first(it):
  180. for x in it:
  181. return x
  182. while True:
  183. do_del = None
  184. dirs = sorted(operations.listdir())
  185. dirs_len = len(dirs)
  186. if dirs_len <= 0:
  187. raise Exception("No more directories to clean")
  188. break
  189. elif sorted(dirs) == last_dirs:
  190. raise Exception("No directories removed")
  191. break
  192. else:
  193. last_dirs = dirs
  194. if keep_backups is not None:
  195. if dirs_len <= keep_backups:
  196. print "Reached number of backups to keep: ", dirs_len
  197. break
  198. if target_fsp is not None:
  199. fsp = operations.freespace()
  200. #print "+++ ", fsp, target_fsp, fsp >= target_fsp
  201. if fsp >= target_fsp:
  202. if (was_above_target_freespace
  203. or was_above_target_freespace is None):
  204. trace("Satisfied freespace target: %s with %s",
  205. fsp, target_fsp)
  206. was_above_target_freespace = False
  207. if do_del is None:
  208. do_del = False
  209. else:
  210. if was_above_target_freespace is None:
  211. was_above_target_freespace = True
  212. do_del = True
  213. if target_backups is not None:
  214. if dirs_len <= target_backups:
  215. if (was_above_target_backups or
  216. was_above_target_backups is None):
  217. trace("Satisfied target number of backups: %s with %s",
  218. target_backups, dirs_len)
  219. was_above_target_backups = False
  220. if do_del is None:
  221. do_del = False
  222. else:
  223. if was_above_target_backups is None:
  224. was_above_target_backups = True
  225. do_del = True
  226. if not do_del:
  227. break
  228. next_del = first(sorted_value(dirs))
  229. if next_del is None:
  230. trace("No more backups left")
  231. break
  232. else:
  233. operations.unsnap(next_del)
  234. def default_trace(fmt, *args, **kwargs):
  235. if args is not None:
  236. print fmt % args
  237. elif kwargs is not None:
  238. print fmt % kwargs
  239. else:
  240. print fmt
  241. def main(argv):
  242. def args():
  243. import argparse
  244. class Space(int):
  245. @staticmethod
  246. def parse_target_freespace(target_str):
  247. import re
  248. mods = {
  249. None: 0,
  250. 'K': 1,
  251. 'M': 2,
  252. 'G': 3 }
  253. form = "([0-9]+)(%s)?" % \
  254. "|".join(x for x in mods.iterkeys() if x is not None)
  255. m = re.match(form, target_str, re.IGNORECASE)
  256. if m:
  257. val, mod = m.groups()
  258. return int(val) * 1024**mods[mod]
  259. else:
  260. raise "Invalid value: %s, expected: %s" % (target_str, form)
  261. def __nonzero__(self):
  262. return True
  263. def __init__(self, value):
  264. self.origin = value
  265. def __new__(cls, value=0):
  266. if isinstance(value, (str, unicode)):
  267. value = Space.parse_target_freespace(value)
  268. return super(Space, cls).__new__(cls, value)
  269. def __str__(self):
  270. if isinstance(self.origin, int):
  271. return str(self.origin)
  272. else:
  273. return "%s[%s]" % (self.origin, int(self))
  274. parser = argparse.ArgumentParser(
  275. description = 'keeps btrfs snapshots for backup',
  276. #formatter_class = argparse.ArgumentDefaultsHelpFormatter
  277. )
  278. parser.add_argument('--path', '-p', metavar = 'PATH',
  279. help = 'Path for snapshots and cleanup',
  280. default = '.')
  281. target_group = parser.add_argument_group(
  282. title='Cleanup',
  283. description='Try to cleanup until all of the targets are met.')
  284. target_group.add_argument('--target-freespace', '-F',
  285. dest='target_freespace',
  286. metavar = 'SIZE',
  287. default = None,
  288. type = Space,
  289. help = '''Cleanup PATH until at least SIZE is free.
  290. SIZE is #bytes, or given with K, M, G or T respectively for kilo, ...''')
  291. target_group.add_argument('--target-backups', '-B',
  292. dest='target_backups',
  293. metavar = '#', type = int,
  294. help =
  295. 'Cleanup PATH until at most B backups remain')
  296. target_group.add_argument('--keep-backups', '-K',
  297. metavar = '#', type = int,
  298. default = DEFAULT_KEEP_BACKUPS,
  299. help = 'Stop cleaning when K backups remain')
  300. snap_group_x = parser.add_argument_group(
  301. title = 'Snapshotting')
  302. snap_group = parser.add_mutually_exclusive_group(required=False)
  303. snap_group.add_argument('--snap', '-s',
  304. metavar = 'SUBVOL', default = '/',
  305. help = 'Take snapshot of SUBVOL on invocation')
  306. snap_group.add_argument('--no-snap', '-S',
  307. dest = 'snap',
  308. help = 'Disable snapshot taking',
  309. action = 'store_const', const = None)
  310. parser.add_argument('--test', help = 'Execute built-in test',
  311. action='store_true')
  312. parser.add_argument('--explain',
  313. help = 'Explain what %(prog)s does (and stop)',
  314. action='store_true')
  315. parser.add_argument('--verbose', help = 'Verbose output',
  316. action='store_true')
  317. pa = parser.parse_args(argv[1:])
  318. return pa, parser
  319. pa, parser = args()
  320. if pa.verbose:
  321. trace = default_trace
  322. else:
  323. trace = None
  324. if pa.explain:
  325. sys.stdout.write(__doc__)
  326. return 0
  327. if pa.target_freespace is None and pa.target_backups is None:
  328. parser.error("Set a target, either with: \n"
  329. " --target-freespace, or\n"
  330. " --target-backups")
  331. if pa.test:
  332. operations = FakeOperations(path = pa.path,
  333. trace = trace,
  334. dirs = {
  335. '20101201-000000': 0,
  336. '20101201-010000': 1,
  337. '20101201-020000': 2,
  338. '20101201-030000': 3,
  339. '20101201-040000': 4,
  340. '20101201-050000': 5,
  341. '20101201-060000': 6,
  342. '20101201-070000': 7,
  343. '20101201-080000': 8,
  344. },
  345. space = 5)
  346. else:
  347. operations = Operations(path = pa.path, trace = trace)
  348. if pa.snap:
  349. operations.snap(path = pa.snap)
  350. cleandir(operations = operations, targets = pa)
  351. if "__main__" == __name__:
  352. sys.exit(main(sys.argv))