# storage.py (4.9 KB)
  1. import pathlib
  2. import os
  3. import lmdb
  4. import hashlib
  5. import pickle
  6. import itertools
  7. class InvalidPath(Exception):
  8. ...
  9. class Storage:
  10. def __init__(self, base, db_path, properties):
  11. # base and db_path must be strings representing paths
  12. # properties must be an iterable
  13. # and user(s) of this class
  14. # will be bound to just those.
  15. if os.path.isdir(base):
  16. self.base = os.path.abspath(base)
  17. else:
  18. raise OSError(base + " must exist and be a directory")
  19. self.properties = dict()
  20. self.env = lmdb.open(db_path, max_dbs=len(properties))
  21. for pr in properties:
  22. self.properties[pr] = self.env.open_db(pr.encode())
  23. self.total_size = 0
  24. getsize = os.path.getsize
  25. join = os.path.join
  26. db_path = os.path.abspath(db_path)
  27. for root, d, files in os.walk(base):
  28. for name in files:
  29. addr = os.path.join(root, name)
  30. if os.path.isfile(addr):
  31. if root != db_path: # to exclude DB
  32. self.total_size += self.size(addr)
  33. def _safe(self, path, dir_allow=False):
  34. base = pathlib.Path(self.base)
  35. path = str((base / path).resolve())
  36. valid = path.startswith(self.base)
  37. if not valid:
  38. raise InvalidPath
  39. if os.path.exists(path):
  40. if os.path.isfile(path) or dir_allow:
  41. return path
  42. else:
  43. raise InvalidPath
  44. else:
  45. return path
  46. def ls(self, path="", reverse=False):
  47. path = self._safe(path, True)
  48. if not os.path.isdir(path):
  49. raise ValueError("path must be a directory")
  50. prepend_path = lambda f: os.path.join(path, f)
  51. deprepend_path = lambda f: os.path.relpath(f, start=path)
  52. #TODO: ^ better solution
  53. file_or_dir = lambda f: os.path.isfile(f) or os.path.isdir(f)
  54. all_ = filter(file_or_dir, map(prepend_path, os.listdir(path)))
  55. all_ = sorted(map(deprepend_path, all_), reverse=reverse)
  56. return all_
  57. def get(self, path):
  58. path = self._safe(path)
  59. return open(path, "rb")
  60. def add(self, path, file, properties = {}):
  61. path = self._safe(path)
  62. if self.exists(path):
  63. self.delete(path)
  64. with open(path, "wb") as o_fp:
  65. if isinstance(file, bytes):
  66. self.total_size += o_fp.write(file)
  67. else:
  68. buf = file.read(64 * 1024)
  69. while buf:
  70. self.total_size += o_fp.write(buf)
  71. buf = file.read(64 * 1024)
  72. for key, value in properties.items():
  73. if key not in self.properties:
  74. raise ValueError("Invalid property")
  75. self.set_property(path, key, value)
  76. def isdir(self, path):
  77. path = self._safe(path, True)
  78. return os.path.isdir(path)
  79. def hashsum(self, path):
  80. path = self._safe(path)
  81. result = hashlib.sha1()
  82. with open(path, "rb") as fp:
  83. while True:
  84. buf = fp.read(64 * 1024)
  85. if buf:
  86. result.update(buf)
  87. else:
  88. break
  89. return result.hexdigest()
  90. def delete(self, path):
  91. path = self._safe(path)
  92. self.total_size -= self.size(path)
  93. os.remove(path)
  94. for db in self.properties.values():
  95. with self.env.begin(write = True, db = db) as txn:
  96. txn.delete(path.encode())
  97. def exists(self, path):
  98. path = self._safe(path)
  99. return os.path.exists(path)
  100. def size(self, path):
  101. path = self._safe(path)
  102. return os.path.getsize(path)
  103. def move(self, path, new):
  104. path = self._safe(path)
  105. new = self._safe(new)
  106. os.rename(path, new)
  107. for pr, db in self.properties.items():
  108. value = self.get_property(path, pr)
  109. self.set_property(new, pr, value)
  110. with self.env.begin(db=db, write=True) as txn:
  111. txn.delete(path.encode())
  112. def get_property(self, path, property_):
  113. if property_ not in self.properties:
  114. raise ValueError("Invalid property")
  115. path = self._safe(path, True)
  116. db = self.properties.get(property_)
  117. with self.env.begin(db = db) as txn:
  118. value = txn.get(path.encode())
  119. return pickle.loads(value) if value else value
  120. def set_property(self, path, property_, value):
  121. if property_ not in self.properties:
  122. raise ValueError("Invalid property")
  123. path = self._safe(path, True)
  124. db = self.properties.get(property_)
  125. value = pickle.dumps(value)
  126. with self.env.begin(db = db, write = True) as txn:
  127. if txn.get(path.encode()):
  128. txn.replace(path.encode(), value)
  129. else:
  130. txn.put(path.encode(), value)