123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151 |
- import pathlib
- import os
- import lmdb
- import hashlib
- import pickle
- import itertools
class InvalidPath(Exception):
    """Raised when a path is outside the storage root or names a non-file
    where a regular file is required."""


class Storage:
    """Files kept under one base directory, with named properties in LMDB.

    Paths passed to the public methods are interpreted relative to
    ``self.base``; every method funnels them through :meth:`_safe`, which
    rejects anything that resolves outside the base directory.  Properties
    are stored pickled, one LMDB named database per property name, keyed by
    the file's resolved absolute path.

    Attributes:
        base: absolute path of the storage root.
        properties: mapping of property name -> LMDB database handle.
        env: the LMDB environment opened at ``db_path``.
        total_size: running total, in bytes, of the files counted at
            construction plus/minus later ``add``/``delete``/``move`` calls.
    """

    # Chunk size used when streaming file contents.
    _CHUNK_SIZE = 64 * 1024

    def __init__(self, base, db_path, properties):
        """Open the storage.

        Args:
            base: string path of an existing directory to use as the root.
            db_path: string path of the LMDB environment directory.
            properties: iterable of property names (strings); users of this
                instance are bound to exactly these names.

        Raises:
            OSError: if ``base`` is not an existing directory.
        """
        if not os.path.isdir(base):
            raise OSError(base + " must exist and be a directory")
        self.base = os.path.abspath(base)

        # Materialise first: ``properties`` may be a one-shot iterable, and
        # it is needed both for its length and for iteration.
        properties = list(properties)
        self.properties = dict()
        self.env = lmdb.open(db_path, max_dbs=len(properties))
        for pr in properties:
            self.properties[pr] = self.env.open_db(pr.encode())

        self.total_size = 0
        db_path = os.path.abspath(db_path)
        # Walk the absolute root so the addresses handed to size() (and the
        # comparison against the absolute db_path) are absolute as well.
        for root, _dirs, files in os.walk(self.base):
            if root == db_path:  # to exclude DB
                continue
            for name in files:
                addr = os.path.join(root, name)
                if os.path.isfile(addr):
                    self.total_size += self.size(addr)

    def _safe(self, path, dir_allow=False):
        """Resolve ``path`` against the base directory and validate it.

        Args:
            path: path relative to the base (an absolute path is accepted
                only if it lies inside the base).
            dir_allow: when true, an existing non-file (e.g. a directory)
                is accepted; otherwise only regular files or not-yet-existing
                paths are.

        Returns:
            The resolved absolute path as a string.

        Raises:
            InvalidPath: if the resolved path is outside the base directory,
                or exists but is not a regular file while ``dir_allow`` is
                false.
        """
        # Resolve the base too, so a base reached through a symlink compares
        # equal to the (symlink-free) resolved target.
        root = pathlib.Path(self.base).resolve()
        target = (root / path).resolve()
        # Compare path components, not string prefixes: "/x/base_evil"
        # starts with "/x/base" but is not inside it.
        if target != root and root not in target.parents:
            raise InvalidPath

        resolved = str(target)
        if os.path.exists(resolved) and not (os.path.isfile(resolved) or dir_allow):
            raise InvalidPath
        return resolved

    def ls(self, path="", reverse=False):
        """Return the sorted names of files and directories inside ``path``.

        Raises:
            ValueError: if ``path`` is not a directory.
            InvalidPath: if ``path`` is outside the base directory.
        """
        path = self._safe(path, True)
        if not os.path.isdir(path):
            raise ValueError("path must be a directory")
        names = []
        for name in os.listdir(path):
            full = os.path.join(path, name)
            # Entries that are neither file nor directory (e.g. dangling
            # symlinks) are left out.
            if os.path.isfile(full) or os.path.isdir(full):
                names.append(name)
        return sorted(names, reverse=reverse)

    def get(self, path):
        """Open ``path`` for binary reading; the caller must close it."""
        path = self._safe(path)
        return open(path, "rb")

    def add(self, path, file, properties=None):
        """Store ``file`` at ``path``, replacing any existing file there.

        Args:
            path: destination path relative to the base.
            file: bytes-like content, or an object with ``read(size)``.
            properties: optional mapping of property name -> value to set
                on the new file.

        Raises:
            InvalidPath: if ``path`` is outside the base or is a directory.
            ValueError: if ``properties`` contains an unknown name; raised
                before anything is written.
        """
        path = self._safe(path)
        properties = {} if properties is None else properties
        # Validate up front so a bad property name cannot leave a
        # half-registered file behind.
        for key in properties:
            if key not in self.properties:
                raise ValueError("Invalid property")

        if self.exists(path):
            self.delete(path)
        with open(path, "wb") as o_fp:
            if isinstance(file, (bytes, bytearray, memoryview)):
                self.total_size += o_fp.write(file)
            else:
                buf = file.read(self._CHUNK_SIZE)
                while buf:
                    self.total_size += o_fp.write(buf)
                    buf = file.read(self._CHUNK_SIZE)
        for key, value in properties.items():
            self.set_property(path, key, value)

    def isdir(self, path):
        """Return whether ``path`` is an existing directory."""
        path = self._safe(path, True)
        return os.path.isdir(path)

    def hashsum(self, path):
        """Return the hexadecimal SHA-1 digest of the file at ``path``."""
        path = self._safe(path)
        result = hashlib.sha1()
        with open(path, "rb") as fp:
            buf = fp.read(self._CHUNK_SIZE)
            while buf:
                result.update(buf)
                buf = fp.read(self._CHUNK_SIZE)
        return result.hexdigest()

    def delete(self, path):
        """Remove the file at ``path`` together with all of its properties."""
        path = self._safe(path)
        removed = self.size(path)
        os.remove(path)
        # Adjust the counter only once the file is really gone.
        self.total_size -= removed

        key = path.encode()
        for db in self.properties.values():
            with self.env.begin(write=True, db=db) as txn:
                txn.delete(key)

    def exists(self, path):
        """Return whether ``path`` exists (as a regular file)."""
        path = self._safe(path)
        return os.path.exists(path)

    def size(self, path):
        """Return the size in bytes of the file at ``path``."""
        path = self._safe(path)
        return os.path.getsize(path)

    def move(self, path, new):
        """Rename ``path`` to ``new`` and carry its properties along.

        If ``new`` already names a file that the rename replaces, that
        file's bytes are subtracted from ``total_size`` and its properties
        are replaced by those of ``path``.
        """
        path = self._safe(path)
        new = self._safe(new)
        overwritten = os.path.getsize(new) if os.path.isfile(new) else 0
        os.rename(path, new)
        if path == new:
            # Renaming onto itself: nothing changed, and re-keying would
            # otherwise delete the file's own properties.
            return
        self.total_size -= overwritten

        old_key = path.encode()
        new_key = new.encode()
        for db in self.properties.values():
            # One write transaction per database so the copy and the
            # removal of the old key are applied together.
            with self.env.begin(db=db, write=True) as txn:
                raw = txn.get(old_key)
                if raw is None:
                    # Source had no value: make sure the destination does
                    # not keep one from a file that was overwritten.
                    txn.delete(new_key)
                else:
                    txn.put(new_key, raw)
                    txn.delete(old_key)

    def get_property(self, path, property_):
        """Return the stored value of ``property_`` for ``path``.

        Returns ``None`` when no value has been stored.

        Raises:
            ValueError: if ``property_`` is not one of the configured names.
        """
        if property_ not in self.properties:
            raise ValueError("Invalid property")
        path = self._safe(path, True)
        db = self.properties.get(property_)
        with self.env.begin(db=db) as txn:
            raw = txn.get(path.encode())
        if not raw:
            return raw
        # NOTE(security): unpickling executes arbitrary code if the database
        # content is attacker-controlled; only use with a trusted db_path.
        return pickle.loads(raw)

    def set_property(self, path, property_, value):
        """Store ``value`` (pickled) as ``property_`` for ``path``.

        Raises:
            ValueError: if ``property_`` is not one of the configured names.
        """
        if property_ not in self.properties:
            raise ValueError("Invalid property")
        path = self._safe(path, True)
        db = self.properties.get(property_)
        blob = pickle.dumps(value)
        with self.env.begin(db=db, write=True) as txn:
            # put() overwrites an existing value by default.
            txn.put(path.encode(), blob)
|