# BigfilePiecefield.py (4.6 KB)
  1. import array
  def packPiecefield(data):
      """Run-length encode a piecefield into an array of unsigned shorts.

      The result lists the lengths of alternating runs, always starting with a
      run of ``\\x01`` bytes; when ``data`` begins with ``\\x00`` the first run
      length is therefore 0.

      Args:
          data: ``bytes`` or ``bytearray`` holding one flag byte per piece
              (``\\x00`` or ``\\x01``; other byte values are not validated).

      Returns:
          ``array.array("H")`` with the run lengths; empty for empty input.

      Raises:
          Exception: if ``data`` is neither ``bytes`` nor ``bytearray``.
          OverflowError: raised by ``array.array`` when a run length does not
              fit into an unsigned short.
      """
      if not isinstance(data, (bytes, bytearray)):
          raise Exception("Invalid data type: %s" % type(data))
      if not data:
          return array.array("H", b"")

      res = []
      # Compare a one-byte slice: on Python 3 ``data[0]`` is an int and would
      # never be equal to ``b"\x00"``, which left this branch unreachable.
      if data[0:1] == b"\x00":
          res.append(0)  # Zero-length run of \x01 in front of the leading \x00 run
          find = b"\x01"
      else:
          find = b"\x00"

      last_pos = 0
      while True:
          # The next occurrence of the opposite byte marks the end of the current run
          pos = data.find(find, last_pos)
          if pos == -1:
              # No more changes: the current run extends to the end of the data
              res.append(len(data) - last_pos)
              break
          res.append(pos - last_pos)
          last_pos = pos
          find = b"\x01" if find == b"\x00" else b"\x00"
      return array.array("H", res)
  def unpackPiecefield(data):
      """Expand a sequence of run lengths back into a flag byte string.

      Runs alternate between ``\\x01`` and ``\\x00``, beginning with ``\\x01``.

      Args:
          data: iterable of run lengths (e.g. an ``array.array("H")``).

      Returns:
          The unpacked ``bytes``; ``b""`` when ``data`` is empty or when any
          run length is greater than 10000.
      """
      if not data:
          return b""

      chunks = []
      for index, run_length in enumerate(data):
          if run_length > 10000:
              # Over-long run: give up and report an empty piecefield
              return b""
          # Even positions are runs of \x01, odd positions runs of \x00
          symbol = b"\x01" if index % 2 == 0 else b"\x00"
          chunks.append(symbol * run_length)
      return b"".join(chunks)
  def spliceBit(data, idx, bit):
      """Return a copy of ``data`` with the byte at position ``idx`` set to ``bit``.

      Args:
          data: byte string holding one flag byte per piece.
          idx: position to overwrite.
          bit: replacement byte, must be ``b"\\x00"`` or ``b"\\x01"``.

      Returns:
          The new byte string. When ``data`` is shorter than ``idx`` it is first
          padded with ``\\x00`` up to ``idx + 1`` bytes.

      Raises:
          Exception: if ``bit`` is not one of the two allowed values.
      """
      if bit not in (b"\x00", b"\x01"):
          raise Exception("Invalid bit: %s" % bit)

      if idx > len(data):
          # Fill the gap up to (and including) the target position with zeros
          data = data.ljust(idx + 1, b"\x00")

      head = data[:idx]
      tail = data[idx + 1:]
      return head + bit + tail
  class Piecefield(object):
      """Common base for piecefield storages.

      Subclasses are expected to provide ``tobytes()`` returning one flag byte
      per piece.
      """

      # An empty __slots__ here is required for the ``__slots__`` declared by
      # subclasses to take effect: without it every instance would still
      # carry a per-instance ``__dict__``.
      __slots__ = ()

      def tostring(self):
          """Return the piecefield as a text string of "1" and "0" characters.

          Each item obtained by iterating ``self.tobytes()`` yields "1" when it
          is truthy and "0" otherwise.
          """
          return "".join("1" if b else "0" for b in self.tobytes())
  class BigfilePiecefield(Piecefield):
      """Piecefield storage that keeps the flags unpacked in ``data``.

      ``data`` is a byte string with one flag byte per piece; the run-length
      form is only produced on demand by ``pack()``.
      """

      __slots__ = ["data"]

      def __init__(self):
          self.data = b""

      def frombytes(self, s):
          """Replace the stored flags with ``s`` (``bytes`` or ``bytearray``).

          Raises:
              Exception: if ``s`` has any other type.
          """
          if isinstance(s, (bytes, bytearray)):
              self.data = s
              return
          raise Exception("Invalid type: %s" % type(s))

      def tobytes(self):
          """Return the stored flag byte string."""
          return self.data

      def pack(self):
          """Return the run-length encoded flags as raw bytes."""
          run_lengths = packPiecefield(self.data)
          return run_lengths.tobytes()

      def unpack(self, s):
          """Load the flags from ``s``, read as an array of unsigned shorts."""
          run_lengths = array.array("H", s)
          self.data = unpackPiecefield(run_lengths)

      def __getitem__(self, key):
          """Return ``data[key]``, or ``False`` when ``key`` is out of range."""
          try:
              value = self.data[key]
          except IndexError:
              return False
          return value

      def __setitem__(self, key, value):
          """Set the flag at ``key`` to ``value`` (``b"\\x00"`` or ``b"\\x01"``)."""
          self.data = spliceBit(self.data, key, value)
  class BigfilePiecefieldPacked(Piecefield):
      """Piecefield storage that keeps the flags run-length packed in ``data``.

      ``data`` holds the raw bytes of the packed run lengths; the flags are
      unpacked again on every read or modification.
      """

      __slots__ = ["data"]

      def __init__(self):
          self.data = b""

      def frombytes(self, data):
          """Pack the flag byte string ``data`` and store the packed bytes.

          Raises:
              Exception: if ``data`` is neither ``bytes`` nor ``bytearray``.
          """
          if not isinstance(data, (bytes, bytearray)):
              raise Exception("Invalid type: %s" % type(data))
          run_lengths = packPiecefield(data)
          self.data = run_lengths.tobytes()

      def tobytes(self):
          """Return the unpacked flag byte string."""
          run_lengths = array.array("H", self.data)
          return unpackPiecefield(run_lengths)

      def pack(self):
          """Return the packed bytes, round-tripped through ``array.array("H")``."""
          run_lengths = array.array("H", self.data)
          return run_lengths.tobytes()

      def unpack(self, data):
          """Store already packed ``data`` as-is."""
          self.data = data

      def __getitem__(self, key):
          """Return the unpacked flag at ``key``, or ``False`` if out of range."""
          unpacked = self.tobytes()
          try:
              return unpacked[key]
          except IndexError:
              return False

      def __setitem__(self, key, value):
          """Unpack, set the flag at ``key`` to ``value`` and pack again."""
          updated = spliceBit(self.tobytes(), key, value)
          self.frombytes(updated)
  if __name__ == "__main__":
      # Ad-hoc benchmark: compares memory use and speed of the two storages
      import os
      import psutil
      import time

      testdata = b"\x01" * 100 + b"\x00" * 900 + b"\x01" * 4000 + b"\x00" * 4999 + b"\x01"
      meminfo = psutil.Process(os.getpid()).memory_info

      for storage in [BigfilePiecefieldPacked, BigfilePiecefield]:
          print("-- Testing storage: %s --" % storage)

          # Create: one piecefield per position, each with that position zeroed
          mem_before = meminfo()[0]
          started = time.time()
          piecefields = {}
          for i in range(10000):
              field = storage()
              field.frombytes(testdata[:i] + b"\x00" + testdata[i + 1:])
              piecefields[i] = field
          print("Create x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - mem_before) / 1024, time.time() - started, len(piecefields[0].data)))

          # Query a single flag on every piecefield
          mem_before = meminfo()[0]
          started = time.time()
          for field in list(piecefields.values()):
              queried = field[1000]
          print("Query one x10000: +%sKB in %.3fs" % ((meminfo()[0] - mem_before) / 1024, time.time() - started))

          # Change a single flag on every piecefield
          mem_before = meminfo()[0]
          started = time.time()
          for field in list(piecefields.values()):
              field[1000] = b"\x01"
          print("Change one x10000: +%sKB in %.3fs" % ((meminfo()[0] - mem_before) / 1024, time.time() - started))

          # Pack every piecefield
          mem_before = meminfo()[0]
          started = time.time()
          for field in list(piecefields.values()):
              packed = field.pack()
          print("Pack x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - mem_before) / 1024, time.time() - started, len(packed)))

          # Unpack the last packed value into every piecefield
          mem_before = meminfo()[0]
          started = time.time()
          for field in list(piecefields.values()):
              field.unpack(packed)
          print("Unpack x10000: +%sKB in %.3fs (len: %s)" % ((meminfo()[0] - mem_before) / 1024, time.time() - started, len(piecefields[0].data)))

          # NOTE(review): drops the references before the next storage is measured;
          # placement inside the loop is inferred, confirm against the upstream file
          piecefields = {}