_marshal.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692
  1. """Internal Python object serialization
  2. This module contains functions that can read and write Python values in a binary format. The format is specific to Python, but independent of machine architecture issues (e.g., you can write a Python value to a file on a PC, transport the file to a Sun, and read it back there). Details of the format may change between Python versions.
  3. """
  4. # NOTE: This module is used in the Python3 interpreter, but also by
  5. # the "sandboxed" process. It must work for Python2 as well.
  6. import types
  7. try:
  8. intern
  9. except NameError:
  10. from sys import intern
  11. try: from __pypy__ import builtinify
  12. except ImportError: builtinify = lambda f: f
  13. TYPE_NULL = '0'
  14. TYPE_NONE = 'N'
  15. TYPE_FALSE = 'F'
  16. TYPE_TRUE = 'T'
  17. TYPE_STOPITER = 'S'
  18. TYPE_ELLIPSIS = '.'
  19. TYPE_INT = 'i'
  20. TYPE_INT64 = 'I'
  21. TYPE_FLOAT = 'f'
  22. TYPE_COMPLEX = 'x'
  23. TYPE_LONG = 'l'
  24. TYPE_STRING = 's'
  25. TYPE_INTERNED = 't'
  26. TYPE_STRINGREF= 'R'
  27. TYPE_TUPLE = '('
  28. TYPE_LIST = '['
  29. TYPE_DICT = '{'
  30. TYPE_CODE = 'c'
  31. TYPE_UNICODE = 'u'
  32. TYPE_UNKNOWN = '?'
  33. TYPE_SET = '<'
  34. TYPE_FROZENSET= '>'
  35. class _Marshaller:
  36. dispatch = {}
  37. def __init__(self, writefunc):
  38. self._write = writefunc
  39. def dump(self, x):
  40. try:
  41. self.dispatch[type(x)](self, x)
  42. except KeyError:
  43. for tp in type(x).mro():
  44. func = self.dispatch.get(tp)
  45. if func:
  46. break
  47. else:
  48. raise ValueError("unmarshallable object")
  49. func(self, x)
  50. def w_long64(self, x):
  51. self.w_long(x)
  52. self.w_long(x>>32)
  53. def w_long(self, x):
  54. a = chr(x & 0xff)
  55. x >>= 8
  56. b = chr(x & 0xff)
  57. x >>= 8
  58. c = chr(x & 0xff)
  59. x >>= 8
  60. d = chr(x & 0xff)
  61. self._write(a + b + c + d)
  62. def w_short(self, x):
  63. self._write(chr((x) & 0xff))
  64. self._write(chr((x>> 8) & 0xff))
  65. def dump_none(self, x):
  66. self._write(TYPE_NONE)
  67. dispatch[type(None)] = dump_none
  68. def dump_bool(self, x):
  69. if x:
  70. self._write(TYPE_TRUE)
  71. else:
  72. self._write(TYPE_FALSE)
  73. dispatch[bool] = dump_bool
  74. def dump_stopiter(self, x):
  75. if x is not StopIteration:
  76. raise ValueError("unmarshallable object")
  77. self._write(TYPE_STOPITER)
  78. dispatch[type(StopIteration)] = dump_stopiter
  79. def dump_ellipsis(self, x):
  80. self._write(TYPE_ELLIPSIS)
  81. try:
  82. dispatch[type(Ellipsis)] = dump_ellipsis
  83. except NameError:
  84. pass
  85. # In Python3, this function is not used; see dump_long() below.
  86. def dump_int(self, x):
  87. y = x>>31
  88. if y and y != -1:
  89. self._write(TYPE_INT64)
  90. self.w_long64(x)
  91. else:
  92. self._write(TYPE_INT)
  93. self.w_long(x)
  94. dispatch[int] = dump_int
  95. def dump_long(self, x):
  96. self._write(TYPE_LONG)
  97. sign = 1
  98. if x < 0:
  99. sign = -1
  100. x = -x
  101. digits = []
  102. while x:
  103. digits.append(x & 0x7FFF)
  104. x = x>>15
  105. self.w_long(len(digits) * sign)
  106. for d in digits:
  107. self.w_short(d)
  108. try:
  109. long
  110. except NameError:
  111. dispatch[int] = dump_long
  112. else:
  113. dispatch[long] = dump_long
  114. def dump_float(self, x):
  115. write = self._write
  116. write(TYPE_FLOAT)
  117. s = repr(x)
  118. write(chr(len(s)))
  119. write(s)
  120. dispatch[float] = dump_float
  121. def dump_complex(self, x):
  122. write = self._write
  123. write(TYPE_COMPLEX)
  124. s = repr(x.real)
  125. write(chr(len(s)))
  126. write(s)
  127. s = repr(x.imag)
  128. write(chr(len(s)))
  129. write(s)
  130. try:
  131. dispatch[complex] = dump_complex
  132. except NameError:
  133. pass
  134. def dump_string(self, x):
  135. # XXX we can't check for interned strings, yet,
  136. # so we (for now) never create TYPE_INTERNED or TYPE_STRINGREF
  137. self._write(TYPE_STRING)
  138. self.w_long(len(x))
  139. self._write(x)
  140. dispatch[bytes] = dump_string
  141. def dump_unicode(self, x):
  142. self._write(TYPE_UNICODE)
  143. s = x.encode('utf8')
  144. self.w_long(len(s))
  145. self._write(s)
  146. try:
  147. unicode
  148. except NameError:
  149. dispatch[str] = dump_unicode
  150. else:
  151. dispatch[unicode] = dump_unicode
  152. def dump_tuple(self, x):
  153. self._write(TYPE_TUPLE)
  154. self.w_long(len(x))
  155. for item in x:
  156. self.dump(item)
  157. dispatch[tuple] = dump_tuple
  158. def dump_list(self, x):
  159. self._write(TYPE_LIST)
  160. self.w_long(len(x))
  161. for item in x:
  162. self.dump(item)
  163. dispatch[list] = dump_list
  164. def dump_dict(self, x):
  165. self._write(TYPE_DICT)
  166. for key, value in x.items():
  167. self.dump(key)
  168. self.dump(value)
  169. self._write(TYPE_NULL)
  170. dispatch[dict] = dump_dict
  171. def dump_code(self, x):
  172. self._write(TYPE_CODE)
  173. self.w_long(x.co_argcount)
  174. self.w_long(x.co_nlocals)
  175. self.w_long(x.co_stacksize)
  176. self.w_long(x.co_flags)
  177. self.dump(x.co_code)
  178. self.dump(x.co_consts)
  179. self.dump(x.co_names)
  180. self.dump(x.co_varnames)
  181. self.dump(x.co_freevars)
  182. self.dump(x.co_cellvars)
  183. self.dump(x.co_filename)
  184. self.dump(x.co_name)
  185. self.w_long(x.co_firstlineno)
  186. self.dump(x.co_lnotab)
  187. try:
  188. dispatch[types.CodeType] = dump_code
  189. except NameError:
  190. pass
  191. def dump_set(self, x):
  192. self._write(TYPE_SET)
  193. self.w_long(len(x))
  194. for each in x:
  195. self.dump(each)
  196. try:
  197. dispatch[set] = dump_set
  198. except NameError:
  199. pass
  200. def dump_frozenset(self, x):
  201. self._write(TYPE_FROZENSET)
  202. self.w_long(len(x))
  203. for each in x:
  204. self.dump(each)
  205. try:
  206. dispatch[frozenset] = dump_frozenset
  207. except NameError:
  208. pass
  209. class _NULL:
  210. pass
  211. class _StringBuffer:
  212. def __init__(self, value):
  213. self.bufstr = value
  214. self.bufpos = 0
  215. def read(self, n):
  216. pos = self.bufpos
  217. newpos = pos + n
  218. ret = self.bufstr[pos : newpos]
  219. self.bufpos = newpos
  220. return ret
  221. class _Unmarshaller:
  222. dispatch = {}
  223. def __init__(self, readfunc):
  224. self._read = readfunc
  225. self._stringtable = []
  226. def load(self):
  227. c = self._read(1)
  228. if not c:
  229. raise EOFError
  230. try:
  231. return self.dispatch[c](self)
  232. except KeyError:
  233. raise ValueError("bad marshal code: %c (%d)" % (c, ord(c)))
  234. def r_short(self):
  235. lo = ord(self._read(1))
  236. hi = ord(self._read(1))
  237. x = lo | (hi<<8)
  238. if x & 0x8000:
  239. x = x - 0x10000
  240. return x
  241. def r_long(self):
  242. s = self._read(4)
  243. a = ord(s[0])
  244. b = ord(s[1])
  245. c = ord(s[2])
  246. d = ord(s[3])
  247. x = a | (b<<8) | (c<<16) | (d<<24)
  248. if d & 0x80 and x > 0:
  249. x = -((1<<32) - x)
  250. return int(x)
  251. else:
  252. return x
  253. def r_long64(self):
  254. a = ord(self._read(1))
  255. b = ord(self._read(1))
  256. c = ord(self._read(1))
  257. d = ord(self._read(1))
  258. e = ord(self._read(1))
  259. f = ord(self._read(1))
  260. g = ord(self._read(1))
  261. h = ord(self._read(1))
  262. x = a | (b<<8) | (c<<16) | (d<<24)
  263. x = x | (e<<32) | (f<<40) | (g<<48) | (h<<56)
  264. if h & 0x80 and x > 0:
  265. x = -((1<<64) - x)
  266. return x
  267. def load_null(self):
  268. return _NULL
  269. dispatch[TYPE_NULL] = load_null
  270. def load_none(self):
  271. return None
  272. dispatch[TYPE_NONE] = load_none
  273. def load_true(self):
  274. return True
  275. dispatch[TYPE_TRUE] = load_true
  276. def load_false(self):
  277. return False
  278. dispatch[TYPE_FALSE] = load_false
  279. def load_stopiter(self):
  280. return StopIteration
  281. dispatch[TYPE_STOPITER] = load_stopiter
  282. def load_ellipsis(self):
  283. return Ellipsis
  284. dispatch[TYPE_ELLIPSIS] = load_ellipsis
  285. dispatch[TYPE_INT] = r_long
  286. dispatch[TYPE_INT64] = r_long64
  287. def load_long(self):
  288. size = self.r_long()
  289. sign = 1
  290. if size < 0:
  291. sign = -1
  292. size = -size
  293. x = 0
  294. for i in range(size):
  295. d = self.r_short()
  296. x = x | (d<<(i*15))
  297. return x * sign
  298. dispatch[TYPE_LONG] = load_long
  299. def load_float(self):
  300. n = ord(self._read(1))
  301. s = self._read(n)
  302. return float(s)
  303. dispatch[TYPE_FLOAT] = load_float
  304. def load_complex(self):
  305. n = ord(self._read(1))
  306. s = self._read(n)
  307. real = float(s)
  308. n = ord(self._read(1))
  309. s = self._read(n)
  310. imag = float(s)
  311. return complex(real, imag)
  312. dispatch[TYPE_COMPLEX] = load_complex
  313. def load_string(self):
  314. n = self.r_long()
  315. return self._read(n)
  316. dispatch[TYPE_STRING] = load_string
  317. def load_interned(self):
  318. n = self.r_long()
  319. ret = intern(self._read(n))
  320. self._stringtable.append(ret)
  321. return ret
  322. dispatch[TYPE_INTERNED] = load_interned
  323. def load_stringref(self):
  324. n = self.r_long()
  325. return self._stringtable[n]
  326. dispatch[TYPE_STRINGREF] = load_stringref
  327. def load_unicode(self):
  328. n = self.r_long()
  329. s = self._read(n)
  330. ret = s.decode('utf8')
  331. return ret
  332. dispatch[TYPE_UNICODE] = load_unicode
  333. def load_tuple(self):
  334. return tuple(self.load_list())
  335. dispatch[TYPE_TUPLE] = load_tuple
  336. def load_list(self):
  337. n = self.r_long()
  338. list = [self.load() for i in range(n)]
  339. return list
  340. dispatch[TYPE_LIST] = load_list
  341. def load_dict(self):
  342. d = {}
  343. while 1:
  344. key = self.load()
  345. if key is _NULL:
  346. break
  347. value = self.load()
  348. d[key] = value
  349. return d
  350. dispatch[TYPE_DICT] = load_dict
  351. def load_code(self):
  352. argcount = self.r_long()
  353. nlocals = self.r_long()
  354. stacksize = self.r_long()
  355. flags = self.r_long()
  356. code = self.load()
  357. consts = self.load()
  358. names = self.load()
  359. varnames = self.load()
  360. freevars = self.load()
  361. cellvars = self.load()
  362. filename = self.load()
  363. name = self.load()
  364. firstlineno = self.r_long()
  365. lnotab = self.load()
  366. return types.CodeType(argcount, nlocals, stacksize, flags, code, consts,
  367. names, varnames, filename, name, firstlineno,
  368. lnotab, freevars, cellvars)
  369. dispatch[TYPE_CODE] = load_code
  370. def load_set(self):
  371. n = self.r_long()
  372. args = [self.load() for i in range(n)]
  373. return set(args)
  374. dispatch[TYPE_SET] = load_set
  375. def load_frozenset(self):
  376. n = self.r_long()
  377. args = [self.load() for i in range(n)]
  378. return frozenset(args)
  379. dispatch[TYPE_FROZENSET] = load_frozenset
  380. # ________________________________________________________________
  381. def _read(self, n):
  382. pos = self.bufpos
  383. newpos = pos + n
  384. if newpos > len(self.bufstr): raise EOFError
  385. ret = self.bufstr[pos : newpos]
  386. self.bufpos = newpos
  387. return ret
  388. def _read1(self):
  389. ret = self.bufstr[self.bufpos]
  390. self.bufpos += 1
  391. return ret
  392. def _r_short(self):
  393. lo = ord(_read1(self))
  394. hi = ord(_read1(self))
  395. x = lo | (hi<<8)
  396. if x & 0x8000:
  397. x = x - 0x10000
  398. return x
  399. def _r_long(self):
  400. # inlined this most common case
  401. p = self.bufpos
  402. s = self.bufstr
  403. a = ord(s[p])
  404. b = ord(s[p+1])
  405. c = ord(s[p+2])
  406. d = ord(s[p+3])
  407. self.bufpos += 4
  408. x = a | (b<<8) | (c<<16) | (d<<24)
  409. if d & 0x80 and x > 0:
  410. x = -((1<<32) - x)
  411. return int(x)
  412. else:
  413. return x
  414. def _r_long64(self):
  415. a = ord(_read1(self))
  416. b = ord(_read1(self))
  417. c = ord(_read1(self))
  418. d = ord(_read1(self))
  419. e = ord(_read1(self))
  420. f = ord(_read1(self))
  421. g = ord(_read1(self))
  422. h = ord(_read1(self))
  423. x = a | (b<<8) | (c<<16) | (d<<24)
  424. x = x | (e<<32) | (f<<40) | (g<<48) | (h<<56)
  425. if h & 0x80 and x > 0:
  426. x = -((1<<64) - x)
  427. return x
  428. _load_dispatch = {}
  429. class _FastUnmarshaller:
  430. dispatch = {}
  431. def __init__(self, buffer):
  432. self.bufstr = buffer
  433. self.bufpos = 0
  434. self._stringtable = []
  435. def load(self):
  436. # make flow space happy
  437. c = '?'
  438. try:
  439. c = self.bufstr[self.bufpos]
  440. self.bufpos += 1
  441. return _load_dispatch[c](self)
  442. except KeyError:
  443. raise ValueError("bad marshal code: %c (%d)" % (c, ord(c)))
  444. except IndexError:
  445. raise EOFError
  446. def load_null(self):
  447. return _NULL
  448. dispatch[TYPE_NULL] = load_null
  449. def load_none(self):
  450. return None
  451. dispatch[TYPE_NONE] = load_none
  452. def load_true(self):
  453. return True
  454. dispatch[TYPE_TRUE] = load_true
  455. def load_false(self):
  456. return False
  457. dispatch[TYPE_FALSE] = load_false
  458. def load_stopiter(self):
  459. return StopIteration
  460. dispatch[TYPE_STOPITER] = load_stopiter
  461. def load_ellipsis(self):
  462. return Ellipsis
  463. dispatch[TYPE_ELLIPSIS] = load_ellipsis
  464. def load_int(self):
  465. return _r_long(self)
  466. dispatch[TYPE_INT] = load_int
  467. def load_int64(self):
  468. return _r_long64(self)
  469. dispatch[TYPE_INT64] = load_int64
  470. def load_long(self):
  471. size = _r_long(self)
  472. sign = 1
  473. if size < 0:
  474. sign = -1
  475. size = -size
  476. x = 0
  477. for i in range(size):
  478. d = _r_short(self)
  479. x = x | (d<<(i*15))
  480. return x * sign
  481. dispatch[TYPE_LONG] = load_long
  482. def load_float(self):
  483. n = ord(_read1(self))
  484. s = _read(self, n)
  485. return float(s)
  486. dispatch[TYPE_FLOAT] = load_float
  487. def load_complex(self):
  488. n = ord(_read1(self))
  489. s = _read(self, n)
  490. real = float(s)
  491. n = ord(_read1(self))
  492. s = _read(self, n)
  493. imag = float(s)
  494. return complex(real, imag)
  495. dispatch[TYPE_COMPLEX] = load_complex
  496. def load_string(self):
  497. n = _r_long(self)
  498. return _read(self, n)
  499. dispatch[TYPE_STRING] = load_string
  500. def load_interned(self):
  501. n = _r_long(self)
  502. ret = intern(_read(self, n))
  503. self._stringtable.append(ret)
  504. return ret
  505. dispatch[TYPE_INTERNED] = load_interned
  506. def load_stringref(self):
  507. n = _r_long(self)
  508. return self._stringtable[n]
  509. dispatch[TYPE_STRINGREF] = load_stringref
  510. def load_unicode(self):
  511. n = _r_long(self)
  512. s = _read(self, n)
  513. ret = s.decode('utf8')
  514. return ret
  515. dispatch[TYPE_UNICODE] = load_unicode
  516. def load_tuple(self):
  517. return tuple(self.load_list())
  518. dispatch[TYPE_TUPLE] = load_tuple
  519. def load_list(self):
  520. n = _r_long(self)
  521. list = []
  522. for i in range(n):
  523. list.append(self.load())
  524. return list
  525. dispatch[TYPE_LIST] = load_list
  526. def load_dict(self):
  527. d = {}
  528. while 1:
  529. key = self.load()
  530. if key is _NULL:
  531. break
  532. value = self.load()
  533. d[key] = value
  534. return d
  535. dispatch[TYPE_DICT] = load_dict
  536. def load_code(self):
  537. argcount = _r_long(self)
  538. nlocals = _r_long(self)
  539. stacksize = _r_long(self)
  540. flags = _r_long(self)
  541. code = self.load()
  542. consts = self.load()
  543. names = self.load()
  544. varnames = self.load()
  545. freevars = self.load()
  546. cellvars = self.load()
  547. filename = self.load()
  548. name = self.load()
  549. firstlineno = _r_long(self)
  550. lnotab = self.load()
  551. return types.CodeType(argcount, nlocals, stacksize, flags, code, consts,
  552. names, varnames, filename, name, firstlineno,
  553. lnotab, freevars, cellvars)
  554. dispatch[TYPE_CODE] = load_code
  555. def load_set(self):
  556. n = _r_long(self)
  557. args = [self.load() for i in range(n)]
  558. return set(args)
  559. dispatch[TYPE_SET] = load_set
  560. def load_frozenset(self):
  561. n = _r_long(self)
  562. args = [self.load() for i in range(n)]
  563. return frozenset(args)
  564. dispatch[TYPE_FROZENSET] = load_frozenset
  565. _load_dispatch = _FastUnmarshaller.dispatch
  566. # _________________________________________________________________
  567. #
  568. # user interface
  569. version = 1
  570. @builtinify
  571. def dump(x, f, version=version):
  572. # XXX 'version' is ignored, we always dump in a version-0-compatible format
  573. m = _Marshaller(f.write)
  574. m.dump(x)
  575. @builtinify
  576. def load(f):
  577. um = _Unmarshaller(f.read)
  578. return um.load()
  579. @builtinify
  580. def dumps(x, version=version):
  581. # XXX 'version' is ignored, we always dump in a version-0-compatible format
  582. buffer = []
  583. m = _Marshaller(buffer.append)
  584. m.dump(x)
  585. return ''.join(buffer)
  586. @builtinify
  587. def loads(s):
  588. um = _FastUnmarshaller(s)
  589. return um.load()