classfile.py 7.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. ## Copyright (C) 2004, 2005 Free Software Foundation
  2. ## Written by Gary Benson <gbenson@redhat.com>
  3. ##
  4. ## This program is free software; you can redistribute it and/or modify
  5. ## it under the terms of the GNU General Public License as published by
  6. ## the Free Software Foundation; either version 2 of the License, or
  7. ## (at your option) any later version.
  8. ##
  9. ## This program is distributed in the hope that it will be useful,
  10. ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. ## GNU General Public License for more details.
  13. """Read Java(TM) class files."""
  14. import cStringIO as StringIO
  15. import struct
  16. class Class:
  17. def __init__(self, arg):
  18. if hasattr(arg, "read"):
  19. self.fp = arg
  20. elif type(arg) == type(""):
  21. if arg.startswith("\xca\xfe\xba\xbe"):
  22. self.fp = StringIO.StringIO(arg)
  23. else:
  24. self.fp = open(arg, "r")
  25. else:
  26. raise TypeError, type(arg)
  27. magic = self._read_int()
  28. assert magic == 0xcafebabeL
  29. minor, major = self._read(">HH")
  30. self.version = (major, minor)
  31. self.pool_integrity_checks = None
  32. try:
  33. assert False
  34. except AssertionError:
  35. self.pool_integrity_checks = []
  36. self._read_constants_pool()
  37. self.access_flags = self._read_short()
  38. self.name = self._read_reference_Class()
  39. self.super = self._read_reference_Class()
  40. self.interfaces = self._read_interfaces()
  41. self.fields = self._read_fieldsormethods()
  42. self.methods = self._read_fieldsormethods()
  43. self.attributes = self._read_attributes()
  44. if self.pool_integrity_checks is not None:
  45. for index, tag in self.pool_integrity_checks:
  46. assert self.constants[index][0] == tag
  47. del self.fp, self.pool_integrity_checks
  48. def __repr__(self):
  49. result = []
  50. attrs = [attr for attr in dir(self)
  51. if not attr.startswith("_") and attr != "Member"]
  52. attrs.sort()
  53. for attr in attrs:
  54. result.append("%-13s %s" % (
  55. attr + ":", attr == "constants" and
  56. "<ELIDED>" or repr(getattr(self, attr))))
  57. return "\n".join(result)
  58. def _read_constants_pool(self):
  59. self.constants = {}
  60. skip = False
  61. for i in xrange(1, self._read_short()):
  62. if skip:
  63. skip = False
  64. continue
  65. tag = {
  66. 1: "Utf8", 3: "Integer", 4: "Float", 5: "Long",
  67. 6: "Double", 7: "Class", 8: "String", 9: "Fieldref",
  68. 10: "Methodref", 11: "InterfaceMethodref",
  69. 12: "NameAndType"}[self._read_byte()]
  70. skip = tag in ("Long", "Double") # crack crack crack!
  71. self.constants[i] = (tag, getattr(self, "_read_constant_" + tag)())
  72. def _read_interfaces(self):
  73. result = []
  74. for i in xrange(self._read_short()):
  75. result.append(self._read_reference_Class())
  76. return result
  77. def _read_fieldsormethods(self):
  78. result = []
  79. for i in xrange(self._read_short()):
  80. result.append(self.Member(self))
  81. return result
  82. class Member:
  83. def __init__(self, source):
  84. self.access_flags = source._read_short()
  85. self.name = source._read_reference_Utf8()
  86. self.descriptor = source._read_reference_Utf8()
  87. self.attributes = source._read_attributes()
  88. def __repr__(self):
  89. result = []
  90. attrs = [attr for attr in dir(self) if not attr.startswith("_")]
  91. attrs.sort()
  92. for attr in attrs:
  93. value = getattr(self, attr)
  94. if attr == "attributes" and value.has_key("Code"):
  95. value = value.copy()
  96. value.update({"Code": "<ELIDED>"})
  97. result.append("%-13s %s" % (
  98. attr + ":", repr(value).replace(
  99. "'Code': '<ELIDED>'", "'Code': <ELIDED>")))
  100. return ("\n%s" % (15 * " ")).join(result)
  101. def _read_attributes(self):
  102. result = {}
  103. for i in xrange(self._read_short()):
  104. name = self._read_reference_Utf8()
  105. data = self.fp.read(self._read_int())
  106. assert not result.has_key(name)
  107. result[name] = data
  108. return result
  109. # Constants pool reference reader convenience functions
  110. def _read_reference_Utf8(self):
  111. return self._read_references("Utf8")[0]
  112. def _read_reference_Class(self):
  113. return self._read_references("Class")[0]
  114. def _read_reference_Class_NameAndType(self):
  115. return self._read_references("Class", "NameAndType")
  116. def _read_references(self, *args):
  117. result = []
  118. for arg in args:
  119. index = self._read_short()
  120. if self.pool_integrity_checks is not None:
  121. self.pool_integrity_checks.append((index, arg))
  122. result.append(index)
  123. return result
  124. # Constants pool constant reader functions
  125. def _read_constant_Utf8(self):
  126. constant = self.fp.read(self._read_short())
  127. try:
  128. constant = constant.decode("utf-8")
  129. except UnicodeError:
  130. constant = _bork_utf8_decode(constant)
  131. try:
  132. constant = constant.encode("us-ascii")
  133. except UnicodeError:
  134. pass
  135. return constant
  136. def _read_constant_Integer(self):
  137. return self._read_int()
  138. def _read_constant_Float(self):
  139. return self._read(">f")[0]
  140. def _read_constant_Long(self):
  141. return self._read(">q")[0]
  142. def _read_constant_Double(self):
  143. return self._read(">d")[0]
  144. _read_constant_Class = _read_reference_Utf8
  145. _read_constant_String = _read_reference_Utf8
  146. _read_constant_Fieldref = _read_reference_Class_NameAndType
  147. _read_constant_Methodref = _read_reference_Class_NameAndType
  148. _read_constant_InterfaceMethodref = _read_reference_Class_NameAndType
  149. def _read_constant_NameAndType(self):
  150. return self._read_reference_Utf8(), self._read_reference_Utf8()
  151. # Generic reader functions
  152. def _read_int(self):
  153. # XXX how else to read 32 bits on a 64-bit box?
  154. h, l = map(long, self._read(">HH"))
  155. return (h << 16) + l
  156. def _read_short(self):
  157. return self._read(">H")[0]
  158. def _read_byte(self):
  159. return self._read("B")[0]
  160. def _read(self, fmt):
  161. return struct.unpack(fmt, self.fp.read(struct.calcsize(fmt)))
  162. def _bork_utf8_decode(data):
  163. # more crack!
  164. bytes, unicode = map(ord, data), ""
  165. while bytes:
  166. b1 = bytes.pop(0)
  167. if b1 & 0x80:
  168. assert b1 & 0x40
  169. b2 = bytes.pop(0)
  170. assert b2 & 0xC0 == 0x80
  171. if b1 & 0x20:
  172. assert not b1 & 0x10
  173. b3 = bytes.pop(0)
  174. assert b3 & 0xC0 == 0x80
  175. unicode += unichr(
  176. ((b1 & 0x0f) << 12) + ((b2 & 0x3f) << 6) + (b3 & 0x3f))
  177. else:
  178. unicode += unichr(((b1 & 0x1f) << 6) + (b2 & 0x3f))
  179. else:
  180. unicode += unichr(b1)
  181. return unicode
  182. if __name__ == "__main__":
  183. print Class("/usr/share/katana/build/ListDependentClasses.class")