# atomprop.py -- wrappers around Atom (RFC 4287) feed and entry elements
import base64
import logging
from datetime import datetime

import dateutil.parser

try:
    from lxml import etree as ET
    logging.debug("running with lxml.etree")
except ImportError as e:
    try:
        import elementtree.ElementTree as ET
        logging.debug("running with ElementTree")
    except ImportError as e:
        logging.error("Failed to import ElementTree from any known place")
        raise e
  13. NS = {
  14. 'atom': 'http://www.w3.org/2005/Atom',
  15. 'xhtml': 'http://www.w3.org/1999/xhtml',
  16. }
  17. def ns_tag(ns, tag):
  18. return '{{{!s}}}{!s}'.format(ns, tag)
  19. def atom_tag(tag):
  20. return ns_tag(NS['atom'], tag)
  21. def xhtml_tag(tag):
  22. return ns_tag(NS['xhtml'], tag)
  23. def element_factory(cls, el, **kwargs):
  24. if el is None:
  25. raise KeyError('Element not found')
  26. return cls(el, **kwargs)
  27. class AtomBaseElement():
  28. tree = None
  29. root = None
  30. def __init__(self, data, **kwargs):
  31. if len(kwargs) > 0:
  32. if not type(data) is str:
  33. raise TypeError('If you provide keyword arguments, the data parameter must be a tag name as a str')
  34. self.root = ET.Element(data)
  35. for key in kwargs:
  36. self.set_atom_property(key, kwargs[key])
  37. elif type(data) is ET._Element:
  38. self.root = data
  39. self.tree = self.root.getroottree()
  40. elif type(data) in (str, bytes) and data[:5] in ['<?xml', b'<?xml']:
  41. self.root = ET.fromstring(data.encode()) if type(data) is str else ET.fromstring(data)
  42. self.tree = self.root.getroottree()
  43. else:
  44. self.tree = ET.parse(data)
  45. self.root = self.tree.getroot()
  46. if self.tree is None:
  47. ValueError('Could not get an XML tree from provided data')
  48. def validate(self):
  49. pass
  50. def get_atom_property(self, tag, cls=None, ns=NS['atom']):
  51. if not type(tag) is str:
  52. raise TypeError('tag must be string')
  53. if cls is None:
  54. cls = cls_map[ns_tag(ns, tag)]
  55. try:
  56. return element_factory(cls, self.find(tag, ns=ns))
  57. except KeyError as e:
  58. return None
  59. def set_atom_property(self, tag, value):
  60. if not type(tag) is str:
  61. raise TypeError('tag must be string')
  62. tag = atom_tag(tag)
  63. cls = cls_map[tag]
  64. ''' find the property element '''
  65. el = self.find(tag)
  66. if el is None and value is None:
  67. ''' no property there to delete '''
  68. return
  69. elif value is None:
  70. ''' delete the property '''
  71. el.parent.remove(el)
  72. return
  73. elif el is None:
  74. el = ET.Element(tag)
  75. self.root.append(el)
  76. ''' create the AtomBaseElement extension and set a value through that '''
  77. element_factory(cls, el).set_value(value)
  78. @property
  79. def tag(self):
  80. return self.root.tag
  81. def find(self, tag, ns=NS['atom']):
  82. return self.root.find(ns_tag(ns, tag))
  83. def findall(self, tag, ns=NS['atom']):
  84. return self.root.findall(ns_tag(ns, tag))
  85. def get(self, attr):
  86. return self.root.get(attr)
  87. def set(self, attr, value):
  88. return self.root.set(attr, value)
  89. class Construct(AtomBaseElement):
  90. def get_value(self):
  91. return self.root.text
  92. def set_value(self, value):
  93. self.root.text = value
  94. class DateConstruct(Construct):
  95. def get_value(self):
  96. return dateutil.parser.parse(self.root.text)
  97. def set_value(self, value):
  98. if not isinstance(value, datetime):
  99. raise TypeError('dateconstruct set value must be datetime')
  100. self.root.text = value.isoformat()
  101. class TextConstruct(Construct):
  102. pass
  103. class UriConstruct(Construct):
  104. pass
  105. class AtomLink(Construct):
  106. def get_value(self):
  107. return self.root.get('href')
  108. def set_value(self, value):
  109. self.root.set('href', value)
  110. class ContentData(TextConstruct):
  111. mimetype = None
  112. def __init__(self, data, mimetype):
  113. super().__init__(data)
  114. self.mimetype = mimetype
  115. class ContentText(ContentData):
  116. def __init__(self, data):
  117. super().__init__(data, 'text/plain')
  118. class ContentHtml(ContentData):
  119. def __init__(self, data):
  120. super().__init__(data, 'text/html')
  121. class ContentXml(ContentData):
  122. def get_value(self):
  123. return self.root
  124. def set_value(self, value):
  125. if type(value) is str:
  126. value = ET.fromstring(value)
  127. if not type(value) is ET._Element:
  128. raise TypeError('You must provide an XML string or an Element here')
  129. raise IOError('Not implemented yet')
  130. def __str__(self):
  131. return self.root.text + ''.join(ET.tostring(el).decode() for el in self.root)
  132. class ContentXhtml(ContentXml):
  133. def __init__(self, data):
  134. super().__init__(data, 'text/xhtml')
  135. class ContentProcessingModel(Construct):
  136. @property
  137. def type(self):
  138. if self.get('type') is None:
  139. self.set('type', 'text')
  140. return self.get('type')
  141. @property
  142. def construct(self):
  143. '''
  144. Processing model: https://tools.ietf.org/html/rfc4287#section-4.1.3.3
  145. Atom Documents MUST conform to the following rules. Atom Processors
  146. MUST interpret atom:content according to the first applicable rule.
  147. '''
  148. if self.type == 'text':
  149. ''' 1. If the value of "type" is "text", the content of atom:content
  150. MUST NOT contain child elements. '''
  151. return ContentText(self.root)
  152. elif self.type == 'html':
  153. ''' 2. If the value of "type" is "html", the content of atom:content
  154. MUST NOT contain child elements and SHOULD be suitable for
  155. handling as HTML [HTML]. '''
  156. return ContentHtml(self.root)
  157. elif self.type == 'xhtml':
  158. ''' 3. If the value of "type" is "xhtml", the content of atom:content
  159. MUST be a single XHTML div element [XHTML] and SHOULD be suitable
  160. for handling as XHTML. The XHTML div element itself MUST NOT be
  161. considered part of the content. '''
  162. return ContentXhtml(self.root.find(ns_tag(NS['xhtml'], 'div')))
  163. elif self.type[-4:].lower() in [ '+xml', '/xml' ]:
  164. ''' 4. If the value of "type" is an XML media type [RFC3023] or ends
  165. with "+xml" or "/xml" (case insensitive), the content of
  166. atom:content MAY include child elements and SHOULD be suitable
  167. for handling as the indicated media type. If the "src" attribute
  168. is not provided, this would normally mean that the "atom:content"
  169. element would contain a single child element that would serve as
  170. the root element of the XML document of the indicated type. '''
  171. return ContentXml(self.root, self.type)
  172. elif self.type[:5].lower() == 'text/':
  173. ''' 5. If the value of "type" begins with "text/" (case insensitive),
  174. the content of atom:content MUST NOT contain child elements. '''
  175. return ContentData(self.root.text, self.type)
  176. else:
  177. ''' 6. For all other values of "type", the content of atom:content MUST
  178. be a valid Base64 encoding, as described in [RFC3548], section 3.
  179. When decoded, it SHOULD be suitable for handling as the indicated
  180. media type. '''
  181. return ContentData(base64_decode(self.root.text), self.type)
  182. def get_value(self):
  183. return self.construct.get_value()
  184. def __str__(self):
  185. return str(self.construct)
  186. class PersonConstruct(Construct):
  187. def get_value(self):
  188. return self
  189. @property
  190. def name(self):
  191. return self.get_atom_property('name')
  192. @name.setter
  193. def name(self, value):
  194. self.set_atom_property('name', value)
  195. @property
  196. def uri(self):
  197. return self.get_atom_property('uri')
  198. @uri.setter
  199. def uri(self, value):
  200. self.set_atom_property('uri', value)
  201. @property
  202. def email(self):
  203. return self.get_atom_property('email')
  204. @email.setter
  205. def email(self, value):
  206. self.set_atom_property('email', value)
  207. class AtomAuthor(PersonConstruct):
  208. def __init__(self, *args, **kwargs):
  209. if len(args) == 0:
  210. super().__init__(atom_tag('author'), **kwargs)
  211. else:
  212. super().__init__(*args, **kwargs)
  213. def set_value(self, value):
  214. if type(value) is type(self):
  215. for key in [ 'name', 'uri', 'email' ]:
  216. try:
  217. data = getattr(value, key).get_value()
  218. except AttributeError as e:
  219. data = None
  220. setattr(self, key, data)
  221. else:
  222. for key in value:
  223. setattr(self, key, value[key])
  224. def validate(self):
  225. if self.tag != atom_tag('author'):
  226. raise ValueError('Not an atom:author tag')
  227. class AtomContent(ContentProcessingModel):
  228. pass
  229. class AtomSummary(ContentProcessingModel):
  230. pass
  231. class AtomSubtitle(ContentProcessingModel):
  232. pass
  233. class AtomTitle(TextConstruct):
  234. pass
  235. cls_map = {
  236. atom_tag('id'): UriConstruct,
  237. atom_tag('published'): DateConstruct,
  238. atom_tag('updated'): DateConstruct,
  239. atom_tag('link'): AtomLink,
  240. atom_tag('author'): AtomAuthor,
  241. atom_tag('name'): TextConstruct,
  242. atom_tag('email'): TextConstruct,
  243. atom_tag('uri'): UriConstruct,
  244. atom_tag('title'): AtomTitle,
  245. atom_tag('content'): AtomContent,
  246. atom_tag('subtitle'): AtomSubtitle,
  247. atom_tag('summary'): AtomSummary,
  248. }
  249. class AtomFeed(AtomBaseElement):
  250. def __init__(self, *args, **kwargs):
  251. if len(args) == 0:
  252. super().__init__(atom_tag('feed'), **kwargs)
  253. else:
  254. super().__init__(*args, **kwargs)
  255. def find_link(self, rels, attr=None, mimetype=None, ns=NS['atom']):
  256. if not isinstance(rels, (list, tuple)):
  257. rels = (rels,)
  258. for el in self.root.findall("./{}".format(ns_tag(ns, 'link'))):
  259. if not el.get('rel') in rels:
  260. continue
  261. if mimetype and el.get('type') != mimetype:
  262. continue
  263. if attr:
  264. return el.get(attr)
  265. return element_factory(cls_map[ns_tag(ns, 'link')], el)
  266. @property
  267. def author(self):
  268. return self.get_atom_property('author')
  269. @author.setter
  270. def author(self, value):
  271. return self.set_atom_property('author', value)
  272. @property
  273. def updated(self):
  274. return self.get_atom_property('updated')
  275. @updated.setter
  276. def updated(self, value):
  277. if not isinstance(value, datetime):
  278. raise TypeError('You must provide a datetime')
  279. self.find('updated').text = value.isoformat()
  280. @property
  281. def id(self):
  282. return self.get_atom_property('id')
  283. @property
  284. def next(self):
  285. return self.find_link('next', mimetype='application/atom+xml')
  286. @property
  287. def prev(self):
  288. return self.find_link('prev', mimetype='application/atom+xml')
  289. @property
  290. def title(self):
  291. return self.get_atom_property('title')
  292. @title.setter
  293. def title(self, value):
  294. self.find('title').text = value
  295. @property
  296. def subtitle(self):
  297. return self.get_atom_property('subtitle')
  298. @subtitle.setter
  299. def subtitle(self, value):
  300. self.find('subtitle').text = value
  301. @property
  302. def entries(self):
  303. return AtomEntries(self.findall('entry'), feed=self)
  304. @property
  305. def categories(self):
  306. return AtomCategories(self.findall('category'), feed=self)
  307. def validate(self):
  308. if self.tag != atom_tag('feed'):
  309. raise TypeError('You did not supply an atom:feed')
  310. class AtomList(list):
  311. @property
  312. def length(self):
  313. return self.__len__()
  314. class AtomCategories(AtomList):
  315. def __init__(self, args, **kwargs):
  316. super().__init__(AtomCategory(arg, **kwargs) for arg in args)
  317. class AtomEntries(AtomList):
  318. feed = None
  319. def __init__(self, args, feed):
  320. self.feed = feed
  321. super().__init__(AtomEntry(arg, feed=feed) for arg in args)
  322. def append(self, value):
  323. self.feed.root.append(value.root)
  324. super().append(value)
  325. class AtomCategory(AtomBaseElement):
  326. feed = None
  327. entry = None
  328. def __init__(self, *args, feed=None, entry=None, **kwargs):
  329. if not feed is None and not entry is None:
  330. raise ValueError('An AtomCategory cannot have both a feed and entry as parent')
  331. self.feed = feed
  332. self.entry = entry
  333. super().__init__(*args, **kwargs)
  334. def __str__(self):
  335. return self.get('term')
  336. ''' Make the following act like dict keys
  337. @property
  338. def term(self):
  339. return self.get('term')
  340. @term.setter
  341. def term(self, value):
  342. return self.set('term', value)
  343. @property
  344. def label(self):
  345. return self.get('label')
  346. @label.setter
  347. def label(self, value):
  348. return self.set('label', value)
  349. @property
  350. def scheme(self):
  351. return self.get('scheme')
  352. @scheme.setter
  353. def label(self, value):
  354. return self.set('scheme', value)'''
  355. class AtomEntry(AtomBaseElement):
  356. feed = None
  357. def __init__(self, *args, feed=None, **kwargs):
  358. if len(args) == 0:
  359. super().__init__(atom_tag('entry'), **kwargs)
  360. else:
  361. super().__init__(*args, **kwargs)
  362. self.feed = feed
  363. @property
  364. def published(self):
  365. return self.get_atom_property('published')
  366. @published.setter
  367. def published(self, value):
  368. if not isinstance(value, datetime):
  369. raise TypeError('You must provide a datetime')
  370. self.find('published').text = value.isoformat()
  371. @property
  372. def updated(self):
  373. return self.get_atom_property('updated')
  374. @updated.setter
  375. def updated(self, value):
  376. if not isinstance(value, datetime):
  377. raise TypeError('You must provide a datetime')
  378. self.find('updated').text = value.isoformat()
  379. @property
  380. def id(self):
  381. return self.get_atom_property('id')
  382. @id.setter
  383. def id(self, value):
  384. self.get_atom_property('id').set_value(value)
  385. @property
  386. def author(self):
  387. return self.get_atom_property('author')
  388. @property
  389. def title(self):
  390. return self.get_atom_property('title')
  391. @title.setter
  392. def title(self, value):
  393. if not type(value) is str:
  394. raise TypeError('Cannot set title to non-string value')
  395. self.set_atom_property('title', value)
  396. @property
  397. def categories(self):
  398. return AtomCategories(self.findall('category'), entry=self)
  399. @property
  400. def content(self):
  401. return self.get_atom_property('content')
  402. @property
  403. def summary(self):
  404. return self.get_atom_property('summary')
  405. def validate(self):
  406. if self.tag != atom_tag('entry'):
  407. raise TypeError('You did not supply an atom:entry')