xpath 1.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768
#!/usr/bin/env python
#
# Copy me if you can.
# by 20h
#
  6. import os
  7. import sys
  8. import getopt
  9. from lxml import etree
  10. def getxpath(fd, xpath, attribute=None, encoding=None):
  11. try:
  12. parser = etree.HTMLParser(encoding=encoding)
  13. xml = etree.parse(fd, parser)
  14. sels = xml.xpath(xpath)
  15. except AssertionError:
  16. return None
  17. if attribute != None:
  18. return "\n".join(["".join(i.attrib[attribute]) for i in sels \
  19. if attribute in i.attrib])
  20. try:
  21. return "\n".join([("".join(i.itertext())).strip() for i in sels])
  22. except AttributeError:
  23. return "\n".join(sels)
  24. def usage(app):
  25. app = os.path.basename(app)
  26. sys.stderr.write("usage: %s [-h] [-e encoding] "\
  27. "[-a attribute] xpath\n" % (app))
  28. sys.exit(1)
  29. def main(args):
  30. try:
  31. opts, largs = getopt.getopt(args[1:], "he:a:")
  32. except getopt.GetoptError as err:
  33. print(str(err))
  34. usage(args[0])
  35. encoding = None
  36. attribute = None
  37. for o, a in opts:
  38. if o == "-h":
  39. usage(args[0])
  40. elif o == "-e":
  41. encoding = a
  42. elif o == "-a":
  43. attribute = a
  44. else:
  45. assert False, "unhandled option"
  46. if len(largs) < 1:
  47. usage(args[0])
  48. rpath = getxpath(sys.stdin, largs[0], attribute, encoding)
  49. if rpath == None:
  50. return 1
  51. print(rpath)
  52. return 0
  53. if __name__ == "__main__":
  54. sys.exit(main(sys.argv))