perseus 1.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. #!/usr/bin/env python3
  2. # A tool to get translations of words from Perseus (http://www.perseus.tufts.edu)
  3. import bs4
  4. import re
  5. import requests
  6. import sys
  7. lang_list = {
  8. "-ar" : "ar",
  9. "-greek" : "greek",
  10. "-la" : "la",
  11. "-non" : "non",
  12. }
  13. lang = "greek"
  14. usage = "Usage: perseus [-ar,-greek,-la,-non] [query]"
  15. def text_getter(lst):
  16. return [re.sub(r'\n\s*\n', r'\n\n', word.get_text().strip(), flags=re.M) for word in lst]
  17. try:
  18. query = sys.argv[1]
  19. except IndexError:
  20. print(usage)
  21. exit()
  22. if query[0] == "-":
  23. try:
  24. lang = lang_list[query]
  25. query = sys.argv[2]
  26. except Exception:
  27. print(usage)
  28. exit()
  29. url = "http://www.perseus.tufts.edu/hopper/morph?la={}&l={}".format(lang,query)
  30. data = requests.get(url)
  31. site_soup = bs4.BeautifulSoup(data.text, "html.parser")
  32. analyses = site_soup.select("div.analysis")
  33. if not analyses:
  34. print("Word not found, or other minor error.")
  35. exit()
  36. for analysis in analyses:
  37. word = text_getter(analysis.select("h4.{}".format(lang)))[0]
  38. translation = text_getter(analysis.select("span.lemma_definition"))[0]
  39. attributes = text_getter(analysis.select("td"))[1::3]
  40. complex_words = text_getter(analysis.select("td.{}".format(lang)))
  41. print(word,"\t",translation)
  42. for attribute, cword in zip(attributes,complex_words):
  43. print(cword,"\t", attribute)
  44. if analyses[-1] != analysis:
  45. print("- - -")