# SPDX-License-Identifier: AGPL-3.0-or-later
"""Wordnik (general)
"""
from html import escape
from urllib.parse import quote

from lxml.html import fromstring

from searx import logger
from searx.utils import extract_text
from searx.raise_for_httperror import raise_for_httperror

logger = logger.getChild('Wordnik engine')
  9. # about
  10. about = {
  11. "website": 'https://www.wordnik.com',
  12. "wikidata_id": 'Q8034401',
  13. "official_api_documentation": None,
  14. "use_official_api": False,
  15. "require_api_key": False,
  16. "results": 'HTML',
  17. }
  18. categories = ['general']
  19. paging = False
  20. URL = 'https://www.wordnik.com'
  21. SEARCH_URL = URL + '/words/{query}'
  22. def request(query, params):
  23. params['url'] = SEARCH_URL.format(query=query)
  24. logger.debug(f"query_url --> {params['url']}")
  25. return params
  26. def response(resp):
  27. results = []
  28. raise_for_httperror(resp)
  29. dom = fromstring(resp.text)
  30. word = extract_text(dom.xpath('//*[@id="headword"]/text()'))
  31. definitions = []
  32. for src in dom.xpath('//*[@id="define"]//h3[@class="source"]'):
  33. src_text = extract_text(src).strip()
  34. if src_text.startswith('from '):
  35. src_text = src_text[5:]
  36. src_defs = []
  37. for def_item in src.xpath('following-sibling::ul[1]/li'):
  38. def_abbr = extract_text(def_item.xpath('.//abbr')).strip()
  39. def_text = extract_text(def_item).strip()
  40. if def_abbr:
  41. def_text = def_text[len(def_abbr):].strip()
  42. src_defs.append((def_abbr, def_text))
  43. definitions.append((src_text, src_defs))
  44. if not definitions:
  45. return results
  46. infobox = ''
  47. for src_text, src_defs in definitions:
  48. infobox += f"<small>{src_text}</small>"
  49. infobox += "<ul>"
  50. for def_abbr, def_text in src_defs:
  51. if def_abbr:
  52. def_abbr += ": "
  53. infobox += f"<li><i>{def_abbr}</i> {def_text}</li>"
  54. infobox += "</ul>"
  55. results.append({
  56. 'infobox': word,
  57. 'content': infobox,
  58. })
  59. return results