jisho.py 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. """
  3. Jisho (the Japanese-English dictionary)
  4. """
  5. from urllib.parse import urlencode, urljoin
  6. # about
  7. about = {
  8. "website": 'https://jisho.org',
  9. "wikidata_id": 'Q24568389',
  10. "official_api_documentation": "https://jisho.org/forum/54fefc1f6e73340b1f160000-is-there-any-kind-of-search-api",
  11. "use_official_api": True,
  12. "require_api_key": False,
  13. "results": 'JSON',
  14. "language": 'ja',
  15. }
  16. categories = ['dictionaries']
  17. paging = False
  18. URL = 'https://jisho.org'
  19. BASE_URL = 'https://jisho.org/word/'
  20. SEARCH_URL = URL + '/api/v1/search/words?{query}'
  21. def request(query, params):
  22. query = urlencode({'keyword': query})
  23. params['url'] = SEARCH_URL.format(query=query)
  24. logger.debug(f"query_url --> {params['url']}")
  25. return params
  26. def response(resp):
  27. results = []
  28. first_result = True
  29. search_results = resp.json()
  30. for page in search_results.get('data', []):
  31. # Entries that are purely from Wikipedia are excluded.
  32. parts_of_speech = page.get('senses') and page['senses'][0].get('parts_of_speech')
  33. if parts_of_speech and parts_of_speech[0] == 'Wikipedia definition':
  34. pass
  35. # Process alternative forms
  36. alt_forms = []
  37. for title_raw in page['japanese']:
  38. if 'word' not in title_raw:
  39. alt_forms.append(title_raw['reading'])
  40. else:
  41. title = title_raw['word']
  42. if 'reading' in title_raw:
  43. title += ' (' + title_raw['reading'] + ')'
  44. alt_forms.append(title)
  45. result_url = urljoin(BASE_URL, page['slug'])
  46. definitions = get_definitions(page)
  47. # For results, we'll return the URL, all alternative forms (as title),
  48. # and all definitions (as description) truncated to 300 characters.
  49. content = " ".join(f"{engdef}." for _, engdef, _ in definitions)
  50. results.append(
  51. {'url': result_url, 'title': ", ".join(alt_forms), 'content': content[:300] + (content[300:] and '...')}
  52. )
  53. # Like Wordnik, we'll return the first result in an infobox too.
  54. if first_result:
  55. first_result = False
  56. results.append(get_infobox(alt_forms, result_url, definitions))
  57. return results
  58. def get_definitions(page):
  59. # Process definitions
  60. definitions = []
  61. for defn_raw in page['senses']:
  62. extra = []
  63. # Extra data. Since they're not documented, this implementation is based solely by the author's assumptions.
  64. if defn_raw.get('tags'):
  65. if defn_raw.get('info'):
  66. # "usually written as kana: <kana>"
  67. extra.append(defn_raw['tags'][0] + ', ' + defn_raw['info'][0] + '. ')
  68. else:
  69. # abbreviation, archaism, etc.
  70. extra.append(', '.join(defn_raw['tags']) + '. ')
  71. elif defn_raw.get('info'):
  72. # inconsistent
  73. extra.append(', '.join(defn_raw['info']).capitalize() + '. ')
  74. if defn_raw.get('restrictions'):
  75. extra.append('Only applies to: ' + ', '.join(defn_raw['restrictions']) + '. ')
  76. definitions.append(
  77. (
  78. ', '.join(defn_raw['parts_of_speech']),
  79. '; '.join(defn_raw['english_definitions']),
  80. ''.join(extra)[:-1],
  81. )
  82. )
  83. return definitions
  84. def get_infobox(alt_forms, result_url, definitions):
  85. infobox_content = []
  86. # title & alt_forms
  87. infobox_title = alt_forms[0]
  88. if len(alt_forms) > 1:
  89. infobox_content.append(f'<p><i>Other forms:</i> {", ".join(alt_forms[1:])}</p>')
  90. # definitions
  91. infobox_content.append(
  92. '''
  93. <small><a href="https://www.edrdg.org/wiki/index.php/JMdict-EDICT_Dictionary_Project">JMdict</a>
  94. and <a href="https://www.edrdg.org/enamdict/enamdict_doc.html">JMnedict</a>
  95. by <a href="https://www.edrdg.org/edrdg/licence.html">EDRDG</a>, CC BY-SA 3.0.</small>
  96. <ul>
  97. '''
  98. )
  99. for pos, engdef, extra in definitions:
  100. if pos == 'Wikipedia definition':
  101. infobox_content.append('</ul><small>Wikipedia, CC BY-SA 3.0.</small><ul>')
  102. pos = f'<i>{pos}</i>: ' if pos else ''
  103. extra = f' ({extra})' if extra else ''
  104. infobox_content.append(f'<li>{pos}{engdef}{extra}</li>')
  105. infobox_content.append('</ul>')
  106. #
  107. return {
  108. 'infobox': infobox_title,
  109. 'content': ''.join(infobox_content),
  110. 'urls': [
  111. {
  112. 'title': 'Jisho.org',
  113. 'url': result_url,
  114. }
  115. ],
  116. }