unit_converter.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270
# SPDX-License-Identifier: AGPL-3.0-or-later
"""A plugin for converting measured values from one unit to another unit (a
unit converter).

The plugin looks up the symbols (given in the query term) in a list of
converters, each converter is one item in the list (compare
:py:obj:`ADDITIONAL_UNITS`).  If the symbols are ambiguous, the matching units
of measurement are evaluated.  The weighting in the evaluation results from the
sorting of the :py:obj:`list of unit converters<symbol_to_si>`.

Enable in ``settings.yml``:

.. code:: yaml

  enabled_plugins:
    ..
    - 'Unit converter plugin'

"""
  15. from __future__ import annotations
  16. import re
  17. import babel.numbers
  18. from flask_babel import gettext, get_locale
  19. from searx import data
  20. from searx.result_types import Answer
  21. name = "Unit converter plugin"
  22. description = gettext("Convert between units")
  23. default_on = True
  24. plugin_id = "unit_converter"
  25. preference_section = "general"
  26. CONVERT_KEYWORDS = ["in", "to", "as"]
  27. # inspired from https://stackoverflow.com/a/42475086
  28. RE_MEASURE = r'''
  29. (?P<sign>[-+]?) # +/- or nothing for positive
  30. (\s*) # separator: white space or nothing
  31. (?P<number>[\d\.,]*) # number: 1,000.00 (en) or 1.000,00 (de)
  32. (?P<E>[eE][-+]?\d+)? # scientific notation: e(+/-)2 (*10^2)
  33. (\s*) # separator: white space or nothing
  34. (?P<unit>\S+) # unit of measure
  35. '''
  36. ADDITIONAL_UNITS = [
  37. {
  38. "si_name": "Q11579",
  39. "symbol": "°C",
  40. "to_si": lambda val: val + 273.15,
  41. "from_si": lambda val: val - 273.15,
  42. },
  43. {
  44. "si_name": "Q11579",
  45. "symbol": "°F",
  46. "to_si": lambda val: (val + 459.67) * 5 / 9,
  47. "from_si": lambda val: (val * 9 / 5) - 459.67,
  48. },
  49. ]
  50. """Additional items to convert from a measure unit to a SI unit (vice versa).
  51. .. code:: python
  52. {
  53. "si_name": "Q11579", # Wikidata item ID of the SI unit (Kelvin)
  54. "symbol": "°C", # symbol of the measure unit
  55. "to_si": lambda val: val + 273.15, # convert measure value (val) to SI unit
  56. "from_si": lambda val: val - 273.15, # convert SI value (val) measure unit
  57. },
  58. {
  59. "si_name": "Q11573",
  60. "symbol": "mi",
  61. "to_si": 1609.344, # convert measure value (val) to SI unit
  62. "from_si": 1 / 1609.344 # convert SI value (val) measure unit
  63. },
  64. The values of ``to_si`` and ``from_si`` can be of :py:obj:`float` (a multiplier)
  65. or a callable_ (val in / converted value returned).
  66. .. _callable: https://docs.python.org/3/glossary.html#term-callable
  67. """
  68. ALIAS_SYMBOLS = {
  69. '°C': ('C',),
  70. '°F': ('F',),
  71. 'mi': ('L',),
  72. }
  73. """Alias symbols for known unit of measure symbols / by example::
  74. '°C': ('C', ...), # list of alias symbols for °C (Q69362731)
  75. '°F': ('F', ...), # list of alias symbols for °F (Q99490479)
  76. 'mi': ('L',), # list of alias symbols for mi (Q253276)
  77. """
  78. SYMBOL_TO_SI = []
  79. def symbol_to_si():
  80. """Generates a list of tuples, each tuple is a measure unit and the fields
  81. in the tuple are:
  82. 0. Symbol of the measure unit (e.g. 'mi' for measure unit 'miles' Q253276)
  83. 1. SI name of the measure unit (e.g. Q11573 for SI unit 'metre')
  84. 2. Factor to get SI value from measure unit (e.g. 1mi is equal to SI 1m
  85. multiplied by 1609.344)
  86. 3. Factor to get measure value from from SI value (e.g. SI 100m is equal to
  87. 100mi divided by 1609.344)
  88. The returned list is sorted, the first items are created from
  89. ``WIKIDATA_UNITS``, the second group of items is build from
  90. :py:obj:`ADDITIONAL_UNITS` and items created from :py:obj:`ALIAS_SYMBOLS`.
  91. If you search this list for a symbol, then a match with a symbol from
  92. Wikidata has the highest weighting (first hit in the list), followed by the
  93. symbols from the :py:obj:`ADDITIONAL_UNITS` and the lowest weighting is
  94. given to the symbols resulting from the aliases :py:obj:`ALIAS_SYMBOLS`.
  95. """
  96. global SYMBOL_TO_SI # pylint: disable=global-statement
  97. if SYMBOL_TO_SI:
  98. return SYMBOL_TO_SI
  99. # filter out units which can't be normalized to a SI unit and filter out
  100. # units without a symbol / arcsecond does not have a symbol
  101. # https://www.wikidata.org/wiki/Q829073
  102. for item in data.WIKIDATA_UNITS.values():
  103. if item['to_si_factor'] and item['symbol']:
  104. SYMBOL_TO_SI.append(
  105. (
  106. item['symbol'],
  107. item['si_name'],
  108. 1 / item['to_si_factor'], # from_si
  109. item['to_si_factor'], # to_si
  110. item['symbol'],
  111. )
  112. )
  113. for item in ADDITIONAL_UNITS:
  114. SYMBOL_TO_SI.append(
  115. (
  116. item['symbol'],
  117. item['si_name'],
  118. item['from_si'],
  119. item['to_si'],
  120. item['symbol'],
  121. )
  122. )
  123. alias_items = []
  124. for item in SYMBOL_TO_SI:
  125. for alias in ALIAS_SYMBOLS.get(item[0], ()):
  126. alias_items.append(
  127. (
  128. alias,
  129. item[1],
  130. item[2], # from_si
  131. item[3], # to_si
  132. item[0], # origin unit
  133. )
  134. )
  135. SYMBOL_TO_SI = SYMBOL_TO_SI + alias_items
  136. return SYMBOL_TO_SI
  137. def _parse_text_and_convert(from_query, to_query) -> str | None:
  138. # pylint: disable=too-many-branches, too-many-locals
  139. if not (from_query and to_query):
  140. return None
  141. measured = re.match(RE_MEASURE, from_query, re.VERBOSE)
  142. if not (measured and measured.group('number'), measured.group('unit')):
  143. return None
  144. # Symbols are not unique, if there are several hits for the from-unit, then
  145. # the correct one must be determined by comparing it with the to-unit
  146. # https://github.com/searxng/searxng/pull/3378#issuecomment-2080974863
  147. # first: collecting possible units
  148. source_list, target_list = [], []
  149. for symbol, si_name, from_si, to_si, orig_symbol in symbol_to_si():
  150. if symbol == measured.group('unit'):
  151. source_list.append((si_name, to_si))
  152. if symbol == to_query:
  153. target_list.append((si_name, from_si, orig_symbol))
  154. if not (source_list and target_list):
  155. return None
  156. source_to_si = target_from_si = target_symbol = None
  157. # second: find the right unit by comparing list of from-units with list of to-units
  158. for source in source_list:
  159. for target in target_list:
  160. if source[0] == target[0]: # compare si_name
  161. source_to_si = source[1]
  162. target_from_si = target[1]
  163. target_symbol = target[2]
  164. if not (source_to_si and target_from_si):
  165. return None
  166. _locale = get_locale() or 'en_US'
  167. value = measured.group('sign') + measured.group('number') + (measured.group('E') or '')
  168. value = babel.numbers.parse_decimal(value, locale=_locale)
  169. # convert value to SI unit
  170. if isinstance(source_to_si, (float, int)):
  171. value = float(value) * source_to_si
  172. else:
  173. value = source_to_si(float(value))
  174. # convert value from SI unit to target unit
  175. if isinstance(target_from_si, (float, int)):
  176. value = float(value) * target_from_si
  177. else:
  178. value = target_from_si(float(value))
  179. if measured.group('E'):
  180. # when incoming notation is scientific, outgoing notation is scientific
  181. result = babel.numbers.format_scientific(value, locale=_locale)
  182. else:
  183. result = babel.numbers.format_decimal(value, locale=_locale, format='#,##0.##########;-#')
  184. return f'{result} {target_symbol}'
  185. def post_search(_request, search) -> list[Answer]:
  186. results = []
  187. # only convert between units on the first page
  188. if search.search_query.pageno > 1:
  189. return results
  190. query = search.search_query.query
  191. query_parts = query.split(" ")
  192. if len(query_parts) < 3:
  193. return results
  194. for query_part in query_parts:
  195. for keyword in CONVERT_KEYWORDS:
  196. if query_part == keyword:
  197. from_query, to_query = query.split(keyword, 1)
  198. target_val = _parse_text_and_convert(from_query.strip(), to_query.strip())
  199. if target_val:
  200. Answer(results=results, answer=target_val)
  201. return results