test_json_engine.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255
  1. # SPDX-License-Identifier: AGPL-3.0-or-later
  2. # pylint: disable=missing-module-docstring
  3. from collections import defaultdict
  4. import mock
  5. from searx.engines import json_engine
  6. from searx import logger
  7. from tests import SearxTestCase
  8. logger = logger.getChild('engines')
  9. class TestJsonEngine(SearxTestCase): # pylint: disable=missing-class-docstring
  10. json = """
  11. [
  12. {
  13. "title": "title0",
  14. "content": "content0",
  15. "url": "https://example.com/url0",
  16. "images": [
  17. {
  18. "thumb": "https://example.com/thumb00"
  19. },
  20. {
  21. "thumb": "https://example.com/thumb01"
  22. }
  23. ]
  24. },
  25. {
  26. "title": "<h1>title1</h1>",
  27. "content": "<h2>content1</h2>",
  28. "url": "https://example.com/url1",
  29. "images": [
  30. {
  31. "thumb": "https://example.com/thumb10"
  32. },
  33. {
  34. "thumb": "https://example.com/thumb11"
  35. }
  36. ]
  37. },
  38. {
  39. "title": "title2",
  40. "content": "content2",
  41. "url": 2,
  42. "images": [
  43. {
  44. "thumb": "thumb20"
  45. },
  46. {
  47. "thumb": 21
  48. }
  49. ]
  50. }
  51. ]
  52. """
  53. json_result_query = """
  54. {
  55. "data": {
  56. "results": [
  57. {
  58. "title": "title0",
  59. "content": "content0",
  60. "url": "https://example.com/url0",
  61. "images": [
  62. {
  63. "thumb": "https://example.com/thumb00"
  64. },
  65. {
  66. "thumb": "https://example.com/thumb01"
  67. }
  68. ]
  69. },
  70. {
  71. "title": "<h1>title1</h1>",
  72. "content": "<h2>content1</h2>",
  73. "url": "https://example.com/url1",
  74. "images": [
  75. {
  76. "thumb": "https://example.com/thumb10"
  77. },
  78. {
  79. "thumb": "https://example.com/thumb11"
  80. }
  81. ]
  82. },
  83. {
  84. "title": "title2",
  85. "content": "content2",
  86. "url": 2,
  87. "images": [
  88. {
  89. "thumb": "thumb20"
  90. },
  91. {
  92. "thumb": 21
  93. }
  94. ]
  95. }
  96. ],
  97. "suggestions": [
  98. "suggestion0",
  99. "suggestion1"
  100. ]
  101. }
  102. }
  103. """
  104. def setUp(self):
  105. json_engine.logger = logger.getChild('test_json_engine')
  106. def test_request(self):
  107. json_engine.search_url = 'https://example.com/{query}'
  108. json_engine.categories = []
  109. json_engine.paging = False
  110. query = 'test_query'
  111. dicto = defaultdict(dict)
  112. dicto['language'] = 'all'
  113. dicto['pageno'] = 1
  114. params = json_engine.request(query, dicto)
  115. self.assertIn('url', params)
  116. self.assertEqual('https://example.com/test_query', params['url'])
  117. json_engine.search_url = 'https://example.com/q={query}&p={pageno}'
  118. json_engine.paging = True
  119. query = 'test_query'
  120. dicto = defaultdict(dict)
  121. dicto['language'] = 'all'
  122. dicto['pageno'] = 1
  123. params = json_engine.request(query, dicto)
  124. self.assertIn('url', params)
  125. self.assertEqual('https://example.com/q=test_query&p=1', params['url'])
  126. json_engine.search_url = 'https://example.com/'
  127. json_engine.paging = True
  128. json_engine.request_body = '{{"page": {pageno}, "query": "{query}"}}'
  129. query = 'test_query'
  130. dicto = defaultdict(dict)
  131. dicto['language'] = 'all'
  132. dicto['pageno'] = 1
  133. params = json_engine.request(query, dicto)
  134. self.assertIn('data', params)
  135. self.assertEqual('{"page": 1, "query": "test_query"}', params['data'])
  136. def test_response(self):
  137. # without results_query
  138. json_engine.results_query = ''
  139. json_engine.url_query = 'url'
  140. json_engine.url_prefix = ''
  141. json_engine.title_query = 'title'
  142. json_engine.content_query = 'content'
  143. json_engine.thumbnail_query = 'images/thumb'
  144. json_engine.thumbnail_prefix = ''
  145. json_engine.title_html_to_text = False
  146. json_engine.content_html_to_text = False
  147. json_engine.categories = []
  148. self.assertRaises(AttributeError, json_engine.response, None)
  149. self.assertRaises(AttributeError, json_engine.response, [])
  150. self.assertRaises(AttributeError, json_engine.response, '')
  151. self.assertRaises(AttributeError, json_engine.response, '[]')
  152. response = mock.Mock(text='{}', status_code=200)
  153. self.assertEqual(json_engine.response(response), [])
  154. response = mock.Mock(text=self.json, status_code=200)
  155. results = json_engine.response(response)
  156. self.assertEqual(type(results), list)
  157. self.assertEqual(len(results), 3)
  158. self.assertEqual(results[0]['title'], 'title0')
  159. self.assertEqual(results[0]['url'], 'https://example.com/url0')
  160. self.assertEqual(results[0]['content'], 'content0')
  161. self.assertEqual(results[0]['thumbnail'], 'https://example.com/thumb00')
  162. self.assertEqual(results[1]['title'], '<h1>title1</h1>')
  163. self.assertEqual(results[1]['url'], 'https://example.com/url1')
  164. self.assertEqual(results[1]['content'], '<h2>content1</h2>')
  165. self.assertEqual(results[1]['thumbnail'], 'https://example.com/thumb10')
  166. # with prefix and suggestions without results_query
  167. json_engine.url_prefix = 'https://example.com/url'
  168. json_engine.thumbnail_query = 'images/1/thumb'
  169. json_engine.thumbnail_prefix = 'https://example.com/thumb'
  170. results = json_engine.response(response)
  171. self.assertEqual(type(results), list)
  172. self.assertEqual(len(results), 3)
  173. self.assertEqual(results[2]['title'], 'title2')
  174. self.assertEqual(results[2]['url'], 'https://example.com/url2')
  175. self.assertEqual(results[2]['content'], 'content2')
  176. self.assertEqual(results[2]['thumbnail'], 'https://example.com/thumb21')
  177. self.assertFalse(results[0].get('is_onion', False))
  178. # results are onion urls without results_query
  179. json_engine.categories = ['onions']
  180. results = json_engine.response(response)
  181. self.assertTrue(results[0]['is_onion'])
  182. def test_response_results_json(self):
  183. # with results_query
  184. json_engine.results_query = 'data/results'
  185. json_engine.url_query = 'url'
  186. json_engine.url_prefix = ''
  187. json_engine.title_query = 'title'
  188. json_engine.content_query = 'content'
  189. json_engine.thumbnail_query = 'images/1/thumb'
  190. json_engine.thumbnail_prefix = ''
  191. json_engine.title_html_to_text = True
  192. json_engine.content_html_to_text = True
  193. json_engine.categories = []
  194. self.assertRaises(AttributeError, json_engine.response, None)
  195. self.assertRaises(AttributeError, json_engine.response, [])
  196. self.assertRaises(AttributeError, json_engine.response, '')
  197. self.assertRaises(AttributeError, json_engine.response, '[]')
  198. response = mock.Mock(text='{}', status_code=200)
  199. self.assertEqual(json_engine.response(response), [])
  200. response = mock.Mock(text=self.json_result_query, status_code=200)
  201. results = json_engine.response(response)
  202. self.assertEqual(type(results), list)
  203. self.assertEqual(len(results), 3)
  204. self.assertEqual(results[0]['title'], 'title0')
  205. self.assertEqual(results[0]['url'], 'https://example.com/url0')
  206. self.assertEqual(results[0]['content'], 'content0')
  207. self.assertEqual(results[0]['thumbnail'], 'https://example.com/thumb01')
  208. self.assertEqual(results[1]['title'], 'title1')
  209. self.assertEqual(results[1]['url'], 'https://example.com/url1')
  210. self.assertEqual(results[1]['content'], 'content1')
  211. self.assertEqual(results[1]['thumbnail'], 'https://example.com/thumb11')
  212. # with prefix and suggestions with results_query
  213. json_engine.url_prefix = 'https://example.com/url'
  214. json_engine.thumbnail_query = 'images/1/thumb'
  215. json_engine.thumbnail_prefix = 'https://example.com/thumb'
  216. json_engine.suggestion_query = 'data/suggestions'
  217. results = json_engine.response(response)
  218. self.assertEqual(type(results), list)
  219. self.assertEqual(len(results), 4)
  220. self.assertEqual(results[2]['title'], 'title2')
  221. self.assertEqual(results[2]['url'], 'https://example.com/url2')
  222. self.assertEqual(results[2]['content'], 'content2')
  223. self.assertEqual(results[2]['thumbnail'], 'https://example.com/thumb21')
  224. self.assertEqual(results[3]['suggestion'], ['suggestion0', 'suggestion1'])
  225. self.assertFalse(results[0].get('is_onion', False))
  226. # results are onion urls with results_query
  227. json_engine.categories = ['onions']
  228. results = json_engine.response(response)
  229. self.assertTrue(results[0]['is_onion'])