123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150 |
- import json
- from urllib import urlencode
- from lxml import html
- from searx.utils import html_to_text
- from searx.engines.xpath import extract_text
# DuckDuckGo API endpoint template; request() substitutes the url-encoded
# search terms for the `{query}` placeholder.
url = ('https://api.duckduckgo.com/'
       '?{query}&format=json&pretty=0&no_redirect=1&d=1')
def result_to_text(url, text, htmlResult):
    """Return the label to display for one DuckDuckGo topic entry.

    The text of the first ``<a>`` element found in ``htmlResult`` is
    preferred; ``text`` is returned when the markup is missing, blank or
    contains no link.

    :param url: URL of the entry (currently unused).
    :param text: fallback label.
    :param htmlResult: HTML fragment describing the entry, or ``None``.
    :returns: the extracted link text, or ``text``.
    """
    # TODO : remove result ending with "Meaning" or "Category"
    if not htmlResult or not htmlResult.strip():
        # Nothing to parse: the parser rejects None and empty documents,
        # so fall back to the plain text instead of raising.
        return text

    dom = html.fromstring(htmlResult)
    anchors = dom.xpath('//a')
    if anchors:
        return extract_text(anchors[0])
    return text
def request(query, params):
    """Store the DuckDuckGo API URL for ``query`` in ``params['url']``.

    :param query: search terms, url-encoded as the ``q`` parameter.
    :param params: request parameter dict; modified in place.
    :returns: the same ``params`` dict.
    """
    # TODO add kl={locale}
    encoded_query = urlencode({'q': query})
    params['url'] = url.format(query=encoded_query)
    return params
def response(resp):
    """Turn a DuckDuckGo API reply into a list of result dicts.

    ``resp.text`` is parsed as JSON.  Fields that are missing, ``null`` or
    empty are treated as absent instead of raising.

    The returned list may contain, in this order:

    - ``{'answer': ...}`` when ``Answer`` is set,
    - ``{'title': heading, 'url': ...}`` for every ``Results`` entry that
      has a ``FirstURL``,
    - ``{'suggestion': ...}`` for every top-level ``RelatedTopics`` entry
      that has a ``FirstURL`` and whose label differs from the heading,
    - when ``Heading`` is set, one final entry: a plain
      ``url``/``title``/``content`` result if a single link is all that was
      collected, otherwise an ``infobox`` dict.

    :param resp: response object exposing the JSON payload as ``resp.text``.
    :returns: list of result dicts.
    """
    results = []
    search_res = json.loads(resp.text)

    heading = search_res.get('Heading') or ''
    urls = []
    infobox_id = None
    related_topics = []

    # add answer if there is one
    answer = search_res.get('Answer')
    if answer:
        results.append({'answer': html_to_text(answer)})

    # infobox text: definition immediately followed by abstract
    content = (search_res.get('Definition') or '') \
        + (search_res.get('Abstract') or '')

    # image ('' means "no image")
    image = search_res.get('Image') or None

    # attributes: only a JSON object can carry them; any other value
    # (null, a string) is ignored instead of being probed with `in`/.get
    attributes = []
    infobox = search_res.get('Infobox')
    if isinstance(infobox, dict):
        attributes = [{'label': info.get('label'),
                       'value': info.get('value')}
                      for info in infobox.get('content') or []]

    # urls
    for ddg_result in search_res.get('Results') or []:
        if 'FirstURL' in ddg_result:
            first_url = ddg_result.get('FirstURL', '')
            text = ddg_result.get('Text', '')
            urls.append({'title': text, 'url': first_url})
            results.append({'title': heading, 'url': first_url})

    # related topics
    for ddg_result in search_res.get('RelatedTopics') or []:
        if 'FirstURL' in ddg_result:
            # single topic: exposed as a search suggestion
            suggestion = result_to_text(ddg_result.get('FirstURL'),
                                        ddg_result.get('Text'),
                                        ddg_result.get('Result'))
            if suggestion != heading:
                results.append({'suggestion': suggestion})
        elif 'Topics' in ddg_result:
            # named group of topics: kept for the infobox; the list is
            # registered first and filled in place below
            suggestions = []
            related_topics.append({'name': ddg_result.get('Name', ''),
                                   'suggestions': suggestions})
            for topic_result in ddg_result.get('Topics') or []:
                suggestion = result_to_text(topic_result.get('FirstURL'),
                                            topic_result.get('Text'),
                                            topic_result.get('Result'))
                if suggestion != heading:
                    suggestions.append(suggestion)

    # abstract
    abstract_url = search_res.get('AbstractURL')
    if abstract_url:
        # add as result ? problem always in english
        infobox_id = abstract_url
        urls.append({'title': search_res.get('AbstractSource'),
                     'url': abstract_url})

    # definition (takes precedence over the abstract as infobox id)
    definition_url = search_res.get('DefinitionURL')
    if definition_url:
        # add as result ? as answer ? problem always in english
        infobox_id = definition_url
        urls.append({'title': search_res.get('DefinitionSource'),
                     'url': definition_url})

    # entity
    entity = search_res.get('Entity')
    # TODO continent / country / department / location / waterfall /
    #      mountain range :
    #      link to map search, get weather, near by locations
    # TODO musician : link to music search
    # TODO concert tour : ??
    # TODO film / actor / television / media franchise :
    #      links to IMDB / rottentomatoes (or scrap result)
    # TODO music : link to musicbrainz / last.fm
    # TODO book : ??
    # TODO artist / playwright : ??
    # TODO company : ??
    # TODO software / os : ??
    # TODO software engineer : ??
    # TODO prepared food : ??
    # TODO website : ??
    # TODO performing art : ??
    # TODO programming language : ??
    # TODO file format : ??

    if heading:
        # TODO get infobox.meta.value where .label='article_title'
        only_one_link = (image is None and not attributes
                         and len(urls) == 1 and not related_topics
                         and not content)
        if only_one_link:
            # nothing but a single link is known: emit a regular result
            results.append({
                'url': urls[0]['url'],
                'title': heading,
                'content': content
            })
        else:
            results.append({
                'infobox': heading,
                'id': infobox_id,
                'entity': entity,
                'content': content,
                'img_src': image,
                'attributes': attributes,
                'urls': urls,
                'relatedTopics': related_topics
            })

    return results
|