stackoverflow.py 1.5 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061
  1. """
  2. Stackoverflow (It)
  3. @website https://stackoverflow.com/
  4. @provide-api not clear (https://api.stackexchange.com/docs/advanced-search)
  5. @using-api no
  6. @results HTML
  7. @stable no (HTML can change)
  8. @parse url, title, content
  9. """
  10. from urlparse import urljoin
  11. from cgi import escape
  12. from urllib import urlencode
  13. from lxml import html
  14. from searx.engines.xpath import extract_text
# engine dependent config
# categories this engine answers for; 'it' = IT/programming tab
categories = ['it']
# the site paginates, so searx may request pages > 1
paging = True

# search-url
url = 'https://stackoverflow.com/'
# {query} is filled with an urlencoded 'q=...' string, {pageno} with the page number
search_url = url + 'search?{query}&page={pageno}'

# specific xpath variables
# one node per search hit on the result page
results_xpath = '//div[contains(@class,"question-summary")]'
# the result link appears in two layouts, hence the xpath union
link_xpath = './/div[@class="result-link"]//a|.//div[@class="summary"]//h3//a'
# short excerpt shown under each hit
content_xpath = './/div[@class="excerpt"]'
  25. # do search-request
  26. def request(query, params):
  27. params['url'] = search_url.format(query=urlencode({'q': query}),
  28. pageno=params['pageno'])
  29. return params
  30. # get response from search-request
  31. def response(resp):
  32. results = []
  33. dom = html.fromstring(resp.text)
  34. # parse results
  35. for result in dom.xpath(results_xpath):
  36. link = result.xpath(link_xpath)[0]
  37. href = urljoin(url, link.attrib.get('href'))
  38. title = escape(extract_text(link))
  39. content = escape(extract_text(result.xpath(content_xpath)))
  40. # append result
  41. results.append({'url': href,
  42. 'title': title,
  43. 'content': content})
  44. # return results
  45. return results