wolframalpha_noapi.py 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
# WolframAlpha (Maths)
#
# @website     http://www.wolframalpha.com/
# @provide-api yes (http://api.wolframalpha.com/v2/)
#
# @using-api   no
# @results     HTML
# @stable      no
# @parse       answer
  10. from re import search, sub
  11. from json import loads
  12. from urllib import urlencode
  13. from lxml import html
  14. import HTMLParser
  15. # search-url
  16. url = 'http://www.wolframalpha.com/'
  17. search_url = url + 'input/?{query}'
  18. # xpath variables
  19. scripts_xpath = '//script'
  20. title_xpath = '//title'
  21. failure_xpath = '//p[attribute::class="pfail"]'
  22. # do search-request
  23. def request(query, params):
  24. params['url'] = search_url.format(query=urlencode({'i': query}))
  25. return params
  26. # get response from search-request
  27. def response(resp):
  28. results = []
  29. line = None
  30. dom = html.fromstring(resp.text)
  31. scripts = dom.xpath(scripts_xpath)
  32. # the answer is inside a js function
  33. # answer can be located in different 'pods', although by default it should be in pod_0200
  34. possible_locations = ['pod_0200\.push\((.*)',
  35. 'pod_0100\.push\((.*)']
  36. # failed result
  37. if dom.xpath(failure_xpath):
  38. return results
  39. # get line that matches the pattern
  40. for pattern in possible_locations:
  41. for script in scripts:
  42. try:
  43. line = search(pattern, script.text_content()).group(1)
  44. break
  45. except AttributeError:
  46. continue
  47. if line:
  48. break
  49. if line:
  50. # extract answer from json
  51. answer = line[line.find('{'):line.rfind('}') + 1]
  52. try:
  53. answer = loads(answer)
  54. except Exception:
  55. answer = loads(answer.encode('unicode-escape'))
  56. answer = answer['stringified']
  57. # clean plaintext answer
  58. h = HTMLParser.HTMLParser()
  59. answer = h.unescape(answer.decode('unicode-escape'))
  60. answer = sub(r'\\', '', answer)
  61. results.append({'answer': answer})
  62. # user input is in first part of title
  63. title = dom.xpath(title_xpath)[0].text.encode('utf-8')
  64. result_url = request(title[:-16], {})['url']
  65. # append result
  66. results.append({'url': result_url,
  67. 'title': title.decode('utf-8')})
  68. return results