subtitleseeker.py 2.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
"""
Subtitleseeker (Video)

@website     http://www.subtitleseeker.com
@provide-api no
@using-api   no
@results     HTML
@stable      no (HTML can change)
@parse       url, title, content
"""
  10. from cgi import escape
  11. from urllib import quote_plus
  12. from lxml import html
  13. from searx.languages import language_codes
  14. from searx.engines.xpath import extract_text
  15. # engine dependent config
  16. categories = ['videos']
  17. paging = True
  18. language = ""
  19. # search-url
  20. url = 'http://www.subtitleseeker.com/'
  21. search_url = url + 'search/TITLES/{query}&p={pageno}'
  22. # specific xpath variables
  23. results_xpath = '//div[@class="boxRows"]'
  24. # do search-request
  25. def request(query, params):
  26. params['url'] = search_url.format(query=quote_plus(query),
  27. pageno=params['pageno'])
  28. return params
  29. # get response from search-request
  30. def response(resp):
  31. results = []
  32. dom = html.fromstring(resp.text)
  33. search_lang = ""
  34. if resp.search_params['language'] != 'all':
  35. search_lang = [lc[1]
  36. for lc in language_codes
  37. if lc[0][:2] == resp.search_params['language'].split('_')[0]][0]
  38. # parse results
  39. for result in dom.xpath(results_xpath):
  40. link = result.xpath(".//a")[0]
  41. href = link.attrib.get('href')
  42. if language is not "":
  43. href = href + language + '/'
  44. elif search_lang:
  45. href = href + search_lang + '/'
  46. title = escape(extract_text(link))
  47. content = extract_text(result.xpath('.//div[contains(@class,"red")]'))
  48. content = content + " - "
  49. text = extract_text(result.xpath('.//div[contains(@class,"grey-web")]')[0])
  50. content = content + text
  51. if result.xpath(".//span") != []:
  52. content = content +\
  53. " - (" +\
  54. extract_text(result.xpath(".//span")) +\
  55. ")"
  56. # append result
  57. results.append({'url': href,
  58. 'title': title,
  59. 'content': escape(content)})
  60. # return results
  61. return results