www1x.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  1. """
  2. 1x (Images)
  3. @website http://1x.com/
  4. @provide-api no
  5. @using-api no
  6. @results HTML
  7. @stable no (HTML can change)
  8. @parse url, title, thumbnail, img_src, content
  9. """
  10. from urllib import urlencode
  11. from urlparse import urljoin
  12. from lxml import html
  13. import string
  14. import re
  15. # engine dependent config
  16. categories = ['images']
  17. paging = False
  18. # search-url
  19. base_url = 'https://1x.com'
  20. search_url = base_url + '/backend/search.php?{query}'
  21. # do search-request
  22. def request(query, params):
  23. params['url'] = search_url.format(query=urlencode({'q': query}))
  24. return params
  25. # get response from search-request
  26. def response(resp):
  27. results = []
  28. # get links from result-text
  29. regex = re.compile('(</a>|<a)')
  30. results_parts = re.split(regex, resp.text)
  31. cur_element = ''
  32. # iterate over link parts
  33. for result_part in results_parts:
  34. # processed start and end of link
  35. if result_part == '<a':
  36. cur_element = result_part
  37. continue
  38. elif result_part != '</a>':
  39. cur_element += result_part
  40. continue
  41. cur_element += result_part
  42. # fix xml-error
  43. cur_element = string.replace(cur_element, '"></a>', '"/></a>')
  44. dom = html.fromstring(cur_element)
  45. link = dom.xpath('//a')[0]
  46. url = urljoin(base_url, link.attrib.get('href'))
  47. title = link.attrib.get('title', '')
  48. thumbnail_src = urljoin(base_url, link.xpath('.//img')[0].attrib['src'])
  49. # TODO: get image with higher resolution
  50. img_src = thumbnail_src
  51. # check if url is showing to a photo
  52. if '/photo/' not in url:
  53. continue
  54. # append result
  55. results.append({'url': url,
  56. 'title': title,
  57. 'img_src': img_src,
  58. 'content': '',
  59. 'thumbnail_src': thumbnail_src,
  60. 'template': 'images.html'})
  61. # return results
  62. return results