yacy.py 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798
  1. # Yacy (Web, Images, Videos, Music, Files)
  2. #
  3. # @website http://yacy.net
  4. # @provide-api yes
  5. # (http://www.yacy-websuche.de/wiki/index.php/Dev:APIyacysearch)
  6. #
  7. # @using-api yes
  8. # @results JSON
  9. # @stable yes
  10. # @parse (general) url, title, content, publishedDate
  11. # @parse (images) url, title, img_src
  12. #
  13. # @todo parse video, audio and file results
  14. from json import loads
  15. from urllib import urlencode
  16. from dateutil import parser
  17. # engine dependent config
  18. categories = ['general', 'images'] # TODO , 'music', 'videos', 'files'
  19. paging = True
  20. language_support = True
  21. number_of_results = 5
  22. # search-url
  23. base_url = 'http://localhost:8090'
  24. search_url = '/yacysearch.json?{query}'\
  25. '&startRecord={offset}'\
  26. '&maximumRecords={limit}'\
  27. '&contentdom={search_type}'\
  28. '&resource=global'
  29. # yacy specific type-definitions
  30. search_types = {'general': 'text',
  31. 'images': 'image',
  32. 'files': 'app',
  33. 'music': 'audio',
  34. 'videos': 'video'}
  35. # do search-request
  36. def request(query, params):
  37. offset = (params['pageno'] - 1) * number_of_results
  38. search_type = search_types.get(params.get('category'), '0')
  39. params['url'] = base_url +\
  40. search_url.format(query=urlencode({'query': query}),
  41. offset=offset,
  42. limit=number_of_results,
  43. search_type=search_type)
  44. # add language tag if specified
  45. if params['language'] != 'all':
  46. params['url'] += '&lr=lang_' + params['language'].split('_')[0]
  47. return params
  48. # get response from search-request
  49. def response(resp):
  50. results = []
  51. raw_search_results = loads(resp.text)
  52. # return empty array if there are no results
  53. if not raw_search_results:
  54. return []
  55. search_results = raw_search_results.get('channels', [])
  56. if len(search_results) == 0:
  57. return []
  58. for result in search_results[0].get('items', []):
  59. # parse image results
  60. if result.get('image'):
  61. # append result
  62. results.append({'url': result['url'],
  63. 'title': result['title'],
  64. 'content': '',
  65. 'img_src': result['image'],
  66. 'template': 'images.html'})
  67. # parse general results
  68. else:
  69. publishedDate = parser.parse(result['pubDate'])
  70. # append result
  71. results.append({'url': result['link'],
  72. 'title': result['title'],
  73. 'content': result['description'],
  74. 'publishedDate': publishedDate})
  75. # TODO parse video, audio and file results
  76. # return results
  77. return results