search.py 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121
  1. from youtube import util, yt_data_extract, proto, local_playlist
  2. from youtube import yt_app
  3. import settings
  4. import json
  5. import urllib
  6. import base64
  7. import mimetypes
  8. from flask import request
  9. import flask
  10. import os
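# Layout of the protobuf message encoded into the "sp" search parameter.
# Top-level entries are field numbers; entries indented under "Filters" are
# field numbers inside the nested filters message. Entries indented under
# "Sort" are presumably the values accepted by the sort field.
# Sort: 1
#     Upload date: 2
#     View count: 3
#     Rating: 1
#     Relevance: 0
# Offset: 9
# Filters: 2
#     Upload date: 1
#     Type: 2
#     Duration: 3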
  21. features = {
  22. '4k': 14,
  23. 'hd': 4,
  24. 'hdr': 25,
  25. 'subtitles': 5,
  26. 'creative_commons': 6,
  27. '3d': 7,
  28. 'live': 8,
  29. 'purchased': 9,
  30. '360': 15,
  31. 'location': 23,
  32. }
  33. def page_number_to_sp_parameter(page, autocorrect, sort, filters):
  34. offset = (int(page) - 1)*20 # 20 results per page
  35. autocorrect = proto.nested(8, proto.uint(1, 1 - int(autocorrect) ))
  36. filters_enc = proto.nested(2, proto.uint(1, filters['time']) + proto.uint(2, filters['type']) + proto.uint(3, filters['duration']))
  37. result = proto.uint(1, sort) + filters_enc + autocorrect + proto.uint(9, offset) + proto.string(61, b'')
  38. return base64.urlsafe_b64encode(result).decode('ascii')
  39. def get_search_json(query, page, autocorrect, sort, filters):
  40. url = "https://www.youtube.com/results?search_query=" + urllib.parse.quote_plus(query)
  41. headers = {
  42. 'Host': 'www.youtube.com',
  43. 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64)',
  44. 'Accept': '*/*',
  45. 'Accept-Language': 'en-US,en;q=0.5',
  46. 'X-YouTube-Client-Name': '1',
  47. 'X-YouTube-Client-Version': '2.20180418',
  48. }
  49. url += "&pbj=1&sp=" + page_number_to_sp_parameter(page, autocorrect, sort, filters).replace("=", "%3D")
  50. content = util.fetch_url(url, headers=headers, report_text="Got search results", debug_name='search_results')
  51. info = json.loads(content)
  52. return info
  53. @yt_app.route('/results')
  54. @yt_app.route('/search')
  55. def get_search_page():
  56. query = request.args.get('search_query') or request.args.get('query')
  57. if query is None:
  58. return flask.render_template('home.html', title='Search')
  59. elif query.startswith('https://www.youtube.com') or query.startswith('https://www.youtu.be'):
  60. return flask.redirect(f'/{query}')
  61. page = request.args.get("page", "1")
  62. autocorrect = int(request.args.get("autocorrect", "1"))
  63. sort = int(request.args.get("sort", "0"))
  64. filters = {}
  65. filters['time'] = int(request.args.get("time", "0"))
  66. filters['type'] = int(request.args.get("type", "0"))
  67. filters['duration'] = int(request.args.get("duration", "0"))
  68. polymer_json = get_search_json(query, page, autocorrect, sort, filters)
  69. search_info = yt_data_extract.extract_search_info(polymer_json)
  70. if search_info['error']:
  71. return flask.render_template('error.html', error_message=search_info['error'])
  72. for extract_item_info in search_info['items']:
  73. util.prefix_urls(extract_item_info)
  74. util.add_extra_html_info(extract_item_info)
  75. corrections = search_info['corrections']
  76. if corrections['type'] == 'did_you_mean':
  77. corrected_query_string = request.args.to_dict(flat=False)
  78. corrected_query_string['search_query'] = [corrections['corrected_query']]
  79. corrections['corrected_query_url'] = util.URL_ORIGIN + '/results?' + urllib.parse.urlencode(corrected_query_string, doseq=True)
  80. elif corrections['type'] == 'showing_results_for':
  81. no_autocorrect_query_string = request.args.to_dict(flat=False)
  82. no_autocorrect_query_string['autocorrect'] = ['0']
  83. no_autocorrect_query_url = util.URL_ORIGIN + '/results?' + urllib.parse.urlencode(no_autocorrect_query_string, doseq=True)
  84. corrections['original_query_url'] = no_autocorrect_query_url
  85. return flask.render_template(
  86. 'search.html',
  87. header_playlist_names=local_playlist.get_playlist_names(),
  88. query=query,
  89. estimated_results=search_info['estimated_results'],
  90. estimated_pages=search_info['estimated_pages'],
  91. corrections=search_info['corrections'],
  92. results=search_info['items'],
  93. parameters_dictionary=request.args,
  94. )
  95. @yt_app.route('/opensearch.xml')
  96. def get_search_engine_xml():
  97. with open(os.path.join(settings.program_directory, 'youtube/opensearch.xml'), 'rb') as f:
  98. if settings.app_public:
  99. main_url = '%s' % settings.app_url
  100. else:
  101. main_url = '%s:%s' % (settings.app_url, settings.port_number)
  102. content = f.read().replace(
  103. b'$main_url', str(main_url).encode()
  104. )
  105. return flask.Response(content, mimetype='application/xml')