#!/usr/bin/python
# youtube-search.py -- search YouTube from the terminal and log the results.
from json import JSONDecoder, dumps, loads
from urllib import error, parse, request
  4. base_url = 'https://m.youtube.com'
  5. log = 'utsearch.log'
  6. #Colors
  7. RED = '\033[31m'
  8. BLUE = '\033[34m'
  9. CYAN = '\033[36m'
  10. GREEN = '\33[32m'
  11. BLACK = '\33[30m'
  12. WHITE = '\33[37m'
  13. YELLOW = '\033[33m'
  14. END = '\033[0;0m'
  15. BOLD = '\033[1m'
  16. ITALIC = '\33[3m'
  17. UNDERLINE = '\033[4m'
  18. agent = ('Mozilla/5.0 (compatible; Googlebot/2.1;'
  19. '+ http://www.google.com/bot.mtml')
  20. user_agent = {'User-Agent': agent}
  21. class YoutubeSearch:
  22. def __init__(self, search_terms: str, max_results=None):
  23. self.search_terms = search_terms
  24. self.max_results = max_results
  25. self.videos = self.search()
  26. def search(self):
  27. encoded_search = parse.quote(self.search_terms)
  28. #Search Utube by results
  29. urla = f'{base_url}/results?search_query={encoded_search}'
  30. #Search utube by date
  31. urlb = (f'{base_url}/results?search_query={encoded_search}'
  32. '&search_sort=video_date_uploaded')
  33. #Search utube by views
  34. urlc = (f'{base_url}/results?search_query={encoded_search}'
  35. '&search_sort=video_view_count')
  36. #Select search type
  37. url = urlb
  38. page = request.urlopen(request.Request(
  39. url, data=None, headers=user_agent))
  40. response = page.read().decode()
  41. while "ytInitialData" not in response:
  42. response = page.read().decode()
  43. results = self.parse_html(response)
  44. if self.max_results is not None and len(results) > self.max_results:
  45. return results[: self.max_results]
  46. return results
  47. def parse_html(self, response):
  48. results = []
  49. start = (response.index("ytInitialData")
  50. + len("ytInitialData")
  51. + 3
  52. )
  53. end = response.index("};", start) + 1
  54. json_str = response[start:end]
  55. data = loads(json_str)
  56. videos = data["contents"]["twoColumnSearchResultsRenderer"][
  57. "primaryContents"]["sectionListRenderer"]["contents"][0][
  58. "itemSectionRenderer"]["contents"]
  59. #Get items from page, make a dictionary.
  60. for video in videos:
  61. res = {}
  62. if "videoRenderer" in video.keys():
  63. video_data = video.get("videoRenderer", {})
  64. res["Video Id:"] = video_data.get("videoId", None)
  65. res["Image:"] = [thumb.get(
  66. "url", None) for thumb in video_data.get(
  67. "thumbnail", {}).get("thumbnails", [{}]) ]
  68. res["Title:"] = video_data.get(
  69. "title", {}).get("runs", [[{}]])[0].get(
  70. "text", None)
  71. res["Description:"] = video_data.get(
  72. "descriptionSnippet", {}).get("runs", [{}])[0].get(
  73. "text", None)
  74. res["Channel:"] = video_data.get(
  75. "longBylineText", {}).get("runs", [[{}]])[0].get(
  76. "text", None)
  77. res["Duration:"] = video_data.get(
  78. "lengthText", {}).get("simpleText", 0)
  79. res["Views:"] = video_data.get(
  80. "viewCountText", {}).get("simpleText", 0)
  81. res["Url:"] = video_data.get(
  82. "navigationEndpoint", {}).get(
  83. "commandMetadata", {}).get(
  84. "webCommandMetadata", {}).get("url", None)
  85. results.append(res)
  86. return results
  87. def to_dict(self):
  88. return self.videos
  89. def to_json(self):
  90. return json.dumps({"videos": self.videos})
  91. if __name__ == '__main__':
  92. uts = input('Enter search terms for youtube :')
  93. #Set number of video results here.
  94. max_results = 100
  95. results = YoutubeSearch(uts, max_results).to_dict()
  96. #Print results to terminal and write to log
  97. with open(log, 'a') as f:
  98. print('%s\n'*5 % (CYAN, BOLD, 'Start Search', '='*70, END))
  99. f.write('%s\n'*3 % ('', 'Start Search', '='*70))
  100. for result in results:
  101. result['Image:'] = result['Image:'][0]
  102. #Make complete url
  103. result['Url:'] = (base_url + result['Url:'])
  104. print(YELLOW, '-'*70, END)
  105. f.write('%s\n' % ('-'*70))
  106. for key, val in result.items():
  107. #Colorize output
  108. print(GREEN, BOLD, key, END, val)
  109. f.write("%s %s\n" % (key,val))