123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131 |
- #!/usr/bin/python
- from urllib import request, error, parse
- from json import loads, dumps
# Root of YouTube's mobile site: search-result page URLs are built from it
# and the relative video links found in the results are prefixed with it.
base_url = 'https://m.youtube.com'
# File that the command-line script appends every search's results to.
log = 'utsearch.log'

# ANSI escape sequences used to colorize terminal output.
RED = '\033[31m'
BLUE = '\033[34m'
CYAN = '\033[36m'
GREEN = '\033[32m'
BLACK = '\033[30m'
WHITE = '\033[37m'
YELLOW = '\033[33m'
END = '\033[0;0m'        # reset all attributes
BOLD = '\033[1m'
ITALIC = '\033[3m'
UNDERLINE = '\033[4m'

# Googlebot's published desktop user-agent token.  The previous value was
# malformed (misplaced space around '+', "bot.mtml", missing closing paren).
agent = ('Mozilla/5.0 (compatible; Googlebot/2.1; '
         '+http://www.google.com/bot.html)')

# Headers sent with every request to the search page.
user_agent = {'User-Agent': agent}
-
class YoutubeSearch:
    """Run one YouTube search and expose the scraped video metadata.

    The search is performed once, when the object is constructed; the
    resulting list of per-video dictionaries is stored in ``self.videos``.

    Args:
        search_terms: Free-text query; it is URL-quoted before use.
        max_results: Optional cap on the number of videos kept.  ``None``
            keeps everything found on the first results page.
        sort_by: Result ordering, one of ``"relevance"`` (no sort parameter,
            i.e. the site's default ordering), ``"date"`` (upload date) or
            ``"views"`` (view count).  Defaults to ``"date"``.

    Raises:
        ValueError: If ``sort_by`` is not one of the supported orderings.
    """

    # Query-string suffix appended to the search URL for each ordering.
    _SORT_SUFFIXES = {
        "relevance": "",
        "date": "&search_sort=video_date_uploaded",
        "views": "&search_sort=video_view_count",
    }

    # Upper bound on page requests; prevents looping forever when the
    # served page never contains the embedded "ytInitialData" payload.
    _MAX_FETCH_ATTEMPTS = 5

    def __init__(self, search_terms: str, max_results=None, sort_by="date"):
        if sort_by not in self._SORT_SUFFIXES:
            raise ValueError(
                "sort_by must be one of %s, got %r"
                % (sorted(self._SORT_SUFFIXES), sort_by))
        self.search_terms = search_terms
        self.max_results = max_results
        self.sort_by = sort_by
        self.videos = self.search()

    def search(self):
        """Fetch the results page and return the parsed list of videos.

        The page is requested again (up to ``_MAX_FETCH_ATTEMPTS`` times)
        when the response does not contain the ``ytInitialData`` payload.

        Returns:
            A list of dictionaries as produced by :meth:`parse_html`,
            truncated to ``max_results`` entries when that limit is set.

        Raises:
            RuntimeError: If no response contained ``ytInitialData``.
        """
        encoded_search = parse.quote(self.search_terms)
        url = (f'{base_url}/results?search_query={encoded_search}'
               f'{self._SORT_SUFFIXES[self.sort_by]}')
        page_request = request.Request(url, data=None, headers=user_agent)

        for _ in range(self._MAX_FETCH_ATTEMPTS):
            # A response body can only be read once, so every retry needs
            # a fresh request; ``with`` closes the connection each time.
            with request.urlopen(page_request) as page:
                response = page.read().decode()
            if "ytInitialData" in response:
                break
        else:
            raise RuntimeError(
                "no ytInitialData found in the search page after "
                f"{self._MAX_FETCH_ATTEMPTS} attempts")

        results = self.parse_html(response)
        if self.max_results is not None:
            return results[:self.max_results]
        return results

    @staticmethod
    def _first_run_text(video_data, key):
        """Return the "text" of the first "runs" entry under *key*, or None."""
        runs = video_data.get(key, {}).get("runs") or [{}]
        return runs[0].get("text", None)

    def parse_html(self, response):
        """Extract video metadata from the HTML of a search results page.

        Args:
            response: Page source containing ``ytInitialData`` followed by
                a JSON object that is terminated by ``};``.

        Returns:
            One dictionary per ``videoRenderer`` item, with the keys
            ``"Video Id:"``, ``"Image:"`` (list of thumbnail URLs),
            ``"Title:"``, ``"Description:"``, ``"Channel:"``,
            ``"Duration:"``, ``"Views:"`` and ``"Url:"``.  Missing text
            fields are ``None``; missing duration/views are ``0``.

        Raises:
            ValueError: If the marker or the closing ``};`` is absent, or
                the extracted text is not valid JSON.
            KeyError: If the JSON lacks the expected result-list path.
        """
        marker = "ytInitialData"
        # Skip the marker plus three more characters; this assumes the
        # payload is assigned as `ytInitialData = {...};`.
        start = response.index(marker) + len(marker) + 3
        # Keep the closing brace, drop the semicolon.
        end = response.index("};", start) + 1
        data = loads(response[start:end])
        videos = data["contents"]["twoColumnSearchResultsRenderer"][
            "primaryContents"]["sectionListRenderer"]["contents"][0][
            "itemSectionRenderer"]["contents"]

        results = []
        for video in videos:
            # Items of other kinds are not videos and are skipped.
            if "videoRenderer" not in video:
                continue
            video_data = video["videoRenderer"]
            thumbnails = video_data.get(
                "thumbnail", {}).get("thumbnails", [{}])
            results.append({
                "Video Id:": video_data.get("videoId", None),
                "Image:": [thumb.get("url", None) for thumb in thumbnails],
                "Title:": self._first_run_text(video_data, "title"),
                "Description:": self._first_run_text(
                    video_data, "descriptionSnippet"),
                "Channel:": self._first_run_text(
                    video_data, "longBylineText"),
                "Duration:": video_data.get(
                    "lengthText", {}).get("simpleText", 0),
                "Views:": video_data.get(
                    "viewCountText", {}).get("simpleText", 0),
                "Url:": video_data.get(
                    "navigationEndpoint", {}).get(
                    "commandMetadata", {}).get(
                    "webCommandMetadata", {}).get("url", None),
            })
        return results

    def to_dict(self):
        """Return the list of video dictionaries gathered by the search."""
        return self.videos

    def to_json(self):
        """Return the videos serialized as a JSON object ``{"videos": [...]}``."""
        # Only `dumps` is imported from json; the module name is not bound.
        return dumps({"videos": self.videos})
-
# Command-line entry point: prompt for search terms, run the search, then
# print every result to the terminal (colorized) and append it to the log.
if __name__ == '__main__':

    uts = input('Enter search terms for youtube :')
    # Set number of video results here.
    max_results = 100
    results = YoutubeSearch(uts, max_results).to_dict()

    # Print results to terminal and write to log.  The log is opened as
    # UTF-8 explicitly so non-ASCII titles cannot fail on platforms whose
    # default encoding is narrower.
    with open(log, 'a', encoding='utf-8') as f:
        print('%s\n'*5 % (CYAN, BOLD, 'Start Search', '='*70, END))
        f.write('%s\n'*3 % ('', 'Start Search', '='*70))

        for result in results:
            # Keep only the first thumbnail; tolerate an empty list.
            thumbnails = result['Image:']
            result['Image:'] = thumbnails[0] if thumbnails else None
            # Make complete url; a result without a link stays None
            # instead of raising TypeError on concatenation.
            if result['Url:'] is not None:
                result['Url:'] = (base_url + result['Url:'])
            print(YELLOW, '-'*70, END)
            f.write('%s\n' % ('-'*70))

            for key, val in result.items():
                # Colorize output
                print(GREEN, BOLD, key, END, val)
                f.write("%s %s\n" % (key, val))
|