123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566 |
- from bs4 import BeautifulSoup
- import requests
- import time
- import os
- import csv
- import json
# Post-processing of a pre-captured browse_ajax.json.
# Scraping it from YouTube directly with a script did not work, so the
# payload was collected manually: DevTools -> Network -> XHR -> "Response" tab.
def read_file(data):
    """Extract video records from a captured browse_ajax.json payload.

    data: list of ``[index, payload]`` pairs as saved from YouTube's
    browse_ajax XHR responses (assumed schema — verify against the capture).
    Each extracted record is appended to ``videos.csv`` via ``write_csv``.
    """
    videos = []
    for chunk in data:
        items = chunk[1]['response']['continuationContents']["gridContinuation"]["items"]
        for item in items:
            # Hoist the renderer lookup instead of repeating the deep path,
            # and do NOT rebind the ``data`` parameter (the original shadowed it).
            renderer = item["gridVideoRenderer"]
            record = {
                "name": renderer["title"]["runs"][0]["text"],
                "streamed": renderer["publishedTimeText"]["simpleText"],
                "views": renderer["viewCountText"]["simpleText"],
                "url": "/watch?v=" + renderer["videoId"],
            }
            write_csv(record)
            videos.append(record)
    print(len(videos))  # number of records collected
    # write_file(videos)  # optionally dump the whole list as JSON
def write_csv(data):
    """Append one video record to ``videos.csv`` next to this script.

    data: dict with keys ``name``, ``streamed``, ``views``, ``url``.

    Fix: the original appended rows but never wrote a header, so the CSV
    had no column names.  A header row is now emitted exactly once, when
    the file does not exist yet or is empty.
    """
    fieldnames = ["name", "streamed", "views", "url"]
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "videos.csv")
    # Header is needed only on the very first append.
    need_header = not os.path.exists(path) or os.path.getsize(path) == 0
    with open(path, "a", newline='', encoding='utf-8') as file_:
        writer_f = csv.DictWriter(file_, fieldnames=fieldnames)
        if need_header:
            writer_f.writeheader()
        writer_f.writerow(data)
def write_file(videos):
    """Dump the collected video records to ``videos.json`` as real JSON.

    Fix: the original wrote ``str(videos)`` — a Python repr with single
    quotes, which is not valid JSON despite the ``.json`` extension.
    ``json.dump`` with ``ensure_ascii=False`` keeps Cyrillic titles readable.
    """
    path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "videos.json")
    with open(path, "w", encoding='utf-8') as outfile:
        json.dump(videos, outfile, ensure_ascii=False, indent=2)
def main():
    """Load ``browse_ajax.json`` from the script's directory and process it."""
    here = os.path.dirname(os.path.abspath(__file__))
    source = os.path.join(here, "browse_ajax.json")
    with open(source, encoding='utf-8') as fh:
        payload = json.load(fh)
    read_file(payload)
if __name__ == '__main__':
    # Time the full run and report a rough wall-clock figure.
    start_time = time.time()
    main()
    elapsed = time.time() - start_time
    print("--- %s seconds ---" % elapsed)
|