my_main.py 2.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import time
  4. import os
  5. import csv
  6. import json
  7. # Processes a previously saved browse_ajax.json file.
  8. # Collecting the data from YouTube with a script did not work,
  9. # so it was gathered manually: json -> network -> XHR -> response tab.
  10. def read_file(data):
  11. videos = []
  12. for i in data:
  13. items = i[1]['response']['continuationContents']["gridContinuation"]["items"]
  14. for j in items:
  15. # name = j["gridVideoRenderer"]["title"]["accessibility"]["accessibilityData"]["label"]
  16. name = j["gridVideoRenderer"]["title"]["runs"][0]["text"]
  17. streamed = j["gridVideoRenderer"]["publishedTimeText"]["simpleText"]
  18. views = j["gridVideoRenderer"]["viewCountText"]["simpleText"]
  19. url = j["gridVideoRenderer"]["videoId"]
  20. data = {
  21. "name": name,
  22. "streamed": streamed,
  23. "views": views,
  24. "url": "/watch?v="+url
  25. }
  26. write_csv(data)
  27. videos.append(data)
  28. print(len(videos)) # количество записей в массиве
  29. # write_file(videos) # запись массива в файл
  30. def write_csv(data):
  31. path_f = os.path.dirname(os.path.abspath(__file__))
  32. with open(
  33. os.path.join(path_f, "videos.csv"), "a", newline='', encoding='utf-8') as file_:
  34. order = [
  35. "name",
  36. "streamed",
  37. "views",
  38. "url"]
  39. writer_f = csv.DictWriter(file_, fieldnames=order)
  40. writer_f.writerow(data)
  41. def write_file(videos):
  42. path_f = os.path.dirname(os.path.abspath(__file__))
  43. with open(os.path.join(path_f, "videos.json"), "w", encoding='utf-8') as outfile:
  44. outfile.write(str(videos))
  45. def main():
  46. path_f = os.path.dirname(os.path.abspath(__file__))
  47. with open(os.path.join(path_f, "browse_ajax.json"), encoding='utf-8') as json_file:
  48. data = json.load(json_file)
  49. read_file(data)
  50. if __name__ == '__main__':
  51. start_time = time.time()
  52. main()
  53. print("--- %s seconds ---" % (time.time() - start_time))