# 1.py (2.5 KB)
import random
import sqlite3
from contextlib import closing

import requests
from bs4 import BeautifulSoup
  5. db = sqlite3.connect("Triangle_Kino.db")
  6. cur = db.cursor()
  7. cur.execute("""CREATE TABLE IF NOT EXISTS Triangle_Kino (
  8. ID INTEGER PRIMARY KEY,
  9. RESURS TEXT,
  10. NAME TEXT,
  11. GOD TEXT,
  12. OPISANIE TEXT,
  13. LINK_STR TEXT
  14. )""")
  15. db.commit()
  16. resursZERO = "SITE_Name"
  17. user_agent_list = [
  18. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.1.1 Safari/605.1.15',
  19. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:77.0) Gecko/20100101 Firefox/77.0',
  20. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
  21. 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:77.0) Gecko/20100101 Firefox/77.0',
  22. 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.97 Safari/537.36',
  23. ]
  24. user_agent = random.choice(user_agent_list)
  25. headers = {'User-Agent': user_agent}
  26. x = 1
  27. url = "cсылка на сайт"
  28. while True:
  29. i = 0
  30. page = BeautifulSoup(requests.get(url,headers=headers).text, "lxml")
  31. name_list = []
  32. god_list = []
  33. link1_list = []
  34. opisanie_list = []
  35. for name in page.find_all("div", class_="th-title"):
  36. name_text = name.text
  37. print(name_text)
  38. name_list.append(name_text)
  39. for god in page.find_all("div", class_="th-year"):
  40. god_text = god.text
  41. print(god_text)
  42. god_list.append(god_text)
  43. for film in page.find_all("div", class_="th-item"):
  44. Link_STR = film.find("a", class_="th-in with-mask").get('href')
  45. print(Link_STR)
  46. link1_list.append(Link_STR)
  47. for link0 in link1_list:
  48. page2 = BeautifulSoup(requests.get(link0,headers=headers).text, "lxml")
  49. opisanie = page2.find("div", class_="fdesc clearfix slice-this").text
  50. print(len(god_list))
  51. b_split_list = opisanie.split(" ")
  52. b1 = b_split_list[-1]
  53. print(b1)
  54. opisanie_list.append(b1)
  55. while i < len(opisanie_list):
  56. name1 = name_list[i]
  57. god1 = god_list[i]
  58. opisanie1 = opisanie_list[i]
  59. link2 = link1_list[i]
  60. cur.execute("""INSERT INTO Triangle_Kino (RESURS, NAME, GOD, OPISANIE, LINK_STR) VALUES (?, ?, ?, ?, ?);""", (resursZERO, name1, god1, opisanie1, link2))
  61. db.commit()
  62. print("Добавлено " + str(i))
  63. i = i + 1
  64. x = x + 1
  65. url = "cсылка на сайт" + str(x) + "/"
  66. if x == 1783:
  67. break