request.py

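"""
Online channel search: query an IPTV search site (tonkiang.us, with
foodieguide.com available as an alternative node) for each channel name and
collect candidate stream URLs, using either a Selenium driver or plain HTTP
requests depending on config.open_driver.
"""
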
from asyncio import create_task, gather
from concurrent.futures import ThreadPoolExecutor
from time import time, sleep

from selenium.webdriver.common.by import By
from tqdm.asyncio import tqdm_asyncio

from driver.setup import setup_driver
from proxy import get_proxy, get_proxy_next
from requests_custom.utils import get_soup_requests, close_session
from utils.channel import (
    format_channel_name,
    get_results_from_soup,
    get_results_from_soup_requests,
)
from utils.config import get_config
from utils.retry import (
    retry_func,
    locate_element_with_retry,
    find_clickable_element_with_retry,
)
from utils.speed import get_speed
from utils.tools import check_url_by_patterns, get_pbar_remaining, get_soup

config = get_config()


async def use_accessible_url(callback):
    """
    Return the faster of the two online search nodes, or None if neither is reachable
    """
    callback("Fetching the best online search node", 0)
    baseUrl1 = "https://www.foodieguide.com/iptvsearch/"
    baseUrl2 = "http://tonkiang.us/"
    task1 = create_task(get_speed(baseUrl1, timeout=30))
    task2 = create_task(get_speed(baseUrl2, timeout=30))
    task_results = await gather(task1, task2)
    callback("Online search node selection complete", 100)
    if task_results[0] == float("inf") and task_results[1] == float("inf"):
        return None
    if task_results[0] < task_results[1]:
        return baseUrl1
    else:
        return baseUrl2
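
# Usage sketch (illustrative only; assumes an asyncio entry point and a
# simple print-based progress callback):
#
#     import asyncio
#
#     async def main():
#         base_url = await use_accessible_url(lambda msg, pct: print(f"[{pct}%] {msg}"))
#         print(base_url or "No search node is reachable")
#
#     asyncio.run(main())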


def search_submit(driver, name):
    """
    Enter the search keyword and submit the form via the driver
    """
    search_box = locate_element_with_retry(driver, (By.XPATH, '//input[@type="text"]'))
    if not search_box:
        return
    search_box.clear()
    search_box.send_keys(name)
    submit_button = find_clickable_element_with_retry(
        driver, (By.XPATH, '//input[@type="submit"]')
    )
    if not submit_button:
        return
    sleep(1)
    driver.execute_script("arguments[0].click();", submit_button)
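
# Note: the click is dispatched via execute_script instead of
# submit_button.click() so it still fires when the button is obscured or
# off-screen, a common cause of ElementClickInterceptedException in Selenium.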


async def get_channels_by_online_search(names, callback):
    """
    Get the channels by online search
    """
    channels = {}
    # Node selection via use_accessible_url is currently disabled;
    # the search URL is pinned to tonkiang.us
    # pageUrl = await use_accessible_url(callback)
    pageUrl = "http://tonkiang.us/"
    if not pageUrl:
        return channels
    proxy = None
    if config.open_proxy:
        proxy = await get_proxy(pageUrl, best=True, with_test=True)
    start_time = time()
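
    # Worker run on the thread pool below: searches one channel name using
    # either a Selenium driver (config.open_driver) or plain HTTP requests.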
    def process_channel_by_online_search(name):
        info_list = []
        nonlocal proxy
        try:
            if config.open_driver:
                driver = setup_driver(proxy)
                try:
                    retry_func(
                        lambda: driver.get(pageUrl), name=f"online search:{name}"
                    )
                except Exception:
                    # Rotate the proxy and restart the driver once on failure
                    if config.open_proxy:
                        proxy = get_proxy_next()
                    driver.close()
                    driver.quit()
                    driver = setup_driver(proxy)
                    driver.get(pageUrl)
                search_submit(driver, name)
            else:
                page_soup = None
                request_url = f"{pageUrl}?channel={name}"
                try:
                    page_soup = retry_func(
                        lambda: get_soup_requests(request_url, proxy=proxy),
                        name=f"online search:{name}",
                    )
                except Exception:
                    # Rotate the proxy and retry the request once on failure
                    if config.open_proxy:
                        proxy = get_proxy_next()
                    page_soup = get_soup_requests(request_url, proxy=proxy)
                if not page_soup:
                    print(f"{name}:Request failed.")
                    return {"name": format_channel_name(name), "data": info_list}
            isFavorite = name in config.favorite_list
            # Favorite channels are searched more pages deep than the default
            pageNum = (
                config.favorite_page_num if isFavorite else config.default_page_num
            )
            retry_limit = 3
            for page in range(1, pageNum + 1):
                retries = 0
                # In requests mode the first page was already fetched above,
                # so allow only one pass through the retry loop
                if not config.open_driver and page == 1:
                    retries = 2
                while retries < retry_limit:
                    try:
                        if page > 1:
                            if config.open_driver:
                                page_link = find_clickable_element_with_retry(
                                    driver,
                                    (
                                        By.XPATH,
                                        f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
                                    ),
                                )
                                if not page_link:
                                    break
                                sleep(1)
                                driver.execute_script(
                                    "arguments[0].click();", page_link
                                )
                            else:
                                request_url = f"{pageUrl}?channel={name}&page={page}"
                                page_soup = retry_func(
                                    lambda: get_soup_requests(request_url, proxy=proxy),
                                    name=f"online search:{name}, page:{page}",
                                )
                        sleep(1)
                        soup = (
                            get_soup(driver.page_source)
                            if config.open_driver
                            else page_soup
                        )
                        if soup:
                            results = (
                                get_results_from_soup(soup, name)
                                if config.open_driver
                                else get_results_from_soup_requests(soup, name)
                            )
                            print(name, "page:", page, "results num:", len(results))
                            if len(results) == 0:
                                print(
                                    f"{name}:No results found, refreshing page and retrying..."
                                )
                                if config.open_driver:
                                    driver.refresh()
                                retries += 1
                                continue
                            elif len(results) <= 3:
                                # Suspiciously few results: if a next page exists,
                                # rotate the proxy, restart the driver and rerun
                                # the search before accepting them
                                if config.open_driver:
                                    next_page_link = find_clickable_element_with_retry(
                                        driver,
                                        (
                                            By.XPATH,
                                            f'//a[contains(@href, "={page + 1}") and contains(@href, "{name}")]',
                                        ),
                                        retries=1,
                                    )
                                    if next_page_link:
                                        if config.open_proxy:
                                            proxy = get_proxy_next()
                                        driver.close()
                                        driver.quit()
                                        driver = setup_driver(proxy)
                                        search_submit(driver, name)
                                    retries += 1
                                    continue
                            for result in results:
                                url, date, resolution = result
                                if url and check_url_by_patterns(url):
                                    info_list.append((url, date, resolution))
                            break
                        else:
                            print(
                                f"{name}:No page content, refreshing page and retrying..."
                            )
                            if config.open_driver:
                                driver.refresh()
                            retries += 1
                            continue
                    except Exception as e:
                        print(f"{name}:Error on page {page}: {e}")
                        break
                if retries == retry_limit:
                    print(f"{name}:Reached retry limit, moving to next page")
        except Exception as e:
            print(f"{name}:Error on search: {e}")
        finally:
            if config.open_driver:
                driver.close()
                driver.quit()
            pbar.update()
            callback(
                f"Online search update in progress, {names_len - pbar.n} channels left to query, estimated time remaining: {get_pbar_remaining(pbar, start_time)}",
                int((pbar.n / names_len) * 100),
            )
        return {"name": format_channel_name(name), "data": info_list}
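
    # Fan the per-channel searches out over a small thread pool; Selenium
    # sessions and blocking HTTP calls would otherwise serialize the run.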
    names_len = len(names)
    pbar = tqdm_asyncio(total=names_len, desc="Online search")
    callback(f"Online search update in progress, {names_len} channels in total", 0)
    with ThreadPoolExecutor(max_workers=3) as executor:
        futures = [
            executor.submit(process_channel_by_online_search, name) for name in names
        ]
        for future in futures:
            result = future.result()
            name = result.get("name")
            data = result.get("data", [])
            if name:
                channels[name] = data
    if not config.open_driver:
        close_session()
    pbar.close()
    return channels
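

# Minimal demo sketch (the channel names are hypothetical; assumes the
# project's config is set up and, in driver mode, a working Selenium install):
if __name__ == "__main__":
    from asyncio import run

    def progress(message, percent):
        print(f"[{percent}%] {message}")

    found = run(get_channels_by_online_search(["CCTV1", "CCTV5"], progress))
    for channel_name, urls in found.items():
        print(channel_name, len(urls), "candidate URLs")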