# utils.py

try:
    import user_config as config
except ImportError:
    import config
import aiohttp
import asyncio
import time
import re
import datetime
import os
import urllib.parse
import ipaddress
from urllib.parse import urlparse
import requests
from bs4 import BeautifulSoup
from bs4 import NavigableString
import fofa_map
from collections import defaultdict
from tqdm import tqdm
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def formatChannelName(name):
    """
    Format the channel name: strip extraneous tokens, normalize CCTV aliases and lowercase
    """
    sub_pattern = (
        r"-|_|\((.*?)\)|\[(.*?)\]| |频道|标清|高清|HD|hd|超清|超高|超高清|中央|央视|台"
    )
    name = re.sub(sub_pattern, "", name)
    name = name.replace("plus", "+")
    name = name.replace("PLUS", "+")
    name = name.replace("＋", "+")
    name = name.replace("CCTV1综合", "CCTV1")
    name = name.replace("CCTV2财经", "CCTV2")
    name = name.replace("CCTV3综艺", "CCTV3")
    name = name.replace("CCTV4国际", "CCTV4")
    name = name.replace("CCTV4中文国际", "CCTV4")
    name = name.replace("CCTV4欧洲", "CCTV4")
    name = name.replace("CCTV5体育", "CCTV5")
    name = name.replace("CCTV5+体育赛视", "CCTV5+")
    name = name.replace("CCTV5+体育赛事", "CCTV5+")
    name = name.replace("CCTV5+体育", "CCTV5+")
    name = name.replace("CCTV6电影", "CCTV6")
    name = name.replace("CCTV7军事", "CCTV7")
    name = name.replace("CCTV7军农", "CCTV7")
    name = name.replace("CCTV7农业", "CCTV7")
    name = name.replace("CCTV7国防军事", "CCTV7")
    name = name.replace("CCTV8电视剧", "CCTV8")
    name = name.replace("CCTV9记录", "CCTV9")
    name = name.replace("CCTV9纪录", "CCTV9")
    name = name.replace("CCTV10科教", "CCTV10")
    name = name.replace("CCTV11戏曲", "CCTV11")
    name = name.replace("CCTV12社会与法", "CCTV12")
    name = name.replace("CCTV13新闻", "CCTV13")
    name = name.replace("CCTV新闻", "CCTV13")
    name = name.replace("CCTV14少儿", "CCTV14")
    name = name.replace("CCTV15音乐", "CCTV15")
    name = name.replace("CCTV16奥林匹克", "CCTV16")
    name = name.replace("CCTV17农业农村", "CCTV17")
    name = name.replace("CCTV17农业", "CCTV17")
    return name.lower()
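# formatChannelName usage sketch (illustrative input):
#   formatChannelName("CCTV-5+ 体育赛事(高清)")  # -> "cctv5+"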
def getChannelItems():
    """
    Get the channel items from the source file
    """
    # Prefer a user-provided source file if it exists.
    user_source_file = (
        "user_" + config.source_file
        if os.path.exists("user_" + config.source_file)
        else getattr(config, "source_file", "demo.txt")
    )
    # Create a dictionary to store the channels.
    channels = defaultdict(lambda: defaultdict(list))
    current_category = ""
    pattern = r"^(.*?),(?!#genre#)(.*?)$"
    with open(user_source_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if "#genre#" in line:
                # This is a new category, use it as the current key.
                current_category = line.split(",")[0]
            else:
                # This is a channel url, add it under the current category and name.
                match = re.search(pattern, line)
                if match is not None:
                    name = match.group(1).strip()
                    url = match.group(2).strip()
                    if url and url not in channels[current_category][name]:
                        channels[current_category][name].append(url)
    return channels
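# getChannelItems source layout sketch (illustrative content for config.source_file):
#   央视频道,#genre#
#   CCTV1,http://example.com/cctv1.m3u8
#   CCTV2,http://example.com/cctv2.m3u8
# parses into {"央视频道": {"CCTV1": ["http://example.com/cctv1.m3u8"], "CCTV2": [...]}}.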
async def getChannelsBySubscribeUrls(channel_names):
    """
    Get the channels by subscribe urls
    """
    channels = {}
    pattern = r"^(.*?),(?!#genre#)(.*?)$"
    subscribe_urls_len = len(config.subscribe_urls)
    pbar = tqdm(total=subscribe_urls_len)
    for base_url in config.subscribe_urls:
        try:
            pbar.set_description(
                f"Processing subscribe {base_url}, {subscribe_urls_len - pbar.n} urls remaining"
            )
            try:
                response = requests.get(base_url, timeout=30)
            except requests.exceptions.Timeout:
                print(f"Timeout on {base_url}")
                continue
            content = response.text
            if content:
                lines = content.split("\n")
                for line in lines:
                    if re.match(pattern, line) is not None:
                        key = re.match(pattern, line).group(1)
                        resolution_match = re.search(r"_(\((.*?)\))", key)
                        resolution = (
                            resolution_match.group(2)
                            if resolution_match is not None
                            else None
                        )
                        key = formatChannelName(key)
                        url = re.match(pattern, line).group(2)
                        value = (url, None, resolution)
                        if key in channels:
                            if value not in channels[key]:
                                channels[key].append(value)
                        else:
                            channels[key] = [value]
        except Exception as e:
            print(f"Error on {base_url}: {e}")
            continue
        finally:
            pbar.update()
    print("Finished processing subscribe urls")
    pbar.close()
    return channels
def getChannelsInfoListByOnlineSearch(driver, pageUrl, name):
    """
    Get the channels info list by online search
    """
    wait = WebDriverWait(driver, 10)
    driver.get(pageUrl)
    search_box = wait.until(
        EC.presence_of_element_located((By.XPATH, '//input[@type="text"]'))
    )
    search_box.clear()
    search_box.send_keys(name)
    submit_button = wait.until(
        EC.element_to_be_clickable((By.XPATH, '//input[@type="submit"]'))
    )
    driver.execute_script("arguments[0].click();", submit_button)
    isFavorite = name in config.favorite_list
    pageNum = config.favorite_page_num if isFavorite else config.default_page_num
    info_list = []
    for page in range(1, pageNum + 1):
        try:
            if page > 1:
                page_link = wait.until(
                    EC.element_to_be_clickable(
                        (
                            By.XPATH,
                            f'//a[contains(@href, "={page}") and contains(@href, "{name}")]',
                        )
                    )
                )
                driver.execute_script("arguments[0].click();", page_link)
            source = re.sub(
                r"<!--.*?-->",
                "",
                driver.page_source,
                flags=re.DOTALL,
            )
            soup = BeautifulSoup(source, "html.parser")
            if soup:
                results = getResultsFromSoup(soup, name)
                for result in results:
                    url, date, resolution = result
                    if url and checkUrlByPatterns(url):
                        info_list.append((url, date, resolution))
        except Exception as e:
            # print(f"Error on page {page}: {e}")
            continue
    return info_list
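# getChannelsInfoListByOnlineSearch usage sketch (illustrative; assumes a configured selenium
# WebDriver such as webdriver.Chrome() and a search page url, e.g. the one returned by
# useAccessibleUrl()):
#   info_list = getChannelsInfoListByOnlineSearch(driver, "http://tonkiang.us/", "cctv1")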
def updateChannelUrlsTxt(cate, channelUrls):
    """
    Update the category and channel urls to the final file
    """
    with open("result_new.txt", "a", encoding="utf-8") as f:
        f.write(cate + ",#genre#\n")
        for name, urls in channelUrls.items():
            for url in urls:
                if url is not None:
                    f.write(name + "," + url + "\n")
        f.write("\n")
def updateFile(final_file, old_file):
    """
    Replace final_file with old_file if old_file exists
    """
    if os.path.exists(old_file):
        os.replace(old_file, final_file)
def getChannelUrl(element):
    """
    Get the url from the element text
    """
    url = None
    urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    url_search = re.search(
        urlRegex,
        element.get_text(strip=True),
    )
    if url_search:
        url = url_search.group()
    return url
def getChannelInfo(element):
    """
    Get the channel info (date and resolution) from the element text
    """
    date, resolution = None, None
    info_text = element.get_text(strip=True)
    if info_text:
        date, resolution = (
            (info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
            (
                info_text.partition(" ")[2].partition("•")[2]
                if info_text.partition(" ")[2].partition("•")[2]
                else None
            ),
        )
    return date, resolution
def getResultsFromSoup(soup, name):
    """
    Get the results from the soup
    """
    results = []
    for element in soup.descendants:
        if isinstance(element, NavigableString):
            url = getChannelUrl(element)
            if url and not any(item[0] == url for item in results):
                url_element = soup.find(lambda tag: tag.get_text(strip=True) == url)
                if url_element:
                    name_element = url_element.find_previous_sibling()
                    if name_element:
                        channel_name = name_element.get_text(strip=True)
                        if name == formatChannelName(channel_name):
                            info_element = url_element.find_next_sibling()
                            date, resolution = getChannelInfo(info_element)
                            results.append((url, date, resolution))
    return results
async def getSpeed(url, urlTimeout=5):
    """
    Get the response time of the url in milliseconds, or infinity if the request fails
    """
    async with aiohttp.ClientSession() as session:
        start = time.time()
        try:
            async with session.get(url, timeout=urlTimeout) as response:
                resStatus = response.status
        except Exception:
            return float("inf")
        end = time.time()
        if resStatus == 200:
            return int(round((end - start) * 1000))
        else:
            return float("inf")
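# getSpeed usage sketch (illustrative url; run inside an event loop or via asyncio.run):
#   latency_ms = asyncio.run(getSpeed("http://example.com/live.m3u8"))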
async def sortUrlsBySpeedAndResolution(infoList):
    """
    Sort by speed and resolution
    """
    response_times = await asyncio.gather(*(getSpeed(url) for url, _, _ in infoList))
    valid_responses = [
        (info, rt) for info, rt in zip(infoList, response_times) if rt != float("inf")
    ]

    def extract_resolution(resolution_str):
        numbers = re.findall(r"\d+x\d+", resolution_str)
        if numbers:
            width, height = map(int, numbers[0].split("x"))
            return width * height
        else:
            return 0

    default_response_time_weight = 0.5
    default_resolution_weight = 0.5
    response_time_weight = getattr(
        config, "response_time_weight", default_response_time_weight
    )
    resolution_weight = getattr(config, "resolution_weight", default_resolution_weight)
    # Fall back to the defaults if the configured weights are invalid.
    if not (
        0 <= response_time_weight <= 1
        and 0 <= resolution_weight <= 1
        and response_time_weight + resolution_weight == 1
    ):
        response_time_weight = default_response_time_weight
        resolution_weight = default_resolution_weight

    def combined_key(item):
        # Faster responses and larger resolutions both increase the score.
        (_, _, resolution), response_time = item
        resolution_value = extract_resolution(resolution) if resolution else 0
        return (
            -(response_time_weight * response_time)
            + resolution_weight * resolution_value
        )

    sorted_res = sorted(valid_responses, key=combined_key, reverse=True)
    return sorted_res
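# combined_key scoring sketch (illustrative numbers): with the default 0.5/0.5 weights, an entry
# with a 200 ms response and a "1920x1080" resolution scores -0.5*200 + 0.5*(1920*1080) = 1036700,
# so resolution differences dominate unless response times diverge by a very large margin.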
def filterByDate(data):
    """
    Filter by date and limit
    """
    default_recent_days = 30
    use_recent_days = getattr(config, "recent_days", default_recent_days)
    if not isinstance(use_recent_days, int) or use_recent_days <= 0:
        use_recent_days = default_recent_days
    start_date = datetime.datetime.now() - datetime.timedelta(days=use_recent_days)
    recent_data = []
    unrecent_data = []
    for (url, date, resolution), response_time in data:
        item = ((url, date, resolution), response_time)
        if date:
            date = datetime.datetime.strptime(date, "%m-%d-%Y")
            if date >= start_date:
                recent_data.append(item)
            else:
                unrecent_data.append(item)
        else:
            unrecent_data.append(item)
    recent_data_len = len(recent_data)
    if recent_data_len == 0:
        recent_data = unrecent_data
    elif recent_data_len < config.urls_limit:
        recent_data.extend(unrecent_data[: config.urls_limit - len(recent_data)])
    return recent_data
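# filterByDate note: dates are parsed as %m-%d-%Y (e.g. "01-31-2024"); entries newer than
# config.recent_days are kept first, and undated or older entries only pad the result up to
# config.urls_limit.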
def getTotalUrlsFromInfoList(infoList):
    """
    Get the total urls from info list
    """
    total_urls = [url for url, _, _ in infoList]
    return list(dict.fromkeys(total_urls))[: config.urls_limit]
def getTotalUrlsFromSortedData(data):
    """
    Get the total urls, filtered by date and deduplicated, from sorted data
    """
    total_urls = []
    if len(data) > config.urls_limit:
        total_urls = [url for (url, _, _), _ in filterByDate(data)]
    else:
        total_urls = [url for (url, _, _), _ in data]
    return list(dict.fromkeys(total_urls))[: config.urls_limit]
def is_ipv6(url):
    """
    Check if the url is ipv6
    """
    try:
        host = urllib.parse.urlparse(url).hostname
        ipaddress.IPv6Address(host)
        return True
    except ValueError:
        return False
def checkUrlIPVType(url):
    """
    Check if the url is compatible with the ipv type in the config
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv4":
        return not is_ipv6(url)
    elif ipv_type == "ipv6":
        return is_ipv6(url)
    else:
        return True
def checkByDomainBlacklist(url):
    """
    Check by domain blacklist
    """
    domain_blacklist = [
        urlparse(domain).netloc if urlparse(domain).scheme else domain
        for domain in getattr(config, "domain_blacklist", [])
    ]
    return urlparse(url).netloc not in domain_blacklist
def checkByURLKeywordsBlacklist(url):
    """
    Check by URL keywords blacklist
    """
    url_keywords_blacklist = getattr(config, "url_keywords_blacklist", [])
    return not any(keyword in url for keyword in url_keywords_blacklist)
def checkUrlByPatterns(url):
    """
    Check the url by patterns
    """
    return (
        checkUrlIPVType(url)
        and checkByDomainBlacklist(url)
        and checkByURLKeywordsBlacklist(url)
    )
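# checkUrlByPatterns note: a url passes only if it matches config.ipv_type (default "ipv4"),
# its host is not in config.domain_blacklist, and it contains no config.url_keywords_blacklist keyword.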
def filterUrlsByPatterns(urls):
    """
    Filter urls by patterns
    """
    urls = [url for url in urls if checkUrlIPVType(url)]
    urls = [url for url in urls if checkByDomainBlacklist(url)]
    urls = [url for url in urls if checkByURLKeywordsBlacklist(url)]
    return urls
async def useAccessibleUrl():
    """
    Return the faster of the two search base urls, or None if neither is accessible
    """
    baseUrl1 = "https://www.foodieguide.com/iptvsearch/"
    baseUrl2 = "http://tonkiang.us/"
    speed1 = await getSpeed(baseUrl1, 30)
    speed2 = await getSpeed(baseUrl2, 30)
    if speed1 == float("inf") and speed2 == float("inf"):
        return None
    if speed1 < speed2:
        return baseUrl1
    else:
        return baseUrl2
def getFOFAUrlsFromRegionList():
    """
    Get the FOFA urls from the region list
    """
    region_list = getattr(config, "region_list", [])
    urls = []
    region_url = getattr(fofa_map, "region_url")
    if "all" in region_list:
        urls = [url for url in region_url.values() if url]
    else:
        for region in region_list:
            if region in region_url:
                urls.append(region_url[region])
    return urls
def getChannelsByFOFA(source):
    """
    Get the channels by FOFA
    """
    urls = set(re.findall(r"https?://[\w\.-]+:\d+", source))
    channels = {}
    urls_len = len(urls)
    pbar = tqdm(total=urls_len)
    for url in urls:
        try:
            pbar.set_description(
                f"Processing multicast {url}, {urls_len - pbar.n} urls remaining"
            )
            response = requests.get(url + "/iptv/live/1000.json?key=txiptv", timeout=2)
            try:
                json_data = response.json()
                if json_data["code"] == 0:
                    try:
                        for item in json_data["data"]:
                            if isinstance(item, dict):
                                item_name = formatChannelName(item.get("name"))
                                item_url = item.get("url").strip()
                                if item_name and item_url:
                                    total_url = url + item_url
                                    if item_name not in channels:
                                        channels[item_name] = [total_url]
                                    else:
                                        channels[item_name].append(total_url)
                    except Exception as e:
                        # print(f"Error on fofa: {e}")
                        continue
            except Exception as e:
                # print(f"{url}: {e}")
                continue
        except Exception as e:
            # print(f"{url}: {e}")
            continue
        finally:
            pbar.update()
    pbar.close()
    return channels
def mergeObjects(*objects):
    """
    Merge objects
    """
    merged_dict = {}
    for obj in objects:
        if not isinstance(obj, dict):
            raise TypeError("All input objects must be dictionaries")
        for key, value in obj.items():
            if key not in merged_dict:
                merged_dict[key] = set()
            if isinstance(value, set):
                merged_dict[key].update(value)
            elif isinstance(value, list):
                for item in value:
                    merged_dict[key].add(item)
            else:
                merged_dict[key].add(value)
    for key, value in merged_dict.items():
        merged_dict[key] = list(value)
    return merged_dict
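# mergeObjects usage sketch (illustrative): values are collected into a set per key, so duplicates
# collapse and ordering is not preserved:
#   mergeObjects({"cctv1": ["a"]}, {"cctv1": ["a", "b"]})  # -> {"cctv1": ["a", "b"]} (order may vary)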