utils.py

try:
    import user_config as config
except ImportError:
    import config
import aiohttp
import asyncio
import time
import re
import datetime
import os
import urllib.parse
import ipaddress
from urllib.parse import urlparse
import requests
from bs4 import NavigableString
import fofa_map
from collections import defaultdict
def getChannelItems():
    """
    Get the channel items from the source file
    """
    # Resolve the source file, preferring a user-provided copy if it exists.
    source_file = getattr(config, "source_file", "demo.txt")
    user_source_file = (
        "user_" + source_file if os.path.exists("user_" + source_file) else source_file
    )
    # Create a dictionary to store the channels.
    channels = defaultdict(lambda: defaultdict(list))
    current_category = ""
    pattern = r"^(.*?),(?!#genre#)(.*?)$"
    with open(user_source_file, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if "#genre#" in line:
                # This is a category marker, start a new key in the dictionary.
                current_category = line.split(",")[0]
            else:
                # This is a channel url, add it to the current category.
                match = re.search(pattern, line)
                if match is not None:
                    name = match.group(1).strip()
                    url = match.group(2).strip()
                    if url and url not in channels[current_category][name]:
                        channels[current_category][name].append(url)
    return channels
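# Illustrative source-file layout parsed above (hypothetical names and urls):
#   Sports,#genre#
#   CCTV-1,http://example.com/cctv1.m3u8
#   CCTV-2,http://example.com/cctv2.m3u8
# A ",#genre#" line opens a category; every following "name,url" line is
# attached to that category until the next "#genre#" marker appears.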
async def getChannelsByExtendBaseUrls(channel_names):
    """
    Get the channels by extending the base urls
    """
    channels = {}
    pattern = r"^(.*?),(?!#genre#)(.*?)$"
    sub_pattern = r"_\((.*?)\)|_\[(.*?)\]|频道"
    for base_url in config.subscribe_urls:
        try:
            print(f"Processing extend base url: {base_url}")
            try:
                response = requests.get(base_url, timeout=30)
            except requests.exceptions.Timeout:
                print(f"Timeout on {base_url}")
                continue
            content = response.text
            if content:
                lines = content.split("\n")
                link_dict = {}
                for line in lines:
                    match = re.match(pattern, line)
                    if match is not None:
                        key = match.group(1)
                        resolution_match = re.search(r"_(\((.*?)\))", key)
                        resolution = (
                            resolution_match.group(2)
                            if resolution_match is not None
                            else None
                        )
                        key = re.sub(sub_pattern, "", key).lower()
                        url = match.group(2)
                        value = (url, None, resolution)
                        if key in link_dict:
                            if value not in link_dict[key]:
                                link_dict[key].append(value)
                        else:
                            link_dict[key] = [value]
                found_channels = []
                for channel_name in channel_names:
                    sub_channel_name = (
                        channel_name.lower()
                        if config.strict_match
                        else re.sub(sub_pattern, "", channel_name).lower()
                    )
                    values = link_dict.get(sub_channel_name)
                    if values:
                        if channel_name in channels:
                            channels[channel_name] += values
                        else:
                            channels[channel_name] = values
                        found_channels.append(channel_name)
                if found_channels:
                    print(f"{base_url} found channels: {','.join(found_channels)}")
        except Exception as e:
            print(f"Error on {base_url}: {e}")
            continue
    print("Finished processing extend base urls")
    return channels
def updateChannelUrlsTxt(cate, channelUrls):
    """
    Update the category and channel urls to the final file
    """
    # The with statement closes the file, so no explicit close is needed.
    with open("result_new.txt", "a", encoding="utf-8") as f:
        f.write(cate + ",#genre#\n")
        for name, urls in channelUrls.items():
            for url in urls:
                if url is not None:
                    f.write(name + "," + url + "\n")
        f.write("\n")
def updateFile(final_file, old_file):
    """
    Update the file
    """
    if os.path.exists(old_file):
        os.replace(old_file, final_file)
def getChannelUrl(element):
    """
    Get the url from the element text
    """
    url = None
    urlRegex = r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
    url_search = re.search(
        urlRegex,
        element.get_text(strip=True),
    )
    if url_search:
        url = url_search.group()
    return url
def getChannelInfo(element):
    """
    Get the channel info
    """
    date, resolution = None, None
    info_text = element.get_text(strip=True)
    if info_text:
        date, resolution = (
            (info_text.partition(" ")[0] if info_text.partition(" ")[0] else None),
            (
                info_text.partition(" ")[2].partition("•")[2]
                if info_text.partition(" ")[2].partition("•")[2]
                else None
            ),
        )
    return date, resolution
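# The info text is expected to look roughly like "03-02-2024 ... •1920x1080"
# (a hypothetical example): the token before the first space is taken as the
# date and everything after the "•" as the resolution; filterByDate later
# parses the date with the "%m-%d-%Y" format.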
def checkNameMatch(name, result_name):
    """
    Check if the result name matches the searched channel name
    """
    pattern = r"[a-zA-Z]+[_\-+]|cctv"
    if re.search(
        pattern,
        result_name,
        re.IGNORECASE,
    ):
        return name.lower() == result_name.lower()
    else:
        return True
def getResultsFromSoup(soup, name):
    """
    Get the results from the soup
    """
    results = []
    for element in soup.descendants:
        if isinstance(element, NavigableString):
            url = getChannelUrl(element)
            if url and not any(item[0] == url for item in results):
                url_element = soup.find(lambda tag: tag.get_text(strip=True) == url)
                if url_element:
                    name_element = url_element.find_previous_sibling()
                    if name_element:
                        channel_name = name_element.get_text(strip=True)
                        if checkNameMatch(name, channel_name):
                            # find_next_sibling may return None, guard before parsing.
                            info_element = url_element.find_next_sibling()
                            date, resolution = (
                                getChannelInfo(info_element)
                                if info_element
                                else (None, None)
                            )
                            results.append((url, date, resolution))
    return results
async def getSpeed(url, urlTimeout=5):
    """
    Get the speed of the url
    """
    async with aiohttp.ClientSession() as session:
        start = time.time()
        try:
            async with session.get(url, timeout=urlTimeout) as response:
                resStatus = response.status
        except Exception:
            return float("inf")
        end = time.time()
        if resStatus == 200:
            return int(round((end - start) * 1000))
        else:
            return float("inf")
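# Minimal usage sketch (hypothetical url): the coroutine resolves to the
# response time in milliseconds, or float("inf") when the request fails or
# returns a non-200 status.
#   asyncio.run(getSpeed("http://example.com/live.m3u8"))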
async def sortUrlsBySpeedAndResolution(infoList):
    """
    Sort by speed and resolution
    """
    response_times = await asyncio.gather(*(getSpeed(url) for url, _, _ in infoList))
    valid_responses = [
        (info, rt) for info, rt in zip(infoList, response_times) if rt != float("inf")
    ]

    def extract_resolution(resolution_str):
        numbers = re.findall(r"\d+x\d+", resolution_str)
        if numbers:
            width, height = map(int, numbers[0].split("x"))
            return width * height
        else:
            return 0

    default_response_time_weight = 0.5
    default_resolution_weight = 0.5
    response_time_weight = getattr(
        config, "response_time_weight", default_response_time_weight
    )
    resolution_weight = getattr(config, "resolution_weight", default_resolution_weight)
    # Check if weights are valid
    if not (
        0 <= response_time_weight <= 1
        and 0 <= resolution_weight <= 1
        and response_time_weight + resolution_weight == 1
    ):
        response_time_weight = default_response_time_weight
        resolution_weight = default_resolution_weight

    def combined_key(item):
        (_, _, resolution), response_time = item
        resolution_value = extract_resolution(resolution) if resolution else 0
        return (
            -(response_time_weight * response_time)
            + resolution_weight * resolution_value
        )

    sorted_res = sorted(valid_responses, key=combined_key, reverse=True)
    return sorted_res
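# Worked example of combined_key with the default 0.5/0.5 weights (hypothetical
# measurements): a 200 ms stream reporting 1920x1080 scores
# -(0.5 * 200) + 0.5 * (1920 * 1080) = 1036700, while a 100 ms stream with no
# reported resolution scores -50, so the higher-resolution entry sorts first
# under reverse=True.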
def filterByDate(data):
    """
    Filter by date and limit
    """
    default_recent_days = 60
    use_recent_days = getattr(config, "recent_days", default_recent_days)
    if (
        not isinstance(use_recent_days, int)
        or use_recent_days <= 0
        or use_recent_days > 365
    ):
        use_recent_days = default_recent_days
    start_date = datetime.datetime.now() - datetime.timedelta(days=use_recent_days)
    recent_data = []
    unrecent_data = []
    for (url, date, resolution), response_time in data:
        if date:
            date = datetime.datetime.strptime(date, "%m-%d-%Y")
            if date >= start_date:
                recent_data.append(((url, date, resolution), response_time))
            else:
                unrecent_data.append(((url, date, resolution), response_time))
    if len(recent_data) < config.urls_limit:
        recent_data.extend(unrecent_data[: config.urls_limit - len(recent_data)])
    return recent_data[: config.urls_limit]
def getTotalUrls(data):
    """
    Get the total urls, filtered by date and deduplicated
    """
    total_urls = []
    if len(data) > config.urls_limit:
        total_urls = [url for (url, _, _), _ in filterByDate(data)]
    else:
        total_urls = [url for (url, _, _), _ in data]
    return list(dict.fromkeys(total_urls))
def getTotalUrlsFromInfoList(infoList):
    """
    Get the total urls from info list
    """
    total_urls = [
        url for url, _, _ in infoList[: min(len(infoList), config.urls_limit)]
    ]
    return list(dict.fromkeys(total_urls))
def is_ipv6(url):
    """
    Check if the url is ipv6
    """
    try:
        host = urllib.parse.urlparse(url).hostname
        ipaddress.IPv6Address(host)
        return True
    except ValueError:
        return False
def checkUrlIPVType(url):
    """
    Check if the url is compatible with the ipv type in the config
    """
    ipv_type = getattr(config, "ipv_type", "ipv4")
    if ipv_type == "ipv4":
        return not is_ipv6(url)
    elif ipv_type == "ipv6":
        return is_ipv6(url)
    else:
        return True
def checkByDomainBlacklist(url):
    """
    Check by domain blacklist
    """
    domain_blacklist = [
        urlparse(domain).netloc if urlparse(domain).scheme else domain
        for domain in getattr(config, "domain_blacklist", [])
    ]
    return urlparse(url).netloc not in domain_blacklist
def checkByURLKeywordsBlacklist(url):
    """
    Check by URL blacklist keywords
    """
    url_keywords_blacklist = getattr(config, "url_keywords_blacklist", [])
    return not any(keyword in url for keyword in url_keywords_blacklist)
def checkUrlByPatterns(url):
    """
    Check the url by patterns
    """
    return (
        checkUrlIPVType(url)
        and checkByDomainBlacklist(url)
        and checkByURLKeywordsBlacklist(url)
    )
def filterUrlsByPatterns(urls):
    """
    Filter urls by patterns
    """
    urls = [url for url in urls if checkUrlIPVType(url)]
    urls = [url for url in urls if checkByDomainBlacklist(url)]
    urls = [url for url in urls if checkByURLKeywordsBlacklist(url)]
    return urls
async def useAccessibleUrl():
    """
    Check which base url is accessible and return the faster one
    """
    baseUrl1 = "https://www.foodieguide.com/iptvsearch/"
    baseUrl2 = "http://tonkiang.us/"
    speed1 = await getSpeed(baseUrl1, 30)
    speed2 = await getSpeed(baseUrl2, 30)
    if speed1 == float("inf") and speed2 == float("inf"):
        return None
    if speed1 < speed2:
        return baseUrl1
    else:
        return baseUrl2
def getFOFAUrlsFromRegionList():
    """
    Get the FOFA urls from the region list
    """
    region_list = getattr(config, "region_list", [])
    urls = []
    region_url = getattr(fofa_map, "region_url")
    for region in region_list:
        if region in region_url:
            urls.append(region_url[region])
    return urls
def getChannelsByFOFA(source):
    """
    Get the channels by FOFA
    """
    urls = set(re.findall(r"https?://[\w\.-]+:\d+", source))
    channels = {}
    for url in urls:
        try:
            response = requests.get(url + "/iptv/live/1000.json?key=txiptv", timeout=2)
            try:
                json_data = response.json()
                if json_data["code"] == 0:
                    try:
                        for item in json_data["data"]:
                            if isinstance(item, dict):
                                item_name = item.get("name").strip()
                                item_url = item.get("url").strip()
                                if item_name and item_url:
                                    total_url = url + item_url
                                    if item_name not in channels:
                                        channels[item_name] = [total_url]
                                    else:
                                        channels[item_name].append(total_url)
                    except Exception as e:
                        # print(f"Error on fofa: {e}")
                        continue
            except Exception as e:
                # print(f"{url}: {e}")
                continue
        except Exception as e:
            # print(f"{url}: {e}")
            continue
    return channels
def mergeObjects(*objects):
    """
    Merge objects
    """
    merged_dict = {}
    for obj in objects:
        if not isinstance(obj, dict):
            raise TypeError("All input objects must be dictionaries")
        for key, value in obj.items():
            if key not in merged_dict:
                merged_dict[key] = set()
            if isinstance(value, set):
                merged_dict[key].update(value)
            elif isinstance(value, list):
                for item in value:
                    merged_dict[key].add(item)
            else:
                merged_dict[key].add(value)
    for key, value in merged_dict.items():
        merged_dict[key] = list(value)
    return merged_dict
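# Minimal usage sketch (hypothetical values): the values per key are collected
# in a set before being listed, so duplicates are dropped and the order of each
# resulting list is not guaranteed.
#   mergeObjects({"CCTV-1": ["http://a/1"]}, {"CCTV-1": ["http://a/1", "http://b/1"]})
#   -> {"CCTV-1": ["http://a/1", "http://b/1"]} (in some order)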