# itv(request).py (24 KB)

import concurrent.futures
import os
import re
import sys
import time

import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
  8. urls = [
  9. "", # 北京
  10. "", # 上海
  11. "", # 天津
  12. "", # 重庆
  13. "", # 安庆
  14. "", # 鞍山
  15. "", # 安阳
  16. "", # 白城
  17. "", # 保定
  18. "", # 宝鸡
  19. "", # 滨州
  20. "", # 亳州
  21. "", # 沧州
  22. "", # 常德
  23. "", # 潮州
  24. "", # 成都
  25. "", # 郴州
  26. "", # 池州
  27. "", # 崇左
  28. "", # 滁州
  29. "", # 大理
  30. "", # 丹阳
  31. "", # 大庆
  32. "", # 大同
  33. "", # 达州
  34. "", # 德州
  35. "", # 东莞
  36. "", # 东营
  37. "", # 阜新
  38. "", # 阜阳
  39. "", # 福州
  40. "", # 赣州
  41. "", # 广州
  42. "", # 贵港
  43. "", # 贵阳
  44. "", # 海口
  45. "", # 邯郸
  46. "", # 杭州
  47. "", # 鹤壁
  48. "", # 合肥
  49. "", # 衡阳
  50. "", # 河源
  51. "", # 菏泽
  52. "", # 怀化
  53. "", # 淮南
  54. "", # 黄冈
  55. "", # 黄石
  56. "", # 惠州
  57. "", # 湖州
  58. "", # 江门
  59. "", # 焦作
  60. "", # 嘉兴
  61. "", # 吉林
  62. "", # 济南
  63. "", # 晋城
  64. "", # 景德镇
  65. "", # 景洪
  66. "", # 金华
  67. "", # 济宁
  68. "", # 晋中
  69. "", # 锦州
  70. "", # 九江
  71. "", # 济源
  72. "", # 开封
  73. "", # 昆明
  74. "", # 廊坊
  75. "", # 兰州
  76. "", # 聊城
  77. "", # 临沂
  78. "", # 娄底
  79. "", # 漯河
  80. "", # 洛阳
  81. "", # 茂名
  82. "", # 梅州
  83. "", # 牡丹江
  84. "", # 南昌
  85. "", # 南充
  86. "", # 南京
  87. "", # 南宁
  88. "", # 南阳
  89. "", # 宁波
  90. "", # 宁德
  91. "", # 平顶山
  92. "", # 萍乡
  93. "", # 濮阳
  94. "", # 青岛
  95. "", # 琼海
  96. "", # 泉州
  97. "", # 衢州
  98. "", # 日照
  99. "", # 三门峡
  100. "", # 三亚
  101. "", # 厦门
  102. "", # 佛山
  103. "", # 商丘
  104. "", # 绍兴
  105. "", # 邵阳
  106. "", # 沈阳
  107. "", # 深圳
  108. "", # 石家庄
  109. "", # 十堰
  110. "", # 松原
  111. "", # 遂宁
  112. "", # 苏州
  113. "", # 太原
  114. "", # 泰州
  115. "", # 铁岭
  116. "", # 潍坊
  117. "", # 威海
  118. "", # 渭南
  119. "", # 温州
  120. "", # 武汉
  121. "", # 芜湖
  122. "", # 无锡
  123. "", # 梧州
  124. "", # 西安
  125. "", # 湘潭
  126. "", # 西昌
  127. "", # 新乡
  128. "", # 信阳
  129. "", # 新余
  130. "", # 宿迁
  131. "", # 许昌
  132. "", # 徐州
  133. "", # 盐城
  134. "", # 阳江
  135. "", # 扬州
  136. "", # 延吉
  137. "", # 烟台
  138. "", # 宜春
  139. "", # 营口
  140. "", # 益阳
  141. "", # 永州
  142. "", # 岳阳
  143. "", # 玉林
  144. "", # 运城
  145. "", # 长春
  146. "", # 张家界
  147. "", # 长沙
  148. "", # 漳州
  149. "", # 湛江
  150. "", # 肇庆
  151. "", # 昭通
  152. "", # 郑州
  153. "", # 中山
  154. "", # 周口
  155. "", # 舟山
  156. "", # 珠海
  157. "", # 驻马店
  158. "", # 株洲
  159. "", # 自贡
  160. "", # 福清
  161. "", # 包头
  162. "", # 资阳
  163. "", # 辛集
  164. "", # 七台河
  165. ]
  166. def modify_urls(url):
  167. modified_urls = []
  168. ip_start_index = url.find("//") + 2
  169. ip_end_index = url.find(":", ip_start_index)
  170. base_url = url[:ip_start_index] # http:// or https://
  171. ip_address = url[ip_start_index:ip_end_index]
  172. port = url[ip_end_index:]
  173. ip_end = "/iptv/live/1000.json?key=txiptv"
  174. for i in range(1, 256):
  175. modified_ip = f"{ip_address[:-1]}{i}"
  176. modified_url = f"{base_url}{modified_ip}{port}{ip_end}"
  177. modified_urls.append(modified_url)
  178. return modified_urls
  179. def is_url_accessible(url):
  180. try:
  181. response = requests.get(url, timeout=1)
  182. if response.status_code == 200:
  183. return url
  184. except requests.exceptions.RequestException:
  185. pass
  186. return None
  187. results = []
  188. for url in urls:
  189. try:
  190. response = requests.get(url, timeout = 15)
  191. if response.status_code == 200:
  192. print(response)
  193. page_content = response.content.decode('utf-8')
  194. # 查找所有符合指定格式的网址
  195. pattern = r"http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+" # 设置匹配的格式,如http://
  196. urls_all = re.findall(pattern, page_content)
  197. # urls = list(set(urls_all)) # 去重得到唯一的URL列表
  198. urls = set(urls_all) # 去重得到唯一的URL列表
  199. x_urls = []
  200. for url in urls: # 对urls进行处理,ip第四位修改为1,并去重
  201. url = url.strip()
  202. ip_start_index = url.find("//") + 2
  203. ip_end_index = url.find(":", ip_start_index)
  204. ip_dot_start = url.find(".") + 1
  205. ip_dot_second = url.find(".", ip_dot_start) + 1
  206. ip_dot_three = url.find(".", ip_dot_second) + 1
  207. base_url = url[:ip_start_index] # http:// or https://
  208. ip_address = url[ip_start_index:ip_dot_three]
  209. port = url[ip_end_index:]
  210. ip_end = "1"
  211. modified_ip = f"{ip_address}{ip_end}"
  212. x_url = f"{base_url}{modified_ip}{port}"
  213. x_urls.append(x_url)
  214. urls = set(x_urls) # 去重得到唯一的URL列表
  215. valid_urls = []
  216. # 多线程获取可用url
  217. with concurrent.futures.ThreadPoolExecutor(max_workers=100) as executor:
  218. futures = []
  219. for url in urls:
  220. url = url.strip()
  221. modified_urls = modify_urls(url)
  222. for modified_url in modified_urls:
  223. futures.append(executor.submit(is_url_accessible, modified_url))
  224. for future in concurrent.futures.as_completed(futures):
  225. result = future.result()
  226. if result:
  227. valid_urls.append(result)
  228. for url in valid_urls:
  229. print(url)
  230. # 遍历网址列表,获取JSON文件并解析
  231. for url in valid_urls:
  232. try:
  233. # 发送GET请求获取JSON文件,设置超时时间为0.5秒
  234. ip_start_index = url.find("//") + 2
  235. ip_dot_start = url.find(".") + 1
  236. ip_index_second = url.find("/", ip_dot_start)
  237. base_url = url[:ip_start_index] # http:// or https://
  238. ip_address = url[ip_start_index:ip_index_second]
  239. url_x = f"{base_url}{ip_address}"
  240. json_url = f"{url}"
  241. response = requests.get(json_url, timeout=1)
  242. json_data = response.json()
  243. try:
  244. # 解析JSON文件,获取name和url字段
  245. for item in json_data['data']:
  246. if isinstance(item, dict):
  247. name = item.get('name')
  248. urlx = item.get('url')
  249. if 'http' in urlx:
  250. urld = f"{urlx}"
  251. else:
  252. urld = f"{url_x}{urlx}"
  253. if name and urld:
  254. # 删除特定文字
  255. name = name.replace("cctv", "CCTV")
  256. name = name.replace("中央", "CCTV")
  257. name = name.replace("央视", "CCTV")
  258. name = name.replace("高清", "")
  259. name = name.replace("超高", "")
  260. name = name.replace("HD", "")
  261. name = name.replace("标清", "")
  262. name = name.replace("频道", "")
  263. name = name.replace("-", "")
  264. name = name.replace(" ", "")
  265. name = name.replace("PLUS", "+")
  266. name = name.replace("+", "+")
  267. name = name.replace("(", "")
  268. name = name.replace(")", "")
  269. name = re.sub(r"CCTV(\d+)台", r"CCTV\1", name)
  270. name = name.replace("CCTV1综合", "CCTV1")
  271. name = name.replace("CCTV2财经", "CCTV2")
  272. name = name.replace("CCTV3综艺", "CCTV3")
  273. name = name.replace("CCTV4国际", "CCTV4")
  274. name = name.replace("CCTV4中文国际", "CCTV4")
  275. name = name.replace("CCTV4欧洲", "CCTV4")
  276. name = name.replace("CCTV5体育", "CCTV5")
  277. name = name.replace("CCTV6电影", "CCTV6")
  278. name = name.replace("CCTV7军事", "CCTV7")
  279. name = name.replace("CCTV7军农", "CCTV7")
  280. name = name.replace("CCTV7农业", "CCTV7")
  281. name = name.replace("CCTV7国防军事", "CCTV7")
  282. name = name.replace("CCTV8电视剧", "CCTV8")
  283. name = name.replace("CCTV9记录", "CCTV9")
  284. name = name.replace("CCTV9纪录", "CCTV9")
  285. name = name.replace("CCTV10科教", "CCTV10")
  286. name = name.replace("CCTV11戏曲", "CCTV11")
  287. name = name.replace("CCTV12社会与法", "CCTV12")
  288. name = name.replace("CCTV13新闻", "CCTV13")
  289. name = name.replace("CCTV新闻", "CCTV13")
  290. name = name.replace("CCTV14少儿", "CCTV14")
  291. name = name.replace("CCTV15音乐", "CCTV15")
  292. name = name.replace("CCTV16奥林匹克", "CCTV16")
  293. name = name.replace("CCTV17农业农村", "CCTV17")
  294. name = name.replace("CCTV17农业", "CCTV17")
  295. name = name.replace("CCTV5+体育赛视", "CCTV5+")
  296. name = name.replace("CCTV5+体育赛事", "CCTV5+")
  297. name = name.replace("CCTV5+体育", "CCTV5+")
  298. urld = urld.replace("udp://@", "/udp/")
  299. urld = urld.replace("udp://", "/udp/")
  300. urld = urld.replace("rtp://@", "/rtp/")
  301. urld = urld.replace("rtp://", "/rtp/")
  302. results.append(f"{name},{urld}")
  303. except:
  304. continue
  305. except:
  306. continue
  307. except:
  308. continue
  309. results = set(results) # 去重得到唯一的URL列表
  310. results = sorted(results)
  311. with open("itv.txt", 'w', encoding='utf-8') as file:
  312. for result in results:
  313. file.write(result + "\n")
  314. print(result)