# get_active_ips.py

import os
import random
import time

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent
# http://proxylist.hidemyass.com/search-1299627  # listable
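#
# Checks every proxy listed in proxy_<protocol>.txt by requesting an IP-echo
# page through it; proxies whose own address comes back (i.e. that really
# relay the request) are appended to ip.txt next to this script.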


def get_html(url, headers=None, proxy=None):
    print('---------get_html------------')
    print(f"proxy: {proxy}\nheaders: {headers}")
    # timeout keeps a dead proxy from hanging the whole run
    response = requests.get(url, headers=headers, proxies=proxy, timeout=10)
    return response


def get_my_ip(html, proxy, protocol):
    print('---------get_my_ip------------')
    # proxy is expected as scheme://host:port; keep only the host part
    _, address = proxy.split('//')
    proxy_host, _ = address.split(':')
    ip = ''
    agent = ''
    soup = BeautifulSoup(html, 'lxml')
    if protocol == 'http':
        # markup of the plain-http echo page (cf. the sitespy.ru URL in main)
        try:
            ip = soup.find('div', class_="ip-block").find(
                'span', class_='ip').text.strip()
        except AttributeError:
            ip = 'Not found'
        try:
            agent = soup.find('div', class_="ip-block").find(
                'span', class_='ip').find_next_sibling('span').text.strip()
        except AttributeError:
            agent = 'Not found'
    else:
        # markup of https://yandex.ru/internet/
        divs = soup.find('div', id='techinfocontent').find_all(
            'dl', class_='list-info__content')
        lists = divs[0].find_all('div', class_="list-info__item")
        ip = lists[0].find('div', class_="list-info__renderer").text.strip()
        agent = 'agent'
    data = {'ip': ip, 'agent': agent}
    print(ip + "\n" + proxy_host)
    if ip == proxy_host:
        # the page saw the proxy's own IP, so the proxy works: record it
        path_f = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(path_f, "ip.txt"), "a") as _file:
            _file.write(proxy + '\n')
    return data
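

# Worked example for the check above: for the entry
# 'http://201.131.164.150:3128' (format assumed: scheme://host:port),
# split('//') yields 'http:' and '201.131.164.150:3128', and the second
# split yields the host '201.131.164.150'. The proxy is saved to ip.txt
# only when the page echoes that same address, which means the request
# really went out through the proxy.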


def time_watcher(start_time):
    print('--- %s seconds ---' % (time.time() - start_time))


def get_data_from_file(name_file):
    # read newline-stripped lines from a file next to this script
    path_f = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(path_f, name_file)) as txt_file:
        data = [line.rstrip() for line in txt_file]
    return data
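

# Assumed input layout (the files themselves are not shown here):
# useragents.txt holds one User-Agent string per line, and
# proxy_https.txt / proxy_http.txt hold one proxy URL per line in the
# form scheme://host:port.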


def main():
    # url = 'http://sitespy.ru/my-ip'
    # url = 'https://httpbin.org/ip'
    url = 'https://yandex.ru/internet/'
    protocol = 'https'
    useragents = get_data_from_file('useragents.txt')
    proxies = get_data_from_file(f'proxy_{protocol}.txt')
    ua = UserAgent()  # build once instead of once per loop iteration
    for i in range(17, len(proxies)):  # 17: offset to resume a partial run
        time_await = random.uniform(2, 3)
        time.sleep(time_await)
        # proxy = random.choice(proxies)
        # proxy = 'http://201.131.164.150:3128'
        proxy = proxies[i]
        proxies_ = {
            "http": proxy,
            "https": proxy,
        }
        # headers = {'User-Agent': random.choice(useragents)}
        headers = {'User-Agent': ua.random}
        print('time_await: ' + str(time_await) +
              f' request: {i} ' + time.asctime())
        try:
            response = get_html(url, headers, proxies_)
            get_my_ip(response.text, proxy, protocol)
        except Exception:  # a bare except here would also swallow Ctrl-C
            continue
    # response = get_html(url)
    # get_my_ip(response.text, 'proxy://proxy:proxy', 'https')


if __name__ == '__main__':
    start_time = time.time()
    main()
    time_watcher(start_time)
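

# Usage sketch (invocation assumed; dependencies inferred from the imports):
#   pip install requests beautifulsoup4 lxml fake-useragent
#   python get_active_ips.py
# Working proxies accumulate in ip.txt alongside this script.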