get_list_proxy.py

import time
import os

from bs4 import BeautifulSoup
import requests


def time_watcher(start_time):  # script execution timer
    print('--- %s seconds ---' % (time.time() - start_time))


def get_html(url):  # fetch the page
    response = requests.get(url)
    return response.text


def write_log(address, nameFile):  # append an ip address to a file next to the script
    path_f = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(path_f, nameFile), "a") as _file:
        _file.write(address)


def get_proxies(html):  # extract ip addresses from the proxy table
    soup = BeautifulSoup(html, 'lxml')
    # trs = soup.find('tbody').find_all('tr')[1:11]
    trs = soup.find('tbody').find_all('tr')
    for tr in trs:
        tds = tr.find_all('td')
        ip = tds[0].text.strip()
        port = tds[1].text.strip()
        # column 7 of the table is "Https" ('yes'/'no')
        schema = 'https' if 'yes' in tds[6].text.strip() else 'http'
        address = ip + ":" + port
        # if port == '80' or port == '8080':
        #     continue
        # else:
        #     write_log(schema + "://" + address + "\n", "proxy_http.txt")
        if schema == 'https':  # log https proxies
            write_log(schema + "://" + address + "\n", "proxy_https.txt")
        else:  # log http proxies
            write_log(schema + "://" + address + "\n", "proxy_http.txt")


def main():
    url = 'https://free-proxy-list.net/'
    html = get_html(url)
    get_proxies(html)


if __name__ == '__main__':
    start_time = time.time()
    main()
    time_watcher(start_time)
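
A minimal usage sketch, not part of the original script: it reads the proxy_https.txt file that get_proxies() writes and tries each entry against httpbin.org, an illustrative test URL. The function name try_proxies and all parameters here are assumptions; whether a given free proxy actually responds depends on your environment.

import requests

def try_proxies(name_file="proxy_https.txt"):  # hypothetical helper; file produced by get_proxies()
    with open(name_file) as _file:
        for line in _file:
            proxy = line.strip()  # e.g. "https://1.2.3.4:8080"
            try:
                # httpbin.org/ip echoes the caller's ip, so the output shows
                # whether traffic really went through the proxy
                response = requests.get("https://httpbin.org/ip",
                                        proxies={"https": proxy}, timeout=5)
                print(proxy, "->", response.json())
            except requests.RequestException:
                continue  # dead or slow proxy, skip it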