123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- import time
- import os
- from bs4 import BeautifulSoup
- import requests
def time_watcher(start_time):
    """Print and return the number of seconds elapsed since *start_time*.

    :param start_time: epoch timestamp, as produced by ``time.time()``.
    :return: elapsed time in seconds (also echoed to stdout).
    """
    elapsed = time.time() - start_time
    # Keep the original output format; additionally return the value so
    # callers can use the measurement instead of only seeing it printed.
    print('--- %s seconds ---' % elapsed)
    return elapsed
def get_html(url, timeout=30):
    """Fetch *url* over HTTP(S) and return the response body as text.

    :param url: address of the page to download.
    :param timeout: seconds to wait for the server before giving up;
        without it ``requests.get`` can block indefinitely.
    :return: decoded response body (``str``).
    """
    response = requests.get(url, timeout=timeout)
    return response.text
def write_log(address, nameFile):
    """Append *address* to the file *nameFile* located next to this script.

    :param address: text to append (callers pass a line ending in ``\n``).
    :param nameFile: bare file name; it is resolved relative to this
        script's directory, not the current working directory.
    """
    path_f = os.path.dirname(os.path.abspath(__file__))
    # Explicit encoding avoids the platform-dependent default (PEP 597).
    with open(os.path.join(path_f, nameFile), "a", encoding="utf-8") as _file:
        _file.write(address)
def get_proxies(html):
    """Parse the free-proxy-list HTML table and log each proxy address.

    Each table row yields one ``schema://ip:port`` line appended to
    ``proxy_https.txt`` or ``proxy_http.txt`` depending on whether the
    proxy supports HTTPS.

    :param html: raw HTML of the listing page (expects a ``<tbody>``
        whose ``<tr>`` rows contain ``<td>`` cells; column 6 is the
        "Https" yes/no flag — layout of free-proxy-list.net).
    """
    soup = BeautifulSoup(html, 'lxml')
    rows = soup.find('tbody').find_all('tr')
    for row in rows:
        cells = row.find_all('td')
        ip = cells[0].text.strip()
        port = cells[1].text.strip()
        # Column 6 holds "yes"/"no" for HTTPS support.
        schema = "https" if 'yes' in cells[6].text.strip() else 'http'
        address = ip + ":" + port
        # The original if/else branches were identical except for the file
        # name, which simply follows the schema — write once instead.
        write_log(schema + "://" + address + "\n", "proxy_" + schema + ".txt")
def main():
    """Download the proxy listing page and record every proxy it lists."""
    source_url = 'https://free-proxy-list.net/'
    page = get_html(source_url)
    get_proxies(page)
if __name__ == '__main__':
    # Time the whole scrape and report the duration on exit.
    launched_at = time.time()
    main()
    time_watcher(launched_at)
|