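"""Proxy checker: fetch an IP-echo page through each proxy in a list and
append every proxy whose reported IP matches its own address to ip.txt.

Expects useragents.txt and proxy_<protocol>.txt next to this script.
"""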
import os
import random
import time

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

# Proxy source: http://proxylist.hidemyass.com/search-1299627  # listable
def get_html(url, headers=None, proxy=None):
    """Fetch *url* through the given proxy and return the Response object."""
    print('---------get_html------------')
    print(f"proxy: {proxy}\nheaders: {headers}")
    # A timeout keeps a dead proxy from hanging the whole run.
    response = requests.get(url, headers=headers, proxies=proxy, timeout=10)
    return response
def get_my_ip(html, proxy, protocol):
    """Parse the IP the page reports; if it matches the proxy's own IP,
    record the proxy as working in ip.txt."""
    print('---------get_my_ip------------')
    # proxy looks like 'http://1.2.3.4:3128' -> pull out the bare IP.
    _, address = proxy.split('//')
    proxy_ip, _ = address.split(':')
    ip = ''
    agent = ''
    soup = BeautifulSoup(html, 'lxml')
    if protocol == 'http':
        # Layout of http://sitespy.ru/my-ip: IP and user agent sit in
        # adjacent <span> elements inside div.ip-block.
        try:
            ip = soup.find('div', class_="ip-block").find(
                'span', class_='ip').text.strip()
        except AttributeError:
            ip = 'Not found'
        try:
            agent = soup.find('div', class_="ip-block").find(
                'span', class_='ip').find_next_sibling('span').text.strip()
        except AttributeError:
            agent = 'Not found'
    else:
        # Layout of https://yandex.ru/internet/: the first list item under
        # #techinfocontent holds the visible IP address.
        divs = soup.find('div', id='techinfocontent').find_all(
            'dl', class_='list-info__content')
        lists = divs[0].find_all('div', class_="list-info__item")
        ip = lists[0].find('div', class_="list-info__renderer").text.strip()
        agent = 'agent'
    print(f"{ip}\n{proxy_ip}")
    if ip == proxy_ip:
        # The page sees the proxy's address, not ours -- keep this proxy.
        path_f = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(path_f, "ip.txt"), "a") as _file:
            _file.write(proxy + '\n')
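# A sketched alternative: the commented-out https://httpbin.org/ip endpoint
# in main() returns JSON like {"origin": "1.2.3.4"}, which would make the
# same check possible without HTML scraping. Illustrative only; main() does
# not call this helper.
def proxy_is_anonymous(response_json, proxy):
    """Return True if the IP reported by httpbin matches the proxy's IP."""
    proxy_ip = proxy.split('//')[1].split(':')[0]
    # httpbin may report several comma-separated addresses behind some proxies.
    return proxy_ip in response_json.get('origin', '')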
def time_watcher(start_time):
    """Print elapsed wall-clock time since *start_time*."""
    print('--- %s seconds ---' % (time.time() - start_time))


def get_data_from_file(name_file):
    """Read a text file next to this script and return its lines, stripped."""
    path_f = os.path.dirname(os.path.abspath(__file__))
    with open(os.path.join(path_f, name_file)) as txt_file:
        return [line.rstrip() for line in txt_file]
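# Assumed input format for proxy_<protocol>.txt -- one proxy URL per line,
# e.g. http://201.131.164.150:3128 (the split in get_my_ip relies on the
# scheme://host:port shape). useragents.txt holds one User-Agent per line.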
def main():
    # Alternative IP-echo pages:
    # url = 'http://sitespy.ru/my-ip'
    # url = 'https://httpbin.org/ip'
    url = 'https://yandex.ru/internet/'
    protocol = 'https'
    useragents = get_data_from_file('useragents.txt')
    proxies = get_data_from_file(f'proxy_{protocol}.txt')
    ua = UserAgent()  # build once; re-instantiating per request is wasteful
    # Start at 17, presumably to skip proxies already checked on an earlier run.
    for i in range(17, len(proxies)):
        # Random delay between requests to avoid hammering the target page.
        time_await = random.uniform(2, 3)
        time.sleep(time_await)
        proxy = proxies[i]
        proxies_ = {
            "http": proxy,
            "https": proxy,
        }
        # headers = {'User-Agent': random.choice(useragents)}  # file-based alternative
        headers = {'User-Agent': ua.random}
        print(f'time_await: {time_await} request: {i} '
              + time.asctime(time.localtime(time.time())))
        try:
            response = get_html(url, headers, proxies_)
            get_my_ip(response.text, proxy, protocol)
        except (requests.RequestException, AttributeError, IndexError):
            # Dead proxy or unexpected page layout -- move on to the next one.
            continue
    # Direct (no-proxy) sanity check:
    # response = get_html(url)
    # get_my_ip(response.text, 'proxy://proxy:proxy', 'https')
if __name__ == '__main__':
    start_time = time.time()
    main()
    time_watcher(start_time)
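# To run (the file name is whatever you saved this script as):
#   python3 proxy_checker.py
# Working proxies accumulate in ip.txt next to the script.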