#!/usr/bin/python3
# Collect SOCKS proxy addresses from several public listing sites
# (premproxy.com, proxyscrape.com, proxyscan.io) and print them to stdout
# as a JSON array of {"addrport": [address, port]} entries.
import urllib.request
import sys
import re
import os
import json
import getopt


def convert_simple_list(html):
    # Parse a plain-text "address:port" list, one entry per CRLF-separated line.
    result = []
    for line in html.split(b"\r\n"):
        st = re.search(rb"^([\d.]*):(\d*)$", line)
        if st:
            try:
                addr = st.group(1).decode('utf-8')
                port = st.group(2).decode('utf-8')
                result += [{"addrport": (addr, port)}]
            except IndexError:
                pass
            except UnicodeError as e:
                print(e, file=sys.stderr)
    return result
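# Illustrative (hypothetical) input for convert_simple_list: a response such as
# b"1.2.3.4:1080\r\n5.6.7.8:9050\r\n" produces
# [{"addrport": ("1.2.3.4", "1080")}, {"addrport": ("5.6.7.8", "9050")}].

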
def find_addrports(html):
    # Scan arbitrary text or HTML for IPv4 "address:port" pairs, line by line.
    result = []
    for line in html.split(b"\r\n"):
        st = re.finditer(rb"(\d+\.\d+\.\d+\.\d+):(\d*)", line)
        for match in st:
            try:
                addr = match.group(1).decode('utf-8')
                port = match.group(2).decode('utf-8')
                result += [{"addrport": (addr, port)}]
            except IndexError:
                pass
            except UnicodeError as e:
                print(e, file=sys.stderr)
    return result
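# Illustrative (hypothetical) input for find_addrports: any line containing
# b"... 203.0.113.7:1080 ..." contributes {"addrport": ("203.0.113.7", "1080")};
# lines without an IPv4:port pair are skipped.

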
def convert_proxyscan_io(html):
    # Extract address/port pairs from proxyscan.io's HTML markup: an IPv4
    # address between ">" and "<", followed by the nearest ">digits<" run.
    result = []
    regex = re.compile(rb">(\d+\.\d+\.\d+\.\d+)<.*?>(\d*)<", re.MULTILINE | re.DOTALL)
    for match in re.finditer(regex, html):
        try:
            addr = match.group(1).decode('utf-8')
            port = match.group(2).decode('utf-8')
            result += [{"addrport": (addr, port)}]
        except IndexError:
            pass
        except UnicodeError as e:
            print(e, file=sys.stderr)
    return result
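# Illustrative (hypothetical) fragment for convert_proxyscan_io: markup such as
# b"<td>198.51.100.2</td> <td>4145</td>" yields
# {"addrport": ("198.51.100.2", "4145")} (the real page's markup may differ).

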
def established_process1():
    results = []

    # Each source URL is paired with the parser that understands its response format.
    sources = [
        ('https://premproxy.com/socks-by-country/United-States-03.htm', find_addrports),
        ('https://premproxy.com/socks-by-country/United-States-04.htm', find_addrports),
        ('https://premproxy.com/socks-by-country/United-States-05.htm', find_addrports),
        ('https://premproxy.com/socks-by-country/United-States-06.htm', find_addrports),
        ('https://api.proxyscrape.com/?request=getproxies&proxytype=socks5&timeout=10000&country=all',
         convert_simple_list),
        ('https://www.proxyscan.io/Home/FilterResult?status=1&ping=&selectedType=SOCKS5',
         convert_proxyscan_io),
    ]
    for url, parser in sources:
        with urllib.request.urlopen(url) as response:
            html = response.read()
        results += parser(html)

    print(json.dumps(results))
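# The JSON written to stdout is a flat array, e.g. (illustrative only):
#   [{"addrport": ["203.0.113.7", "1080"]}, {"addrport": ["198.51.100.2", "4145"]}]

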
def usage():
    # Minimal help text for the single supported option.
    print("usage: %s [-f]" % sys.argv[0], file=sys.stderr)
    print("  -f  fetch proxy lists from the remote sources "
          "(default: parse the local ./list file)", file=sys.stderr)


if __name__ == '__main__':
    try:
        opts, args = getopt.getopt(sys.argv[1:], "f")
    except getopt.GetoptError as err:
        print(str(err), file=sys.stderr)
        usage()
        sys.exit(2)

    doFetch = False
    for o, a in opts:
        if o == "-f":
            doFetch = True

    if doFetch:
        established_process1()
    else:
        # Without -f, parse a previously saved list in the current directory
        # and print it in the same JSON format.
        url = 'file:///%s/list' % os.getcwd()
        with urllib.request.urlopen(url) as response:
            html = response.read()
        print(json.dumps(find_addrports(html)))
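# Example invocations (script name hypothetical):
#   ./fetch_proxies.py -f    # fetch the remote sources and print JSON
#   ./fetch_proxies.py       # parse a previously saved ./list file instead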