I'll give you the source code for a program that automatically grabs HideMyAss premium proxies and saves them to a .txt file. It's a Python script; usage notes follow the code.
Code:
import re
import time
import json
import socket
import urllib2
from optparse import OptionParser
__version__ = 2.0
class Grabber(object):
    def __init__(self):
        self.headers = {
            'User-Agent': 'Proxist %s' % __version__,
            'Accept': 'application/json',
            'Connection': 'keep-alive',
            'X-Requested-With': 'XMLHttpRequest'}
        self.pattern = {
            'entry': r'<tr class=".+?" rel="\d+?">(.+?)</tr>',
            'pages': r'<a href="/\d+?">(\d+?)</a>'}
        self.page = 1
        self.html = self.NextPage()
        self.count = self.GetCount(self.html)  # number of pages
        self.entries = self.ExtractEntries(self.html)
    def run(self, count, fname, saveall):
        header = ('Status'
                  '| IP Address    '
                  '|Port '
                  '| Country          '
                  '|Protocol'
                  '|Anonymity'
                  '|Speed')
        fmt = "%-6s|%-15s|%-5s|%-18s|%-8s|%-9s|%-6s"
        print header
        hFile = open(fname, 'a')
        if hFile and saveall:
            hFile.write("-" * 77)
            hFile.write("\n" + header + "\n")
            hFile.write("-" * 77 + "\n")
        # fetch further pages until enough entries have been collected
        while len(self.entries) < count and self.page <= self.count:
            self.html = self.NextPage()
            self.entries += self.ExtractEntries(self.html)
        ProxyHandler = Proxy(self.entries)
        ip_port = ProxyHandler.Extract(self.entries[:count])  # returns [(ip, port), ...]
        for proxy in ip_port:
            status, country, proto, anon, speed = ProxyHandler.Check(proxy)
            log = fmt % (status, proxy[0], proxy[1], country, proto, anon, speed)
            print log
            if hFile:
                if saveall:
                    hFile.write(log + "\n")
                else:
                    hFile.write(proxy[0] + ":" + proxy[1] + "\n")
        if hFile:
            hFile.close()
    def NextPage(self):
        self.request = urllib2.Request(
            "http://proxylist.hidemyass.com/" + str(self.page),
            headers=self.headers)
        page = urllib2.urlopen(self.request).read()
        self.page += 1  # advance so the next call fetches the following page
        return self.Unescape(page)
    def Unescape(self, html):
        unescaped = html.replace(r'\"', '"')
        unescaped = unescaped.replace(r'\/', '/')
        unescaped = unescaped.replace(r'\n', '\n')
        return unescaped
    def ExtractEntries(self, html):
        entries = re.findall(self.pattern['entry'], html, re.DOTALL)
        return entries
    def GetCount(self, html):
        pages = re.findall(self.pattern['pages'], html)
        return int(pages[-1])  # last page number in the pagination links

class Proxy(object):
    def __init__(self, entry):
        self.pattern = {
            'ip': r'</style>(.+?)</span></td>',
            'port': r'<td>(\d+?)</td>',
            'country': r'"country":"(.+?)"',
            'none': r'\.(\S+?){display:none}',
            'tags': r'<(\w+?) (\w+?)="(.+?)">(.+?)</(\w+?)>',
            'info': r'({"id":.+?})'}
        self.request = (
            'POST {} HTTP/1.1\r\n'
            'Host: www.checker.freeproxy.ru\r\n'
            'User-Agent: Proxist 2.0\r\n'
            'X-Requested-With: XMLHttpRequest\r\n'
            'Connection: keep-alive\r\n'
            'Content-Type: application/x-www-form-urlencoded\r\n'
            'Content-Length: {}\r\n\r\n')
    def Extract(self, entries):
        ip_port = []
        for entry in entries:
            nones = ['display:none'] + self.FindAll(self.pattern['none'], entry)
            entry = entry.replace('<span></span>', '')
            ip_port += [(self.GetIP(entry, nones), self.GetPort(entry))]
        return ip_port
    def Check(self, proxy):
        anon_types = {
            'HIA': 'Elite',
            'ANM': 'Medium',
            'NOA': 'None'}
        while True:
            http = socket.socket()
            http.connect(('www.checker.freeproxy.ru', 80))
            data = 'data=' + proxy[0] + '%3a' + proxy[1]
            try:
                country = self.Post(http,
                                    '/engine/parser.php',
                                    len(data),
                                    self.pattern['country'],
                                    data)
                result = self.Post(http,
                                   '/engine/results.php',
                                   0,
                                   self.pattern['info'])
                http.close()
                break
            except socket.error:
                http.close()
                continue
        result = json.loads(result)
        status = result['status']
        speed = protocol = anon = 'N/A'  # defaults when the proxy is dead or untyped
        if status == 'valid':
            speed = result['speed']
            fail = [None, 'FAIL']
            if result['socks5'] not in fail:
                protocol = 'SOCKS5'
                anon = anon_types[result['socks5']]
            elif result['socks4'] not in fail:
                protocol = 'SOCKS4'
                anon = anon_types[result['socks4']]
            elif result['https'] not in fail:
                protocol = 'HTTPS'
                anon = anon_types[result['https']]
            elif result['http'] not in fail:
                protocol = 'HTTP'
                anon = anon_types[result['http']]
        return status, country, protocol, anon, speed
    def Post(self, sock, path, length, pattern, data=''):
        request = self.request.format(path, length)
        while True:
            try:
                sock.send(request + data)
                response = sock.recv(65535)
                response = re.search(pattern, response).group(1)
                break
            except AttributeError:
                # no match in the reply: the country lookup may legitimately
                # be missing, otherwise wait a second and ask the checker again
                if pattern == self.pattern['country']:
                    response = 'N/A'
                    break
                else:
                    time.sleep(1)
        return response
    def Search(self, pattern, string, option=0):
        return re.search(pattern, string, option).group(1)
    def FindAll(self, pattern, string):
        return re.findall(pattern, string)
    def GetIP(self, html, nones):
        ip = self.Search(self.pattern['ip'],
                         html,
                         re.DOTALL)
        tags = self.FindAll(self.pattern['tags'], ip)
        for t in tags:
            if t[2] in nones:
                ip = self._StripNone(ip, t)
            else:
                ip = self._StripTrash(ip, t)
        ip = ip.replace('</span>', '')
        return ip
    def GetPort(self, html):
        return self.Search(self.pattern['port'], html)
    def _StripNone(self, proxy, t):
        pattern = r'<{} {}="{}">{}</{}>'.format(t[0], t[1], t[2], t[3], t[4])
        return proxy.replace(pattern, '')
    def _StripTrash(self, proxy, t):
        pattern = r'<{} {}="{}">'.format(t[0], t[1], t[2])
        return proxy.replace(pattern, '')

def main():
    parser = OptionParser()
    parser.add_option("-o", "--output", dest="output",
                      type="string", help="Output file",
                      metavar="FILE", default="proxist.txt")
    parser.add_option("-n", dest="count",
                      type="int", help="Number of proxies to dump",
                      metavar="N", default=50)
    parser.add_option("-a", "--all", dest="saveall",
                      help="Store all information", action="store_true",
                      default=False)
    options, args = parser.parse_args()
    output = options.output
    count = options.count
    saveall = options.saveall
    grabber = Grabber()
    grabber.run(count, output, saveall)

if __name__ == '__main__':
    main()
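
Usage: by default the script appends ip:port pairs to proxist.txt; -n sets how many proxies to dump, -o picks the output file, and -a/--all writes the full status table instead of bare ip:port lines. If you'd rather drive it from another script than from the command line, here's a minimal sketch (it assumes you saved the code above as proxist.py; the output filename is just an example):
Code:
from proxist import Grabber

grabber = Grabber()                        # fetches page 1 and reads the page count
grabber.run(25, 'my_proxies.txt', False)   # dump 25 proxies as plain ip:port lines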