[RELEASE] HideMyAss Premium Proxy Grabber

Yo,

I'll give you the source code for a program that automatically grabs HideMyAss premium proxies and saves them to a .txt file. It's a Python 2 script!
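The only really tricky part is that HMA obfuscates the IP column: the cell is padded with extra tags, and some of them are hidden with inline CSS (display:none), so a naive scrape picks up decoy digits. The script collects the class names declared as display:none and throws those elements away before stripping the remaining tags. Here's a stripped-down sketch of the idea (the markup and class names below are made up for illustration, the real rows are messier than this):

Code:
import re

# made-up example of an obfuscated IP cell (the real HMA rows are messier)
cell = ('<style>.x1{display:none}</style>'
        '<span class="ok">93</span>'
        '<span class="x1">218</span>'   # decoy digits, hidden by the CSS above
        '.<span class="ok">184</span>.216.34')

# collect the class names that the inline CSS hides
hidden = re.findall(r'\.(\w+)\{display:none\}', cell)

# drop the inline <style> block itself
cell = re.sub(r'<style>.*?</style>', '', cell)

# remove hidden elements together with their decoy contents
for cls in hidden:
    cell = re.sub(r'<span class="%s">.*?</span>' % cls, '', cell)

# strip the remaining tags to leave the plain IP
ip = re.sub(r'<[^>]+>', '', cell)
print(ip)   # 93.184.216.34

The full script does the same thing in Proxy.Extract / GetIP with a generic tag pattern, but the principle is identical.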

Code:
import re
import time
import json
import socket
import urllib2
from optparse import OptionParser

__version__ = 2.0

class Grabber(object):
    def __init__(self):
        self.headers = {
            'User-Agent':'Proxist %s'%__version__,
            'Accept':'application/json',
            'Connection':'keep-alive',
            'X-Requested-With':'XMLHttpRequest'}
        self.pattern = {
            'entry':r'<tr class=".+?" rel="\d+?">(.+?)</tr>',
            'pages':r'<a href="/\d+?">(\d+?)</a>'}
        self.page = 1
        self.html = self.NextPage()
        self.count = self.GetCount(self.html) # number of pages
        self.entries = self.ExtractEntries(self.html)
    def run(self, count, fname, saveall):
        header = ('Status'
                  '|   IP Address  '
                  '|Port '
                  '|      Country     '
                  '|Protocol'
                  '|Anonymity'
                  '|Speed')
        fmt = "%-6s|%-15s|%-5s|%-18s|%-8s|%-9s|%-6s"
        print header
        hFile = open(fname, 'a')
        if hFile and saveall:
            hFile.write("-"*77)
            hFile.write("\n"+header+"\n")
            hFile.write("-"*77+"\n")
        # keep fetching pages until we have enough entries (or run out of pages)
        while len(self.entries) < count and self.page <= self.count:
            self.html = self.NextPage()
            self.entries += self.ExtractEntries(self.html)
        ProxyHandler = Proxy(self.entries)
        ip_port = ProxyHandler.Extract(self.entries[:count]) # returns [(ip, port), ...]
        for proxy in ip_port:
            status, country, proto, anon, speed = ProxyHandler.Check(proxy)
            log = fmt%(status, proxy[0], proxy[1], country, proto, anon, speed)
            print log
            if hFile:
                if saveall:
                    hFile.write(log+"\n")
                else:
                    hFile.write(proxy[0]+":"+proxy[1]+"\n")
        if hFile:
            hFile.close()
    def NextPage(self):
        # fetch the current page of the proxy list and advance the page counter
        self.request = urllib2.Request(
            "http://proxylist.hidemyass.com/"+str(self.page),
            headers=self.headers)
        page = urllib2.urlopen(self.request).read()
        self.page += 1
        return self.Unescape(page)
    def Unescape(self, html):
        # the AJAX response comes back JSON-escaped; undo the \" \/ \n escaping
        unescaped = html.replace(r'\"', '"')
        unescaped = unescaped.replace(r'\/', '/')
        unescaped = unescaped.replace(r'\n', '\n')
        return unescaped
    def ExtractEntries(self, html):
        entries = re.findall(self.pattern['entry'], html, re.DOTALL)
        return entries
    def GetCount(self, html):
        pages = re.findall(self.pattern['pages'], html)
        return int(pages[-1]) # last page
    
class Proxy(object):
    def __init__(self, entry):
        self.pattern = {
            'ip':r'</style>(.+?)</span></td>',
            'port':r'<td>(\d+?)</td>',
            'country':r'"country":"(.+?)"',
            'none':r'\.(\S+?){display:none}',
            'tags':r'<(\w+?) (\w+?)="(.+?)">(.+?)</(\w+?)>',
            'info':r'({"id":.+?})'}
        self.request = (
            'POST {} HTTP/1.1\r\n'
            'Host: www.checker.freeproxy.ru\r\n'
            'User-Agent: Proxist 2.0\r\n'
            'X-Requested-With: XMLHttpRequest\r\n'
            'Connection: keep-alive\r\n'
            'Content-Type: application/x-www-form-urlencoded\r\n'
            'Content-Length: {}\r\n\r\n')
    def Extract(self, entries):
        # pull (ip, port) out of each table row; first collect the class names
        # that HMA hides with display:none so GetIP can throw those decoys away
        ip_port = []
        for entry in entries:
            nones = ['display:none'] + self.FindAll(self.pattern['none'], entry)
            entry = entry.replace('<span></span>', '')
            ip_port += [(self.GetIP(entry, nones), self.GetPort(entry))]
        return ip_port
    def Check(self, proxy):
        # test the proxy against checker.freeproxy.ru and read back its
        # country, protocol, anonymity level and speed
        anon_types = {
            'HIA':'Elite',
            'ANM':'Medium',
            'NOA':'None'}
        while True:
            http = socket.socket()
            data = 'data='+proxy[0]+'%3a'+proxy[1]
            try:
                http.connect(('www.checker.freeproxy.ru', 80))
                country = self.Post(http,
                                    '/engine/parser.php',
                                    len(data),
                                    self.pattern['country'],
                                    data)
                result = self.Post(http,
                                   '/engine/results.php',
                                   0,
                                   self.pattern['info'])
                http.close()
                break
            except socket.error:
                # network trouble: close the socket and try again
                http.close()
                continue
        result = json.loads(result)
        status = result['status']
        if status=='valid':
            speed = result['speed']
            fail = [None, 'FAIL']
            if result['socks5'] not in fail:
                protocol = 'SOCKS5'
                anon = anon_types[result['socks5']]
            elif result['socks4'] not in fail:
                protocol = 'SOCKS4'
                anon = anon_types[result['socks4']]
            elif result['https'] not in fail:
                protocol = 'HTTPS'
                anon = anon_types[result['https']]
            elif result['http'] not in fail:
                protocol = 'HTTP'
                anon = anon_types[result['http']]
            else:
                # marked valid but no protocol check passed
                protocol = 'N/A'
                anon = 'N/A'
        else:
            speed = 'N/A'
            protocol = 'N/A'
            anon = 'N/A'
        return status, country, protocol, anon, speed
    def Post(self, sock, path, length, pattern, data=''):
        request = self.request.format(path, length)
        while True:
            try:
                sock.send(request+data)
                response = sock.recv(65535)
                response = re.search(pattern, response).group(1)
                break
            except AttributeError:
                if pattern == self.pattern['country']:
                    response = 'N/A'
                    break
                else:
                    time.sleep(1)
        return response
    def Search(self, pattern, string, option=0):
        return re.search(pattern, string, option).group(1)
    def FindAll(self, pattern, string):
        return re.findall(pattern, string)
    def GetIP(self, html, nones):
        # the IP cell mixes the real digits with decoy tags; elements hidden
        # via display:none are removed whole, everything else just loses its tags
        ip = self.Search(self.pattern['ip'], html, re.DOTALL)
        tags = self.FindAll(self.pattern['tags'], ip)
        for t in tags:
            if t[2] in nones:
                ip = self._StripNone(ip, t)
            else:
                ip = self._StripTrash(ip, t)
        ip = ip.replace('</span>', '')
        return ip
    def GetPort(self, html):
        return self.Search(self.pattern['port'], html)
    def _StripNone(self, proxy, t):
        # remove a hidden element together with its decoy contents
        pattern = '<{} {}="{}">{}</{}>'.format(t[0], t[1], t[2], t[3], t[4])
        return proxy.replace(pattern, '')
    def _StripTrash(self, proxy, t):
        # remove only the opening tag and keep the visible contents
        pattern = '<{} {}="{}">'.format(t[0], t[1], t[2])
        return proxy.replace(pattern, '')

def main():
    parser = OptionParser()
    parser.add_option("-o", "--output", dest="output",
                      type="string", help="Output file",
                      metavar="FILE", default="proxist.txt")
    parser.add_option("-n", dest="count",
                      type="int", help="Number of proxies to dump",
                      metavar="N", default=50)
    parser.add_option("-a", "--all", dest="saveall",
                      help="Store all information", action="store_true",
                      default=False)
    options, args = parser.parse_args()
    output = options.output
    count = options.count
    saveall = options.saveall

    grabber = Grabber()
    grabber.run(count, output, saveall)

if __name__ == '__main__':
    main()
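
If you save it as e.g. proxist.py (the name doesn't matter), you run it with Python 2 roughly like this:

Code:
# grab 50 proxies (the default) and write them as ip:port lines to proxist.txt
python proxist.py

# grab 100 proxies into a custom file
python proxist.py -n 100 -o myproxies.txt

# -a / --all also stores status, country, protocol, anonymity and speed
python proxist.py -n 100 -o myproxies.txt -a

Without -a the output file just gets one ip:port per line, which is the format most other tools expect. The checker it relies on (www.checker.freeproxy.ru) has to be reachable, otherwise the script keeps retrying.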