All you'll need:
1) A list of proxies that work (port 80/8080)
This is made to run continuously and to randomize the gap between requests.
Code:
import urllib2, socket, time, httplib
from random import randint
class __access():
    """Fetch a URL through HTTP proxies while rotating the User-agent header.

    Attributes:
        proxy_file: file object the proxy list was read from; it is already
            closed by the time ``__init__`` returns.
        proxy_list: proxy addresses, one per non-blank line of the proxy
            file, with surrounding whitespace (including the newline) removed.
        proxy_len: number of entries in ``proxy_list``.
        h: list of ``('User-agent', value)`` header tuples to choose from.
    """

    def __init__(self, proxyfilename):
        """Load the proxy list from *proxyfilename* (one proxy per line)."""
        # The context manager closes the handle as soon as the lines are read.
        with open(proxyfilename, 'r') as proxy_file:
            raw_lines = proxy_file.readlines()
        self.proxy_file = proxy_file
        # Drop line endings and skip blank lines so that every entry is a
        # usable proxy address.
        self.proxy_list = [line.strip() for line in raw_lines if line.strip()]
        self.proxy_len = len(self.proxy_list)
        self.h = [
            ('User-agent', 'Mozilla/5.0'),
            ('User-agent', 'Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'),
            ('User-agent', 'Mozilla/5.0 (Linux; U; Android 4.0.3; de-ch; HTC Sensation Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'),
            ('User-agent', 'Mozilla/5.0 (Linux; U; Android 2.3.4; fr-fr; HTC Desire Build/GRJ22) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'),
            ('User-agent', 'Opera/9.80 (J2ME/MIDP; Opera Mini/9.80 (S60; SymbOS; Opera Mobi/23.348; U; en) Presto/2.5.25 Version/10.54'),
            ('User-agent', 'Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+'),
            ('User-agent', 'Mozilla/5.0 (BlackBerry; U; BlackBerry 9850; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.0.0.254 Mobile Safari/534.11+'),
            ('User-agent', 'Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; it) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.668 Mobile Safari/534.8+'),
            ('User-agent', 'Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en-US) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.446 Mobile Safari/534.8+'),
            ('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36'),
            ('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'),
            ('User-agent', 'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36'),
            ('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36'),
            ('User-agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.17 Safari/537.11'),
            ('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4'),
            ('User-agent', 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1'),
            ('User-agent', 'Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02'),
            ('User-agent', 'Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0'),
            ('User-agent', 'Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00'),
            ('User-agent', 'Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00'),
            ('User-agent', 'Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52'),
            ('User-agent', 'Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25'),
            ('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2'),
            ('User-agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10'),
            ('User-agent', 'Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko ) Version/5.1 Mobile/9B176 Safari/7534.48.3'),
        ]

    def access_url(self, url, pip):
        """Fetch *url* through the HTTP proxy *pip* with a random User-agent.

        On success the response body is appended to ``log.txt`` followed by
        a newline.

        Args:
            url: address to request.
            pip: proxy address used for ``http`` requests.

        Returns:
            The response body, or None when the request fails with a
            urllib2, httplib or socket error (the error is printed).
        """
        # Start at 1: a timeout of 0 switches sockets to non-blocking mode,
        # which makes the connection attempt fail immediately.
        timeout = randint(1, 20)
        # Derive the index range from the list so it cannot go out of bounds
        # when user agents are added or removed.
        user_agent = self.h[randint(0, len(self.h) - 1)]
        try:
            # NOTE: both calls below change process-wide state (default
            # socket timeout and the globally installed urllib2 opener).
            socket.setdefaulttimeout(timeout)
            proxy_handler = urllib2.ProxyHandler({'http': pip})
            opener = urllib2.build_opener(proxy_handler)
            opener.addheaders = [user_agent]
            urllib2.install_opener(opener)
            sock = urllib2.urlopen(urllib2.Request(url))
            try:
                data = sock.read()
            finally:
                # Release the connection even if reading fails.
                sock.close()
        except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
            # HTTPError is a URLError, and gaierror/herror/timeout are
            # socket.error subclasses, so this tuple covers all of them.
            print(e)
            return None
        with open('log.txt', 'a') as f:
            f.write(data)
            f.write('\n')
        return data
def main():
    """Request ``link`` through every proxy in ``proxies.txt``, forever.

    After each request the function sleeps for a random 0-20 seconds and
    then prints the result returned by ``access_url``. It returns early if
    the proxy file yields no entries; otherwise it never returns.
    """
    obj = __access('proxies.txt')
    # The URL needs an explicit scheme: urllib2 raises ValueError
    # ("unknown url type") for a bare host name, and access_url does not
    # catch that exception.
    link = 'http://link.com'
    if not obj.proxy_list:
        # Without this guard the loop below would spin without ever sleeping.
        print('proxies.txt contains no proxies')
        return
    while True:
        for proxy in obj.proxy_list:
            z = obj.access_url(link, proxy)
            time.sleep(randint(0, 20))
            print(z)


if __name__ == "__main__":
    main()
A: Because sending tons of requests back to back is bad. Really. Bad.
Enjoy, and I'm very happy to be here.






