[Python] Proxy & UA Randomization + Access Link

05/27/2014 22:51 benjameen#1
I was totally shocked by how hard this was to make, not because of the code itself but because of the logic, and I thought people could appreciate it for generating views/whatever!

All you'll need:
1) A list of proxies that work (port 80/8080)

This is made to run continuously and to randomize the gap between requests.
Code:
import urllib2, socket, time, httplib
from random import randint

class __access():
	"""Fetch URLs through caller-supplied HTTP proxies with a random User-agent.

	Attributes:
		proxy_file: the (already closed) file object the proxies were read from.
		proxy_list: proxy addresses, one per non-blank line of the file, with
			surrounding whitespace removed.
		proxy_len: number of entries in proxy_list.
		h: pool of ('User-agent', value) header tuples to choose from.
	"""

	def __init__(self, proxyfilename):
		"""Load the proxy list from *proxyfilename* (one proxy per line)."""
		# The handle is only needed while reading, so close it right away
		# instead of keeping it open for the lifetime of the object.
		with open(proxyfilename, 'r') as proxy_file:
			self.proxy_file = proxy_file
			# Drop trailing newlines and blank lines so every entry is a
			# usable proxy address.
			self.proxy_list = [line.strip() for line in proxy_file if line.strip()]
		self.proxy_len = len(self.proxy_list)
		self.h = [
			('User-agent','Mozilla/5.0'),
			('User-agent','Mozilla/5.0 (Linux; U; Android 4.0.3; ko-kr; LG-L160L Build/IML74K) AppleWebkit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'),
			('User-agent','Mozilla/5.0 (Linux; U; Android 4.0.3; de-ch; HTC Sensation Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30'),
			('User-agent','Mozilla/5.0 (Linux; U; Android 2.3.4; fr-fr; HTC Desire Build/GRJ22) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1'),
			('User-agent','Opera/9.80 (J2ME/MIDP; Opera Mini/9.80 (S60; SymbOS; Opera Mobi/23.348; U; en) Presto/2.5.25 Version/10.54'),
			('User-agent','Mozilla/5.0 (BlackBerry; U; BlackBerry 9900; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.1.0.346 Mobile Safari/534.11+'),
			('User-agent','Mozilla/5.0 (BlackBerry; U; BlackBerry 9850; en) AppleWebKit/534.11+ (KHTML, like Gecko) Version/7.0.0.254 Mobile Safari/534.11+'),
			('User-agent','Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; it) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.668 Mobile Safari/534.8+'),
			('User-agent','Mozilla/5.0 (BlackBerry; U; BlackBerry 9800; en-US) AppleWebKit/534.8+ (KHTML, like Gecko) Version/6.0.0.446 Mobile Safari/534.8+'),
			('User-agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36'),
			('User-agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'),
			('User-agent','Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1667.0 Safari/537.36'),
			('User-agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1664.3 Safari/537.36'),
			('User-agent','Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.17 Safari/537.11'),
			('User-agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_0) AppleWebKit/537.4 (KHTML, like Gecko) Chrome/22.0.1229.79 Safari/537.4'),
			('User-agent','Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/19.77.34.5 Safari/537.1'),
			('User-agent','Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02'),
			('User-agent','Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0'),
			('User-agent','Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00'),
			('User-agent','Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00'),
			('User-agent','Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52'),
			('User-agent','Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25'),
			('User-agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/537.13+ (KHTML, like Gecko) Version/5.1.7 Safari/534.57.2'),
			('User-agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like Gecko) Version/5.1.3 Safari/534.53.10'),
			('User-agent','Mozilla/5.0 (iPad; CPU OS 5_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko ) Version/5.1 Mobile/9B176 Safari/7534.48.3'),
		]

	def access_url(self, url, pip):
		"""Request *url* through the HTTP proxy *pip* and log the response body.

		On success the body is appended to 'log.txt', followed by a newline.

		Args:
			url: absolute URL to fetch.
			pip: proxy address; surrounding whitespace is ignored.

		Returns:
			The response body, or None when the request failed (the error is
			printed).
		"""
		# Start at 1: a timeout of 0 switches the socket to non-blocking mode,
		# which makes the request fail immediately.
		timeout = randint(1, 20)
		proxy_handler = urllib2.ProxyHandler({'http': pip.strip()})
		opener = urllib2.build_opener(proxy_handler)
		# Index from the list's real length so the pool can grow or shrink.
		opener.addheaders = [self.h[randint(0, len(self.h) - 1)]]
		try:
			# Use this opener and timeout for this request only, rather than
			# changing the process-wide default opener and socket timeout.
			sock = opener.open(urllib2.Request(url), timeout=timeout)
			try:
				data = sock.read()
			finally:
				sock.close()
		except (urllib2.URLError, httplib.HTTPException, socket.error) as e:
			# HTTPError is a URLError, and gaierror/herror are socket.error
			# subclasses, so this one clause covers all the original handlers.
			print(e)
			return None
		with open('log.txt', 'a') as f:
			f.write(data)
			f.write('\n')
		return data
def main():
	"""Request the link through each proxy in 'proxies.txt', forever.

	After every request the loop sleeps for a random 0-20 seconds and then
	prints whatever access_url returned. Returns immediately if the proxy
	file holds no usable entries.
	"""
	obj = __access('proxies.txt')
	# urllib2 rejects a URL without a scheme ("unknown url type"), and that
	# ValueError is not caught by access_url, so the scheme must be present.
	link = 'http://link.com'
	# Strip line endings and skip blank lines so each entry is a real address.
	proxies = [entry.strip() for entry in obj.proxy_list if entry.strip()]
	if not proxies:
		# With nothing to iterate, the endless loop below would spin
		# without ever reaching the sleep.
		print('proxies.txt contains no proxies')
		return
	while True:
		for proxy in proxies:
			z = obj.access_url(link, proxy)
			time.sleep(randint(0, 20))
			print(z)
# Start the request loop only when run as a script, not when imported.
if __name__ == "__main__":
	main()
Q: Why not use multi-threading?
A: Because tons of requests is bad. Really. Bad.

Enjoy and very happy to be here :)
05/30/2014 21:28 MrDami123#2
I can recommend using the requests module. It is much easier than urllib2.

Code:
    from time import sleep
    from random import randint, choice
    import requests

    class Viewer:
        """Visit URLs through a proxy using one shared requests session.

        Attributes:
            headers: list of header sets to pick from at random on each
                visit; empty by default and meant to be filled in by the
                user.
            browser: the requests.Session reused for every visit, so cookies
                persist between requests.
        """

        def __init__(self):
            self.headers = [] # your header list
            self.browser = requests.Session() # with cookies!

        def visit(self, url, proxy):
            """GET *url* through *proxy*, log the visit, then pause 0-20 seconds.

            Args:
                url: address to request.
                proxy: proxy address used for both http and https;
                    surrounding whitespace (e.g. a newline left over from
                    readlines()) is ignored.
            """
            proxy = proxy.strip()
            proxies = {
              "http": proxy,
              "https": proxy,
            }
            # choice() raises IndexError on an empty list, so keep the
            # session's current headers until a header list is supplied.
            if self.headers:
                self.browser.headers = choice(self.headers)
            try:
                self.browser.get(url=url, timeout=15, proxies=proxies)
                with open("log.txt", "a") as log:
                    log.write("log message")
            except (requests.RequestException, IOError) as error:
                # Catch request and log-file failures only; a bare except
                # would also swallow KeyboardInterrupt and hide the cause.
                print("Request via %s failed: %s" % (proxy, error))
            sleep(randint(0,20))
            
            

    # Read one proxy per line, dropping line endings and blank lines so each
    # entry is a usable address.
    with open("proxylist.txt", "r") as txt:
        proxylist = [line.strip() for line in txt if line.strip()]

    Browser = Viewer()
    url = "http://www.example.org/"

    # Visit the URL once through every proxy in the list.
    for proxy in proxylist:
        # The method is named `visit`; the previous `vistit` raised
        # AttributeError on the first iteration.
        Browser.visit(url, proxy)