Slexy.org is shutting down and stopped accepting new Pastes on May 4th, 2021.
Existing Pastes will stop being available on or after May 10th, 2021.
Author: Not specified Language: python
Description: Not specified Timestamp: 2017-09-27 08:52:49 +0000
View raw paste Reply
import os
import time
from selenium import webdriver

# Raw output file: every scraped proxy is appended here as "ip:port" (or a
# pre-formatted block), then deduplicated at the end of the script.
fileWrite = open("proxyListSelenium.txt", "w")

# Single Chrome session shared by all scraping sections below.
browser = webdriver.Chrome()

# --- Scrape idcloak.com ---------------------------------------------------
# Pagination is driven by clicking the numbered input buttons on the page;
# each results table exposes up to 100 proxy rows (tr[2]..tr[101]) with the
# IP in td[8] and the port in td[7].
print("Scraping idcloak.com")
browser.get('http://www.idcloak.com/proxylist/proxy-list.html')
for page in range(1, 10):  # pages 1..9 (same bound as the original while-loop)
    browser.find_element_by_xpath('//*[@id="proxy-search"]/div[2]/div/input[' + str(page) + ']').click()
    print("Page: " + str(page))
    for row in range(2, 102):  # table rows 2..101
        try:
            ip = browser.find_element_by_xpath('//*[@id="sort"]/tbody/tr[' + str(row) + ']/td[8]')
            port = browser.find_element_by_xpath('//*[@id="sort"]/tbody/tr[' + str(row) + ']/td[7]')
            fileWrite.write(ip.text + ":" + port.text + "\n")
        except Exception:
            # Narrowed from a bare `except:` so Ctrl-C / SystemExit still work;
            # a missing row is expected on short pages and is simply skipped.
            print("Exception caught - Moving on")
       
#-------------------------------------------------------------------------------------------
# --- Scrape cool-proxy.net ------------------------------------------------
# Results are addressed directly by URL (page:N); each page exposes table
# rows 2..22 with the IP in td[1] and the port in td[2].
print("Scraping cool-proxy.net")
for page in range(1, 13):  # pages 1..12
    browser.get('https://www.cool-proxy.net/proxies/http_proxy_list/page:' + str(page) + '/sort:score/direction:desc')
    print("Page: " + str(page))
    for row in range(2, 23):  # rows 2..22
        if row == 7:
            # Row 7 was skipped by the original author — presumably an ad or
            # separator row rather than a proxy entry; TODO confirm on site.
            continue
        try:
            ip = browser.find_element_by_xpath('//*[@id="main"]/table/tbody/tr[' + str(row) + ']/td[1]')
            port = browser.find_element_by_xpath('//*[@id="main"]/table/tbody/tr[' + str(row) + ']/td[2]')
            fileWrite.write(ip.text + ":" + port.text + "\n")
        except Exception:
            # The original had no handler here, so one missing row aborted the
            # whole script; skip it like the other scraper sections do.
            print("Exception caught - Moving on")

#-------------------------------------------------------------------------------------------
# --- Scrape premproxy.com -------------------------------------------------
# Each page carries its full ip:port listing inside a single <pre> element,
# so one element read per page is enough.  (Removed the unused `i = 0`.)
print("Scraping premproxy.com")
for page in range(1, 16):  # pages 1..15
    browser.get('https://premproxy.com/list/ip-port/' + str(page) + '.htm')
    print("Page: " + str(page))
    try:
        listing = browser.find_element_by_xpath('//*[@id="pricing"]/div/div/div/pre')
        # NOTE(review): assumes the <pre> text ends with a newline; if it does
        # not, the last entry of one page runs into the first of the next —
        # verify against the live page.
        fileWrite.write(listing.text)
    except Exception:
        # A layout change on a single page should not abort the whole scrape.
        print("Exception caught - Moving on")

#-------------------------------------------------------------------------------------------
# --- Scrape hidemy.name ---------------------------------------------------
print("Scraping hidemy.name")
# Warm-up request before the paginated loop; the 5s sleep presumably lets the
# site finish its JS/cookie checks — TODO confirm this is still required.
browser.get('https://hidemy.name/en/proxy-list/?start=1#list')
time.sleep(5)
# The 'start' query parameter is a row offset advancing 64 rows per page
# (1, 65, 129, ..., 705 — same sequence as the original `page += 64` loop).
for offset in range(1, 769, 64):
    browser.get('https://hidemy.name/en/proxy-list/?start=' + str(offset) + '#list')
    print("Page: " + str(offset))
    for row in range(1, 65):  # rows 1..64
        try:
            ip = browser.find_element_by_xpath('//*[@id="content-section"]/section[1]/div/table/tbody/tr[' + str(row) + ']/td[1]')
            port = browser.find_element_by_xpath('//*[@id="content-section"]/section[1]/div/table/tbody/tr[' + str(row) + ']/td[2]')
            fileWrite.write(ip.text + ":" + port.text + "\n")
        except Exception:
            # The original had no handler, so a short final page (fewer than
            # 64 rows) would abort everything after it; skip missing rows.
            print("Exception caught - Moving on")

#-------------------------------------------------------------------------------------------
# --- Scrape proxydb.net ---------------------------------------------------
# Pagination uses an 'offset' query parameter, 20 rows per page; only the
# anchor in td[1] is read per row (its text is written as-is, one per line).
print("Scraping proxydb.net")
for offset in range(0, 201, 20):  # offsets 0, 20, ..., 200
    browser.get('http://proxydb.net/?offset=' + str(offset))
    print("Page: " + str(offset))
    for row in range(1, 21):  # rows 1..20
        try:
            entry = browser.find_element_by_xpath('/html/body/div[2]/table/tbody/tr[' + str(row) + ']/td[1]/a')
            fileWrite.write(entry.text + "\n")
        except Exception:
            # Deliberately best-effort: a missing row (short last page) is
            # skipped.  Narrowed from a bare `except:` so Ctrl-C still works.
            pass

       


# All scraping done: shut down the Chrome session and flush/close the raw
# proxy list before the dedup pass below reopens it for reading.
browser.quit()

fileWrite.close()

# --- Post-processing: remove duplicate lines ------------------------------
# Rewrite proxyListSelenium.txt keeping only the first occurrence of each
# line.  `with` guarantees both handles are closed even if a write fails —
# the original closed them manually, so an error mid-copy leaked the handles.
lines_seen = set()  # lines already written to the output

with open("proxyListSelenium.txt", "r") as readFile:
    with open("unique.txt", "w") as uniqueFile:
        for line in readFile:
            if line not in lines_seen:  # not a duplicate
                uniqueFile.write(line)
                lines_seen.add(line)

# Swap the deduplicated file into place of the raw one.
os.remove("proxyListSelenium.txt")
os.rename("unique.txt", "proxyListSelenium.txt")
View raw paste Reply