1. 程式人生 > python代理爬取存入csv文件

python代理爬取存入csv文件

encoding res add tgw 6.0.3 save time gbk NPU

爬取高匿代理

 1 from urllib import request
 2 import re
 3 import time
 4 
 5 f = open(西1.csv,w,encoding=GBK)
 6 headers = {
 7 Cookie: _free_proxy_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFVEkiJTJjNTgwNDE4OTYyNTJiNjlmMmU2NDFhZWEwZjExMjFjBjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMVd4THYybCtNcW1yZmJFckpWUDJFdnNzam5hUCtuMGJLNEg0UFUzdE1XWTA9BjsARg%3D%3D--a43f95e415d8ee53f36f90941dbab43b1503d84b
, 8 User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36, 9 } 10 url = http://www.xicidaili.com/nn/{} 11 end = int(input(你想爬幾頁的代理呢)) 12 for pag in range(1,end+1): 13 fullurl = url.format(pag) 14 res = request.Request(fullurl,headers=headers)
15 response = request.urlopen(res) 16 html = response.read().decode(utf-8) 17 # print(html) 18 list_rule = re.compile(r<tr.*?</tr>,re.S) 19 list = list_rule.findall(html) 20 # print(list) 21 for td in list: 22 iplist_rule = re.compile(r<td>(.*?)</td>
,re.S) 23 iplist = iplist_rule.findall(td) 24 # print(iplist) 25 for ip_a in iplist: 26 a_rule = re.compile(r>(.*?)<,re.S) 27 address = a_rule.findall(ip_a) 28 for ip_address in address: 29 iplist[2]=(ip_address) 30 f.write(,.join(iplist) + \n) 31 print(第{}頁下載完成.format(pag)) 32 time.sleep(5) 33 f.close()

python代理爬取存入csv文件