1. 程式人生 > python 爬取媒體文件(使用chrome代理,啟動客戶端,有防火墻)

python 爬取媒體文件(使用chrome代理,啟動客戶端,有防火墻)

time read for nsh 中文 add page json clas

# coding=utf-8
'''
中文地址轉經緯度 (geocode Chinese addresses to longitude/latitude)
'''
import json
import time
import urllib.parse
import urllib.request

import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver

# NOTE(review): a service credential is hard-coded here; consider loading it
# from configuration or the environment instead of committing it.
AK = 'C2hKkyF9fHbmzESq6dmSArZIzw8wEiS1'
INPUT_CSV = './data/test.csv'
OUTPUT_CSV = './data/result_test.csv'
# Raw string so the backslashes of the Windows path are taken literally.
CHROMEDRIVER_PATH = r'D:\Program Files (x86)\ChromeDriver\chromedriver.exe'
GEOCODER_URL = 'http://api.map.baidu.com/geocoder/v2/?'


class LoadData:
    """Geocode addresses by loading the geocoder URL in a Chrome session.

    Each address is sent to ``GEOCODER_URL`` through a Selenium-driven Chrome
    instance; the JSON shown in the resulting page is parsed for a
    ``lng``/``lat`` pair and the outcome is accumulated in ``loc_result``.
    """

    def __init__(self):
        """Start the Chrome driver and create an empty result list."""
        print("start")
        # NOTE(review): recent Selenium releases may not accept the driver
        # path as a positional argument — confirm against the installed
        # version.
        self.m_driver = webdriver.Chrome(CHROMEDRIVER_PATH)
        # One [addr, lng, lat] entry per processed address.
        self.loc_result = []

    @staticmethod
    def _build_url(addr, city=''):
        """Return the geocoder request URL for *addr* (optionally in *city*)."""
        params = urllib.parse.urlencode(
            {'address': addr, 'city': city, 'ak': AK, 'output': 'json'})
        return GEOCODER_URL + params

    @staticmethod
    def _parse_location(payload_text):
        """Extract ``(lng, lat)`` from a geocoder JSON response body.

        Returns ``(None, None)`` when the text is not valid JSON or when it
        carries no ``result -> location`` mapping, so that callers can always
        unpack two values.
        """
        try:
            payload = json.loads(payload_text)
        except json.JSONDecodeError:
            return None, None
        # Handled defensively: an unsuccessful response may carry a missing
        # or non-dict "result".
        result = payload.get('result') if isinstance(payload, dict) else None
        location = result.get('location') if isinstance(result, dict) else None
        if not isinstance(location, dict):
            return None, None
        return location.get('lng'), location.get('lat')

    def get_uri(self, addr, city=''):
        """Query the geocoder for *addr* and return ``(lng, lat)``.

        Returns ``(None, None)`` when the page holds no ``<pre>`` element or
        the response contains no location.
        """
        self.m_driver.get(self._build_url(addr, city))
        bs = BeautifulSoup(self.m_driver.page_source, 'lxml')
        # The JSON body is read from the first <pre> element of the page.
        if bs.pre is None:
            return None, None
        return self._parse_location(bs.pre.get_text())

    def get_lng_lat(self, addr):
        """Geocode *addr* and append ``[addr, lng, lat]`` to ``loc_result``.

        Failed lookups are still recorded (with ``None`` coordinates) after
        printing ``"error"``.
        """
        lng, lat = self.get_uri(addr)
        if lng is None or lat is None:
            print("error")
        self.loc_result.append([addr, lng, lat])

    def main(self, input_path=INPUT_CSV, output_path=OUTPUT_CSV):
        """Geocode every value of the ``ADDRESS`` column and write the results.

        Args:
            input_path: CSV file containing an ``ADDRESS`` column.
            output_path: File that receives ``str(self.loc_result)``.
        """
        table = pd.read_csv(input_path, encoding='utf-8')
        for addr in table['ADDRESS'].tolist():
            self.get_lng_lat(addr)
        # NOTE(review): the output is the repr of a Python list, not CSV rows,
        # despite the default file name — kept as-is to preserve the format.
        with open(output_path, 'w', encoding='utf-8') as outfp:
            outfp.write(str(self.loc_result))

    def close(self):
        """Shut down the Chrome driver started in ``__init__``."""
        self.m_driver.quit()


if __name__ == '__main__':
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed time.
    tStart = time.perf_counter()
    LD = LoadData()
    try:
        LD.main()
    finally:
        LD.close()
    tEnd = time.perf_counter()
    print("%s s" % (tEnd - tStart))

附錄:

chromedriver.exe與chrome版本映射及下載鏈接

https://blog.csdn.net/mmayanshuo/article/details/78962398

python 爬取媒體文件(使用chrome代理,啟動客戶端,有防火墻)