1. 程式人生 > Python 通過lxml遍歷html xpath

Python 通過lxml遍歷html xpath

items ted windows cli XML sleep -a header Coding

#coding:utf-8
'''
Created on 2017年10月9日

@author: li.liu
'''
from selenium import webdriver
from lxml import etree
import urllib
import urllib2
import time

#url=‘http://www.woyihome.com‘
url=http://sso.woyihome.com/sso/pc-login
#url=‘http://www.baidu.com‘
user_agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36
values = {name : WHY, location : SDU, language : Python } headers = { User-Agent : user_agent } data = urllib.urlencode(values) req = urllib2.Request(url, data, headers) response = urllib2.urlopen(req) html1= response.read().encode(utf-8
) def test1(): x1={} #html1=urllib.urlopen(url).read().decode(‘utf-8‘) #print html1 hxml=etree.HTML(html1) #print hxml htree=etree.ElementTree(hxml) #print htree id_dite=htree.xpath(//*[@id]) #print id_dite coun=0 for id_items in id_dite: #print id_items.items()
#print htree.getpath(id_items) for id_item in id_items.items(): #print id_item if id_item[0]==id: id_str=//*[@id="+id_item[1]+"] x1[id_str]=[] #print id_str id_path=htree.getpath(htree.xpath(id_str)[0]) #print id_path id_str1=id_str+//* idelem_list=htree.xpath(id_str1) #print idelem_list for e in idelem_list: if len(e.items())==0: pass else: e_path=htree.getpath(e) #print e_path e_path1=e_path.split(id_path) #print e_path1[1] if len(e_path1)>1: e_str=id_str+e_path1[1] e_list=e_str.split(/) if li in e_list[len(e_list)-1] or ul in e_list[len(e_list)-1] or span in e_list[len(e_list)-1]: pass else: #print e_str coun+=1 x1[id_str].append(e_str) ‘‘‘ for i in x1: #print i for i1 in x1[i]: print i1 ‘‘‘ a=0 b=0 driver=webdriver.Chrome() driver.get(url) #print driver.title for i in x1: #print i for i1 in x1[i]: #print i1 try: d=driver.find_element_by_xpath(i1) a+=1 print d.text time.sleep(2) driver.find_element_by_xpath(i1).click() headx=driver.window_handles #print headx print 當前頁面地址:\n,driver.current_url time.sleep(1) print i,\n if len(headx)!=1: driver.switch_to_window(headx[1]) durl= driver.current_url print 當前頁面地址:\n,durl,\n if woyihome in durl: driver.close() driver.switch_to_window(headx[0]) else: k=1 break elif localhost in driver.current_url: print a except : pass #print b print a #driver.quit() #print ‘====================================================‘ print coun test1()

Python 通過lxml遍歷html xpath