1. 程式人生 > >教師結構化面試,一鍵獲取資料

教師結構化面試,一鍵獲取資料

教師結構化面試,一個程式設計師男朋友,為愛而碼


【需求】

  1、教師結構化面試資料太少,而且最好能列印

  2、資料儲存為 Word 檔案,方便共享


作為一名程式設計師,開始coding...

把如下程式碼儲存為download.py,雙擊執行,結果如下:


#coding=utf-8
from lxml import etree
import requests
from docx import Document
import re

class Download():
    """Scrape structured-interview articles from wap.zgjsks.com into .docx files.

    The workflow is: collect article links from the 30 index pages, work out
    how many sub-pages each article has, gather the text of every ``<p>``
    inside the article body, and write one Word document per article into
    the current working directory.
    """

    # Seconds to wait for each HTTP request before giving up; without a
    # timeout ``requests.get`` can block indefinitely on a stalled server.
    REQUEST_TIMEOUT = 30

    # A paragraph equal to one of these markers ends the article body.
    # NOTE(review): the original code only compared against the Traditional
    # Chinese spelling; the Simplified spelling is accepted as well on the
    # assumption that the scraped site serves Simplified text -- confirm.
    STOP_MARKERS = ('相關推薦:', '相关推荐:')

    def __init__(self):
        pass

    def _fetchTree(self, url):
        """Download *url* and return its content parsed by ``etree.HTML``."""
        response = requests.get(url, timeout=self.REQUEST_TIMEOUT)
        # Parse the raw bytes into a tree that can be queried with XPath.
        return etree.HTML(response.content)

    @staticmethod
    def _safeFileName(title):
        """Return *title* with each of ``\\ / : * ? " < > |`` replaced by ``-``."""
        # Raw string with an escaped backslash: the previous non-raw pattern
        # '[\/:...]' never matched a backslash at all.
        return re.sub(r'[\\/:*?"<>|]', '-', title)

    def getPageUrl(self):
        """Collect article links from index pages 1..30.

        Returns:
            dict mapping each article URL (the anchor's ``href``) to the
            anchor's text (which may be ``None`` when the anchor has no
            leading text).
        """
        paperAll = {}
        for pageNum in range(1, 31):
            url = "http://wap.zgjsks.com/html/jszp/mianshi/jiegouhua/{}.html".format(pageNum)
            selector = self._fetchTree(url)
            paperList = selector.xpath("//*[contains(concat(' ', @class, ' '), 'recruit_right')]/b/a")
            for paper in paperList:
                paperUrl = paper.get('href')
                if paperUrl is None:
                    # An anchor without href cannot be followed; skip it
                    # instead of raising KeyError.
                    continue
                paperAll[paperUrl] = paper.text
        return paperAll

    def getPagerNextMaxNum(self, paperUrl):
        """Return the exclusive upper bound of sub-page numbers for an article.

        The text of the first element whose class contains ``fenye`` is
        stripped of newlines/tabs and parentheses, split on ``/``, and the
        second field is converted to ``int``; the result is that number
        plus one. When that element is missing or cannot be parsed, 2 is
        returned, so that callers iterate over sub-page 1 only.
        """
        selector = self._fetchTree(paperUrl)
        try:
            pagerText = selector.xpath("//*[contains(concat(' ', @class, ' '), 'fenye')]")[0].text
            pageCount = pagerText.strip("\n\t").strip("()").split("/")[1]
            return int(pageCount) + 1
        except (IndexError, AttributeError, ValueError):
            # No pager element, pager without text, or unexpected format.
            return 2

    def getNodeText(self, nodeP):
        """Return all text inside *nodeP*, in document order.

        Includes *nodeP*'s own leading text, the complete text of every
        descendant element, and the tail text following each child. The
        tail of *nodeP* itself is not included. Comment and
        processing-instruction nodes contribute only their tail.
        """
        parts = []
        if nodeP.text is not None:
            parts.append(nodeP.text)
        for childNode in nodeP:
            # Real elements have a string tag; comments and processing
            # instructions do not, and their .text is not visible content.
            if isinstance(childNode.tag, str):
                # Use the recursive result so deeper descendants are kept.
                parts.append(self.getNodeText(childNode))
            if childNode.tail is not None:
                parts.append(childNode.tail)
        return ''.join(parts)

    def download(self):
        """Fetch every listed article and save each one as ``<title>.docx``.

        For each article, sub-pages ``<name>_1.html`` up to the page count
        reported by :meth:`getPagerNextMaxNum` are fetched, the text of
        every ``<p>`` under the ``article_box_info`` element is collected,
        and paragraphs are written until a stop marker is met.
        """
        paperAll = self.getPageUrl()
        for paperUrl, paperTitle in paperAll.items():
            print("[*D]{} -- {}".format(paperTitle, paperUrl))
            if not paperTitle:
                # Heading and file name both need a string; fall back to
                # the URL when the link carried no text.
                paperTitle = paperUrl

            pagerNextMaxNum = self.getPagerNextMaxNum(paperUrl)
            paperContent = []
            for pageNextUrlNum in range(1, pagerNextMaxNum):
                pageNextUrl = paperUrl.replace(".html", "_{}.html".format(pageNextUrlNum))
                selector = self._fetchTree(pageNextUrl)
                paperList = selector.xpath("//*[contains(concat(' ', @class, ' '), 'article_box_info')]/p")
                paperContent.extend(self.getNodeText(paper) for paper in paperList)

            document = Document()
            document.add_heading(paperTitle, 0)
            for paperLine in paperContent:
                if paperLine in self.STOP_MARKERS:
                    break
                document.add_paragraph(paperLine)

            document.save('{}.docx'.format(self._safeFileName(paperTitle)))


if __name__ == "__main__":
    downloadObj = Download()
    downloadObj.download()