利用 Python 爬取實習僧網站上的資料
阿新 • • 發佈:2019-02-07
最近在找實習,就順便想到用 Python 爬取一些職位資訊,看看有哪些崗位比較缺人。
#_*_coding:utf-8_*_
import requests
from bs4 import BeautifulSoup
import xlwt
import re
# Workbook that collects the scraped rows; it is saved to disk by the
# __main__ section at the bottom of the script.
book = xlwt.Workbook()
# Create the worksheet that saveData() writes into.
# cell_overwrite_ok=True lets a cell be written more than once without xlwt
# raising an error.
sheet = book.add_sheet('sheet1', cell_overwrite_ok=True)
def getHtml():
    """Work out how many listing pages exist, then scrape them via saveData.

    Downloads the first listing page, takes the href of the link inside the
    last ``<li>`` of the ``div#pagebar`` element and reads the first run of
    digits in it as the number of the last page. The page number is printed
    and then ``saveData`` is called with ``lastpage + 1`` as its exclusive
    upper bound.

    Raises:
        IndexError: if the page has no ``div#pagebar`` element or the
            pagination bar contains no ``<li>`` entries.
        ValueError: if the last pagination link contains no page number.
    """
    url = 'http://www.shixiseng.com/interns?p='
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url=url, timeout=30)
    soup = BeautifulSoup(response.content, 'html.parser')  # parse the page source

    # Work out the number of pages in the position list.
    pagebar = soup.select('div#pagebar')[0]
    last_href = str(pagebar.select('li')[-1].a.attrs['href'])
    # Match any number of digits: the previous fixed-width pattern (exactly
    # three digits) failed for fewer than 100 pages and truncated the value
    # for 1000 pages or more.
    match = re.search(r'\d+', last_href)
    if match is None:
        raise ValueError('no page number found in pagination link: %r' % last_href)
    lastpage = int(match.group())
    print(lastpage)

    # saveData iterates range(1, lastpage), hence the + 1 to include the
    # last page.
    saveData(url, lastpage + 1)
def saveData(url,lastpage):
    """Scrape listing pages 1 .. lastpage - 1 and write them to ``sheet``.

    For every page the URL is built as ``url`` followed by the page number.
    Each ``div.list`` entry inside the first ``div.posi-list`` element
    becomes one spreadsheet row with four columns.

    Args:
        url: listing URL prefix to which the page number is appended.
        lastpage: exclusive upper bound of the page numbers to fetch.
    """
    # Index of the next free row; local to this call and shared by all pages.
    next_row = 0
    for page_no in range(1, lastpage):
        page_url = '%s%d' % (url, page_no)
        markup = requests.get(url=page_url).content
        document = BeautifulSoup(markup, 'html.parser')
        container = document.select('div.posi-list')[0]

        for entry in container.select('div.list'):
            # Extract all four fields before writing anything, so a
            # malformed entry raises without leaving a half-written row.
            fields = (
                # position name
                entry.select('div.names.cutom_font' )[0].a.text,
                # text of the first <a class="cutom_font"> in the entry
                # (presumably the company -- verify against the page markup)
                entry.find('a', class_='cutom_font').text,
                # address
                entry.find('div', class_='addr').span.text,
                # 'xz' block (presumably the salary -- verify against the
                # page markup)
                entry.find('div', class_='xz').span.text,
            )
            # Write the fields into columns 0-3 of the current row.
            for column, value in enumerate(fields):
                sheet.write(next_row, column, value)
            next_row += 1
if __name__ == '__main__':
    # Script entry point: scrape every listing page, then write the workbook.
    try:
        getHtml()
    finally:
        # Save even when scraping aborts part-way (e.g. a failed request on
        # a late page), so rows already collected are not lost. Any
        # exception from getHtml() still propagates after the save.
        book.save('shixiseng.xls')