1. 程式人生 > >簡易python爬蟲爬取boss直聘職位,並寫入excel

簡易python爬蟲爬取boss直聘職位,並寫入excel

python爬蟲寫入excel

1、默認城市是杭州,代碼如下:

# -*- coding: utf-8 -*-

from urllib import request, parse

from bs4 import BeautifulSoup

import datetime

import xlwt

# Timestamp captured at module load; the elapsed run time is measured from it.
starttime = datetime.datetime.now()

# Boss Zhipin job-search URL; the scity value selects the default city (Hangzhou).
url = 'https://www.zhipin.com/job_detail/?scity=101210100'


def read_page(url, page_num, keyword):
    """Fetch one page of search results and return it as decoded text.

    Args:
        url: Base search URL to request.
        page_num: Page number to request; sent as ``page`` and as the
            ``ka`` value ``page-<page_num>``.
        keyword: Search term, sent as ``query``.

    Returns:
        The response body decoded as UTF-8.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    # Browser-like headers so the request resembles one sent by a browser.
    page_headers = {
        'Host': 'www.zhipin.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36 '
                      'Chrome/45.0.2454.85 Safari/537.36 115Browser/6.0.3',
        'Connection': 'keep-alive'
    }
    # Parameters the browser would send with the search request.
    page_data = parse.urlencode([
        ('ka', 'page-' + str(page_num)),
        ('page', page_num),
        ('query', keyword)
    ])
    req = request.Request(url, headers=page_headers)
    # NOTE(review): passing data= makes urllib send these parameters as a POST
    # body rather than in the query string -- confirm this is what the site
    # expects.
    # The with-block closes the response (and its socket) even if read() raises.
    with request.urlopen(req, data=page_data.encode('utf-8')) as response:
        raw_page = response.read()
    return raw_page.decode('utf-8')

if __name__ == '__main__':
    # Script entry point: prompt for a job keyword, scrape result pages 1-4,
    # write the text of every '.company-text' element into column 0 of an
    # .xls sheet (one row per element), then report the elapsed time.
    print('**********************************即將進行抓取**********************************')
    keyword = input('請輸入您要搜索的職位:')
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet('sheet1')
    row = 0  # next free row in the sheet, shared across all pages
    for page_num in range(1, 5):
        # Name the parser explicitly: without it BeautifulSoup picks whichever
        # parser is installed, emits a warning, and results can differ
        # between machines.
        soup = BeautifulSoup(read_page(url, page_num, keyword), 'html.parser')
        for link in soup.select('.company-text'):
            sheet.write(row, 0, link.get_text())
            row += 1
    workbook.save("D:\\resultsLatest.xls")
    endtime = datetime.datetime.now()
    # total_seconds() covers the whole interval; timedelta.seconds alone drops
    # whole days. Truncated to int to keep the printed format unchanged.
    elapsed_seconds = int((endtime - starttime).total_seconds())
    print('總共用時:%s s' % elapsed_seconds)


2、爬取的結果

技術分享圖片


簡易python爬蟲爬取boss直聘職位,並寫入excel