1. 程式人生 > >TOP100 必看的電影你錯過了嗎?

TOP100 必看的電影你錯過了嗎?

話不多說,直接上程式碼:

from urllib import request,parse
import re
import xlwt

# ---- Spreadsheet setup ----

# Header row: bold, coloured font, centred both horizontally and vertically.
font_title = xlwt.Font()
font_title.bold = True
font_title.colour_index = 0x31
font_title.name = '幼圓'

alignment0 = xlwt.Alignment()
alignment0.vert = xlwt.Alignment.VERT_CENTER
alignment0.horz = xlwt.Alignment.HORZ_CENTER

style0 = xlwt.XFStyle()
style0.alignment = alignment0
style0.font = font_title

# Data rows: bold font, vertically centred only (horizontal alignment is
# left at the library default).
font_body = xlwt.Font()
font_body.bold = True
font_body.colour_index = 0x08
font_body.name = '華文楷體'

alignment1 = xlwt.Alignment()
alignment1.vert = xlwt.Alignment.VERT_CENTER

style1 = xlwt.XFStyle()
style1.alignment = alignment1
style1.font = font_body

# Workbook with a single worksheet.
wb = xlwt.Workbook()
ws = wb.add_sheet("人生必看電影TOP100")

# Column widths for the first three columns, written as 256 * N
# (presumably N characters wide -- confirm against the xlwt docs).
# The fourth column keeps its default width.
col0, col1, col2 = ws.col(0), ws.col(1), ws.col(2)
for column, chars in zip((col0, col1, col2), (20, 45, 30)):
    column.width = 256 * chars

# Header row (row 0): movie title, cast, release date, score.
for col_index, heading in enumerate(('電影名稱', '主演', '上映時間', '評分')):
    ws.write(0, col_index, heading, style0)


# Accumulators for the four scraped fields, filled in page order.
# Score holds (integer_part, fraction_part) string tuples.
Name = []
Star = []
Releasetime = []
Score = []

# The request headers are the same for every page, so build them once
# instead of on every loop iteration.
# NOTE(review): the Cookie is a captured browser session value hard-coded
# into the script -- confirm whether it is still needed / still valid.
# NOTE(review): urllib does not decompress response bodies; if the server
# honours 'Accept-Encoding: deflate' the utf-8 decode below would fail --
# confirm.
headers = {
    'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding':'deflate',
    'Accept-Language':'zh-CN,zh;q=0.9',
    'Cache-Control':'max-age=0',
    'Connection':'keep-alive',
    'Cookie':'__mta=45578011.1541345815725.1541380877343.1541380888601.20; uuid_n_v=v1; uuid=75667970E04711E89CCFC511A3A3AA062F57C39460854EBFB5ABF721B74CE428; _lxsdk_cuid=166df5eaac1c8-08bef925d4afc-65547628-1cb7b9-166df5eaac1c8; _lxsdk=75667970E04711E89CCFC511A3A3AA062F57C39460854EBFB5ABF721B74CE428; _csrf=91e89bf12159f48b0b638b46ec5e99b58429b96aaea3d29b13c22e593b679b33; _lx_utm=utm_source%3DBaidu%26utm_medium%3Dorganic; __mta=45578011.1541345815725.1541380877343.1541380886583.20; _lxsdk_s=166e16edfcc-d54-ef8-582%7C%7C39',
    'Host':'maoyan.com',
    'Referer':'http://maoyan.com/board',
    'Upgrade-Insecure-Requests':'1',
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3510.2 Safari/537.36',
}

# Patterns are compiled once and written as raw strings so that `\s` reaches
# the regex engine untouched (in a normal string literal it is an invalid
# escape sequence and triggers a warning on current Python versions).
_NAME_RE = re.compile(r'<p class="name"><a .*>(.*?)</a></p>')
_STAR_RE = re.compile(r'<p class="star">[\s]*(.*?)[\s]*</p>')
_RELEASETIME_RE = re.compile(r'<p class="releasetime">(.*?)</p>')
_SCORE_RE = re.compile(
    r'<p class="score"><i class="integer">(.*)</i><i class="fraction">(.*?)</i></p>'
)

# The board is paginated ten entries at a time via the `offset` query
# parameter; ten requests are made, with offsets 0, 10, ..., 90.
for offset in range(0, 100, 10):
    url = "http://maoyan.com/board/4?offset=" + str(offset)
    req = request.Request(url, headers=headers)
    # The context manager closes the HTTP response even if reading or
    # decoding fails.
    with request.urlopen(req) as res:
        html = res.read().decode("utf-8")

    Name.extend(_NAME_RE.findall(html))
    Star.extend(_STAR_RE.findall(html))
    Releasetime.extend(_RELEASETIME_RE.findall(html))
    Score.extend(_SCORE_RE.findall(html))
# Write one spreadsheet row per scraped movie, starting at row 1 (row 0 is
# the header). Iterating the collected lists directly writes every entry;
# the previous hard-coded range(99) dropped the 100th movie and raised
# IndexError whenever fewer than 99 entries had been scraped.
# zip() stops at the shortest list, so a row is only written when all four
# fields are available for it.
for row, (title, cast, released, rating) in enumerate(
        zip(Name, Star, Releasetime, Score), start=1):
    ws.write(row, 0, title, style1)
    ws.write(row, 1, cast, style1)
    ws.write(row, 2, released, style1)
    # rating is an (integer_part, fraction_part) pair of strings; joining
    # them yields the displayed score text.
    ws.write(row, 3, rating[0] + rating[1], style1)

wb.save("TOP100.xls")
print("搞定了。。。")