Python 學習:爬取豆瓣電影名稱及評分
阿新 • • 發佈:2018-12-31
import requests
from bs4 import BeautifulSoup
import bs4
import re
def getHTMLText(url, timeout=10):
    """Download *url* and return the response body as text.

    The body is decoded with the encoding requests detects from the content
    (``apparent_encoding``) instead of the one declared by the server.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection problem, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "nothing to parse".
        # Only request failures are swallowed; programming errors and
        # KeyboardInterrupt still propagate.
        return ""
    r.encoding = r.apparent_encoding
    return r.text
def fillUnivList(ulist, rlist, html):
    """Extract titles and ratings from *html*, appending them to the lists.

    Every ``<div class="pl2">`` element is handled as one entry. The title
    is the first whitespace-separated token of the text of the entry's first
    ``<a>`` element; the rating is the text of its
    ``<span class="rating_nums">`` element. Each extracted pair is also
    printed as ``title : rating``.

    Args:
        ulist: List extended in place with one title per entry.
        rlist: List extended in place with one rating per entry; holds
            "無評價" for entries without a rating span.
        html: Markup of the page to parse.
    """
    soup = BeautifulSoup(html, "html.parser")
    for entry in soup.find_all("div", "pl2"):
        link = entry.find("a")
        words = link.text.split() if link is not None else []
        if not words:
            # No usable title: skip the entry so both lists stay aligned.
            continue
        title = words[0]

        # A single lookup decides both "is there a rating" and its value,
        # so a missing span can never be dereferenced.
        rating_tag = entry.find("span", attrs={"class": "rating_nums"})
        rating = rating_tag.text if rating_tag is not None else "無評價"

        ulist.append(title)
        rlist.append(rating)
        # Print the values just appended rather than indexing from 0, which
        # would show stale items when the lists were not empty on entry.
        print("{} : {}".format(title, rating))
def main():
    """Fetch and print every listing page of the Douban movie tag.

    Requests the tag URL with ``start`` offsets 0, 20, ..., 980 (50 pages)
    and passes each page to ``fillUnivList``, which prints the entries it
    finds. The per-page lists are fresh for every request and are not kept
    afterwards.
    """
    for start in range(0, 1000, 20):
        uinfo = []
        rinfo = []
        url = "https://movie.douban.com/tag/中國電影?start=" + str(start) + "&type=T"
        html = getHTMLText(url)
        fillUnivList(uinfo, rinfo, html)
if __name__ == "__main__":
    # Start scraping only when run as a script, so importing this module
    # does not fire off network requests.
    main()