Python爬蟲 學習 淘寶頁面定向爬取 DAY5
阿新 • • 發佈:2019-01-01
# Targeted scraper for Taobao search-result pages: fetches a few result pages
# for one keyword, extracts (price, title) pairs from the JSON embedded in the
# HTML, prints them as a table and writes the same table to a text file.
import json
import re

# Default location of the exported table (kept from the original script).
DEFAULT_OUTPUT_PATH = r'C:\Users\lenovo\Desktop\書包價格.txt'

# chr(12288) is the full-width (ideographic) space; it is used as the fill
# character in the format specs below.
_FILL = chr(12288)
_HEADER_FMT = "{0:{3}<6}{1:{3}<8}\t{2:{3}^16}"
_ROW_FMT = "{0:{3}<4}\t{1:{3}^8}\t{2:{3}^16}"

# Compiled once; the capture group is the value between the quotes.
_PRICE_RE = re.compile(r'"view_price":"([\d.]*)"')
# The title pattern accepts backslash escapes so an escaped quote inside a
# title does not end the match early.
_TITLE_RE = re.compile(r'"raw_title":"((?:[^"\\]|\\.)*)"')


def getHTMLTxt(url):
    """Fetch *url* and return the response body as text.

    Returns an empty string when the request fails (connection error,
    timeout, or a non-2xx status), so callers can treat a failed page as
    "no results" instead of handling exceptions.
    """
    # Imported lazily so the parsing/formatting helpers stay importable on
    # machines where the third-party ``requests`` package is not installed.
    import requests

    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
    except requests.RequestException:
        # Deliberate best-effort: a failed page yields no rows.
        return ""
    # Use the encoding detected from the content rather than the header.
    r.encoding = r.apparent_encoding
    return r.text


def _decode_json_string(raw):
    """Decode JSON escapes (``\\uXXXX``, ``\\"`` ...) in *raw*.

    Returns *raw* unchanged when it is not a valid JSON string body.
    """
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        return raw


def dealHTML(ulist, html):
    """Append ``[price, title]`` pairs found in *html* to *ulist*.

    Prices come from ``"view_price":"..."`` and titles from
    ``"raw_title":"..."``; the n-th price is paired with the n-th title.
    Values are extracted with regex capture groups -- scraped text is never
    passed to ``eval`` -- so titles containing ``:`` are kept intact.  If the
    page yields different numbers of prices and titles, the surplus entries
    are ignored rather than raising ``IndexError``.
    """
    prices = _PRICE_RE.findall(html)
    titles = _TITLE_RE.findall(html)
    for price, title in zip(prices, titles):
        ulist.append([price, _decode_json_string(title)])


def _format_rows(ulist):
    """Yield the header line followed by one formatted line per row of *ulist*."""
    yield _HEADER_FMT.format("序號", "價格", "商品名稱", _FILL)
    for count, row in enumerate(ulist, start=1):
        yield _ROW_FMT.format(count, row[0], row[1], _FILL)


def printHTML(ulist, html):
    """Print *ulist* (rows of ``[price, title]``) as a numbered table.

    *html* is unused; it is kept so existing callers keep working.
    """
    for line in _format_rows(ulist):
        print(line)


def wTXT(ulist, html, path=DEFAULT_OUTPUT_PATH):
    """Write *ulist* as a numbered table to the text file at *path*.

    The file is overwritten and encoded as UTF-8 so that full-width fill
    characters and arbitrary title characters can always be written,
    independent of the platform's default encoding.  *html* is unused; it is
    kept so existing callers keep working.
    """
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in _format_rows(ulist))


def main(goods="書包", page=3):
    """Scrape *page* result pages for keyword *goods*, then print and save them."""
    urlm = "https://s.taobao.com/search?q="
    ulist = []
    html = ""  # stays defined even when page == 0
    for i in range(page):
        # The "s" query parameter is the result offset; it advances by 44 per page.
        url = urlm + goods + "&s=" + str(44 * i)
        html = getHTMLTxt(url)
        dealHTML(ulist, html)
    printHTML(ulist, html)
    wTXT(ulist, html)


if __name__ == "__main__":
    main()