1. 程式人生 > >爬圖交互界面及翻頁初嘗式

爬圖交互界面及翻頁初嘗式

語法 aso raw star write conn aid zip pen

# -*- coding:utf-8 -*-


import requests, re, os,urllib2
class TP:
    """Download the ``BDE_Image`` pictures of a Baidu Tieba thread, page by page.

    This is Python 2 code: page fetching relies on ``urllib2`` and the script
    entry point below relies on ``raw_input``.
    """

    # Locates the reply-count <li>; the captured group is the text of the
    # first <span> that follows the first </span> inside it. start() uses
    # that text as the number of pages.
    _PAGE_COUNT_RE = re.compile(
        r'<li class="l_reply_num" .*?</span>.*?<span.*?>(.*?)</span>', re.S)

    # Captures an image URL up to (not including) its ".jpg" suffix. The dot
    # is escaped so that only a literal ".jpg" terminates the match.
    _IMAGE_RE = re.compile(r'<img class="BDE_Image" src="(.*?)\.jpg"')

    def __init__(self, baseUrl):
        """Remember the thread address.

        baseUrl -- thread URL without a query string,
                   e.g. 'http://tieba.baidu.com/p/5307547413'.
        """
        self.baseURL = baseUrl

    def getPage(self, pageNum):
        """Fetch page ``pageNum`` of the thread.

        Returns the page decoded as UTF-8, or None when urllib2 reports a
        URLError (the reason is printed when the error carries one).
        """
        url = self.baseURL + '?pn=' + str(pageNum)
        try:
            res = urllib2.Request(url)
            return urllib2.urlopen(res).read().decode('UTF-8')
        except urllib2.URLError as e:
            # Not every URLError subclass instance is guaranteed to expose
            # ``reason``, hence the hasattr() guard.
            if hasattr(e, "reason"):
                # repr() keeps the print from failing on non-ASCII reasons.
                print(u"錯誤 " + repr(e.reason))
            return None

    def getPageNum(self, page):
        """Extract the page-count text from the thread's first page.

        page -- HTML of the first page; when None, page 1 is fetched here.

        Returns the stripped text captured by the page-count pattern, or
        None when the page is unavailable or the pattern does not match.
        """
        if page is None:
            page = self.getPage(1)
        if page is None:
            # Nothing to search; re.search(pattern, None) would raise.
            return None
        result = self._PAGE_COUNT_RE.search(page)
        if result:
            return result.group(1).strip()
        return None

    def getContent(self, html):
        """Save every ``BDE_Image`` .jpg referenced in ``html``.

        html -- HTML of one thread page (None or empty is skipped).

        Each image is written to the current directory under the last path
        segment of its URL. Returns None.
        """
        if not html:
            return
        header = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip,deflate,sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Connection': 'keep-alive',
        }
        for stem in self._IMAGE_RE.findall(html):
            # ``stem`` lacks the ".jpg" suffix, so add it back for both the
            # request URL and the local file name.
            img_url = stem + '.jpg'
            name = stem.split('/')[-1]
            reply = requests.get(img_url, headers=header)
            with open(name + '.jpg', 'wb') as f:
                f.write(reply.content)

    def start(self):
        """Crawl every page of the thread and save its images.

        Prints progress as it goes. Stops early with "URL error" when the
        page count cannot be determined.
        """
        indexPage = self.getPage(1)
        pageN = self.getPageNum(indexPage)
        if pageN is None:
            print("URL error")
            return
        i = 0  # last page attempted; reported if an I/O error interrupts the crawl
        try:
            print(u"該帖子有" + str(pageN) + u"頁!")
            for i in range(1, int(pageN) + 1):
                print(u"正在讀入第" + str(i) + u"頁數據")
                self.getContent(self.getPage(i))
        except IOError as e:
            # Covers file-write errors as well as network errors raised as
            # IOError subclasses.
            print(u"寫入第" + str(i) + u"頁數據失敗: " + repr(e))
        finally:
            print(u"爬取任務完成^_^")


if __name__ == "__main__":
    # Ask for the thread id and crawl it. Guarded so that importing this
    # module does not prompt for input.
    print(u"請寫入帖子號碼")
    baseUrl = 'http://tieba.baidu.com/p/' + str(raw_input(u'http://tieba.baidu.com/p/'))
    pt = TP(baseUrl)
    pt.start()

問題尚未解決:目前無法翻頁,圖片也下載不出來。明天再檢查語法,仔細梳理邏輯。

爬圖交互界面及翻頁初嘗式