1. 程式人生 > Python爬蟲 —— 抓取美女圖片

Python爬蟲 —— 抓取美女圖片

In root lxml 取圖 ext time style main HR

代碼如下:

#coding:utf-8
from __future__ import print_function

# import datetime
import codecs
import os
import sys

import requests
from lxml import etree

 9 class Spider:
10     def __init__(self):
11         self.headers = {}
12         self.headers[User_Agent] = Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0
13 self.headers[Referer] = http://www.mzitu.com/all/ 14 15 def crawl(self, root_url): 16 html_text = requests.get(root_url,headers=self.headers).text 17 html_tree = etree.HTML(html_text) 18 groups = html_tree.xpath("//div[@class=‘main-content‘]//ul[@class=‘archives‘]//a
") 19 count = 0 20 print "開始抓取:" 21 for group in groups: 22 title = group.text 23 groupUrl = group.get(href) 24 print "正在抓取組圖:"+title 25 dirpath = self.makDir(title) #獲取標題,並以標題為名字創建文件夾 26 self.getGroup(groupUrl,dirpath) #
27 count = count+1 28 if count>=5: 29 print "抓取完成……" 30 os._exit(0) 31 32 def makDir(self,dirname): 33 dirpath = os.path.join(uE:\學習資料,dirname) 34 if not os.path.exists(dirpath): 35 os.makedirs(dirpath) 36 return dirpath 37 38 def getGroup(self,groupUrl,dirpath): 39 self.headers[Referer] = groupUrl 40 html_text = requests.get(groupUrl, headers=self.headers).text 41 html_tree = etree.HTML(html_text) 42 maxPage = html_tree.xpath("//div[@class=‘pagenavi‘]//span")[-2].text #獲取改組圖的張數 43 for page in range(1,int(maxPage)+1): #獲取每一張圖的所在頁面 44 pageUrl = groupUrl + / + str(page) #拼接頁面url 45 self.getPage(pageUrl,page,dirpath) #訪問該頁面 46 47 def getPage(self, pageUrl,page,dirpath): 48 self.headers[Referer] = pageUrl 49 page_text = requests.get(pageUrl, headers=self.headers).text #請求該圖所在的頁面 50 page_tree = etree.HTML(page_text) 51 imageurl = page_tree.xpath("//div[@class=‘main-image‘]//img")[0].get(src) #獲取圖片url 52 image = requests.get(imageurl, headers=self.headers).content #請求獲取圖片 53 self.saveImage(image,page,dirpath) 54 55 def saveImage(self,image,page,dirpath): 56 imagepath = os.path.join(dirpath, str(page) + u.jpg) 57 file = codecs.open(imagepath, wb) 58 file.write(image) 59 file.close() 60 61 if __name__ == __main__: 62 reload(sys) 63 sys.setdefaultencoding(utf-8) 64 Mzitu = Spider() 65 Mzitu.crawl(http://www.mzitu.com/all)

Python爬蟲 —— 抓取美女圖片