1. 程式人生 > 利用python爬取點小圖片,滿足私欲(爬蟲)

利用python爬取點小圖片,滿足私欲(爬蟲)

.text write ret append jpg use download div pat

import requests
import re
import os,sys

# Module-level accumulators: the gallery URLs and gallery titles collected
# while crawling, appended in the order they are encountered.
links = []
titles = []

# Request headers carrying a desktop-browser User-Agent string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/61.0.3163.100 Safari/537.36"
    ),
}

def _gallery_page_count(link):
    """Return how many pages the gallery at *link* has, or None if unknown.

    The count is read from the gallery's pager markup. A failed request or
    a page without a recognizable pager is reported on stdout and yields
    None so the caller can skip the gallery instead of aborting the crawl.
    """
    try:
        response = requests.get(link, headers=headers, timeout=5)
        response.raise_for_status()
    except requests.RequestException as error:
        print(error)
        return None
    # Capture only the digits that follow the pager prefix, so the parse
    # does not depend on the exact characters that come after the number.
    match = re.search(r'<div class="page"><li><a>共\s*(\d+)', response.text)
    if match is None:
        print('no page count found on %s' % link)
        return None
    return int(match.group(1))


def _download_gallery(link, title, target_dir, page_count):
    """Save one image per gallery page into *target_dir*.

    Args:
        link: URL of the gallery; its last five characters are treated as
            the file extension (e.g. ``.html``) when building page URLs.
        title: Name used as the prefix of every saved file.
        target_dir: Existing directory that receives the images.
        page_count: Number of gallery pages to visit, starting at 1.

    A page that fails for any reason is reported on stdout and skipped;
    the remaining pages are still attempted.
    """
    for i in range(1, page_count + 1):
        print('正在下載第%s頁' % i)
        # Format the pieces rather than the URL itself, so a literal '%'
        # inside the link cannot be mistaken for a format directive.
        pic_url = '%s_%s%s' % (link[:-5], i, link[-5:])
        print(pic_url)
        try:
            page_response = requests.get(pic_url, headers=headers, timeout=5)
            page_response.raise_for_status()
            pic_data_link = 'http://www.zbjuran.com' + page_response.text.split('<img src="')[1].split('" /></div>')[0]
            pic_response = requests.get(pic_data_link, headers=headers, timeout=5)
            pic_response.raise_for_status()
            # Open the file only after the image bytes have arrived, so a
            # failed download does not leave an empty file behind.
            pic_path = os.path.join(target_dir, '%s_%s.JPG' % (title, i))
            with open(pic_path, 'wb') as pic_download:
                pic_download.write(pic_response.content)
        except Exception as error:
            # Deliberately broad: the download is best-effort per image.
            print(error)
            continue


def get_url(page, base_dir='/Users/b1ancheng/mzpc'):
    """Crawl one listing page and download each gallery it links to.

    Every gallery found on the listing page has its URL appended to the
    module-level ``links`` list and its title appended to ``titles``.
    Galleries whose folder already exists under *base_dir* are skipped;
    the others get a new folder named after the title and are downloaded
    into it.

    Args:
        page: Listing page number, substituted into the listing URL.
        base_dir: Directory under which one folder per gallery is created.

    Returns:
        False if at least one gallery was skipped because its folder
        already existed, otherwise None.

    Raises:
        requests.RequestException: If the listing page cannot be fetched.
    """
    url = 'http://www.zbjuran.com/mei/xinggan/list_13_%s.html' % (page)
    data = requests.get(url, headers=headers, timeout=5).text
    data_use = re.findall('<div class="name"><a target="_blank" href=".*?" title=".*?</a></div>', data)
    skipped_existing = False
    for use in data_use:
        link = 'http://www.zbjuran.com/' + use.split('href="')[1].split('" title')[0]
        links.append(link)
        title = use.split('title="')[1].split('">')[0]
        titles.append(title)
        # A '/' in the title would be read as a path separator and break
        # both the folder name and the image file names.
        safe_title = title.replace('/', '_')
        mkpath = os.path.join(base_dir, safe_title)
        if os.path.exists(mkpath):
            # Skip only this gallery; the rest of the listing page must
            # still be processed.
            skipped_existing = True
            continue
        # Resolve the page count before creating the folder, so a gallery
        # that cannot be read does not leave an empty folder that later
        # runs would treat as already downloaded.
        page_count = _gallery_page_count(link)
        if page_count is None:
            continue
        # Create the directory
        os.makedirs(mkpath)
        _download_gallery(link, safe_title, mkpath, page_count)
    return False if skipped_existing else None
if __name__ == '__main__':
    # Walk listing pages 1 through 87 (the range end is exclusive).
    for page in range(1, 88):
        get_url(page)

利用python爬取點小圖片,滿足私欲(爬蟲)