1. 程式人生 > >python爬取煎蛋網妹子圖,已解密圖片~~~~~

python爬取煎蛋網妹子圖,已解密圖片~~~~~

本來想爬一波無聊圖,唉,圖片地址竟然加密了……

還好是base64

不說了,程式碼獻上(2018.12.14 測試有效)。

 

import base64
import os
import time

import requests
from bs4 import BeautifulSoup

# Scrape jandan.net comment pages 50-78, collect the base64-encoded image
# addresses found in each comment's first <p>, decode them into URLs, and
# download every image into the local ``jdimg/`` directory.

# --- Step 1: collect the raw base64 strings from every page ----------------
base64_list = []
print('====開始爬取=====')
starttime = time.time()
for i in range(50, 79):
    url = 'http://jandan.net/ooxx/page-{}#comments'.format(i)
    r = requests.get(url=url, headers={
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/58.0.3029.110 Safari/537.36'
    })
    r.encoding = 'utf-8'
    soup = BeautifulSoup(r.text, 'lxml')
    ol = soup.find(name='ol', attrs={"class": "commentlist"})
    if ol is None:
        # find() returns None when the page has no comment list; skip the
        # page instead of crashing on ol.find_all below.
        continue
    li_List = ol.find_all(name='li')
    for li in li_List:
        p = li.find(name='p')
        # Entries without a <p>, or whose text spans several lines, are not
        # a single base64 token and are skipped.
        if p is None or '\n' in p.text:
            continue
        base64_list.append(p.text)

endtime = time.time()
inttime = endtime - starttime
print('=====爬取結束====\n用時{}秒'.format(inttime))

# --- Step 2: decode each base64 string into a full image URL ---------------
print('=====開始解析====')
full_url = []  # decoded image URLs
for www in base64_list:
    try:
        k = base64.b64decode(www)
        # The decoded text has "http:" prepended to form the full URL.
        b = 'http:' + k.decode()
        full_url.append(b)
    except ValueError:
        # binascii.Error (bad base64) and UnicodeDecodeError are both
        # ValueError subclasses; report the failure and move on.
        print(1)
print('=====解析完畢=====')

# --- Step 3: download every image into jdimg/ -------------------------------
print('===載入本地資料夾===')
# Make sure the target directory exists; open(..., 'wb') does not create it.
os.makedirs('jdimg', exist_ok=True)
image_start_time = time.time()
for index, item in enumerate(full_url):
    full_path = 'jdimg/' + str(index) + '.jpg'
    k = requests.get(item).content
    with open(full_path, 'wb') as f:
        f.write(k)
end_time_time = time.time()
# %.2f prints the elapsed seconds with two decimals; the previous %.2s
# truncated the number's string form to its first two characters.
print('用時%.2f秒' % (end_time_time - image_start_time))