1. 程式人生 > >Day11 (黑客成長日記) 爬取網站圖片

Day11 (黑客成長日記) 爬取網站圖片

#匯入第三方庫
# coding:utf-8
import requests,re

#找到需要爬取的網站'http://www.qqjia.com/sucai/sucai1210.htm'

#1>獲取網站 2>正則表示式匹配不同圖片的地址 3>找到所有圖片的URL
#開發講究見名識意

#1.1 定義一個函式get到url
def get_urls():
    """Fetch the source page and return the image addresses found in it.

    Requests ``http://www.qqjia.com/sucai/sucai1210.htm`` and collects the
    ``src`` value of every ``<img border="0" alt="" src="..." /></p>`` tag
    in the response text.

    Returns:
        list[str]: the captured ``src`` values in the order they appear;
        empty when nothing matches.
    """
    # (1) Fetch the page. The timeout keeps the script from hanging
    # indefinitely when the server stops responding.
    response = requests.get('http://www.qqjia.com/sucai/sucai1210.htm', timeout=10)
    # (2) The markup shared by every image tag is matched literally; the part
    # that differs (the address) is captured lazily with (.*?).
    url_add = r'<img border="0" alt="" src="(.*?)" /></p>'
    # (3) Collect every image URL on the page.
    url_list = re.findall(url_add, response.text)
    # (4) Print the result so it can be checked by eye.
    print(url_list)
    return url_list
#第四步,下載網頁資料
#再定義一個函式 目的:下載資料
def get_gif(url, name):
    r"""Download one image and save it as ``D:\pygif\<name>.gif``.

    Args:
        url: address of the image to request.
        name: integer used as the file name (formatted with ``%d``).

    The target directory is not created here; ``open`` fails if it is missing.
    """
    # Request the image; the timeout avoids blocking forever on a stalled
    # connection.
    response = requests.get(url, timeout=10)
    # Raw string: the backslashes are path separators, not escape sequences.
    # The resulting path is identical to the one the non-raw literal produced.
    with open(r'D:\pygif\%d.gif' % name, 'wb') as ft:
        # response.content is the undecoded body, which is what an image needs.
        ft.write(response.content)

# (5)啟動這個程式
if __name__ == '__main__':
    url_list = get_urls()
    # Files are numbered 1, 2, 3, ... in the order the URLs were found.
    for a, url in enumerate(url_list, start=1):
        get_gif(url, a)

補充:response.text 是解碼後的網頁原始碼(字串),response.content 是未解碼的二進位制資料(bytes),所以儲存圖片時要寫入 response.content。




import requests,re
def url_get():
    """Fetch the source page and return the image paths found in it.

    Requests ``http://qq.yh31.com/zjbq/2920180.html`` and collects the
    ``src`` value of every ``<img border="0" alt="" src="..." />`` tag in
    the response text.

    Returns:
        list[str]: the captured ``src`` values in the order they appear;
        empty when nothing matches.
    """
    # The timeout keeps the script from hanging indefinitely when the server
    # stops responding.
    response = requests.get('http://qq.yh31.com/zjbq/2920180.html', timeout=10)
    # Shared markup is matched literally; the address is captured with (.*?).
    url_add = r'<img border="0" alt="" src="(.*?)" />'
    url_list = re.findall(url_add, response.text)
    # Print the result so it can be checked by eye.
    print(url_list)
    return url_list
def download(url, name):
    r"""Download one image and save it as ``D:\pygif\<name>`` (no extension).

    Args:
        url: full address of the image to request.
        name: integer used as the file name (formatted with ``%d``).

    The target directory is not created here; ``open`` fails if it is missing.
    """
    # The timeout avoids blocking forever on a stalled connection.
    response = requests.get(url, timeout=10)
    # Raw string: the backslashes are path separators, not escape sequences.
    # The resulting path is identical to the one the non-raw literal produced.
    with open(r'D:\pygif\%d' % name, 'wb') as ft:
        # response.content is the undecoded body, which is what an image needs.
        ft.write(response.content)


if __name__ == '__main__':
    url_list = url_get()
    # Files are numbered 1, 2, 3, ... in the order the paths were found.
    for a, url in enumerate(url_list, start=1):
        # Each match is prefixed with the image host to form the full URL.
        com_url = 'http://mm.yh31.com:88' + url
        download(com_url, a)