1. 程式人生 > >python扒靜態網頁圖片

python扒靜態網頁圖片

以抓取東北大學官網首頁的圖片為例（以下程式碼使用 urllib2，適用於 Python 2）:

#coding=utf-8  
import os
import re
import time
import urllib2
from urlparse import urljoin
# Download every jpg/png/jpeg referenced by an <img src="..."> tag on the
# page at `base` into `out_dir`, pausing one second between downloads.
base = 'http://www.neu.edu.cn/'
out_dir = './img/'

# Read the page, then close the response explicitly so the connection is
# released even if read() raises.
url = urllib2.urlopen(base)
try:
    buf = url.read()
finally:
    url.close()

# [^"]* keeps each match inside a single src attribute. The earlier greedy
# .* could run on to a later quote on the same line and capture several
# attributes (or several tags) as one bogus path.
more = re.compile(r'<img src="([^"]*\.(?:jpg|png|jpeg))"')
abc = more.findall(buf)

# urljoin handles relative, root-relative and absolute src values; plain
# string concatenation produced broken URLs for anything but relative paths.
img = [urljoin(base, i) for i in abc]

# open() does not create missing directories, so make sure the target exists.
if not os.path.isdir(out_dir):
    os.makedirs(out_dir)

for tmp in img:
    resp = urllib2.urlopen(tmp)
    try:
        data = resp.read()
    finally:
        resp.close()
    # The local file is named after the last path segment of the image URL.
    filename = out_dir + tmp[tmp.rfind('/') + 1:]
    print(filename)
    with open(filename, 'wb') as fw:
        fw.write(data)
    # Throttle: wait one second before requesting the next image.
    time.sleep(1)