黃圖是怎麼被 Python 給爬下來的
本人一向喜歡看美女圖片,並希望能把它們下載下來。於是找到一個黃網,用 Python 把上面的圖片爬下來。
執行效果如圖(資料夾中的圖片就不給大家看了,少兒不宜):
程式碼如下:
#-*- coding:utf-8 -*-
import HTMLParser
import os
import socket
import sys
import time
import urllib
import urlparse
# Abort any socket operation (connect/read) that blocks for more than
# 15 seconds so a single dead host cannot hang the crawl.
socket.setdefaulttimeout(15)

# Crawl queue. Element 0 is the start page; links discovered on it are
# appended by the link-collecting parser.
urlString = ['http://www.99yeye.com/']

# Absolute path of the directory that downloaded files are written to.
save_path = os.path.abspath("./Download")

# Create the download directory. Attempting the mkdir and tolerating
# "already exists" avoids the race between a separate exists() check and
# the creation itself.
try:
    os.mkdir(save_path)
except OSError:
    # An existing directory is fine; anything else (a plain file in the
    # way, missing permissions, ...) is a real error and must surface.
    if not os.path.isdir(save_path):
        raise
def getImage(addr):
    """Download the resource at ``addr`` into ``save_path``.

    The local file name is the last ``/``-separated component of ``addr``.
    A file that already exists in ``save_path`` is not downloaded again.
    The function is best-effort: any failure is reported on stdout and
    swallowed so that one bad URL does not stop the caller.

    Args:
        addr: Absolute URL of the resource to fetch.

    Returns:
        None.
    """
    # The last path component of the URL becomes the local file name.
    fName = addr.split('/').pop()
    if not fName:
        # URL ends with '/': there is no file name to save under.
        return

    img_file = os.path.join(save_path, fName)
    # Check the real target path *before* hitting the network, so files
    # that were already saved are neither re-fetched nor overwritten.
    if os.path.exists(img_file):
        return

    try:
        u = urllib.urlopen(addr)
        try:
            data = u.read()
        finally:
            u.close()

        print("Saving %s" % fName)
        with open(img_file, 'wb') as f:
            f.write(data)
        print("download-end!!!!!!!!")
    except Exception as e:
        # Best-effort boundary: report the failure instead of hiding it,
        # but never propagate it to the crawler loop.
        print("Failed to download %s: %s" % (addr, e))
class app_url(HTMLParser.HTMLParser):
    """HTML parser that collects the targets of ``<a href=...>`` links.

    Every HTTP(S) link found in the fed document is resolved against the
    start page (``urlString[0]``) and appended to the module-level
    ``urlString`` list, unless it is already present.
    """

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` of an ``<a>`` start tag in ``urlString``.

        Args:
            tag: Tag name as reported by the parser.
            attrs: List of ``(name, value)`` attribute pairs; ``value`` is
                ``None`` for attributes written without a value.
        """
        if tag != "a":
            return

        for name, value in attrs:
            # Skip other attributes and valueless/empty hrefs (``<a href>``
            # yields value None, which has no string methods).
            if name != "href" or not value:
                continue

            # Resolve relative links properly instead of concatenating
            # strings, and drop any "#fragment" so the same page is not
            # queued once per anchor.
            link = urlparse.urldefrag(urlparse.urljoin(urlString[0], value))[0]

            # Only queue pages that can actually be fetched over HTTP(S);
            # this filters out javascript:, mailto:, file: and similar.
            if not link.startswith(("http://", "https://")):
                continue

            # De-duplicate on the resolved URL, not on the raw attribute.
            if link not in urlString:
                urlString.append(link)
# HTML parser that downloads every image referenced by a page.
class parseImages(HTMLParser.HTMLParser):
    """HTML parser that passes the URL of each ``<img src=...>`` to getImage.

    Relative ``src`` values are resolved against the module-level name
    ``i``, which the crawl loop sets to the URL of the page currently
    being parsed before feeding this parser.
    """

    def handle_starttag(self, tag, attrs):
        """Download the image referenced by an ``<img>`` start tag.

        Args:
            tag: Tag name as reported by the parser.
            attrs: List of ``(name, value)`` attribute pairs; ``value`` is
                ``None`` for attributes written without a value.
        """
        if tag != 'img':
            return

        for name, value in attrs:
            # Skip other attributes and valueless/empty src attributes.
            if name != 'src' or not value:
                continue

            # NOTE: ``i`` is the global loop variable of the crawl loop
            # (URL of the current page). urljoin leaves absolute URLs
            # untouched and resolves relative ones correctly.
            img_url = urlparse.urljoin(i, value)

            # Only fetch over HTTP(S): urllib.urlopen would otherwise also
            # honour schemes such as file:, which a page must not be able
            # to trigger.
            if img_url.startswith(("http://", "https://")):
                getImage(img_url)
# Create the link-collecting parser.
lParser = app_url()

# Fetch the start page.
u = urllib.urlopen(urlString[0])
print("Opening URL\n====================")

# Feed the start page's HTML to the parser; this fills urlString with the
# links found on it.
try:
    lParser.feed(u.read())
finally:
    u.close()
lParser.close()

print("##############################################################################")
print(urlString)

gg = parseImages()
# The loop variable must stay a module-level name called ``i``:
# parseImages.handle_starttag reads it as the URL of the current page.
for i in urlString:
    print(i)
    try:
        u = urllib.urlopen(i)
    except IOError as e:
        # One unreachable page must not abort the whole crawl.
        print("Skipping %s: %s" % (i, e))
        continue

    try:
        print(u.info())
        # Start from a clean parser state for every page, so unprocessed
        # data from a previous page cannot leak into this one.
        gg.reset()
        # Feed the page's HTML to the image parser.
        gg.feed(u.read())
        gg.close()
    except (IOError, HTMLParser.HTMLParseError) as e:
        print("Failed to process %s: %s" % (i, e))
    finally:
        u.close()