1. 程式人生 > 黃圖是怎麼被 Python 給爬下來的

黃圖是怎麼被python給爬下來的

本人一向喜歡看美女圖片,希望能夠把它們下載下來。找到一個黃網後,就用 Python 把上面的圖片給爬下來。

執行效果如圖(資料夾中的圖片就不給大家看了,少兒不宜):

程式碼如下:

#-*- coding:utf-8 -*-
import sys
import time
import os
import HTMLParser
import urllib
import sys
import socket
# Abort any socket operation that stalls for more than 15 seconds.
socket.setdefaulttimeout(15)

# List of page URLs to visit; element 0 is the seed page of the crawl.
urlString = ['http://www.99yeye.com/']

# Absolute path of the "Download" folder under the current working directory;
# it is created on first run.
save_path = os.path.abspath(os.path.join(os.curdir, "Download"))
if os.path.exists(save_path):
    pass
else:
    os.mkdir(save_path)

def getImage(addr):
    """Download the image at ``addr`` into ``save_path`` unless it is already there.

    The local file name is the last ``/``-separated segment of ``addr``.
    Downloading is best-effort: a failure is reported on stdout and then
    swallowed, so that one broken image never aborts the caller.

    :param addr: URL of the image to fetch.
    :returns: None.
    """
    if not addr:
        return
    fName = addr.split('/').pop()
    if not fName:
        # The URL ends with "/", so there is no file name to save under.
        return

    img_file = os.path.join(save_path, fName)
    if os.path.exists(img_file):
        # Already downloaded: skip the network round-trip entirely.
        return

    try:
        u = urllib.urlopen(addr)
        try:
            data = u.read()
        finally:
            # Release the connection even when the read fails or times out.
            u.close()
        print("Saving %s" % fName)
        with open(img_file, 'wb') as f:
            f.write(data)
        print("download-end!!!!!!!!")
    except Exception as e:
        # Deliberately broad: this is a best-effort download, but the failure
        # is reported instead of being silently discarded.
        print("Failed to download %s: %s" % (addr, e))
       

   
class app_url(HTMLParser.HTMLParser):
    """HTML parser that collects hyperlink targets into ``urlString``.

    Every ``<a href=...>`` encountered while feeding a page is resolved
    against the seed URL (``urlString[0]``) and appended to the module-level
    ``urlString`` list if it is an http(s) URL that is not already present.
    """

    def handle_starttag(self, tag, attrs):
        """Record the target of an ``<a>`` tag in ``urlString``.

        :param tag: lower-cased tag name supplied by HTMLParser.
        :param attrs: list of ``(name, value)`` pairs; ``value`` is None for
            attributes written without a value.
        """
        # Function-scope import: urlparse is not among the file's imports.
        from urlparse import urldefrag, urljoin

        if tag != "a":
            return
        for name, value in attrs:
            if name != "href" or not value:
                # Not a link target, or an empty / valueless href.
                continue
            # urljoin leaves absolute URLs untouched and resolves relative
            # ones; the fragment is dropped because it names the same page.
            link = urldefrag(urljoin(urlString[0], value.strip()))[0]
            if not link.lower().startswith(("http:", "https:")):
                # Skip javascript:, mailto: and other non-fetchable schemes.
                continue
            # De-duplicate on the resolved URL, which is what gets stored.
            if link not in urlString:
                urlString.append(link)

# Define the HTML parser that downloads the images of a page.
class parseImages(HTMLParser.HTMLParser):
    """HTML parser that passes the URL of every ``<img src=...>`` to ``getImage``.

    Relative ``src`` values are resolved against the URL of the page being
    parsed. That URL is taken from ``base_url`` when it has been set;
    otherwise it falls back to the module-level variable ``i``, which the
    crawl loop of this script assigns before feeding each page.
    """

    # Optional URL of the page currently being parsed; None selects the
    # fallback to the module-level ``i``.
    base_url = None

    def handle_starttag(self, tag, attrs):
        """Download the image referenced by an ``<img>`` tag.

        :param tag: lower-cased tag name supplied by HTMLParser.
        :param attrs: list of ``(name, value)`` pairs; ``value`` is None for
            attributes written without a value.
        """
        # Function-scope import: urlparse is not among the file's imports.
        from urlparse import urljoin

        if tag != 'img':
            return

        page_url = self.base_url
        if page_url is None:
            # Backward-compatible fallback; an empty base makes urljoin
            # return the src unchanged when ``i`` has not been defined.
            page_url = globals().get('i', '')

        for name, value in attrs:
            if name != 'src' or not value:
                continue
            image_url = urljoin(page_url, value.strip())
            # Only http(s) resources can be fetched (skips data: URIs etc.).
            if image_url.lower().startswith(("http:", "https:")):
                getImage(image_url)
               
               

# Create the link-collecting parser instance.
lParser = app_url()

# Open the seed page.
u = urllib.urlopen(urlString[0])
print("Opening URL\n====================")
# Hand the HTML to the parser; every link it finds is added to urlString.
try:
    lParser.feed(u.read())
    lParser.close()
finally:
    u.close()

print("##############################################################################")
print(urlString)

gg = parseImages()

# The loop variable is intentionally named ``i``: parseImages reads the
# module-level ``i`` as the URL of the page currently being parsed.
for i in urlString:
    print(i)
    try:
        u = urllib.urlopen(i)
    except IOError as e:
        # One unreachable page must not abort the rest of the crawl.
        print("Skipping %s: %s" % (i, e))
        continue
    try:
        print(u.info())
        # Drop any unfinished parser state left over from the previous page.
        gg.reset()
        # Hand the HTML to the parser; images are downloaded as they are found.
        gg.feed(u.read())
        gg.close()
    except Exception as e:
        # Top-level crawl boundary: report the failure and move on.
        print("Failed to process %s: %s" % (i, e))
    finally:
        u.close()