1. 程式人生 > >python尤果網圖片爬蟲(簡單)__selenium+phantomJS+urllib2

python尤果網圖片爬蟲(簡單)__selenium+phantomJS+urllib2

1.首先給python(本文程式碼為Python 2寫法)安裝selenium和lxml庫,然後下載phantomJS並把它所在的目錄加入PATH環境變數(具體步驟網上可以搜到很多)

2.直接放python程式碼: youguo_image_spider.py

#!/usr/bin/env python
#_*_coding:utf-8_*_

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from lxml import etree
import urllib2

class youguoSpider(object):
    """Log in to ugirls.com through PhantomJS and download the listed images.

    The flow is: open the home page, log in through the pop-up dialog,
    click the element with id "1", collect the image URLs from the
    resulting page and save each image into the current working directory.
    """

    def __init__(self):
        # Entry page of the site; the login dialog is opened from here.
        self.url = "https://www.ugirls.com/"
        # Headless browser used to drive the login dialog.
        self.driver = webdriver.PhantomJS()

    def start(self):
        """Run the whole crawl and always shut the browser down afterwards."""
        try:
            self.login_getIndexPage()
        finally:
            # Without quit() the PhantomJS process outlives the script.
            self.driver.quit()

    def login_getIndexPage(self):
        """Log in, open the listing page and hand its HTML to the parser."""
        driver = self.driver

        # Open the site's home page.
        driver.get(self.url)

        # Click the "login" button so the login dialog pops up.
        driver.find_element_by_id("btnshowlogin").click()

        # Fill the account and password into the login dialog.
        driver.find_element_by_id("txtAccount").send_keys(u"XXXXXXXXXX")  # account
        driver.find_element_by_id("txtPass").send_keys(u"XXXXXXXX")  # password

        # Submit the login form.
        driver.find_element_by_id("btnLogin").click()
        # NOTE(review): this is printed unconditionally; the code does not
        # verify that the login actually succeeded.
        print("登入成功")

        # Click the element with id "1" and parse the page shown afterwards.
        driver.find_element_by_id("1").click()
        html = driver.page_source
        print(html)
        self.get_personUrlList(html)

    def get_personUrlList(self, html):
        """Extract the image URLs from ``html`` and download them.

        Args:
            html: Page source of the listing page.
        """
        ehtml = etree.HTML(html)
        urllist = ehtml.xpath(
            '//div[@class="magazine_list_wrap"]/div/a'
            '/img[@class="magazine_img"]/@src'
        )
        self.download(urllist)
        print(urllist)

    def download(self, urllist):
        """Download every URL in ``urllist`` into the current directory.

        Files are named ``<index>_<url slice>.jpg`` with a 1-based index.

        Args:
            urllist: Iterable of image URLs; an empty one downloads nothing.
        """
        print("開始下載圖片:")
        for index, personurl in enumerate(urllist, 1):
            print(personurl)
            response = urllib2.urlopen(personurl)
            try:
                # Read the body before creating the file so that a failed
                # transfer does not leave an empty .jpg behind.
                data = response.read()
            finally:
                response.close()
            # presumably [-48:-16] picks the 32-character id segment of the
            # image URL -- TODO confirm against real URLs.
            filename = str(index) + "_" + personurl[-48:-16] + ".jpg"
            with open(filename, "wb") as f:
                f.write(data)
        print("下載結束!")

def main():
    """Build a spider and run one full crawl."""
    youguoSpider().start()

# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

3.有問題下方留言討論