python編程快速上手之第10章實踐項目參考答案(11.11.2)
阿新 • • 發佈:2017-05-08
答案 nic .com final timeout pre image 保存圖片 iframe
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Download Baidu image-search results for a keyword into ./<keyword>/.
import json
import os
import re
import socket
import time
import urllib
import urllib.error
import urllib.parse
import urllib.request

# Global socket timeout in seconds, applied to every socket created afterwards.
timeout = 5
socket.setdefaulttimeout(timeout)


class Crawler:
    """Fetch Baidu image-search result pages and save every listed image.

    Images for a keyword are written to ``./<keyword>/<n><suffix>`` where
    ``n`` continues from the number of entries already in that directory.
    """

    # Number of results requested per page (sent as the ``rn`` parameter).
    __page_size = 60
    # Attempts made for one result page before it is skipped.
    __max_retries = 3

    # Exclusive upper bound of the result offset to crawl.
    __amount = 0
    # Result offset of the first page to crawl.
    __start_amount = 0
    # Number used as the file name of the next image to be saved.
    __counter = 0

    def __init__(self, t=0.1):
        """Create a crawler.

        Args:
            t: Pause in seconds before every page request and image download.
        """
        self.time_sleep = t

    def __loadPage(self, url, headers):
        """Request ``url`` and return the parsed JSON, or ``None`` on failure.

        Failures are reported on stdout instead of being raised so the caller
        can decide whether to retry.
        """
        try:
            time.sleep(self.time_sleep)
            req = urllib.request.Request(url=url, headers=headers)
            # The context manager closes the response even when reading or
            # decoding fails, and never touches an unbound name when the
            # connection itself could not be opened.
            with urllib.request.urlopen(req) as page:
                data = page.read().decode('utf8')
            return json.loads(data)
        except UnicodeDecodeError:
            print('-----UnicodeDecodeErrorurl:', url)
        except json.JSONDecodeError:
            print("-----jsonDecodeError url:", url)
        except urllib.error.URLError:
            print("-----urlErrorurl:", url)
        except socket.timeout:
            print("-----socket timout:", url)
        return None

    def __getImages(self, word='美女'):
        """Crawl every result page in the configured offset range for ``word``.

        A page that cannot be loaded is retried up to ``__max_retries`` times
        and then skipped, so a persistent error cannot loop forever.
        """
        search = urllib.parse.quote(word)
        # Browser-like User-Agent so the request is not rejected outright.
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
        # pn is the result offset of the page being requested.
        pn = self.__start_amount
        failures = 0
        while pn < self.__amount:
            url = ('http://image.baidu.com/search/avatarjson?tn=resultjsonavatarnew&ie=utf-8&word=' + search
                   + '&cg=girl&pn=' + str(pn)
                   + '&rn=' + str(self.__page_size)
                   + '&itg=0&z=0&fr=&width=&height=&lm=-1&ic=0&s=0&st=-1&gsm=1e0000001e')
            json_data = self.__loadPage(url, headers)
            if json_data is None:
                failures += 1
                if failures < self.__max_retries:
                    continue  # retry the same page
                print("-----giving up after", failures, "attempts:", url)
            else:
                self.__saveImage(json_data, word)
                # Move on to the next page.
                print("下載下一頁")
            failures = 0
            pn += self.__page_size
        print("下載任務結束")
        return

    def __saveImage(self, json, word):
        """Download every image listed under ``json['imgs']`` into ``./<word>``.

        Args:
            json: Parsed search response (the name shadows the ``json`` module
                inside this method only; the module is not used here).
            word: Search keyword, also used as the directory name.

        The counter advances and the progress line is printed only after a
        successful download, so failed images leave no gap in the numbering.
        """
        target_dir = "./" + word
        os.makedirs(target_dir, exist_ok=True)
        # Continue numbering after whatever is already in the directory.
        self.__counter = len(os.listdir(target_dir)) + 1
        for info in json.get('imgs') or []:
            try:
                self.__downloadImage(info, word)
            except urllib.error.HTTPError as urllib_err:
                print(urllib_err)
                continue
            except Exception as err:
                # Best effort: one bad entry must not abort the whole page.
                time.sleep(1)
                print(err)
                print("產生未知錯誤,放棄保存")
                continue
            print("小黃圖+1,已有" + str(self.__counter) + "張小黃圖")
            self.__counter += 1
        return

    def __downloadImage(self, info, word):
        """Save the image at ``info['objURL']`` under the current counter.

        Returns:
            True once the file has been retrieved; errors propagate to the
            caller.
        """
        time.sleep(self.time_sleep)
        fix = self.__getFix(info['objURL'])
        urllib.request.urlretrieve(info['objURL'], './' + word + '/' + str(self.__counter) + str(fix))
        return True

    def __getFix(self, name):
        """Return the file suffix of ``name`` (dot included), or ``'.jpeg'``.

        The fallback is used when there is no dot, when the text after the
        last dot is longer than four characters, or when it contains ``/``
        (the last dot then belongs to the host or a directory, not a file).
        """
        m = re.search(r'\.[^\.]*$', name)
        if m and len(m.group(0)) <= 5 and '/' not in m.group(0):
            return m.group(0)
        return '.jpeg'

    def __getPrefix(self, name):
        """Return the part of ``name`` before its first dot.

        A name without a dot is returned unchanged.
        """
        return name.partition('.')[0]

    def start(self, word, spider_page_num=1, start_page=1):
        """Crawl ``spider_page_num`` result pages beginning at ``start_page``.

        Args:
            word: Search keyword.
            spider_page_num: Number of pages to crawl; each page holds 60
                results, so up to ``spider_page_num * 60`` images are tried.
            start_page: 1-based index of the first page.
        """
        self.__start_amount = (start_page - 1) * self.__page_size
        self.__amount = spider_page_num * self.__page_size + self.__start_amount
        self.__getImages(word)


if __name__ == "__main__":
    crawler = Crawler(0.05)
    crawler.start('科幻', 1, 5)
from getpass import getpass
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# Longest time (seconds) to wait for any single page element to appear.
WAIT_SECONDS = 30


def send_mail(userid, passid, sendto, sendtxt="hello"):
    """Log in to mail.10086.cn with Firefox and send one message.

    Args:
        userid: Account name typed into the ``txtUser`` field.
        passid: Password typed into the ``txtPass`` field.
        sendto: Recipient typed into the ``toContaine`` field.
        sendtxt: Text typed into the ``txtSubject`` field.

    Returns:
        The WebDriver instance. The browser is left open, as before, so the
        outcome can be inspected; call ``quit()`` on it when done.

    Raises:
        selenium.common.exceptions.TimeoutException: An expected element did
            not appear within ``WAIT_SECONDS``.
    """
    # Open Firefox on the mailbox login page.
    driver = webdriver.Firefox()
    driver.get('http://mail.10086.cn/')
    wait = WebDriverWait(driver, WAIT_SECONDS)

    # Fill in the user name.
    email_elem = wait.until(EC.presence_of_element_located((By.ID, 'txtUser')))
    email_elem.send_keys(userid)

    # Fill in the password and submit the login form.
    password_elem = driver.find_element(By.ID, 'txtPass')
    password_elem.send_keys(passid)
    password_elem.submit()

    # Open the compose view once the mailbox has finished loading.
    wait.until(EC.element_to_be_clickable((By.ID, 'btn_compose'))).click()

    # The compose form is rendered inside an iframe whose id contains
    # "compose_"; switch into it before looking up the form fields.
    iframe_xpath = "//iframe[contains(@id,'compose_')]"
    wait.until(EC.frame_to_be_available_and_switch_to_it((By.XPATH, iframe_xpath)))

    # NOTE(review): 'toContaine' is kept exactly as in the original script;
    # confirm the element id against the live page.
    wait.until(EC.presence_of_element_located((By.ID, 'toContaine'))).send_keys(sendto)
    driver.find_element(By.ID, 'txtSubject').send_keys(sendtxt)
    driver.find_element(By.ID, 'topSend').click()
    return driver


if __name__ == "__main__":
    userid = input('username')
    # getpass keeps the password from being echoed to the terminal.
    passid = getpass('password')
    sendto = input('sendto')
    send_mail(userid, passid, sendto)
python編程快速上手之第10章實踐項目參考答案(11.11.2)