1. 程式人生 > >爬取攝圖網裡的 音樂和視訊 攝圖網模擬登陸

爬取攝圖網裡的 音樂和視訊 攝圖網模擬登陸

# First, install selenium (pip install selenium)
from selenium import webdriver
import time
from lxml import etree
import json
# Explicit wait support
from selenium.webdriver.support.ui import WebDriverWait
#根據條件尋找對應節點
from selenium.webdriver.support import expected_conditions as EC
import requests
import re
import urllib.parse
import urllib

# HTTP headers passed as ``headers=`` to the requests calls in this script.
# Only a desktop-Chrome User-Agent string is set.
header = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
}

# --- Selenium browser setup (disabled; kept for reference) -----------------
# # Configure a headless browser
# options = webdriver.ChromeOptions()
# options.set_headless()
# # Create the browser driver
# driver = webdriver.Chrome(
#     executable_path='/home/lbc/Documents/chromedriver',
# )
# # options=options
# driver.get('http://699pic.com/soundtrack/?sem=1&sem_kid=206316&sem_type=3')
# # Collect the cookies into a plain name -> value dict
# cookies = driver.get_cookies()
# cookie_dict = {}
# for cookie in cookies:
#     cookie_dict[cookie['name']] = cookie['value']
# # print(cookie_dict)
# # Import the mouse-action helper
# from selenium.webdriver import ActionChains
# --- Simulated login via Selenium (disabled; kept for reference) -----------
# NOTE: the original snippet embedded a real phone number and password; they
# are replaced by placeholders here. Never commit credentials to source.
# # Locate the "log in" link by XPath and click it with the mouse
# element = driver.find_element_by_xpath('/html/body/div[1]/div/div[1]/div/a[2]')
# # Move the mouse onto the node
# ActionChains(driver).move_to_element(element).perform()
# # Move the mouse onto the node and single-click it
# ActionChains(driver).move_to_element(element).click(element).perform()
# # Switch to phone-number login
# element = driver.find_element_by_xpath('//*[@id="alert-action-login"]/div/div/div/div[1]/div[2]/p[2]/a[1]')
# # Move the mouse onto the node
# ActionChains(driver).move_to_element(element).perform()
# # Move the mouse onto the node and single-click it
# ActionChains(driver).move_to_element(element).click(element).perform()
# driver.find_element_by_name('phone').send_keys('<PHONE>')
# # implicit wait
# driver.find_element_by_name('passwd').send_keys('<PASSWORD>')
# # Account and password entered; click the login button
# element = driver.find_element_by_xpath('//*[@id="alert-action-login"]/div/div/div/div[2]/div[1]/div/label[3]/a')
# # Move the mouse onto the node
# ActionChains(driver).move_to_element(element).perform()
# # Move the mouse onto the node and single-click it
# ActionChains(driver).move_to_element(element).click(element).perform()

# Seconds to wait for a single HTTP response; without a timeout
# ``requests.get`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Characters that cannot (or should not) appear in a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]+')


def _to_filename(title):
    """Turn a scraped title (str, or list of xpath text nodes) into a safe file stem."""
    if isinstance(title, (list, tuple)):
        # xpath ``text()`` queries return a list of strings; join them instead
        # of formatting the list repr (which produced names like "['x'].mp3").
        title = ''.join(str(part) for part in title)
    stem = _UNSAFE_FILENAME_CHARS.sub('_', str(title)).strip(' ._')
    return stem or 'untitled'


def qingqiu(url):
    """Fetch one listing page and hand every track link on it to ``music``.

    Args:
        url: URL of a listing page.

    Prints the HTTP status code of the listing request.
    """
    response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    # response.encoding = 'gbk'
    print(response.status_code)
    page = etree.HTML(response.text)
    entries = page.xpath(
        '//div[@class="audio-list"]/ul[@class="soundEffect-block clearfix"]/li'
    )
    for entry in entries:
        hrefs = entry.xpath('.//a[@class="soundEffect-name"]/@href')
        if not hrefs:
            # Entry without a link: skip it instead of raising IndexError.
            continue
        # Links may be relative; resolve them against the page URL.
        music(urllib.parse.urljoin(response.url, hrefs[0]))


def music(url):
    """Fetch one track page and download every audio source found on it.

    Args:
        url: URL of a track detail page.

    Prints the HTTP status code of the page request. The page's ``<h1>`` text
    is used as the title passed to ``song`` for each audio source.
    """
    response = requests.get(url, headers=header, timeout=REQUEST_TIMEOUT)
    print(response.status_code)
    page = etree.HTML(response.text)
    title = ''.join(
        page.xpath('//div[@class="photo-content fl"]/h1/text()')
    ).strip()
    boxes = page.xpath(
        '//div[@class="audio-body"]/div[@class="audio-bodyBg"]'
        '/div[@class="audio-box clearfix"]'
    )
    for box in boxes:
        for src in box.xpath('./audio[@id="audio0"]/source/@src'):
            # ``src`` may be relative or scheme-relative ("//host/x.mp3");
            # urljoin leaves absolute URLs unchanged.
            song(urllib.parse.urljoin(response.url, src), title)


def song(t, title):
    """Download the audio file at ``t`` and save it as ``<title>.mp3``.

    Args:
        t: URL of the audio file.
        title: Track title used for the file name; a string or a list of
            strings (as returned by an xpath ``text()`` query).

    The file is written to the current working directory. Nothing is written
    when the server does not answer with HTTP 200; the status code is printed
    instead.
    """
    response = requests.get(t, headers=header, timeout=REQUEST_TIMEOUT)
    if response.status_code != 200:
        # Do not save an error page under an .mp3 name.
        print(response.status_code)
        return
    with open('{}.mp3'.format(_to_filename(title)), 'wb') as f:
        f.write(response.content)


if __name__ == '__main__':
    # range(3, 4) covers only listing page 3.
    for i in range(3, 4):
        url = 'http://699pic.com/media/soundtrack-so-%s-0-0-0-0-0-0-0.html' % str(i)
        qingqiu(url)