1. 程式人生 > 爬取陽光寬頻網的視訊

爬取陽光寬頻網的視訊

import requests
from lxml import etree
import json
import os

from selenium import webdriver
import time

class LoadVideos(object):
    """Collect video page links from the 365yg.com feed and download them.

    Typical use: call ``get_video_info()`` to fill ``video_list`` from the
    JSON feed, then ``load_video_data()`` to render each page in PhantomJS,
    extract the ``<video>`` source URL and save the file through
    ``save_video``. Call ``close()`` when done to stop the browser process.
    """

    # Default PhantomJS binary location (the original author's machine).
    # NOTE(review): PhantomJS support was dropped from recent Selenium
    # releases; this keeps the original driver choice unchanged.
    PHANTOMJS_PATH = '/Users/zhangninglei/Downloads/phantomjs-2.1.1-macosx/bin/phantomjs'

    def __init__(self, phantomjs_path=None, request_timeout=30):
        """Set up URLs, request headers and the headless browser.

        Args:
            phantomjs_path: Path to the PhantomJS executable. Defaults to
                ``PHANTOMJS_PATH`` when omitted.
            request_timeout: Seconds to wait on each ``requests`` call
                before giving up, so a stalled server cannot hang the run.
        """
        self.index_url = 'http://www.365yg.com/'
        self.json_url = 'http://www.365yg.com/api/pc/feed/?category=video&utm_source=toutiao&widen=1&max_behot_time=0&max_behot_time_tmp=0&tadrequire=true&as=A1654A545ACFD9C&cp=5A4A0F0D29FC7E1&_signature='
        self.headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.108 Safari/537.36'}
        self.request_timeout = request_timeout
        self.webdriver = webdriver.PhantomJS(phantomjs_path or self.PHANTOMJS_PATH)
        # Maps video id -> URL of the page that hosts the video.
        self.video_list = {}

    @staticmethod
    def _join_url(base, path):
        """Join ``base`` and ``path`` with exactly one slash between them.

        An already absolute ``http(s)://`` path is returned unchanged.
        """
        if path.startswith(('http://', 'https://')):
            return path
        return base.rstrip('/') + '/' + path.lstrip('/')

    @staticmethod
    def _with_scheme(src):
        """Prefix a protocol-relative URL (``//host/...``) with ``http:``."""
        if src.startswith('//'):
            return 'http:' + src
        return src

    def get_video_info(self):
        """Fetch the JSON feed and record each video's page URL.

        Fills ``self.video_list`` keyed by the entry's ``video_id``.
        A feed without a ``data`` list leaves ``video_list`` unchanged.
        """
        r = requests.get(url=self.json_url, headers=self.headers,
                         timeout=self.request_timeout)
        obj = json.loads(r.text)
        for video in obj.get('data') or []:
            # str() keeps the later string concatenations safe should the
            # feed ever deliver the id as a number.
            video_name = str(video['video_id'])
            # presumably source_url starts with '/', which would double the
            # slash when appended to index_url as-is; normalise the join.
            video_url = self._join_url(self.index_url, video['source_url'])
            self.video_list[video_name] = video_url

    def load_video_data(self):
        """Render every collected page, locate its video file and save it.

        Pages without a ``<video>`` source, and downloads answered with an
        error status, are reported and skipped instead of aborting the run.
        """
        for i, url in self.video_list.items():
            # Send the request through the browser so page scripts execute.
            self.webdriver.get(url)
            # Sleep briefly to let the page load its data.
            time.sleep(5)
            # Grab the rendered page source.
            html = self.webdriver.page_source
            # Parse the page and look up the video source address.
            html_tree = etree.HTML(html)
            if html_tree is None:
                sources = []
            else:
                sources = html_tree.xpath('//video[@class="vjs-tech"]/source/@src')
            if not sources:
                print(i + '未找到視訊地址,已跳過!')
                continue
            video_src = self._with_scheme(sources[0])
            print('開始載入'+i+'的資料!')
            r = requests.get(url=video_src, headers=self.headers,
                             timeout=self.request_timeout)
            if not r.ok:
                # Do not store an error page under a .mp4 name.
                print(i + '下載失敗,已跳過!')
                continue
            print(i + '的資料載入完畢!')
            # Save to local disk.
            print('將'+i+'儲存到本地!')
            save_video(filename=i, data=r.content)
            print(i+'已成功儲存!')

    def close(self):
        """Shut down the PhantomJS browser started in ``__init__``."""
        self.webdriver.quit()


def save_video(filename, data):
    """Write ``data`` to ``<cwd>/video/<filename>.mp4``.

    Args:
        filename: Base name of the output file, without extension.
        data: Raw bytes of the video.

    The ``video`` directory is created when it does not exist yet.
    """
    video_dir = os.path.join(os.getcwd(), 'video')
    os.makedirs(video_dir, exist_ok=True)
    filepath = os.path.join(video_dir, filename + '.mp4')
    with open(filepath, 'wb') as f1:
        f1.write(data)


def main():
    """Run the full scrape: collect the video list, then download each one."""
    loadvideo = LoadVideos()
    try:
        loadvideo.get_video_info()
        loadvideo.load_video_data()
    finally:
        # Always stop the browser process, even when a step fails.
        loadvideo.close()


if __name__ == '__main__':
    main()