1. 程式人生 > >Python 動態加載並下載"梨視頻"短視頻

Python 動態加載並下載"梨視頻"短視頻

cat 表達 curl html 正則表達 compile 獲取 成功 表達式

下載鏈接:http://www.pearvideo.com/category_1

import os
import re
from urllib.request import urlretrieve

import requests
from lxml import etree

"""
1 獲取視頻id
2 拼接完整url
3 獲取完整視頻播放地址
4 下載視頻
"""

# Matches the real media address embedded in a video page, e.g.
# srcUrl="http://video.pearvideo.com/mp4/adshort/20180720/cont-1393622-12502013_adpkg-ad_hd.mp4"
# Compiled once at import time instead of once per downloaded video.
_PLAY_URL_RE = re.compile(r'srcUrl="(.*?)"')

# Matches the video title, e.g. <h1 class="video-tt">...</h1>
_TITLE_RE = re.compile(r'<h1 class="video-tt">(.*?)</h1>')

# Characters that are not allowed in file names on common file systems.
_UNSAFE_FILENAME_CHARS_RE = re.compile(r'[\\/:*?"<>|]')

_SITE_ROOT = "http://www.pearvideo.com"
_VIDEO_DIR = "./video"
_REQUEST_TIMEOUT = 10  # seconds; without a timeout requests may block forever


def _safe_filename(title):
    """Return *title* with characters that are invalid in file names replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS_RE.sub("_", title).strip()


def download_video(url):
    """Download every video linked from one listing page into ``./video``.

    Steps: collect the video ids from the listing page, build each video
    page URL, extract the real playback address and the title from that
    page, then download the file as ``./video/<title>.mp4``.

    Args:
        url: Address of a listing page, e.g.
            ``http://www.pearvideo.com/category_1`` or one of the
            ``category_loading.jsp`` fragments used for dynamic loading.
    """
    listing = requests.get(url, timeout=_REQUEST_TIMEOUT)

    # Turn the response text into a tree that XPath can query.
    tree = etree.HTML(listing.text)

    # Each href is a relative video id; prefix it with the site root.
    hrefs = tree.xpath("//div[@class='vervideo-bd']/a/@href")
    video_urls = [_SITE_ROOT + "/" + href for href in hrefs]

    # urlretrieve does not create missing directories.
    os.makedirs(_VIDEO_DIR, exist_ok=True)

    for video_url in video_urls:
        page = requests.get(video_url, timeout=_REQUEST_TIMEOUT).text

        # findall returns lists; either may be empty if the page layout differs.
        play_urls = _PLAY_URL_RE.findall(page)
        titles = _TITLE_RE.findall(page)
        if not play_urls or not titles:
            # Skip this page rather than abort the whole crawl with IndexError.
            print("跳過 %s:未找到視頻地址或標題" % video_url)
            continue

        title = titles[0]
        print("正在下載 %s" % title)
        target = os.path.join(_VIDEO_DIR, "{}.mp4".format(_safe_filename(title)))
        urlretrieve(play_urls[0], target)


# How the dynamically loaded listing URL changes while scrolling:
# http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=1&start=12&mrd=0.8960730781029713&hotContIds=1394290,1394224,1394233
# http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=1&start=24&mrd=0.45490116190392094&hotContIds=1394290,1394224,1394233
# http://www.pearvideo.com/category_loading.jsp?reqType=5&categoryId=1&start=36&mrd=0.895263612547242&hotContIds=1394290,1394224,1394233
# Everything from "&mrd" onwards can be omitted.


def download_more():
    """Download the dynamically loaded listing pages.

    Requests the ``category_loading.jsp`` fragment for ``start`` = 12, 24,
    36 and 48 and passes each one to :func:`download_video`.
    """
    for start in range(12, 49, 12):
        url = (
            "http://www.pearvideo.com/category_loading.jsp"
            "?reqType=5&categoryId=1&start=%d" % start
        )
        download_video(url)
        # Add a pause here (e.g. time.sleep) to throttle the requests.


if __name__ == "__main__":
    download_more()

F12 打開開發者工具 -> Network -> 找到動態加載 html 項(category_loading 開頭)-> Headers -> Request URL

技術分享圖片

Python 動態加載並下載"梨視頻"短視頻