1. 程式人生 > >Twitter視頻下載方式

Twitter視頻下載方式

tab 1.2 lec chrome ref one sed lib efault

本文介紹如何傳入Twitter視頻播放鏈接,通過 http://twdown.net/ 網站獲得Twitter視頻的MP3、MP4下載鏈接,並用youtube-dl下載Twitter視頻。

#coding=utf-8
import os
import re
import sys
import time
import datetime
import requests
# import pdfkit
import hashlib
from scrapy.http import Request, HtmlResponse
from scrapy.selector import HtmlXPathSelector
from pymongo import MongoClient
import smtplib
import urlparse
import json
import redis

from gevent.pool import Pool
from gevent import monkey
# Patch the standard library so blocking I/O cooperates with gevent.
# NOTE(review): this runs after `requests` and the other modules above have
# already been imported; gevent's documentation recommends patching as early
# as possible -- confirm the late patch is intentional.
monkey.patch_all()

# Python 2 only: reloading `sys` makes `setdefaultencoding` available again so
# implicit str/unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')

def get_url(data, max_tries=10, timeout=30):
    """POST *data* to the twdown.net download endpoint and parse the reply.

    Args:
        data: Form fields to submit. The caller in this file passes
            ``{'URL': <tweet url>}``.
        max_tries: Number of attempts before giving up.
        timeout: Seconds to wait for the server on each attempt; without it a
            stalled connection would block forever.

    Returns:
        A ``(hxs, html)`` tuple: the ``HtmlXPathSelector`` built from the
        response and the raw response body. Both are ``None`` when every
        attempt failed.
    """
    # NOTE(review): only an 'https' proxy is configured while the endpoint
    # below is plain http, so this request presumably bypasses the proxy --
    # confirm which was intended.
    proxies = {'https': '127.0.0.1:8123'}
    url = 'http://twdown.net/download.php/'
    print(data)
    # Content-Length is deliberately not set here: requests computes it from
    # the encoded form body, and a fixed value is wrong for any other payload.
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': '__cfduid=d5949cf9b7f7659555bf30bd1176139981497940317; td_cookie=18446744071819015387; _ga=GA1.2.2022387890.1497940226; _gid=GA1.2.1592134325.1497940226',
        'Host': 'twdown.net',
        'Origin': 'http://twdown.net',
        'Proxy-Connection': 'keep-alive',
        'Referer': 'http://twdown.net/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    }

    # Initialise both results so the final return cannot hit an unbound name
    # when every attempt raises.
    hxs = None
    html = None
    for remaining in range(max_tries, 0, -1):
        print(remaining)
        try:
            body = requests.post(
                url,
                headers=headers,
                proxies=proxies,
                data=data,
                timeout=timeout,
            ).content
            response = HtmlResponse(url=url, body=body, encoding='utf-8')
            hxs = HtmlXPathSelector(response)
            html = body
            break
        except Exception as e:
            # Best-effort retry: report the failure, back off briefly, go on.
            print(e)
            time.sleep(2)
            print(remaining)
    return hxs, html

def get_download_url(p):
    """Fetch the MP4/MP3 download links for one stored tweet via twdown.net.

    Reads the record with ``_id == p`` from ``alex_movie.alex_movie_bytwitter``,
    submits its ``url`` field through ``get_url`` and scans the result table
    for anchors. When a link is found, stores it as ``mp4_url`` / ``mp3_url``
    and sets ``is_download`` to ``True`` on the record.

    Args:
        p: The ``_id`` of the record to process.

    Returns:
        None. Does nothing when the record is missing or the page could not
        be fetched.
    """
    link = 'http://twdown.net/'
    collection = db_client.alex_movie.alex_movie_bytwitter
    temp = collection.find_one({'_id': p})
    if temp is None:
        print('no record for _id %s' % p)
        return

    hxs, html = get_url({'URL': temp['url']})
    if hxs is None:
        # get_url gave up without a parsed page; there is nothing to scan.
        print('no response for _id %s' % p)
        return

    mp4 = ''
    mp3 = ''
    rows = hxs.select('//div[@class="col-md-8"]/table[@class="table table-condensed table-hover table-striped"]/tbody/tr')
    for row in rows:
        for anchor in row.select('./td/a'):
            # NOTE(review): the XPath literal was garbled in the source text;
            # './@href' is the reconstruction -- confirm against the original.
            href = ''.join(anchor.select('./@href').extract()).strip()
            if '.mp4' in href:
                mp4 = href
                print(mp4)
            if 'mp3.' in href:
                # The MP3 link is resolved against the site root before storing.
                mp3 = urlparse.urljoin(link, href)
                print(mp3)

    found = {}
    if mp4:
        found['mp4_url'] = mp4
    if mp3:
        found['mp3_url'] = mp3
    if found:
        found['is_download'] = True
        collection.update({'_id': temp['_id']}, {'$set': found})

def download(p):
    """Download the video of one stored tweet with youtube-dl.

    Reads the record with ``_id == p`` from ``alex_movie.alex_movie_bytwitter``
    and runs youtube-dl on its ``url`` field, up to 10 times. On success the
    record gets ``is_mp4``, ``movie_download_site`` and ``download_time``; if
    the last attempt also fails, ``is_download`` is set to ``False``.

    Args:
        p: The ``_id`` of the record to download.

    Returns:
        None.
    """
    # Local import: the file's import block does not provide subprocess.
    import subprocess

    movie_site = '/mnt/parastor/data/downdata/videos/usatoday_mp4/'
    collection = db_client.alex_movie.alex_movie_bytwitter
    temp = collection.find_one({'_id': p})
    if temp is None:
        print('no record for _id %s' % p)
        return

    url = temp['url']
    # %-formatting works for string and non-string ids alike.
    video_path = os.path.join(movie_site, '%s.mp4' % temp['_id'])
    # Pass an argument list instead of a shell string so a stored URL cannot
    # inject shell syntax; '--' keeps a URL starting with '-' from being
    # parsed as an option.
    command = [
        'youtube-dl',
        '--proxy', 'socks5://127.0.0.1:1080',
        '-i',
        '--no-check-certificate',
        '-o', video_path,
        '-R', '20',
        '--', url,
    ]
    try:
        for remaining in range(10, 0, -1):
            status = subprocess.call(command)
            if status == 0:
                print('成功')
                collection.update(
                    {'_id': temp['_id']},
                    {'$set': {
                        'is_mp4': True,
                        'movie_download_site': video_path,
                        'download_time': int(time.time()),
                    }},
                )
                break
            # Any non-zero exit status is a failed attempt.
            print('錯誤')
            if remaining == 1:
                collection.update(
                    {'_id': temp['_id']},
                    {'$set': {'is_download': False}},
                )
            time.sleep(3)
    except Exception as e:
        # Best-effort: report the problem instead of silently discarding it.
        print('.... %s' % e)

if __name__ == '__main__':
    # db_client is a module-level global read by get_download_url/download.
    db_client = MongoClient('192.168.86.136', 27017)
    try:
        collection = db_client.alex_movie.alex_movie_bytwitter
        pending = {'is_download': True, 'mp3_url': {'$exists': False}}
        while True:
            # find_one returns None when nothing matches; a Cursor object is
            # always truthy, so testing it can never end the loop.
            record = collection.find_one(pending)
            if record is None:
                break
            print('------>ok')
            p = record['_id']
            # Clear the flag first; get_download_url sets it again on success.
            collection.update({'_id': p}, {'$set': {'is_download': False}})
            # download(p)
            # NOTE(review): a record for which only an MP4 link is found gets
            # is_download=True again but still has no mp3_url, so it matches
            # `pending` on the next pass -- confirm this is intended.
            get_download_url(p)
    finally:
        # Release the connection even if processing raised.
        db_client.close()

Twitter視頻下載方式