1. 程式人生 > >302重定向、喜馬音樂

302重定向、喜馬音樂

param chrom curl sde serve track os.chdir () 內存耗盡

import os

import requests


# Redirects: requests follows redirects by default, so ``r.url`` is already
# the final URL and ``r.history`` lists the intermediate responses.
def redirect(url):
    """Request *url* with fixed query parameters and print the redirect trail.

    Prints the final URL, the final status code and the response history.
    """
    # Pass allow_redirects=False to stop at the first response instead.
    r = requests.get(url, params={'chrome': 'utf-8', 'q': '666'})
    print(r.url, r.status_code, r.history)


redirect('http://www.so.com/s')
redirect('http://www.haosou.com/s')

# ********************* divider *********************
# Use case for redirects: request a URL that answers with status 302 in order
# to obtain the real download URL, which ends in a file extension.


# Download a track from Kuwo Music.
def downLoadKuwo(url):
    r"""Download the Kuwo track whose page is *url* into ``E:\music``.

    The file name is built from the first two ``-``-separated fields of the
    page title (in reverse order) and the extension of the URL that the
    ``antiserver.kuwo.cn`` endpoint redirects to.  Nothing is written when
    the target file already exists.
    """
    music_dir = r'E:\music'

    page = requests.get(url).text
    # NOTE(review): this literal appears in Traditional Chinese here; verify it
    # against the live page title (the fields before it are what is kept).
    title_text = page.split('title>')[1].split('-酷我音樂')[0]
    # NOTE(review): ':' is not a valid character in Windows file names --
    # confirm the intended separator.
    musicName = ':'.join(title_text.split('-')[:2][::-1])
    musicID = url.split('?')[0].split('/')[-1]

    # Built from adjacent literals so no continuation whitespace can leak into
    # the query string.
    api_url = (
        'http://antiserver.kuwo.cn/anti.s?format=aac|mp3'
        f'&rid=MUSIC_{musicID}&type=convert_url&response=res'
    )

    # With urllib2 one would call r.geturl() to get the final URL after several
    # redirects; requests follows redirects by default, so no extra request is
    # needed to read ``.url``.
    # stream=True defers fetching the body so iter_content() really reads it
    # chunk by chunk instead of from a fully buffered response.
    with requests.get(api_url, stream=True) as redirectResult:
        extension = redirectResult.url.split('.')[-1]
        target = os.path.join(music_dir, musicName + '.' + extension)

        os.makedirs(music_dir, exist_ok=True)
        if not os.path.isfile(target):
            with open(target, 'wb') as f:
                for chunk in redirectResult.iter_content(1024):
                    f.write(chunk)


downLoadKuwo('http://www.kuwo.cn/yinyue/97881')  # http://bd.kuwo.cn/yinyue/7746750

# **************************************** divider ****************************************
# Download audio from Ximalaya FM:
import json
import os
import re

import requests

headers = {'User-Agent': 'Mozilla/5.0 Chrome/59'}


def _safe_filename(name):
    """Return *name* without the characters Windows forbids in file names."""
    return re.sub(r'[\\/:*?"<>|]', '', name)


def getAnchors():
    """Return ``(link, title)`` pairs scraped from the first two listing pages.

    Each pair comes from an element carrying the ``discoverAlbum_title`` class
    on ``http://www.ximalaya.com/dq/<page>/``.
    """
    anchor_rule = re.compile(
        r'href="(.+?)" hashlink title="(.+?)" class="discoverAlbum_title'
    )
    anchors = []
    for page in range(1, 3):  # only the first two listing pages
        html = requests.get(
            'http://www.ximalaya.com/dq/%s/' % page, headers=headers
        ).text
        anchors.extend(anchor_rule.findall(html))
    return anchors


def getAlbums():
    r"""Save track listings and the first four tracks for every anchor.

    For each ``(link, title)`` pair from :func:`getAnchors`, a directory
    ``E:\scrapyDownload\<title>`` is created, every matched track tuple is
    written as one JSON line to ``<title>.txt`` inside it, and the first four
    tracks are downloaded as ``.m4a`` files.  Files are addressed by explicit
    path, so the process working directory is left untouched.
    """
    base_dir = r'E:\scrapyDownload'
    # Group 1 is used below as the track id and group 2 as the track title;
    # groups 3 and 4 are presumably a date and a play count -- TODO confirm.
    track_rule = re.compile(
        r'(\d+?)" track_title="(.+?)" track_.+?([0-9-]+?)<.+?title="(\d+?)次',
        re.S,
    )

    for link, title in getAnchors():
        safe_title = _safe_filename(title)
        folder = os.path.join(base_dir, safe_title)
        # makedirs also creates the missing parent directory.
        os.makedirs(folder, exist_ok=True)

        # Only the first page of the anchor's tracks is fetched.
        html = requests.get(link, headers=headers).text
        musicsDetails = track_rule.findall(html)

        listing_path = os.path.join(folder, '%s.txt' % safe_title)
        with open(listing_path, 'w', encoding='utf8') as f:
            f.writelines(
                json.dumps(track, ensure_ascii=False) + '\n'
                for track in musicsDetails
            )

        # There are many tracks, so only the first four per page are downloaded.
        for track in musicsDetails[:4]:
            js = 'http://www.ximalaya.com/tracks/%s.json' % track[0]
            musicUrl = requests.get(js, headers=headers).json()['play_path_32']
            # Download before opening the file so a failed request does not
            # leave an empty .m4a behind.
            audio = requests.get(musicUrl, headers=headers).content
            audio_path = os.path.join(folder, _safe_filename(track[1]) + '.m4a')
            with open(audio_path, 'wb') as music:
                music.write(audio)


if __name__ == '__main__':
    getAlbums()

302重定向、喜馬音樂