python3爬取qq音樂並下載 Python 爬取qqmusic音樂url並批量下載
本文參考Python 爬取qqmusic音樂url並批量下載
同學找我爬取一下qq音樂播放連結,包括歌詞等資訊打包成json,試了一下可以爬取。
一、找到qq音樂播放的url
1.找到搜尋頁面返回的資料包
歌曲最終的播放連結時經過多次拼接的,首先找到qq音樂搜尋歌曲介面,https://y.qq.com/portal/search.html#page=1&searchid=1&remoteplace=txt.yqq.top&t=song&w=%E5%91%A8%E6%9D%B0%E4%BC%A6
右鍵-檢查-重新整理-點選Network,ctrl+f,在控制檯右面搜尋第一首歌就會出現這個介面返回的json包
這個請求的連結是:
這個連結中有兩個關鍵字,n和w,分別是請求返回的歌曲條數和請求的歌曲名或歌手名,這是第一個連結。(w是unicode字元,我感覺直接中文也可以)
2.獲得返回的資料包後開始解析
url1返回的資料包把頭部 MusicJsonCallback4053122785448291 去掉後如下格式:
從返回的資料包中提取mid和strMediaMid,然後組合到第二個url中
url2:
'https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg?&jsonpCallback=MusicJsonCallback&cid=205361747&songmid=' + mid + '&filename=C400' + strMediaMid + '.m4a&guid=6612300644'
這個連結如果在瀏覽器開啟會下載一個檔案 fcg_music_express_mobile3.fcg 開啟之後可以看到vkey:
{"code":0,"cid":205361747,"userip":"183.197.0.92","data":{"expiration":80400,"items":[{"subcode":0,"songmid":"002eTq6539AsfN","filename":"C400001GuWCx14Gng9.m4a","vkey":"1F051B9378DC2CD04C0B6D2257E40D0F03E6FA042D076E462FEFF6534EFA4B579A47F084811B4BE893006805A9439224D286200FDE1598D0"}]}}
3.將url2獲取的 vkey 和 strMediaMids 一起組合成最終的url
url3: http://dl.stream.qqmusic.qq.com/C400' + strMediaMids[n] + '.m4a?vkey=' + vkey + '&guid=6612300644&uin=0&fromtag=66
二、下載歌曲
有了url下載就很簡單了,用urllib.request.urlretrieve()下載。
urllib.request.urlretrieve(url, 'F:\music\\'+data[key]['歌名']+'.m4a')
原始碼
原始碼中加入了儲存json檔案,方便呼叫讀取。
1 import json 2 import requests 3 import re 4 import urllib 5 import time 6 7 def parse(keyword,num): 8 #keyword:要搜尋的歌名或者歌手名,num:搜尋結果的條數 9 url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp?ct=24&qqmusic_ver=1298&new_json=1&remoteplace=txt.yqq.song' \ 10 '&searchid=57124856116396257&t=0&aggr=1&cr=1&catZhida=1&lossless=0&flag_qc=0&p=1&' \ 11 'n='+str(num)+'&w='+str(keyword)+'&g_tk=5381&jsonpCallback=MusicJsonCallback3695372008103126' \ 12 '&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8¬ice=0&platform=yqq&needNewCode=0' 13 print(url) 14 #新增user-agent 15 head = {'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36'} 16 17 #第一次返回:MusicJsonCallback3695372008103126包 18 response = requests.get(url, headers=head) 19 response = response.text.strip("MusicJsonCallback3695372008103126()[]") 20 21 #解析json 22 json_data = json.loads(response) 23 print(json_data) 24 25 json_data = json_data['data']['song']['list'] 26 print(json_data) 27 strMediaMids = [] 28 songmids = [] 29 srcs = {} 30 songnames = [] 31 singers = [] 32 albumns = [] 33 songid = [] 34 #遍歷所獲取的列表,找到歌曲資訊儲存在list中 35 for data in json_data: 36 try: 37 strMediaMids.append(data['file']['strMediaMid']) 38 songmids.append(data['mid']) 39 songnames.append(data['name']) 40 singers.append(data['singer'][0]['name']) 41 albumns.append(data['album']['name']) 42 songid.append(data['id']) 43 except: 44 print('wrong') 45 46 #將獲取到的資訊二次組裝成url 47 for n in range(0,len(strMediaMids)): 48 49 #將strMediaMids和songmids重新組合到url中 50 url2 = 'https://c.y.qq.com/base/fcgi-bin/fcg_music_express_mobile3.fcg?&jsonpCallback=MusicJsonCallback&cid=205361747&songmid=' + songmids[n] + '&filename=C400' + strMediaMids[n] + '.m4a&guid=6612300644' 51 #獲取返回檔案並解析得到vkey 52 response2 = requests.get(url2) 53 json_data2 = json.loads(response2.text) 54 vkey = json_data2['data']['items'][0]['vkey'] 55 #這是最終的歌曲url 56 song_url = 'http://dl.stream.qqmusic.qq.com/C400' + strMediaMids[n] + '.m4a?vkey=' + vkey + '&guid=6612300644&uin=0&fromtag=66' 57 58 # 獲取歌詞文字 59 refer = 'https://y.qq.com/n/yqq/song/' + songmids[n] + '.html' 60 head = { 61 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36', 62 'Referer': refer} 63 lyric_url = 'https://c.y.qq.com/lyric/fcgi-bin/fcg_query_lyric.fcg?nobase64=1&musicid=' \ 64 + str(songid[n]) + '&callback=jsonp1&g_tk=5381&jsonpCallback=jsonp1&loginUin=0&hostUin=0&format=jsonp&inCharset=utf8&outCharset=utf-8¬ice=0&platform=yqq&needNewCode=0' 65 response3 = requests.get(lyric_url, headers=head).text 66 json_data3 = response3.strip(' jsonp1()[]') 67 print(json_data3) 68 jsonp1 = json.loads(json_data3) 69 try: 70 lyric = jsonp1['lyric'] 71 except: 72 print('wrong') 73 # print(lyric) 74 75 # result = re.findall(r'[\u4e00-\u9fa5]+', lyric) 76 # lyric = ' '.join(result) 77 data = {} 78 data['歌手'] = singers[n] 79 data['歌名'] = songnames[n] 80 data['專輯'] = albumns[n] 81 data['url'] = song_url 82 data['lyric'] = lyric 83 data['songmid'] = songmids[n] 84 data['strMediaMids'] = strMediaMids[n] 85 data['songid'] = songid[n] 86 srcs[n] = data 87 with open(str(keyword)+'.json', 'w',encoding='utf-8') as f: 88 json.dump(srcs, f) 89 90 91 92 def download(keyword): 93 with open(keyword+'.json','r') as f: 94 data = json.load(f) 95 # print(data) 96 for key in data: 97 time.sleep(1) 98 url = data[key]['url'] 99 # print(url) 100 print('正在下載:',data[key]['歌名'],'......') 101 try: 102 urllib.request.urlretrieve(url, 'F:\music\\'+data[key]['歌名']+'.m4a') 103 # with open(str(data[key]['歌名'])+'-'+str(data[key]['歌手'])+'.m4a','w') as f: 104 # f.write(requests.get(url).content) 105 except: 106 print('下載'+data[key]['歌名']+'失敗') 107 print('下載完成!') 108 109 110 keyword = '周杰倫' 111 num = 1 112 parse(keyword, num) 113 download(keyword)
有錯誤歡迎指正!