1. 程式人生 > >python爬蟲批量下載全民K歌音樂

python爬蟲批量下載全民K歌音樂

網址示例: https://node.kg.qq.com/personal?uid=639e9983222a338a

直接上原始碼:

import requests
import time
import re
import json
import pprint
import math
import os
# Headers sent with every metadata request (a desktop-browser User-Agent).
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 SE 2.X MetaSr 1.0',
}

# Filled by Parse_Song_Info(); each entry is {"name": ..., "url": ..., "type": ...}.
SongList = []

song_baseurl = "http://node.kg.qq.com/play"
album_baseurl = "http://node.kg.qq.com/cgi/fcgi-bin/kg_ugc_get_homepage"

# Seconds to wait on the network before giving up; without a timeout a
# stalled connection would block the whole batch forever.
REQUEST_TIMEOUT = 30

# Number of songs requested per listing page (the 'num' query parameter).
PAGE_SIZE = 8


def _safe_filename(name):
    """Return *name* with characters that are unsafe in file names replaced by "_"."""
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', "_", name).strip()
    return cleaned or "unnamed"


def _stream_to_file(response, destination, label):
    """Write the body of *response* to *destination*, printing progress under *label*."""
    try:
        size = int(response.headers.get('content-length', 0))
    except ValueError:
        size = 0
    # The size is only known when the server sent a usable Content-Length.
    size_text = str(size) + "位元組" if size > 0 else "未知"
    title = " 當前下載-" + label + " 檔案大小:" + size_text
    print('\033[0;31m' + title + "\033[0m")

    CurTotal = 0
    with open(destination, "wb") as f:
        for chunk in response.iter_content(chunk_size=512 * 1024):
            if not chunk:
                continue
            f.write(chunk)
            CurTotal += len(chunk)
            if size > 0:
                progress = '%3s' % (str(CurTotal * 100 // size)) + "%"
            else:
                # No total available: report the byte count instead of a percentage.
                progress = str(CurTotal) + "位元組"
            print("\r" + label + "--下載進度:" + progress, end='')
    print()


def Down(url_file, filePath, FileDir):
    """Download *url_file* into the folder *FileDir* under the file name *filePath*.

    The file name is sanitized first, because it is built from a song title
    supplied by the remote site.  Data is written to a temporary ".part"
    file and renamed only after the transfer finished, so an interrupted
    run never leaves a truncated file that a later run would skip.

    Args:
        url_file: URL of the media file.
        filePath: Desired file name (not a path) inside FileDir.
        FileDir: Target folder; created when it does not exist.

    Returns:
        0 when the file already exists and nothing was downloaded,
        otherwise None.  Download errors are reported on stdout and are
        not raised, so one failing song does not abort a batch.
    """
    filePath = _safe_filename(filePath)
    target = os.path.join(FileDir, filePath)
    partial = target + ".part"

    if not os.path.isdir(FileDir):
        os.makedirs(FileDir)
    if os.path.isfile(target):
        print(filePath + " --已存在")
        return 0

    try:
        r = requests.get(url_file, stream=True, timeout=REQUEST_TIMEOUT)
        try:
            # Fail early on 4xx/5xx instead of saving an error page as media.
            r.raise_for_status()
            _stream_to_file(r, partial, filePath)
        finally:
            r.close()
        os.replace(partial, target)
    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch job.
        print(filePath + " 下載出錯!" + " 錯誤資訊" + str(e.args))
        if os.path.isfile(partial):
            os.remove(partial)


def GetData(data, url):
    """Fetch *url* with query parameters *data* and return the body decoded as UTF-8."""
    response = requests.get(url, params=data, headers=header,
                            timeout=REQUEST_TIMEOUT)
    return response.content.decode("utf-8")


def Parse_Song_Info(content):
    """Extract one song from a play page and append it to SongList.

    The page embeds its data as ``window.__DATA__ = {...}; </script>``.
    The audio URL ('playurl') is preferred; when it is empty the video URL
    ('playurl_video') is used instead.

    Args:
        content: HTML text of the play page.

    Returns:
        The dict appended to SongList, or None when the page carries no
        data block or no usable media URL.
    """
    jsonobj = re.findall(r'window.__DATA__ = (.*?); </script>', content)
    if not jsonobj:
        print("沒有爬取到")
        return None

    detail = json.loads(jsonobj[0])['detail']
    name = detail['song_name']
    obj = {"name": name}
    if detail.get('playurl'):
        print(name + " 音樂:" + detail['playurl'])
        obj["url"] = detail['playurl']
        obj["type"] = ".mp3"
    elif detail.get('playurl_video'):
        print(name + " 視訊:" + detail['playurl_video'])
        obj["url"] = detail['playurl_video']
        obj["type"] = ".mp4"
    else:
        # Neither URL is present: queueing it would only fail later in Down().
        print(name + " -- 沒有爬取到")
        return None
    SongList.append(obj)
    return obj


def GetSongsByIndex(uid, Is_Parse, page):
    """Request one listing page of a user's songs.

    Args:
        uid: The user's share uid (the ``uid`` value of the profile URL).
        Is_Parse: When true, fetch and parse every song on the page (after
            a one-second pause); when false, only print the total.
        page: Value sent as the 'start' query parameter.

    Returns:
        The 'ugc_total_count' reported by the server, or 0 when the
        response could not be parsed.
    """
    params = {
        'jsonpCallback': 'callback_0',
        'g_tk': '5381',
        'outCharset': 'utf-8',
        'format': 'jsonp',
        'type': 'get_ugc',
        'start': str(page),
        'num': str(PAGE_SIZE),
        'touin': '',
        'share_uid': uid,
        'g_tk_openkey': '5381',
        '_': str(int(time.time() * 1000)),
    }
    response = requests.get(album_baseurl, params=params, headers=header,
                            timeout=REQUEST_TIMEOUT)
    # The endpoint answers with JSONP: callback_0({...}); keep only the JSON.
    jsonobj = re.findall(r'callback_\d\((.*)\)',
                         response.content.decode("utf-8"))
    if not jsonobj:
        print("沒有爬取到")
        return 0

    payload = json.loads(jsonobj[0])
    count = payload['data']['ugc_total_count']
    if Is_Parse:
        time.sleep(1)
        for obj in payload['data']['ugclist']:
            print(obj['title'] + " -- " + obj['shareid'])
            songdata = {"s": obj['shareid']}
            content = GetData(songdata, song_baseurl)
            Parse_Song_Info(content)
    else:
        pprint.pprint("共計:" + str(count))
    return count


def Run(uid):
    """Collect every song of user *uid* into SongList, page by page."""
    count = GetSongsByIndex(uid, False, 1)
    if count != 0:
        for page in range(1, math.ceil(count / PAGE_SIZE) + 1):
            GetSongsByIndex(uid, True, page)
    else:
        print("該使用者沒有歌曲")


if __name__ == "__main__":
    Run('639e9983222a338a')
    for s in SongList:
        Down(s["url"], s["name"] + s["type"], "小小")

將 Run() 括號裡的字串換成歌手主頁連結中 uid= 後面的值（例如上面網址示例中的 639e9983222a338a）。

“小小” 是儲存下載檔案的資料夾名稱，可自行修改。