1. 程式人生 > 爬取喜馬拉雅FM分類榜的程式碼分析

爬取喜馬拉雅FM分類榜的程式碼分析

import requests
import json
import os

# HTTP headers sent with the API requests; the User-Agent is a desktop
# Chrome-on-macOS browser string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/65.0.3325.162 Safari/537.36'
    ),
}
# Base directory under which one folder per album is created.
base_path = 'F:\\SxhMycode\\PYtest\\ead\\FM\\'

def mkdir(path):
    """Create the directory *path* (with any missing parents) if it is absent.

    Surrounding whitespace is stripped from *path* first, then any trailing
    backslashes. If something already exists at the resulting path, nothing
    is done and nothing is printed.

    Raises:
        OSError: if the directory cannot be created.
    """
    path = path.strip()
    path = path.rstrip("\\")
    if not os.path.exists(path):
        # exist_ok tolerates the directory appearing between the check above
        # and this call.
        os.makedirs(path, exist_ok=True)
        # Report success only after the directory has actually been created.
        print(path + ' 建立成功')


# Category code appended to the rank-list URL below.
page = 'youshengshu'
# Rank-list endpoint for the chosen category.
theme_url = 'https://www.ximalaya.com/revision/getRankList?code=' + page
# Per-album track-list endpoint; the placeholders are (album id, page number).
start_url = 'https://www.ximalaya.com/revision/play/album?albumId={}&pageNum={}&sort=-1&pageSize=30'
# Number of track-list pages to fetch per album; raise it to download more.
page_count = 1
# Seconds before any single HTTP request is abandoned, so a stalled
# connection cannot hang the whole run.
request_timeout = 30
# Characters that are not allowed in Windows file names are replaced by '_'
# so album and track titles can be used as path components.
unsafe_chars = dict.fromkeys(map(ord, '\\/:*?"<>|'), '_')

# Fetch the rank list and pull out the albums it contains.
theme = requests.get(theme_url, headers=headers, timeout=request_timeout)
theme.raise_for_status()  # fail loudly instead of parsing an error page
rank_data = json.loads(theme.content.decode())
albums = rank_data['data']['albums']

for album in albums:
    album_title = album['albumTitle']
    album_id = album['id']
    # mkdir() strips the path it is given, so strip here as well to make sure
    # files are written into the directory that was actually created.
    path = base_path + album_title.translate(unsafe_chars).strip()
    mkdir(path)
    for page_num in range(1, page_count + 1):
        url = start_url.format(album_id, page_num)
        r = requests.get(url, headers=headers, timeout=request_timeout)
        r.raise_for_status()
        tracks = json.loads(r.content.decode())['data']['tracksAudioPlay']
        for track in tracks:
            src = track['src']
            name = track['trackName']
            if not src:
                # NOTE(review): an empty/None src presumably means the track
                # has no public audio URL - confirm against the API. Skipping
                # keeps one such track from aborting the whole run.
                print(name + ' 無下載地址, 已跳過')
                continue
            # Download before opening the file so a failed request does not
            # leave an empty .m4a behind.
            audio = requests.get(src, timeout=request_timeout)
            audio.raise_for_status()
            file_path = os.path.join(path, name.translate(unsafe_chars) + '.m4a')
            # 'wb' rather than 'ab': re-running must overwrite, not append a
            # second copy of the audio to an existing file.
            with open(file_path, 'wb') as f:
                f.write(audio.content)
            print(name)