1. 程式人生 > >Python + Mysql 打造網易雲音樂 熱評庫 + 評論頭像下載【2】

Python + Mysql 打造網易雲音樂 熱評庫 + 評論頭像下載【2】

熱歌榜 熱評

由於熱歌榜頁面是動態載入的，我這裡先手動把網頁儲存為本地檔案（1.html），再從該檔案解析出歌曲列表來獲取熱評。

import requests 
import json
import os
from urllib.request import urlretrieve
from MySql  import MysqlConn
import pymysql
import datetime
import time
from selenium import webdriver

from bs4 import BeautifulSoup as BS

def _save_keyword_comments(conn, comments, is_hot, kw, song_name, commenturl, cur_time, cnt):
    """Insert every comment in *comments* into `tb_nemusic_user`.

    Args:
        conn: connection wrapper exposing ``sql``, ``exec()`` and ``comit()``.
        comments: list of comment dicts taken from the comment API response.
        is_hot: 1 for hot comments, 0 for regular ones (``ishotcomment`` column).
        kw: search keyword stored with each row.
        song_name: name of the song the comments belong to.
        commenturl: URL the comments were fetched from (``jsonurl`` column).
        cur_time: timestamp stored in ``adddate`` for every row.
        cnt: running counter printed in front of each comment.

    Returns:
        The counter after all comments have been stored.
    """
    # PyMySQL 1.0 removed the top-level ``pymysql.escape_string`` alias; the
    # converters module carries the same function in old and new releases.
    escape = pymysql.converters.escape_string
    # Iterate the whole list: the previous ``range(0, len(...) - 1)`` silently
    # dropped the last comment.
    for item in comments:
        author = item['user']
        content = item['content']
        # ``time`` is divided by 1000 before conversion — presumably the API
        # reports milliseconds; confirm against the API.
        c_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(item['time'] / 1000))
        values = [
            str(author['userId']),
            author['nickname'],
            author['avatarUrl'],
            kw,
            content,
            str(cur_time),
            str(is_hot),
            str(song_name),
            c_time,
            str(item['likedCount']),
            commenturl,
            # Read from the current comment; the regular-comment loop used to
            # reuse the id left over from the hot-comment loop.
            str(item['commentId']),
        ]
        # Escape every value, not only nickname and content, so quotes in the
        # keyword, song name or URLs cannot break the statement.
        quoted = ",".join("'" + escape(v) + "'" for v in values)
        conn.sql = "INSERT INTO `tb_nemusic_user` (`id`, `name`,`header`, `kw`,`comment`, `adddate`,`ishotcomment`,`song`,`comment_time`,`likedCount`,`jsonurl`,`commentId`) VALUES (" + quoted + ")"
        conn.exec()
        conn.comit()
        print(str(cnt) + '\t' + content)
        cnt += 1
    return cnt


def GetCommentByKW(kw,num):
    """Search songs by keyword and store their hot and regular comments.

    Args:
        kw: keyword sent to the search endpoint; also stored with every row
            and used to name the ``<kw>Header`` directory.
        num: value passed as ``limit`` to the comment endpoint.

    Returns:
        ``'No Songs'`` when the search yields no songs, otherwise ``None``.
    """
    url = 'http://120.79.36.48/search?keywords=%20'+kw
    resp = requests.get(url)
    resp.encoding = 'utf8'
    hjson = resp.json()
    # A response without ``result``/``songs`` now counts as "no songs"
    # instead of raising KeyError before the check below could run.
    found = (hjson.get('result') or {}).get('songs') or []
    songs = [[song['id'], song['name']] for song in found]
    if not songs:
        return 'No Songs'
    cnt = 0
    # Directory for commenter avatars; nothing in this function writes to it.
    dirname = kw+'Header'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    conn = MysqlConn()
    cur_time = datetime.datetime.now()
    for s in songs:
        commenturl = 'http://120.79.36.48/comment/music?id='+str(s[0])+'&limit='+str(num)
        r = requests.get(commenturl)
        r.encoding = 'utf8'
        # Named ``payload`` so the imported ``json`` module is not shadowed.
        payload = r.json()
        print(s)
        cnt = _save_keyword_comments(
            conn, payload.get('hotComments') or [], 1, kw, s[1], commenturl, cur_time, cnt)
        cnt = _save_keyword_comments(
            conn, payload.get('comments') or [], 0, kw, s[1], commenturl, cur_time, cnt)
    print('已獲取完!')


def get_html(url):
    """Fetch *url* over HTTP and return the response text.

    The response encoding is set from ``apparent_encoding`` before decoding.

    Args:
        url: address to request.

    Returns:
        The response body as text, or ``""`` when the request fails or the
        server answers with an error status.
    """
    try:
        r = requests.get(url)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    # Only request failures are treated as "no content"; a bare ``except``
    # would also swallow KeyboardInterrupt and genuine programming errors.
    except requests.RequestException:
        return ""
def read_html(path):
    """Read a UTF-8 text file and return its contents.

    Args:
        path: location of the file to read.

    Returns:
        The file contents, or ``""`` when the file cannot be opened or is not
        valid UTF-8.
    """
    try:
        # ``with`` closes the handle even if reading fails; the previous
        # version never closed it.
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    # OSError covers missing/unreadable files; ValueError covers
    # UnicodeDecodeError and malformed paths.
    except (OSError, ValueError):
        return ""

def GetTop100Comment():
    """Store the hot comments of every song linked from the saved chart page.

    Reads ``1.html`` (the dynamically rendered chart page, saved by hand),
    takes the song id from each link inside the first ``<tbody>`` whose
    ``href`` splits into exactly two parts on ``=``, requests up to 20
    comments for it, and inserts each hot comment into ``tb_nemusic_user``.

    Returns:
        None. Prints a message and returns early when the page has no
        ``<tbody>``.
    """
    html = read_html("1.html")  # dynamic page, saved to disk manually
    soup = BS(html)
    bodies = soup.findAll("tbody")
    if not bodies:
        # read_html returns "" on failure, which used to surface here as an
        # unexplained IndexError.
        print('No <tbody> found in 1.html')
        return
    conn = MysqlConn()
    cur_time = datetime.datetime.now()
    # PyMySQL 1.0 removed the top-level ``pymysql.escape_string`` alias; the
    # converters module carries the same function in old and new releases.
    escape = pymysql.converters.escape_string
    for anchor in bodies[0].findAll("a"):
        # Anchors without an href, or whose href is not "<path>=<id>", are skipped.
        parts = (anchor.get('href') or '').split('=')
        if len(parts) != 2:
            continue
        song_id = parts[1]
        commenturl = 'http://120.79.36.48/comment/music?id='+str(song_id)+'&limit=20'
        r = requests.get(commenturl)
        r.encoding = 'utf8'
        # Named ``payload`` so the imported ``json`` module is not shadowed.
        payload = r.json()
        print(song_id+'---------------------------------------')
        # Iterate the whole list: the previous ``range(0, len(...) - 1)``
        # silently dropped the last hot comment of every song.
        for item in payload.get('hotComments') or []:
            author = item['user']
            content = item['content']
            # ``time`` is divided by 1000 before conversion — presumably the
            # API reports milliseconds; confirm against the API.
            c_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(item['time'] / 1000))
            values = [
                str(author['userId']),
                author['nickname'],
                author['avatarUrl'],
                '雲音樂熱歌榜',
                content,
                str(cur_time),
                str(1),
                str(song_id),
                c_time,
                str(item['likedCount']),
                commenturl,
                str(item['commentId']),
            ]
            # Escape every value so quotes in scraped text cannot break the
            # statement.
            quoted = ",".join("'" + escape(v) + "'" for v in values)
            conn.sql = "INSERT INTO `tb_nemusic_user` (`id`, `name`,`header`, `kw`,`comment`, `adddate`,`ishotcomment`,`song`,`comment_time`,`likedCount`,`jsonurl`,`commentId`) VALUES (" + quoted + ")"
            conn.exec()
            conn.comit()
            print(content)



# Disabled example: keyword search, passing 20 as the comment limit per song.
# GetCommentByKW('我的一個道姑朋友',20)
# Scrape hot comments for the songs listed in the saved chart page.
GetTop100Comment()