Python + Mysql 打造網易雲音樂 熱評庫 + 評論頭像下載【2】
阿新 • • 發佈:2019-02-19
熱歌榜 熱評
熱歌榜頁面是動態載入的，所以我這裡先在瀏覽器中手動將頁面儲存為本地檔案（1.html），再從該檔案解析出歌曲 id 來獲取熱評。
import datetime
import json
import os
import time
from urllib.request import urlretrieve

import pymysql
import requests
from bs4 import BeautifulSoup as BS
from selenium import webdriver

from MySql import MysqlConn

# Base URL of the API host that every search/comment request is sent to.
_API_BASE = 'http://120.79.36.48'

# Column list shared by every INSERT into `tb_nemusic_user`.
_INSERT_COLUMNS = (
    "`id`, `name`,`header`, `kw`,`comment`, `adddate`,`ishotcomment`,"
    "`song`,`comment_time`,`likedCount`,`jsonurl`,`commentId`"
)


def _sql_quote(value):
    """Return *value* as an escaped, single-quoted SQL string literal."""
    escape = getattr(pymysql, 'escape_string', None)
    if escape is None:
        # PyMySQL >= 1.0 dropped the package-level alias; the function
        # itself still lives in pymysql.converters.
        escape = pymysql.converters.escape_string
    return "'" + escape(str(value)) + "'"


def _fetch_json(url):
    """GET *url* and return the response body decoded as JSON."""
    resp = requests.get(url)
    resp.encoding = 'utf8'
    return resp.json()


def _comment_url(song_id, limit):
    """Build the comment-list URL for one song."""
    return (_API_BASE + '/comment/music?id=' + str(song_id)
            + '&limit=' + str(limit))


def _insert_comment(conn, item, kw, song, added_at, comment_url, is_hot):
    """Insert one comment record into `tb_nemusic_user`; return its text.

    Args:
        conn: MysqlConn instance; the statement is assigned to ``conn.sql``
            and then run with ``conn.exec()`` / ``conn.comit()``.
        item: one comment dict taken from the API response.
        kw: value stored in the `kw` column.
        song: value stored in the `song` column.
        added_at: value stored in the `adddate` column.
        comment_url: URL the comment was fetched from (`jsonurl` column).
        is_hot: truthy for hot comments; stored as '1' or '0'.
    """
    user = item['user']
    # The API's `time` field is divided by 1000 before formatting, i.e. it
    # is treated as a millisecond timestamp.
    posted_at = time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime(item['time'] / 1000))
    values = (
        user['userId'],
        user['nickname'],
        user['avatarUrl'],
        kw,
        item['content'],
        added_at,
        1 if is_hot else 0,
        song,
        posted_at,
        item['likedCount'],
        comment_url,
        # Read the id from *this* comment; never reuse one left over from
        # a previous loop iteration.
        item['commentId'],
    )
    # Every value is escaped, not only the nickname and content, so quotes
    # in song titles, keywords or URLs cannot break the statement.
    conn.sql = (
        "INSERT INTO `tb_nemusic_user` (" + _INSERT_COLUMNS + ") VALUES ("
        + ", ".join(_sql_quote(v) for v in values) + ")"
    )
    conn.exec()
    conn.comit()
    return item['content']


def GetCommentByKW(kw, num):
    """Search songs by keyword and store their hot and regular comments.

    Args:
        kw: search keyword; also stored with each row and used to name the
            ``<kw>Header`` directory.
        num: value passed as the ``limit`` query parameter of the comment
            request for each song.

    Returns:
        'No Songs' when the search yields no songs, otherwise None.
    """
    found = _fetch_json(_API_BASE + '/search?keywords=%20' + kw)
    result = found.get('result') or {}
    # Kept as [id, name] lists so the progress print looks the same.
    songs = [[song['id'], song['name']] for song in result.get('songs') or []]
    if not songs:
        return 'No Songs'

    dirname = kw + 'Header'
    os.makedirs(dirname, exist_ok=True)
    # NOTE: avatar download is currently disabled. To enable it, call
    # urlretrieve(item['user']['avatarUrl'],
    #             os.path.join(dirname, item['user']['nickname'] + '.jpg'))
    # for each saved comment.

    conn = MysqlConn()
    cur_time = datetime.datetime.now()
    cnt = 0
    for s in songs:
        commenturl = _comment_url(s[0], num)
        data = _fetch_json(commenturl)
        print(s)
        # Hot comments first (flag 1), then regular comments (flag 0).
        for is_hot, key in ((1, 'hotComments'), (0, 'comments')):
            # Iterate the whole list: the last comment must be saved too.
            for item in data.get(key) or []:
                content = _insert_comment(
                    conn, item, kw, s[1], cur_time, commenturl, is_hot)
                print(str(cnt) + '\t' + content)
                cnt += 1
    print('已獲取完!')


def get_html(url):
    """Fetch *url* and return its HTML text, or "" if the request fails."""
    try:
        r = requests.get(url)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        return ""


def read_html(path):
    """Return the UTF-8 text of the file at *path*, or "" if unreadable."""
    try:
        with open(path, 'r', encoding='utf-8') as f:
            return f.read()
    except (OSError, UnicodeDecodeError):
        return ""


def GetTop100Comment():
    """Store the hot comments of every song linked from the saved chart page.

    Reads ``1.html`` (the dynamic chart page, saved to disk by hand), takes
    the song id from each link inside the first ``<tbody>`` and inserts up
    to 20 hot comments per song.
    """
    html = read_html("1.html")
    soup = BS(html)
    tables = soup.findAll("tbody")
    if not tables:
        # read_html returns "" on failure, so report the problem instead of
        # failing with an IndexError on the missing <tbody>.
        print('找不到歌曲列表，請確認 1.html 已儲存且包含 <tbody>')
        return

    conn = MysqlConn()
    cur_time = datetime.datetime.now()
    for link in tables[0].findAll("a"):
        # Only hrefs with exactly one '=' are used; the text after it is
        # taken as the song id.
        parts = link.get('href', '').split('=')
        if len(parts) != 2:
            continue
        song_id = parts[1]
        commenturl = _comment_url(song_id, 20)
        data = _fetch_json(commenturl)
        print(song_id + '---------------------------------------')
        for item in data.get('hotComments') or []:
            content = _insert_comment(
                conn, item, '雲音樂熱歌榜', song_id, cur_time, commenturl, 1)
            print(content)


if __name__ == "__main__":
    # Example: fetch 20 regular comments per song for a keyword search.
    # GetCommentByKW('我的一個道姑朋友', 20)
    GetTop100Comment()