Python3之拉鉤資料視覺化
阿新 • • 發佈:2019-01-09
工具:Pycharm,Navicat
將前段時間使用爬蟲獲取的資料進行視覺化分析。
於2018/3/18增加職位詞雲
一.統計資料
import pymysql

# Connect to the MySQL database that holds the scraped job postings.
db = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的密碼', db='你的資料庫名', charset='utf8')
cursor = db.cursor()

# NOTE: the table name is spelled `lagou` in every statement. MySQL table
# names are case-sensitive on case-sensitive file systems, so mixing
# `LAGOU` and `lagou` in one script is not portable.

# Education: read every value of the `education` column.
# fetchall() returns a tuple of one-element row tuples.
edusql = "SELECT education FROM lagou"
cursor.execute(edusql)
# A set drops duplicate rows and empty values are skipped; the chart
# needs the distinct values as a list.
edulist = [i[0] for i in set(cursor.fetchall()) if i[0]]
# Occurrence count of each value, in the same order as edulist.
educount = []
# The value is passed as a query parameter (not spliced into the SQL text)
# so that quotes inside the scraped data cannot break the statement.
sql1 = "SELECT * FROM lagou WHERE education = %s"
for each in edulist:
    cursor.execute(sql1, (each,))
    count1 = cursor.rowcount
    educount.append(count1)
# print(edulist, educount)

# Work experience: same procedure for the `workYear` column.
worksql = "SELECT workYear FROM lagou"
cursor.execute(worksql)
worklist = [j[0] for j in set(cursor.fetchall()) if j[0]]
workcount = []
sql2 = "SELECT * FROM lagou WHERE workYear = %s"
for each in worklist:
    cursor.execute(sql2, (each,))
    count2 = cursor.rowcount
    workcount.append(count2)
# print(worklist, workcount)

# District: same procedure for the `district` column.
dissql = "SELECT district FROM lagou"
cursor.execute(dissql)
dislist = [i[0] for i in set(cursor.fetchall()) if i[0]]
discount = []
sql3 = "SELECT * FROM lagou WHERE district = %s"
for each in dislist:
    cursor.execute(sql3, (each,))
    count3 = cursor.rowcount
    discount.append(count3)
# print(dislist, discount)

# Position names: the labels have spaces removed for display, but the
# count query must use the value exactly as stored, otherwise any name
# containing a space matches no row and is counted as 0.
positionsql = "SELECT positionName FROM lagou"
cursor.execute(positionsql)
rawpositions = [i[0] for i in set(cursor.fetchall()) if i[0]]
positionlist = [each.replace(' ', '') for each in rawpositions]
positioncount = []
sql4 = "SELECT * FROM lagou WHERE positionName = %s"
for each in rawpositions:
    cursor.execute(sql4, (each,))
    count4 = cursor.rowcount
    positioncount.append(count4)
# print(positionlist, positioncount)

db.close()
於2018/3/20修改
將程式中獲取資料列表與資料數量列表封裝成一個函式,並使用Counter來獲取。
from collections import Counter


def getList(filed):
    """Return the distinct non-empty values of one LAGOU column with their counts.

    The query is executed on the module-level ``cursor``.

    Args:
        filed: Name of the column to select; it is interpolated directly
            into the SQL text.

    Returns:
        A ``(resultlist, resultcount)`` pair of equally long lists:
        the distinct truthy column values in first-seen order, and the
        number of rows holding each value.
    """
    query = "SELECT %s FROM LAGOU" % filed
    cursor.execute(query)
    # Each fetched row is a one-element tuple; empty/NULL values are skipped.
    tally = Counter(row[0] for row in cursor.fetchall() if row[0])
    resultlist = list(tally)
    resultcount = list(tally.values())
    return resultlist, resultcount
將
# Education: read every value of the `education` column of the lagou table.
# fetchall() returns a tuple of one-element row tuples.
# The table name is spelled `lagou` in both statements: MySQL table names
# are case-sensitive on case-sensitive file systems.
edusql = "SELECT education FROM lagou"
# Run the SQL statement.
cursor.execute(edusql)
# A set drops duplicate rows and empty values are skipped; the chart
# needs the distinct values as a list.
edulist = [i[0] for i in set(cursor.fetchall()) if i[0]]
# Occurrence count of each value, in the same order as edulist.
educount = []
# The value is passed as a query parameter (not spliced into the SQL text)
# so that quotes inside the scraped data cannot break the statement.
sql1 = "SELECT * FROM lagou WHERE education = %s"
for each in edulist:
    cursor.execute(sql1, (each,))
    count1 = cursor.rowcount
    educount.append(count1)
# print(edulist, educount)
換成
# Education: getList('education') selects the education column and tallies
# its non-empty values, replacing the manual set-and-count loop.
# It returns two lists: the distinct values and their occurrence counts.
eduresult = getList('education')
edulist, educount = eduresult # sequence unpacking
# print(edulist)
# print(educount)
另外三種資料的獲取方式也一樣進行修改。在下面的製作圖表過程中進行同樣的修改即可。
列印結果
二.製作圖表
在製作圖表之前給你們看一個例子:
製作柱狀圖:
from pyecharts import Bar
# Category labels and the value plotted for each of them.
items = ["襯衫", "羊毛衫", "雪紡衫", "褲子", "高跟鞋", "襪子"]
amounts = [5, 20, 36, 10, 75, 90]

# Bar chart with a title and a subtitle, holding a single series.
bar = Bar("我的第一個圖表", "這裡是副標題")
bar.add("服裝", items, amounts)
bar.show_config()
# With no argument the chart is written to 'render.html'.
bar.render()
在Python中執行上面的程式後,會在當前檔案下生成一個名為 'render.html' 的檔案,使用瀏覽器開啟這個檔案就可以看到
你做好的柱狀圖了。如圖所示:
製作成餅圖
from pyecharts import Pie
# Pie chart with one unnamed series; labels and values are given inline.
pie = Pie("餅圖示例")
pie.add(
    "",
    ["襯衫", "羊毛衫", "雪紡衫", "褲子", "高跟鞋", "襪子"],
    [11, 12, 13, 10, 10, 10],
    is_label_show=True,
)
pie.show_config()
# Write the chart to an explicitly named HTML file.
pie.render(r"lizi.html")
在Python中執行上面的程式後,會在當前檔案下生成一個名為 'lizi.html' 的檔案,使用瀏覽器開啟這個檔案就可以看到
你做好的餅圖。如圖所示:
熟悉這個例子之後我們就可以進行我們的資料視覺化操作了。
現在就將工作經驗和教育程度做成柱狀圖,地區做成餅圖,職位名稱做成詞雲。
在統計資料中的程式碼的基礎上加入繪製圖表的程式碼變成
import pymysql
from pyecharts import Bar
from pyecharts import Pie
from pyecharts import WordCloud
# Open the MySQL connection and the cursor shared by every query below.
# NOTE: the credentials are hard-coded; replace them with your own.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='1likePython',
    db='TESTDB',
    charset='utf8',
)
cursor = db.cursor()
# Education: collect the distinct non-empty values of the `education`
# column and count the rows holding each one.
# The table name is spelled `lagou` in both statements: MySQL table names
# are case-sensitive on case-sensitive file systems.
edusql = "SELECT education FROM lagou"
cursor.execute(edusql)
edulist = [i[0] for i in set(cursor.fetchall()) if i[0]]
educount = []
# The value is passed as a query parameter (not spliced into the SQL text)
# so that quotes inside the scraped data cannot break the statement.
sql1 = "SELECT * FROM lagou WHERE education = %s"
for each in edulist:
    cursor.execute(sql1, (each,))
    count1 = cursor.rowcount
    educount.append(count1)
# print(edulist, educount)
# Work experience: collect the distinct non-empty values of the `workYear`
# column and count the rows holding each one.
# The table name is spelled `lagou` in both statements: MySQL table names
# are case-sensitive on case-sensitive file systems.
worksql = "SELECT workYear FROM lagou"
cursor.execute(worksql)
worklist = [j[0] for j in set(cursor.fetchall()) if j[0]]
workcount = []
# The value is passed as a query parameter (not spliced into the SQL text)
# so that quotes inside the scraped data cannot break the statement.
sql2 = "SELECT * FROM lagou WHERE workYear = %s"
for each in worklist:
    cursor.execute(sql2, (each,))
    count2 = cursor.rowcount
    workcount.append(count2)
# print(worklist, workcount)
# District: collect the distinct non-empty values of the `district`
# column and count the rows holding each one.
dissql = "SELECT district FROM lagou"
cursor.execute(dissql)
dislist = [i[0] for i in set(cursor.fetchall()) if i[0]]
discount = []
# The value is passed as a query parameter (not spliced into the SQL text)
# so that quotes inside the scraped data cannot break the statement.
sql3 = "SELECT * FROM lagou WHERE district = %s"
for each in dislist:
    cursor.execute(sql3, (each,))
    count3 = cursor.rowcount
    discount.append(count3)
# print(dislist, discount)
# Position names: collect the distinct non-empty values of the
# `positionName` column and count the rows holding each one.
positionsql = "SELECT positionName FROM lagou"
cursor.execute(positionsql)
rawpositions = [i[0] for i in set(cursor.fetchall()) if i[0]]
# Labels shown in the word cloud have their spaces removed, but the count
# query must use the value exactly as stored: looking up the stripped
# name would match no row for any position whose name contains a space.
positionlist = [each.replace(' ', '') for each in rawpositions]
positioncount = []
# The value is passed as a query parameter (not spliced into the SQL text)
# so that quotes inside the scraped data cannot break the statement.
sql4 = "SELECT * FROM lagou WHERE positionName = %s"
for each in rawpositions:
    cursor.execute(sql4, (each,))
    count4 = cursor.rowcount
    positioncount.append(count4)
# print(positionlist, positioncount)
def _show_and_render(chart, path):
    """Call show_config() on the chart, then render it to the given HTML path."""
    chart.show_config()
    chart.render(path)


# Title shared by the bar charts and the pie chart.
name = '拉鉤圖表'

# Bar chart: education requirements.
bar1 = Bar(name, '第一張')
bar1.add('學歷要求', edulist, educount)
_show_and_render(bar1, r'edu.html')

# Bar chart: work experience.
bar2 = Bar(name, '第二張')
bar2.add('工作經驗', worklist, workcount)
_show_and_render(bar2, r'work.html')

# Pie chart: districts, with labels shown.
pie = Pie(name, '餅圖示例')
pie.add("", dislist, discount, is_label_show=True)
_show_and_render(pie, r"district.html")

# Word cloud: position names weighted by their counts.
wordcloud = WordCloud(width=1300, height=620)
wordcloud.add("", positionlist, positioncount, word_size_range=[20, 100])
_show_and_render(wordcloud, r"positionName.html")

# All queries and charts are done; release the database connection.
db.close()
執行程式後開啟對應的 html 檔案就可以看到圖表了。