1. 程式人生 > >Python3之拉鉤資料視覺化

Python3之拉鉤資料視覺化

工具:Pycharm,Navicat

將前段時間使用爬蟲獲取的資料進行視覺化分析。

於2018/3/18增加職位詞雲

一.統計資料

import pymysql

# Open the MySQL connection; replace the placeholder password and database
# name with your own values before running.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='你的密碼',
    db='你的資料庫名',
    charset='utf8',
)

# Single cursor used by the queries in this script.
cursor = db.cursor()
# Education level: read the `education` column of every row in the `lagou`
# table; fetchall() returns one tuple per row.
edusql = "SELECT education FROM lagou"
cursor.execute(edusql)

# Distinct non-empty values, kept as a list because the charts take lists.
edulist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# educount[i] is the number of rows whose education equals edulist[i].
# The value is bound as a query parameter instead of being formatted into the
# SQL text, so quotes in the data cannot break the statement, and COUNT(*)
# lets the server count instead of sending every matching row to the client.
sql1 = "SELECT COUNT(*) FROM lagou WHERE education = %s"
educount = []
for each in edulist:
    cursor.execute(sql1, (each,))
    educount.append(cursor.fetchone()[0])
# print(edulist, educount)

# Work experience: read the `workYear` column of every row in the `lagou`
# table; fetchall() returns one tuple per row.
worksql = "SELECT workYear FROM lagou"
cursor.execute(worksql)

# Distinct non-empty values, kept as a list because the charts take lists.
worklist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# workcount[i] is the number of rows whose workYear equals worklist[i].
# The value is bound as a query parameter so quotes in the data cannot break
# the statement; COUNT(*) avoids fetching whole rows just to count them.
sql2 = "SELECT COUNT(*) FROM lagou WHERE workYear = %s"
workcount = []
for each in worklist:
    cursor.execute(sql2, (each,))
    workcount.append(cursor.fetchone()[0])
# print(worklist, workcount)

# District: read the `district` column of every row in the `lagou` table.
dissql = "SELECT district FROM lagou"
cursor.execute(dissql)

# Distinct non-empty values, kept as a list because the charts take lists.
dislist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# discount[i] is the number of rows whose district equals dislist[i].
# The value is bound as a query parameter so quotes in the data cannot break
# the statement; COUNT(*) avoids fetching whole rows just to count them.
sql3 = "SELECT COUNT(*) FROM lagou WHERE district = %s"
discount = []
for each in dislist:
    cursor.execute(sql3, (each,))
    discount.append(cursor.fetchone()[0])
# print(dislist, discount)

# Job titles: read every positionName once and tally the rows per title here.
# Spaces are removed only to build the label that gets displayed. Counting is
# done on the fetched rows rather than by querying the stripped label back,
# because a label with its spaces removed no longer matches the stored title
# and would be counted as 0.
positionsql = "SELECT positionName FROM lagou"
cursor.execute(positionsql)

positiontotals = {}
for row in cursor.fetchall():
    title = row[0]
    if not title:
        continue  # skip NULL / empty titles
    label = title.replace(' ', '')
    # Titles that differ only in spacing share one label and one total.
    positiontotals[label] = positiontotals.get(label, 0) + 1

# Parallel lists: positioncount[i] belongs to positionlist[i].
positionlist = list(positiontotals)
positioncount = list(positiontotals.values())
# print(positionlist, positioncount)

db.close()

於2018/3/20修改

將程式中獲取資料列表與資料數量列表封裝成一個函式,並使用Counter來獲取。

from collections import Counter

def getList(filed):
    """Return the distinct non-empty values of one column and their counts.

    Runs ``SELECT <filed> FROM lagou`` on the module-level ``cursor`` and
    tallies the fetched values with ``Counter``.

    Args:
        filed: Name of the column to read. It is interpolated into the SQL
            text as an identifier (identifiers cannot be bound as query
            parameters), so pass only trusted, hard-coded column names.

    Returns:
        A ``(resultlist, resultcount)`` pair of equal-length lists, where
        ``resultcount[i]`` is the number of rows whose value is
        ``resultlist[i]``. Falsy values (``None``, empty string) are skipped.
    """
    # Lower-case table name, matching the other queries on this table; the
    # two spellings are different tables on a case-sensitive MySQL server.
    sql = "SELECT %s FROM lagou" % filed
    cursor.execute(sql)
    tally = Counter(row[0] for row in cursor.fetchall() if row[0])

    # keys() and values() iterate in the same order, so the lists stay aligned.
    return list(tally.keys()), list(tally.values())

# Education level: read the `education` column of every row in the `lagou`
# table; fetchall() returns one tuple per row.
edusql = "SELECT education FROM lagou"
cursor.execute(edusql)

# Distinct non-empty values, kept as a list because the charts take lists.
edulist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# educount[i] is the number of rows whose education equals edulist[i].
# The value is bound as a query parameter instead of being formatted into the
# SQL text, so quotes in the data cannot break the statement, and COUNT(*)
# lets the server count instead of sending every matching row to the client.
sql1 = "SELECT COUNT(*) FROM lagou WHERE education = %s"
educount = []
for each in edulist:
    cursor.execute(sql1, (each,))
    educount.append(cursor.fetchone()[0])
# print(edulist, educount)

換成

# Education level: getList() queries the `education` column and returns two
# parallel lists - the distinct non-empty values and how many times each one
# occurs - which is what the charts need.
eduresult = getList('education')
edulist, educount = eduresult       # sequence unpacking
# print(edulist)
# print(educount)

另外三種資料的獲取方式也一樣進行修改。在下面的製作圖表過程中進行同樣的修改即可。

列印結果


二.製作圖表

在製作圖表之前給你們看一個例子:

製作柱狀圖:

from pyecharts import Bar

# Category labels and the value that belongs to each of them.
categories = ["襯衫", "羊毛衫", "雪紡衫", "褲子", "高跟鞋", "襪子"]
amounts = [5, 20, 36, 10, 75, 90]

# Chart with a main title and a subtitle, holding one series named "服裝".
bar = Bar("我的第一個圖表", "這裡是副標題")
bar.add("服裝", categories, amounts)
bar.show_config()
# No output path is passed here, so the default file name is used.
bar.render()

在Python中執行上面的程式後,會在當前目錄下生成一個名為 'render.html' 的檔案,使用瀏覽器開啟這個檔案就可以看到

你做好的柱狀圖了。如圖所示:

製作成餅圖

from pyecharts import Pie

# Each slice as a (label, value) pair, split below into the two parallel
# lists that pie.add() is given.
slices = [
    ("襯衫", 11),
    ("羊毛衫", 12),
    ("雪紡衫", 13),
    ("褲子", 10),
    ("高跟鞋", 10),
    ("襪子", 10),
]
attr, v1 = (list(column) for column in zip(*slices))

pie = Pie("餅圖示例")
pie.add("", attr, v1, is_label_show=True)
pie.show_config()
# Write the chart to an explicitly named HTML file.
pie.render(r"lizi.html")

在Python中執行上面的程式後,會在當前目錄下生成一個名為 'lizi.html' 的檔案,使用瀏覽器開啟這個檔案就可以看到

你做好的餅圖。如圖所示:



熟悉這個例子之後我們就可以進行我們的資料視覺化操作了。

現在就將工作經驗和教育程度做成柱狀圖,地區做成餅狀圖,職位名稱做成詞雲。

在統計資料中的程式碼的基礎上加入繪製圖表的程式碼變成

import pymysql
from pyecharts import Bar
from pyecharts import Pie
from pyecharts import WordCloud

# Open the MySQL connection. The password is a placeholder on purpose: a real
# password must not be published with the script, so fill in your own (and
# change the database name if yours differs) before running.
db = pymysql.connect(
    host='127.0.0.1',
    port=3306,
    user='root',
    passwd='你的密碼',
    db='TESTDB',
    charset='utf8',
)

# Single cursor used by the queries in this script.
cursor = db.cursor()
# Education level: read the `education` column of every row in `lagou`.
edusql = "SELECT education FROM lagou"
cursor.execute(edusql)

# Distinct non-empty values, kept as a list because the charts take lists.
edulist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# educount[i] is the number of rows whose education equals edulist[i].
# The value is bound as a query parameter so quotes in the data cannot break
# the statement; COUNT(*) avoids fetching whole rows just to count them.
sql1 = "SELECT COUNT(*) FROM lagou WHERE education = %s"
educount = []
for each in edulist:
    cursor.execute(sql1, (each,))
    educount.append(cursor.fetchone()[0])
# print(edulist, educount)

# Work experience: read the `workYear` column of every row in `lagou`.
worksql = "SELECT workYear FROM lagou"
cursor.execute(worksql)

# Distinct non-empty values, kept as a list because the charts take lists.
worklist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# workcount[i] is the number of rows whose workYear equals worklist[i].
# The value is bound as a query parameter so quotes in the data cannot break
# the statement; COUNT(*) avoids fetching whole rows just to count them.
sql2 = "SELECT COUNT(*) FROM lagou WHERE workYear = %s"
workcount = []
for each in worklist:
    cursor.execute(sql2, (each,))
    workcount.append(cursor.fetchone()[0])
# print(worklist, workcount)

# District: read the `district` column of every row in `lagou`.
dissql = "SELECT district FROM lagou"
cursor.execute(dissql)

# Distinct non-empty values, kept as a list because the charts take lists.
dislist = [value for value in {row[0] for row in cursor.fetchall()} if value]

# discount[i] is the number of rows whose district equals dislist[i].
# The value is bound as a query parameter so quotes in the data cannot break
# the statement; COUNT(*) avoids fetching whole rows just to count them.
sql3 = "SELECT COUNT(*) FROM lagou WHERE district = %s"
discount = []
for each in dislist:
    cursor.execute(sql3, (each,))
    discount.append(cursor.fetchone()[0])
# print(dislist, discount)

# Job titles: read every positionName once and tally the rows per title here.
# Spaces are removed only to build the label shown in the word cloud. Counting
# is done on the fetched rows rather than by querying the stripped label back,
# because a label with its spaces removed no longer matches the stored title
# and would be counted as 0.
positionsql = "SELECT positionName FROM lagou"
cursor.execute(positionsql)

positiontotals = {}
for row in cursor.fetchall():
    title = row[0]
    if not title:
        continue  # skip NULL / empty titles
    label = title.replace(' ', '')
    # Titles that differ only in spacing share one label and one total.
    positiontotals[label] = positiontotals.get(label, 0) + 1

# Parallel lists: positioncount[i] belongs to positionlist[i].
positionlist = list(positiontotals)
positioncount = list(positiontotals.values())
# print(positionlist, positioncount)

# Main title shared by the bar and pie charts.
name = '拉鉤圖表'

# Build the charts first: two bar charts, one pie chart, one word cloud.
bar1 = Bar(name, '第一張')
bar1.add('學歷要求', edulist, educount)

bar2 = Bar(name, '第二張')
bar2.add('工作經驗', worklist, workcount)

pie = Pie(name, '餅圖示例')
pie.add("", dislist, discount, is_label_show=True)

wordcloud = WordCloud(width=1300, height=620)
wordcloud.add("", positionlist, positioncount, word_size_range=[20, 100])

# Then, chart by chart (bars, pie, word cloud), call show_config() and write
# the chart to its own HTML file.
outputs = (
    (bar1, r'edu.html'),
    (bar2, r'work.html'),
    (pie, r"district.html"),
    (wordcloud, r"positionName.html"),
)
for chart, html_path in outputs:
    chart.show_config()
    chart.render(html_path)

# Release the database connection once everything has been rendered.
db.close()

執行程式後開啟對應的 html 檔案就可以看到圖表了。

三.展示結果