1. 程式人生 > >資訊熵計算(自己編寫的python程式碼,垃圾,高手繞道)

資訊熵計算(自己編寫的python程式碼,垃圾,高手繞道)

# -*- coding:utf-8 -*-
"""Compute the Shannon entropy of the class-label column of a CSV file.

Created on 2017-09-15

@author: snow
"""
import csv
import math
from collections import Counter

# CSV file read by allData() when no explicit path is given.
fileName = "AllElectronics.csv"


def allData(path=None):
    """Read a CSV file and split it into header, rows, labels and features.

    Args:
        path: Path of the CSV file to read. When ``None`` (the default) the
            module-level ``fileName`` is used.

    Returns:
        A tuple ``(headers, dataContent, labels, dataSet)``:

        * ``headers``     -- the first non-empty row of the file.
        * ``dataContent`` -- every following non-empty row, complete.
        * ``labels``      -- the last column of each row in ``dataContent``.
        * ``dataSet``     -- each row of ``dataContent`` without its first
          and last columns.

        All four are empty lists when the file contains no rows.
    """
    if path is None:
        path = fileName

    # newline="" is what the csv module asks for so that quoted fields with
    # embedded line breaks are parsed correctly; the with-block guarantees
    # the file handle is closed.
    with open(path, encoding="UTF-8", newline="") as csv_file:
        # csv.reader yields [] for blank lines; skip them so that taking the
        # last column below cannot raise IndexError.
        rows = [row for row in csv.reader(csv_file) if row]

    if not rows:
        return [], [], [], []

    headers = rows[0]
    # Keep the complete rows: the feature slice below needs every column,
    # not just the label.
    dataContent = rows[1:]
    labels = [row[-1] for row in dataContent]
    # Drop the first column (presumably a record id -- confirm against the
    # CSV) and the last column (the label).
    dataSet = [row[1:-1] for row in dataContent]
    return headers, dataContent, labels, dataSet


def calEnt(labels):
    """Return the Shannon entropy, in bits, of the values in ``labels``.

    Each distinct label's occurrence count is printed, in order of first
    appearance, before the entropy is returned.

    Args:
        labels: Iterable of hashable class labels.

    Returns:
        The entropy ``-sum(p * log2(p))`` as a float; ``0.0`` when
        ``labels`` is empty.
    """
    labelCounts = Counter(labels)
    # Probabilities are relative to the labels actually passed in, not to a
    # module-level global, so the function is correct for any input.
    total = sum(labelCounts.values())

    shannonEnt = 0.0
    for count in labelCounts.values():
        print(count)
        prob = count / total
        shannonEnt -= prob * math.log2(prob)  # base-2 logarithm
    return shannonEnt


if __name__ == "__main__":
    # Script entry point: load the default data set and print the entropy
    # of its label column.
    headers, dataContent, labels, dataSet = allData()
    numEntries = len(labels)
    res = calEnt(labels)
    print(res)