機器學習演算法Python實現:基於情感詞典的文字情感分析
阿新 • • 發佈:2019-02-11
# -*- coding: utf-8 -*-
# This code was written in a Jupyter notebook. author: huzhifei, create time: 2018/8/14
# Purpose: run sentiment analysis on text data in Python using existing
# sentiment dictionaries.

# Import the required packages and load the custom jieba user dictionary.
import jieba
import numpy as np

jieba.load_userdict("C:\\Users\\Desktop\\中文分詞詞庫整理\\中文分詞詞庫整理\\百度分詞詞庫.txt")


def open_dict(Dict='hahah', path='C:\\Users\\Desktop\\Textming\\'):
    """Read a dictionary file and return its lines as a list.

    Args:
        Dict: Base name of the dictionary file (``.txt`` is appended).
        path: Directory prefix, including the trailing path separator.

    Returns:
        A list with one entry per line of the file, with newline characters
        stripped from both ends of each line.
    """
    path = path + '%s.txt' % Dict
    # The context manager closes the file even if reading fails.
    with open(path, 'r', encoding='utf-8', errors='ignore') as dictionary:
        return [word.strip('\n') for word in dictionary]


def judgeodd(num):
    """Return ``'even'`` when *num* is divisible by 2, otherwise ``'odd'``.

    Used on the number of negation words found before a sentiment word: an
    odd count flips the sign of the score, an even count leaves it unchanged.
    """
    if num % 2 == 0:
        return 'even'
    return 'odd'


deny_word = open_dict(Dict='deny')  # negation words
posdict = open_dict(Dict='positive')  # positive sentiment words
negdict = open_dict(Dict='negative')  # negative sentiment words
degree_word = open_dict(Dict='degree', path='C:\\Users\\AAS-1413\\Desktop\\Textming\\')  # degree words

# The degree list is sliced into sections between the marker entries
# 'extreme', 'very', 'more', 'ish' and 'last'; each section gets its own
# multiplier, applied to the score of the sentiment word that follows.
mostdict = degree_word[degree_word.index('extreme') + 1: degree_word.index('very')]  # multiplier 4
verydict = degree_word[degree_word.index('very') + 1: degree_word.index('more')]  # multiplier 3
moredict = degree_word[degree_word.index('more') + 1: degree_word.index('ish')]  # multiplier 2
ishdict = degree_word[degree_word.index('ish') + 1: degree_word.index('last')]  # multiplier 0.5

# Sentences prepared by the most recent sentiment_score_list() call. Kept at
# module level for backward compatibility; it is reset on every call.
seg_sentence = []


def _modifier_weight(preceding, degree_weight, negations):
    """Return the signed weight of one sentiment word given the words before it."""
    weight = 1
    denial_count = 0
    for w in preceding:
        if w in degree_weight:
            weight *= degree_weight[w]
        elif w in negations:
            denial_count += 1
    if judgeodd(denial_count) == 'odd':
        weight *= -1.0
    return weight


def _non_negative_scores(pos, neg):
    """Move negative score mass to the opposite polarity so neither score is below zero."""
    if pos < 0 and neg < 0:
        # A negated positive counts as negative and vice versa.
        return -neg, -pos
    if pos < 0:
        return 0, neg - pos
    if neg < 0:
        return pos - neg, 0
    return pos, neg


def sentiment_score_list(data):
    """Compute positive and negative scores for every review in *data*.

    Each review has its spaces replaced by commas and is segmented with
    jieba. Every positive or negative dictionary word contributes a base
    score of 1, multiplied by the degree words and sign-flipped by an odd
    number of negation words found since the previous sentiment word. An
    exclamation mark adds 2 to the polarity of the nearest sentiment word
    before it.

    Args:
        data: Iterable of review strings (for example the lines of a file).

    Returns:
        A list with one entry per review, each of the form
        ``[[pos_count, neg_count]]`` with both values non-negative.
    """
    # Reset instead of appending so repeated calls do not accumulate the
    # sentences of earlier calls.
    seg_sentence[:] = [line.replace(' ', ',') for line in data]

    # Build O(1) lookup structures once per call from the module-level lists.
    positive = set(posdict)
    negative = set(negdict)
    negations = set(deny_word)
    degree_weight = {}
    # Inserted from lowest to highest priority so that a word listed in
    # several sections ends up with the multiplier of the strongest section.
    for words, factor in ((ishdict, 0.5), (moredict, 2.0), (verydict, 3.0), (mostdict, 4.0)):
        for w in words:
            degree_weight[w] = factor

    count2 = []
    for sen in seg_sentence:
        segtmp = jieba.lcut(sen, cut_all=False)  # tokens of this review
        a = 0  # index just after the previous sentiment word
        poscount3 = 0  # accumulated positive score
        negcount3 = 0  # accumulated negative score
        for i, word in enumerate(segtmp):
            if word in positive:
                poscount3 += _modifier_weight(segtmp[a:i], degree_weight, negations)
                a = i + 1
            elif word in negative:
                negcount3 += _modifier_weight(segtmp[a:i], degree_weight, negations)
                a = i + 1
            elif word in ('!', '!'):
                # Walk backwards from the exclamation mark and boost the
                # nearest sentiment word, then stop.
                for w2 in reversed(segtmp[:i]):
                    if w2 in positive:
                        poscount3 += 2
                        break
                    if w2 in negative:
                        negcount3 += 2
                        break

        pos_count, neg_count = _non_negative_scores(poscount3, negcount3)
        count2.append([[pos_count, neg_count]])
    return count2


def sentiment_score(senti_score_list):
    """Label every review as positive, negative or neutral.

    Args:
        senti_score_list: Output of ``sentiment_score_list``: one list of
            ``[pos, neg]`` pairs per review.

    Returns:
        A string containing one label per review ('好評', '差評' or '中評'),
        each preceded by a newline character. The label is also printed.
    """
    labels = []
    for review in senti_score_list:
        score_array = np.array(review)
        Pos = np.sum(score_array[:, 0])  # total positive score
        Neg = np.sum(score_array[:, 1])  # total negative score
        res = Pos - Neg
        if res > 0:
            label = '好評'
        elif res < 0:
            label = '差評'
        else:
            label = '中評'
        print('該條評論是:' + label)
        labels.append('\n' + label)
    return ''.join(labels)


# Read the text to analyse. The lines are materialised once so the file can
# be closed before scoring starts.
with open("content.txt", "r", errors='ignore') as content_file:
    data = content_file.readlines()

# Run the analysis a single time and reuse the result for the output file.
result = sentiment_score(sentiment_score_list(data))

# Store the labels in a text file.
with open('s.txt', 'w', errors='ignore') as f:
    f.write(result)