1. 程式人生 > 讀EXCEL資料,通過百度NLP分析情感傾向,寫入xls

讀EXCEL資料,通過百度NLP分析情感傾向,寫入xls

# -*- coding: utf-8 -*-
"""
Created on Wed Oct 25 17:40:57 2017


@author: Administrator
"""


import pandas as pd
from aip import AipNlp


# Credentials passed to the Baidu AIP NLP client below.
# NOTE(review): these are hard-coded in source; consider loading them from
# configuration that is not committed.
APP_ID = '10251280'
API_KEY = 'd2sWjj6w9pNMKUHFIabPqIiA'
# NOTE(review): the value below ends with a space, which looks like a paste
# artifact -- confirm whether the service expects it before removing it.
SECRET_KEY = 'fGuEjHqGxHP5EdtTgGgD70QG0Gh9j8Ur '

# Build the shared AipNlp client used for every sentiment request.
aipNlp = AipNlp(APP_ID, API_KEY, SECRET_KEY)

# Module-level accumulators: analysis_news appends one positive and one
# negative probability per analysed text to these lists.
posi = []
nega = []




        
def get_data(path='jinrongjietable.xlsx'):
    """Load the text of the ``content`` column from an Excel workbook.

    Args:
        path: Workbook to read. Defaults to ``'jinrongjietable.xlsx'``, the
            file name this script originally hard-coded, so existing
            zero-argument calls behave as before.

    Returns:
        The ``content`` column of the sheet pandas reads by default, as a
        pandas Series with one entry per row.

    Raises:
        KeyError: If the sheet has no ``content`` column.
    """
    # No ``encoding`` keyword: an .xlsx workbook declares its own encoding,
    # and recent pandas releases reject the keyword with a TypeError.
    df = pd.read_excel(path)
    return df['content']


def analysis_news(news):
    """Score every news text with the Baidu sentiment classifier.

    Each text is sent to ``aipNlp.sentimentClassify`` and the positive and
    negative probabilities of the first returned item are appended to the
    module-level ``posi`` and ``nega`` lists. Because the lists live at
    module level, scores gathered before a failure remain available to the
    caller, and repeated calls keep extending the same lists.

    Args:
        news: Iterable of text strings, e.g. the Series from ``get_data``.

    Returns:
        The ``(posi, nega)`` module-level lists.

    Raises:
        KeyError, IndexError: If a response lacks the expected
            ``items[0]`` probabilities.
        AttributeError: If an entry is not a string (has no ``replace``).
    """
    # Iterate the values directly instead of ``news[i]``: integer lookup on
    # a Series is label-based and fails when the index is not 0..n-1.
    for text in news:
        # The first run failed with "'gbk' codec can't encode character
        # u'\xa0'", so non-breaking spaces are replaced before the request.
        content = text.replace(u'\xa0', u' ')

        result = aipNlp.sentimentClassify(content)
        first_item = result['items'][0]
        # Read both values before appending so the two lists never end up
        # with different lengths if one key is missing.
        negative_prob = first_item['negative_prob']
        positive_prob = first_item['positive_prob']
        posi.append(positive_prob)
        nega.append(negative_prob)
        # Progress indicator; the call form works on Python 2 and 3.
        print(len(posi))
    return posi, nega


def write_excel(posi, nega):
    """Write the positive and negative scores to two Excel workbooks.

    Args:
        posi: Sequence of positive probabilities, written to ``posi.xlsx``.
        nega: Sequence of negative probabilities, written to ``nega.xlsx``.

    Both files are created in the current working directory and are
    overwritten if they already exist.
    """
    # Build both frames before writing so a bad input fails before any file
    # is touched.
    posi_col = pd.DataFrame(posi)
    nega_col = pd.DataFrame(nega)

    # ``encoding`` is no longer passed: pandas 2.0 removed the keyword from
    # ``to_excel`` (TypeError), and it was only set on one of the two writes.
    posi_col.to_excel('posi.xlsx')
    nega_col.to_excel('nega.xlsx')
    
    
if __name__ == "__main__":
    # Pipeline: read the texts, score them, then write the scores out.
    news = get_data()
    try:
        posi, nega = analysis_news(news)
    except Exception as e:
        # Best effort: report the failure and carry on. analysis_news
        # appends to the module-level posi/nega lists as it goes, so the
        # scores collected before the error are still written below.
        # The call form of print works on both Python 2 and Python 3.
        print(e)

    write_excel(posi, nega)