達觀杯_構建模型(二)邏輯迴歸
阿新 • • 發佈:2018-12-11
特徵:tfidf(word+article)
"""Train logistic regression on tfidf(word+article) features and save predictions.

1. Features: tfidf(word+article)
2. Model: lr
3. Parameters: C=120
"""
import pickle

import pandas as pd
from sklearn.linear_model import LogisticRegression

# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# a feature file that you produced yourself.
with open('tfidf(word+article).pkl', 'rb') as f:
    x_train, y_train, x_test = pickle.load(f)

clf = LogisticRegression(C=120, dual=False)
clf.fit(x_train, y_train)

# Predicted labels and per-class probabilities for the test set.
y_test = clf.predict(x_test)
y_test_prob = clf.predict_proba(x_test)

# Save the prediction results.
# The original referenced the undefined names `y_test_list` and
# `y_test_prob_LR` here, which raised NameError; the arrays computed above are
# the intended inputs.
# Labels are shifted by +1 before export — presumably the training labels were
# stored 0-based; confirm against the feature-building script.
y_test = [i + 1 for i in y_test.tolist()]
y_test_prob = y_test_prob.tolist()

# Derive the ids from the number of predictions instead of hard-coding the
# test-set size (102277), so the frame columns always have matching lengths.
ids = range(len(y_test))
df_result = pd.DataFrame({'id': ids, 'class': y_test})
df_proba = pd.DataFrame({'id': ids, 'prob': y_test_prob})

df_result.to_csv('lr(C=120)_tfidf(word+article).csv', index=False)
df_proba.to_csv('lr(C=120)_tfidf(word+article)_proba.csv', index=False)
特徵:linearsvm-tfidf(word)+lr-tfidf(article)
"""Train logistic regression on stacked features and save predictions.

1. Features: linearsvm-tfidf(word)+lr-tfidf(article)
2. Model: lr
3. Parameters: C=120
"""
import pickle

import pandas as pd
from sklearn.linear_model import LogisticRegression

# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# a feature file that you produced yourself.
with open('linearSVM-tfidf(word)+lr-tfidf(article).pkl', 'rb') as f:
    x_train, y_train, x_test = pickle.load(f)

clf = LogisticRegression(C=120, dual=False)
clf.fit(x_train, y_train)

# Predicted labels and per-class probabilities for the test set.
y_test = clf.predict(x_test)
y_test_prob = clf.predict_proba(x_test)

# Save the prediction results.
# The original referenced the undefined names `y_test_list` and
# `y_test_prob_LR` here, which raised NameError; the arrays computed above are
# the intended inputs.
# Labels are shifted by +1 before export — presumably the training labels were
# stored 0-based; confirm against the feature-building script.
y_test = [i + 1 for i in y_test.tolist()]
y_test_prob = y_test_prob.tolist()

# Derive the ids from the number of predictions instead of hard-coding the
# test-set size (102277), so the frame columns always have matching lengths.
ids = range(len(y_test))
df_result = pd.DataFrame({'id': ids, 'class': y_test})
df_proba = pd.DataFrame({'id': ids, 'prob': y_test_prob})

df_result.to_csv('LR_linearSVM-tfidf(word)+lr-tfidf(article).csv', index=False)
# The original wrote the probabilities to the same path as the labels, which
# overwrote the label file; use a distinct "_proba" file name instead.
df_proba.to_csv('LR_linearSVM-tfidf(word)+lr-tfidf(article)_proba.csv', index=False)