1. 程式人生 > >python處理csv中的空值

python處理csv中的空值

# -*- coding: UTF-8 -*-
import jieba.posseg
import tensorflow as tf
import pandas as pd
import csv
import math
"""
1. Reads a CSV file whose rows are (ID, text).
2. Writes a CSV file whose rows are (ID, segmented text).
"""
# Command-line flags: where to read the input CSV and where to write the result.
flags = tf.app.flags
flags.DEFINE_string(
    "train_file_address",
    "D:/NLPWORD/cut_word_test/hzytest.csv",
    "新增訓練資料檔案",
)
flags.DEFINE_string(
    "result_file_address",
    "D:/NLPWORD/cut_word_test/hzytest_result.csv",
    "生成結果資料檔案",
)
FLAGS = tf.app.flags.FLAGS


def cut_word(train_data):
    """Segment the second column of ``train_data`` and write the result as CSV.

    Columns are read by position: column 0 is copied through unchanged and
    column 1 is segmented with jieba. For every input row whose column-1 cell
    is not missing, one row ``[column 0, space-separated tokens]`` is written
    to ``FLAGS.result_file_address``. Rows with a missing cell are skipped.

    :param train_data: pandas DataFrame with at least two columns.
    :return: None; the result is only written to the output file.
    """
    jieba.load_userdict("newdict.txt")
    # newline="" is required by the csv module; without it an extra blank
    # line is emitted after every row on Windows.
    with open(
        FLAGS.result_file_address, "w", encoding="utf8", newline=""
    ) as csvfile:
        writer = csv.writer(csvfile)
        # Positional column access avoids a per-row label lookup, which
        # misbehaves when the index contains duplicate labels.
        row_ids = train_data.iloc[:, 0]
        texts = train_data.iloc[:, 1]
        for row_id, text in zip(row_ids, texts):
            # Empty CSV cells are loaded as NaN; there is nothing to segment.
            if pd.isna(text):
                continue
            # Non-string cells (e.g. numbers) are converted so that jieba
            # always receives text.
            words = jieba.posseg.cut(str(text))
            # Every token is followed by a single space (including the last
            # one) to keep the output format of earlier runs.
            line = "".join(word.word + " " for word in words)
            writer.writerow([row_id, line])


def main(_):
    """Entry point for ``tf.app.run``: load the input CSV and segment it."""
    data = pd.read_csv(FLAGS.train_file_address)
    cut_word(data)


if __name__ == "__main__":
    tf.app.run(main)