用 Python 批量將 GBK/GB2312 編碼的檔案轉換為 UTF-8
阿新 • • 發佈:2019-02-09
# -*- coding: utf-8 -*-
"""Batch-convert GBK/GB2312 encoded text files to UTF-8.

Every regular file found in the sub-directories of ``folder`` (at any depth;
files sitting directly in ``folder`` itself are left alone) is decoded,
re-encoded as UTF-8, written next to the original as ``<stem>_.txt`` and the
original file is then deleted.

The code only relies on features shared by Python 2 and Python 3.
"""
import os

# Directory that holds the text files.  A raw string keeps the backslash
# literal instead of depending on ``\R`` being an unknown escape sequence.
folder = r'G:\Reduced'


def convert_file(path):
    """Re-encode one GB2312/GBK file as UTF-8 and delete the original.

    The content is decoded strictly as GB2312 first.  If that fails it is
    decoded as GBK, silently dropping any bytes that are still undecodable.

    Args:
        path: Path of the file to convert.

    Returns:
        The path of the newly written UTF-8 file (``<stem>_.txt`` in the same
        directory as ``path``).

    Raises:
        IOError/OSError: If the file cannot be read, written or removed.
    """
    # Binary mode: the bytes must reach the codec untouched (no newline
    # translation or Ctrl-Z truncation on Windows).
    with open(path, 'rb') as src:
        raw = src.read()

    try:
        text = raw.decode('gb2312')
    except UnicodeDecodeError:
        # GBK is a superset of GB2312; 'ignore' keeps the conversion
        # best-effort for files containing stray bytes.
        text = raw.decode('gbk', 'ignore')

    # splitext() copes with extensions of any length (or none at all),
    # unlike slicing off a fixed four characters.
    stem = os.path.splitext(os.path.basename(path))[0]
    new_path = os.path.join(os.path.dirname(path), stem + '_.txt')

    with open(new_path, 'wb') as dst:
        dst.write(text.encode('utf-8'))

    # Only reached once the new file has been written and closed.
    os.remove(path)
    return new_path


def convert_tree(root):
    """Convert every file located in the sub-directories of ``root``.

    Args:
        root: Top-level directory.  Files directly inside it are not touched;
            only files inside its (nested) sub-directories are converted.

    Returns:
        A list with the paths of the newly created UTF-8 files.
    """
    # os.walk() yields ``root`` first; drop it to keep sub-directories only.
    # The list is built up front, before any file is created or removed.
    sub_dirs = [entry[0] for entry in os.walk(root)][1:]

    converted = []
    for data_dir in sub_dirs:
        # listdir() is evaluated once per directory, so freshly written
        # ``*_.txt`` files are not picked up again during this run.
        for name in os.listdir(data_dir):
            path = os.path.join(data_dir, name)
            if not os.path.isfile(path):
                # Nested directories show up in listdir() too; they are
                # handled by their own os.walk() entry.
                continue
            converted.append(convert_file(path))
            print(path)
    return converted


if __name__ == '__main__':
    convert_tree(folder)