GBK(GB2312)與UTF-8檔案轉碼
最近使用的IntelliJ IDEA開發工具,轉碼有點小問題。百度了一下,Eclipse可以自動轉碼,而IDEA卻不可以,總是需要手動去轉。若要把原始檔由GBK轉成UTF-8,得靠其他方式了。網上搜羅了一下方法,然後自己整理了一下。現把程式碼貼出來,測試OK,可以直接使用!
需要的四個jar包,分別是:“antlr-2.7.6.jar”、“commons-io-2.4.jar”、“cpdetector_1.0.5.jar”、“jchardet-1.0.jar”,網上可以直接下載。根據下面的方法,可以把任意的編碼換成你想要的。轉成新編碼以後,用新檔案把原始檔覆蓋掉就可以了。
package com.company;
import cpdetector.io.ASCIIDetector;
import cpdetector.io.CodepageDetectorProxy;
import cpdetector.io.JChardetFacade;
import cpdetector.io.UnicodeDetector;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collection;
public class
privatestatic int fileCount
= 0;
privatestatic int convertedCount
= 0;
publicstatic void main(String[] args) {
//源路徑
String primaryPath = "E:\\ WorkSpace\\Test\\src\\com";
//目標路徑
String targetPath = "E:/src";
//獲取所有java檔案
utf8AndGbkConvert(primaryPath,targetPath, false
}
/**
* UTF8和GBK(GB2312)互相轉換
*
* @param primaryPath
原始檔路徑
* @param targetPath
目標檔案路徑
* @param isUtf8ToGbk
是否是UTF-9轉成GBK true表示UTF-8轉成GBK(GB2312) false表示GBK(GB2312)轉成UTF-8
*/
public static void utf8AndGbkConvert(StringprimaryPath, String targetPath,
boolean isUtf8ToGbk) {
Collection<File> javaPrimaryFiles = FileUtils.listFiles(new
File(primaryPath),
new String[]{"java"},
true);
fileCount= javaPrimaryFiles.size();
for (File javaPrimaryFile :javaPrimaryFiles) {
try {
String chatsetName = judgeChatsetByFile(javaPrimaryFile);
String newFilePath =targetPath + javaPrimaryFile.getAbsolutePath().substring(primaryPath.length());
newFilePath =newFilePath.replaceAll("\\\\",
"/");
if (isUtf8ToGbk) {
if (chatsetName.equalsIgnoreCase("UTF-8")) {
//如果是原始檔為UTF-8則轉換成GBK,
FileUtils.writeLines(new
File(newFilePath),
"GBK", FileUtils.readLines(javaPrimaryFile,"UTF-8"));
} else {
//不是則直接複製
File f =
new File(newFilePath.substring(0, newFilePath.lastIndexOf("/")));
if (!f.exists()) f.mkdirs();
File newFile =
new File(newFilePath);
if (!newFile.exists())newFile.createNewFile();
FileUtils.copyFile(javaPrimaryFile,newFile);
}
} else {
if (chatsetName.equalsIgnoreCase("GBK") || chatsetName.equalsIgnoreCase("GB2312")) {
//如果是原始檔為GBK或者GB2312則轉換成UTF-8,
FileUtils.writeLines(new
File(newFilePath),
"UTF-8", FileUtils.readLines(javaPrimaryFile,chatsetName));
} else {
File f =
new File(newFilePath.substring(0, newFilePath.lastIndexOf("/")));
if (!f.exists()) f.mkdirs();
File newFile =
new File(newFilePath);
if (!newFile.exists())newFile.createNewFile();
FileUtils.copyFile(javaPrimaryFile,newFile);
}
}
} catch(IOException e) {
e.printStackTrace();
}
}
System.out.println("fileCount:"
+ fileCount);
System.out.println("convertedCount:"
+ convertedCount);
}
publicstatic String judgeChatsetByFile(File f) {
CodepageDetectorProxy detector =CodepageDetectorProxy.getInstance();
//detector.add(new ParsingDetector(false)); //如果不希望判斷xml的encoding,而是要判斷該xml檔案的編碼,則可以註釋掉
detector.add(JChardetFacade.getInstance());
//ASCIIDetector用於ASCII編碼測定
detector.add(ASCIIDetector.getInstance());
//UnicodeDetector用於Unicode家族編碼的測定
detector.add(UnicodeDetector.getInstance());
Charset charset = null;
try {
charset =detector.detectCodepage(f.toURL());
} catch(Exception ex) {
ex.printStackTrace();
}
String codeName = "";
if (charset != null) {
System.out.println(f.getName() +
"編碼是:"
+ charset.name());
convertedCount++;
codeName = charset.name();
} else {
System.out.println(f.getName() +
"未知");
codeName = "UNKOWN";
}
returncodeName;
}
}