1. 程式人生 > >GBK(GB2312)與UTF-8檔案轉碼

GBK(GB2312)與UTF-8檔案轉碼

最近使用的IntelliJ IDEA開發工具,轉碼有點小問題。百度了一下,Eclipse可以自動轉碼,而IDEA卻不可以,總是需要手動去轉。若要把原始檔由GBK轉成UTF-8,得靠其他方式了。網上搜羅了一下方法,然後自己整理了一下。現把程式碼貼出來,測試OK,可以直接使用!

需要的四個jar包,分別是:“antlr-2.7.6.jar”、“commons-io-2.4.jar”、“cpdetector_1.0.5.jar”、“jchardet-1.0.jar”,網上可以直接下載。根據下面的方法,可以把任意的編碼換成你想要的。轉成新編碼以後,用新檔案把原始檔覆蓋掉就可以了。

package com.company;

import

cpdetector.io.ASCIIDetector;
import cpdetector.io.CodepageDetectorProxy;
import cpdetector.io.JChardetFacade;
import cpdetector.io.UnicodeDetector;
import org.apache.commons.io.FileUtils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Collection;

public class
Main {
    privatestatic int fileCount = 0;
    privatestatic int convertedCount = 0;

    publicstatic void main(String[] args) {
        //源路徑
       
String primaryPath = "E:\\ WorkSpace\\Test\\src\\com";
        //目標路徑
       
String targetPath = "E:/src";
        //獲取所有java檔案
        utf8AndGbkConvert
(primaryPath,targetPath, false
);

    }

    /**
     * UTF8
和GBK(GB2312)互相轉換
     *
     * @param
primaryPath 原始檔路徑
     * @param
targetPath  目標檔案路徑
     * @param
isUtf8ToGbk 是否是UTF-9轉成GBK  true表示UTF-8轉成GBK(GB2312) false表示GBK(GB2312)轉成UTF-8
     */
   
public static void utf8AndGbkConvert(StringprimaryPath, String targetPath, boolean isUtf8ToGbk) {
        Collection<File> javaPrimaryFiles = FileUtils.listFiles(new File(primaryPath), new String[]{"java"}, true);
        fileCount= javaPrimaryFiles.size();
        for (File javaPrimaryFile :javaPrimaryFiles) {
            try {
                String chatsetName = judgeChatsetByFile(javaPrimaryFile);
                String newFilePath =targetPath + javaPrimaryFile.getAbsolutePath().substring(primaryPath.length());
                newFilePath =newFilePath.replaceAll("\\\\", "/");

                if (isUtf8ToGbk) {
                    if (chatsetName.equalsIgnoreCase("UTF-8")) {
                        //如果是原始檔為UTF-8則轉換成GBK,
                       
FileUtils.writeLines(new File(newFilePath), "GBK", FileUtils.readLines(javaPrimaryFile,"UTF-8"));
                    } else {
                        //不是則直接複製
                       
File f = new File(newFilePath.substring(0, newFilePath.lastIndexOf("/")));
                        if (!f.exists()) f.mkdirs();
                        File newFile = new File(newFilePath);
                        if (!newFile.exists())newFile.createNewFile();
                        FileUtils.copyFile(javaPrimaryFile,newFile);
                    }
                } else {
                    if (chatsetName.equalsIgnoreCase("GBK") || chatsetName.equalsIgnoreCase("GB2312")) {
                        //如果是原始檔為GBK或者GB2312則轉換成UTF-8,

                       
FileUtils.writeLines(new File(newFilePath), "UTF-8", FileUtils.readLines(javaPrimaryFile,chatsetName));
                    } else {
                        File f = new File(newFilePath.substring(0, newFilePath.lastIndexOf("/")));
                        if (!f.exists()) f.mkdirs();
                        File newFile = new File(newFilePath);
                        if (!newFile.exists())newFile.createNewFile();
                        FileUtils.copyFile(javaPrimaryFile,newFile);
                    }
                }

            } catch(IOException e) {
                e.printStackTrace();
            }
        }
        System.out.println("fileCount:" + fileCount);
        System.out.println("convertedCount:" + convertedCount);

    }

    publicstatic String judgeChatsetByFile(File f) {

        CodepageDetectorProxy detector =CodepageDetectorProxy.getInstance();


        //detector.add(new ParsingDetector(false)); //如果不希望判斷xml的encoding,而是要判斷該xml檔案的編碼,則可以註釋掉

       
detector.add(JChardetFacade.getInstance());

        //ASCIIDetector用於ASCII編碼測定

       
detector.add(ASCIIDetector.getInstance());

        //UnicodeDetector用於Unicode家族編碼的測定

       
detector.add(UnicodeDetector.getInstance());

        Charset charset = null;
        try {
            charset =detector.detectCodepage(f.toURL());
        } catch(Exception ex) {
            ex.printStackTrace();
        }
        String codeName = "";
        if (charset != null) {
            System.out.println(f.getName() + "編碼是:" + charset.name());
            convertedCount++;
            codeName = charset.name();
        } else {
            System.out.println(f.getName() + "未知");
            codeName = "UNKOWN";
        }
        returncodeName;
    }
}