Java基礎知識複習(七)-- 中文編碼問題
阿新 • • 發佈:2019-01-02
一、練習-數字對應的中文
找出 E5 B1 8C 這3個十六進位制對應UTF-8編碼的漢字
參考程式碼1
// Decode the three bytes E5 B1 8C as UTF-8 to find the Chinese character they encode.
// The (byte) casts are needed because each literal is larger than Byte.MAX_VALUE.
byte[] utf8Bytes = {(byte) 0xE5, (byte) 0xB1, (byte) 0x8C};
String decoded = new String(utf8Bytes, "UTF-8");
System.out.println("E5B18C 對應的字元是:" + decoded);
參考程式碼2
// Convert a hex string into bytes (two hex digits per byte), then decode the bytes as UTF-8.
String hex = "E5B18C";
byte[] decodedBytes = new byte[hex.length() / 2];
for (int pos = 0; pos + 1 < hex.length(); pos += 2) {
    // Parse the digit pair as a base-16 int first, then narrow it to a byte.
    String pair = hex.substring(pos, pos + 2);
    decodedBytes[pos / 2] = (byte) Integer.parseInt(pair, 16);
}
String text = new String(decodedBytes, "UTF-8");
System.out.println("E5B18C 對應的字元是:" + text);
執行結果
二、練習-移除BOM
如果用記事本以UTF-8編碼儲存漢字,就會在檔案最前面生成一段標示符(即BOM,Byte Order Mark),這個標示符用於表示該檔案是使用UTF-8編碼的。請找出這段標示符對應的十六進位制,並且開發一個方法,自動去除這段標示符。
在移除BOM之前,我們首先要知道UTF-8的BOM對應的編碼是多少,測試程式碼如下
package test;
import java.io.File;
import java.io.FileInputStream;
/**
 * Prints a file's content decoded as UTF-8, followed by a hex dump of its raw bytes.
 * Used to discover which bytes an editor writes in front of UTF-8 text.
 */
public class Test1 {
    /** File inspected when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "D:/Test/test1.txt";

    /**
     * Dumps the file as text and as space-separated, two-digit upper-case hex bytes.
     *
     * @param args optional; {@code args[0]}, when present, is the path of the file to dump,
     *             otherwise {@link #DEFAULT_PATH} is used
     */
    public static void main(String[] args) {
        File f = new File(args != null && args.length > 0 ? args[0] : DEFAULT_PATH);
        try (FileInputStream fis = new FileInputStream(f)) {
            byte[] bytes = new byte[(int) f.length()];
            int count = readFully(fis, bytes);
            System.out.println("檔案" + f.getName() + "中的內容:" + new String(bytes, 0, count, "UTF-8"));
            System.out.print("對應的16進製表示:");
            for (int i = 0; i < count; i++) {
                // Mask to 0..255 so negative bytes do not sign-extend, and pad to two digits
                // so that e.g. 0x0A is printed as "0A" rather than "A".
                System.out.print(String.format("%02X", bytes[i] & 0xFF) + " ");
            }
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads until {@code buf} is full or the stream ends; a single {@code read} call is
     * allowed to return fewer bytes than requested.
     *
     * @return the number of bytes actually stored in {@code buf}
     */
    private static int readFully(FileInputStream in, byte[] buf) throws java.io.IOException {
        int filled = 0;
        while (filled < buf.length) {
            int n = in.read(buf, filled, buf.length - filled);
            if (n < 0) {
                break;
            }
            filled += n;
        }
        return filled;
    }
}
測試結果
從測試結果以及上一個練習題可以知道,UTF-8的BOM對應的編碼為:EF BB BF,且位於檔案的前三個位元組。到這裡就可以開始解決我們的問題了
參考程式碼1
package review4;
import java.io.File;
import java.io.FileInputStream;
import java.util.Arrays;
/**
 * Demonstrates stripping the UTF-8 byte order mark (EF BB BF) from a file's content
 * before decoding it.
 */
public class RemoveBOM {
    /** The three bytes some editors write at the start of a UTF-8 file. */
    private static final byte[] UTF8_BOM = {(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};

    /** Prints the sample file as-is, then again with the BOM removed. */
    public static void main(String[] args) {
        File f = new File("D:/Test/test1.txt");
        try {
            byte[] read = readAll(f);
            System.out.print("檔案中讀出來的資料是:");
            System.out.println(new String(read, "UTF-8"));
            removeBom(f);
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Prints the content of {@code f} decoded as UTF-8, skipping a leading BOM if one is
     * present. Files without a BOM (including files shorter than three bytes) are printed
     * in full. The file on disk is not modified.
     *
     * @param f the file to read
     */
    public static void removeBom(File f) {
        try {
            byte[] read = readAll(f);
            // Only skip the first three bytes when they really are the BOM.
            int start = startsWithBom(read) ? UTF8_BOM.length : 0;
            System.out.print("除去BOM後的結果:");
            System.out.println(new String(read, start, read.length - start, "UTF-8"));
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

    /** Reads the whole file, looping because one read call may return fewer bytes than asked. */
    private static byte[] readAll(File f) throws java.io.IOException {
        try (FileInputStream fis = new FileInputStream(f)) {
            byte[] buf = new byte[(int) f.length()];
            int filled = 0;
            while (filled < buf.length) {
                int n = fis.read(buf, filled, buf.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            return filled == buf.length ? buf : Arrays.copyOf(buf, filled);
        }
    }

    /** Returns true when {@code data} begins with the bytes EF BB BF. */
    private static boolean startsWithBom(byte[] data) {
        if (data.length < UTF8_BOM.length) {
            return false;
        }
        for (int i = 0; i < UTF8_BOM.length; i++) {
            if (data[i] != UTF8_BOM[i]) {
                return false;
            }
        }
        return true;
    }
}
參考程式碼2
package review4;
import java.io.File;
import java.io.FileInputStream;
import java.util.Arrays;
/**
 * Demonstrates stripping the UTF-8 byte order mark (EF BB BF) from a byte array
 * before decoding it.
 */
public class RemoveBOM {
    /** Prints the sample file as-is, then again with the BOM removed. */
    public static void main(String[] args) {
        File f = new File("D:/Test/test1.txt");
        try (FileInputStream fis = new FileInputStream(f)) {
            byte[] read = new byte[(int) f.length()];
            // A single read call may return fewer bytes than requested, so loop until
            // the buffer is full or the stream ends.
            int filled = 0;
            while (filled < read.length) {
                int n = fis.read(read, filled, read.length - filled);
                if (n < 0) {
                    break;
                }
                filled += n;
            }
            if (filled < read.length) {
                read = Arrays.copyOf(read, filled);
            }
            System.out.print("檔案中讀出來的資料是:");
            System.out.println(new String(read, "UTF-8"));
            byte[] result = removeBom(read);
            System.out.println("除去BOM後的結果:" + new String(result, "UTF-8"));
        } catch (java.io.IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a copy of {@code read} without a leading UTF-8 BOM.
     *
     * <p>The first three bytes are dropped only when they are exactly EF BB BF. Input
     * without a BOM, including arrays shorter than three bytes, is copied unchanged.
     *
     * @param read the raw bytes; must not be null
     * @return a new array holding the bytes after the BOM, or all bytes when no BOM is present
     * @throws NullPointerException if {@code read} is null
     */
    public static byte[] removeBom(byte[] read) {
        boolean hasBom = read.length >= 3
                && read[0] == (byte) 0xEF
                && read[1] == (byte) 0xBB
                && read[2] == (byte) 0xBF;
        return Arrays.copyOfRange(read, hasBom ? 3 : 0, read.length);
    }
}
執行結果