讀取xml格式utf-8編碼 和utf-8 無bom編碼格式,出現 前言中不允許有內容的問題
1,java 讀取 xml utf-8 編碼格式的檔案,出現 Caused by: org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 1; 前言中不允許有內容。
出現這樣的原因, 是因為讀取檔案過程中,出現了格式轉化問題,在程式碼中,需要進行轉化處理. 對utf-8編碼格式或者utf-8 無bom編碼格式,要有對應的讀取處理.
步驟一:
先進行封裝一個java類. 這個類的主要作用是 對 utf8 編碼格式進行處理,在首行 新增 bom 格式相關資訊,大約三個位元組.
package com.sdzx.controller;
import java.io.IOException;
import java.io.InputStream;
import java.io.PushbackInputStream;
public class UnicodeInputStream extends InputStream {
PushbackInputStream internalIn;
boolean isInited = false;
String defaultEnc;
String encoding;
private static final int BOM_SIZE = 4;
public UnicodeInputStream(InputStream in, String defaultEnc) {
internalIn = new PushbackInputStream(in, BOM_SIZE);
this.defaultEnc = defaultEnc;
}
public String getDefaultEncoding() {
return defaultEnc;
}
public String getEncoding() {
if (!isInited) {
try {
init();
} catch (IOException ex) {
IllegalStateException ise = new IllegalStateException("Init method failed.");
ise.initCause(ise);
throw ise;
}
}
return encoding;
}
/**
* Read-ahead four bytes and check for BOM marks. Extra bytes are
* unread back to the stream, only BOM bytes are skipped.
*/
protected void init() throws IOException {
if (isInited) return;
byte bom[] = new byte[BOM_SIZE];
int n, unread;
n = internalIn.read(bom, 0, bom.length);
if ( (bom[0] == (byte)0x00) && (bom[1] == (byte)0x00) &&
(bom[2] == (byte)0xFE) && (bom[3] == (byte)0xFF) ) {
encoding = "UTF-32BE";
unread = n - 4;
} else if ( (bom[0] == (byte)0xFF) && (bom[1] == (byte)0xFE) &&
(bom[2] == (byte)0x00) && (bom[3] == (byte)0x00) ) {
encoding = "UTF-32LE";
unread = n - 4;
} else if ( (bom[0] == (byte)0xEF) && (bom[1] == (byte)0xBB) &&
(bom[2] == (byte)0xBF) ) {
encoding = "UTF-8";
unread = n - 3;
} else if ( (bom[0] == (byte)0xFE) && (bom[1] == (byte)0xFF) ) {
encoding = "UTF-16BE";
unread = n - 2;
} else if ( (bom[0] == (byte)0xFF) && (bom[1] == (byte)0xFE) ) {
encoding = "UTF-16LE";
unread = n - 2;
} else {
// Unicode BOM mark not found, unread all bytes
encoding = defaultEnc;
unread = n;
}
//System.out.println("read=" + n + ", unread=" + unread);
if (unread > 0) internalIn.unread(bom, (n - unread), unread);
isInited = true;
}
public void close() throws IOException {
//init();
isInited = true;
internalIn.close();
}
public int read() throws IOException {
//init();
isInited = true;
return internalIn.read();
}
}
步驟二: 採用以下讀取xml檔案處理.
/**
// URL 引數,如果以http開始,就用http://+"檔案絕對路徑" ,如果是本地檔案,則要用ftp:/// +"檔案絕對路徑" 開頭
*/
URL url = new URL("http://****/***/test.txt");
// File f = new File("test.txt");
String enc = null; // or NULL to use systemdefault
UnicodeInputStream uin = new UnicodeInputStream(url.openStream(),enc); //如果是本地將url.openStream -> new FileInputStream(f)
enc = uin.getEncoding(); // check and skip possible BOM bytes
InputStreamReader in;
if (enc == null){
in = new InputStreamReader(uin);
}else {
in = new InputStreamReader(uin, enc);
}
BufferedReader reader = new BufferedReader(in);
//BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream("D:/tags.txt"),"utf-8"));
String tmp =reader.readLine();
這樣讀取的結果就是正常的了,
以上 : 轉載於 https://www.cnblogs.com/xifenglou/p/6233689.html