1. 程式人生 > >java 如何判斷字串中包含漢字

java 如何判斷字串中包含漢字

Java判斷一個字串是否有中文是利用Unicode編碼來判斷,因為中文的編碼區間為:0x4e00--0x9fbb,不過單純用固定區間來判斷中文並不十分精確,因為有些中文的標點符號利用區間判斷會得到錯誤的結果。而且利用區間判斷中文效率也並不高,例如:str.substring(i, i + 1).matches("[\\u4e00-\\u9fbb]+"),就需要遍歷整個字串,如果字串太長效率非常低,而且判斷標點還會錯誤。

這裡提供一個高效準確的判斷方法,方法在下面的程式碼裡:public static boolean isCHinese(char c) 。類已經編譯通過,執行可以檢視結果。 

package com.songdan.test;

import java.util.Arrays;

/**
 * Small demo utility that detects whether text contains Chinese characters
 * (or Chinese / full-width punctuation) by looking at Unicode blocks rather
 * than matching a hard-coded code-point range with a regular expression.
 *
 * <p>Unicode blocks that are treated as "Chinese":
 * <ul>
 *   <li>{@code CJK_UNIFIED_IDEOGRAPHS} (4E00-9FFF): CJK unified ideographs</li>
 *   <li>{@code CJK_COMPATIBILITY_IDEOGRAPHS} (F900-FAFF): CJK compatibility ideographs</li>
 *   <li>{@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A} (3400-4DBF): CJK unified ideographs extension A</li>
 *   <li>{@code CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B} (20000-2A6DF): CJK unified ideographs extension B</li>
 *   <li>{@code CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT} (2F800-2FA1F): CJK compatibility ideographs supplement</li>
 *   <li>{@code GENERAL_PUNCTUATION} (2000-206F): general punctuation, e.g. the curly quotation marks</li>
 *   <li>{@code CJK_SYMBOLS_AND_PUNCTUATION} (3000-303F): CJK symbols and punctuation, e.g. the ideographic full stop</li>
 *   <li>{@code HALFWIDTH_AND_FULLWIDTH_FORMS} (FF00-FFEF): half-width and full-width forms, e.g. the full-width comma</li>
 * </ul>
 *
 * <p>Note that the three punctuation/forms blocks are not exclusive to Chinese
 * text; they are included on purpose so that Chinese punctuation is detected.
 */
public class test01 {

	/**
	 * Tells whether a single UTF-16 {@code char} belongs to one of the Unicode
	 * blocks this class treats as Chinese.
	 *
	 * <p>A lone surrogate is never reported as Chinese; use
	 * {@link #isCHinese(String)} for text that may contain supplementary-plane
	 * ideographs.
	 *
	 * @param c the character to test
	 * @return {@code true} if {@code c} is in one of the accepted blocks
	 */
	public static boolean isCHinese(char c){
		return isChineseCodePoint(c);
	}

	/**
	 * Tells whether the given string contains at least one Chinese character
	 * or Chinese punctuation mark.
	 *
	 * <p>The string is scanned by Unicode code point (not by {@code char}), so
	 * ideographs outside the Basic Multilingual Plane, which are stored as
	 * surrogate pairs, are recognised as well. Scanning stops at the first match.
	 *
	 * @param str the text to inspect; may be {@code null}
	 * @return {@code true} if a matching code point is found; {@code false}
	 *         for {@code null}, empty, or non-Chinese text
	 */
	public static boolean isCHinese(String str){
		if (str == null) {
			return false;
		}
		int length = str.length();
		int index = 0;
		while (index < length) {
			int codePoint = str.codePointAt(index);
			if (isChineseCodePoint(codePoint)) {
				return true;
			}
			// Advance by 2 chars for a surrogate pair, 1 otherwise.
			index += Character.charCount(codePoint);
		}
		return false;
	}

	/** Checks one Unicode code point against the accepted Unicode blocks. */
	private static boolean isChineseCodePoint(int codePoint) {
		// UnicodeBlock.of returns null for code points outside any known block;
		// the identity comparisons below are simply false in that case.
		Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
		return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
			|| block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
			|| block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
			|| block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
			|| block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
			|| block == Character.UnicodeBlock.GENERAL_PUNCTUATION
			|| block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
			|| block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
	}

	/**
	 * Prints each sample string followed by the detection result.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		String string1 = "i am isjd  df.";
		String string2 = "i am isjd  篦盎.";
		String string3 = "i am isjd  。";
		String string4 = "i am isjd  “";
		System.out.println(string1 +"  " +isCHinese(string1));
		System.out.println(string2 +"  " +isCHinese(string2));
		System.out.println(string3 +"  " +isCHinese(string3));
		System.out.println(string4 +"  " +isCHinese(string4));
	}

}

結果