Java 判斷字串是否為亂碼
阿新 • • 發佈:2018-11-10
以下是一個範例:
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Heuristic detector for garbled text (mojibake).
 *
 * <p>A string is considered garbled when, after whitespace and punctuation
 * have been stripped, too large a share of the remaining characters are
 * neither letters/digits nor characters from the CJK-related Unicode blocks
 * accepted by {@link #isChinese(char)}.
 */
public class MessyCodeCheck {

    /** Whitespace and Unicode punctuation, which are ignored by the heuristic. */
    private static final Pattern IGNORED_CHARS = Pattern.compile("[\\s\\p{P}]+");

    /**
     * Share of unrecognised characters above which a string is reported as garbled.
     * Kept as a {@code double} on purpose: the ratio is computed as a {@code float},
     * and {@code 0.4f} widens to a value slightly above {@code 0.4}, so a ratio of
     * exactly 40% has always been reported as garbled. Callers may rely on that.
     */
    private static final double MESSY_RATIO_THRESHOLD = 0.4;

    /**
     * Tells whether a UTF-16 code unit belongs to one of the Unicode blocks this
     * class treats as Chinese text (CJK ideographs, CJK/general punctuation and
     * half-/full-width forms).
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} lies in one of the accepted blocks
     */
    public static boolean isChinese(char c) {
        return isChineseCodePoint(c);
    }

    /** Code-point variant of {@link #isChinese(char)}; accepts the same Unicode blocks. */
    private static boolean isChineseCodePoint(int codePoint) {
        Character.UnicodeBlock ub = Character.UnicodeBlock.of(codePoint);
        return ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || ub == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Determines whether a string looks garbled.
     *
     * <p>Whitespace and punctuation are removed first. Each remaining code point
     * that is neither a letter/digit nor accepted by the Chinese-block check is
     * counted as suspicious; the string is garbled when the suspicious share
     * exceeds the threshold.
     *
     * @param strName the text to inspect; {@code null} is treated as not garbled
     * @return {@code true} if the text is judged to be garbled; {@code false}
     *         otherwise, including when nothing is left after stripping
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }

        Matcher ignored = IGNORED_CHARS.matcher(strName);
        String candidate = ignored.replaceAll("").trim();
        if (candidate.isEmpty()) {
            // Nothing left to judge; avoids computing 0/0.
            return false;
        }

        int total = 0;
        int suspicious = 0;
        // Walk code points rather than chars so that a supplementary character
        // (one surrogate pair) is judged once, as the character it encodes.
        for (int i = 0; i < candidate.length(); ) {
            int codePoint = candidate.codePointAt(i);
            i += Character.charCount(codePoint);
            total++;
            if (!Character.isLetterOrDigit(codePoint) && !isChineseCodePoint(codePoint)) {
                suspicious++;
            }
        }

        float ratio = (float) suspicious / total;
        return ratio > MESSY_RATIO_THRESHOLD;
    }

    /** Small demonstration that prints the verdict for a few sample strings. */
    public static void main(String[] args) {
        System.out.println(isMessyCode("*��JTP.jar�ļ����JTP�ļ���ȡ��ͼƬ��Դ"));
        System.out.println(isMessyCode("你好"));
        System.out.println(isMessyCode("123123adfasdfsa"));
        System.out.println(isMessyCode("123123a測試——@#¥%%dfasdfsa"));
        System.out.println(isMessyCode("*��JTP.jar�ļ����JTP�ļ���ȡ��ͼƬ��Դ"));
        System.out.println(isMessyCode("你好"));
        System.out.println(isMessyCode("123123adfasdfsa"));
        System.out.println(isMessyCode("123123a測試——@#¥%%dfasdfsa"));
    }
}