unicode碼、字串、utf8碼之間的轉換工具類

阿新 • • 發佈：2019-01-07

package com.anjz.test;

import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;

/**
 * Converts between three textual representations of the same character data:
 * <ul>
 * <li>an ordinary Java string,</li>
 * <li>"unicode escape" text, where every UTF-16 code unit is written as a
 * backslash, the letter {@code u} and four lowercase hex digits,</li>
 * <li>a "UTF-8 hex" string, where every UTF-8 byte is written as two uppercase
 * hex digits with no separators.</li>
 * </ul>
 * All methods are static and keep no state.
 */
public class EncodeUtil {

	/** One escape: a backslash, the letter u, then exactly four hex digits (captured as group 1). */
	private static final Pattern UNICODE_ESCAPE = Pattern.compile("\\\\u(\\p{XDigit}{4})");

	private static final char[] HEX_LOWER = "0123456789abcdef".toCharArray();

	private static final char[] HEX_UPPER = "0123456789ABCDEF".toCharArray();

	/**
	 * Writes every UTF-16 code unit of a string as a six-character escape
	 * (backslash, {@code u}, four lowercase hex digits).
	 *
	 * <p>Characters outside the Basic Multilingual Plane therefore produce two
	 * escapes, one per surrogate. No byte-order mark is emitted.
	 *
	 * @param s the text to escape
	 * @return the escaped text, or {@code null} when {@code s} is {@code null} or empty
	 */
	public static String stringToUnicode(String s) {
		if (s == null || s.isEmpty()) {
			return null;
		}

		StringBuilder out = new StringBuilder(s.length() * 6);
		for (int i = 0; i < s.length(); i++) {
			char unit = s.charAt(i);
			// Emit the four nibbles from most to least significant so the
			// result is always zero-padded to four digits.
			out.append("\\u")
					.append(HEX_LOWER[(unit >> 12) & 0xf])
					.append(HEX_LOWER[(unit >> 8) & 0xf])
					.append(HEX_LOWER[(unit >> 4) & 0xf])
					.append(HEX_LOWER[unit & 0xf]);
		}
		return out.toString();
	}

	/**
	 * Replaces every escape (backslash, {@code u}, four hex digits) in the input
	 * with the UTF-16 code unit it denotes. All other text is copied unchanged.
	 *
	 * <p>The input is scanned exactly once, left to right, so text produced by
	 * decoding one escape is never decoded a second time.
	 *
	 * @param str text that may contain escapes
	 * @return the decoded text, or {@code null} when {@code str} is {@code null}
	 */
	public static String unicodeToString(String str) {
		if (str == null) {
			return null;
		}

		Matcher matcher = UNICODE_ESCAPE.matcher(str);
		if (!matcher.find()) {
			return str;
		}

		StringBuilder out = new StringBuilder(str.length());
		int copiedUpTo = 0;
		do {
			// Copy the literal text between the previous escape and this one.
			out.append(str, copiedUpTo, matcher.start());
			out.append((char) Integer.parseInt(matcher.group(1), 16));
			copiedUpTo = matcher.end();
		} while (matcher.find());
		out.append(str, copiedUpTo, str.length());
		return out.toString();
	}

	/**
	 * Encodes a string as UTF-8 and writes each byte as two uppercase hex digits.
	 *
	 * <p>The whole string is encoded in one step, so surrogate pairs become the
	 * correct four-byte sequence, and every byte is zero-padded so the result
	 * can be fed back into {@link #convertUTF8ToString(String)}.
	 *
	 * @param s the text to encode
	 * @return the hex string, or {@code null} when {@code s} is {@code null} or empty
	 */
	public static String convertStringToUTF8(String s) {
		if (s == null || s.isEmpty()) {
			return null;
		}

		byte[] utf8 = s.getBytes(StandardCharsets.UTF_8);
		StringBuilder sb = new StringBuilder(utf8.length * 2);
		for (byte b : utf8) {
			// Masking after the shift discards the sign extension of negative bytes.
			sb.append(HEX_UPPER[(b >> 4) & 0xf]).append(HEX_UPPER[b & 0xf]);
		}
		return sb.toString();
	}

	/**
	 * Decodes a hex string (two digits per byte, either case) as UTF-8 text.
	 *
	 * <p>If the input has an odd number of characters, the trailing unpaired
	 * character is ignored. Byte sequences that are not valid UTF-8 decode to
	 * the replacement character, as defined by {@code String(byte[], Charset)}.
	 *
	 * @param s hex digits describing UTF-8 bytes
	 * @return the decoded text, or {@code null} when {@code s} is {@code null} or empty
	 * @throws NumberFormatException if a two-character group is not a valid hex number
	 */
	public static String convertUTF8ToString(String s) {
		if (s == null || s.isEmpty()) {
			return null;
		}

		int total = s.length() / 2;
		byte[] buffer = new byte[total];
		for (int i = 0; i < total; i++) {
			int start = i * 2;
			// Values 0x80..0xFF wrap to negative bytes via the narrowing cast, which is intended.
			buffer[i] = (byte) Integer.parseInt(s.substring(start, start + 2), 16);
		}
		return new String(buffer, StandardCharsets.UTF_8);
	}

	/**
	 * Converts escape text to a UTF-8 hex string by decoding the escapes and
	 * then encoding the resulting string.
	 *
	 * @param str text that may contain escapes
	 * @return the UTF-8 hex string, or {@code null} when the decoded text is {@code null} or empty
	 */
	public static String unicodeToUTF8(String str) {
		return EncodeUtil.convertStringToUTF8(EncodeUtil.unicodeToString(str));
	}

	/**
	 * Converts a UTF-8 hex string to escape text by decoding the bytes and then
	 * escaping the resulting string.
	 *
	 * @param str hex digits describing UTF-8 bytes
	 * @return the escape text, or {@code null} when the decoded text is {@code null} or empty
	 */
	public static String utf8ToUnicode(String str) {
		return EncodeUtil.stringToUnicode(EncodeUtil.convertUTF8ToString(str));
	}

}

測試程式碼

package com.anjz.test;

import java.io.UnsupportedEncodingException;

/**
 * Console demo for {@code EncodeUtil}: runs each conversion once and prints
 * the result, one line per call.
 */
public class CodingTest3 {
	/**
	 * Prints six lines: string to escapes, escapes to string, string to UTF-8
	 * hex, UTF-8 hex to string, escapes to UTF-8 hex, and UTF-8 hex to escapes.
	 *
	 * @param args ignored
	 * @throws UnsupportedEncodingException declared for source compatibility; not thrown by this method's own code
	 */
	public static void main(String[] args) throws UnsupportedEncodingException {
		// Sample text mixes CJK, full-width and ASCII punctuation; it is the
		// plain-text form of the escape sequence used in the next call.
		System.out.println(EncodeUtil.stringToUnicode("木999，你好，不错,~!@#$"));
		System.out.println(EncodeUtil.unicodeToString("\\u6728\\u0039\\u0039\\u0039\\uff0c\\u4f60\\u597d\\uff0c\\u4e0d\\u9519\\u002c\\u007e\\u0021\\u0040\\u0023\\u0024"));
		
		System.out.println(EncodeUtil.convertStringToUTF8("你好"));
		System.out.println(EncodeUtil.convertUTF8ToString("E4BDA0E5A5BD"));
		
		System.out.println(EncodeUtil.unicodeToUTF8("\\u6728\\u0039\\u0039\\u0039\\uff0c\\u4f60\\u597d\\uff0c\\u4e0d\\u9519\\u002c\\u007e\\u0021\\u0040\\u0023\\u0024"));
		
		System.out.println(EncodeUtil.utf8ToUnicode("E69CA8393939EFBC8CE4BDA0E5A5BDEFBC8CE4B88DE994992C7E21402324"));
	}
}

測試結果

\u6728\u0039\u0039\u0039\uff0c\u4f60\u597d\uff0c\u4e0d\u9519\u002c\u007e\u0021\u0040\u0023\u0024
木999，你好，不错,~!@#$
E4BDA0E5A5BD
你好
E69CA8393939EFBC8CE4BDA0E5A5BDEFBC8CE4B88DE994992C7E21402324
\u6728\u0039\u0039\u0039\uff0c\u4f60\u597d\uff0c\u4e0d\u9519\u002c\u007e\u0021\u0040\u0023\u0024

金額元分之間轉換工具類

import java.math.BigDecimal;/** *金額元分之間轉換工具類 */public class AmountUtils { /**金額為分的格式 */ public static final String CURRENCY_FEN_REGEX = "^(([1-9]{1}\

unicode碼、字串、utf8碼之間的轉換工具類

package com.anjz.test; import java.io.UnsupportedEncodingException; import java.util.regex.Matcher; import java.util.regex.Pattern; im

python常用的十進位制、16進位制、字串、位元組串之間的轉換（長期更新帖）

進行協議解析時，總是會遇到各種各樣的資料轉換的問題，從二進位制到十進位制，從位元組串到整數等等廢話不多上，直接上例子整數之間的進位制轉換: 10進位制轉16進位制: hex(16) ==> 0x10 16進位制轉10進位制: int('0

列舉、字串、值之間的轉換

public enum Fruit { banana=1, apple=2, orange=3 } //列舉轉字串 string enumString=Enum.GetName( typeof(Fruit) ,f

java之字元陣列、字串、整形之間的轉換。

轉自https://www.cnblogs.com/javacatalina/p/6641338.html 一、字元陣列轉字串方法1 char[] str = {'a','b','d'}; StringBuffer sb = new StringBuffer

python--常用的十進位制、16進位制、字串、位元組串之間的轉換

進行協議解析時，總是會遇到各種各樣的資料轉換的問題，從二進位制到十進位制，從位元組串到整數等等整數之間的進位制轉換: 10進位制轉16進位制: hex(16) ==> 0x10 16進位制轉10進位制: int(‘0x10’, 16) ==> 16 類似的還有oct()，

Java 帶分隔字串、字串陣列和 ArrayList 之間的轉換

Java 帶分隔字串、字串陣列和 ArrayList<String> 之間的轉換一、先來認識一下標題說的這三件東西，也許描述的不清楚，但有了下面的例子，就不會有歧義了1、帶分隔字串是這樣的： String seperateStr = "111,222,333

python經常使用的十進位制、16進位制、字串、位元組串之間的轉換（長期更新帖）

進行協議解析時。總是會遇到各種各樣的資料轉換的問題，從二進位制到十進位制，從位元組串到整數等等廢話不多上。直接上樣例整數之間的進位制轉換: 10進位制轉16進位制: hex(16) ==> 0x10 16進位制轉10進位制: int('0x10

列表、元組、字串、集合之間的轉化

# 列表：list [] # 元組：tuple () # 字串：string {} # 集合：set {}或（）（空集合） # ...............列表轉化成元組、字串、集合............... # 列表轉換成元組 list1=['a','b

python常用的十進位制、16進位制、字串、位元組串之間的轉換

整數之間的進位制轉換: 10進位制轉16進位制: hex(16) ==> 0x1016進位制轉10進位制: int('0x10', 16) ==> 16類似的還有oct()， bin()-------------------字串轉整數:10進位制字串:

字元陣列、字串、整型數之間的轉化

1、字元陣列轉化為字串應用字串定義時的建構函式 #include <iostream> using namespace std; //字元陣列轉化為字串 #in

Java之Json和陣列、字串、物件之間的轉換

1、字串和物件轉換為Json JSONObject jsonStr = JSONObject.fromObject(String);2、陣列轉換為Json JSONArray.fromObject(Array[])3、Json轉換成字串，使用toString()方法即可

java中字串陣列、字串、整形之間的轉換

字串陣列轉字串(只能通過for迴圈): String[] str = {'a', 'b', 'd'}; StringBuffer sb = new StringBuffer(); for(int i = 0; i < str.length;i++){ 　　s

二進制、八進制、十進制、十六進制之間轉換

不變保留十進制數 0.10 多少換圖 10.10 進制讀數一、十進制與二進制之間的轉換（1）十進制轉換為二進制，分為整數部分和小數部分 ① 整數部分方法：除2取余法，即每次將整數部分除以2，余數為該位權上的數，而商繼續除以2，余數又為上一個位權上的數

C語言入門（十八）之指標、字串、函式

如何利用指標運算元組 int ages[3] = {1, 3 , 5}; ages[0] = 998; printf("ages[0] = %i\n", ages[0]); int *p = ages; // int *p = &a

NSIS的基本語法---邏輯、字串、外部呼叫、ini檔案

一、INI檔案操作 1、ReadINIStr（INI檔案）作用：從ini檔案的[section_name]中的entry_name讀取並將值儲存到使用者變數$ x中。將設定錯誤標誌，如果未找到該條目，則將$ x分配給空字串。 ReadINIStr $0 $INSTDIR\winamp.

JavaScript的組成、字串、定時器、型別轉換

JavaScript的組成 JavaScript 的核心 ECMAScript 描述了該語言的語法和基本物件； DOM 描述了處理網頁內容的方法和介面； BOM 描述了與瀏覽器進行互動的方法和介面。 1、ECMAscript javascript的語法（變數、函式、迴圈語句等語

numpy中的savez（）同時儲存陣列、字串、字典資料

python使用numpy中的savez（）儲存非陣列資料到本地。報錯： IndexError: only integers, slices (`:`), ellipsis (`...`), numpy.newaxis (`None`) and integer or boole

前端（十四）—— JavaScript基礎：Number、Date類、字串、陣列、Math類、正則

JS常用類：Number類、Date類、Math類、字串、陣列、正則一、Number 1、常用數字整數：10 小數：3.14 科學計數法：1e5 | 1e-5 正負無窮：Infinity | -Infinity 2、常用進位制二進位制：0b1010 以0b開頭八進位制：012 以0

Shell 數字、字串、檔案比較命令

Shell test 命令 Shell中的 test 命令用於檢查某個條件是否成立，它可以進行數值、字元和檔案三個方面的測試。數值測試引數說明 -eq 等於則為真 -ne

unicode碼、字串、utf8碼之間的轉換工具類

相關推薦