1. 程式人生 > >按字節截取字符串

按字節截取字符串

lombok invoice nbsp slf4 cat 錯誤 sta strlen lis

 1 package motan.unittest.order;
 2 
 3 import com.alibaba.fastjson.JSON;
 4 import lombok.extern.slf4j.Slf4j;
 5 import utils.Lang;
 6 
 7 import java.io.UnsupportedEncodingException;
 8 import java.util.List;
 9 import java.util.Map;
10 
11 @Slf4j
12 public class ProductNameTest {
13 
14
public static void main(String[] args) { 15 String configValue = "[{keyword: ‘(‘, replace:‘(‘}, {keyword: ‘)‘, replace:‘)‘},{keyword: ‘#‘, replace:‘#‘},{keyword: ‘—‘, replace:‘‘},{keyword: ‘-‘, replace:‘‘},{keyword: ‘/‘, replace:‘‘}]"; 16 List<Map> nameFilterConfigs = JSON.parseArray(configValue, Map.class
); 17 String productName = "索尼(SONY)()[email protected]&**$#@#¥¥%……&……&*……&**((*()----專業數碼錄音筆ICDPX2404G黑色智能降噪可監聽支持音頻線轉錄適用商務學習采訪取證頻線轉錄適用商務學習采訪取證頻線轉錄適用商務學習采訪取證"; 18 String productName1 = "優必利 ///雙電源桌面辦公計算器 財務家用計算機 太陽能計算器 12位大屏幕語音計算機 4016雙電源計算器大號-不帶語音";
19 String productName2 = "優必利雙電源桌面辦公計算器 財務家用計算機太陽能計算器 12位大屏幕語音計算機 4016雙電源計算器大號-不帶語音"; 20 String productName3 = "種類:4888型 (公4.5兩 母3.5兩) 8只 規格:1"; 21 String productName4 = "種類:4888型 (公4.5兩 母3.5兩) 8只 規格"; 22 System.out.println(filterInvoiceGGXH("我ABC", nameFilterConfigs, 4)); 23 System.out.println(filterInvoiceGGXH("我ABC漢DEF", nameFilterConfigs, 6)); 24 System.out.println(filterInvoiceGGXH("我ABC漢DEF", nameFilterConfigs, 100)); 25 System.out.println(filterInvoiceGGXH(productName, nameFilterConfigs, 1000000000)); 26 System.out.println(filterInvoiceGGXH(productName1, nameFilterConfigs, 60)); 27 System.out.println(filterInvoiceGGXH(productName2, nameFilterConfigs, 36)); 28 System.out.println(filterInvoiceGGXH(productName3, nameFilterConfigs, 1000000000)); 29 //商品規格取36個字節,配置在數據庫 30 System.out.println(filterInvoiceGGXH(productName3, nameFilterConfigs, 36)); 31 } 32 33 public static String filterInvoiceGGXH(String ggxh, List<Map> configs, Integer ggxhMaxLeng) { 34 String regx; 35 String replacement; 36 if (!Lang.isEmpty(configs)) { 37 for (Map m : configs) { 38 if (!Lang.isEmpty(m)) { 39 regx = (String) m.get("keyword"); 40 replacement = (String) m.get("replace"); 41 if (!Lang.isEmpty(regx) && replacement != null) { 42 ggxh = ggxh.replaceAll(regx, replacement); 43 } 44 } 45 } 46 } 47 try { 48 log.info("商品規格型號截取字節數為:{},規格型號總字節數為:{}", ggxhMaxLeng, JSON.toJSONString(ggxh.getBytes("GBK").length)); 49 if (!Lang.isEmpty(ggxh) && ggxh.getBytes("GBK").length > ggxhMaxLeng) { 50 ggxh = subStringByByte(ggxh, ggxhMaxLeng); 51 } 52 } catch (Exception e) { 53 log.error("商品規格型號處理失敗:{},錯誤信息:{}", ggxh, e); 54 } 55 return ggxh; 56 } 57 58 /** 59 * 按字節截取字符串 60 * 61 * @param str 62 * @param splitByteNum 63 * @return 64 * @throws UnsupportedEncodingException 65 */ 66 public static String subStringByByte(String str, int splitByteNum) throws UnsupportedEncodingException { 67 //輸入無效判斷 68 if (null == str || "".equals(str) || splitByteNum <= 0) { 69 return ""; 70 } 71 72 int tempSubStrLength = splitByteNum; 73 //截取長度為要截取字節數的子串。說明str.length()字符串中字符個數一定小於等於字節數。 74 String subStr = str.substring(0, tempSubStrLength > str.length() ? str.length() : tempSubStrLength); 75 //在GBK編碼下,得到子串的字節長度 76 int subStrByteNum = subStr.getBytes("GBK").length; 77 78 //如果子串的字節長度大於字符長度,說明一定有漢字 79 while (subStrByteNum > tempSubStrLength) { 80 /** 81 * 在子串末尾去掉一個字符,重新計算子串在GBK下的字節長度。 82 * 因為是去掉一個字符,而漢字占一個字符,所以不用考慮半個漢字的問題 83 */ 84 int subStrLength = --splitByteNum; 85 log.info("處理字符串" + subStrLength + "->"); 86 subStr = str.substring(0, subStrLength > str.length() ? str.length() : subStrLength); 87 subStrByteNum = subStr.getBytes("GBK").length; 88 } 89 return subStr; 90 } 91 }

按字節截取字符串