使用聚合接口獲取漢字數據字典
阿新 • • 發佈:2018-09-25
原文:使用聚合接口獲取漢字數據字典
如何獲取全部漢字及漢字的詳細信息?
剛剛扒了一通漢字完整的數據字典,mark作下記錄。
所有漢字集合
我們匹配中文字符時,經常使用Unicode範圍的正則表達式"[\u4e00-\u9fa5]"來校驗;其取反形式"[^\u4e00-\u9fa5]"匹配的則是非漢字字符,可用來檢查字符串中是否混入了非中文內容。
詳細的漢字字符集列表可參考:
具體的漢字對應的字符,可查詢漢字字符集編碼查詢網站
基本漢字中從4e00到9fa5,十六進制轉化為十進制後,可知共有20902個漢字。
漢字的集合,是開源的,可以直接從網上獲取。
- 百度文庫-中國漢字大全
- Github-HanziToPinyin
- CSDN-中國所有漢字-簡體
從以上鏈接中,下載漢字集合
獲取漢字信息
漢字的詳細信息,包括拼音/筆畫數/部首/五筆輸入/筆順編號/註解
網上有相應的新華字典Api接口,這裏選的是聚合平臺-新華字典
通過它提供的C#示例源碼,我們可以調用接口獲取到漢字字典的詳細信息
詳細數據請點擊 下載
PS:
- 部首和五筆可能為空;部分漢字的部首字段返回的數據為:"難檢字"。
- 筆順編號可從返回的簡解中得到。筆順編號,即指每個筆畫對應的號碼,這個非常有用!
聚合接口-漢字
獲取漢字:
1 public static HanziDetail FindHanzi(string hanzi) 2 { 3 //1.根據漢字查詢字典4 string url1 = "http://v.juhe.cn/xhzd/query"; 5 6 var parameters1 = new Dictionary<string, string>(); 7 8 parameters1.Add("word", hanzi); //填寫需要查詢的漢字,UTF8 urlencode編碼 9 parameters1.Add("key", "XXXXXX");//你申請的key 10 parameters1.Add("dtype", ""); //返回數據的格式,xml或json,默認json 11 12 string result2 = SendPost(url1, parameters1, "get"); 13 14 var hanziRequestResponse = JsonConvert.DeserializeObject<HanziRequestResponse>(result2); 15 16 //HanziDetail hanziDetail = null; 17 //if (hanziRequestResponse.ErrorCode == "0" && hanziRequestResponse.Result != null) 18 //{ 19 // hanziDetail = hanziRequestResponse.Result; 20 //} 21 22 return hanziRequestResponse.Result; 23 } 24 }
解析類:
/// <summary>
/// Envelope of a dictionary-query response: a reason text, an error code and
/// the character details.
/// </summary>
[DataContract]
public class HanziRequestResponse
{
    /// <summary>Value of the "reason" field.</summary>
    [DataMember(Name = "reason")]
    public string Reason { get; set; }

    /// <summary>Value of the "error_code" field, kept as a string.</summary>
    [DataMember(Name = "error_code")]
    public string ErrorCode { get; set; }

    /// <summary>Value of the "result" field: the details of the queried character.</summary>
    [DataMember(Name = "result")]
    public HanziDetail Result { get; set; }
}

/// <summary>
/// Details of a single Chinese character as carried in the "result" field.
/// </summary>
[DataContract]
public class HanziDetail
{
    /// <summary>The character itself ("zi").</summary>
    [DataMember(Name = "zi")]
    public string Hanzi { get; set; }

    /// <summary>Radical ("bushou").</summary>
    [DataMember(Name = "bushou")]
    public string Radical { get; set; }

    /// <summary>Pinyin reading ("pinyin").</summary>
    [DataMember(Name = "pinyin")]
    public string Pinyin { get; set; }

    /// <summary>Stroke count ("bihua"), kept as a string.</summary>
    [DataMember(Name = "bihua")]
    public string Bihua { get; set; }

    /// <summary>Wubi input code ("wubi").</summary>
    [DataMember(Name = "wubi")]
    public string WuBi { get; set; }

    /// <summary>Brief explanation lines ("jijie").</summary>
    [DataMember(Name = "jijie")]
    public List<string> SimpleDetailContent { get; set; }
}
訪問後臺接口通用類:
/// <summary>
/// Small synchronous HTTP helper built on <see cref="HttpWebRequest"/>: sends a
/// form-encoded POST or a query-string GET and returns the response body as text.
/// </summary>
public class HttpRequestBase
{
    /// <summary>
    /// Sends an HTTP request (GET or POST, despite the method's name).
    /// </summary>
    /// <param name="url">Request URL.</param>
    /// <param name="parameters">Request parameters as key/value pairs.</param>
    /// <param name="method">
    /// "post" (compared case-insensitively) sends the parameters as a form body;
    /// any other value sends a GET with the parameters appended after "?".
    /// </param>
    /// <returns>
    /// The response body. For POST, any exception is caught and its message is
    /// returned instead; for GET, exceptions propagate to the caller.
    /// </returns>
    public static string SendPost(string url, IDictionary<string, string> parameters, string method)
    {
        // Ordinal comparison: culture-independent and tolerant of a null method.
        if (string.Equals(method, "post", StringComparison.OrdinalIgnoreCase))
        {
            try
            {
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
                req.Method = method;
                req.KeepAlive = false;
                req.ProtocolVersion = HttpVersion.Version10;
                req.Timeout = 5000;
                req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";

                byte[] postData = Encoding.UTF8.GetBytes(BuildQuery(parameters, "utf8"));

                // Close the request stream before asking for the response.
                using (Stream reqStream = req.GetRequestStream())
                {
                    reqStream.Write(postData, 0, postData.Length);
                }

                using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                {
                    return GetResponseAsString(rsp, ResolveEncoding(rsp.CharacterSet));
                }
            }
            catch (Exception ex)
            {
                // Existing best-effort contract: report the failure text as the result.
                return ex.Message;
            }
        }

        // GET request: parameters travel in the query string.
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + "?" + BuildQuery(parameters, "utf8"));
        request.Method = "GET";
        request.ReadWriteTimeout = 5000;
        request.ContentType = "text/html;charset=UTF-8";

        // Dispose response, stream and reader so the connection is released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader responseReader = new StreamReader(responseStream, Encoding.UTF8))
        {
            return responseReader.ReadToEnd();
        }
    }

    /// <summary>
    /// Assembles plain-text request parameters into a "name=value&amp;name=value" string.
    /// </summary>
    /// <param name="parameters">
    /// Key/value request parameters; <c>null</c> yields an empty string.
    /// </param>
    /// <param name="encode">
    /// "gb2312" or "utf8" URL-encodes each value with that encoding; any other
    /// value appends the values unencoded.
    /// </param>
    /// <returns>The assembled request data.</returns>
    public static string BuildQuery(IDictionary<string, string> parameters, string encode)
    {
        StringBuilder postData = new StringBuilder();
        if (parameters == null)
        {
            return postData.ToString();
        }

        foreach (KeyValuePair<string, string> pair in parameters)
        {
            // Only entries with an empty name are skipped; empty values are still sent.
            if (string.IsNullOrEmpty(pair.Key))
            {
                continue;
            }

            if (postData.Length > 0)
            {
                postData.Append("&");
            }

            postData.Append(pair.Key);
            postData.Append("=");

            if (encode == "gb2312")
            {
                postData.Append(HttpUtility.UrlEncode(pair.Value, Encoding.GetEncoding("gb2312")));
            }
            else if (encode == "utf8")
            {
                postData.Append(HttpUtility.UrlEncode(pair.Value, Encoding.UTF8));
            }
            else
            {
                postData.Append(pair.Value);
            }
        }

        return postData.ToString();
    }

    /// <summary>
    /// Reads the whole response stream as text and closes the response.
    /// </summary>
    /// <param name="rsp">The response to read; it is closed before this method returns.</param>
    /// <param name="encoding">Encoding used to decode the response stream.</param>
    /// <returns>The response text.</returns>
    public static string GetResponseAsString(HttpWebResponse rsp, Encoding encoding)
    {
        try
        {
            // Read the HTTP response as a character stream.
            using (Stream stream = rsp.GetResponseStream())
            using (StreamReader reader = new StreamReader(stream, encoding))
            {
                return reader.ReadToEnd();
            }
        }
        finally
        {
            // Release the response even when reading fails.
            if (rsp != null)
            {
                rsp.Close();
            }
        }
    }

    // Maps a response charset name to an Encoding, falling back to UTF-8 when
    // the name is missing or not recognised.
    private static Encoding ResolveEncoding(string charset)
    {
        if (string.IsNullOrEmpty(charset))
        {
            return Encoding.UTF8;
        }

        try
        {
            return Encoding.GetEncoding(charset);
        }
        catch (ArgumentException)
        {
            return Encoding.UTF8;
        }
    }
}
詳細的源Demo,可查看Github :HanziDictionary
使用聚合接口獲取漢字數據字典