1. 程式人生 > >使用聚合接口獲取漢字數據字典

使用聚合接口獲取漢字數據字典

radi 文本 docs mys req find event post des

原文:使用聚合接口獲取漢字數據字典

如何獲取全部漢字及漢字的詳細信息?

剛剛扒了一通漢字完整的數據字典,mark作下記錄。

所有漢字集合

我們校驗中文字符時,經常使用基於Unicode範圍的正則表達式:"[\u4e00-\u9fa5]"匹配漢字,而"[^\u4e00-\u9fa5]"匹配的是非漢字字符。

詳細的漢字字符集列表可參考:

技術分享圖片

具體的漢字對應的字符,可查詢漢字字符集編碼查詢網站

基本漢字中從4e00到9fa5,十六進制轉化為十進制後,可知共有20902個漢字。

漢字的集合,是開源的,可以直接從網上獲取。

  • 百度文庫-中國漢字大全
  • Github-HanziToPinyin
  • CSDN-中國所有漢字-簡體

從以上鏈接中,下載漢字集合

獲取漢字信息

漢字的詳細信息,包括拼音/筆畫數/部首/五筆輸入/筆順編號/註解

網上有相應的新華字典Api接口,這裏選的是聚合平臺-新華字典

通過它提供的C#源碼,我們可以獲取到漢字字典的詳細信息

技術分享圖片

詳細數據請點擊 下載

PS:

  • 部首和五筆可能為空,部首返回的數據為:"難檢字"。
  • 筆順編號可從返回的簡解中得到。筆順編號,即指每個筆畫對應的號碼,這個非常有用!

聚合接口-漢字

獲取漢字:

 1         public static HanziDetail FindHanzi(string hanzi)
 2         {
 3             //1.根據漢字查詢字典
4 string url1 = "http://v.juhe.cn/xhzd/query"; 5 6 var parameters1 = new Dictionary<string, string>(); 7 8 parameters1.Add("word", hanzi); //填寫需要查詢的漢字,UTF8 urlencode編碼 9 parameters1.Add("key", "XXXXXX");//你申請的key 10 parameters1.Add("
dtype", ""); //返回數據的格式,xml或json,默認json 11 12 string result2 = SendPost(url1, parameters1, "get"); 13 14 var hanziRequestResponse = JsonConvert.DeserializeObject<HanziRequestResponse>(result2); 15 16 //HanziDetail hanziDetail = null; 17 //if (hanziRequestResponse.ErrorCode == "0" && hanziRequestResponse.Result != null) 18 //{ 19 // hanziDetail = hanziRequestResponse.Result; 20 //} 21 22 return hanziRequestResponse.Result; 23 } 24 }

解析類:

技術分享圖片
 1     [DataContract]
 2     public class HanziRequestResponse
 3     {
 4         [DataMember(Name = "reason")]
 5         public string Reason { get; set; }
 6 
 7         [DataMember(Name = "error_code")]
 8         public string ErrorCode { get; set; }
 9 
10         [DataMember(Name = "result")]
11         public HanziDetail Result { get; set; }
12     }
13     [DataContract]
14     public class HanziDetail
15     {
16         [DataMember(Name = "zi")]
17         public string Hanzi { get; set; }
18 
19         /// <summary>
20         /// 部首
21         /// </summary>
22         [DataMember(Name = "bushou")]
23         public string Radical { get; set; }
24 
25         /// <summary>
26         /// 拼音
27         /// </summary>
28         [DataMember(Name = "pinyin")]
29         public string Pinyin { get; set; }
30 
31         /// <summary>
32         /// 筆畫數
33         /// </summary>
34         [DataMember(Name = "bihua")]
35         public string Bihua { get; set; }
36 
37         /// <summary>
38         /// 五筆
39         /// </summary>
40         [DataMember(Name = "wubi")]
41         public string WuBi { get; set; }
42 
43         /// <summary>
44         /// 極簡介紹
45         /// </summary>
46         [DataMember(Name = "jijie")]
47         public List<string> SimpleDetailContent { get; set; }
48     }
View Code

訪問後臺接口通用類:

技術分享圖片
  1     public class HttpRequestBase
  2     {
  3         /// <summary>
  4         /// Http (GET/POST)
  5         /// </summary>
  6         /// <param name="url">請求URL</param>
  7         /// <param name="parameters">請求參數</param>
  8         /// <param name="method">請求方法</param>
  9         /// <returns>響應內容</returns>
 10         public static string SendPost(string url, IDictionary<string, string> parameters, string method)
 11         {
 12             if (method.ToLower() == "post")
 13             {
 14                 HttpWebRequest req = null;
 15                 HttpWebResponse rsp = null;
 16                 System.IO.Stream reqStream = null;
 17                 try
 18                 {
 19                     req = (HttpWebRequest)WebRequest.Create(url);
 20                     req.Method = method;
 21                     req.KeepAlive = false;
 22                     req.ProtocolVersion = HttpVersion.Version10;
 23                     req.Timeout = 5000;
 24                     req.ContentType = "application/x-www-form-urlencoded;charset=utf-8";
 25                     byte[] postData = Encoding.UTF8.GetBytes(BuildQuery(parameters, "utf8"));
 26                     reqStream = req.GetRequestStream();
 27                     reqStream.Write(postData, 0, postData.Length);
 28                     rsp = (HttpWebResponse)req.GetResponse();
 29                     Encoding encoding = Encoding.GetEncoding(rsp.CharacterSet);
 30                     return GetResponseAsString(rsp, encoding);
 31                 }
 32                 catch (Exception ex)
 33                 {
 34                     return ex.Message;
 35                 }
 36                 finally
 37                 {
 38                     if (reqStream != null) reqStream.Close();
 39                     if (rsp != null) rsp.Close();
 40                 }
 41             }
 42             else
 43             {
 44                 //創建請求
 45                 HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url + "?" + BuildQuery(parameters, "utf8"));
 46 
 47                 //GET請求
 48                 request.Method = "GET";
 49                 request.ReadWriteTimeout = 5000;
 50                 request.ContentType = "text/html;charset=UTF-8";
 51                 HttpWebResponse response = (HttpWebResponse)request.GetResponse();
 52                 Stream myResponseStream = response.GetResponseStream();
 53                 StreamReader myStreamReader = new StreamReader(myResponseStream, Encoding.GetEncoding("utf-8"));
 54 
 55                 //返回內容
 56                 string retString = myStreamReader.ReadToEnd();
 57                 return retString;
 58             }
 59         }
 60 
 61         /// <summary>
 62         /// 組裝普通文本請求參數。
 63         /// </summary>
 64         /// <param name="parameters">Key-Value形式請求參數字典</param>
 65         /// <returns>URL編碼後的請求數據</returns>
 66         public static string BuildQuery(IDictionary<string, string> parameters, string encode)
 67         {
 68             StringBuilder postData = new StringBuilder();
 69             bool hasParam = false;
 70             IEnumerator<KeyValuePair<string, string>> dem = parameters.GetEnumerator();
 71             while (dem.MoveNext())
 72             {
 73                 string name = dem.Current.Key;
 74                 string value = dem.Current.Value;
 75                 // 忽略參數名或參數值為空的參數
 76                 if (!string.IsNullOrEmpty(name))//&& !string.IsNullOrEmpty(value)
 77                 {
 78                     if (hasParam)
 79                     {
 80                         postData.Append("&");
 81                     }
 82                     postData.Append(name);
 83                     postData.Append("=");
 84                     if (encode == "gb2312")
 85                     {
 86                         postData.Append(HttpUtility.UrlEncode(value, Encoding.GetEncoding("gb2312")));
 87                     }
 88                     else if (encode == "utf8")
 89                     {
 90                         postData.Append(HttpUtility.UrlEncode(value, Encoding.UTF8));
 91                     }
 92                     else
 93                     {
 94                         postData.Append(value);
 95                     }
 96                     hasParam = true;
 97                 }
 98             }
 99             return postData.ToString();
100         }
101 
102         /// <summary>
103         /// 把響應流轉換為文本。
104         /// </summary>
105         /// <param name="rsp">響應流對象</param>
106         /// <param name="encoding">編碼方式</param>
107         /// <returns>響應文本</returns>
108         public static string GetResponseAsString(HttpWebResponse rsp, Encoding encoding)
109         {
110             System.IO.Stream stream = null;
111             StreamReader reader = null;
112             try
113             {
114                 // 以字符流的方式讀取HTTP響應
115                 stream = rsp.GetResponseStream();
116                 reader = new StreamReader(stream, encoding);
117                 return reader.ReadToEnd();
118             }
119             finally
120             {
121                 // 釋放資源
122                 if (reader != null) reader.Close();
123                 if (stream != null) stream.Close();
124                 if (rsp != null) rsp.Close();
125             }
126         }
127     }
View Code

詳細的源Demo,可查看Github :HanziDictionary

使用聚合接口獲取漢字數據字典