1. 程式人生 > >獲取百度地圖POI數據三(模擬關鍵詞搜索)

獲取百度地圖POI數據三(模擬關鍵詞搜索)

nav 運行 dip data box click pri reader lac

上一篇博文中講到如何獲取用於搜索的關鍵詞,並且已經準備好了一百五十萬個關鍵詞,這其中有門牌號碼、餐館酒店名稱、公司名稱、道路名稱等。有了這些數據,我們就可以通過代碼,模擬我們在百度地圖的搜索框中搜索地點,從而獲取其返回的POI數據。下面直接上代碼~

一 、準備好用於存儲數據的數據庫表

  技術分享

技術分享


由於百度返回的POI數據都是JSON字符串且子節點非常多,為了按照子節點一一存儲,就需要建這麽一張包含很多字段的表,雖然麻煩,但是信息很全面。當然各位也可以選擇性的存儲其中的數據。

  二、獲取並解析數據的代碼

/// <summary>
/// Console crawler: for every keyword stored in table SHKW it queries the Baidu Map
/// web search endpoint page by page, parses the JSON answer and inserts each
/// previously unseen POI into table SHPOI.
/// </summary>
class Program
{
    // JSON fields of a POI node that are bound to the insert statement as text.
    static readonly string[] strArr = { "name", "addr", "address_norm", "alias", "aoi", "area_name", "brand_id", "cla", "di_tag", "ext_display", "geo", "indoor_pano", "navi_update_time", "new_catalog_id", "primary_uid", "show_tag", "std_tag", "storage_src", "street_id", "tag", "tel", "uid" };

    // JSON fields bound as floating point columns (values are still passed as text, as before).
    static readonly string[] flaArr = { "navi_x", "navi_y" };

    // JSON fields bound as integer columns (values are still passed as text, as before).
    static readonly string[] intArr = { "acc_flag", "area", "biz_type", "catalogID", "click_flag", "detail", "diPointX", "diPointY", "dis", "dist2route", "dist2start", "ext_type", "f_flag", "father_son", "flag_type", "geo_type", "ismodified", "pano", "poiType", "poi_click_num", "poi_profile", "prio_flag", "route_flag", "status", "ty", "view_type", "x", "y" };

    // "name,addr" keys of every POI already stored; a HashSet keeps the duplicate
    // check O(1) even with more than a million rows.
    static readonly HashSet<string> tempList = new HashSet<string>();

    // Fallback patterns that blank out embedded, badly escaped JSON strings which
    // make the whole response unparsable.
    static readonly Regex indexTagRegex = new Regex("\"\\{\\\\\"index_tag\\\\\".*?\"\\}\"");
    static readonly Regex shopIdRegex = new Regex("\"\\{\\\\\"shop_id\\\\\".*?\"\\}\"");

    // Developer key sent to the geoconv API.
    // NOTE(review): secrets should not live in source; move to configuration.
    // The original source also listed GevTfxGlAWxzIzTobk7PGX1eu2YF0RMl in a comment
    // (presumably an alternate key - confirm).
    const string GeoconvAk = "ghCXsfOvpXwWsnm6lhWGelF5f0Fh3y82";

    /// <summary>
    /// Loads keywords and already stored POI keys, then requests up to 200 result
    /// pages per keyword until a page reports no content.
    /// </summary>
    static void Main(string[] args)
    {
        // Keywords come from SHKW via the project's SQLHelper class.
        DataTable kwDt = SQLHelper.ExecuteDataTable("select KWName from SHKW order by SaveTime", CommandType.Text);
        List<string> kwList = new List<string>();
        foreach (DataRow row in kwDt.Rows)
        {
            kwList.Add(row["KWName"].ToString());
        }
        int total = kwList.Count;

        // Seed the duplicate filter with what is already in SHPOI.
        DataTable tempDt = SQLHelper.ExecuteDataTable("select name,addr from SHPOI", CommandType.Text);
        foreach (DataRow row in tempDt.Rows)
        {
            tempList.Add(row["name"].ToString() + "," + row["addr"].ToString());
        }

        for (int i = 0; i < kwList.Count; i++)
        {
            string keyWord = "上海市" + kwList[i];
            // Escape the keyword so characters such as '&', '#' or '+' cannot
            // corrupt the query string.
            string escapedKeyWord = Uri.EscapeDataString(keyWord);

            for (int j = 0; j < 200; j++)
            {
                string url = "http://map.baidu.com/?newmap=1&reqflag=pcmap&biz=1&from=webmap&da_par=direct&pcevaname=pc4.1&qt=s&da_src=searchBox.button&wd=" + escapedKeyWord + "&c=289&pn=" + j;
                int count = DownloadPOIInfoFromBMap(url, -1, keyWord + j);
                if (count == -1)
                {
                    // -1 means the page had no content: move on to the next keyword.
                    break;
                }
                // i + 1 is the keyword's position; IndexOf was O(n) and reported the
                // wrong number for duplicated keywords.
                Console.WriteLine("成功寫入數據:" + count + "條 執行次數:" + (j + 1) + " KeyWord:" + keyWord + " kwNum:" + (i + 1) + " tatal:" + total);
            }
        }

        Console.WriteLine("ok");
        Console.ReadKey();
    }

    /// <summary>
    /// Downloads one search result page, turns it into a JObject and stores its POIs.
    /// </summary>
    /// <param name="url">Full request URL of the result page.</param>
    /// <param name="typeId">Value written to the TypeId column of every stored row.</param>
    /// <param name="fileName">Base name of the dump file written when the page cannot be parsed.</param>
    /// <returns>Number of rows inserted, 0 on network/decoding/parsing failure, -1 when the page has no content.</returns>
    private static int DownloadPOIInfoFromBMap(string url, int typeId, string fileName)
    {
        string restring;
        try
        {
            restring = HttpGet(url);
        }
        catch
        {
            Console.WriteLine("網絡異常");
            return 0;
        }

        string data;
        try
        {
            data = DecodeUnicodeEscapes(restring);
        }
        catch
        {
            // Malformed escape sequence (too short / not hexadecimal): skip the page.
            return 0;
        }

        JObject jo;
        if (!TryParseJson(data, out jo))
        {
            // Some responses embed broken JSON strings; strip them and retry.
            data = indexTagRegex.Replace(data, "\"\"");
            if (!TryParseJson(data, out jo))
            {
                data = shopIdRegex.Replace(data, "\"\"");
                if (!TryParseJson(data, out jo))
                {
                    try
                    {
                        Console.Write("無法構建JSON對象 ");
                        File.WriteAllText(@"D:\errorData\" + fileName + ".txt", data);
                    }
                    catch
                    {
                        // Best effort only: the dump is a debugging aid and must
                        // not stop the crawl (e.g. invalid file name characters).
                    }
                    return 0;
                }
            }
        }

        return SavePOIToDB(jo, typeId);
    }

    /// <summary>
    /// Performs a GET request and returns the body, releasing the response,
    /// stream and reader afterwards. Exceptions propagate to the caller.
    /// </summary>
    private static string HttpGet(string url)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Method = "GET";
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        using (Stream responseStream = response.GetResponseStream())
        using (StreamReader reader = new StreamReader(responseStream, Encoding.Default))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Replaces every "\uXXXX" sequence in <paramref name="raw"/> by the UTF-16
    /// code unit it denotes. Throws when a sequence is shorter than four
    /// characters or is not hexadecimal.
    /// </summary>
    private static string DecodeUnicodeEscapes(string raw)
    {
        string[] pieces = raw.Split(new string[] { "\\u" }, StringSplitOptions.None);
        StringBuilder sb = new StringBuilder(raw.Length);
        sb.Append(pieces[0]);
        for (int i = 1; i < pieces.Length; i++)
        {
            string code = pieces[i];
            int value = Convert.ToInt32(code.Substring(0, 4), 16);
            // Append the raw code unit: char.ConvertFromUtf32 rejects surrogate
            // halves, which made every page containing an escaped astral
            // character (e.g. emoji) fail as a whole.
            sb.Append((char)value);
            if (code.Length > 4)
            {
                sb.Append(code, 4, code.Length - 4);
            }
        }
        return sb.ToString();
    }

    /// <summary>
    /// Deserializes <paramref name="text"/>; returns false when Json.NET throws or
    /// the root is not an object. <paramref name="result"/> may be null on success
    /// (presumably for empty input - SavePOIToDB handles that).
    /// </summary>
    private static bool TryParseJson(string text, out JObject result)
    {
        try
        {
            result = (JObject)JsonConvert.DeserializeObject(text);
            return true;
        }
        catch
        {
            result = null;
            return false;
        }
    }

    /// <summary>
    /// Inserts every not yet stored child of jo["content"] into SHPOI.
    /// </summary>
    /// <param name="jo">Parsed search response; may be null.</param>
    /// <param name="typeId">Value written to the TypeId column.</param>
    /// <returns>Number of inserted rows, or -1 when there is no content.</returns>
    private static int SavePOIToDB(JObject jo, int typeId)
    {
        // A null document used to fall through and crash on jo["content"].
        JToken content = jo == null ? null : jo["content"];
        if (content == null || !content.Children().Any())
        {
            return -1;
        }

        int count = 0;
        foreach (JToken child in content.Children())
        {
            // Duplicate check first, so no geoconv request (limited daily quota)
            // is spent on a POI that will be skipped anyway. The original applied
            // Replace("", "") to the name, which throws ArgumentException for an
            // empty search string; the character it was meant to strip was
            // presumably lost - TODO confirm against the author's source.
            string key = GetInfoString("name", child) + "," + GetInfoString("addr", child);
            if (!tempList.Add(key))
            {
                continue;
            }

            List<SqlParameter> listParam = new List<SqlParameter>();
            foreach (string[] group in new string[][] { strArr, flaArr, intArr })
            {
                foreach (string field in group)
                {
                    listParam.Add(new SqlParameter("@" + field, GetInfoString(field, child)));
                }
            }

            List<float> coordinate = ConvertCoor(GetInfoInt("x", child), GetInfoInt("y", child));
            listParam.Add(new SqlParameter("@TypeId", typeId));
            listParam.Add(new SqlParameter("@lot_bdll", coordinate[0]));
            listParam.Add(new SqlParameter("@lat_bdll", coordinate[1]));

            string sql = "insert into SHPOI values(@Name,@acc_flag,@addr,@address_norm,@alias,@aoi,@area,@area_name,@biz_type,@brand_id,@catalogID,@cla,@click_flag,@detail,@diPointX,@diPointY,@di_tag,@dis,@dist2route,@dist2start,@ext_display,@ext_type,@f_flag,@father_son,@flag_type,@geo,@geo_type,@indoor_pano,@ismodified,@navi_update_time,@navi_x,@navi_y,@new_catalog_id,@pano,@poiType,@poi_click_num,@poi_profile,@primary_uid,@prio_flag,@route_flag,@show_tag,@status,@std_tag,@storage_src,@street_id,@tag,@tel,@ty,@uid,@view_type,@x,@y,@lot_bdll,@lat_bdll,@TypeId)";
            SQLHelper.ExecuteNonQuery(sql, CommandType.Text, listParam.ToArray());
            count++;
        }
        return count;
    }

    /// <summary>Returns jo[key] as int, or 0 when it is missing or not an integer.</summary>
    private static int GetInfoInt(string key, JToken jo)
    {
        int parsed;
        return int.TryParse(GetInfoString(key, jo), out parsed) ? parsed : 0;
    }

    /// <summary>Returns jo[key] as float, or 0 when it is missing or not a number.</summary>
    private static float GetInfoFloat(string key, JToken jo)
    {
        float parsed;
        return float.TryParse(GetInfoString(key, jo), out parsed) ? parsed : 0;
    }

    /// <summary>Returns the text of jo[key], or an empty string when it cannot be read.</summary>
    private static string GetInfoString(string key, JToken jo)
    {
        try
        {
            JToken value = jo[key];
            // Missing fields are common; test for null instead of paying for an exception.
            return value == null ? string.Empty : value.ToString();
        }
        catch
        {
            // jo is null or is not a container that can be indexed by name.
            return string.Empty;
        }
    }

    /// <summary>
    /// Converts bdmc coordinates (Baidu Mercator, given in 1/100 units) to bdll
    /// coordinates (Baidu longitude/latitude) through the geoconv web API.
    /// </summary>
    /// <param name="x">Mercator x multiplied by 100.</param>
    /// <param name="y">Mercator y multiplied by 100.</param>
    /// <returns>Two elements: longitude, latitude; both 0 when the conversion fails.</returns>
    private static List<float> ConvertCoor(int x, int y)
    {
        // Invariant culture keeps '.' as decimal separator whatever the OS locale is.
        System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
        string url = "http://api.map.baidu.com/geoconv/v1/?coords="
            + (x / 100.0).ToString(invariant) + "," + (y / 100.0).ToString(invariant)
            + "&from=6&to=5&ak=" + GeoconvAk;

        List<float> list = new List<float>(2);
        try
        {
            // Reading the body is inside the try as well: a connection that drops
            // mid-transfer used to terminate the program.
            JObject jo = (JObject)JsonConvert.DeserializeObject(HttpGet(url));
            JToken first = jo["result"][0];
            float lon = first.Value<float>("x");
            float lat = first.Value<float>("y");
            list.Add(lon);
            list.Add(lat);
        }
        catch
        {
            list.Clear();
            list.Add(0);
            list.Add(0);
        }
        return list;
    }
}

使用關鍵詞模擬百度地圖的搜索功能,從而獲取返回的POI數據,這種方法能夠獲取大量的POI數據。由於返回的數據中沒有百度經緯度坐標信息,只有百度墨卡托坐標,所以還需要調用它的API,通過坐標轉換的方法來獲取百度經緯度坐標。這就需要申請開發密鑰了,而且每個密鑰一天最多只能進行十萬次的坐標轉換。

以下是本人獲取的部分POI數據:

技術分享

技術分享

技術分享

技術分享

以上就是本人獲取百度地圖POI數據的方式。由於關鍵詞數量在百萬級別,而且百度每次返回的數據的頁數也不盡相同,多的有百頁,少的僅有一條甚至沒有,所以程序需要運行很長的時間,本人運行兩周程序也才獲取到一百多萬數據。預計程序運行完成時POI數據總量或與關鍵詞總量相差無幾,也能達到一百五十萬左右。雖然並不知道百度地圖上海地區的POI數據具體有多少,但是一百多萬的數據量已經能夠做些分析了。

獲取百度地圖POI數據三(模擬關鍵詞搜索)