java中從高德地圖爬取資料
阿新 • • 發佈:2019-02-07
最近我一個人負責公司的一個app專案開發,需要從高德地圖爬取杭州市全部的超市資訊,放入mongodb資料庫中,用來做地理位置查詢。(mongodb這部分有時間再補上)
首先去高德地圖建立一個開發者賬號,獲取一個「web服務」型別的高德key,這個是必須要有的;也可以先用我這個從百度搜到的key試一下。
廢話不說了直接上程式碼
package com.pingogo.visit.service;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.pingogo.api.common.HttpUtils;
import com.pingogo.visit.domain.Shop;
import jxl.Cell;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
//import org.apache.poi.ss.usermodel.Workbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* Created by cw on 2017/8/29.
*
*/
public class AddressLngLatExchange {
private static final String KEY = "389880a06e3f893ea46036f030c94700";
private static final String OUTPUT = "JSON";
private static final String GET_LNG_LAT_URL = "http://restapi.amap.com/v3/geocode/geo";
private static final String GET_LNG_PIO_URL = "http://restapi.amap.com/v3/place/polygon";
private static final Logger LOGGER = LoggerFactory.getLogger(AddressLngLatExchange.class);
//獲取指定地點經緯度
public static String[] getLngLatFromOneAddr(String address){
if(StringUtils.isBlank(address)) {
LOGGER.error("地址(" + address + ")為null或者空");
return null;
}
Map<String, String> params = new HashMap<String, String>();
params.put("address", address);
params.put("output", OUTPUT);
params.put("key", KEY);
String result = HttpUtils.URLPost(GET_LNG_LAT_URL,params,"");
JSONObject jsonObject = JSONObject.parseObject(result);
String[] lngLatArr = new String[2];
//拿到返回報文的status值,高德的該介面返回值有兩個:0-請求失敗,1-請求成功;
int status = Integer.valueOf(jsonObject.getString("status"));
if(status == 1) {
JSONArray jsonArray = jsonObject.getJSONArray("geocodes");
for(int i = 0; i < jsonArray.size(); i++) {
JSONObject json = jsonArray.getJSONObject(i);
String lngLat = json.getString("location");
lngLatArr = lngLat.split(",");
}
} else {
String errorMsg = jsonObject.getString("info");
LOGGER.error("地址(" + address + ")" + errorMsg);
}
return lngLatArr;
}
public static List<Shop> initialData(String lonLat, String keyword, List<Shop> shopListSon){
if(StringUtils.isBlank(keyword)) {
LOGGER.error("地址(" + keyword + ")為null或者空");
}
Map<String, String> params = new HashMap<String, String>();
try {
Thread.sleep(5000);
} catch (InterruptedException e1) {
e1.printStackTrace();
}
params.put("polygon", lonLat);//"118.21,29.11;120.30,30.33"
params.put("output", OUTPUT);
params.put("keywords", keyword);
params.put("offset", "20");
params.put("page", "1");
params.put("key", KEY);
String result = HttpUtils.URLGet(GET_LNG_PIO_URL,params,"UTF-8");
JSONObject jsonObject = JSONObject.parseObject(result);
int statusOne = Integer.valueOf(jsonObject.getString("status"));
//第一次獲取資料時做的判斷
if(statusOne==1){
int count=Integer.valueOf(jsonObject.getString("count"));
int pageNumber=count/20;
int remainder=count%20;
if(remainder>0)pageNumber=pageNumber+1;
for(int i=1;i<=pageNumber;i++){
params.put("page", String.valueOf(i));
result = HttpUtils.URLGet(GET_LNG_PIO_URL,params,"UTF-8");
JSONObject jsonObject2 = JSONObject.parseObject(result);
System.out.println("+++++++++"+result);
//拿到返回報文的status值,高德的該介面返回值有兩個:0-請求失敗,1-請求成功;
int status = Integer.valueOf(jsonObject2.getString("status"));
if(status == 1) {
JSONArray jsonArray = jsonObject2.getJSONArray("pois");
if(jsonArray.size()>0){
for(int j =0;j<jsonArray.size();j++){
Shop shop =new Shop();
JSONObject jsonObject1 =jsonArray.getJSONObject(j);
shop.setShopName(jsonObject1.getString("name"));
shop.setSpecificAddress(jsonObject1.getString("address"));
shop.setId(jsonObject1.getString("id"));
String [] initLonLat =jsonObject1.getString("location").split(",");
shop.setLongitude(initLonLat[0]);
shop.setLatitude(initLonLat[1]);
shopListSon.add(shop);
//DBObject doci = new BasicDBObject("shopId", "300"+i).append("shopName", "人生得意"+i).append("shopStatus",0).append("specificAddress","天堂"+i).append("gps", new Point(new Position(lon, lat)));
}
}
} else {
String errorMsg = jsonObject.getString("info");
LOGGER.error("地址(" + keyword + ")" + errorMsg);
}
}
}
return shopListSon;
}
//從高德地圖上取資料
// public static void main(String[] args) {
// List<Shop> listShop =new ArrayList<>();
// //東經118°21′-120°30′,北緯29°11′-30°33′。杭州位置
// for(double i=118.20;i<=120.31;i=i+0.1){
// for(double j=29.10;j<=30.33;j=j+0.1){
// List<Shop> listShopSon =new ArrayList<>();
// double lonHead=i;
// double latHead=j;
// double lonTail=i+0.1;
// double latTail=j+0.1;
// String LonLat=lonHead+","+latHead+";"+lonTail+","+latTail;
// listShopSon =initialData(LonLat,"便利店",listShopSon);
// for(int n=0;n<listShopSon.size();n++){
// System.out.println("店鋪地址:"+listShopSon.get(n).getSpecificAddress());
// }
// if(listShopSon.size()>0){
// listShop.addAll(listShopSon);
// }
// System.out.println("ListShop的大小:"+listShop.size());
// double d =Distance(lonHead,latHead,lonTail,latTail);
// System.out.println("兩點距離"+d);
//
// }
//
// }
//
// System.out.println("ListShop的大小:"+listShop.size());
// creatExcel(listShop);
// }
public static void main(String[] args) {
readFile("D:\\geode\\222.xls");
}
//寫入excel中
public static void creatExcel(List<Shop> shopList){
HSSFWorkbook workbook = new HSSFWorkbook();
//第二部,在workbook中建立一個sheet對應excel中的sheet
HSSFSheet sheet = workbook.createSheet("高德地圖資料");
//第三部,在sheet表中新增表頭第0行,老版本的poi對sheet的行列有限制
HSSFRow row = sheet.createRow(0);
//第四步,建立單元格,設定表頭
HSSFCell cell = row.createCell(0);
cell.setCellValue("店鋪id");
cell = row.createCell(1);
cell.setCellValue("店鋪名稱");
cell = row.createCell(2);
cell.setCellValue("店鋪地址");
cell = row.createCell(3);
cell.setCellValue("經度");
cell = row.createCell(4);
cell.setCellValue("緯度");
//第五步,寫入實體資料,實際應用中這些資料從資料庫得到,物件封裝資料,集合包物件。物件的屬性值對應表的每行的值
for (int i = 0; i < shopList.size(); i++) {
HSSFRow row1 = sheet.createRow(i + 1);
Shop shop = shopList.get(i);
//建立單元格設值
row1.createCell(0).setCellValue(shop.getId());
row1.createCell(1).setCellValue(shop.getShopName());
row1.createCell(2).setCellValue(shop.getSpecificAddress());
row1.createCell(3).setCellValue(shop.getLongitude());
row1.createCell(4).setCellValue(shop.getLatitude());
}
//將檔案儲存到指定的位置
try {
FileOutputStream fos = new FileOutputStream("D:\\geode\\高德便利店地圖資料.xls");
workbook.write(fos);
System.out.println("寫入成功");
fos.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public static double Distance(double long1, double lat1, double long2, double lat2) {
double a, b, R;
R =6371; // 地球半徑 6371km
lat1 = lat1 * Math.PI / 180.0;
lat2 = lat2 * Math.PI / 180.0;
a = lat1 - lat2;
b = (long1 - long2) * Math.PI / 180.0;
double d;
double sa2, sb2;
sa2 = Math.sin(a / 2.0);
sb2 = Math.sin(b / 2.0);
d = 2
* R
* Math.asin(Math.sqrt(sa2 * sa2 + Math.cos(lat1)
* Math.cos(lat2) * sb2 * sb2));
BigDecimal bigDecimal = new BigDecimal(d*1000);
Double din = bigDecimal.setScale(2, BigDecimal.ROUND_HALF_UP).doubleValue();
return din ;
}
public static List<Shop> readFile(String filename){
List<Shop> shopList =new ArrayList<>();
Workbook wb=null;
Cell cell=null;
try {
File f=new File(filename);
InputStream in=new FileInputStream(f); //建立輸入流
wb= Workbook.getWorkbook(in); //獲取Excel檔案物件
jxl.Sheet s=wb.getSheet(0); //獲取檔案的指定工作表,預設為第一個
String value=null;
for(int i=1;i<s.getRows();i++){//表頭目錄不需要,從第一行開始
Shop shop =new Shop();
for(int j=0;j<s.getColumns();j++){
cell=s.getCell(j, i);
value=cell.getContents();
if(j==0){
shop.setId(value);
}else if(j==1){
shop.setShopName(value);
}else if(j==2){
shop.setSpecificAddress(value);
}else if(j==3){
shop.setLongitude(value);
}else if(j==4){
shop.setLatitude(value);
}
// System.out.println("value:"+value);
}
shopList.add(shop);
}
} catch (FileNotFoundException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (BiffException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
return shopList;
}
}
由於高德地圖對它的資料做了保護,我這邊採用的是矩形搜尋:先從百度查到杭州的經緯度範圍,把它劃分成多個小矩形,然後對每個小矩形呼叫高德地圖的API服務。我這邊將爬取的資料寫入excel表格中,一是為了展示、驗證資料是否準確,二是擔心直接寫入資料庫會有記憶體洩漏問題。上面的程式碼包含寫入excel和讀取excel兩部分,不過要注意一下,兩者用的jar包不同:寫入用的是poi,讀取用的是jxl。
這邊呼叫http請求用的是HttpClient客戶端,程式碼如下。是在網上找到的,首先謝謝分享的人;因為隔了一段時間,原文地址忘記了。一開始用的是下面這種URLConnection的寫法:
URL myURL = null; URLConnection httpsConn = null; try { myURL = new URL(url); } catch (MalformedURLException e) { e.printStackTrace(); } InputStreamReader insr = null; BufferedReader br = null; httpsConn = (URLConnection) myURL.openConnection();// 不使用代理 然後在tomcat專案中呼叫時,報錯了,原因現在還沒有弄清楚,知道原因的請告訴我一二。所以後來改用了下面的HttpUtils:
package com.pingogo.api.common;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * HTTP helper built on Commons HttpClient 3.x. All requests share one {@link HttpClient}
 * backed by a {@link MultiThreadedHttpConnectionManager}.
 *
 * <p>Neither request method throws on I/O failure: the error is logged and an empty string
 * is returned.
 *
 * @author lixiangyang
 */
public class HttpUtils {
    private static final Log log = LogFactory.getLog(HttpUtils.class);

    /** Charset name UTF-8. */
    public static final String URL_PARAM_DECODECHARSET_UTF8 = "UTF-8";
    /** Charset name GBK. */
    public static final String URL_PARAM_DECODECHARSET_GBK = "GBK";

    private static final String URL_PARAM_CONNECT_FLAG = "&";
    private static final String EMPTY = "";

    private static MultiThreadedHttpConnectionManager connectionManager = null;
    private static int connectionTimeOut = 25000;
    private static int socketTimeOut = 25000;
    private static int maxConnectionPerHost = 20;
    private static int maxTotalConnections = 20;
    private static HttpClient client;

    static {
        connectionManager = new MultiThreadedHttpConnectionManager();
        connectionManager.getParams().setConnectionTimeout(connectionTimeOut);
        connectionManager.getParams().setSoTimeout(socketTimeOut);
        connectionManager.getParams().setDefaultMaxConnectionsPerHost(maxConnectionPerHost);
        connectionManager.getParams().setMaxTotalConnections(maxTotalConnections);
        client = new HttpClient(connectionManager);
    }

    /**
     * Submits the parameters as a form-encoded POST.
     *
     * @param url    URL to request
     * @param params form fields to submit; {@code null} sends no fields, and a {@code null}
     *               value is sent as an empty string
     * @param enc    ignored: the request is always sent as UTF-8. This matches the long-standing
     *               behaviour, which callers that pass an empty string rely on.
     * @return the response body when the status is 200; otherwise an empty string
     */
    public static String URLPost(String url, Map<String, String> params, String enc) {
        final String charset = URL_PARAM_DECODECHARSET_UTF8;
        String response = EMPTY;
        PostMethod postMethod = null;
        try {
            postMethod = new PostMethod(url);
            postMethod.setRequestHeader("Content-Type",
                    "application/x-www-form-urlencoded;charset=" + charset);
            if (params != null) {
                for (Map.Entry<String, String> entry : params.entrySet()) {
                    // addParameter rejects null arguments, so map a null value to "" the same
                    // way getUrl does for GET requests.
                    String value = entry.getValue() != null ? entry.getValue() : EMPTY;
                    postMethod.addParameter(entry.getKey(), value);
                }
            }
            int statusCode = client.executeMethod(postMethod);
            if (statusCode == HttpStatus.SC_OK) {
                response = postMethod.getResponseBodyAsString();
            } else {
                log.error("響應狀態碼 = " + postMethod.getStatusCode());
            }
        } catch (HttpException e) {
            log.error("發生致命的異常,可能是協議不對或者返回的內容有問題", e);
        } catch (IOException e) {
            log.error("發生網路異常", e);
        } finally {
            if (postMethod != null) {
                postMethod.releaseConnection();
            }
        }
        return response;
    }

    /**
     * Submits the parameters as the query string of a GET request.
     *
     * @param url    URL to request, must not be {@code null}; it may already contain a query
     *               string, in which case the parameters are appended with {@code &}
     * @param params query parameters; {@code null} or empty adds nothing
     * @param enc    charset used to URL-encode the values; UTF-8 when {@code null} or empty
     * @return the response body when the status is 200; otherwise an empty string
     */
    public static String URLGet(String url, Map<String, String> params, String enc) {
        String charset = (enc == null || enc.length() == 0) ? URL_PARAM_DECODECHARSET_UTF8 : enc;
        String response = EMPTY;
        GetMethod getMethod = null;

        String query = getUrl(params, charset);
        StringBuilder totalUrl = new StringBuilder(url);
        if (query.length() > 0) {
            // Pick the separator from the URL itself, not from the still-empty buffer.
            if (url.indexOf('?') == -1) {
                totalUrl.append('?');
            } else if (!url.endsWith("?") && !url.endsWith(URL_PARAM_CONNECT_FLAG)) {
                totalUrl.append(URL_PARAM_CONNECT_FLAG);
            }
            totalUrl.append(query);
        }
        log.debug("GET請求URL = \n" + totalUrl);

        try {
            getMethod = new GetMethod(totalUrl.toString());
            getMethod.setRequestHeader("Content-Type",
                    "application/x-www-form-urlencoded;charset=" + charset);
            int statusCode = client.executeMethod(getMethod);
            if (statusCode == HttpStatus.SC_OK) {
                response = getMethod.getResponseBodyAsString();
            } else {
                log.error("響應狀態碼 = " + getMethod.getStatusCode());
            }
        } catch (HttpException e) {
            log.error("發生致命的異常,可能是協議不對或者返回的內容有問題", e);
        } catch (IOException e) {
            log.error("發生網路異常", e);
        } finally {
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }
        return response;
    }

    /**
     * Builds a {@code key=value&key=value} query string from a map.
     *
     * @param map      parameters; {@code null} values are written as empty strings
     * @param valueEnc charset used to URL-encode the values; UTF-8 when {@code null} or empty
     * @return the query string without a leading {@code ?}, or an empty string for a
     *         {@code null} or empty map
     */
    private static String getUrl(Map<String, String> map, String valueEnc) {
        if (null == map || map.isEmpty()) {
            return EMPTY;
        }
        String charset = (valueEnc == null || valueEnc.length() == 0)
                ? URL_PARAM_DECODECHARSET_UTF8 : valueEnc;
        StringBuilder url = new StringBuilder();
        for (Map.Entry<String, String> entry : map.entrySet()) {
            String str = entry.getValue() != null ? entry.getValue() : EMPTY;
            try {
                str = URLEncoder.encode(str, charset);
            } catch (UnsupportedEncodingException e) {
                // Unknown charset name: the value is sent unencoded, as before.
                log.error("Unsupported URL encoding: " + charset, e);
            }
            if (url.length() > 0) {
                url.append(URL_PARAM_CONNECT_FLAG);
            }
            url.append(entry.getKey()).append("=").append(str);
        }
        return url.toString();
    }
}
maven裡的配置
<dependency> <groupId>commons-httpclient</groupId> <artifactId>commons-httpclient</artifactId> <version>3.1</version> </dependency>
最後給大家看一下我爬取出來的資料
如果有什麼不對的地方,希望大家指點。
試一下付款二維碼