1. 程式人生 > >java中從高德地圖爬取資料

java中從高德地圖爬取資料

    最近我一個人負責公司的一個 app 專案開發,需要從高德地圖爬取杭州市全部的超市資訊,放入 MongoDB 資料庫中,用來做地理位置查詢。(MongoDB 這部分有時間再補上)

   首先去高德地圖建立一個開發者賬號,申請一個 Web 服務用的高德 key。這個是必須要有的,也可以先用我這個從百度搜到的 key 試一下。

 廢話不說了直接上程式碼

package com.pingogo.visit.service;

import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.pingogo.api.common.HttpUtils;
import com.pingogo.visit.domain.Shop;
import jxl.Cell;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
//import org.apache.poi.ss.usermodel.Workbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Created by cw on 2017/8/29.
 *
 */
public class AddressLngLatExchange {
    // Value sent as the "key" request parameter on every call made by this class.
    // NOTE(review): the credential is hard-coded in source — consider loading it from configuration.
    private static final String KEY = "389880a06e3f893ea46036f030c94700";
    // Value sent as the "output" request parameter.
    private static final String OUTPUT = "JSON";
    // Endpoint posted to by getLngLatFromOneAddr (address -> "geocodes" list).
    // NOTE(review): plain http — confirm whether the service should be called over https.
    private static final String GET_LNG_LAT_URL = "http://restapi.amap.com/v3/geocode/geo";

    // Endpoint queried by initialData with a "polygon" and "keywords"; the response's "pois" array is read.
    // NOTE(review): "PIO" in the constant name looks like a typo for "POI".
    private static final String GET_LNG_PIO_URL = "http://restapi.amap.com/v3/place/polygon";

    // Class-wide SLF4J logger.
    private static final Logger LOGGER = LoggerFactory.getLogger(AddressLngLatExchange.class);


    /**
     * Looks up the longitude/latitude of a single address.
     *
     * <p>The address is posted to {@code GET_LNG_LAT_URL}. When the response reports
     * {@code status == "1"}, each entry of {@code geocodes} is inspected and its
     * {@code location} value ("lng,lat") is split on the comma; the last entry with a
     * well-formed location wins, matching the original loop.
     *
     * @param address the address to geocode
     * @return {@code null} when {@code address} is blank; otherwise a two-element array
     *         {@code [longitude, latitude]}, whose elements are both {@code null} when the
     *         request failed, the response was empty, or no geocode carried a usable location
     */
    public static String[] getLngLatFromOneAddr(String address){
        if (StringUtils.isBlank(address)) {
            LOGGER.error("地址({})為null或者空", address);
            return null;
        }
        Map<String, String> params = new HashMap<String, String>();
        params.put("address", address);
        params.put("output", OUTPUT);
        params.put("key", KEY);
        String result = HttpUtils.URLPost(GET_LNG_LAT_URL, params, "");

        String[] lngLatArr = new String[2];
        // HttpUtils.URLPost yields an empty string when the request fails, so guard before parsing.
        JSONObject jsonObject = StringUtils.isBlank(result) ? null : JSONObject.parseObject(result);
        if (jsonObject == null) {
            LOGGER.error("Empty response while geocoding address ({})", address);
            return lngLatArr;
        }

        // The response carries status "1" on success; anything else (including a missing field) is a failure.
        if (!"1".equals(jsonObject.getString("status"))) {
            LOGGER.error("地址({}){}", address, jsonObject.getString("info"));
            return lngLatArr;
        }

        JSONArray geocodes = jsonObject.getJSONArray("geocodes");
        if (geocodes == null) {
            return lngLatArr;
        }
        for (int i = 0; i < geocodes.size(); i++) {
            JSONObject geocode = geocodes.getJSONObject(i);
            String location = geocode == null ? null : geocode.getString("location");
            if (StringUtils.isBlank(location)) {
                continue;
            }
            String[] parts = location.split(",");
            // Only accept a proper "lng,lat" pair so callers can safely index [0] and [1].
            if (parts.length == 2) {
                lngLatArr = parts;
            }
        }
        return lngLatArr;
    }

    /**
     * Collects every POI matching {@code keyword} inside the given polygon and appends it to
     * {@code shopListSon}.
     *
     * <p>The first request (page 1) supplies the total {@code count}; the remaining pages are then
     * requested one by one. Each entry of a page's {@code pois} array becomes a {@link Shop} with
     * id, name, address, longitude and latitude. Pages whose {@code status} is not "1" are logged
     * and skipped.
     *
     * @param lonLat      value of the {@code polygon} request parameter, e.g.
     *                    {@code "118.21,29.11;120.30,30.33"}
     * @param keyword     value of the {@code keywords} request parameter; when blank nothing is requested
     * @param shopListSon list to append to; a new list is created when {@code null}
     * @return the list that was appended to
     */
    public static List<Shop> initialData(String lonLat, String keyword, List<Shop> shopListSon){
        if (shopListSon == null) {
            shopListSon = new ArrayList<>();
        }
        if (StringUtils.isBlank(keyword)) {
            LOGGER.error("地址({})為null或者空", keyword);
            // Nothing to search for: stop here instead of sending a request without keywords.
            return shopListSon;
        }
        try {
            // Fixed pause before every polygon query.
            Thread.sleep(5000);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so the caller can react to it, and stop querying.
            Thread.currentThread().interrupt();
            LOGGER.warn("Interrupted while waiting to query keyword ({})", keyword);
            return shopListSon;
        }

        // Sent as "offset" and used below to derive the number of pages from "count".
        final int pageSize = 20;
        Map<String, String> params = new HashMap<String, String>();
        params.put("polygon", lonLat);//"118.21,29.11;120.30,30.33"
        params.put("output", OUTPUT);
        params.put("keywords", keyword);
        params.put("offset", String.valueOf(pageSize));
        params.put("page", "1");
        params.put("key", KEY);

        JSONObject firstPage = fetchPoiPage(params);
        if (!isPoiResponseOk(firstPage)) {
            LOGGER.error("地址({}){}", keyword,
                    firstPage == null ? "empty response" : firstPage.getString("info"));
            return shopListSon;
        }

        int count;
        try {
            count = Integer.parseInt(firstPage.getString("count"));
        } catch (NumberFormatException e) {
            LOGGER.error("Unreadable count in response for keyword ({})", keyword, e);
            return shopListSon;
        }
        // Ceiling division: a partially filled last page still counts as a page.
        int pageCount = (count + pageSize - 1) / pageSize;

        for (int page = 1; page <= pageCount; page++) {
            // Page 1 was already fetched above; reuse it instead of requesting it a second time.
            JSONObject pageJson = firstPage;
            if (page > 1) {
                params.put("page", String.valueOf(page));
                pageJson = fetchPoiPage(params);
            }
            if (isPoiResponseOk(pageJson)) {
                addShops(pageJson.getJSONArray("pois"), shopListSon);
            } else {
                // Report the message of the page that actually failed, not of the first response.
                LOGGER.error("地址({}){}", keyword,
                        pageJson == null ? "empty response" : pageJson.getString("info"));
            }
        }
        return shopListSon;
    }

    /** Runs one polygon search request; returns {@code null} when the response body is blank. */
    private static JSONObject fetchPoiPage(Map<String, String> params) {
        String result = HttpUtils.URLGet(GET_LNG_PIO_URL, params, "UTF-8");
        LOGGER.debug("+++++++++{}", result);
        return StringUtils.isBlank(result) ? null : JSONObject.parseObject(result);
    }

    /** Tells whether a parsed response exists and reports status "1" (success). */
    private static boolean isPoiResponseOk(JSONObject response) {
        return response != null && "1".equals(response.getString("status"));
    }

    /** Converts each entry of a "pois" array into a Shop and appends it; entries without "lng,lat" are skipped. */
    private static void addShops(JSONArray pois, List<Shop> target) {
        if (pois == null) {
            return;
        }
        for (int j = 0; j < pois.size(); j++) {
            JSONObject poi = pois.getJSONObject(j);
            if (poi == null) {
                continue;
            }
            String location = poi.getString("location");
            String[] lonLatParts = location == null ? new String[0] : location.split(",");
            if (lonLatParts.length < 2) {
                LOGGER.warn("Skipping POI {} without a usable location: {}", poi.getString("id"), location);
                continue;
            }
            Shop shop = new Shop();
            shop.setShopName(poi.getString("name"));
            shop.setSpecificAddress(poi.getString("address"));
            shop.setId(poi.getString("id"));
            shop.setLongitude(lonLatParts[0]);
            shop.setLatitude(lonLatParts[1]);
            target.add(shop);
        }
    }

    //從高德地圖上取資料
//    public static void main(String[] args) {
//        List<Shop> listShop =new ArrayList<>();
//        //東經118°21′-120°30′,北緯29°11′-30°33′。杭州位置
//        for(double i=118.20;i<=120.31;i=i+0.1){
//            for(double j=29.10;j<=30.33;j=j+0.1){
//                List<Shop> listShopSon =new ArrayList<>();
//                double lonHead=i;
//                double latHead=j;
//                double lonTail=i+0.1;
//                double latTail=j+0.1;
//                String LonLat=lonHead+","+latHead+";"+lonTail+","+latTail;
//                listShopSon =initialData(LonLat,"便利店",listShopSon);
//                for(int n=0;n<listShopSon.size();n++){
//                    System.out.println("店鋪地址:"+listShopSon.get(n).getSpecificAddress());
//                }
//                if(listShopSon.size()>0){
//                    listShop.addAll(listShopSon);
//                }
//                System.out.println("ListShop的大小:"+listShop.size());
//                double d =Distance(lonHead,latHead,lonTail,latTail);
//                System.out.println("兩點距離"+d);
//
//            }
//
//        }
//
//        System.out.println("ListShop的大小:"+listShop.size());
//        creatExcel(listShop);
//    }

    /**
     * Entry point: reads the shops back from an Excel workbook via {@code readFile}.
     *
     * @param args optional; {@code args[0]} is the path of the .xls file to read. When absent,
     *             the original hard-coded path is used.
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : "D:\\geode\\222.xls";
        readFile(path);
    }


    /**
     * Writes the shops to the default workbook location.
     *
     * @param shopList shops to export; {@code null} is treated as an empty list
     */
    public static void creatExcel(List<Shop> shopList){
        creatExcel(shopList, "D:\\geode\\高德便利店地圖資料.xls");
    }

    /**
     * Writes the shops to an .xls workbook: one header row followed by one row per shop
     * (id, name, address, longitude, latitude).
     *
     * <p>I/O failures are logged rather than thrown, as before.
     *
     * @param shopList   shops to export; {@code null} is treated as an empty list
     * @param outputPath path of the file to create or overwrite
     */
    public static void creatExcel(List<Shop> shopList, String outputPath){
        HSSFWorkbook workbook = new HSSFWorkbook();
        HSSFSheet sheet = workbook.createSheet("高德地圖資料");

        // Header row (row 0).
        String[] headers = {"店鋪id", "店鋪名稱", "店鋪地址", "經度", "緯度"};
        HSSFRow headerRow = sheet.createRow(0);
        for (int col = 0; col < headers.length; col++) {
            headerRow.createCell(col).setCellValue(headers[col]);
        }

        // Data rows start at row 1, right below the header.
        // NOTE(review): the original comment warns that old POI versions limit rows/columns per
        // sheet — confirm the expected list size fits.
        int shopCount = shopList == null ? 0 : shopList.size();
        for (int i = 0; i < shopCount; i++) {
            Shop shop = shopList.get(i);
            HSSFRow dataRow = sheet.createRow(i + 1);
            dataRow.createCell(0).setCellValue(shop.getId());
            dataRow.createCell(1).setCellValue(shop.getShopName());
            dataRow.createCell(2).setCellValue(shop.getSpecificAddress());
            dataRow.createCell(3).setCellValue(shop.getLongitude());
            dataRow.createCell(4).setCellValue(shop.getLatitude());
        }

        // try-with-resources closes the stream even when write() fails.
        try (FileOutputStream fos = new FileOutputStream(outputPath)) {
            workbook.write(fos);
            System.out.println("寫入成功");
        } catch (IOException e) {
            LOGGER.error("Failed to write workbook to {}", outputPath, e);
        }
    }

    /**
     * Computes the great-circle distance between two points with the haversine formula,
     * using an earth radius of 6371 km.
     *
     * @param long1 longitude of the first point, in degrees
     * @param lat1  latitude of the first point, in degrees
     * @param long2 longitude of the second point, in degrees
     * @param lat2  latitude of the second point, in degrees
     * @return the distance in metres, rounded half-up to two decimal places
     * @throws NumberFormatException if the result is NaN or infinite (e.g. a NaN coordinate),
     *                               because it cannot be converted to a {@link BigDecimal}
     */
    public static double Distance(double long1, double lat1, double long2, double lat2) {
        final double earthRadiusKm = 6371; // earth radius 6371 km
        double lat1Rad = lat1 * Math.PI / 180.0;
        double lat2Rad = lat2 * Math.PI / 180.0;
        double deltaLat = lat1Rad - lat2Rad;
        double deltaLon = (long1 - long2) * Math.PI / 180.0;
        double sinHalfLat = Math.sin(deltaLat / 2.0);
        double sinHalfLon = Math.sin(deltaLon / 2.0);
        double haversine = sinHalfLat * sinHalfLat + Math.cos(lat1Rad)
                * Math.cos(lat2Rad) * sinHalfLon * sinHalfLon;
        // Floating-point error can push the term marginally above 1 for near-antipodal points,
        // which would make asin() return NaN; clamp it to the valid domain.
        double distanceKm = 2
                * earthRadiusKm
                * Math.asin(Math.sqrt(Math.min(1.0, haversine)));
        // Kilometres -> metres, then round to centimetres.
        return new BigDecimal(distanceKm * 1000).setScale(2, RoundingMode.HALF_UP).doubleValue();
    }

    /**
     * Reads shops from the first sheet of an .xls workbook.
     *
     * <p>Row 0 is treated as the header and skipped. In every following row, columns 0-4 are
     * mapped to id, shop name, address, longitude and latitude; further columns are ignored.
     * Read failures are logged and whatever was parsed so far is returned.
     *
     * @param filename path of the workbook to read
     * @return the shops read; empty when the path is blank or the file cannot be read
     */
    public static List<Shop> readFile(String filename){
        List<Shop> shopList = new ArrayList<>();
        if (StringUtils.isBlank(filename)) {
            LOGGER.error("No workbook path given");
            return shopList;
        }
        // try-with-resources closes the stream; the jxl workbook is released in the finally block.
        try (InputStream in = new FileInputStream(filename)) {
            Workbook wb = Workbook.getWorkbook(in);
            try {
                jxl.Sheet sheet = wb.getSheet(0);
                // Start at row 1: row 0 holds the column titles.
                for (int row = 1; row < sheet.getRows(); row++) {
                    Shop shop = new Shop();
                    for (int col = 0; col < sheet.getColumns(); col++) {
                        String value = sheet.getCell(col, row).getContents();
                        switch (col) {
                            case 0:
                                shop.setId(value);
                                break;
                            case 1:
                                shop.setShopName(value);
                                break;
                            case 2:
                                shop.setSpecificAddress(value);
                                break;
                            case 3:
                                shop.setLongitude(value);
                                break;
                            case 4:
                                shop.setLatitude(value);
                                break;
                            default:
                                break;
                        }
                    }
                    shopList.add(shop);
                }
            } finally {
                wb.close();
            }
        } catch (IOException | BiffException e) {
            LOGGER.error("Failed to read shops from {}", filename, e);
        }
        return shopList;
    }
}

由於高德地圖對它的資料做了保護,我這邊採用的是矩形搜尋:先從百度查到杭州的經緯度範圍,把它劃分成多個小矩形,然後逐個呼叫高德地圖的 API 服務。我這邊將爬取的資料先寫入 Excel 表格中,一是為了展示並驗證資料是否準確,二是擔心直接寫入會有記憶體洩漏問題。上面的程式碼包含寫入 Excel 和讀取 Excel 兩部分,不過要注意一下,兩者用的 jar 包不同:寫入用的是 poi,讀取用的是 jxl。

這邊發送 HTTP 請求用的是客戶端工具類,程式碼如下。這段程式碼是在網上找到的,首先謝謝分享的人;因為已經過了一段時間,原文地址忘記了。一開始用的是:

URL myURL = null;
URLConnection httpsConn = null;
try {
    myURL = new URL(url);
} catch (MalformedURLException e) {
    e.printStackTrace();
}
InputStreamReader insr = null;
BufferedReader br = null;
// Open the connection directly, without a proxy.
httpsConn = (URLConnection) myURL.openConnection();
然後在tomcat專案中呼叫時,報錯了,原因現在還沒有弄清楚,知道原因的告訴我一二。
package com.pingogo.api.common;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.MultiThreadedHttpConnectionManager;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * HTTP工具類
 *
 * @author lixiangyang
 *
 */
public class HttpUtils {

    // Commons-logging logger shared by all static helpers of this class.
    private static Log log = LogFactory.getLog(HttpUtils.class);

    /**
     * Charset name constant: UTF-8.
     */
    public static final String URL_PARAM_DECODECHARSET_UTF8 = "UTF-8";

    /**
     * Charset name constant: GBK.
     */
    public static final String URL_PARAM_DECODECHARSET_GBK = "GBK";

    // Separator placed between key=value pairs of a query string.
    private static final String URL_PARAM_CONNECT_FLAG = "&";

    // Empty string; returned as the response when a request fails.
    private static final String EMPTY = "";

    // Connection manager backing the shared client; created in the static initializer below.
    private static MultiThreadedHttpConnectionManager connectionManager = null;

    // Passed to setConnectionTimeout (presumably milliseconds — confirm against commons-httpclient).
    private static int connectionTimeOut = 25000;

    // Passed to setSoTimeout (presumably milliseconds — confirm against commons-httpclient).
    private static int socketTimeOut = 25000;

    // Passed to setDefaultMaxConnectionsPerHost.
    private static int maxConnectionPerHost = 20;

    // Passed to setMaxTotalConnections.
    private static int maxTotalConnections = 20;

    // Single client instance used by URLPost and URLGet.
    private static HttpClient client;

    // Builds the connection manager with the settings above and creates the shared client once,
    // at class-load time.
    static{
        connectionManager = new MultiThreadedHttpConnectionManager();
        connectionManager.getParams().setConnectionTimeout(connectionTimeOut);
        connectionManager.getParams().setSoTimeout(socketTimeOut);
        connectionManager.getParams().setDefaultMaxConnectionsPerHost(maxConnectionPerHost);
        connectionManager.getParams().setMaxTotalConnections(maxTotalConnections);
        client = new HttpClient(connectionManager);
    }

    /**
     * Submits form data with an HTTP POST.
     *
     * <p>Failures are logged, never thrown: the method returns an empty string when the status
     * code is not 200 or when an I/O error occurs.
     *
     * @param url
     *          the URL to request
     * @param params
     *          form fields to submit; may be {@code null} or empty. {@code null} values are sent
     *          as empty strings, as {@code getUrl} does for GET requests
     * @param enc
     *          charset name announced in the Content-Type header; UTF-8 is used when
     *          {@code null} or blank
     * @return
     *          the response body on HTTP 200, otherwise an empty string
     */
    public static String URLPost(String url, Map<String, String> params, String enc){
        // Honour the caller's charset; previously it was always overwritten with UTF-8.
        String charset = (enc == null || enc.trim().isEmpty()) ? URL_PARAM_DECODECHARSET_UTF8 : enc;
        String response = EMPTY;
        PostMethod postMethod = null;
        try {
            postMethod = new PostMethod(url);
            postMethod.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=" + charset);
            // Copy the form fields into the request.
            if (params != null) {
                for (Map.Entry<String, String> field : params.entrySet()) {
                    String value = field.getValue() != null ? field.getValue() : EMPTY;
                    postMethod.addParameter(field.getKey(), value);
                }
            }
            // Execute the request.
            int statusCode = client.executeMethod(postMethod);
            if (statusCode == HttpStatus.SC_OK) {
                response = postMethod.getResponseBodyAsString();
            } else {
                log.error("響應狀態碼 = " + statusCode);
            }
        } catch (HttpException e) {
            log.error("發生致命的異常,可能是協議不對或者返回的內容有問題", e);
        } catch (IOException e) {
            log.error("發生網路異常", e);
        } finally {
            // Always hand the connection back to the pool.
            if (postMethod != null) {
                postMethod.releaseConnection();
            }
        }

        return response;
    }

    /**
     * Sends an HTTP GET with the parameters appended to the URL as a query string.
     *
     * <p>Failures are logged, never thrown: the method returns an empty string when the status
     * code is not 200 or when an I/O error occurs.
     *
     * @param url
     *          the URL to request; may already contain a query string
     * @param params
     *          parameters to append; may be {@code null} or empty
     * @param enc
     *          charset name used to URL-encode the values; UTF-8 is used when {@code null} or blank
     * @return
     *          the response body on HTTP 200, otherwise an empty string
     */
    public static String URLGet(String url, Map<String, String> params, String enc){
        String charset = (enc == null || enc.trim().isEmpty()) ? URL_PARAM_DECODECHARSET_UTF8 : enc;
        String response = EMPTY;
        GetMethod getMethod = null;

        String query = getUrl(params, charset);
        StringBuilder totalUrl = new StringBuilder(url);
        if (!query.isEmpty()) {
            // Look for "?" in the caller's URL (the old check ran on an empty buffer and so
            // always added a second "?").
            totalUrl.append(url.indexOf('?') == -1 ? "?" : URL_PARAM_CONNECT_FLAG).append(query);
        }
        log.debug("GET請求URL = \n" + totalUrl);

        try {
            getMethod = new GetMethod(totalUrl.toString());
            getMethod.setRequestHeader("Content-Type", "application/x-www-form-urlencoded;charset=" + charset);
            // Execute the request.
            int statusCode = client.executeMethod(getMethod);
            if (statusCode == HttpStatus.SC_OK) {
                response = getMethod.getResponseBodyAsString();
            } else {
                // Logged at error level, consistent with URLPost.
                log.error("響應狀態碼 = " + statusCode);
            }
        } catch (HttpException e) {
            log.error("發生致命的異常,可能是協議不對或者返回的內容有問題", e);
        } catch (IOException e) {
            log.error("發生網路異常", e);
        } finally {
            // Always hand the connection back to the pool.
            if (getMethod != null) {
                getMethod.releaseConnection();
            }
        }

        return response;
    }

    /**
     * Builds a query string ({@code k1=v1&k2=v2}) from a map.
     *
     * <p>Values are URL-encoded, keys are appended as-is, and {@code null} values are sent as
     * empty strings. If the charset is not supported the raw value is used and the problem is
     * logged.
     *
     * @param map
     *          parameters to serialise; may be {@code null} or empty
     * @param valueEnc
     *          charset name for URL-encoding the values; UTF-8 is used when {@code null} or blank
     * @return
     *          the query string without a leading "?" or trailing "&amp;"; empty when there are
     *          no parameters
     */
    private static String getUrl(Map<String, String> map, String valueEnc) {

        if (map == null || map.isEmpty()) {
            return EMPTY;
        }
        String charset = (valueEnc == null || valueEnc.trim().isEmpty())
                ? URL_PARAM_DECODECHARSET_UTF8 : valueEnc;
        StringBuilder query = new StringBuilder();
        for (Map.Entry<String, String> entry : map.entrySet()) {
            String value = entry.getValue() != null ? entry.getValue() : EMPTY;
            try {
                value = URLEncoder.encode(value, charset);
            } catch (UnsupportedEncodingException e) {
                // Best effort, as before: keep the raw value, but leave a trace in the log.
                log.error("Unsupported charset " + charset + "; sending parameter "
                        + entry.getKey() + " unencoded", e);
            }
            // Put the separator between pairs so nothing has to be trimmed afterwards.
            if (query.length() > 0) {
                query.append(URL_PARAM_CONNECT_FLAG);
            }
            query.append(entry.getKey()).append("=").append(value);
        }

        return query.toString();
    }
}

maven裡的配置

<dependency>
    <groupId>commons-httpclient</groupId>
    <artifactId>commons-httpclient</artifactId>
    <version>3.1</version>
</dependency>

最後給大家看一下我爬取出來的資料


如果有什麼不對的地方,希望大家指點。

試一下付款二維碼