1. 程式人生 > >爬取flash數據

爬取flash數據

serialize 查看 引入 repo list() eid lse blazeds 房產

關於html爬取數據的文章已經有很多了,我今天主要和大家交流的是如何爬取flash網頁的數據。這方面資料相對比較少,主要是html5興起後現在flash站很少了,不過用於技術研究還是可以嘗試一下,這篇文章就主要介紹我爬取數據的整個過程。

以房產透明網為例,該網站的一房一價數據就是通過flash顯示,接下來將一步步介紹如何獲取對應的數據。

特別聲明,本文章僅做相關技術學習交流,數據版權為成都透明網,個人或企業請勿用於商業或非法用途,如該文章有不妥之處請聯繫本人刪除。


我找了一個樓盤用瀏覽器自帶的工具查看,可以看到返回的數據是亂碼,如下圖。

技術分享圖片

這主要是因為返回的數據格式是application/x-amf,瀏覽器無法正常解析,接下來就需要用到抓包工具Charles了,這個工具沒付費的話每30分鐘會自動關閉,我覺得30分鐘也夠用了,目前一直忍受著。

1.首先打開Charles

技術分享圖片

2.打開透明網一房一價頁面,點擊一個單元後就可以看到請求的數據了

技術分享圖片

技術分享圖片

這裏面比較重要的幾個部分我都截取了一下,最後的HOUSEITEMLIST,就是我們需要處理的數據了。

3.可以看到通過抓包工具已經可以看到請求的數據了,接下來就需要用java模擬amf的請求。

 <!-- Maven dependencies that the article says must be added before running the AMF request example that follows. -->
 <!-- https://mvnrepository.com/artifact/org.apache.flex.blazeds/flex-messaging-core -->
        <dependency>
            <groupId>org.apache.flex.blazeds</groupId>
            <artifactId>flex-messaging-core</artifactId>
            <version>4.7.2</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flex.blazeds/flex-messaging-common -->
        <dependency>
            <groupId>org.apache.flex.blazeds</groupId>
            <artifactId>flex-messaging-common</artifactId>
            <version>4.7.2</version>
        </dependency>

 先要引入這兩個包,這個請求代碼如下,部分參數我設置為******,如果需要測試自行粘貼對應的參數。

 /**
  * Sends a single AMF remoting request to the message broker endpoint and prints,
  * for every entry of the {@code HOUSEITEMLIST} collection in the reply, the decoded
  * unit number, usage and total area.
  *
  * <p>Several request parameters are masked with {@code *} characters and must be
  * replaced with values captured from a real session before this can succeed.
  *
  * <p>Failures are reported on {@code System.err} together with the stack trace;
  * the method itself never throws.
  *
  * @param args unused
  */
 public static void main(String[] args) {
        HttpURLConnection urlConnection = null;
        try {
            URL urlObject = new URL("http://cd.funi.com/messagebroker/amf");
            urlConnection = (HttpURLConnection) urlObject.openConnection();
            urlConnection.setDoOutput(true);
            urlConnection.setRequestProperty("Content-type", "application/x-amf;charset=gb2312");
            // NOTE(review): HttpURLConnection may silently drop restricted headers such as
            // Host and Origin depending on JVM settings - confirm the server does not need them.
            urlConnection.setRequestProperty("Host", "cd.funi.com");
            urlConnection.setRequestProperty("Origin", "http://user.funi.com");
            urlConnection.setRequestProperty("Referer", "http://user.funi.com/resource/swf/house/FundateClient_www.swf?communityId=DAZXiSEGhWZLhWIrVooMiDNjk4UzP3et1CztbkK1SZrXmBDQfGR%2BAFaCxnPg5MFf&t=20181131/[[DYNAMIC]]/1");
            urlConnection.setRequestProperty("Cookie", "pgv_pvi=9961606144; pgv_si=s9152640000; Hm_lvt_77be290eccb6ceb57b524a860b6faadc=1545658648,1545745229,1545917030,1546227366; Hm_lpvt_77be290eccb6ceb57b524a860b6faadc=1546227368");

            // Build the request message (0 = AMF0, 3 = AMF3).
            ActionMessage requestMessage = new ActionMessage();
            AmfTrace amfTrace = new AmfTrace();
            RemotingMessage remotingMessage = new RemotingMessage();
            remotingMessage.setOperation("***********************************");
            remotingMessage.setSource(null);
            remotingMessage.setClientId("FF66DFC9-B00D-2C39-E122-6B6752416543");
            remotingMessage.setDestination("dEEDOCService");
            remotingMessage.setMessageId("******************************");
            remotingMessage.setHeader("DSEndpoint", "my-amf");
            remotingMessage.setHeader("DSId", "*************************");
            remotingMessage.setTimeToLive(0);
            remotingMessage.setTimestamp(0);
            remotingMessage.setBody(new Object[]{"kezlmwCvdjGPckPbY1SmeL3frogB2sfc7IgjBssaFJ2ihf5M93DgMgf5mIqLiWgMNvNwBsVQKuDfTympu4bAjLV9/3mGEHK+MfNqVZKTY0xC3uGOkDg+i2Pt9oTDxBm1xU5Cvmjmd/9mXzN/v3UOvSoqKlLNYy42g8uGAq+JFczhHpdRi7LBtP56E8OJaGq4VksJJnPhGLtMLt1T3wZZKzcV4MqJ2U7NTg7q5AmyCC89nvetx/5Gop8mUBe0tHQdSop8mhHerHn+n7y5O1BL3sRS8T3e1B9F2txtWzcNX0NBzDgAMpfa3AJAhaZ7yuhwd5VtLYD+KquXCUmxJAd/YSjjZGAYYomWjZqRMfO5x5cP/SH8AeI4BiKbTQ+2UygOvYCiTAzy+8GNG0oKpTDCnP2/j2CFhISaMutwAFTF7CZw6HCzJq+2iA8sVnNmCePQMieuZOyq7LG0PppzHRkQYGpUzGynN4FJ8Dz7TBXmuKu7bWJ7jlrYdHbsexEGhoI2fEh/hivzSuCaBfWojChwMQOrtiYKG/YYEgtxNmEUYVdDH5XUiFHVH0V3W+O16fluHZUoaJdvZ+Fbm9oJIB2cz1X9hQSOcs3Cc7i95hhJ0SdQGa1yMw7c2vJSWzbTKuc6rnFm8IDmR6qm6sEIUHRokN56IsDqS+ZHaXWNoOG4q0xR97tFCPlrURWxLcJX3tIJ4xl/imVVlifcAZX4/gXkykAGpM7tdGOy0J/hegAZqCY="});

            MessageBody amfMessage = new MessageBody(null, "/3", new Object[]{remotingMessage});
            requestMessage.addBody(amfMessage);

            // Serialize into memory first so that a serialization failure never leaves a
            // half-written request on the wire.
            SerializationContext serializationContext = new SerializationContext();
            ActionContext actionContext = new ActionContext();
            actionContext.setRequestMessage(requestMessage);
            ByteArrayOutputStream outBuffer = new ByteArrayOutputStream();
            AmfMessageSerializer amfMessageSerializer = new AmfMessageSerializer();
            amfMessageSerializer.initialize(serializationContext, outBuffer, amfTrace);
            amfMessageSerializer.writeMessage(requestMessage);

            // Flush and close the connection's own stream (not just the in-memory buffer).
            try (OutputStream outputStream = urlConnection.getOutputStream()) {
                outBuffer.writeTo(outputStream);
                outputStream.flush();
            }

            try (InputStream inputStream = urlConnection.getInputStream();
                 BufferedInputStream urlConnectionInputStream = new BufferedInputStream(inputStream)) {
                SerializationContext responseSerializationContext = new SerializationContext();
                ActionContext responseActionContext = new ActionContext();
                ActionMessage message = new ActionMessage();
                responseActionContext.setRequestMessage(message);
                // The aliased class must exist in this project under exactly this package and name.
                ClassAliasRegistry.getRegistry().registerAlias("DSK", "com.funi.frontend.dto.HouseTable");
                MessageDeserializer deserializer = new AmfMessageDeserializer();
                deserializer.initialize(responseSerializationContext, urlConnectionInputStream, amfTrace);
                deserializer.readMessage(message, responseActionContext);

                for (MessageBody msg : (ArrayList<MessageBody>) message.getBodies()) {
                    java.lang.String targetURI = msg.getTargetURI();
                    if (targetURI == null) {
                        continue;
                    }
                    if (targetURI.endsWith(MessageIOConstants.RESULT_METHOD)) {
                        AcknowledgeMessage acknowledgeMessage = (AcknowledgeMessage) msg.getData();
                        ASObject asObject = (ASObject) acknowledgeMessage.getBody();
                        ArrayCollection houseitemlist = (ArrayCollection) asObject.get("HOUSEITEMLIST");
                        if (houseitemlist == null) {
                            System.err.println("Response contains no HOUSEITEMLIST");
                            continue;
                        }
                        for (Object o : houseitemlist) {
                            HouseTable houseTable = (HouseTable) o;
                            System.out.println(DecodeUtils.decode(houseTable.getUnitNo()));
                            System.out.println(DecodeUtils.decode(houseTable.getUsage()));
                            System.out.println(DecodeUtils.decode(houseTable.getTotalArea()));
                        }
                    } else if (targetURI.endsWith(MessageIOConstants.STATUS_METHOD)) {
                        // Previously this branch stored a message that was never shown.
                        System.err.println("Server error: " + msg.getData());
                    }
                }
            }
        } catch (Exception e) {
            // Keep best-effort behaviour (no rethrow) but make the failure diagnosable.
            System.err.println("error: " + e);
            e.printStackTrace();
        } finally {
            if (urlConnection != null) {
                urlConnection.disconnect();
            }
        }
    }

  

package com.funi.frontend.dto;


/**
 * Mutable data holder for one entry of the {@code HOUSEITEMLIST} collection returned by
 * the AMF service; the request example registers this class under the alias {@code DSK}.
 *
 * <p>The request example passes some of the String values through
 * {@code DecodeUtils.decode} before printing them, so they presumably arrive encoded
 * (the article mentions Base64) - confirm against real responses.
 *
 * <p>The two Boolean flags are exposed under two JavaBean property names each:
 * the original {@code mortgage}/{@code sealUp} accessors and the
 * {@code isMortgage}/{@code isSealUp} accessors that match the field names.
 * NOTE(review): the latter are presumably the names used in the AMF payload, which a
 * property-name-based mapper would need in order to populate the flags - verify
 * against captured traffic.
 */
public class HouseTable {
    private Boolean isMortgage;
    private String status;
    private String roomNo;
    private String listWaterPrice;
    private String typeHouse;
    private String huxId;
    private String buildingNo;
    private String fitmentPrice;
    private String floorNo;
    private String listPrice;
    private Boolean isSealUp;
    private String usage;
    private String totalArea;
    private Object houseTableList;
    private Object phase;
    private String unitNo;
    private String buildingId;
    private String communityId;

    /** @return the {@code isMortgage} flag, possibly {@code null} */
    public Boolean getMortgage() {
        return isMortgage;
    }

    /** @param mortgage new value of the {@code isMortgage} flag */
    public void setMortgage(Boolean mortgage) {
        isMortgage = mortgage;
    }

    /**
     * Same value as {@link #getMortgage()}, exposed as bean property {@code isMortgage}.
     *
     * @return the {@code isMortgage} flag, possibly {@code null}
     */
    public Boolean getIsMortgage() {
        return isMortgage;
    }

    /**
     * Same effect as {@link #setMortgage(Boolean)}, exposed as bean property {@code isMortgage}.
     *
     * @param isMortgage new value of the flag
     */
    public void setIsMortgage(Boolean isMortgage) {
        this.isMortgage = isMortgage;
    }

    /** @return the status value */
    public String getStatus() {
        return status;
    }

    /** @param status the status value to store */
    public void setStatus(String status) {
        this.status = status;
    }

    /** @return the room number value */
    public String getRoomNo() {
        return roomNo;
    }

    /** @param roomNo the room number value to store */
    public void setRoomNo(String roomNo) {
        this.roomNo = roomNo;
    }

    /** @return the {@code listWaterPrice} value */
    public String getListWaterPrice() {
        return listWaterPrice;
    }

    /** @param listWaterPrice the {@code listWaterPrice} value to store */
    public void setListWaterPrice(String listWaterPrice) {
        this.listWaterPrice = listWaterPrice;
    }

    /** @return the {@code typeHouse} value */
    public String getTypeHouse() {
        return typeHouse;
    }

    /** @param typeHouse the {@code typeHouse} value to store */
    public void setTypeHouse(String typeHouse) {
        this.typeHouse = typeHouse;
    }

    /** @return the {@code huxId} value */
    public String getHuxId() {
        return huxId;
    }

    /** @param huxId the {@code huxId} value to store */
    public void setHuxId(String huxId) {
        this.huxId = huxId;
    }

    /** @return the building number value */
    public String getBuildingNo() {
        return buildingNo;
    }

    /** @param buildingNo the building number value to store */
    public void setBuildingNo(String buildingNo) {
        this.buildingNo = buildingNo;
    }

    /** @return the {@code fitmentPrice} value */
    public String getFitmentPrice() {
        return fitmentPrice;
    }

    /** @param fitmentPrice the {@code fitmentPrice} value to store */
    public void setFitmentPrice(String fitmentPrice) {
        this.fitmentPrice = fitmentPrice;
    }

    /** @return the floor number value */
    public String getFloorNo() {
        return floorNo;
    }

    /** @param floorNo the floor number value to store */
    public void setFloorNo(String floorNo) {
        this.floorNo = floorNo;
    }

    /** @return the {@code listPrice} value */
    public String getListPrice() {
        return listPrice;
    }

    /** @param listPrice the {@code listPrice} value to store */
    public void setListPrice(String listPrice) {
        this.listPrice = listPrice;
    }

    /** @return the {@code isSealUp} flag, possibly {@code null} */
    public Boolean getSealUp() {
        return isSealUp;
    }

    /** @param sealUp new value of the {@code isSealUp} flag */
    public void setSealUp(Boolean sealUp) {
        isSealUp = sealUp;
    }

    /**
     * Same value as {@link #getSealUp()}, exposed as bean property {@code isSealUp}.
     *
     * @return the {@code isSealUp} flag, possibly {@code null}
     */
    public Boolean getIsSealUp() {
        return isSealUp;
    }

    /**
     * Same effect as {@link #setSealUp(Boolean)}, exposed as bean property {@code isSealUp}.
     *
     * @param isSealUp new value of the flag
     */
    public void setIsSealUp(Boolean isSealUp) {
        this.isSealUp = isSealUp;
    }

    /** @return the usage value */
    public String getUsage() {
        return usage;
    }

    /** @param usage the usage value to store */
    public void setUsage(String usage) {
        this.usage = usage;
    }

    /** @return the total area value */
    public String getTotalArea() {
        return totalArea;
    }

    /** @param totalArea the total area value to store */
    public void setTotalArea(String totalArea) {
        this.totalArea = totalArea;
    }

    /** @return the {@code houseTableList} value; its concrete type is not fixed by this class */
    public Object getHouseTableList() {
        return houseTableList;
    }

    /** @param houseTableList the {@code houseTableList} value to store */
    public void setHouseTableList(Object houseTableList) {
        this.houseTableList = houseTableList;
    }

    /** @return the {@code phase} value; its concrete type is not fixed by this class */
    public Object getPhase() {
        return phase;
    }

    /** @param phase the {@code phase} value to store */
    public void setPhase(Object phase) {
        this.phase = phase;
    }

    /** @return the unit number value */
    public String getUnitNo() {
        return unitNo;
    }

    /** @param unitNo the unit number value to store */
    public void setUnitNo(String unitNo) {
        this.unitNo = unitNo;
    }

    /** @return the building id value */
    public String getBuildingId() {
        return buildingId;
    }

    /** @param buildingId the building id value to store */
    public void setBuildingId(String buildingId) {
        this.buildingId = buildingId;
    }

    /** @return the community id value */
    public String getCommunityId() {
        return communityId;
    }

    /** @param communityId the community id value to store */
    public void setCommunityId(String communityId) {
        this.communityId = communityId;
    }
}

最後獲取到對應數據後用base64解碼一下即可。

特別聲明,本文章僅做相關技術學習交流,數據版權為成都透明網,個人或企業請勿用於商業或非法用途,如該文章有不妥之處請聯繫本人刪除。

喜歡java開發的可以加我qq3369245209,後面會建立一個java開發高級群,下期將介紹如何爬取app數據。

爬取flash數據