1. 程式人生 > >淘寶客高佣金商品採集爬蟲開發教程

淘寶客高佣金商品採集爬蟲開發教程

淘寶客是一種按成交計費的推廣模式:淘寶客從淘寶客推廣專區獲取商品程式碼後,任何買家(包括你自己)只要經由你的推廣渠道(連結、個人網站、部落格或者社群發的帖子)進入淘寶賣家店鋪並完成購買,你就可得到由賣家支付的佣金。因此,高佣金的商品對於淘寶客使用者來說非常有價值,在各個渠道推廣這些商品都可以獲得不錯的收益。

本文分享下用JavaScript採集淘寶客高佣金商品資訊的原始碼,並做簡單的解析。先看原始碼:

/**
 * Taobaoke (Taobao affiliate) high-commission product crawler.
 *
 * Queues up to 100 list pages of the "qqhd" channel JSON endpoint and
 * extracts one record per product from each response.
 *
 * NOTE(review): `SelectorType`, `Crawler` and `msleep` are not defined in this
 * script; they are presumably globals injected by the hosting crawler
 * platform -- confirm against the platform documentation.
 */

var configs = {
    domains: ["pub.alimama.com"],
    // Entry page; it is only used as a trigger for onProcessScanPage below.
    scanUrls: ["http://pub.alimama.com/promo/item/channel/index.htm?channel=qqhd"],
    // Content pages are the JSON list responses queued by onProcessScanPage.
    contentUrlRegexes: [
        /http[\w:\/]+pub\.alimama\.com\/items\/channel\/qqhd\.json\?.*/
    ],
    helperUrlRegexes: [""],
    // URLs are queued explicitly, so automatic link discovery is switched off.
    autoFindUrls: false,
    fields: [
        {
            // One entry per element of the JSON array at $.data.pageList.
            name: "infos",
            selectorType: SelectorType.JsonPath,
            selector: "$.data.pageList",
            repeated: true,
            children: [
                {
                    name: "id",
                    alias: "商品ID",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.auctionId",
                    required: true,
                    primaryKey: true
                },
                {
                    name: "name",
                    alias: "商品名稱",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.title"
                },
                {
                    name: "image",
                    alias: "商品圖片",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.pictUrl"
                },
                {
                    name: "price",
                    alias: "商品價格",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.zkPrice"
                },
                {
                    name: "monthly_sales",
                    alias: "月銷量",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.biz30day"
                },
                {
                    // The misspelled key is part of the output schema; keep it as is.
                    name: "commision",
                    alias: "商品佣金",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.tkCommFee"
                },
                {
                    name: "commision_rate",
                    alias: "佣金比率",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.eventRate"
                },
                {
                    name: "day_left",
                    alias: "剩餘活動天數",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.dayLeft"
                },
                {
                    name: "url",
                    alias: "商品連結",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.auctionUrl"
                },
                {
                    name: "shop_name",
                    alias: "店鋪名稱",
                    selectorType: SelectorType.JsonPath,
                    selector: "$.shopTitle"
                }
            ]
        }
    ]
};

/**
 * Throttles the crawl by pausing after every downloaded page.
 *
 * @param {*} page - The downloaded page; returned unchanged.
 * @param {*} site - Unused.
 * @returns {*} The same `page` that was passed in.
 */
configs.afterDownloadPage = function (page, site) {
    // presumably milliseconds (10 s per page) -- confirm msleep's unit.
    msleep(10000);
    return page;
};

/**
 * Queues the URLs of the high-commission product list pages.
 *
 * @param {*} page - Unused.
 * @param {*} content - Unused.
 * @param {{addUrl: function(string): *}} site - Receives each list-page URL.
 * @returns {boolean} Always `false`.
 */
configs.onProcessScanPage = function (page, content, site) {
    var lastPage = 100;
    var perPageSize = 50;
    for (var pageNo = 1; pageNo <= lastPage; pageNo++) {
        site.addUrl(
            "http://pub.alimama.com/items/channel/qqhd.json?channel=qqhd&toPage=" +
            pageNo + "&perPageSize=" + perPageSize
        );
    }
    return false;
};

/**
 * Post-processes the value extracted for an individual field.
 *
 * @param {string} fieldName - Dotted field path, e.g. "infos.image".
 * @param {*} data - The extracted value.
 * @param {*} page - Unused.
 * @param {*} site - Unused.
 * @returns {*} The adjusted value, or `data` unchanged for other fields.
 */
configs.afterExtractField = function (fieldName, data, page, site) {
    // Empty/missing values are passed through untouched.
    if (!data) {
        return data;
    }
    if (fieldName === "infos.image") {
        // presumably pictUrl is protocol-relative ("//host/...") -- verify.
        return "https:" + data;
    }
    if (fieldName === "infos.commision_rate") {
        return data + "%";
    }
    return data;
};

var crawler = new Crawler(configs);
crawler.start();

使用Chrome瀏覽器分析淘寶客網頁,重新整理頁面後,在瀏覽器“開發者工具”中選擇“XHR”,搜尋“qqhd.json”,便可找到存放商品資訊的url,如下圖所示:

(圖:在 Chrome 開發者工具的“XHR”面板中搜尋“qqhd.json”所找到的請求)

總結:該爬蟲的開發並無特別難的地方,只需找到返回列表頁資料的url,再簡單組織程式碼,便可輕鬆完成爬蟲的開發。可直接將程式碼複製到網際網路上的大資料平臺上(比如,神箭手大資料平臺等),直接執行即可。