淘寶客高佣金商品採集爬蟲開發教程
阿新 • • 發佈:2019-02-19
淘寶客是一種按成交計費的推廣模式,淘寶客只要從淘寶客推廣專區獲取商品程式碼,任何買家(包括你自己)經過你的推廣(連結、個人網站,部落格或者社群發的帖子)進入淘寶賣家店鋪完成購買後,就可得到由賣家支付的佣金。所以,高佣金的商品對於淘寶客使用者來說是非常有用的,在各個渠道推廣這些商品都可以獲得不錯的收益。
本文分享下用JavaScript採集淘寶客高佣金商品資訊的原始碼,並做簡單的解析。先看原始碼:
/**
 * Crawler script for collecting high-commission Taobao-affiliate products.
 * The crawler collects at most 100 pages of product information.
 *
 * `configs` declares the crawl scope (domain, entry URL, content-URL pattern)
 * and the fields pulled out of each JSON list page via JsonPath selectors.
 */
var configs = (function () {
  var JSON_PATH = SelectorType.JsonPath;

  // Builds one JsonPath field descriptor; `extras` optionally adds flags
  // such as `required` / `primaryKey` after the four common keys.
  function jsonField(name, alias, selector, extras) {
    var field = {
      name: name,
      alias: alias,
      selectorType: JSON_PATH,
      selector: selector
    };
    for (var key in extras) {
      if (Object.prototype.hasOwnProperty.call(extras, key)) {
        field[key] = extras[key];
      }
    }
    return field;
  }

  // Per-product fields, each selected relative to one entry of the product list.
  var productFields = [
    jsonField("id", "商品ID", "$.auctionId", { required: true, primaryKey: true }),
    jsonField("name", "商品名稱", "$.title"),
    jsonField("image", "商品圖片", "$.pictUrl"),
    jsonField("price", "商品價格", "$.zkPrice"),
    jsonField("monthly_sales", "月銷量", "$.biz30day"),
    jsonField("commision", "商品佣金", "$.tkCommFee"),
    jsonField("commision_rate", "佣金比率", "$.eventRate"),
    jsonField("day_left", "剩餘活動天數", "$.dayLeft"),
    jsonField("url", "商品連結", "$.auctionUrl"),
    jsonField("shop_name", "店鋪名稱", "$.shopTitle")
  ];

  // Repeated container field: one entry per item of `$.data.pageList`.
  var productList = {
    name: "infos",
    selectorType: JSON_PATH,
    selector: "$.data.pageList",
    repeated: true,
    children: productFields
  };

  return {
    domains: ["pub.alimama.com"],
    scanUrls: ["http://pub.alimama.com/promo/item/channel/index.htm?channel=qqhd"],
    contentUrlRegexes: [
      /http[\w:\/]+pub\.alimama\.com\/items\/channel\/qqhd\.json\?.*/
    ],
    helperUrlRegexes: [""],
    autoFindUrls: false,
    fields: [productList]
  };
})();
/**
 * Hook run after each page download: pauses via `msleep(10000)` and then
 * hands the page back unchanged.
 * NOTE(review): `msleep` is not defined in this file — presumably a platform
 * helper that sleeps for the given number of milliseconds (i.e. a 10 s
 * throttle between requests); confirm against the platform docs.
 *
 * @param page downloaded page, returned as-is
 * @param site crawler site handle (unused here)
 * @returns the same `page` value that was passed in
 */
configs.afterDownloadPage = function (page, site) {
  var pauseAfterDownload = 10000;
  msleep(pauseAfterDownload);
  return page;
};
/**
 * Scan-page hook: queues the high-commission product list-page URLs.
 * Adds the JSON list URL for pages 1 through 100 (50 items per page) to the
 * site via `site.addUrl`, in ascending page order.
 *
 * @param page    scan page (unused here)
 * @param content scan page content (unused here)
 * @param site    object exposing `addUrl(url)`
 * @returns {boolean} always `false`
 *   NOTE(review): presumably tells the platform not to discover further links
 *   from the scan page itself — confirm against the platform docs.
 */
configs.onProcessScanPage = function (page, content, site) {
  var listUrlPrefix = "http://pub.alimama.com/items/channel/qqhd.json?channel=qqhd&toPage=";
  var listUrlSuffix = "&perPageSize=50";
  var lastPage = 100;
  var pageNo = 0;
  while (pageNo < lastPage) {
    pageNo += 1;
    site.addUrl(listUrlPrefix + pageNo + listUrlSuffix);
  }
  return false;
};
/**
 * Post-processes extracted field values.
 *
 * - "infos.image": prefixes "https:" to the value (the source data is
 *   expected to be protocol-relative, e.g. "//host/path" — assumption, confirm).
 *   A value that already starts with "http:" or "https:" is returned untouched
 *   so it is not turned into "https:http://...".
 * - "infos.commision_rate": appends "%". A numeric 0 is a real rate and is
 *   formatted as "0%" rather than being treated as missing.
 * - Any other field, and any empty/missing value, is returned unchanged.
 *
 * @param fieldName dotted field name, e.g. "infos.image"
 * @param data      extracted value
 * @param page      page the value came from (unused here)
 * @param site      crawler site handle (unused here)
 * @returns the processed value
 */
configs.afterExtractField = function (fieldName, data, page, site) {
  if (fieldName === "infos.image") {
    if (!data) {
      return data;
    }
    // Already absolute: prepending a scheme would corrupt the URL.
    if (/^https?:/i.test(data)) {
      return data;
    }
    return "https:" + data;
  }
  if (fieldName === "infos.commision_rate") {
    // Keep null/undefined/"" as-is, but let a numeric 0 through to be formatted.
    if (!data && data !== 0) {
      return data;
    }
    return data + "%";
  }
  return data;
};
// Build the crawler from `configs` (the settings plus the hook functions
// assigned to it earlier in this script) and start it.
// NOTE(review): `Crawler` is not defined in this file — presumably supplied
// by the hosting crawler platform; confirm.
var crawler = new Crawler(configs);
crawler.start();
使用Chrome瀏覽器分析淘寶客網頁,重新整理頁面後,在瀏覽器“開發者工具”中選擇“XHR”,搜尋“qqhd.json”,便可找到存放商品資訊的url,如下圖所示:
總結:該爬蟲的開發並無特別難的地方,只需找到列表頁資料就可以了,然後簡單組織程式碼後,便可輕鬆開發爬蟲了。可直接將程式碼複製到網際網路上的大資料平臺上(比如,神箭手大資料平臺等),直接執行即可。