1. 程式人生 > node爬蟲HTTP請求中的form data和request payload的區別

node爬蟲HTTP請求中的form data和request payload的區別

程式碼片段:

let request = require('request');
let cheerio = require('cheerio');
let async=require('async');
let querystring=require('querystring');
let login_url = 'https://auth2.cityads.com/login/';
let offers_url = 'https://cityads.com/stat/analytics/offers';
let create_job = "https://cityads.com/stat/ds/create_job";
let get_job_status = "https://cityads.com/stat/ds/get_job_status";
let fs = require('fs');
let path = require('path');
let xlsx = require('node-xlsx');
// Hash of the most recently created export job (set by start()).
let jobHash = '';
// File name of the most recently downloaded report (set by start(), read by read_file()).
let file_path = '';
// Base headers shared by every request. Never mutate this object directly;
// use buildHeaders() to derive a per-request copy.
let headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
};

// Hard-coded session cookie sent with every authenticated request.
// NOTE(review): presumably copied from a logged-in browser session and will expire — confirm.
const SESSION_COOKIE = "PHPSESSID=8rkhggprglk1an1cg4g2sjahg4; _ym_uid=1505217442926426080; _ym_isad=2; _ga=GA1.2.1435829105.1505217441; _gid=GA1.2.307976531.1505217441; storage_key_stat=106009bbb914944c62857aead94b7b7229d4d5d1";
const SITE_ORIGIN = "https://cityads.com";
const CONVERSIONS_REFERER = "https://cityads.com/stat/conversions";
// Pause between two job-status polls, in milliseconds.
const POLL_INTERVAL_MS = 1000;

/**
 * Build the headers for one request without touching the shared `headers` object.
 *
 * @param {Object} extra - headers to add on top of the base headers.
 * @returns {Object} a fresh headers object.
 */
function buildHeaders(extra) {
    return Object.assign({}, headers, extra);
}

/**
 * Headers used by the XHR-style calls (create_job / get_job_status).
 *
 * @param {Object} [extra] - additional headers for the specific call.
 * @returns {Object} a fresh headers object.
 */
function buildAjaxHeaders(extra) {
    return buildHeaders(Object.assign({
        "Cookie": SESSION_COOKIE,
        "origin": SITE_ORIGIN,
        "referer": CONVERSIONS_REFERER,
        "x-json": 1,
        "x-requested-with": "XMLHttpRequest"
    }, extra));
}

/**
 * Parse a JSON response body without throwing.
 *
 * @param {string} body - raw response body.
 * @returns {{error: (Error|null), value: *}} the parsed value, or the parse error.
 */
function parseJsonBody(body) {
    try {
        return { error: null, value: JSON.parse(body) };
    } catch (e) {
        return { error: e, value: undefined };
    }
}

/**
 * Poll get_job_status until the job reports status "success".
 *
 * Any other status is treated as "not ready yet" and polled again after
 * POLL_INTERVAL_MS; there is no upper bound on the number of attempts.
 *
 * @param {string} hash - job hash returned by create_job.
 * @param {function(Error=, string=)} done - called with the request/parse error,
 *        or with the `uri` field of the successful status response.
 */
function pollJobStatus(hash, done) {
    const param = {
        url: get_job_status,
        method: "POST",
        form: {
            json: 1,
            jobHash: hash
        },
        headers: buildAjaxHeaders()
    };
    request(param, function (err, res, body) {
        if (err) {
            return done(err);
        }
        const parsed = parseJsonBody(body);
        if (parsed.error) {
            return done(parsed.error);
        }
        const result = parsed.value;
        if (result && result["status"] === "success") {
            return done(null, result.uri);
        }
        setTimeout(function () {
            pollJobStatus(hash, done);
        }, POLL_INTERVAL_MS);
    });
}

/**
 * Run the whole export flow: open the offers page, create an export job,
 * poll until it succeeds, download the generated file next to this script,
 * then parse and print it with read_file().
 *
 * Errors from any step are logged by the final waterfall callback.
 */
function start() {
    async.waterfall([
        function (cb) {
            // This step can be omitted.
            const param = {
                url: offers_url,
                method: "GET",
                headers: buildHeaders({ "Cookie": SESSION_COOKIE })
            };
            request(param, function (err) {
                // Best-effort request: a failure here is logged but does not abort the flow.
                if (err) console.log(err);
                cb();
            });
        },
        function (cb) {
            const filter = {
                "mainGroup": "203",
                "subGroup": "",
                "period": "11.09.2017 - 11.09.2017",
                "dateType": "event_time",
                "sort": "",
                "sdir": "",
                "cols": "is_sale,order_key,subaccount,click_time,lead_time,sale_time,lead_delta,sale_delta,total_delta,action_name,campaign_target_id,status,customer_type,payment_method,basket_count,original_total,currency,order_total,wm_currency,percent,commission_open_adv,commission_rejected_adv,commission_adv",
                "reportPageId": 11020001,
                "lang": "cn",
                "keyword": "",
                "complexFilter": [],
                "skin": "",
                "useSkin": "0",
                "jd": "",
                "userCurrency": "rub",
                "format": "xls"
            };
            const param = {
                url: create_job,
                method: "POST",
                headers: buildAjaxHeaders({ "Content-Type": "text/plain;charset=UTF-8" }),
                // The filter is sent as one raw JSON string (a request payload),
                // not as key=value form fields.
                // NOTE(review): passing the string through `form` may make the
                // library replace the Content-Type above; `body` might be the
                // intended option — confirm against the server before changing.
                form: JSON.stringify(filter)
            };
            request(param, function (err, res, body) {
                if (err) {
                    return cb(err);
                }
                const parsed = parseJsonBody(body);
                if (parsed.error) {
                    return cb(parsed.error);
                }
                const hash = parsed.value ? parsed.value["jobHash"] : undefined;
                if (!hash) {
                    return cb(new Error("create_job response has no jobHash"));
                }
                jobHash = hash;
                cb(null, hash);
            });
        },
        function (jobHash, callback) {
            pollJobStatus(jobHash, function (err, uri) {
                if (err) console.log(err);
                console.log("uri=", uri);
                callback(err, uri);
            });
        },
        function (uri, cb) {
            if (typeof uri !== "string") {
                return cb(new Error("job status response has no uri"));
            }
            const download_url = "https://cityads.com" + uri;
            // No accept-encoding header is sent: the response is piped to disk
            // as-is, so a compressed body would be saved undecoded.
            const param = {
                url: download_url,
                method: "GET",
                headers: buildHeaders({
                    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                    "referer": CONVERSIONS_REFERER,
                    "accept-language": "zh-CN,zh;q=0.8",
                    "upgrade-insecure-requests": 1,
                    "Cookie": SESSION_COOKIE
                })
            };
            // The file name is taken from the fourth "/"-separated segment of the uri.
            const _file_path = uri.split("/")[3];
            if (!_file_path) {
                return cb(new Error("cannot derive a file name from uri: " + uri));
            }
            file_path = _file_path;

            // Make sure cb fires exactly once, whichever stream event comes first.
            let settled = false;
            const settle = function (err) {
                if (settled) return;
                settled = true;
                cb(err);
            };
            // Write next to this script, which is where read_file() looks for the file.
            const out = fs.createWriteStream(path.join(__dirname, _file_path));
            out.on('error', settle);
            // Continue only once the file is completely written.
            out.on('finish', function () {
                settle();
            });
            request(param).on('error', settle).pipe(out);
        },
        function (cb) {
            try {
                read_file();
            } catch (e) {
                return cb(e);
            }
            cb();
        }
    ], function (err) {
        if (err) console.log(err);
    });
}

/**
 * Parse the downloaded spreadsheet (file_path, resolved against __dirname)
 * and print the rows of its first sheet.
 *
 * Throws whatever xlsx.parse throws, and a TypeError if the workbook has no sheets.
 */
function read_file() {
    const _path = path.join(__dirname, file_path);
    const obj = xlsx.parse(_path);
    const excelArray = obj[0].data;
    // The `path` module binding is left untouched so this function stays callable.
    console.log(excelArray);
}

start();