1. 程式人生 > >抓取夢幻西遊藏寶閣資料,php過驗證

抓取夢幻西遊藏寶閣資料,php過驗證

  public function login_cbg(){
$cookieVerify = dirname(__FILE__)."/cookie.cookie";
$cookieSuccess = dirname(__FILE__)."/cookie_2.cookie";
if(!$_POST){
// 獲取cookie並儲存
$ch = curl_init(); 
curl_setopt($ch, CURLOPT_URL, "http://xyq.cbg.163.com/cgi-bin/login.py?next_url=%2Fcgi-bin%2Fequipquery.py%3Fact%3Dbuy_show_by_ordersn%26ordersn%3D22_1458634981_24342109%26server_id%3D9&server_id=9&act=show_anon_auth_page");
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieVerify);
$rs = curl_exec($ch);
curl_close($ch); 
 
// 帶上cookie抓取驗證碼,必須帶上cookie,否則驗證碼不對應
$ch = curl_init(); 
curl_setopt($ch, CURLOPT_URL, "http://xyq.cbg.163.com/cgi-bin/create_validate_image.py?");
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieVerify);
curl_setopt($ch, CURLOPT_COOKIEJAR,$cookieVerify);
$rs = curl_exec($ch);
// 把驗證碼在本地生成,二次拉取驗證碼可能無法通過驗證
@file_put_contents("verifycode.jpg",$rs);
curl_close($ch); 
// 手工驗證碼錶單
echo "<form action=\"\" method=\"post\"><input type=\"text\" name=\"vcode\"><img src=\"verifycode.jpg\" /><br><input type=\"submit\" value=\"ok\"></form>";
}else{
// 登入
$ch = curl_init(); 
// 使用者名稱\密碼 
$user = "abc123"; 
$pass = "123456";
$verify = $_POST["vcode"];
$url = "http://xyq.cbg.163.com/cgi-bin/login.py"; 
$next_url="/cgi-bin/equipquery.py?act=buy_show_by_ordersn&ordersn=22_1458634981_24342109&server_id=9";
 
// 返回結果存放在變數中,不輸出 
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); 
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieVerify);
curl_setopt($ch, CURLOPT_HEADER, 1);
curl_setopt($ch,CURLOPT_HTTPGET,1);
curl_setopt($ch,CURLOPT_REFERER,"http://xyq.cbg.163.com/cgi-bin/login.py?next_url=%2Fcgi-bin%2Fequipquery.py%3Fact%3Dbuy_show_by_ordersn%26ordersn%3D22_1458634981_24342109%26server_id%3D9&server_id=9&act=show_anon_auth_page"); 
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120); 
curl_setopt($ch, CURLOPT_POST, true); 
$fields_post = array("act"=>'do_anon_auth', 
"next_url"=>$next_url, 
"server_id"=>9,
"image_value"=>$verify); 
$headers_login = array("User-Agent" => "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.108 Safari/537.36"); 
// $fields_string = ""; 
// foreach($fields_post as $key => $value){ 
// $fields_string .= $key . "=" . $value . "&"; 
// } 
// $fields_string = rtrim($fields_string , "&"); 
curl_setopt($ch, CURLOPT_HTTPHEADER, $headers_login); 
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieSuccess);
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($fields_post)); 
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
$result= curl_exec($ch);
$result=mb_convert_encoding($result,'utf-8','utf-8,ASCII,GBK,GB2312');
curl_close($ch);
$this->write_file($result);
//var_dump($result);

}

}

/**
 * Save $content as mh.php inside <DOCUMENT_ROOT>/application/views/contents,
 * creating that directory (including missing parents) when it does not exist.
 * An existing mh.php is overwritten.
 *
 * NOTE(review): the caller visible in this file passes a page fetched from a
 * remote site. Storing it with a .php extension means any PHP tags in it would
 * run if the file is later included as a view — confirm this is acceptable.
 *
 * @param string $content Text to write.
 * @throws \RuntimeException when the directory cannot be created or the file cannot be written
 */
public function write_file($content){
    $path = $_SERVER['DOCUMENT_ROOT'].'/application/views/contents';

    // Recursive mkdir: the target is three levels below the document root.
    // The second is_dir() covers the directory being created concurrently.
    if(!is_dir($path) && !mkdir($path, 0777, true) && !is_dir($path)){
        throw new \RuntimeException("Unable to create directory: ".$path);
    }

    $file = $path.'/mh.php';
    $myfile = fopen($file, 'w');
    if($myfile === false){
        throw new \RuntimeException("Unable to open file for writing: ".$file);
    }

    $written = fwrite($myfile, $content);
    // Close before reporting a failure so the handle is never leaked.
    fclose($myfile);
    if($written === false){
        throw new \RuntimeException("Unable to write file: ".$file);
    }
}

原理是:第一次請求登入頁時取得的cookie,裡面包含著驗證碼的對應資訊。帶著這個cookie去獲取驗證碼,手動輸入之後再post給網站,就登入成功了。(http是無狀態的,所以必須靠cookie把這幾次請求關聯起來。)

之後輸出application/views/contents/mh.php檔案,就能看到抓取到的整個頁面。至於頁面顯示出錯的原因:

js檔案內有些請求用的是相對路徑,頁面在本地輸出之後,這些get請求的路徑就會錯誤。例如:如果我的本地配置域名是www.first.com,那麼相對路徑預設會以我的本地域名為基準,導致get資料出錯。解決辦法是將本地域名配置成跟藏寶閣同一個域名,這樣就能省去很多改動。

      還有一個中文顯示亂碼的問題,看了很多天js程式碼,都沒有發現錯誤。最後是將js檔案拷貝到本地,改用本地的js檔案,中文亂碼就解決了,原因還沒有想通。