1. 程式人生 > curl模擬ip和來源進行網站採集的實現方法

curl模擬ip和來源進行網站採集的實現方法

對於限制了訪問ip和來源(Referer)的網站,使用一般的採集方式無法取得內容。這裡介紹一種方法:使用php的curl擴充套件,在請求中偽造ip和來源,從而採集這類限制了ip和來源的網站。

1.設定頁面限制ip和來源訪問
比如服務端的server.php

複製程式碼
<?php

// Example server.php: answers 'allow access' only when the caller's reported
// IP equals $allow_ip and the Referer starts with $allow_referer; otherwise
// it answers 'deny access'.
//
// NOTE: getip() prefers the client-supplied Client-IP and X-Forwarded-For
// request headers over REMOTE_ADDR, so any client that sends those headers
// controls the IP this check sees.

$client_ip = getip();
$referer = getreferer();

$allow_ip = '192.168.0.100';
$allow_referer = 'http://www.xxx.cn';

// strpos(...) === 0 means the Referer must begin with the allowed prefix.
if ($client_ip === $allow_ip && strpos($referer, $allow_referer) === 0) {
    echo 'allow access';
} else {
    echo 'deny access';
}

/**
 * Get the visitor's IP address.
 *
 * Checks, in order, $_SERVER['HTTP_CLIENT_IP'],
 * $_SERVER['HTTP_X_FORWARDED_FOR'] and $_SERVER['REMOTE_ADDR'], and returns
 * the first one that is non-empty.
 *
 * @return string The first non-empty value, or '' when none is set.
 */
function getip(){
    if (!empty($_SERVER['HTTP_CLIENT_IP'])) {
        $cip = $_SERVER['HTTP_CLIENT_IP'];
    } elseif (!empty($_SERVER['HTTP_X_FORWARDED_FOR'])) {
        $cip = $_SERVER['HTTP_X_FORWARDED_FOR'];
    } elseif (!empty($_SERVER['REMOTE_ADDR'])) {
        $cip = $_SERVER['REMOTE_ADDR'];
    } else {
        $cip = '';
    }

    return $cip;
}

/**
 * Get the visitor's Referer.
 *
 * @return string $_SERVER['HTTP_REFERER'] when it is set, otherwise ''.
 */
function getreferer(){
    if (isset($_SERVER['HTTP_REFERER'])) {
        return $_SERVER['HTTP_REFERER'];
    }

    return '';
}
?>
複製程式碼

使用curl正常訪問(未模擬ip和來源時,請求不帶來源,上面的server.php會返回 deny access)

複製程式碼
<?php
/**
 * Send a POST request with cURL and return the response body.
 *
 * If cURL reports an error, the script is terminated via die() with the
 * cURL error message.
 *
 * @param string $url     URL to request.
 * @param array  $data    POST fields; encoded with http_build_query().
 * @param array  $header  HTTP header lines passed to CURLOPT_HTTPHEADER.
 * @param int    $timeout Value passed to CURLOPT_TIMEOUT.
 *
 * @return string|bool Result of curl_exec() (CURLOPT_RETURNTRANSFER is on).
 */
function doCurl($url, $data=array(), $header=array(), $timeout=30){
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

    $response = curl_exec($ch);

    if ($error = curl_error($ch)) {
        // Release the handle before terminating.
        curl_close($ch);
        die($error);
    }

    curl_close($ch);

    return $response;
}

// Usage
$url = 'http://www.xxx.cn/server.php';
$response = doCurl($url);

echo $response;
?>
複製程式碼

使用curl模擬ip和來源進行訪問

模擬來源

// Spoof the Referer sent with the request ('來源' is a placeholder for the referer URL).
curl_setopt($ch, CURLOPT_REFERER, '來源');

模擬ip(僅當服務端像上面的getip()那樣信任 CLIENT-IP / X-FORWARDED-FOR 請求頭時才有效;REMOTE_ADDR 無法透過請求頭偽造)

// Spoof the client IP by sending CLIENT-IP and X-FORWARDED-FOR request headers
// ('模擬ip' is a placeholder for the IP address to report).
curl_setopt($ch, CURLOPT_HTTPHEADER, array('CLIENT-IP: 模擬ip','X-FORWARDED-FOR: 模擬ip'));

完整程式碼如下:

複製程式碼
<?php
/**
 * POST to a URL with cURL, sending custom headers and a chosen Referer,
 * and hand back the response body.
 *
 * When cURL reports an error the script is terminated via die() with the
 * cURL error message.
 *
 * @param string $url     URL to request.
 * @param array  $data    POST fields; encoded with http_build_query().
 * @param array  $header  HTTP header lines passed to CURLOPT_HTTPHEADER.
 * @param string $referer Value passed to CURLOPT_REFERER.
 * @param int    $timeout Value passed to CURLOPT_TIMEOUT.
 *
 * @return string|bool Result of curl_exec() (CURLOPT_RETURNTRANSFER is on).
 */
function doCurl($url, $data=array(), $header=array(), $referer='', $timeout=30){
    $handle = curl_init();

    // Target and request headers.
    curl_setopt($handle, CURLOPT_URL, $url);
    curl_setopt($handle, CURLOPT_HTTPHEADER, $header);

    // POST payload.
    curl_setopt($handle, CURLOPT_POST, true);
    $postBody = http_build_query($data);
    curl_setopt($handle, CURLOPT_POSTFIELDS, $postBody);

    // Return the body instead of printing it, and apply the timeout.
    curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($handle, CURLOPT_TIMEOUT, $timeout);

    // Spoofed Referer.
    curl_setopt($handle, CURLOPT_REFERER, $referer);

    $body = curl_exec($handle);

    $failure = curl_error($handle);
    if ($failure) {
        die($failure);
    }

    curl_close($handle);

    return $body;
}
 
// Usage: request the external site with a spoofed IP and Referer.
$url = 'http://www.example.cn/server.php'; // external site to collect from
$data = [];

// Request headers carrying the IP to report.
$header = [
    'CLIENT-IP: 192.168.0.100',
    'X-FORWARDED-FOR: 192.168.0.100',
];

// Referer to report.
$referer = 'http://www.xxx.cn/';

// 5 is passed as doCurl()'s $timeout.
echo doCurl($url, $data, $header, $referer, 5);
?>
複製程式碼