程式人生 > PHP簡單爬蟲 爬取免費代理ip 一萬條

PHP簡單爬蟲 爬取免費代理ip 一萬條

img mys i++ .com log mage top100 dai code

目標站:http://www.xicidaili.com/

代碼:

<?php
// Bootstrap for the free-proxy crawler: load the scraping libraries, the ezSQL
// database layer and the project helper functions, then open the DB connection.
// All paths are relative and are resolved against the include path / working directory.
require "lib/phpQuery.php";
require "lib/QueryList.php";
require "db/shared/ez_sql_core.php";
require "db/mysql/ez_sql_mysql.php";
require "public/function.php";
use QL\QueryList;

// Database handle used to store the crawled proxy records.
// NOTE(review): argument order is presumably (user, password, database, host),
// as in ezSQL's ezSQL_mysql constructor -- confirm against the bundled ezSQL version.
$db = new ezSQL_mysql('root', 'root', 'spider', 'localhost');

// Walk listing pages 1..100 of the target site and, for every page, try table
// rows 1..100 of the #ip_list table (row 0 is skipped by starting at 1).
for ($j = 1; $j <= 100; $j++) {
    $gurl = "http://www.xicidaili.com/nn/" . $j;
    $html = curl_request($gurl);

    // NOTE(review): curl_request() is a project helper; it is assumed to return
    // an empty/false value when the download fails -- confirm. Skipping here
    // avoids parsing an empty document 100 times and reporting a false "done".
    if (empty($html)) {
        echo "page " . $j . ": fetch failed, skipped" . PHP_EOL;
        continue;
    }

    for ($i = 1; $i <= 100; $i++) {
        getIpInfo($html, $i, $db);
    }
    echo "" . $j . "頁完成" . PHP_EOL;
}

/**
 * Extract one proxy record from a listing page and insert it into the `ip` table.
 *
 * The parsed record is printed with print_r() before it is stored. When the
 * requested row does not exist (no IP could be extracted) nothing is inserted.
 *
 * @param string $html Raw HTML of one listing page.
 * @param int    $t    Index of the table row inside #ip_list to read.
 * @param object $db   Database handle exposing escape() and query() (the ezSQL_mysql instance).
 *
 * @return void
 */
function getIpInfo($html, $t, $db)
{
    // QueryList rule format: field => [selector, attribute, tag filter, callback].
    $rules = [
        'ip'        => ["#ip_list tr:eq($t) td:eq(1)", 'text'], // IP address
        'port'      => ["#ip_list tr:eq($t) td:eq(2)", 'text'], // port
        'area'      => ["#ip_list tr:eq($t) td:eq(3)", 'text'], // location
        'anonymous' => ["#ip_list tr:eq($t) td:eq(4)", 'text'], // anonymity level
        'type'      => ["#ip_list tr:eq($t) td:eq(5)", 'text'], // protocol type
        'speed'     => ["#ip_list tr:eq($t) td:eq(6)", 'html', '', function ($content) {
            // Takes the number between the first ':' and the following '%'.
            // Presumably this is the width percentage of the speed bar in the
            // cell markup -- verify against the live page.
            $parts = explode(':', $content);
            $num = isset($parts[1]) ? (float) explode('%', $parts[1])[0] : 0.0;

            if ($num >= 60 && $num < 80) {
                return "一般";
            } elseif ($num >= 80) {
                return "很快";
            }
            return "較慢";
        }], // speed rating
        'chtime'    => ["#ip_list tr:eq($t) td:eq(8)", 'text'], // time alive
        'yztime'    => ["#ip_list tr:eq($t) td:eq(9)", 'text'], // last verified time
    ];

    $data = QueryList::Query($html, $rules)->data;
    print_r($data);

    // Rows past the end of the table yield no record; do not insert blanks.
    if (empty($data[0]) || empty($data[0]['ip'])) {
        return;
    }
    $row = $data[0];

    // Scraped text is untrusted: trim it, escape it and quote it as a SQL
    // string literal instead of interpolating it raw into the statement.
    $columns = ['ip', 'port', 'area', 'anonymous', 'type', 'speed', 'chtime', 'yztime'];
    $values = [];
    foreach ($columns as $column) {
        $value = isset($row[$column]) ? trim((string) $row[$column]) : '';
        $values[] = "'" . $db->escape($value) . "'";
    }

    $db->query(
        "INSERT INTO ip (" . implode(', ', $columns) . ") VALUES (" . implode(', ', $values) . ")"
    );
}

結果:

技術分享圖片

完整項目下載:https://files.cnblogs.com/files/wordblog/spider2.rar

PHP簡單爬蟲 爬取免費代理ip 一萬條