PHP 抓取網頁內容
阿新 • 發佈:2019-02-03
/**
 * Fetch a URL with cURL and return the response body.
 *
 * The request uses a 5 second timeout. The User-Agent and Referer headers are
 * taken from the _USERAGENT_ and _REFERER_ constants when the including script
 * has defined them; when a constant is missing the corresponding option is
 * simply left at cURL's default instead of raising an undefined-constant error.
 *
 * @param string $durl Absolute URL to download.
 * @return string|false Response body, or false when the handle cannot be
 *                      created or the transfer fails.
 */
function curl_file_get_contents($durl)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $durl);
    curl_setopt($ch, CURLOPT_TIMEOUT, 5);
    if (defined('_USERAGENT_')) {
        curl_setopt($ch, CURLOPT_USERAGENT, constant('_USERAGENT_'));
    }
    if (defined('_REFERER_')) {
        curl_setopt($ch, CURLOPT_REFERER, constant('_REFERER_'));
    }
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);

    $r = curl_exec($ch);
    curl_close($ch);

    return $r;
}

/**
 * Work out how many complete rows can be rendered from a list of
 * preg_match_all() results.
 *
 * Each entry of $matchSets is one $matches array as filled by
 * preg_match_all() in its default PREG_PATTERN_ORDER mode, so index 1 holds
 * the first capture group of every match. The row count is the size of the
 * shortest such list, capped at $limit; a set whose regex failed (and so has
 * no index 1) counts as zero rows.
 *
 * @param array $matchSets List of preg_match_all() result arrays.
 * @param int   $limit     Maximum number of rows to render.
 * @return int Number of indexes that exist in every set's capture group 1.
 */
function scrape_row_count(array $matchSets, $limit)
{
    $rows = $limit;
    foreach ($matchSets as $set) {
        $found = (isset($set[1]) && is_array($set[1])) ? count($set[1]) : 0;
        $rows = min($rows, $found);
    }

    return $rows;
}

/**
 * Escape scraped text for safe use in HTML text or a quoted attribute.
 *
 * double_encode is disabled because the captured values come out of HTML
 * source and may already contain entities such as &amp;.
 *
 * @param string $text Raw captured text (UTF-8).
 * @return string HTML-safe text.
 */
function scrape_escape($text)
{
    return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
}

// Example 1: list picture URLs and titles from a GBK-encoded listing page.
// curl_file_get_contents() can be used here in place of file_get_contents().
$txt = file_get_contents('http://ju.taobao.com/tg/today_items.htm?spm=608.1000525.0.51&frontCatId=4000');
if ($txt === false) {
    // Download failed: continue with an empty document so nothing is rendered.
    $txt = '';
}
$txt = mb_convert_encoding($txt, "UTF-8", "GBK");

$tpic = '/<img width=\"285\" data-ks-lazyload=\"([^<>]+)\"\/>/isu';
$ttitle = '/<h3><a target=\"_blank\" title=\"([^<>]+)\" href/s';

// $match1[0] holds the picture matches, $match1[1] the title matches.
$match1 = [];
preg_match_all($tpic, $txt, $match1[]);
preg_match_all($ttitle, $txt, $match1[]);

// Render at most 10 items, and never more than were actually found.
$rows1 = scrape_row_count($match1, 10);
for ($i = 0; $i < $rows1; $i++) {
    echo '圖片:<img src="' . scrape_escape($match1[0][1][$i]) . '"><br>';
    echo '標題' . scrape_escape($match1[1][1][$i]) . '<br>';
}

// Example 2: build a ranking table from a GB2312-encoded page.
$contents = file_get_contents("http://video.baidu.com/top/");
if ($contents !== false) {
    $contents = iconv("gb2312", "utf-8", $contents);
}
if ($contents === false) {
    // Download or charset conversion failed: render the table with no rows.
    $contents = '';
}

$paiming = '/<span class=\"color-v6 sum\">(\d+)<\/span>/s'; // rank
$title = '/<span class=\"matter\" title=\"([^<>]+)\">/s'; // title
$url = '/<a statisic=\"name\" href=\'([^<>]+)\' class=\"block\" target=\"_blank\" >/s'; // link
$num = '/<span class=\"color-v6 tr\">(\d+)<\/span>/s'; // view count

// $match[0..3] hold the rank, title, link and view-count matches respectively.
$match = [];
preg_match_all($paiming, $contents, $match[]);
preg_match_all($title, $contents, $match[]);
preg_match_all($url, $contents, $match[]);
preg_match_all($num, $contents, $match[]);

echo '<table><tr><td>排名</td><td>電影名稱</td><td>網址</td><td>點選量</td></tr>';
$rows2 = scrape_row_count($match, 10);
for ($i = 0; $i < $rows2; $i++) {
    $link = scrape_escape($match[2][1][$i]);
    echo '<tr><td>' . scrape_escape($match[0][1][$i]) . '</td> <td>' . scrape_escape($match[1][1][$i]) . '</td> <td><a href="' . $link . '" target="_blank">' . $link . '</a></td> <td>' . scrape_escape($match[3][1][$i]) . '</td></tr>';
}
echo '</table>';