1. 程式人生 > >採集金山詞霸每日一句一言Api

採集金山詞霸每日一句一言Api

Api官方介面

每日:http://open.iciba.com/dsapi/
查指定時間:http://sentence.iciba.com/index.php?c=dailysentence&m=getdetail&title=2018-11-06&_=1541655200812
楊小杰Apis:https://wiki.yum6.cn/docs/apis/wiki

搭建採集介面

<?php 
// Backfill script: requests the iciba "getdetail" API for a run of dates,
// walking backwards from today, and appends the returned sentences to iciba.txt.
header("Content-type: text/html; charset=utf-8"); // declare the response as UTF-8
$t1 = microtime(true); // start timestamp, used for the elapsed-time report at the end
$utime = date("Y-m-d");// date placed in the API URL's `title` parameter; starts at today
$translation = '0';// the API's `translation` field: '0' = do not collect, '1' = collect
$content = '1';// the API's `content` field (English version): '0' = do not collect, '1' = collect
/**
 * Fetch a URL with cURL and hand back the raw response body.
 *
 * The request carries browser-like headers and a fixed user agent, is aborted
 * after 3 seconds, and is made with TLS peer/host verification switched off.
 *
 * @param string $url Address to request.
 * @return string|bool Response body, or false when curl_exec() fails.
 */
function httpGet($url) {
	$requestHeaders = [
		"Accept:*/*",
		"Accept-Language:zh-CN,zh;q=0.8",
		"Connection:close",
	];

	$handle = curl_init();
	curl_setopt_array($handle, [
		CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1",
		CURLOPT_HTTPHEADER     => $requestHeaders,
		// Return the body from curl_exec() instead of printing it.
		CURLOPT_RETURNTRANSFER => true,
		CURLOPT_TIMEOUT        => 3,
		CURLOPT_SSL_VERIFYPEER => false,
		CURLOPT_SSL_VERIFYHOST => false,
		CURLOPT_URL            => $url,
	]);

	$body = curl_exec($handle);
	curl_close($handle);

	return $body;
}
// Output file: every collected sentence is appended to iciba.txt as one line.
$myfile = fopen("iciba.txt", "a+");
if ($myfile === false) {
    // Without a writable output file there is nothing useful to do.
    echo 'error: cannot open iciba.txt for writing';
} else {
    // Number of consecutive days to collect, counting back from today.
    $daysToFetch = 100;

    for ($i = 0; $i < $daysToFetch; $i++) {
        // Offset 0 is today and offset ($daysToFetch - 1) is the oldest day, so
        // exactly $daysToFetch dates are requested and none is computed in vain.
        $utime = date("Y-m-d", strtotime("-" . $i . " day"));

        // The trailing `_` parameter carries the current Unix time.
        $json_string = httpGet('http://sentence.iciba.com/index.php?c=dailysentence&m=getdetail&title=' . $utime . '&_=' . time());

        // httpGet() returns false on a failed request; skip that day instead of
        // feeding a non-string to json_decode().
        $data = is_string($json_string) ? json_decode($json_string, true) : null;
        if (!is_array($data)) {
            continue;
        }

        // Collect this day's lines in a fixed order: note, translation, content.
        $lines = '';
        if (isset($data['note'])) {
            $lines .= $data['note'] . "\n";
        }
        if (isset($data['translation']) && $translation == 1) {
            // NOTE(review): the stripped prefix must match the API's text
            // byte-for-byte - confirm this literal against a live response.
            $lines .= str_replace('小編的話:', '', $data['translation']) . "\n";
        }
        if (isset($data['content']) && $content == 1) {
            $lines .= $data['content'] . "\n";
        }

        if ($lines !== '') {
            fwrite($myfile, $lines);
        }
    }

    fclose($myfile);
    $t2 = microtime(true);
    // Report the wall-clock time since $t1 was taken at the top of the script.
    echo 'ok,耗時'.round($t2-$t1,3).'秒';
}

?>

執行這個介面，即可採集到最近約100天的每日一句，並逐行寫入 iciba.txt。

一言介面

<?php
// Random-sentence endpoint: prints one random line of iciba.txt, either as text
// or wrapped in a JavaScript function iciba() that document.write()s it.
//
// Query parameters:
//   charset=gbk  send the text output as GBK (any other value means UTF-8)
//   encode=js    send JavaScript instead of text

// Absolute path of the sentence file.
// If you mind other people downloading the file itself, give it a custom name
// or block direct access to it in the web server (e.g. Nginx) configuration.
$path = dirname(__FILE__);
$sentenceFile = $path."/iciba.txt";

// Read the file without line endings and without blank lines, so that an empty
// line can never be picked. A missing or unreadable file yields no lines.
$file = is_readable($sentenceFile)
    ? file($sentenceFile, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES)
    : false;

// Pick one line at random; fall back to an empty sentence when there is none,
// so the JavaScript variant still defines iciba().
$content = '';
if (is_array($file) && count($file) > 0) {
    $content = trim($file[mt_rand(0, count($file) - 1)]);
}

// Only "gbk" (case-insensitive) selects GBK; everything else, including a
// missing or non-string parameter, means UTF-8.
$charset = 'utf-8';
if (isset($_GET['charset']) && is_string($_GET['charset']) && strcasecmp($_GET['charset'], 'gbk') === 0) {
    $charset = 'gbk';
}

// Output format: JavaScript or text.
if (isset($_GET['encode']) && $_GET['encode'] === 'js') {
    // json_encode() turns the sentence into a valid JavaScript string literal:
    // quotes, apostrophes and backslashes are escaped and non-ASCII characters
    // become \uXXXX, so the script is pure ASCII and reads the same under any
    // charset. The JSON_HEX_* flags also keep <, >, & and quotes out of the raw
    // output. The string that document.write() receives is the sentence itself.
    $literal = json_encode($content, JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
    if ($literal === false) {
        // The sentence was not valid UTF-8; emit an empty string literal.
        $literal = '""';
    }
    header('Content-type: text/javascript;charset=utf-8');
    echo "function iciba(){document.write(" . $literal . ");}";
} else {
    if ($charset === 'gbk') {
        $content = mb_convert_encoding($content, 'gbk', 'utf-8');
    }
    // Declare the charset that the body is actually encoded in.
    header('Content-type: text/html; charset=' . $charset);
    echo $content;
}

每日採集介面

<?php
// Daily collector: when called with the right key (?p=...), fetches today's
// sentence from the iciba "dsapi" endpoint at most once per day, appends it to
// iciba.txt and records the date of the last successful run in data.txt.
header("Content-type: text/html; charset=utf-8"); // declare the response as UTF-8

$utime = date("Y-m-d");      // today's date; also the value stored in data.txt
$file_data = 'data.txt';     // marker file holding the date of the last successful run
$translation = '0';          // the API's `translation` field: '0' = do not collect, '1' = collect
$content = '1';              // the API's `content` field (English version): '0' = do not collect, '1' = collect
$monitorKey = 'iciba';       // access key expected in ?p= ; change it from the default

/**
 * Fetch a URL with cURL and return the raw response body.
 *
 * The request is aborted after 3 seconds and is made with TLS peer/host
 * verification switched off.
 *
 * @param string $url Address to request.
 * @return string|bool Response body, or false when curl_exec() fails.
 */
function httpGet($url) {
    $curl = curl_init();
    curl_setopt_array($curl, [
        CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; rv:1.7.3) Gecko/20041001 Firefox/0.10.1",
        CURLOPT_HTTPHEADER     => ["Accept:*/*", "Accept-Language:zh-CN,zh;q=0.8", "Connection:close"],
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_TIMEOUT        => 3,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_URL            => $url,
    ]);
    $res = curl_exec($curl);
    curl_close($curl);
    return $res;
}

/**
 * Append text to iciba.txt.
 *
 * @param string $txt Text to append.
 * @return bool True when the text was written, false on failure.
 */
function myfile($txt){
    return file_put_contents("iciba.txt", $txt, FILE_APPEND | LOCK_EX) !== false;
}

// Timestamp of the last successful run; 0 when the marker file is missing,
// empty or unparsable, so that the first call always crawls.
$str = is_file($file_data) ? file_get_contents($file_data) : '';
$lastRun = is_string($str) ? strtotime(trim($str)) : false;
if ($lastRun === false) {
    $lastRun = 0;
}

if (!isset($_GET['p']) || $_GET['p'] !== $monitorKey) {
    // Missing or wrong key.
    echo "老鐵 搞事情嗎";
} elseif (strtotime($utime) <= $lastRun) {
    // Today has already been crawled.
    echo "已爬";
} else {
    // Crawl start.
    $json_string = httpGet('http://open.iciba.com/dsapi/');
    $data = is_string($json_string) ? json_decode($json_string, true) : null;

    // Build the lines in a fixed order: note, translation, content.
    // Per the original author, entries before 2018-4-11 carry only one item,
    // hence the isset() guard on every field.
    $text = '';
    if (is_array($data)) {
        if (isset($data['note'])) {
            $text .= $data['note']."\n";
        }
        if (isset($data['translation']) && $translation == 1) {
            // NOTE(review): the stripped prefix must match the API's text
            // byte-for-byte - confirm this literal against a live response.
            $text .= str_replace('小編的話:', '', $data['translation'])."\n";
        }
        if (isset($data['content']) && $content == 1) {
            $text .= $data['content']."\n";
        }
    }

    // Record today as done only after something was really stored; a failed
    // fetch, parse or write leaves the marker untouched so a later call retries.
    if ($text !== '' && myfile($text)) {
        file_put_contents($file_data, $utime, LOCK_EX);
        echo "ok";
    } else {
        echo "fail";
    }
    // Crawl end.
}
?>

之後可以用寶塔面板的計劃任務，每天定時訪問這個每日採集介面（訪問時需帶上 ?p=iciba 參數），即可實現每日自動採集。
PS：更詳細的圖文介紹這裡就不贅述了，可參考採集毒雞湯的那篇文章。