1. 程式人生 > >java調用Linux執行Python爬蟲,並將數據存儲到elasticsearch中--(java後臺代碼)

java調用Linux執行Python爬蟲,並將數據存儲到elasticsearch中--(java後臺代碼)

創建 author cor 運行 hpa 詳細信息 多線程 under lean

本篇博客主要是 Java 後臺代碼;如需相應的腳本以及 Java 連接 Elasticsearch 的工具類代碼,請移步到上一篇博客。

一、創建用於連接 Linux 並執行腳本的工具類

package com.yjlc.platform.utils.Elasticsearch;

import ch.ethz.ssh2.Connection;
import ch.ethz.ssh2.StreamGobbler;

import java.io.*;
/**
 * --------------------------------------------------------------
 * CopyRights(c)2018,YJLC
 * All Rights Reserved
 * <p>
 * FileName: SingletonUtil.java
 * Description:
 * Author: cyb
 * CreateDate: 2018-11-15
 * --------------------------------------------------------------
 
*/ public class SingletonUtil { //無參構造 private SingletonUtil(){} private volatile static SingletonUtil instance; //字符編碼默認是utf-8 public static String DEFAULTCHART="UTF-8"; public static Connection conn; private String ip; private String userName; private String userPwd;
public static Boolean flag=false; //有參構造 public SingletonUtil(String ip, String userName, String userPwd) { this.ip = ip; this.userName = userName; this.userPwd = userPwd; } public SingletonUtil getInstance(String ip, String userName, String userPwd){ if(instance==null
){ synchronized(SingletonUtil.class){ //防止多線程多次創建 if(instance==null){ instance=new SingletonUtil(ip,userName, userPwd); } } } flag= instance.login();//調用登錄方法 return instance; } //登錄 public Boolean login(){ boolean flg=false; try { System.out.println("進入連接"); conn = new Connection(ip); try { conn.connect();//連接 } catch (IOException e) { e.printStackTrace(); } flg=conn.authenticateWithPassword(userName, userPwd);//認證 if (flg){ System.out.println("認證成功!"); } } catch (IOException e) { e.printStackTrace(); } return flg; } /** *@description:純文本格式返回 *@author:cyb *@date: 2018-11-15 16:56 *@param: in *@param: charset *@return: java.lang.String */ public static String processStdout(InputStream in, String charset){ InputStream stdout = new StreamGobbler(in); StringBuffer buffer = new StringBuffer();; try { BufferedReader br = new BufferedReader(new InputStreamReader(stdout,charset)); String line=null; while((line=br.readLine()) != null){ buffer.append(line+"\n"); } } catch (UnsupportedEncodingException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } return buffer.toString(); } }

二、控制層

/**
     * Toggles the execute state of a crawler task, persists it, and then
     * triggers the crawler.
     *
     * @param id      id of the task to look up
     * @param execute current execute state of the task; the stored state becomes
     *                its negation. A missing value is treated as "not running".
     * @return a map with {@code code} and {@code message}: 200 when the crawl
     *         succeeded, 206 when the connection failed, 404 when no task
     *         was found for {@code id}
     */
    @RequestMapping("openTask")
    @ResponseBody
    public Map<String,Object> openTask(String id,Boolean execute){
        Map<String,Object> map = new HashMap<>();
        // Look up the task details by id.
        BsKnowledgeInfoDTO knowledgeInfoDTO = knolegeService.getDataInfoById(id);
        if (knowledgeInfoDTO == null) {
            // NOTE(review): assumes the service returns null for an unknown id - confirm.
            map.put("code", 404);
            map.put("message", "任務不存在!");
            return map;
        }
        // Flip the state; Boolean.TRUE.equals avoids an NPE when the request
        // omits the parameter.
        boolean nextState = !Boolean.TRUE.equals(execute);
        knowledgeInfoDTO.setExecute(nextState);
        knolegeService.updateDataInfo(knowledgeInfoDTO);

        // TODO: take the crawl target from knowledgeInfoDTO.getPath() instead of a fixed URL.
        String url = "https://mil.news.sina.com.cn/";
        String reptileMethod = "http://192.168.200.8:8000/news"; // crawler endpoint
        String themeid = "hottopic";                             // index name used for storage
        // Expected shape:
        // http://192.168.200.8:8000/news?themeid=hottopic&url=https://mil.news.sina.com.cn/...
        // The parameter name was previously misspelled as "themid".
        String path = reptileMethod + "?themeid=" + themeid + "&url=" + url;

        // NOTE(review): host and root credentials are hard-coded in source;
        // they should come from external configuration.
        String ip = "192.168.200.8";
        String userName = "root";
        String userPwd = "yjlc20148";
        int w = knolegeService.reptile(path, ip, userName, userPwd);
        if (w == 200) {
            map.put("code", 200);
            map.put("message", "爬蟲成功!");
        } else if (w == 206) {
            map.put("code", 206);
            map.put("message", "連接失敗!");
        }
        return map;
    }

三、service層(此處省略了service接口層)

/**
 * Logs in to the remote host over SSH, runs the crawler shell script, and then
 * requests the crawler URL.
 *
 * @param path     crawler endpoint including the storage index and target URL
 * @param ip       host to connect to
 * @param userName login user
 * @param userPwd  login password
 * @return 200 when the crawl completed; 206 when the login failed or the
 *         remote session could not be opened or executed
 */
@Override
public int reptile(String path,String ip,String userName,String userPwd) {
    // Use the caller-supplied host and credentials rather than hard-coded ones.
    new SingletonUtil(ip, userName, userPwd).getInstance(ip, userName, userPwd);
    if (!Boolean.TRUE.equals(SingletonUtil.flag)) {
        return 206; // login failed
    }

    System.out.println("=====第一個步驟=====");
    Session session = null;
    try {
        session = SingletonUtil.conn.openSession();
        session.execCommand("sh /opt/zc/linux_sina.sh");
        //TODO: support multiple commands
        String result = SingletonUtil.processStdout(session.getStdout(), SingletonUtil.DEFAULTCHART);
        if (StringUtils.isBlank(result)) {
            // Empty stdout is taken to mean the script failed; show stderr instead.
            System.out.println("腳本出錯");
            result = SingletonUtil.processStdout(session.getStderr(), SingletonUtil.DEFAULTCHART);
            System.out.println(result);
        } else {
            System.out.println("第一個步驟腳本運行成功" + result);
        }
        ConnectNetworkUtil connectNetworkUtil = new ConnectNetworkUtil();
        connectNetworkUtil.ConnectNetwork(path);
        System.out.println("采集成功!");
        return 200;
    } catch (IOException e) {
        // Opening the session or starting the command failed.
        e.printStackTrace();
        return 206;
    } finally {
        // Always release the session and the connection, even on failure.
        if (session != null) {
            session.close();
        }
        if (SingletonUtil.conn != null) {
            SingletonUtil.conn.close();
        }
    }
}

以上代碼已省略了service接口層和java連接elasticsearch工具類(上一篇博客中已寫到),以上代碼僅供參考,若代碼中有不合理或者不規範的地方,請各位指出,技術在於交流!

java調用Linux執行Python爬蟲,並將數據存儲到elasticsearch中--(java後臺代碼)