1. 程式人生 > >java selenium 無彈窗,下載檔案,格式是mht

java selenium 無彈窗,下載檔案,格式是mht

1.如果是單個網頁,可以用下面的程式碼操作,下載網頁,無彈窗。

package com.hlhlo.recruitment.download.service.impl;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxOptions;
import org.openqa.selenium.firefox.FirefoxProfile;

/**
 * Demo: open a local .mht file in Firefox through Selenium and let the browser save it
 * to a fixed directory without showing the download dialog.
 *
 * <p>All paths are hard-coded Windows paths; adjust them before running.
 */
public class DownloadTest {

    /**
     * Configures a Firefox profile for silent downloads, starts the driver and opens the file.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // Location of geckodriver; must be set before the FirefoxDriver is created.
        System.setProperty("webdriver.gecko.driver", "F:\\WebDriver\\geckodriver.exe");
        FirefoxProfile profile = new FirefoxProfile();

        // The preferences below can be inspected by typing about:config in the Firefox address bar.
        // Download target directory. On Windows use "\\" as the separator; "/" does not work here.
        String path = "F:\\WebDriver\\download";

        // Download-related preferences.
        profile.setPreference("browser.download.dir", path); // download directory
        profile.setPreference("browser.download.folderList", 2); // 2 = use the directory above, 0 = default location
        profile.setPreference("browser.download.manager.showWhenStarting", false); // whether to show the download manager on start
        // Suppress the "save file" dialog for the listed MIME types.
        // .mht / .mhtml files use the MIME type message/rfc822.
        // MIME type reference: http://www.w3school.com.cn/media/media_mimeref.asp
        profile.setPreference("browser.helperApps.neverAsk.saveToDisk", "message/rfc822");

        FirefoxOptions options = new FirefoxOptions();
        options.setProfile(profile);
        // The driver is intentionally left open so the browser can finish saving the file.
        WebDriver driver = new FirefoxDriver(options);
        driver.get("file:///F:/a.mht");
    }

}

2.但是如果網站需要先登入,登入後再開啟其他網頁並下載其中的內容,上面的方法就不靈了,下載彈窗還是會彈出。

如果另外新建一個WebDriver,又會提示先登入的資訊。

所以解決方法是,用HttpGet直接下載,但是必須帶上cookie,也就是帶上登入資訊才行。

step 1.先獲取cookie

 // Collect every cookie of the logged-in browser session as "name=value;" pairs
 // so that the same login state can be sent in a Cookie request header.
        Set<Cookie> cookies = this.webDriver.manage().getCookies();
        // Method-local buffer: StringBuilder avoids StringBuffer's needless synchronization.
        StringBuilder cookieStr = new StringBuilder();
        for (Cookie cookie : cookies) {
            cookieStr.append(cookie.getName()).append('=').append(cookie.getValue()).append(';');
        }
        this.cookie = cookieStr.toString();

step 2.根據url得到對應網頁的內容

/**
 * Downloads the page at {@code url} with a plain HTTP GET and returns its HTML text.
 *
 * <p>The response body is passed to {@code Util.mht2String}, i.e. it is expected to be
 * an MHT document.
 *
 * @param url    URL of the page to download
 * @param cookie value for the Cookie request header, i.e. the login state
 * @return the string produced by {@code Util.mht2String}, or {@code null} when the
 *         response has no body or an I/O error occurs
 */
private String downloadByURL(String url,String cookie) {
        HttpGet get = new HttpGet(url);
        // NOTE(review): the Host header is hard-coded, so this presumably only works for
        // URLs served by jianli.58.com — confirm, or derive the host from url.
        get.addHeader("Host", "jianli.58.com");
        get.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0");
        get.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
        get.addHeader("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2");
        get.addHeader("Accept-Encoding", "gzip, deflate");
        get.addHeader("Connection", "keep-alive");
        get.addHeader("Cookie", cookie); // the cookie string collected from the logged-in browser
        get.addHeader("Upgrade-Insecure-Requests", "1");

        // try-with-resources closes the response (and with it the connection) on every path,
        // including when reading or converting the body fails.
        try (CloseableHttpResponse response = this.httpClient.execute(get)) {
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // No response body, nothing to convert.
                return null;
            }
            try (InputStream is = entity.getContent()) {
                // The downloaded page is in MHT format; convert it to plain HTML text.
                return Util.mht2String(is);
            }
        } catch (IOException e) {
            log.error("發生異常:", e);
        }
        return null;
    }

step3.下載的網頁是mht格式,只能用ie開啟,所以要轉換成html,才可以被火狐瀏覽器開啟。轉換方法如下:

pom.xml

<!-- Jsoup: HTML parser for Java -->
<dependency>
    <groupId>org.jsoup</groupId>
    <artifactId>jsoup</artifactId>
    <version>1.8.2</version>
</dependency>

Util類,Mht格式轉換成html格式(注意:下面的程式碼依賴JavaMail,即javax.mail與javax.activation,需要另外在pom.xml中加入對應的依賴)

import javax.activation.DataHandler;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimePartDataSource;
import java.io.*;
import java.util.Enumeration;

/**
 * Helpers for converting MHT/MHTML web archives (MIME multipart documents) to plain HTML.
 *
 * <p>An archive is parsed with JavaMail as a {@link MimeMessage}. The first body part is
 * treated as the HTML document; all following parts are treated as its resources
 * (images, scripts, style sheets, ...).
 */
public class Util{

    /** Charset used when the HTML part does not declare one in its Content-Type header. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /**
     * Extracts the HTML text of an MHT document read from a stream.
     *
     * <p>The stream is not closed by this method; that stays the caller's responsibility.
     *
     * @param fis stream positioned at the start of the MHT document
     * @return the HTML text with every line terminated by {@code "\r\n"}, or {@code null}
     *         when the document is not multipart or cannot be parsed
     */
    public static String mht2String(InputStream fis) {
        try {
            Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
            MimeMessage msg = new MimeMessage(mailSession, fis);
            Object content = msg.getContent();
            if (content instanceof MimeMultipart) {
                MimeMultipart mp = (MimeMultipart) content;
                MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);
                return getHtmlText(htmlPart, resolveEncoding(htmlPart));
            }
        } catch (Exception e) {
            // Public entry point: report the failure and signal it with null instead of throwing.
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Converts an MHT file into an HTML file.
     *
     * <p>Resources contained in the archive are written to a sibling directory named
     * {@code <s_DescHtml>.files}, and their original URLs inside the HTML text are replaced
     * by the absolute paths of the saved files. Nothing is written when the archive is not
     * multipart, the HTML part cannot be read, or the resource directory cannot be created.
     *
     * @param s_SrcMht   path of the source MHT file
     * @param s_DescHtml path of the HTML file to write
     */
    public static void mht2html(String s_SrcMht, String s_DescHtml) {
        // try-with-resources: the source file is closed on every path.
        try (InputStream fis = new FileInputStream(s_SrcMht)) {
            Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
            MimeMessage msg = new MimeMessage(mailSession, fis);
            Object content = msg.getContent();
            if (!(content instanceof MimeMultipart)) {
                return;
            }
            MimeMultipart mp = (MimeMultipart) content;
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);

            // Charset of the HTML part, then the HTML text itself.
            String encoding = resolveEncoding(htmlPart);
            String html = getHtmlText(htmlPart, encoding);
            if (html == null) {
                return;
            }

            // Directory that receives the resource files; only needed when there are any.
            int partCount = mp.getCount();
            File parent = null;
            if (partCount > 1) {
                parent = new File(new File(s_DescHtml).getAbsolutePath() + ".files");
                parent.mkdirs();
                if (!parent.exists()) {
                    // Could not create the directory: give up.
                    return;
                }
            }

            // Save every resource part and point the HTML at the local copy.
            for (int i = 1; i < partCount; ++i) {
                MimeBodyPart part = (MimeBodyPart) mp.getBodyPart(i);
                // Original location of the resource, e.g. http://xxx.com/abc.jpg
                String url = getResourcesUrl(part);
                if (url == null || url.length() == 0) {
                    continue;
                }

                File resource = new File(parent, getName(url, i));
                if (SaveResourcesFile(resource, part.getInputStream())) {
                    // Replace the remote address with the local one (images, JS, CSS, ...).
                    html = html.replace(url, resource.getAbsolutePath());
                }
            }

            // Finally write the HTML file itself.
            SaveHtml(html, s_DescHtml, encoding);
        } catch (Exception e) {
            // Public entry point: report the failure instead of throwing.
            e.printStackTrace();
        }
    }

    /**
     * Derives a local file name for a resource from its URL or path.
     *
     * <p>The name is the text after the last {@code '/'}; when the input has no {@code '/'},
     * the text after the last {@code '\\'}. CRLF sequences are removed first. When that yields
     * no name at all (null input, no separator, or a trailing separator) the fallback
     * {@code "resource_" + ID} is returned, so the result is never empty.
     *
     * @param strName resource URL or path
     * @param ID      index of the resource, used only for the fallback name
     * @return a non-empty file name
     */
    public static String getName(String strName, int ID) {
        String name = "";
        if (strName != null) {
            // Drop line breaks that may be embedded in the value.
            String cleaned = strName.replace("\r\n", "");
            int cut = cleaned.lastIndexOf('/');
            if (cut < 0) {
                cut = cleaned.lastIndexOf('\\');
            }
            if (cut >= 0) {
                name = cleaned.substring(cut + 1);
            }
        }
        // An empty name would make the target path point at the resource directory itself.
        return name.isEmpty() ? "resource_" + ID : name;
    }


    /**
     * Writes HTML text to a file, replacing any existing content.
     *
     * @param s_HtmlTxt  HTML text to write
     * @param s_HtmlPath path of the target file
     * @param s_Encode   charset name used to encode the text
     * @return {@code true} when the file was written, {@code false} when an argument is
     *         {@code null}, the charset is unsupported, or writing fails
     */
    public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath , String s_Encode) {
        if (s_HtmlTxt == null || s_HtmlPath == null || s_Encode == null) {
            return false;
        }
        // Both streams are declared as resources so the file handle is released
        // even when the charset name is rejected.
        try (OutputStream fos = new FileOutputStream(s_HtmlPath, false);
             Writer out = new OutputStreamWriter(fos, s_Encode)) {
            out.write(s_HtmlTxt);
            return true;
        } catch (IOException e) {
            return false;
        }
    }


    /**
     * Saves one resource (JS, image, CSS, ...) of the page to a file.
     *
     * <p>The input stream is always closed by this method.
     *
     * @param SrcFile     file to write the resource to
     * @param inputStream content of the resource
     * @return {@code true} when the whole stream was written and all streams closed cleanly,
     *         {@code false} when an argument is {@code null} or an I/O error occurs
     */
    private static boolean SaveResourcesFile(File SrcFile, InputStream inputStream) {
        if (SrcFile == null || inputStream == null) {
            return false;
        }

        // "in" is declared first, so it is closed even if opening the output file fails.
        try (InputStream in = new BufferedInputStream(inputStream);
             OutputStream out = new BufferedOutputStream(new FileOutputStream(SrcFile))) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
            return true;
        } catch (IOException e) {
            e.printStackTrace();
            return false;
        }
    }


    /**
     * Returns the original location of a resource part.
     *
     * @param bp resource part of the archive
     * @return value of the part's first Content-Location header, or {@code null} when the
     *         part is {@code null}, has no such header, or the header cannot be read
     */
    private static String getResourcesUrl(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            // A null delimiter makes getHeader return only the first matching header.
            return bp.getHeader("Content-Location", null);
        } catch (MessagingException e) {
            return null;
        }
    }


    /**
     * Reads the decoded text of a body part.
     *
     * @param bp          part that holds the HTML document
     * @param strEncoding charset name used to decode the part
     * @return the text with every line terminated by {@code "\r\n"}, or {@code null} when
     *         an argument is {@code null}, the charset is unsupported, or reading fails
     */
    private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
        if (bp == null || strEncoding == null) {
            return null;
        }
        try (InputStream in = bp.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, strEncoding))) {
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\r\n");
            }
            return html.toString();
        } catch (IOException | MessagingException e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Returns the charset declared by a part, or {@link #DEFAULT_ENCODING} when none is declared.
     */
    private static String resolveEncoding(MimeBodyPart bp) {
        String encoding = getEncoding(bp);
        return encoding != null ? encoding : DEFAULT_ENCODING;
    }

    /**
     * Extracts the charset from the Content-Type header of a body part.
     *
     * <p>Surrounding quotes and any parameters following the charset are dropped, and
     * {@code gb2312} is widened to {@code gbk}.
     *
     * @param bp part that holds the HTML document
     * @return the declared charset name, or {@code null} when the part is {@code null},
     *         declares no charset, or the header cannot be read
     */
    private static String getEncoding(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            String contentType = bp.getHeader("Content-Type", null);
            if (contentType == null) {
                return null;
            }

            // Parameter names are case-insensitive, so search a lower-cased copy.
            String marker = "charset=";
            int pos = contentType.toLowerCase(java.util.Locale.ROOT).indexOf(marker);
            if (pos < 0) {
                return null;
            }

            String encoding = contentType.substring(pos + marker.length());
            // Cut off any parameter that follows the charset.
            int end = encoding.indexOf(';');
            if (end >= 0) {
                encoding = encoding.substring(0, end);
            }
            encoding = encoding.trim();
            if (encoding.startsWith("\"") || encoding.startsWith("'")) {
                encoding = encoding.substring(1);
            }
            if (encoding.endsWith("\"") || encoding.endsWith("'")) {
                encoding = encoding.substring(0, encoding.length() - 1);
            }
            if (encoding.isEmpty()) {
                return null;
            }
            if (encoding.equalsIgnoreCase("gb2312")) {
                encoding = "gbk";
            }
            return encoding;
        } catch (MessagingException e) {
            e.printStackTrace();
        }
        return null;
    }
}