java selenium 無彈窗,下載檔案,格式是mht
阿新 • • 發佈:2018-12-17
1.如果是單個網頁,可以用下面的程式碼操作,下載網頁,無彈窗。
package com.hlhlo.recruitment.download.service.impl; import org.openqa.selenium.WebDriver; import org.openqa.selenium.firefox.FirefoxDriver; import org.openqa.selenium.firefox.FirefoxOptions; import org.openqa.selenium.firefox.FirefoxProfile; public class DownloadTest { public static void main(String[] args) { System.setProperty("webdriver.gecko.driver", "F:\\WebDriver\\geckodriver.exe"); FirefoxProfile profile = new FirefoxProfile(); // 可以在Firefox瀏覽器位址列中輸入about:config來檢視屬性 // 設定下載檔案放置路徑,注意如果是windows環境一定要用\\,用/不行 String path = "F:\\WebDriver\\download"; // 配置響應下載引數 profile.setPreference("browser.download.dir", path);// 下載路徑 profile.setPreference("browser.download.folderList", 2);// 2為儲存在指定路徑,0代表預設路徑 profile.setPreference("browser.download.manager.showWhenStarting", false);// 是否顯示開始 // 禁止彈出儲存框,value是檔案格式,如zip檔案 //常用的MIME型別 .mht .mhtml message/rfc822 profile.setPreference("browser.helperApps.neverAsk.saveToDisk","message/rfc822"); //關於型別:可以參考http://www.w3school.com.cn/media/media_mimeref.asp System.setProperty("webdriver.gecko.driver", "F:\\WebDriver\\geckodriver.exe"); //WebDriver driver = new FirefoxDriver(); FirefoxOptions options = new FirefoxOptions(); options.setProfile(profile); WebDriver driver = new FirefoxDriver(options); //driver.get("file:///F:/dict_en_zhcn_2_pngs.rar"); driver.get("file:///F:/a.mht"); } }
2.但是如果網站需要先經過登入頁面,登入之後再開啟其他網頁並下載該網頁的內容,上面的方法就不靈了,下載彈窗還是會出現。
如果另外新建一個WebDriver,又會提示先登入的資訊。
所以解決方法是,用HttpGet直接下載,但是必須帶上cookie,也就是帶上登入資訊才行。
step 1.先從已登入的WebDriver獲取cookie
// Flatten every cookie of the current browser session into one
// "name=value;" sequence and keep it in this.cookie.
Set<Cookie> cookies = this.webDriver.manage().getCookies();
StringBuilder cookieHeader = new StringBuilder();
for (Cookie current : cookies) {
    cookieHeader.append(current.getName())
            .append('=')
            .append(current.getValue())
            .append(';');
}
this.cookie = cookieHeader.toString();
step 2.根據url得到對應網頁的內容
/** * 根據url下載頁面 * @param url:下載的頁面url * @param cookie:網頁的cookie,也就是登入資訊。 **/ private String downloadByURL(String url,String cookie) { HttpGet get = new HttpGet(url); get.addHeader("Host", "jianli.58.com"); get.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:63.0) Gecko/20100101 Firefox/63.0"); get.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); get.addHeader("Accept-Language", "zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2"); get.addHeader("Accept-Encoding", "gzip, deflate"); get.addHeader("Connection", "keep-alive"); get.addHeader("Cookie", cookie);//剛才獲取的cookie; get.addHeader("Upgrade-Insecure-Requests", "1"); try { CloseableHttpResponse response = this.httpClient.execute(get);//httpClient已經獲取 HttpEntity entity = response.getEntity(); InputStream is = entity.getContent(); String html = Util.mht2String(is);//下載的網頁是mht格式,如果不想要,可以轉化成html IOUtils.closeQuietly(is); return html; } catch (IOException e) { log.error("發生異常:", e); } return null; }
step3.下載的網頁是mht格式,只能用ie開啟,所以要轉換成html,才可以被火狐瀏覽器開啟。轉換方法如下:
pom.xml
<!-- Jsoup,JavaHTML解析器 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.8.2</version>
</dependency>
Util類,Mht格式轉換成html格式(注意:此類依賴的是 javax.mail 和 javax.activation,而不是上面的 jsoup,需要另外在 pom.xml 中加入 javax.mail 的依賴)
import javax.activation.DataHandler;
import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.Session;
import javax.mail.internet.MimeBodyPart;
import javax.mail.internet.MimeMessage;
import javax.mail.internet.MimeMultipart;
import javax.mail.internet.MimePartDataSource;
import java.io.*;
import java.util.Enumeration;
/**
 * Helpers for reading MHT (MIME-encapsulated HTML) documents with JavaMail.
 *
 * The first body part of the multipart message is treated as the HTML page;
 * every following part is treated as a resource (image, script, style sheet)
 * identified by its Content-Location header.
 */
public class Util {

    /** Charset used when the HTML part does not declare one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /** Content-Type parameter prefix that introduces the charset name. */
    private static final String CHARSET_KEY = "charset=";

    /**
     * Extracts the HTML text of an MHT document.
     *
     * The stream is not closed by this method.
     *
     * @param fis stream positioned at the start of the MHT document
     * @return the HTML text, or {@code null} when the content is not a MIME
     *         multipart message or any error occurs (the error is printed)
     */
    public static String mht2String(InputStream fis) {
        try {
            MimeMultipart mp = readMultipart(fis);
            if (mp == null) {
                return null;
            }
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);
            return getHtmlText(htmlPart, getEncoding(htmlPart));
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Converts an MHT file into an HTML file.
     *
     * Resource parts are written into a directory named
     * {@code <absolute path of s_DescHtml>.files}; every occurrence of a
     * resource's Content-Location in the HTML is replaced by the absolute
     * path of the saved file. Errors are printed, never thrown.
     *
     * @param s_SrcMht   path of the MHT file to read
     * @param s_DescHtml path of the HTML file to write
     */
    public static void mht2html(String s_SrcMht, String s_DescHtml) {
        // try-with-resources: the source file is closed on every path.
        try (InputStream fis = new FileInputStream(s_SrcMht)) {
            MimeMultipart mp = readMultipart(fis);
            if (mp == null) {
                return;
            }
            MimeBodyPart htmlPart = (MimeBodyPart) mp.getBodyPart(0);
            String encoding = getEncoding(htmlPart);
            String html = getHtmlText(htmlPart, encoding);
            if (html == null) {
                return;
            }

            // Directory holding the resource files; only needed when the
            // message has parts besides the HTML page itself.
            int partCount = mp.getCount();
            File resourceDir = null;
            if (partCount > 1) {
                resourceDir = new File(new File(s_DescHtml).getAbsolutePath() + ".files");
                resourceDir.mkdirs();
                if (!resourceDir.isDirectory()) {
                    // Could not create the directory: give up.
                    return;
                }
            }

            // Save each resource and point the HTML at the local copy.
            for (int i = 1; i < partCount; ++i) {
                MimeBodyPart part = (MimeBodyPart) mp.getBodyPart(i);
                // Example value: http://xxx.com/abc.jpg
                String url = getResourcesUrl(part);
                if (url == null || url.length() == 0) {
                    continue;
                }
                String name = getName(url, i);
                // No usable file name, or a name that would point at a
                // directory rather than a file inside resourceDir.
                if (name.isEmpty() || ".".equals(name) || "..".equals(name)) {
                    continue;
                }
                File resource = new File(resourceDir, name);
                if (SaveResourcesFile(resource, part.getInputStream())) {
                    html = html.replace(url, resource.getAbsolutePath());
                }
            }

            SaveHtml(html, s_DescHtml, encoding);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Derives a file name from a resource location.
     *
     * CRLF sequences are removed, then everything up to and including the
     * last path separator ('/' or '\\', whichever comes last) is dropped, so
     * the result never contains a path separator.
     *
     * @param strName resource location, e.g. {@code http://xxx.com/abc.jpg}
     * @param ID      index of the resource part; currently not used
     * @return the text after the last separator, or an empty string when the
     *         location contains no separator
     */
    public static String getName(String strName, int ID) {
        String cleaned = strName.replace("\r\n", "");
        // Use the LAST separator of either kind; honouring only '/' would let
        // a value such as "a/b\..\c" keep backslash path components.
        int cut = Math.max(cleaned.lastIndexOf('/'), cleaned.lastIndexOf('\\'));
        return cut >= 0 ? cleaned.substring(cut + 1) : "";
    }

    /**
     * Writes HTML text to a file, replacing any existing content.
     *
     * @param s_HtmlTxt  text to write
     * @param s_HtmlPath path of the file to write
     * @param s_Encode   charset name used to encode the text
     * @return {@code true} on success, {@code false} on any failure (the
     *         failure is printed)
     */
    public static boolean SaveHtml(String s_HtmlTxt, String s_HtmlPath, String s_Encode) {
        // The file stream is its own resource so it is closed even when the
        // writer cannot be created (e.g. unsupported charset name).
        try (OutputStream fos = new FileOutputStream(s_HtmlPath, false);
             Writer out = new OutputStreamWriter(fos, s_Encode)) {
            out.write(s_HtmlTxt);
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
        return true;
    }

    /**
     * Copies a resource (script, image, style sheet, ...) into a file.
     *
     * When both arguments are non-null the input stream is always closed.
     *
     * @param SrcFile     file to write
     * @param inputStream content to copy
     * @return {@code true} when everything was written and closed cleanly,
     *         {@code false} otherwise
     */
    private static boolean SaveResourcesFile(File SrcFile, InputStream inputStream) {
        if (SrcFile == null || inputStream == null) {
            return false;
        }
        // "in" is declared first so the source stream is closed even when the
        // target file cannot be opened.
        try (InputStream in = new BufferedInputStream(inputStream);
             OutputStream out = new BufferedOutputStream(new FileOutputStream(SrcFile))) {
            byte[] buffer = new byte[1024];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
            out.flush();
            return true;
        } catch (Exception e) {
            // Also reached when closing one of the streams fails.
            e.printStackTrace();
            return false;
        }
    }

    /**
     * Returns the Content-Location header of a resource part.
     *
     * @param bp body part to inspect, may be {@code null}
     * @return the first Content-Location value, or {@code null} when the part
     *         is {@code null}, has no such header, or cannot be read
     */
    private static String getResourcesUrl(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            // A null delimiter requests only the first matching header.
            return bp.getHeader("Content-Location", null);
        } catch (MessagingException e) {
            return null;
        }
    }

    /**
     * Reads the decoded content of a body part as text.
     *
     * Every line is terminated with CRLF in the result, whatever line
     * terminator the source used.
     *
     * @param bp          body part holding the HTML
     * @param strEncoding charset name of the content; UTF-8 is used when
     *                    {@code null}
     * @return the text, or {@code null} when reading fails (the error is
     *         printed)
     */
    private static String getHtmlText(MimeBodyPart bp, String strEncoding) {
        String charset = (strEncoding != null) ? strEncoding : DEFAULT_ENCODING;
        try (InputStream raw = bp.getInputStream();
             BufferedReader reader = new BufferedReader(new InputStreamReader(raw, charset))) {
            StringBuilder html = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                html.append(line).append("\r\n");
            }
            return html.toString();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Determines the charset declared in a body part's Content-Type header.
     *
     * @param bp body part to inspect, may be {@code null}
     * @return the declared charset name ("gb2312" is reported as "gbk"),
     *         UTF-8 when none can be determined, or {@code null} when the
     *         part itself is {@code null}
     */
    private static String getEncoding(MimeBodyPart bp) {
        if (bp == null) {
            return null;
        }
        try {
            String declared = parseCharset(bp.getHeader("Content-Type", null));
            if (declared != null) {
                return declared;
            }
        } catch (MessagingException e) {
            e.printStackTrace();
        }
        return DEFAULT_ENCODING;
    }

    /** Parses the stream as a MIME message; returns its multipart content, or null when it is not multipart. */
    private static MimeMultipart readMultipart(InputStream in) throws MessagingException, IOException {
        Session mailSession = Session.getDefaultInstance(System.getProperties(), null);
        MimeMessage msg = new MimeMessage(mailSession, in);
        Object content = msg.getContent();
        return (content instanceof MimeMultipart) ? (MimeMultipart) content : null;
    }

    /** Extracts the charset parameter from a Content-Type value; returns null when there is none. */
    private static String parseCharset(String contentType) {
        if (contentType == null) {
            return null;
        }
        int pos = indexOfIgnoreCase(contentType, CHARSET_KEY);
        if (pos < 0) {
            return null;
        }
        String value = contentType.substring(pos + CHARSET_KEY.length());
        // Stop at the next parameter so "charset=utf-8; x=y" yields "utf-8".
        int end = value.indexOf(';');
        if (end >= 0) {
            value = value.substring(0, end);
        }
        value = value.trim();
        if (value.startsWith("\"") || value.startsWith("'")) {
            value = value.substring(1);
        }
        if (value.endsWith("\"") || value.endsWith("'")) {
            value = value.substring(0, value.length() - 1);
        }
        value = value.trim();
        if (value.isEmpty()) {
            return null;
        }
        // Kept from the original implementation: content labelled gb2312 is
        // decoded as gbk (presumably because such pages often contain
        // characters outside strict GB2312 - confirm).
        return "gb2312".equalsIgnoreCase(value) ? "gbk" : value;
    }

    /** Returns the index of the first case-insensitive occurrence of needle in text, or -1. */
    private static int indexOfIgnoreCase(String text, String needle) {
        int last = text.length() - needle.length();
        for (int i = 0; i <= last; i++) {
            if (text.regionMatches(true, i, needle, 0, needle.length())) {
                return i;
            }
        }
        return -1;
    }
}