httpclient工具類,使用連線池實現,原理同資料庫連線池。
阿新 • • 發佈:2019-02-19
使用httpclient4.5實現。
注意事項:
1.User-Agent 最好不要隨意填寫;建議先蒐集一批常見的 User-Agent 存入檔案,每次請求時隨機讀取一個使用。
2.最好設定請求停頓時間,防止訪問過快被封。
3.若返回結果出現亂碼,請設定返回資料的編碼格式(預設為 UTF-8)。
程式碼:
package com.common.util; import java.io.IOException; import java.io.InterruptedIOException; import java.net.URI; import java.net.URL; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.concurrent.TimeUnit; import javax.net.ssl.SSLException; import org.apache.http.HttpEntityEnclosingRequest; import org.apache.http.HttpHost; import org.apache.http.HttpRequest; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.HttpRequestRetryHandler; import org.apache.http.client.ResponseHandler; import org.apache.http.client.config.CookieSpecs; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.entity.UrlEncodedFormEntity; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.client.protocol.HttpClientContext; import org.apache.http.config.Registry; import org.apache.http.config.RegistryBuilder; import org.apache.http.conn.ConnectTimeoutException; import org.apache.http.conn.socket.ConnectionSocketFactory; import org.apache.http.conn.socket.PlainConnectionSocketFactory; import org.apache.http.conn.ssl.NoopHostnameVerifier; import org.apache.http.conn.ssl.SSLConnectionSocketFactory; import org.apache.http.conn.ssl.TrustSelfSignedStrategy; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.impl.client.LaxRedirectStrategy; import org.apache.http.impl.conn.PoolingHttpClientConnectionManager; import org.apache.http.message.AbstractHttpMessage; import org.apache.http.message.BasicNameValuePair; import org.apache.http.protocol.HttpContext; import org.apache.http.ssl.SSLContextBuilder; import org.apache.http.util.EntityUtils; import org.slf4j.Logger; import 
org.slf4j.LoggerFactory; @SuppressWarnings("all") public class HttpUtil { private static final Logger LOGGER = LoggerFactory.getLogger(HttpUtil.class); public static final long RELEASE_CONNECTION_WAIT_TIME = 5000;// 監控連線間隔 private static PoolingHttpClientConnectionManager httpClientConnectionManager = null; private static LaxRedirectStrategy redirectStrategy = null; private static HttpRequestRetryHandler myRetryHandler = null; private static SSLConnectionSocketFactory sslConnectionSocketFactory = null; private static MyResponseHandler rh = null; static { initHttpClient(); // 啟動清理連線池連結執行緒 Thread idleConnectionMonitorThread = new IdleConnectionMonitorThread(httpClientConnectionManager); idleConnectionMonitorThread.setDaemon(true); idleConnectionMonitorThread.start(); } public static void initHttpClient() { try { rh = new MyResponseHandler(); // 重定向策略初始化 redirectStrategy = new LaxRedirectStrategy(); // 請求重試機制,預設重試3次 myRetryHandler = new HttpRequestRetryHandler() { public boolean retryRequest(IOException exception, int executionCount, HttpContext context) { if (executionCount >= 3) { return false; } if (exception instanceof InterruptedIOException) { return false; } if (exception instanceof UnknownHostException) { return false; } if (exception instanceof ConnectTimeoutException) { return false; } if (exception instanceof SSLException) { // SSL handshake exception return false; } HttpClientContext clientContext = HttpClientContext.adapt(context); HttpRequest request = clientContext.getRequest(); boolean idempotent = !(request instanceof HttpEntityEnclosingRequest); if (idempotent) { // Retry if the request is considered idempotent return true; } return false; } }; SSLContextBuilder builder = new SSLContextBuilder(); builder.loadTrustMaterial(null, new TrustSelfSignedStrategy()); sslConnectionSocketFactory = new SSLConnectionSocketFactory(builder.build(), NoopHostnameVerifier.INSTANCE); Registry<ConnectionSocketFactory> registry = 
RegistryBuilder.<ConnectionSocketFactory>create() .register("http", new PlainConnectionSocketFactory())// .register("https", sslConnectionSocketFactory)// .build(); // 建立httpclient連線池 httpClientConnectionManager = new PoolingHttpClientConnectionManager(registry); // 設定連線池最大數量,這個引數表示所有連線最大數。 httpClientConnectionManager.setMaxTotal(200); // 設定單個路由最大連線數量,表示單個域名的最大連線數, // 例如:www.baidu.com.www.google.com表示不同的域名,則連線統一域名下的資源的最大連線數就是該引數,總和是上面的引數。 httpClientConnectionManager.setDefaultMaxPerRoute(100); } catch (Exception e) { LOGGER.error("初始化httpclient連線池失敗.", e); } } public static CloseableHttpClient getHttpClient() { // HttpHost ip = ConfigFileUtil.getRandomIp(); RequestConfig requestConfig = RequestConfig.custom()// .setConnectTimeout(3000)// .setSocketTimeout(3000)// // 忽略cookie,如果不需要登陸最好去掉,否則修改策略儲存cookie即可 .setCookieSpec(CookieSpecs.IGNORE_COOKIES)// .setConnectionRequestTimeout(6000)// // .setProxy(ip)//設定代理ip,不設定就用本機 .build(); // 連線池配置 CloseableHttpClient httpClient = HttpClients.custom()// .setSSLSocketFactory(sslConnectionSocketFactory)// .setConnectionManager(httpClientConnectionManager)// .setDefaultRequestConfig(requestConfig)// .setRedirectStrategy(redirectStrategy)// .setRetryHandler(myRetryHandler)// .build(); return httpClient; } /** * get請求 headers表示特殊的請求頭 */ public static String getUrlContent(String urlString, Map<String, String>... 
headers) { String html = ""; HttpGet httpGet = null; urlString = urlString.trim(); if (null == urlString || urlString.isEmpty() || !urlString.startsWith("http")) { return html; } // 轉化String url為URI,解決url中包含特殊字元的情況 try { URL url = new URL(urlString); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); httpGet = new HttpGet(uri); setCommonHeaders(httpGet); // 額外的header if (headers != null && headers.length > 0) { for (Map.Entry<String, String> header : headers[0].entrySet()) { if (httpGet.containsHeader(header.getKey())) { httpGet.setHeader(header.getKey(), header.getValue()); } else { httpGet.addHeader(header.getKey(), header.getValue()); } } } HttpClient httpClient = getHttpClient(); html = httpClient.execute(httpGet, rh); } catch (Exception e) { LOGGER.error("請求錯誤·url=" + urlString); } finally { httpGet.abort(); } return html; } /** * post請求,params表示引數,headers表示請求頭 */ public static String postForEntity(String urlString, Map<String, String> params, Map<String, String>... headers) { String html = ""; HttpPost httpPost = null; urlString = urlString.trim(); try { // 引數設定 if (null == urlString || urlString.isEmpty() || !urlString.startsWith("http")) { return html; } URL url = new URL(urlString); URI uri = new URI(url.getProtocol(), url.getHost(), url.getPath(), url.getQuery(), null); httpPost = new HttpPost(uri); setCommonHeaders(httpPost); // 額外的header if (headers != null && headers.length > 0) { for (Map.Entry<String, String> header : headers[0].entrySet()) { if (httpPost.containsHeader(header.getKey())) { httpPost.setHeader(header.getKey(), header.getValue()); } else { httpPost.addHeader(header.getKey(), header.getValue()); } } } if (params != null && params.size() > 0) { List<BasicNameValuePair> nvps = new ArrayList<>(); for (Map.Entry<String, String> entry : params.entrySet()) { nvps.add(new BasicNameValuePair(entry.getKey(), entry.getValue())); } // 這裡設定的是返回結果編碼,大多數都是UTF-8,如果亂碼,可以檢視網頁的meta中的content的編碼.如果是GBK,這裡改為GBK即可. 
// 這裡entity只能讀取一次,想要讀取多次,百度一下. httpPost.setEntity(new UrlEncodedFormEntity(nvps, "UTF-8")); } HttpClient httpClient = getHttpClient(); html = httpClient.execute(httpPost, rh); } catch (Exception e) { LOGGER.error("請求錯誤·url=" + urlString); } finally { httpPost.abort(); } return html; } private static void setCommonHeaders(AbstractHttpMessage request) { request.addHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"); // request.addHeader("Connection", "keep-alive"); request.addHeader("Accept-Language", "zh-CN,zh;q=0.8"); request.addHeader("Accept-Encoding", "gzip, deflate, br"); // User-Agent最好隨機換 request.addHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"); } /** * 響應處理類,處理返回結果 * * @author 王 * */ public static class MyResponseHandler implements ResponseHandler<String> { @Override public String handleResponse(HttpResponse response) throws ClientProtocolException, IOException { try { // 返回狀態碼 int statusCode = response.getStatusLine().getStatusCode(); switch (statusCode) { case 200: return EntityUtils.toString(response.getEntity(), "UTF-8"); case 400: LOGGER.error("下載400錯誤程式碼,請求出現語法錯誤."); break; case 403: LOGGER.error("下載403錯誤程式碼,資源不可用."); break; case 404: LOGGER.error("下載404錯誤程式碼,無法找到指定資源地址."); break; case 503: LOGGER.error("下載503錯誤程式碼,服務不可用."); break; case 504: LOGGER.error("下載504錯誤程式碼,閘道器超時."); break; default: LOGGER.error("未處理的錯誤,code=" + statusCode); } } finally { if (response != null) { try { ((CloseableHttpResponse) response).close(); } catch (IOException e) { LOGGER.error("關閉響應錯誤.", e); } } } return ""; } } /** * 連線處理,釋放連線池連線 * * @author 王 * */ public static class IdleConnectionMonitorThread extends Thread { private static volatile boolean shutdown = false; private PoolingHttpClientConnectionManager poolingHttpClientConnectionManager; public IdleConnectionMonitorThread(PoolingHttpClientConnectionManager 
poolingHttpClientConnectionManager) { this.poolingHttpClientConnectionManager = poolingHttpClientConnectionManager; } @Override public void run() { try { while (!shutdown) { synchronized (IdleConnectionMonitorThread.class) { wait(RELEASE_CONNECTION_WAIT_TIME); // Close expired connections poolingHttpClientConnectionManager.closeExpiredConnections(); // that have been idle longer than 30 sec poolingHttpClientConnectionManager.closeIdleConnections(2, TimeUnit.MINUTES); } } } catch (InterruptedException ex) { LOGGER.error("釋放連線池連接出錯."); } } public void shutdown() { shutdown = true; synchronized (IdleConnectionMonitorThread.class) { notifyAll(); } } } public static boolean checkIp(HttpHost ip) { HttpResponse response; int code = 0; try { String url = "https://www.baidu.com"; HttpClient hc = HttpClients.custom()// .setProxy(ip)// .build(); HttpGet httpGet = new HttpGet(url); response = hc.execute(httpGet); code = response.getStatusLine().getStatusCode(); } catch (Exception e) { e.printStackTrace(); } return code == 200; } public static void main(String[] args) { System.out.println(getUrlContent("https://www.baidu.com")); } }