Java 通過 URL 獲取網站 HTML 原始碼
阿新 • • 發佈:2019-02-08
package com.wsw.j2se.url; import java.io.ByteArrayOutputStream; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; /** * 通過網站域名URL獲取該網站的原始碼 * @author Administrator * */ public class HtmlRequest { /** *//** * @param args * @throws MalformedURLException */ public static void main(String[] args) throws Exception { URL url = new URL("http://www.ifeng.com"); String urlsource = getURLSource(url); System.out.println(urlsource); } /** *//** * 通過網站域名URL獲取該網站的原始碼 * @param url * @return String * @throws Exception */ public static String getURLSource(URL url) throws Exception { HttpURLConnection conn = (HttpURLConnection)url.openConnection(); conn.setRequestMethod("GET"); conn.setConnectTimeout(5 * 1000); InputStream inStream = conn.getInputStream(); //通過輸入流獲取html二進位制資料 byte[] data = readInputStream(inStream); //把二進位制資料轉化為byte位元組資料 String htmlSource = new String(data); return htmlSource; } /** *//** * 把二進位制流轉化為byte位元組陣列 * @param instream * @return byte[] * @throws Exception */ public static byte[] readInputStream(InputStream instream) throws Exception { ByteArrayOutputStream outStream = new ByteArrayOutputStream(); byte[] buffer = new byte[1204]; int len = 0; while ((len = instream.read(buffer)) != -1){ outStream.write(buffer,0,len); } instream.close(); return outStream.toByteArray(); } }