1. 程式人生 > >java網路爬蟲——下載頁面圖片

java網路爬蟲——下載頁面圖片

package com.http5;

import java.io.InputStream;

import org.apache.commons.io.IOUtils;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.util.EntityUtils;

/**
 * Static helpers that fetch a URL with a caller-supplied {@link HttpClient}
 * and return the response body either as text or as raw bytes.
 */
public class ImageShow {

	/**
	 * Issues a GET request for {@code url} and returns the response body as text.
	 * The charset name reported for the entity is handed to
	 * {@code IOUtils.toString}; it may be {@code null} when the response
	 * declares none.
	 *
	 * @param http client used to execute the request
	 * @param url  address to fetch
	 * @return the response body, or {@code null} when the response carries no
	 *         entity or the request fails (the stack trace is printed)
	 */
	public static String getimg(HttpClient http, String url) {
		try {
			HttpGet get = new HttpGet(url);
			HttpResponse hr = http.execute(get);
			HttpEntity he = hr.getEntity();
			if (he != null) {
				String charset = EntityUtils.getContentCharSet(he);
				InputStream is = he.getContent();
				try {
					return IOUtils.toString(is, charset);
				} finally {
					// Always close the content stream so the underlying
					// connection is released even if reading fails part-way.
					IOUtils.closeQuietly(is);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}

	/**
	 * Issues a GET request for {@code url} and returns the response body as bytes.
	 *
	 * @param http client used to execute the request
	 * @param url  address to fetch
	 * @return the response body, or {@code null} when the response carries no
	 *         entity or the request fails (the stack trace is printed)
	 */
	public static byte[] getimage(HttpClient http, String url) {
		try {
			HttpGet hg = new HttpGet(url);
			HttpResponse hr = http.execute(hg);
			HttpEntity he = hr.getEntity();
			if (he != null) {
				InputStream is = he.getContent();
				try {
					return IOUtils.toByteArray(is);
				} finally {
					// Same reasoning as in getimg: never leave the stream open.
					IOUtils.closeQuietly(is);
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		}
		return null;
	}
}

package com.http5;

import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URI;

import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.apache.http.client.HttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeList;



/**
 * Command-line entry point that downloads every image referenced by
 * {@code <img>} tags on a fixed page and stores the files in a local directory.
 */
public class Imagedownload {

	/** Page that is scanned for images; also the base for resolving image URLs. */
	private static final String PAGE_URL = "http://www.baidu.com/";

	/** Directory prefix the downloaded images are written to. */
	private static final String TARGET_DIR = "E:/temp/";

	/**
	 * Fetches the page, extracts its image tags and downloads each image.
	 * A failure on one image is reported and does not stop the remaining ones.
	 *
	 * @param args unused
	 */
	public static void main(String[] args) {
		HttpClient http = new DefaultHttpClient();
		try {
			String html = ImageShow.getimg(http, PAGE_URL);
			if (html == null) {
				// getimg returns null when the page could not be fetched.
				System.err.println("Could not fetch " + PAGE_URL);
				return;
			}
			Parser p = new Parser(); // create the parser
			p.setInputHTML(html); // feed it the page markup
			// keep only the image tags
			NodeList nl = p.parse(new NodeClassFilter(ImageTag.class));
			URI base = URI.create(PAGE_URL);
			for (int i = 0; i < nl.size(); i++) {
				ImageTag image = (ImageTag) nl.elementAt(i);
				try {
					download(http, base, image.getImageURL());
				} catch (Exception e) {
					// Report and continue with the next image.
					e.printStackTrace();
				}
			}
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Release the connections held by the client.
			http.getConnectionManager().shutdown();
		}
	}

	/**
	 * Downloads a single image and writes it below {@link #TARGET_DIR}, named
	 * after the last path segment of its URL.
	 *
	 * @param http client used for the request
	 * @param base page URL that relative {@code src} values are resolved against
	 * @param src  value of the image tag's {@code src} attribute
	 * @throws IOException if the target file cannot be opened or written
	 * @throws IllegalArgumentException if {@code src} is not a valid URI reference
	 */
	private static void download(HttpClient http, URI base, String src) throws IOException {
		if (src == null || src.trim().length() == 0) {
			return; // image tag without a usable src
		}
		// resolve() handles absolute, protocol-relative and relative src values,
		// which plain string concatenation with the host does not.
		String url = base.resolve(src.trim()).toString();
		System.out.println(url);
		String jpg = FilenameUtils.getName(url); // image file name
		byte[] im = ImageShow.getimage(http, url);
		if (im == null) {
			return; // download failed; getimage has already printed the cause
		}
		FileOutputStream out = new FileOutputStream(TARGET_DIR + jpg);
		try {
			IOUtils.write(im, out);
		} finally {
			out.close();
		}
	}

}