1. 程式人生 > >當二次元與程式設計師碰撞,會產生什麼?

當二次元與程式設計師碰撞,會產生什麼?

16年年底,發現了一個叫半次元的Coser網站,看圖片一個一個看太麻煩,直接寫個爬蟲把所有圖片記錄下來,重新構成一個只有圖片的html...

第一部分:介面

package easyspider.menu;

import easyspider.menu.logging.LoggingWriter;

public class MenuSystem {
	public static void init(){
		System.err.println("EasySprider [版本\t1.2]");
		LoggingWriter.printMessage("生成的HTML預設存放在C盤根目錄下...");
		LoggingWriter.printMessage("如果需要手動設定儲存碟符路徑,請直接輸入碟符:etc(C:)");
		changeSavePath();
	}
	private static void changeSavePath(){
		
	}
}

 第二部分:實際體


/**
 * @author o.kEnnponN
 * 重構時間:2016年11月26日00:26:31 
 * 		1. 在1.0的基礎上友好了介面
 * 		2.
 * 
 * 
 * 
 * */
package easyspider;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import easyspider.menu.MenuSystem;

/**
 * Downloads one listing page, extracts the image URLs found in its
 * {@code <img>} tags and writes them into a standalone HTML gallery file.
 */
public class CrawSystem {
	/** Page that is fetched and scanned for images. */
	private static final String START_URL = "http://bcy.net/coser";

	/** Only {@code <img>} fragments containing this text are kept. */
	private static final String IMAGE_HOST_MARKER = "img9.bcyimg.com/coser";

	/** Directory the generated gallery is written to. */
	private static final String OUTPUT_DIR = "C://htmls/";

	/**
	 * Standard JVM entry point; delegates to {@link #main()}.
	 *
	 * @param args command-line arguments (unused)
	 */
	public static void main(String[] args) {
		main();
	}

	/**
	 * Runs the whole pipeline: prints the menu, fetches the page, selects all
	 * {@code img} elements and hands them to {@link #analysis(Elements)}.
	 * Returns without generating a file when the page could not be fetched.
	 */
	public static void main() {
		MenuSystem.init(); // program initialisation: prints version information
		String html = fetchPage(START_URL);
		if (html == null) {
			// Jsoup.parse rejects null input, so stop here instead of crashing.
			System.err.println("日誌:未取得頁面內容,略過解析");
			return;
		}
		Document document = Jsoup.parse(html);
		Elements imgs = document.select("img");
		analysis(imgs);
	}

	/**
	 * Performs a GET request and returns the response body as text.
	 *
	 * @param url address to request
	 * @return the body, or {@code null} when the request failed or the
	 *         response carried no entity
	 */
	private static String fetchPage(String url) {
		HttpGet get = new HttpGet(url);
		System.out.println("日誌:請求  ---- " + get.getURI());
		// try-with-resources closes the response first, then the client.
		try (CloseableHttpClient httpclient = HttpClients.createDefault();
				CloseableHttpResponse resp = httpclient.execute(get)) {
			HttpEntity entity = resp.getEntity();
			System.out.println("---------------------------------------");
			if (entity == null) {
				return null;
			}
			System.out.println("日誌:響應長度:---- " + entity.getContentLength());
			System.out.println("日誌:響應文件:---- ");
			// UTF-8 is only the fallback when the response declares no charset.
			String html = EntityUtils.toString(entity, StandardCharsets.UTF_8);
			System.out.println(html.length());
			return html;
		} catch (IOException e) {
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Extracts the image URLs from the given elements, writes a gallery page
	 * containing them via {@link #createHtml(String)} and returns the URLs.
	 *
	 * @param tags the {@code img} elements to scan; {@code null} is treated
	 *             as empty
	 * @return the extracted URLs in document order (possibly empty, never
	 *         {@code null})
	 */
	public static List<String> analysis(Elements tags) {
		List<String> newTags = new ArrayList<>();
		StringBuilder page = new StringBuilder();
		// Declares the encoding that createHtml uses for the file.
		page.append("<meta charset='utf-8'>");
		page.append("<link href='http://cdn.bootcss.com/bootstrap/2.3.2/css/bootstrap.min.css' rel='stylesheet'>");
		page.append("<div class='container'>");
		page.append("<h1>本頁面由o.kEnnponN開發的爬蟲軟體生成 - 軟體版本:V1.0 Bata</h1>");
		if (tags != null) {
			// The serialized markup is cut at every "<img" so that each
			// fragment holds the attributes of a single image tag.
			for (String fragment : tags.toString().split("<img")) {
				String url = extractImageUrl(fragment);
				if (url == null) {
					continue;
				}
				System.out.println(url);
				newTags.add(url);
				page.append("<img src='").append(url).append("'>");
			}
		}
		page.append("</div>");
		// Summary line: number of images collected (safe when none matched).
		System.out.println(newTags.size() + "****************************");
		createHtml(page.toString());
		return newTags;
	}

	/**
	 * Pulls the image URL out of one {@code <img>} fragment.
	 *
	 * @param fragment text following one {@code <img} marker
	 * @return the text from the first {@code http} through the following
	 *         {@code jpg}, or {@code null} when the fragment is not from the
	 *         wanted host, has no such span, or the span is unsafe to embed
	 */
	private static String extractImageUrl(String fragment) {
		if (!fragment.contains(IMAGE_HOST_MARKER)) {
			return null;
		}
		// Kept from the original implementation; presumably rewrites a
		// thumbnail-size token inside the URL -- TODO confirm.
		String candidate = fragment.replace("2X3", "~");
		int start = candidate.indexOf("http");
		if (start < 0) {
			return null;
		}
		int end = candidate.indexOf("jpg", start);
		if (end < 0) {
			return null;
		}
		String url = candidate.substring(start, end + 3);
		return isSafeForAttribute(url) ? url : null;
	}

	/**
	 * Tells whether the value can be placed inside a quoted HTML attribute
	 * without breaking out of it.
	 *
	 * @param value candidate attribute value
	 * @return {@code false} if it contains a quote, an angle bracket or
	 *         whitespace
	 */
	private static boolean isSafeForAttribute(String value) {
		for (int i = 0; i < value.length(); i++) {
			char c = value.charAt(i);
			if (c == '\'' || c == '"' || c == '<' || c == '>' || Character.isWhitespace(c)) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Writes the given markup as UTF-8 to a time-stamped {@code .html} file
	 * in the output directory, creating that directory when it is missing.
	 * I/O failures are reported on standard error and not rethrown.
	 *
	 * @param imgs complete HTML markup to store
	 */
	public static void createHtml(String imgs) {
		byte[] bytes = imgs.getBytes(StandardCharsets.UTF_8);
		System.out.println(bytes.length+"--------------------------");
		File file = new File(OUTPUT_DIR + new SimpleDateFormat("yyyy年MM月dd日HH時mm分ss秒").format(new Date()) + ".html");
		File dir = file.getParentFile();
		// FileOutputStream does not create missing parent directories.
		if (dir != null && !dir.isDirectory() && !dir.mkdirs()) {
			System.err.println("日誌:無法建立輸出目錄 ---- " + dir);
			return;
		}
		try (BufferedOutputStream buffer = new BufferedOutputStream(new FileOutputStream(file))) {
			buffer.write(bytes);
			buffer.flush();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}

PS,當年的Jsoup玩得還沒有現在6,所以程式碼看起來比較2B...