當二次元與程式設計師碰撞,會產生什麼?
阿新 • • 發佈:2018-12-12
16年年底,發現了一個叫半次元的Coser網站,看圖片一個一個看太麻煩,直接寫個爬蟲把所有圖片記錄下來重新構成一個只有圖片的html...
第一部分:介面
package easyspider.menu;

import easyspider.menu.logging.LoggingWriter;

/**
 * Start-up banner for the crawler: prints the version line and the
 * usage hints about where the generated HTML is stored.
 */
public class MenuSystem {

    /**
     * Prints the version banner to standard error, forwards two hint
     * messages to {@code LoggingWriter.printMessage}, and finally calls the
     * save-path hook.
     *
     * NOTE(review): how {@code LoggingWriter} renders the messages is not
     * visible from this file.
     */
    public static void init() {
        final String banner = "EasySprider [版本\t1.2]";
        final String[] hints = {
            "生成的HTML預設存放在C盤根目錄下...",
            "如果需要手動設定儲存碟符路徑,請直接輸入碟符:etc(C:)"
        };

        System.err.println(banner);
        for (String hint : hints) {
            LoggingWriter.printMessage(hint);
        }
        changeSavePath();
    }

    /**
     * Hook for changing the save path. It has no body yet, so calling it
     * does nothing.
     */
    private static void changeSavePath() {
        // Intentionally empty: not implemented.
    }
}
第二部分:實際體
/**
 * @author o.kEnnponN
 * Refactored: 2016-11-26 00:26:31
 * 1. Friendlier interface compared with version 1.0
 * 2.
 */
package easyspider;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;

import easyspider.menu.MenuSystem;

/**
 * Downloads one listing page, collects the JPEG image URLs that contain a
 * fixed host marker, and writes a stand-alone HTML page that shows only
 * those images.
 */
public class CrawSystem {

    /** Page that is fetched and scanned for images. */
    private static final String START_URL = "http://bcy.net/coser";

    /** Only {@code <img>} fragments containing this text are kept. */
    private static final String IMAGE_HOST_MARKER = "img9.bcyimg.com/coser";

    /**
     * Directory the generated page is written to.
     * NOTE(review): the start-up hint printed by MenuSystem says the drive
     * root, but the code has always used this sub-directory.
     */
    private static final String OUTPUT_DIR = "C://htmls/";

    /** Charset of the generated file; it is also declared inside the HTML. */
    private static final String OUTPUT_CHARSET = "UTF-8";

    /**
     * Standard JVM entry point. Delegates to {@link #main()} so the class
     * can be launched with {@code java easyspider.CrawSystem}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        main();
    }

    /**
     * Runs the crawl: prints the banner, fetches the start page, selects
     * every {@code img} element and hands them to {@link #analysis(Elements)}.
     * If the page cannot be fetched, an error is reported and nothing is
     * generated.
     */
    public static void main() {
        MenuSystem.init(); // program start-up: prints version information

        String html = fetchHtml(START_URL);
        if (html == null) {
            System.err.println("No response body received from " + START_URL + "; nothing to generate.");
            return;
        }

        Document document = Jsoup.parse(html);
        // Example of an image URL found on the page:
        // http://img5.bcyimg.com/editor/flag/1789q/15469140839911e6b4b9bbaf8b79bd63.jpg
        Elements imgs = document.select("img");
        analysis(imgs);
    }

    /**
     * Fetches {@code url} with an HTTP GET and returns the response body.
     * Both the response and the client are closed before returning.
     *
     * @param url address to request
     * @return the body as text, or {@code null} if the request failed or the
     *         response carried no entity
     */
    private static String fetchHtml(String url) {
        CloseableHttpClient httpclient = HttpClients.createDefault();
        try {
            HttpGet get = new HttpGet(url);
            System.out.println("日誌:請求 ---- " + get.getURI());

            CloseableHttpResponse resp = httpclient.execute(get);
            try {
                HttpEntity entity = resp.getEntity();
                System.out.println("---------------------------------------");
                if (entity == null) {
                    return null;
                }
                System.out.println("日誌:響應長度:---- " + entity.getContentLength());
                System.out.println("日誌:響應文件:---- ");

                String html = EntityUtils.toString(entity);
                System.out.println(html.length());
                return html;
            } finally {
                resp.close();
            }
        } catch (IOException e) {
            e.printStackTrace();
            return null;
        } finally {
            try {
                httpclient.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Builds the image-only HTML page from the given elements and writes it
     * to disk through {@link #createHtml(String)}.
     *
     * The elements are rendered to text and split on {@code "<img"}; each
     * fragment is then scanned for a URL (see extractImageUrl).
     *
     * @param tags the {@code img} elements of the page; {@code null} is
     *             treated as "no elements"
     * @return the image URLs written into the page, in document order
     *         (empty if none matched)
     */
    public static List<String> analysis(Elements tags) {
        List<String> newTags = new ArrayList<String>();
        String[] fragments = (tags == null) ? new String[0] : tags.toString().split("<img");

        StringBuilder sb = new StringBuilder();
        // The file is written as UTF-8, so say so for the browser.
        sb.append("<meta charset='utf-8'>");
        sb.append("<link href='http://cdn.bootcss.com/bootstrap/2.3.2/css/bootstrap.min.css' rel='stylesheet'>");
        sb.append("<div class='container'>");
        sb.append("<h1>本頁面由o.kEnnponN開發的爬蟲軟體生成 - 軟體版本:V1.0 Bata</h1>");

        String lastUrl = null;
        for (String fragment : fragments) {
            String url = extractImageUrl(fragment);
            if (url == null) {
                continue;
            }
            System.out.println(url);
            sb.append("<img src='").append(url).append("'>");
            newTags.add(url);
            lastUrl = url;
        }
        sb.append("</div>");

        // Debug output: length of the last URL found (0 when there was none).
        System.out.println((lastUrl == null ? 0 : lastUrl.length()) + "****************************");
        createHtml(sb.toString());
        return newTags;
    }

    /**
     * Extracts the image URL from one {@code <img ...>} fragment.
     *
     * @param fragment text that followed one {@code "<img"} separator
     * @return the text from the first {@code "http"} through the first
     *         following {@code "jpg"}, or {@code null} when the fragment
     *         lacks the host marker, lacks either delimiter, or the result
     *         is not safe to embed in a quoted attribute
     */
    private static String extractImageUrl(String fragment) {
        if (fragment.indexOf(IMAGE_HOST_MARKER) == -1) {
            return null;
        }
        // Presumably "2X3" is a size marker in the URL — TODO confirm.
        String candidate = fragment.replace("2X3", "~");

        int start = candidate.indexOf("http");
        if (start < 0) {
            return null;
        }
        candidate = candidate.substring(start);

        int extension = candidate.indexOf("jpg");
        if (extension < 0) {
            return null;
        }
        String url = candidate.substring(0, extension + 3);
        return isSafeForAttribute(url) ? url : null;
    }

    /**
     * Tells whether {@code url} can be placed inside a single-quoted HTML
     * attribute without breaking out of it. The text comes from a remote
     * page, so quotes, angle brackets and whitespace are rejected.
     */
    private static boolean isSafeForAttribute(String url) {
        for (int i = 0; i < url.length(); i++) {
            char c = url.charAt(i);
            if (c == '\'' || c == '"' || c == '<' || c == '>' || Character.isWhitespace(c)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Writes {@code imgs} as a UTF-8 file named after the current time into
     * the output directory, creating the directory if needed. I/O failures
     * are reported on standard error and do not propagate.
     *
     * @param imgs complete HTML text to store; {@code null} writes nothing
     */
    public static void createHtml(String imgs) {
        if (imgs == null) {
            System.err.println("createHtml: content is null; nothing written.");
            return;
        }

        BufferedOutputStream buffer = null;
        try {
            byte[] bytes = imgs.getBytes(OUTPUT_CHARSET);
            System.out.println(bytes.length + "--------------------------");

            File dir = new File(OUTPUT_DIR);
            if (!dir.isDirectory() && !dir.mkdirs()) {
                throw new IOException("Cannot create output directory " + dir.getAbsolutePath());
            }

            String fileName = new SimpleDateFormat("yyyy年MM月dd日HH時mm分ss秒").format(new Date()) + ".html";
            buffer = new BufferedOutputStream(new FileOutputStream(new File(dir, fileName)));
            buffer.write(bytes);
            buffer.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (buffer != null) {
                try {
                    buffer.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }
}
PS,當年的Jsoup玩得還沒有現在6,所以程式碼看起來比較2B...