1. 程式人生 >【垂直搜尋引擎搭建15】HtmlParser中Filter方法(本地URL地址)

【垂直搜尋引擎搭建15】HtmlParser中Filter方法(本地URL地址)

package org.algorithm;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.util.NodeList;
import
org.htmlparser.util.ParserException; public class LocalParserDemo { public static void getContent(String url) throws ParserException,IOException{//解析網頁 Parser parser = new Parser(url); NodeFilter filter = new HasAttributeFilter("div"); NodeList nodes = parser.extractAllNodesThatMatch(filter); String nodex = ""
; if(nodes!=null){ for(int i=0;i<nodes.size();i++){ Node node = nodes.elementAt(0); nodex = node.toString(); System.out.println(nodex); } } } @SuppressWarnings("resource") public static void main
(String[] args) throws IOException, ParserException {//載入本地網頁 String path = "c://n382738784.html"; StringBuffer sb = new StringBuffer(); BufferedReader br = new BufferedReader(new FileReader(new File(path))); String temp = ""; while((temp = br.readLine())!= null){ sb.append(temp); sb.append("\r\n"); } String url = sb.toString(); getContent(url); } }