【垂直搜尋引擎搭建15】HtmlParser中Filter方法(本地URL地址)
阿新 • • 發佈:2019-01-02
package org.algorithm;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Demo that loads an HTML page from the local disk and prints the nodes
 * selected by an HTMLParser {@link NodeFilter}.
 */
public class LocalParserDemo {

    /**
     * Parses the given resource and prints every node accepted by the filter
     * to standard output, one node per line.
     *
     * @param url the string handed to the {@link Parser} constructor; note that
     *            {@link #main} passes the full text of the page here rather than
     *            a location
     * @throws ParserException if the parser cannot be created or parsing fails
     * @throws IOException     declared for callers; kept for interface compatibility
     */
    public static void getContent(String url) throws ParserException, IOException {
        Parser parser = new Parser(url);
        // NOTE(review): the one-argument HasAttributeFilter matches on an
        // attribute *name*, so this selects tags carrying an attribute called
        // "div" rather than <div> elements. Confirm the intent; a tag-name
        // filter may be what was meant.
        NodeFilter filter = new HasAttributeFilter("div");
        NodeList nodes = parser.extractAllNodesThatMatch(filter);
        if (nodes == null) {
            return;
        }
        for (int i = 0; i < nodes.size(); i++) {
            // Use the loop index: the previous code always read element 0 and
            // therefore printed the first match nodes.size() times.
            Node node = nodes.elementAt(i);
            System.out.println(node.toString());
        }
    }

    /**
     * Reads the local HTML file into memory and hands its text to
     * {@link #getContent(String)}.
     *
     * @param args unused
     * @throws IOException     if the file cannot be opened or read
     * @throws ParserException if parsing the page fails
     */
    public static void main(String[] args) throws IOException, ParserException {
        String path = "c://n382738784.html";
        StringBuilder sb = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws.
        try (BufferedReader br = new BufferedReader(new FileReader(new File(path)))) {
            String line;
            while ((line = br.readLine()) != null) {
                // readLine() strips the terminator, so put one back per line.
                sb.append(line);
                sb.append("\r\n");
            }
        }
        String url = sb.toString();
        getContent(url);
    }
}