1. 程式人生 > >Jsoup解析和遍歷一個HTML文檔(二)

Jsoup解析和遍歷一個HTML文檔(二)

spl nodes gif .org code htm ips method spa

關於在Eclipse編輯器中調整console字體的方法:

技術分享

1,下載jsoup的jar包:http://jsoup.org/download

2,jsoup的英文開發手冊:http://jsoup.org/cookbook/

3,jsoup cookbook的中文版:http://www.open-open.com/jsoup/

- - - - - - - - - - - - - - -

- - - - - - - - - - - - - - -

小實例:

技術分享
 1 package cn.cast.test;
 2 
 3 
 4 
 5 import org.jsoup.Jsoup;
 6 import org.jsoup.nodes.Document;
7 import org.jsoup.nodes.Element; 8 import org.jsoup.select.Elements; 9 10 11 import java.io.IOException; 12 13 import org.jsoup.Jsoup; 14 import org.jsoup.Jsoup; 15 import org.jsoup.nodes.Document; 16 import org.jsoup.nodes.Element; 17 import org.jsoup.select.Elements; 18 19 20 public class test_1 {
21 22 public static void main(String[] args) { 23 // TODO Auto-generated method stub 24 getUrlAndTitle(); 25 getTextMes(); 26 } 27 28 public static void getUrlAndTitle() 29 { 30 String url="http://finance.sina.com.cn/"; 31 try { 32 Document doc=Jsoup.connect(url).timeout(10000).get();//get all infomation from url website 33 //System.out.println(doc); 34 Elements ListDiv = doc.getElementsByAttributeValue("class","fin_tabs0_c0"); 35 //System.out.println(ListDiv); 36 for (Element div :ListDiv) { 37 Elements links = div.getElementsByTag("a"); 38 // System.out.println(links); 39 for (Element link : links) { 40 String linkHref = link.attr("href").trim(); 41 String linkText = link.text().trim(); 42 System.out.println(linkHref+"\t"+linkText); 43 } 44 } 45 } catch (IOException e) { 46 // TODO Auto-generated catch block 47 e.printStackTrace(); 48 } 49 } 50 51 public static void getTextMes() 52 { 53 String url="http://finance.sina.com.cn/hy/20140823/100220099682.shtml"; 54 String textMes=""; 55 try { 56 Document doc=Jsoup.connect(url).timeout(10000).get(); 57 Elements ListDiv = doc.getElementsByAttributeValue("class","blkContainerSblkCon BSHARE_POP"); 58 //System.out.println(ListDiv); 59 for(Element div:ListDiv) 60 { 61 Elements textInfos=div.getElementsByTag("p"); 62 //System.out.println(textInfos); 63 for(Element textInfo:textInfos) 64 { 65 String text=textInfo.text().trim(); 66 textMes=textMes+text+"\n"; 67 } 68 } 69 System.out.println(textMes); 70 } catch (IOException e) { 71 // TODO Auto-generated catch block 72 e.printStackTrace(); 73 } 74 } 75 }
test_1

Jsoup解析和遍歷一個HTML文檔(二)