1. 程式人生 > >perl xpath 根據a標籤 查詢屬性為href的值

perl xpath 根據a標籤 查詢屬性為href的值

[root@localhost sbin]# 
[root@localhost sbin]# cat a2.pl 
use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Headers;
use HTTP::Response;
use Encode;
use JSON;
use File::Temp qw/tempfile/;
use HTML::TreeBuilder::XPath;
use Encode;
use HTML::TreeBuilder;
use Data::Dumper;
use HTML::TreeBuilder::XPath;
use DBI;
use Encode;
# HTTP client configuration, passed as constructor options:
#   ssl_opts  - TLS hostname verification is switched off.
#               NOTE(review): this accepts any certificate host name; confirm
#               that is really wanted before reusing this against real hosts.
#   timeout   - request timeout of 10 (seconds, per LWP::UserAgent).
#   env_proxy - pick up proxy settings from the environment.
#   agent     - User-Agent string sent with requests.
# Nothing in the visible script issues a request through $ua.
my $ua = LWP::UserAgent->new(
    ssl_opts  => { verify_hostname => 0 },
    timeout   => 10,
    env_proxy => 1,
    agent     => "Mozilla/8.0",
);

# Start time of the run (epoch seconds); not read anywhere in the visible code.
my $now = time();

# Cookie jar backed by 'lwp_cookies.txt'; autosave is on and cookies marked
# "discard" are kept as well.
# NOTE(review): the jar is never attached to $ua in the visible code, so it
# does not take part in any request.
my %jar_options = (
    file           => 'lwp_cookies.txt',
    autosave       => 1,
    ignore_discard => 1,
);
my $cookie_jar = HTTP::Cookies->new(%jar_options);
# Strictures are lexically scoped: enabling them here covers the rest of the
# script without affecting the setup code above.
use strict;
use warnings;

# Parse the saved pages into a single tree and print the href of every <a>.
#
# Output, per <a> element found:
#   line 1: the href value (empty line when the element has no href)
#   line 2: how many href values were found for that element (1 or 0)
#
# Dies with the OS error when one of the input files cannot be opened.
my $tree = HTML::TreeBuilder::XPath->new;

# NOTE(review): both files are fed into the same tree, so links from
# test.html are listed before those from fh2.html. Confirm that the
# test.html pass is intentional and not a leftover.
for my $html_file ( 'test.html', 'fh2.html' ) {
    # parse_file() yields undef (and sets $!) when the file cannot be opened;
    # without this check a missing file silently produces no output.
    defined $tree->parse_file($html_file)
        or die "Cannot parse '$html_file': $!\n";
}

# Collect the URLs: find every <a> tag and read its href attribute.
my @links = $tree->find_by_tag_name('a');
for my $link (@links) {
    # attr() returns a single scalar, undef when the attribute is absent.
    # Filtering undef keeps the count below honest (the old code always
    # printed 1, even for anchors without an href).
    my @hrefs = grep { defined } $link->attr('href');

    print @hrefs;
    print "\n";
    print scalar(@hrefs);
    print "\n";
}

# Release the tree explicitly; older HTML::Element versions rely on this to
# break parent/child reference cycles.
$tree->delete;


[
[email protected]
sbin]# cat fh2.html <div class="daohang-kuai"> <div class="daohang-org"><span>風險管理部</span></div> <div class="daohang-links"><a href="http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信貸系統</a> <span >|</span><a href="http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信貸系統(授權碼)</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">外部資料管理平</a> <span >|</span><a href="/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">非現場監測系統</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">風險事件報送系統</a> <span >|</span><div class="clear"></div></div> </div> <div class="daohang-kuai"> <div class="daohang-org"><span>國際業務部</span></div> <div class="daohang-links"><a href="http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">國際結算系統</a> <span >|</span><div class="clear"></div></div> </div> [
[email protected]
sbin]# perl a2.pl http://999.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.3.246.2:7001/newaml?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 /tailong/syslink/goAml.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=crmis&tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 http://999.3.200.16:7001/UtanWeb/index.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 
http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1