perl xpath 根據a標籤 查詢屬性為href的值
阿新 • • 發佈:2018-12-15
# a2.pl - print the href attribute of every <a> element in the local HTML files.
#
# Both input files are parsed into one shared tree, so the anchors of
# test.html and fh2.html are reported together. For every anchor the script
# prints the href value on one line and, on the following line, the number
# of values that attr('href') returned.
use strict;
use warnings;

use LWP::UserAgent;
use HTTP::Cookies;
use HTTP::Headers;
use HTTP::Response;
use Encode;
use JSON;
use File::Temp qw/tempfile/;
use HTML::TreeBuilder::XPath;
use HTML::TreeBuilder;
use Data::Dumper;
use DBI;

# HTTP client setup. Nothing below issues a request with it; it is kept so
# the script's set-up stays the same as before.
# NOTE(review): verify_hostname => 0 turns off TLS hostname verification;
# confirm that is really wanted before using $ua against a real server.
my $ua = LWP::UserAgent->new(
    ssl_opts => { verify_hostname => 0 },
);
$ua->timeout(10);
$ua->env_proxy;
my $now = time();
$ua->agent("Mozilla/8.0");

# Not referenced again below, but constructing it has side effects of its
# own (it is bound to lwp_cookies.txt with autosave enabled), so it stays.
my $cookie_jar = HTTP::Cookies->new(
    file           => 'lwp_cookies.txt',
    autosave       => 1,
    ignore_discard => 1,
);

my $tree = HTML::TreeBuilder::XPath->new;

# Load the pages to scan for URLs. A missing or unreadable file is reported
# and skipped instead of being ignored without any diagnostic.
for my $html_file ('test.html', 'fh2.html') {
    unless (-r $html_file) {
        warn "Skipping '$html_file': file is missing or not readable\n";
        next;
    }
    $tree->parse_file($html_file);
}

# Collect the blog category URLs: look up every <a> tag and read its href.
my @anchors = $tree->find_by_tag_name('a');

for my $anchor (@anchors) {
    # List assignment on purpose: the element count is printed below to
    # show how many values the attribute lookup produced.
    my @hrefs = $anchor->attr('href');

    # An <a> without an href yields undef; print it as an empty string so
    # the output shape stays the same and no warning is raised.
    print map { defined $_ ? $_ : '' } @hrefs;
    print "\n";
    print scalar(@hrefs);
    print "\n";
}
[email protected] sbin]# cat fh2.html <div class="daohang-kuai"> <div class="daohang-org"><span>風險管理部</span></div> <div class="daohang-links"><a href="http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信貸系統</a> <span >|</span><a href="http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">信貸系統(授權碼)</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">外部資料管理平</a> <span >|</span><a href="/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">非現場監測系統</a> <span >|</span><div class="clear"></div></div><div class="daohang-links"><a href="/tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">風險事件報送系統</a> <span >|</span><div class="clear"></div></div> </div> <div class="daohang-kuai"> <div class="daohang-org"><span>國際業務部</span></div> <div class="daohang-links"><a href="http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org" target="_blank">國際結算系統</a> <span >|</span><div class="clear"></div></div> </div> [[email protected] sbin]# perl a2.pl http://999.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 
/tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://999.3.246.2:7001/newaml?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 /tailong/syslink/goAml.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=crmis&tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 http://999.3.200.16:7001/UtanWeb/index.jsp?tgt=TGT-684883-iOIvoMW2UUuJirHRm6YaUDPuBBUJEMkPBUf6FgS90aZp9GYJsC-cas01.example.org 1 http://990.2.200.17:7003/loan-web/4ALogon.jsp?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://9000.2.200.17:7003/loan-web/sqlogon.html?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://800.16.75.1:9001/datamanage/IM00000001_4a_signIn.do?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=oics&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 /tailong/syslink/oaforward.jsp?idName=tlorm&tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1 http://800.3.248.1:7001/UtanWeb?tgt=TGT-684250-VtgMdfLLmP6b9Xtb2xjivAxqZR7Dhe0CTDqa2IjmleHgshVEJe-cas01.example.org 1