1. 程式人生 > >正則匹配div中的a標記的href 和content

正則匹配div中的a標記的href 和content

        很久後又一次接觸正則,那種似曾相識的感覺,你是不是也有過:這麼小CASE的東西,我一定要把它徹底掌握了。於是花時間看手冊,於是就明白了,於是下次又是似曾相識……

總結一下今天用到的地方:

//demo

$str = '<div class="test">
        <div class="wcb"><li>li</li>
        <a href="m.vip.com/shoe/47">跑步鞋</a>
        <a href="m.vip.com/shoe/48">籃球鞋</a>
        </div>
        <div class="wcb">tryagin</div>
        </div>';

// Results default to empty lists so later code can rely on them being defined.
$target_content = '';
$href_content = array();
$detail_content = array();

// Step 1: isolate the first <div class="wcb"> ... </div>.
// The "s" (DOTALL) modifier is required: the div body spans several lines and
// "." does not match "\n" without it, which made the original pattern skip the
// first div and pick the single-line one instead.
preg_match_all('/<div class=\"wcb\">(.*?)<\/div>/is', $str, $matches);

// preg_match_all always fills $matches with (possibly empty) sub-arrays, so the
// meaningful check is whether a first full match exists.
if (isset($matches[0][0]))
{
    $target_content = $matches[0][0];
}

// Step 2: pull the anchors out of that div. Each (...) group is returned
// separately: group 1 is the href value, group 2 is the link text.
// The negative lookahead rejects hrefs that begin with "/topic/show".
preg_match_all('/<a href=\"((?!\/topic\/show).[^<]*)\">(.*?)<\/a>/i', $target_content, $matches_detail);
if (isset($matches_detail[1], $matches_detail[2]))
{
    $href_content = $matches_detail[1];
    $detail_content = $matches_detail[2];
}

方法:
/**
    * Extract the link list (href + link text) for one topic from the stored page content.
    *
    * Steps: look up the page whose 'department' equals $topic, load its current
    * version's module entries, take the 'content' of the relevant module, cut out
    * the container with $pattern1 and then collect the anchors with $pattern2.
    *
    * @param string  $topic     topic name, compared against each page's 'department'
    * @param string  $pattern1  regex isolating the container; its first full match is used
    * @param string  $pattern2  regex run on the container; group 1 = href, group 2 = link text
    *
    * @return array  empty array when any argument is empty; otherwise
    *                $data['href_content']   : list of href values
    *                $data['detail_content'] : list of link texts
    *                (both lists are empty when nothing matched)
    */
    private function get_topic_data($topic, $pattern1, $pattern2)
    {
        $data = array();

        if (empty($topic) || empty($pattern1) || empty($pattern2))
        {
            return $data;
        }

        // Resolve the page id for this topic. No break on purpose: if several
        // pages share the department, the last one wins, as before.
        $modpage_id = 0;
        $modpages = $this->Modpage_Model->getAllPages();
        foreach ($modpages as $one_page) {
            if ($one_page['department'] == $topic) {
                $modpage_id = $one_page['id'];
            }
        }

        // Load the module entries of the page's current version.
        $version_data = $this->Modpage_Model->getCurrentPageVersion($modpage_id);

        $page_info_raw = $this->Modpage_Model->getVersionData($modpage_id, $version_data['version']);

        // Pick the content of the first matching module entry. Initialised up front
        // so that "no entry matched" yields an empty string, not an undefined variable.
        $parse_str = '';
        foreach ($page_info_raw as $v)
        {
            // 'children' is handled separately: module 1 with this exact description
            if ($topic == 'children')
            {
                if ($v['module_id'] == 1 && $v['desc'] == '快速找寶貝')
                {
                    $parse_str = $v['content'];
                    break;
                }
            }
            else
            {
                // every other topic uses module 4 (sub-navigation data per the original comment)
                if ($v['module_id'] == 4)
                {
                    $parse_str = $v['content'];
                    break;
                }
            }
        }

        // Strip line breaks/tabs first so "." in the patterns can span the whole markup.
        $parse_str = $this->clearData($parse_str);

        // preg_match_all always fills $matches with sub-arrays, so test for the
        // first full match explicitly instead of !empty($matches).
        $target_content = '';
        if (preg_match_all($pattern1, $parse_str, $matches) && isset($matches[0][0]))
        {
            $target_content = $matches[0][0];
        }

        // Swap the host name for the touch-site URL.
        $target_content = str_replace($this->website, TOUCH_URL, $target_content);

        $target_content = $this->clearData($target_content);

        preg_match_all($pattern2, $target_content, $matches_detail);

        // Fall back to empty lists when a capture group is missing, so callers
        // always receive both keys.
        $data['href_content'] = isset($matches_detail[1]) ? $matches_detail[1] : array();
        $data['detail_content'] = isset($matches_detail[2]) ? $matches_detail[2] : array();

        return $data;
    }

   	/**
   	 * Strip line breaks and tab characters from a string, then trim it.
   	 *
   	 * Removes "\r\n", lone "\r", "\n" and "\t" anywhere in the string and trims
   	 * surrounding whitespace from the result.
   	 *
   	 * @param string  $str  raw input; null, false, '' and non-scalar values yield ''
   	 *
   	 * @return string  the cleaned string
   	 */
   	private function clearData( $str )
   	{
   		// Do not use empty() here: empty('0') is true and would wipe the real value "0".
   		if ( !is_scalar($str) || $str === false || $str === '' )
   		{
   			return '';
   		}

   		// "\r\n" is listed before the single characters; a lone "\r" is covered too.
   		$str = str_replace(array("\r\n", "\r", "\n", "\t"), '', (string) $str);

   		return trim($str);
   	}

//測試方法
/**
    * Home page entry point.
    *
    * Runs get_topic_data() once per configured department, stores each result
    * under $data['data'][<view key>], and renders the header, index and footer views.
    */
    public function index() {

        // department => array(view key, container regex, link regex)
        $topics = array(
            'sports' => array('sport','/<div class=\"spnavdiv\">(.*?)<\/div>/i','/<a target="_blank" href=\"((?!\/topic\/show).[^<]*)\">(.*?)<\/a>/i'),
            'womenshoes' => array('women','/<div class=\"OB_floatL\">(.*?)<\/div>/i','/<a href=\"((?!\/topic\/show).[^<]*)\" .*?>(.*?)<\/a>/i'),
            'menshoes' => array('man','/<div class=\"OB_floatL\" .*?>(.*?)<\/div>/i','/<a target="_blank" href=\"((?!\/topic\/show).[^<]*)\">(.*?)<\/a>/i'),
            'outdoor' => array('outdoor','/<div class=\"navlidiv\" style=\".*?\">(.*?)<\/div>/i','/<a href=\"((?!\/topic\/show).[^<]*)\"  target="_blank" .*?>(.*?)<\/a>/i'),
            'children' => array('children','/<table class=\"catetable_2\">(.*?)<\/div>/i','/<a href=\"((?!\/topic\/show).[^<]*)\" target="_blank">(.*?)<\/a>/i'),
            );

        // Collect the extracted links for every department under its view key.
        $data = array();
        foreach ($topics as $department => $config) {
            list($view_key, $block_regex, $link_regex) = $config;
            $data['data'][$view_key] = $this -> get_topic_data( $department, $block_regex, $link_regex );
        }

        // Render the page: header (given the 'jspath' value), body, footer.
        $h_data = array('jspath' => 'index');
        $this->load->view("touch/common/header", $h_data);
        $this->load->view("touch/index/index", $data);
        $this->load->view("touch/common/footer");
    }
應用場景:
<div class="navsubmenu" style="width:280px;left:0"> <span class="arr" style="left:30px;"></span>
          <div class="OB_floatL" style="margin-right:30px">
            <h5 class="subh5">全部男鞋</h5>
            <p><a target="_blank" href="http://m.vip.com/shoe/167"class="hot">休閒鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/165">正裝鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/166" class="hot">商務鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/170">涼鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/173">戶外鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/172">帆布鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/171">板鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/168" class="hot">男靴</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/351">棉鞋</a></p>
          </div>
          <div class="OB_floatL" style="margin-right:30px">
            <h5 class="subh5">熱門品類</h5>
            <p><a target="_blank" href="http://m.vip.com/topic/show/8049" class="hot">1月新品</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?top_key=%E5%B7%A5%E8%A3%85%E9%9E%8B&new_cat=164" class="hot">工裝鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?top_key=%E7%89%9B%E6%B4%A5%E9%9E%8B&new_cat=164">牛津鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/168">男靴</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/167-11v1585">伐木鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?top_key=%E5%B8%86%E8%88%B9%E9%9E%8B&new_cat=164">帆船鞋</a></p>
             <p><a target="_blank" href="http://m.vip.com/shoe/166">商務皮鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/shoe/166-16v243">增高鞋</a></p>
            <p><a target="_blank" href="http://m.vip.com/search?gender=1&top_key=%E5%86%9B%E8%AD%A6%E9%9D%B4">軍警靴</a></p>
          </div>
</div>


PS:

      preg快速檢視地址:http://msdn.microsoft.com/zh-cn/library/ae5bf541(v=vs.80).aspx

     小結:這是最笨,但是最快解決問題的方法

                 參考遞迴實現:http://zhidao.baidu.com/link?url=WYgzZnK-_kD_ooBmH3iALiPniS054Ympziofk0nX1B6Nywy1cPjGfnhSp3PaQ95qw_rEOG-E_GX3t4YFvtQeja


    總結:

            ()可以返回匹配到的內容