正則表示式獲取HTML標記中的內容(C#)
阿新 • • 發佈:2019-02-19
//=====================Begin1========================
// Example 1: extract a value from markup through a regex capturing group.
// Input text under test.
string strTmp = string.Empty;
// Regular-expression pattern.
string tmpStr = string.Empty;
// RegexOptions.IgnoreCase makes matching case-insensitive. RegexOptions.Multiline only
// changes ^ and $ so they match at every line boundary; neither pattern below uses
// those anchors, so the option has no effect here and is kept for parity with the sample.
// tmpStr = "<title>([^<]*)</title>"; // would capture the text between <title> and </title>
strTmp = @"<add key='ConnectionString' value='server=localhost;database=資料庫名;uid=sa;pwd=;pooling=true'/>";
// Capture the text between "database=" and the next ';'.
// ([^;]*) stops at the first ';', whereas the greedy form database=(.*); would run to the last one.
tmpStr = "database=([^;]*);";
Match TitleMatch = Regex.Match(strTmp, tmpStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
// Syntax-reference example that reads the size attribute; real code rarely needs to be this elaborate.
strTmp = "><font color='red' size=6>WebForm3</font><";
// <tag, whitespace, name='value', whitespace, then the literal "size=" followed by the captured digits.
tmpStr = @"<\w+\s+\w+='\w+'\s+size=(\d+)>";
// Reassign the existing variable: declaring TitleMatch a second time in the same scope is a compile error.
TitleMatch = Regex.Match(strTmp, tmpStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
// Value of the first capturing group; Groups[1].Value is an empty string when the match failed.
string tmpStrTitle = TitleMatch.Groups[1].Value;
// Strip every HTML tag from the label text (lazy <.+?> stops at the first '>'), then append the captured value.
Label1.Text = Regex.Replace(Label1.Text.Trim(), "<.+?>", "") + "*********" + tmpStrTitle;
// Check whether a regex match succeeded.
// NOTE(review): this tests the pattern string tmpStr, not the input strTmp — confirm which was intended.
if (Regex.IsMatch(tmpStr, "<.+?>"))
{
    // Work to perform on success.
}
//=====================End1========================
//=====================Begin2========================
// Example 2: extract the target URL and the visible text of every anchor in a fragment.
string webDocContent = "<a href=http://www.xxx.xxx/college/pages/default.htm target=_blank>師資隊伍</A>";
// Pattern walkthrough:
//   <a\s+href=          opening tag, one or more whitespace characters, then the href attribute
//   (?<Link>[^\s>]+)    unquoted URL: everything up to the next whitespace or '>'
//   [^>]*>              any remaining attributes and the end of the opening tag
//   (?<Text>[^<]*)</a>  link text up to the closing tag (IgnoreCase lets </A> match too)
string strPattern = @"<a\s+href=(?<Link>[^\s>]+)[^>]*>(?<Text>[^<]*)</a>";
// Collect every anchor found in the fragment.
MatchCollection Matches = Regex.Matches(webDocContent, strPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
foreach (Match NextMatch in Matches)
{
    // Group.Value is already a string, so no ToString() call is needed.
    string URL = NextMatch.Groups["Link"].Value.Trim();
    string URLText = NextMatch.Groups["Text"].Value.Trim();
    Response.Write(URL + "****");
    Response.Write(URLText);
}
//=====================End2========================
//=====================Begin3========================
// Example 3: read a saved page from disk and list the contents of every matching table cell.
string strPageContent = string.Empty;
// The using block disposes the reader even when ReadToEnd throws.
using (StreamReader srPage = new StreamReader(@"e:\save.txt", System.Text.Encoding.GetEncoding("gb2312")))
{
    strPageContent = srPage.ReadToEnd();
}
// (\s)* matches zero or more whitespace characters, including line breaks.
// (.*?) lazily matches any characters except a line break, so alternating the two lets
// the cell content span up to four lines. Quotes are doubled because this is a verbatim string.
MatchCollection TitleMatchs = Regex.Matches(
    strPageContent,
    @"<td width=""85%"" class=""common_text"">((\s)*(.*?)(\s)*(.*?)(\s)*(.*?)(\s)*(.*?)(\s)*)</td>",
    RegexOptions.IgnoreCase | RegexOptions.Multiline);
int tmpNum = 0;
// Walk every match and append its numbered cell content (group 1) to the label.
foreach (Match NextMatch in TitleMatchs)
{
    ++tmpNum;
    Label1.Text += tmpNum + "<br>****" + NextMatch.Groups[1].Value;
}
//=====================End3========================
//=====================Begin1========================
// Example 1: extract a value from markup through a regex capturing group.
// Input text under test.
string strTmp = string.Empty;
// Regular-expression pattern.
string tmpStr = string.Empty;
// RegexOptions.IgnoreCase makes matching case-insensitive. RegexOptions.Multiline only
// changes ^ and $ so they match at every line boundary; neither pattern below uses
// those anchors, so the option has no effect here and is kept for parity with the sample.
// tmpStr = "<title>([^<]*)</title>"; // would capture the text between <title> and </title>
strTmp = @"<add key='ConnectionString' value='server=localhost;database=資料庫名;uid=sa;pwd=;pooling=true'/>";
// Capture the text between "database=" and the next ';'.
// ([^;]*) stops at the first ';', whereas the greedy form database=(.*); would run to the last one.
tmpStr = "database=([^;]*);";
Match TitleMatch = Regex.Match(strTmp, tmpStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
// Syntax-reference example that reads the size attribute; real code rarely needs to be this elaborate.
strTmp = "><font color='red' size=6>WebForm3</font><";
// <tag, whitespace, name='value', whitespace, then the literal "size=" followed by the captured digits.
tmpStr = @"<\w+\s+\w+='\w+'\s+size=(\d+)>";
// Reassign the existing variable: declaring TitleMatch a second time in the same scope is a compile error.
TitleMatch = Regex.Match(strTmp, tmpStr, RegexOptions.IgnoreCase | RegexOptions.Multiline);
// Value of the first capturing group; Groups[1].Value is an empty string when the match failed.
string tmpStrTitle = TitleMatch.Groups[1].Value;
// Strip every HTML tag from the label text (lazy <.+?> stops at the first '>'), then append the captured value.
Label1.Text = Regex.Replace(Label1.Text.Trim(), "<.+?>", "") + "*********" + tmpStrTitle;
// Check whether a regex match succeeded.
// NOTE(review): this tests the pattern string tmpStr, not the input strTmp — confirm which was intended.
if (Regex.IsMatch(tmpStr, "<.+?>"))
{
    // Work to perform on success.
}
//=====================End1========================
//=====================Begin2========================
// Example 2: extract the target URL and the visible text of every anchor in a fragment.
string webDocContent = "<a href=http://www.xxx.xxx/college/pages/default.htm target=_blank>師資隊伍</A>";
// Pattern walkthrough:
//   <a\s+href=          opening tag, one or more whitespace characters, then the href attribute
//   (?<Link>[^\s>]+)    unquoted URL: everything up to the next whitespace or '>'
//   [^>]*>              any remaining attributes and the end of the opening tag
//   (?<Text>[^<]*)</a>  link text up to the closing tag (IgnoreCase lets </A> match too)
string strPattern = @"<a\s+href=(?<Link>[^\s>]+)[^>]*>(?<Text>[^<]*)</a>";
// Collect every anchor found in the fragment.
MatchCollection Matches = Regex.Matches(webDocContent, strPattern, RegexOptions.IgnoreCase | RegexOptions.Compiled);
foreach (Match NextMatch in Matches)
{
    // Group.Value is already a string, so no ToString() call is needed.
    string URL = NextMatch.Groups["Link"].Value.Trim();
    string URLText = NextMatch.Groups["Text"].Value.Trim();
    Response.Write(URL + "****");
    Response.Write(URLText);
}
//=====================End2========================
//=====================Begin3========================
// Example 3: read a saved page from disk and list the contents of every matching table cell.
string strPageContent = string.Empty;
// The using block disposes the reader even when ReadToEnd throws.
using (StreamReader srPage = new StreamReader(@"e:\save.txt", System.Text.Encoding.GetEncoding("gb2312")))
{
    strPageContent = srPage.ReadToEnd();
}
// (\s)* matches zero or more whitespace characters, including line breaks.
// (.*?) lazily matches any characters except a line break, so alternating the two lets
// the cell content span up to four lines. Quotes are doubled because this is a verbatim string.
MatchCollection TitleMatchs = Regex.Matches(
    strPageContent,
    @"<td width=""85%"" class=""common_text"">((\s)*(.*?)(\s)*(.*?)(\s)*(.*?)(\s)*(.*?)(\s)*)</td>",
    RegexOptions.IgnoreCase | RegexOptions.Multiline);
int tmpNum = 0;
// Walk every match and append its numbered cell content (group 1) to the label.
foreach (Match NextMatch in TitleMatchs)
{
    ++tmpNum;
    Label1.Text += tmpNum + "<br>****" + NextMatch.Groups[1].Value;
}
//=====================End3========================