1. 程式人生 > >萬用字元匹配字串 Wildcard Matching

萬用字元匹配字串 Wildcard Matching

問題:實現支援?和*兩個萬用字元的字串匹配函式。

Implement wildcard pattern matching with support for '?' and '*'.

'?' Matches any single character.
'*' Matches any sequence of characters (including the empty sequence).

The matching should cover the entire input string (not partial).

The function prototype should be:
bool isMatch(const char *s, const char *p)

Some examples:
isMatch("aa","a") → false
isMatch("aa","aa") → true
isMatch("aaa","aa") → false
isMatch("aa", "*") → true
isMatch("aa", "a*") → true
isMatch("ab", "?*") → true
isMatch("aab", "c*a*b") → false

思路一:遞迴求解。雖然已經把重複出現的*過濾,不過超時了

class Solution {
public:
    /**
     * Wildcard match of the whole string s against pattern p.
     * '?' matches exactly one character, '*' matches any (possibly empty) sequence.
     *
     * @param s  NUL-terminated input string (may be null).
     * @param p  NUL-terminated pattern (may be null).
     * @return   true when p matches all of s; false if either pointer is null.
     */
    bool isMatch(const char *s, const char *p) {
        if(!s || !p)
            return false;
        return isValid(s, p);
    }

    /**
     * Recursive backtracking matcher. Both pointers must be non-null.
     *
     * Literal and '?' pattern characters are consumed in a loop; recursion only
     * happens at a '*', where every possible split point of s is tried.
     * Worst-case running time is exponential in the number of '*' groups.
     */
    bool isValid(const char *s, const char *p)
    {
        // Consume the run of non-'*' pattern characters.
        while(*p != '\0' && *p != '*')
        {
            // Both '?' and a literal need a real character to consume; without
            // this check '?' would "match" the terminator and walk past the
            // end of s.
            if(*s == '\0')
                return false;
            if(*p != '?' && *p != *s)
                return false;
            ++s;
            ++p;
        }

        if(*p == '\0')
            return *s == '\0';

        // *p is '*': consecutive stars are equivalent to a single one.
        while(*p == '*')
            ++p;

        // A trailing '*' absorbs whatever is left of s.
        if(*p == '\0')
            return true;

        // Let the '*' absorb 0, 1, 2, ... characters of s (including all of it).
        for(;; ++s)
        {
            if(isValid(s, p))
                return true;
            if(*s == '\0')
                return false;
        }
    }
};
思路二:動態規劃法

設定狀態量H[pn+1][sn+1]。H[i][j]表示p的前i個字元能否匹配成功s的前j個字元。

遞推關係:如果H[i-1][j-1]=1,若p[i]='?'或者p[i]==s[j],那麼H[i][j]為1;若p[i]='*',那麼H[i][j-1]到H[i][sn]都為1。

初始條件:H[0][0]=1。

注意:必須要提前把不可能匹配的情況排除,否則會超時。當p串中非*字元的個數多於s串的字元個數時,匹配不可能成功。

class Solution {
public:
    /**
     * Wildcard match of the whole string s against pattern p using a DP table.
     * '?' matches exactly one character, '*' matches any (possibly empty) sequence.
     *
     * H[i][j] == 1 means the first i pattern characters match the first j
     * characters of s.
     *
     * @param s  NUL-terminated input string (may be null).
     * @param p  NUL-terminated pattern (may be null).
     * @return   true when p matches all of s; false if either pointer is null.
     */
    bool isMatch(const char *s, const char *p) {
        if(s == NULL || p == NULL)
            return false;

        // Count pattern length (pn), string length (sn) and the number of '*' (stars).
        int stars = 0;
        const char *cur = p;
        for(; *cur != 0; cur++)
        {
            if(*cur == '*')
                stars++;
        }
        const int pn = static_cast<int>(cur - p);

        cur = s;
        while(*cur != 0)
            cur++;
        const int sn = static_cast<int>(cur - s);

        if(pn == stars && stars > 0) // a pattern made only of '*' matches anything
            return true;
        if(pn - stars > sn) // more non-'*' pattern characters than input characters
            return false;

        // NOTE: variable-length array (compiler extension) living on the stack;
        // very long inputs can exhaust it.
        int H[pn+1][sn+1];
        memset(H, 0, sizeof(H));

        H[0][0] = 1;
        // Leading '*' can match the empty prefix of s.
        for(int i=1;i<=pn;i++)
        {
            if(p[i-1] != '*')
                break;
            H[i][0] = 1;
        }

        for(int i=1;i<=pn;i++)
        {
            for(int j=1;j<=sn;j++)
            {
                if(H[i-1][j-1] != 1)
                    continue;

                // '*' must be tested before literal equality: otherwise a '*'
                // in the pattern facing a literal '*' in s would be treated as
                // a single-character match and lose its wildcard meaning.
                if(p[i-1] == '*')
                {
                    for(int k=j-1;k<=sn;k++)
                        H[i][k] = 1;
                    // Everything from the earliest reachable column is now
                    // set; later columns could only re-fill a subset.
                    break;
                }
                else if(p[i-1] == '?' || p[i-1] == s[j-1])
                {
                    H[i][j] = 1;
                }
            }
        }

        // The recurrence above never carries a match at column sn into a
        // following '*' row, so trailing '*' are resolved here: find the longest
        // pattern prefix that matches all of s ...
        int last = pn;
        while(last >= 0 && H[last][sn] != 1)
            last--;
        if(last < 0) // no prefix matches all of s (also avoids reading p[-1])
            return false;

        // ... and require the rest of the pattern to consist of '*' only.
        for(int i=last;i<pn;i++)
        {
            if(p[i] != '*')
                return false;
        }
        return true;
    }
};

思路二的優化:上面的動態規劃中對'*'星號的遞推處理不太恰當,使得DP之後還要再處理一下。現在改進一下DP的遞推方法:

在DP的二重迴圈遍歷到H[i][j](即判斷p串的前i項是否匹配s串的前j項)時,

如果有p[i]='?'或者p[i]==s[j] ,並且,H[i-1][j-1]=1,那麼H[i][j]為1。

如果有p[i]='*',並且,H[i-1][j]=1 ,那麼H[i][j]~H[i][sn]都為1。

另外初始情況時,要把*的情況考慮。

class Solution {
public:
    /**
     * Wildcard match of the whole string s against pattern p using a DP table.
     * '?' matches exactly one character, '*' matches any (possibly empty) sequence.
     *
     * H[i][j] is true when the first i pattern characters match the first j
     * characters of s. Each cell is computed in O(1):
     *   - p[i-1] is '*':           H[i][j] = H[i-1][j] (star matches nothing)
     *                                      || H[i][j-1] (star absorbs s[j-1])
     *   - p[i-1] is '?' or s[j-1]: H[i][j] = H[i-1][j-1]
     *   - otherwise:               H[i][j] = false
     *
     * @param s  NUL-terminated input string (may be null).
     * @param p  NUL-terminated pattern (may be null).
     * @return   true when p matches all of s; false if either pointer is null.
     */
    bool isMatch(const char *s, const char *p) {
        if(s == NULL || p == NULL)
            return false;

        // Count pattern length (pn), string length (sn) and the number of '*' (stars).
        int stars = 0;
        const char *cur = p;
        for(; *cur != 0; cur++)
        {
            if(*cur == '*')
                stars++;
        }
        const int pn = static_cast<int>(cur - p);

        cur = s;
        while(*cur != 0)
            cur++;
        const int sn = static_cast<int>(cur - s);

        if(pn == stars && stars > 0)  // a pattern made only of '*' matches anything
            return true;
        if(pn - stars > sn)  // more non-'*' pattern characters than input characters
            return false;

        // NOTE: variable-length array (compiler extension) living on the stack;
        // one byte per cell keeps it as small as a 2-D table can be.
        bool H[pn+1][sn+1];
        memset(H, 0, sizeof(H));

        H[0][0] = true;
        // Leading '*' can match the empty prefix of s; the recurrence below
        // spreads that along the row.
        for(int i=1;i<=pn;i++)
        {
            if(p[i-1] != '*')
                break;
            H[i][0] = true;
        }

        for(int i=1;i<=pn;i++)
        {
            const char pc = p[i-1];
            for(int j=1;j<=sn;j++)
            {
                if(pc == '*')
                    H[i][j] = H[i-1][j] || H[i][j-1];
                else if(pc == '?' || pc == s[j-1])
                    H[i][j] = H[i-1][j-1];
                // otherwise the cell stays false from the memset
            }
        }
        return H[pn][sn];
    }
};

思路三:在網上看到的優化方法。記錄前一個*字元的位置,優先進行單字元匹配,當失敗的時候再回來進行通配。

class Solution {
public:
    /**
     * Wildcard match of the whole string s against pattern p, greedy with
     * single-star backtracking.
     * '?' matches exactly one character, '*' matches any (possibly empty) sequence.
     *
     * Characters are matched one by one; the position right after the most
     * recent '*' is remembered so that, on a mismatch, that star can absorb one
     * more character of s and matching resumes from there.
     *
     * @param s  NUL-terminated input string (may be null).
     * @param p  NUL-terminated pattern (may be null).
     * @return   true when p matches all of s. Two null pointers compare as a
     *           match; exactly one null pointer is a mismatch.
     */
    bool isMatch(const char *s, const char *p) {
        // Never dereference a null pointer: only "both null" counts as a match.
        if(!s || !p)
            return !s && !p;

        const char *star_p = NULL;  // pattern position just after the last '*'
        const char *star_s = NULL;  // position in s where that '*' started matching

        while(*s)
        {
            // '*' is tested before literal equality so that a '*' in the
            // pattern keeps its wildcard meaning even when s itself holds '*'.
            if(*p == '*')
            {
                // Skip all consecutive '*'.
                while(*p == '*')
                    ++p;

                if(!*p)
                    return true;  // trailing '*' absorbs the rest of s

                star_p = p;
                star_s = s;
            }
            else if(*p == '?' || *p == *s)
            {
                ++p;
                ++s;
            }
            else if(star_p)
            {
                // Mismatch: let the last '*' swallow one more character of s
                // and retry the pattern from just after that '*'.
                s = ++star_s;
                p = star_p;
            }
            else
                return false;
        }

        // s is exhausted: whatever is left of the pattern must be '*' only.
        while(*p)
        {
            if(*p++ != '*')
                return false;
        }

        return true;
    }
};