1. 程式人生 > >HDU - 6096 :String (AC自動機,已知前後綴,匹配單詞,弱資料)

HDU - 6096 :String (AC自動機,已知前後綴,匹配單詞,弱資料)

Bob has a dictionary with N words in it.
Now there is a list of words in which the middle part of the word has continuous letters disappeared. The middle part does not include the first and last character.
We only know the prefix and suffix of each word, and the number of characters missing is uncertain, it could be 0. But the prefix and suffix of each word can not overlap.
For each word in the list, Bob wants to determine which word is in the dictionary by prefix and suffix.
There are probably many answers. You just have to figure out how many words may be the answer.

Input

The first line of the input gives the number of test cases T; T test cases follow.
Each test case contains two integer N and Q, The number of words in the dictionary, and the number of words in the list.
Next N lines, each line has a string Wi, representing the i-th word in the dictionary (0 < |Wi| ≤ 100000).
Next Q lines, each line has two strings Pi, Si, representing the prefix and suffix of the i-th word in the list (0 < |Pi|, |Si| ≤ 100000, 0 < |Pi| + |Si| ≤ 100000).
All of the above characters are lowercase letters.
The dictionary does not contain the same words.

Limits
T ≤ 5
0 < N, Q ≤ 100000
∑(|Si| + |Pi|) ≤ 500000
∑|Wi| ≤ 500000
Output

For each test case, output Q lines, an integer per line, represents the answer to each word in the list.
Sample Input

1
4 4
aba
cde
acdefa
cdef
a a
cd ef
ac a
ce f

Sample Output

2
1
1
0

題意:已知N個單詞,Q次詢問,每次詢問給出pre和suf,統計有多少個單詞的字首為pre,字尾為suf,而且要滿足二者不相交。

思路:我們把詢問建立AC自動機,單詞用來跑AC自動機,跑到了就累計。

        合理建立AC自動機的方式為:每個詢問轉化為 suf+'{'+pre;

        跑AC自動機的方式為: 每個單詞轉化為 S+'{'+S;

跑的時候如果fail可以走到某個詢問,說明這個詢問是這裡的前後綴。(AC了但是不嚴謹的程式碼)

#include<bits/stdc++.h>
#define rep(i,a,b) for(int i=a;i<=b;i++)
using namespace std;
// Bound on the amount of text per test case: the words total at most 500000
// characters, and so do the queries.
const int maxn=500010;
// Capacity of the trie. Each query adds up to |suf|+|pre| letter nodes plus one
// separator node, so the node count is bounded by 500000+100000 and can exceed
// maxn (e.g. 100 queries with 4999-letter suffixes and 1-letter prefixes need
// 500026 nodes). Every array indexed by a trie node uses this larger bound.
// Cost: ch grows from about 54 MB to about 65 MB.
const int maxnode=600010;
// c   : all dictionary words stored back to back, 1-indexed
// s   : scratch buffer for the word currently being read (filled from s+1)
// pre, suf : prefix / suffix of the query currently being read (from index 1)
char c[maxn],s[maxn],pre[maxn],suf[maxn];
// tot    : number of characters stored in c
// F[i]   : offset in c just before the first character of word i
// L[i]   : length of word i
// ch     : trie / automaton transitions; symbols 0..25 are 'a'..'z', 26 is the separator
// cnt    : number of trie nodes created so far (node 0 is the root)
// pos[i] : trie node at which query i ends
int tot,F[maxn],L[maxn],ch[maxnode][27],cnt,pos[maxn];
// dep  : per-node length recorded by insert and compared with the word length in solve
// sum  : per-node match counters
// fail : failure links
// q    : BFS queue used by buildfail (nodes stay in it in BFS order)
// head, tail : write / read cursors of q
int N,Q,dep[maxnode],sum[maxnode],fail[maxnode],q[maxnode],head,tail;
void insert(int opt){
    int Now=0,len1=strlen(suf+1),len2=strlen(pre+1);
    rep(i,1,len1){
        if(!ch[Now][suf[i]-'a']) ch[Now][suf[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][suf[i]-'a'];
    }
    if(!ch[Now][26]) ch[Now][26]=++cnt,sum[cnt]=0; Now=ch[Now][26];
    rep(i,1,len2){
        if(!ch[Now][pre[i]-'a']) ch[Now][pre[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][pre[i]-'a'];
    }
    pos[opt]=Now; dep[Now]=len1+len2;
}
// Builds the failure links breadth-first and completes the transition table:
// a missing transition ch[u][a] is replaced by the transition of fail[u], so
// after this call every (node, symbol) pair has a target.
// The root's children are only enqueued; their fail entries are not written
// here and are expected to be 0 already (main clears `fail` before each case).
// All visited nodes remain in q[1..head] in BFS order.
void buildfail()
{
    head=0;
    tail=0;
    for(int a=0;a<27;++a){
        const int kid=ch[0][a];
        if(kid) q[++head]=kid;
    }
    while(tail<head){
        const int u=q[++tail];
        const int f=fail[u];
        for(int a=0;a<27;++a){
            int &kid=ch[u][a];
            if(!kid){
                // No real child: borrow the fallback transition.
                kid=ch[f][a];
                continue;
            }
            fail[kid]=ch[f][a];
            q[++head]=kid;
        }
    }
}
void solve(int B,int len)
{
    int Now=0;
    rep(i,B+1,B+len) Now=ch[Now][c[i]-'a'];
    Now=ch[Now][26];
    rep(i,B+1,B+len){
         Now=ch[Now][c[i]-'a']; int tmp=Now;
         while(dep[tmp]>len)  tmp=fail[tmp]; sum[tmp]++;
    }
}
int main()
{
    int T; scanf("%d",&T);
    while(T--){
        tot=cnt=0;
        memset(fail,0,sizeof(fail));
        memset(ch,0,sizeof(ch));
        scanf("%d%d",&N,&Q);
        rep(i,1,N){
            scanf("%s",s+1);
            L[i]=strlen(s+1); F[i]=tot;
            rep(j,1,L[i]) c[++tot]=s[j]; //儲存單詞
        }
        rep(i,1,Q){
            scanf("%s%s",pre+1,suf+1);
            insert(i);
        }
        buildfail();
        rep(i,1,N) solve(F[i],L[i]);
        for(int i=cnt;i>=1;i--) sum[fail[q[i]]]+=sum[q[i]]; //累加字首和
        rep(i,1,Q) printf("%d\n",sum[pos[i]]);
    }
    return 0;
}

 雖然上面的程式碼AC了,但是我感覺是可以hack掉,應該是資料比較水。 因為一個單詞對一個詢問最多有一個貢獻,而這樣跑下來有的單詞的貢獻可能大於1,所以我們加一個時間戳,保證每個單詞的貢獻最多為1。

#include<bits/stdc++.h>
#define rep(i,a,b) for(int i=a;i<=b;i++)
using namespace std;
// Bound on the amount of text per test case: the words total at most 500000
// characters, and so do the queries.
const int maxn=500010;
// Capacity of the trie. Each query adds up to |suf|+|pre| letter nodes plus one
// separator node, so the node count is bounded by 500000+100000 and can exceed
// maxn (e.g. 100 queries with 4999-letter suffixes and 1-letter prefixes need
// 500026 nodes). Every array indexed by a trie node uses this larger bound.
// Cost: ch grows from about 54 MB to about 65 MB.
const int maxnode=600010;
// c   : all dictionary words stored back to back, 1-indexed
// s   : scratch buffer for the word currently being read (filled from s+1)
// pre, suf : prefix / suffix of the query currently being read (from index 1)
char c[maxn],s[maxn],pre[maxn],suf[maxn];
// tot    : number of characters stored in c
// F[i]   : offset in c just before the first character of word i
// L[i]   : length of word i
// ch     : trie / automaton transitions; symbols 0..25 are 'a'..'z', 26 is the separator
// cnt    : number of trie nodes created so far (node 0 is the root)
// pos[i] : trie node at which query i ends
// Laxt   : per-node stamp holding the index of the last word that visited the node
int tot,F[maxn],L[maxn],ch[maxnode][27],cnt,pos[maxn],Laxt[maxnode];
// dep  : per-node length recorded by insert and compared with the word length in solve
// sum  : per-node match counters
// fail : failure links
// q    : BFS queue used by buildfail
// head, tail : write / read cursors of q
int N,Q,dep[maxnode],sum[maxnode],fail[maxnode],q[maxnode],head,tail;
void insert(int opt){
    int Now=0,len1=strlen(suf+1),len2=strlen(pre+1);
    rep(i,1,len1){
        if(!ch[Now][suf[i]-'a']) ch[Now][suf[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][suf[i]-'a'];
    }
    if(!ch[Now][26]) ch[Now][26]=++cnt,sum[cnt]=0; Now=ch[Now][26];
    rep(i,1,len2){
        if(!ch[Now][pre[i]-'a']) ch[Now][pre[i]-'a']=++cnt,sum[cnt]=0;
        Now=ch[Now][pre[i]-'a'];
    }
    pos[opt]=Now; dep[Now]=len1+len2;
}
void buildfail()
{
    head=tail=0;
    for(int i=0;i<=26;i++) if(ch[0][i]) q[++head]=ch[0][i];
    while(tail<head){
        int Now=q[++tail];
        for(int i=0;i<=26;i++){
            if(ch[Now][i]) {
                fail[ch[Now][i]]=ch[fail[Now]][i];
                q[++head]=ch[Now][i];
            }
            else ch[Now][i]=ch[fail[Now]][i];
        }
    }
}
void solve(int time,int B,int len)
{
    int Now=0;
    rep(i,B+1,B+len) Now=ch[Now][c[i]-'a'];
    Now=ch[Now][26];
    rep(i,B+1,B+len){
         Now=ch[Now][c[i]-'a']; int tmp=Now;
         while(tmp) {
            if(Laxt[tmp]==time) break;
            Laxt[tmp]=time;//加一個時間戳,保證每個單詞的貢獻最多為1
            if(dep[tmp]<=len) sum[tmp]++;
            tmp=fail[tmp];
         }
    }
}
int main()
{
    int T; scanf("%d",&T);
    while(T--){
        tot=cnt=0;
        memset(fail,0,sizeof(fail));
        memset(ch,0,sizeof(ch));
        memset(Laxt,0,sizeof(Laxt));
        scanf("%d%d",&N,&Q);
        rep(i,1,N){
            scanf("%s",s+1);
            L[i]=strlen(s+1); F[i]=tot;
            rep(j,1,L[i]) c[++tot]=s[j]; //儲存單詞
        }
        rep(i,1,Q){
            scanf("%s%s",pre+1,suf+1);
            insert(i);
        }
        buildfail();
        rep(i,1,N) solve(i,F[i],L[i]);
        rep(i,1,Q) printf("%d\n",sum[pos[i]]);
    }
    return 0;
}