[HNOI2006]最短母串問題——AC自動機+狀壓+bfs環形處理

阿新 • • 發佈：2018-10-01

long uil rip none 點距還要 sizeof abc c++

Description

給定n個字符串（S1,S2,…,Sn），要求找到一個最短的字符串T，使得這n個字符串（S1,S2,…,Sn）都是T的子串。

32MB

Input

第一行是一個正整數n（n<=12），表示給定的字符串的個數。以下的n行，每行有一個全由大寫字母組成的字符串。每個字符串的長度不超過50.

Output

只有一行，為找到的最短的字符串T。在保證最短的前提下，如果有多個字符串都滿足要求，那麽必須輸出按字典序排列的第一個。

Sample Input

2
ABCD
BCDABC

Sample Output

ABCDABC

Solution

一看是一個AC自動機。

一看是一個狀壓。

一看AC自動機節點再記錄一個has包含的字符串集合。

一看要輸出方案，肯定也要先考慮怎麽弄出最短的長度。

f[i][S]表示：匹配到AC自動機上的i點、已包含的字符串集合為S時的最短長度；目標是所有S=(1<<n)-1的狀態，即f[i][(1<<n)-1]。

一看轉移有環，然後無法再加入新的階段，因為會MLE會TLE

所以要環形處理。

一看是一個取min的dp

所以可以考慮最短路。

dij，spfa復雜度卡不過。

一看邊權只有1……

BFS大法吼！

長度OK

怎麽處理方案？

ywy_c_asm:

一遍bfs求出最短距離len，然後再一遍dfs找方案。

dfs時，相當於再把bfs的最短路怎麽來的再訪問一遍。如果dis[y]=dis[x]+1那麽可以轉移的，才可以訪問。

還需要知道一個點到終點的最短路。

（反向多起點BFS???不行，或運算不可逆）

我們dfs時就可以實現的。類似樹形dp

然後如果一個點到一個(1<<n)-1狀態的點距離為juli的話，如果有dis[x]+juli[x]==len，那麽，這次選擇的這個y，所填的字符，就是最終答案的一個字符。

直接加入答案字符串。

char從A~Z枚舉。保證第一次搜到的是字典序最小的。

而且一定是連續加入ans字符串。

dfs開頭放上，如果tot==len return；（答案已經收集完整，不必再搜）

代碼：

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N=13;
// Capacity of the per-trie-node arrays and of the answer buffer.
// With n<=12 strings of length <=50 the trie can hold 12*50 nodes besides the
// root (index 0), and the answer can be 12*50 characters stored from index 1,
// so the previous value 600 was one slot short; a little slack is kept.
const int M=12*50+5;
// Capacity of the arrays indexed by get(ptr,st)=ptr*(1<<n)+st.
// NOTE(review): the largest possible index is 600*(1<<12)+(1<<12)-1, which is
// above this value; it is left unchanged here because enlarging the U-sized
// arrays eats into the 32MB limit mentioned in the statement - confirm.
const int U=11*50*((1<<12)-1)+100;
// "Infinity" matching the byte pattern written by memset(...,0x3f,...).
const int inf=0x3f3f3f3f;
// Number of input strings.
int n;
// Input buffer; main reads each string at s+1 so characters are 1-indexed.
char s[55];
// Aho-Corasick automaton over the letters 'A'..'Z'.
//   ch[x][c] : child of node x on letter c; after build() missing children
//              are replaced by the fail-link transition (trie graph).
//   fail[x]  : failure link of node x.
//   has[x]   : bitmask of the input strings that end at x; after build() it
//              also contains the strings ending at nodes on x's fail chain.
//   cnt      : index of the last allocated node (the root is node 0).
// The single instance `ac` is a namespace-scope object, so all arrays start
// zero-initialised.
struct trie{
    int fail[M],ch[M][26];
    int has[M];
    int cnt;
    // Inserts the string s[1..l] (1-indexed) and marks its last node with
    // bit (id-1); id is the 1-based number of the string.
    void ins(char *s,int l,int id){
        int now=0;
        for(int i=1;i<=l;i++){
            // was written with typographic quotes, which is not valid C++
            int x=s[i]-'A';
            if(!ch[now][x]) ch[now][x]=++cnt;
            now=ch[now][x];
        }
        has[now]|=(1<<(id-1));
    }
    // Computes fail links in BFS order and completes the transition table.
    void build(){
        std::queue<int>q;
        for(int i=0;i<=25;i++){
            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);
        }
        while(!q.empty()){
            int x=q.front();q.pop();
            // fail[x] was dequeued earlier, so its mask is already complete
            has[x]|=has[fail[x]];
            for(int i=0;i<=25;i++){
                if(ch[x][i]){
                    fail[ch[x][i]]=ch[fail[x]][i];
                    q.push(ch[x][i]);
                }
                // no real child: reuse the transition of the fail node
                else ch[x][i]=ch[fail[x]][i];
            }
        }
    }
}ac;
// Flattens the pair (automaton node ptr, matched-set bitmask st) into a single
// array index; each node owns a stripe of (1<<n) consecutive slots.
int get(int ptr,int st){
    const int stride=1<<n;
    return st+stride*ptr;
}
// dis[get(ptr,st)]: number of BFS steps from the start state (0,0); filled by bfs().
int dis[U];
// vis[get(ptr,st)]: visited flag; used by bfs(), then cleared in main and reused by dfs().
bool vis[U];
// A search state: P is the automaton node, S the bitmask of strings matched so far.
struct node{
    int P,S;
};
// Work queue of bfs().
queue<node>q;
void bfs(){
    
    memset(dis,0x3f,sizeof dis);
    int str=get(0,0);
    dis[str]=0;
    vis[str]=1;
    node nn;nn.P=0,nn.S=0;
    q.push(nn);
    while(!q.empty()){
        node lp=q.front();q.pop();
        for(int i=0;i<=25;i++){
            int to=ac.ch[lp.P][i];
            int NS=lp.S|ac.has[ac.ch[lp.P][i]];
            int NID=get(to,NS);
            if(!vis[NID]){
                dis[NID]=dis[get(lp.P,lp.S)]+1;
                vis[NID]=1;
                node kk;
                kk.P=to;kk.S=NS;
                q.push(kk);
            }
        }
    }
}
// Length of the shortest answer; set in main from dis[] after bfs().
int len;
// Number of answer characters stored in ans[] so far (written by dfs()).
int tot;
// Answer characters in ans[1..tot]; main prints them from index tot down to 1.
char ans[M];
// juli[get(ptr,st)]: distance from that state to a full-mask state as computed by dfs(); inf if none found.
int juli[U];
// Second pass over the states, following only edges that lie on BFS shortest
// paths (dis[next]==dis[now]+1). It computes juli[now], the remaining distance
// to a full-mask state, and as soon as dis[now]+juli[now]==len it appends the
// letter of the edge just taken to ans[] and returns. Letters are tried in
// order A..Z and are appended while the recursion unwinds, so ans[] holds the
// answer reversed (main prints it from index tot down to 1).
// Expects vis[] to be cleared (except for the start state) before the first call.
void dfs(int ptr,int st){

    int now=get(ptr,st);
    juli[now]=inf;

    // the whole answer has already been collected
    if(tot==len) return;

    // every string is matched: this state is a goal
    if(st==(1<<n)-1) {
        juli[now]=0;return;
    }
    for(int i=0;i<=25;i++){
        int to=ac.ch[ptr][i];
        int NS=st|ac.has[to];
        int NID=get(to,NS);
        if(dis[NID]==dis[now]+1){
            if(!vis[NID]){
                vis[NID]=1;
                dfs(to,NS);
            }
            juli[now]=min(juli[now],juli[NID]+1);
            if(dis[now]+juli[now]==len){
                // was written with typographic quotes, which is not valid C++
                ans[++tot]='A'+i;return;
            }
        }
    }
}
// Reads the strings, builds the automaton, finds the shortest length with
// bfs(), then recovers the lexicographically smallest answer with dfs().
int main(){
    scanf("%d",&n);
    int idx=1;
    while(idx<=n){
        // read at s+1 so that the string is 1-indexed, as ins() expects
        scanf("%s",s+1);
        const int l=static_cast<int>(strlen(s+1));
        ac.ins(s,l,idx);
        ++idx;
    }
    ac.build();
    bfs();

    // shortest distance to any state whose mask contains every string
    const int full=(1<<n)-1;
    len=inf;
    for(int v=0;v<=ac.cnt;++v){
        len=min(len,dis[get(v,full)]);
    }

    // vis[] is reused by dfs(), so reset it first
    memset(vis,0,sizeof vis);
    memset(juli,0x3f,sizeof juli);
    vis[get(0,0)]=1;
    dfs(0,0);

    // dfs() stored the answer back to front
    for(int k=tot;k>0;--k){
        printf("%c",ans[k]);
    }
    return 0;
}

但是不夠優美。

為什麽要bfs然後再dfs呢？

bfs也可以求前驅啊！！
bfs時，第一次更新到的就是最短路。

如果我們按A~Z的順序枚舉char，那麽第一次更新到y時所用的char

也就叫from[y]，也就是到y這個點所形成的字典序最小字符串最後一個字符。

記錄from，pre（也就是前驅）

bfs後，先找到len

再把所有f[i][(1<<n)-1]的字符找出來，cmp一下。

反正復雜度不超過600*600

代碼：

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N=13;
// Capacity of the per-trie-node arrays and of the answer buffers.
// With n<=12 strings of length <=50 the trie can hold 12*50 nodes besides the
// root (index 0), and the answer can be 12*50 characters stored from index 1
// followed by a terminating NUL (it is printed with "%s"), so the previous
// value 600 was too small; a little slack is kept.
const int M=12*50+5;
// Capacity of the arrays indexed by get(ptr,st)=ptr*(1<<n)+st.
// NOTE(review): the largest possible index is 600*(1<<12)+(1<<12)-1, which is
// above this value; it is left unchanged here because enlarging the U-sized
// arrays eats into the 32MB limit mentioned in the statement - confirm.
const int U=11*50*((1<<12)-1)+100;
// "Infinity" matching the byte pattern written by memset(...,0x3f,...).
const int inf=0x3f3f3f3f;
// Number of input strings.
int n;
// Input buffer; main reads each string at s+1 so characters are 1-indexed.
char s[55];
// Aho-Corasick automaton over the letters 'A'..'Z'.
//   ch[x][c] : child of node x on letter c; after build() missing children
//              are replaced by the fail-link transition (trie graph).
//   fail[x]  : failure link of node x.
//   has[x]   : bitmask of the input strings that end at x; after build() it
//              also contains the strings ending at nodes on x's fail chain.
//   cnt      : index of the last allocated node (the root is node 0).
// The single instance `ac` is a namespace-scope object, so all arrays start
// zero-initialised.
struct trie{
    int fail[M],ch[M][26];
    int has[M];
    int cnt;
    // Inserts the string s[1..l] (1-indexed) and marks its last node with
    // bit (id-1); id is the 1-based number of the string.
    void ins(char *s,int l,int id){
        int now=0;
        for(int i=1;i<=l;i++){
            // was written with typographic quotes, which is not valid C++
            int x=s[i]-'A';
            if(!ch[now][x]) ch[now][x]=++cnt;
            now=ch[now][x];
        }
        has[now]|=(1<<(id-1));
    }
    // Computes fail links in BFS order and completes the transition table.
    void build(){
        std::queue<int>q;
        for(int i=0;i<=25;i++){
            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);
        }
        while(!q.empty()){
            int x=q.front();q.pop();
            // fail[x] was dequeued earlier, so its mask is already complete
            has[x]|=has[fail[x]];
            for(int i=0;i<=25;i++){
                if(ch[x][i]){
                    fail[ch[x][i]]=ch[fail[x]][i];
                    q.push(ch[x][i]);
                }
                // no real child: reuse the transition of the fail node
                else ch[x][i]=ch[fail[x]][i];
            }
        }
    }
}ac;
// Flattens the pair (automaton node ptr, matched-set bitmask st) into a single
// array index; each node owns a stripe of (1<<n) consecutive slots.
int get(int ptr,int st){
    const int stride=1<<n;
    return st+stride*ptr;
}
// dis[get(ptr,st)]: number of BFS steps from the start state (0,0); filled by bfs().
int dis[U];
// vis[get(ptr,st)]: set once bfs() has reached the state.
bool vis[U];
// A search state: P is the automaton node, S the bitmask of strings matched so far.
struct node{
    int P,S;
};
// Work queue of bfs().
queue<node>q;
// pre[id]: id of the state from which bfs() first reached state id; -1 for the start state.
int pre[U];
// from[id]: letter index plus one (1 = 'A') of the edge by which bfs() first reached state id.
int from[U];
void bfs(){
    memset(dis,0x3f,sizeof dis);
    int str=get(0,0);
    dis[str]=0;
    vis[str]=1;
    pre[str]=-1;//warning!!
    node nn;nn.P=0,nn.S=0;
    q.push(nn);
    while(!q.empty()){
        node lp=q.front();q.pop();
        for(int i=0;i<=25;i++){
            int to=ac.ch[lp.P][i];
            int NS=lp.S|ac.has[ac.ch[lp.P][i]];
            int NID=get(to,NS);
            if(!vis[NID]){
                dis[NID]=dis[get(lp.P,lp.S)]+1;
                vis[NID]=1;
                from[NID]=i+1;//warning!!!!
                pre[NID]=get(lp.P,lp.S);
                node kk;
                kk.P=to;kk.S=NS;
                q.push(kk);
            }
        }
    }
}
// Length of the shortest answer; set in main from dis[] after bfs().
int len;
// NOTE(review): not referenced anywhere in the visible code of this version.
int tot;
// Best candidate found so far, characters in ans[1..len]; printed with "%s" from ans+1.
char ans[M];
// Scratch buffer in which main rebuilds one candidate string, characters in a[1..len].
char a[M];
// True once ans[] holds a first candidate.
bool fl;
// Returns true when a[1..len] is lexicographically smaller than b[1..len].
// Both strings are 1-indexed and have the global length len.
bool cmp(char *a,char *b){//a better than b?
    for(int i=1;i<=len;i++){
        if(a[i]<b[i]) return true;
        if(a[i]>b[i]) return false;
    }
    // Equal strings: a is not strictly better. The original fell off the end
    // of the function here, which is undefined behaviour.
    return false;
}
// Reads the strings, builds the automaton and runs bfs(); then, for every
// full-mask state at the minimum distance, rebuilds its string by following
// pre[]/from[] and keeps the lexicographically smallest one.
int main(){
    scanf("%d",&n);
    for(int i=1;i<=n;i++){
        // read at s+1 so that the string is 1-indexed, as ins() expects
        scanf("%s",s+1);
        int l=strlen(s+1);
        ac.ins(s,l,i);
    }
    ac.build();
    bfs();
    // shortest distance to any state whose mask contains every string
    len=inf;
    for(int i=0;i<=ac.cnt;i++){
        int id=get(i,(1<<n)-1);
        len=min(len,dis[id]);
    }
    fl=false;
    for(int i=0;i<=ac.cnt;i++){
        int id=get(i,(1<<n)-1);
        if(dis[id]==len){
            // walk back to the start state, filling a[] from the last letter
            int tmp=len;
            int z=id;
            while(pre[z]!=-1){
                // was written with typographic quotes, which is not valid C++
                a[tmp]='A'+(from[z]-1);
                z=pre[z];tmp--;
            }
            if(!fl){
                fl=true;
                memcpy(ans,a,sizeof a);
            }
            else{
                if(cmp(a,ans)) memcpy(ans,a,sizeof a);
            }
        }
    }
    // a[] and ans[] are zero-initialised globals, so ans[len+1] is the NUL
    printf("%s",ans+1);
    return 0;
}

但是還不夠優美！！

為什麽bfs之後還要再比較一遍字符串呢？？

bfs中，第一次到達一個(1<<n)-1的點,

這個點就一定是最優解的最後一個節點！！！

因為，bfs分層圖保證了最短。

for char A~Z保證了字典序最優。

直接輸出即可。

代碼：

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N=13;
// Capacity of the per-trie-node arrays and of the answer buffer.
// With n<=12 strings of length <=50 the trie can hold 12*50 nodes besides the
// root (index 0), and the answer can be 12*50 characters stored from index 1,
// so the previous value 600 was one slot short; a little slack is kept.
const int M=12*50+5;
// Capacity of the arrays indexed by get(ptr,st)=ptr*(1<<n)+st.
// NOTE(review): the largest possible index is 600*(1<<12)+(1<<12)-1, which is
// above this value; it is left unchanged here because enlarging the U-sized
// arrays eats into the 32MB limit mentioned in the statement - confirm.
const int U=11*50*((1<<12)-1)+100;
// "Infinity" matching the byte pattern written by memset(...,0x3f,...).
const int inf=0x3f3f3f3f;
// Number of input strings.
int n;
// Input buffer; main reads each string at s+1 so characters are 1-indexed.
char s[55];
// Aho-Corasick automaton over the letters 'A'..'Z'.
//   ch[x][c] : child of node x on letter c; after build() missing children
//              are replaced by the fail-link transition (trie graph).
//   fail[x]  : failure link of node x.
//   has[x]   : bitmask of the input strings that end at x; after build() it
//              also contains the strings ending at nodes on x's fail chain.
//   cnt      : index of the last allocated node (the root is node 0).
// The single instance `ac` is a namespace-scope object, so all arrays start
// zero-initialised.
struct trie{
    int fail[M],ch[M][26];
    int has[M];
    int cnt;
    // Inserts the string s[1..l] (1-indexed) and marks its last node with
    // bit (id-1); id is the 1-based number of the string.
    void ins(char *s,int l,int id){
        int now=0;
        for(int i=1;i<=l;i++){
            // was written with typographic quotes, which is not valid C++
            int x=s[i]-'A';
            if(!ch[now][x]) ch[now][x]=++cnt;
            now=ch[now][x];
        }
        has[now]|=(1<<(id-1));
    }
    // Computes fail links in BFS order and completes the transition table.
    void build(){
        std::queue<int>q;
        for(int i=0;i<=25;i++){
            if(ch[0][i]) fail[ch[0][i]]=0,q.push(ch[0][i]);
        }
        while(!q.empty()){
            int x=q.front();q.pop();
            // fail[x] was dequeued earlier, so its mask is already complete
            has[x]|=has[fail[x]];
            for(int i=0;i<=25;i++){
                if(ch[x][i]){
                    fail[ch[x][i]]=ch[fail[x]][i];
                    q.push(ch[x][i]);
                }
                // no real child: reuse the transition of the fail node
                else ch[x][i]=ch[fail[x]][i];
            }
        }
    }
}ac;
// Flattens the pair (automaton node ptr, matched-set bitmask st) into a single
// array index; each node owns a stripe of (1<<n) consecutive slots.
int get(int ptr,int st){
    const int stride=1<<n;
    return st+stride*ptr;
}
// dis[get(ptr,st)]: number of BFS steps from the start state (0,0); filled by bfs().
int dis[U];
// vis[get(ptr,st)]: set once bfs() has reached the state.
bool vis[U];
// A search state: P is the automaton node, S the bitmask of strings matched so far.
struct node{
    int P,S;
};
// Work queue of bfs().
queue<node>q;
// pre[id]: id of the state from which bfs() first reached state id; -1 for the start state.
int pre[U];
// from[id]: letter index plus one (1 = 'A') of the edge by which bfs() first reached state id.
int from[U];
// Number of characters bfs() has written to ans[].
int len;
// Answer characters in ans[1..len], last letter first; main prints them from index len down to 1.
char ans[M];
// Breadth-first search over (automaton node, matched-set) states starting from
// (0,0), trying letters in order A..Z. As soon as a state whose mask contains
// every string is discovered, the path to it is read back through pre[]/from[]
// into ans[1..len] (last letter first) and the search stops.
void bfs(){
    memset(dis,0x3f,sizeof dis);
    int str=get(0,0);
    dis[str]=0;
    vis[str]=1;
    pre[str]=-1;// sentinel that stops the backtracking below
    node nn;nn.P=0,nn.S=0;
    q.push(nn);
    while(!q.empty()){
        node lp=q.front();q.pop();
        for(int i=0;i<=25;i++){
            int to=ac.ch[lp.P][i];
            int NS=lp.S|ac.has[ac.ch[lp.P][i]];
            int NID=get(to,NS);
            if(!vis[NID]){
                dis[NID]=dis[get(lp.P,lp.S)]+1;
                vis[NID]=1;
                from[NID]=i+1;// shifted by one so that 0 is never a letter
                pre[NID]=get(lp.P,lp.S);
                node kk;
                kk.P=to;kk.S=NS;
                q.push(kk);
                if(NS==(1<<n)-1){
                    // first full-mask state: collect its path and finish
                    int z=NID;
                    while(pre[z]!=-1){
                        // was written with typographic quotes, which is not valid C++
                        ans[++len]='A'+(from[z]-1);
                        z=pre[z];
                    }
                    return;
                }
            }
        }
    }
}

// Reads the strings, builds the automaton, lets bfs() collect the answer and
// prints it.
int main(){
    scanf("%d",&n);
    int idx=1;
    while(idx<=n){
        // read at s+1 so that the string is 1-indexed, as ins() expects
        scanf("%s",s+1);
        const int l=static_cast<int>(strlen(s+1));
        ac.ins(s,l,idx);
        ++idx;
    }
    ac.build();
    bfs();
    // bfs() stored the answer back to front
    for(int k=len;k>0;--k){
        printf("%c",ans[k]);
    }
    return 0;
}

總結：

有的時候我們只關心最優答案。

但有的時候我們也關心方案。（畢竟知道方案比較實用）

方案的輸出就要求高了一些。

但是肯定也是在最優答案的基礎上的。

關於路徑轉移，湊字典序最小，經常通過松弛最優解的順序，恰好可以保證松弛路徑就是最小字典序。

本題就是一個很好的例子。

[HNOI2006]最短母串問題——AC自動機+狀壓+bfs環形處理

BZOJ 1195 [HNOI2006]最短母串 (Trie圖+狀壓+bfs最短路)

BZOJ1195 LOJ10061 題目大意：給你$n$個模式串，求一個最短且字典序最小的文字串並輸出這個串，$n<=12,len<=50$ 首先對所有模式串構造$Trie$圖，$Trie$圖的性質和$DP$的性質簡直是完美契合.. 模式串數量很少，考慮狀壓定義$f[x][s]

[HNOI2006]最短母串問題——AC自動機+狀壓+bfs環形處理

long uil rip none 點距還要 sizeof abc c++ Description 給定n個字符串（S1,S2,?,Sn），要求找到一個最短的字符串T，使得這n個字符串（S1,S2,?,Sn）都是T的子串。 32MB Input 第一行是一個正整數n

BZOJ1195[HNOI2006]最短母串——AC自動機+BFS+狀態壓縮

一行 AC urn printf 最少步數 ron 題意 mes 大寫題目描述給定n個字符串（S1,S2,„,Sn），要求找到一個最短的字符串T，使得這n個字符串（S1,S2,„,Sn）都是T的子串。輸入第一行是一個正整數n

[HNOI2006]最短母串問題 AC自動機

bit strlen 出現 spl ace 第一次 gis add 一個題面：洛谷題解：　　如果我們對這些小串建出AC自動機，那麽我們所求的大串就是要求滿足遍歷過所有AC自動機上的葉子節點，且經過步數最少的串。如果有多個步數相同的串，要輸出字典序最小的串。　　

[bzoj1195][HNOI2006]最短母串_動態規劃_狀壓dp

字典數據 n) 求一個表示 n! 規劃 esp zoj 最短母串 bzoj-1195 HNOI-2006 題目大意：給一個包含n個字符串的字符集，求一個字典序最小的字符串使得字符集中所有的串都是該串的子串。註釋：$1\le n\le 12$，$1\le max l

BZOJ1195 HNOI2006最短母串（狀壓dp）

　　按照子串出現的先後考慮。令f[i][j]為已經出現的字串集合為i，最後一個出現的字串為j時的最短串長，預處理一下任意兩個串的最長重疊長度，轉移顯然。有點麻煩的是字典序，強行增加程式碼難度。　　另一個比較簡單的做法是上AC自動機，建出來後類似地令f[i][j]為已經出現的字串集合為i，在自動機上點j時的

【刷題】BZOJ 1195 [HNOI2006]最短母串

getchar() n) init double gist c++ 超過字典序 ble Description 給定n個字符串（S1,S2,?,Sn），要求找到一個最短的字符串T，使得這n個字符串（S1,S2,?,Sn）都是T的子串。 Input 第一行是一個正整數n（n

【狀態壓縮dp】1195: [HNOI2006]最短母串

ring output 之前狀態壓縮 ems cout 長度 html 字典一個清晰的思路就是狀壓dp；不過也有AC自動機+BFS的做法 Description 給定n個字符串（S1,S2,„,Sn），要求找到一個最短的字符串T，使得這n個字

P2322 [HNOI2006]最短母串問題

ostream nod div noi 重復 tle 子串 pri math 傳送門看到題面肯定先搞個AC自動機考慮一位一位填字符那麽在自動機上就是一位一位匹配考慮什麽時候包含了所有子串顯然是經過了所有的結束標記（當然包括fail上的）最多只有11個

BZOJ1195: [HNOI2006]最短母串（Trie圖，搜尋）

Description 給定n個字串（S1,S2,„,Sn），要求找到一個最短的字串T，使得這n個字串（S1,S2,„,Sn）都是T的子串。 Input 第一行是一個正整數n（n<=12），表示給定的字串的個數。以下的n行，每行有一個全由大寫字母組成的字串。每

bzoj 1195: [HNOI2006]最短母串

bool string class pan urn make second style efi MEL卡空間幹嘛。。。不過代碼應該沒有問題 /* 建立AC自動機然後按照字典序跑bfs 直到找到組合要求的字符串 */ #inc

洛谷P2322 最短母串問題 [HNOI2006] AC自動機

style org pan 指向 https 之前 trie圖 pro tps 正解:AC自動機+最短路解題報告: 傳送門! 這題之前考試考到辣,,,我連題目都沒看懂這種傻逼事兒就不要說了QAQ 然後就港正解辣首先這題可以用dp做?等下寫但是一般來說看到這

[BZOJ 1195] 最短母串

limits display 定義 limit set www 選擇 -s oid Link:https://www.lydsy.com/JudgeOnline/problem.php?id=1195 Solution: 看到數據範圍n<=12，就要往狀壓DP上想

bzoj1195 最短母串

spa 路徑 cstring scan queue == namespace span ret 狀態壓 1 #include<queue> 2 #include<cstdio> 3 #include<cstring>

[bzoj1195][DP]最短母串

Description 給定n個字串（S1,S2,„,Sn），要求找到一個最短的字串T，使得這n個字串（S1,S2,„,Sn）都是T的子串。 Input 第一行是一個正整數n（n<=12），表示給定的字串的個數。以下的n行，每行有一個全由大寫字母組

hdu 3001 Travelling 經過所有點(最多兩次)的最短路徑三進制狀壓dp

所有 sin 狀態三進制狀壓dp math 移位 sizeof bits 數據題目鏈接題意給定一個$N$個點的無向圖，求從任意一個點出發，經過所有點的最短路徑長度（每個點至多可以經過兩次）。思路狀態表示、轉移及大體思路與 poj 3311 Hie with

BZOJ1559 [JSOI2009]密碼【AC自動機 + 狀壓dp】

包含關系 || fail queue string ron 密碼 sam lse 題目鏈接 BZOJ1559 題解考慮到這是一個包含子串的問題，而且子串非常少，我們考慮$AC$自動機上的狀壓$dp$ 設$f[i][j][s]$表示長度為$i$的串，匹配到了

hdu 4758 AC自動機+狀壓

這道題卡了好幾天，然後發現狀態寫錯的，dp[i][j][k][x]表示走到（i,j）點在AC自動機上第K個點狀態為X的步數，有狀態K是因為要求必須包含這兩種狀態，有一點不怎麼明白，這道題應該是能用到這兩個字串的方案數，比如說用2.5個第一種和1個第二種這種應該也是可以的。要不然最後統計答

HDU 2825 AC自動機+狀壓DP

這道題是說給定m個單詞，求最少包含q個單詞，長度為n的單詞的種類。這道題我們先對單詞建一個Trie，然後在樹上dp，dp[i][j][k]表示長度為i在樹上的第j個節點，使用了第K種方案的結果，第K種方案的意思是K的二進位制中為1的部分表示使用過了，為0的部分表示沒有使用過，最後將長度為

字串 - AC自動機 - 狀壓dp

題目大意：給定m個01串，問有多少長度為2n的反迴文01串，包含這m個串。一個01串被稱為反迴文的，當且僅當 ∀ i

[HNOI2006]最短母串問題——AC自動機+狀壓+bfs環形處理

Description

Input

Output

Sample Input

Sample Output

Solution

相關推薦