1. 程式人生 > >字尾自動機(多個串的最長公共子串)spoj1812

字尾自動機(多個串的最長公共子串)spoj1812

SPOJ Problem Set (classical)

1812. Longest Common Substring II

Problem code: LCS2

A string is a finite sequence of characters over a non-empty finite set Σ.

In this problem, Σ is the set of lowercase letters.

A substring, also called a factor, is a consecutive sequence of characters that occurs at least once in a string.

Now your task is a bit harder, for some given strings, find the length of the longest common substring of them.

Here common substring means a substring of two or more strings.

Input

The input contains at most 10 lines, each line consists of no more than 100000 lowercase letters, representing a string.

Output

The length of the longest common substring. If such string doesn't exist, print "0" instead.

Example

Input:
alsdfkjfjkdsal
fdjskalajfkdsla
aaaajfaaaa

Output:
2

原來的問題想明白了,請看註釋。。。

其實就是是不是能走到的問題。

先將一個串建SAM,然後用後面的串去匹配,對於每一個串,儲存最大值,對於不同的串,更新最小值。

SAM結點多兩個值,ml表示多個串的最小值,nl表示當前串匹配的最大值。將SAM拓撲排序後自底向上更新,設當前狀態為p,如果p->ml>p->nl,則p->ml=p->nl;如果p有父親,設q為p的父親,若p->nl>q->nl,則令q->nl=p->nl,因為由SAM的性質可知,父親所表示的串都是p所表示的串的字尾,p的最大匹配數可以向上傳遞給它的父親。

從大神的部落格上看到一句話,感覺挺有用的:出現次數向父親傳遞,接收串數從兒子獲取

#include<iostream>
#include<cstdio>
#include<string>
#include<cstring>
#include<vector>
#include<cmath>
#include<queue>
#include<stack>
#include<map>
#include<set>
#include<algorithm>
using namespace std;
// Capacity bound: the input buffer holds maxn chars, and the node pool and
// sorting arrays hold 2*maxn entries.
// NOTE(review): the problem statement caps each line at 100000 characters, so
// this looks about ten times larger than that bound needs — confirm before shrinking.
const int maxn=1000100;
// Number of transitions per automaton state; characters are indexed as c-'a'.
const int SIGMA_SIZE=26;
/*
 * One state of the suffix automaton.
 *
 *   par   suffix-link parent (null for the root)
 *   next  outgoing transitions, indexed by letter - 'a'
 *   len   length value of the state, set at creation (or on cloning)
 *   id    index of the state inside the node pool
 *   pos   only ever written for the root in this file; never read
 *   nl    longest match length recorded at this state for the string
 *         currently being matched
 *   ml    minimum of nl over all strings matched so far; starts at len
 */
struct SAM_Node
{
    SAM_Node *par,*next[SIGMA_SIZE];
    int len,id,pos;
    int nl,ml;
    // Deliberately does nothing: objects built this way keep whatever their
    // storage already holds (zero for static-storage objects), and a large
    // static array of nodes is not touched at startup.
    SAM_Node(){}
    // Creates a detached state of the given length with no transitions.
    // Every member is given a definite value so the node can be copied
    // safely; id and pos start at 0 and may be overwritten by the caller.
    SAM_Node(int _len)
    {
        par=0;
        len=_len;
        id=0;
        pos=0;
        nl=0;
        ml=len;
        memset(next,0,sizeof(next));
    }
};
// Node pool: every automaton state lives in this array.
SAM_Node node[maxn*2];
// Initial state of the automaton.
SAM_Node *root;
// State reached after the most recently appended character.
SAM_Node *last;
// Number of pool entries currently in use.
int SAM_size;
// Takes the next free pool slot, resets it to a fresh state of length `len`,
// stamps it with its pool index and returns a pointer to it.
SAM_Node *newSAM_Node(int len)
{
    SAM_Node *slot=node+SAM_size;
    *slot=SAM_Node(len);
    slot->id=SAM_size;
    ++SAM_size;
    return slot;
}
// Takes the next free pool slot and fills it with a member-for-member copy of
// *p (transitions, parent, lengths, counters); only the id is replaced by the
// new slot's pool index. Returns a pointer to the copy.
SAM_Node *newSAM_Node(SAM_Node *p)
{
    SAM_Node *slot=node+SAM_size;
    *slot=*p;
    slot->id=SAM_size;
    ++SAM_size;
    return slot;
}
void SAM_add(int x,int len)
{
    SAM_Node *p=last,*np=newSAM_Node(p->len+1);
    last=np;
    while(p&&!p->next[x])
    {
        p->next[x]=np;
        p=p->par;
    }
    if(!p)
        np->par=root;
    else
    {
        SAM_Node *q=p->next[x];
        if(q->len==p->len+1)
            np->par=q;
        else
        {
            SAM_Node *nq=newSAM_Node(q);
            nq->len=nq->ml=p->len+1;
            q->par=nq;
            np->par=nq;
            while(p&&p->next[x]==q)
            {
                p->next[x]=nq;
                p=p->par;
            }
        }
    }
}
void SAM_init()
{
    SAM_size=0;
    root=last=newSAM_Node(0);
    node[0].pos=0;
}
// Rebuilds the automaton from scratch for the NUL-terminated string `s`,
// feeding its characters one by one as letter indices (c - 'a') together with
// their 1-based position.
void SAM_build(char *s)
{
    SAM_init();
    int position=0;
    for(char *c=s;*c!='\0';++c)
    {
        ++position;
        SAM_add(*c-'a',position);
    }
}
// Counting-sort buckets, indexed by state len (filled in main).
int cnt[maxn*2];
// Input buffer; reused for every string that is read.
char s[maxn];
// All automaton states ordered by non-decreasing len (filled in main).
SAM_Node *sa[maxn*2];
int main()
{
    scanf("%s",s);
    SAM_build(s);
    int len=strlen(s);
    for(int i=0;i<=len;i++)cnt[i]=0;
    for(int i=0;i<SAM_size;i++)cnt[node[i].len]++;
    for(int i=1;i<=len;i++)cnt[i]+=cnt[i-1];
    for(int i=SAM_size-1;i>=0;i--)sa[--cnt[node[i].len]]=&node[i];
    while(scanf("%s",s)!=EOF)
    {
        len=strlen(s);
        SAM_Node *p=root;
        int num=0;
        for(int i=0;i<len;i++)
        {
            int j=s[i]-'a';
            if(p->next[j]){num++;p=p->next[j];}
            else
            {
                while(p&&!p->next[j])p=p->par;
                if(p){num=p->len+1;p=p->next[j];}
                else num=0,p=root;
            }
            p->nl=max(num,p->nl);
        }
        for(int i=SAM_size-1;i>=0;i--)
        {
            p=sa[i];
            if(p->nl<p->ml)p->ml=p->nl;
            //好像想明白了,為什麼要比較p->par->nl和p->nl了
            //因為子節點的字尾長度要比父節點的長,有可能前面比父節點長的那部分沒有匹配
            //導致父節點的匹配長度比子節點長度長,反過來直接從子節點多出來的部分開始匹配
            //沒有走到父節點的話,父節點的匹配長度就小了,所以要更新
            if(p->par&&p->par->nl<p->nl)
                p->par->nl=p->nl;
            p->nl=0;
        }
    }
    int ans=0;
    for(int i=0;i<SAM_size;i++)
        if(node[i].ml>ans)ans=node[i].ml;
    printf("%d\n",ans);
    return 0;
}