1. 程式人生 > >POJ 2778:DNA Sequence(AC自動機+矩陣快速冪)

POJ 2778:DNA Sequence(AC自動機+矩陣快速冪)

DNA Sequence
Time Limit: 1000MS Memory Limit: 65536K
Total Submissions: 6232 Accepted: 2213

Description

It's well known that a DNA sequence is a sequence that contains only A, C, T and G, and it's very useful to analyze a segment of a DNA sequence. For example, if an animal's DNA sequence contains the segment ATC, it may mean that the animal has a genetic disease. So far scientists have found several such segments; the problem is to count how many kinds of DNA sequences of a species don't contain any of those segments.

Suppose that the DNA sequences of a species are sequences that consist of A, C, T and G, and that the length of each sequence is a given integer n.

Input

The first line contains two integers m (0 <= m <= 10) and n (1 <= n <= 2000000000). Here, m is the number of genetic disease segments, and n is the length of the sequences.

Each of the next m lines contains one genetic disease segment; the length of each segment is not larger than 10.

Output

An integer, the number of DNA sequences, mod 100000.

Sample Input

4 3
AT
AC
AG
AA

Sample Output

36

題意:給定m個DNA片段(A,C,G,T),求出長度為n的所有DNA中不含這些片段的有多少
(0 <= m <= 10, 1 <= n <=2000000000)

原始碼:

#include<cstdio>
#include<cstring>
#include<iostream>
using namespace std;

// Alphabet size: the four nucleotides A, C, G and T.
const int KIND=4;
// Capacity of the node pool and the BFS queue, and the dimension of the
// transition matrices. The problem statement allows at most 10 segments of
// length <= 10, so the trie needs at most 10*10+1 = 101 nodes.
const int MAX=105;

// One state of the Aho-Corasick automaton built over the disease segments.
struct TrieNode
{
	// True when this state must be avoided: it is set on the last node of an
	// inserted segment and propagated along failure links while the automaton
	// is built.
	bool unsafe;
	// Position of the node inside the `memory` pool; also used as its
	// row/column number in the transition matrix.
	int index;
	// Failure link (NULL for the root).
	TrieNode *fail;
	// One slot per letter. Initially the trie children (NULL if absent);
	// Build_AC_Automation fills the empty slots with fallback targets.
	TrieNode *next[KIND];
};

TrieNode memory[MAX];
int allocp;
TrieNode *q[MAX];
int m,c2i[100];
int n;
//n次的矩陣中,dp[i][j]表示從i狀態到j狀態之間連線n個字元,有dp[i][j]種安全的組合
__int64 g[MAX][MAX],dp[MAX][MAX];

// Takes the next unused slot from the `memory` pool, resets it to a safe node
// with no failure link and no children, and returns it. The node's index is
// its position in the pool; `allocp` is advanced by one.
TrieNode *CreateTrieNode()
{
	const int slot=allocp++;
	TrieNode *node=memory+slot;
	node->index=slot;
	node->unsafe=false;
	node->fail=NULL;
	for(int c=0;c<KIND;++c)
		node->next[c]=NULL;
	return node;
}

// Inserts the NUL-terminated segment `s` into the trie rooted at pRoot.
// Each character is mapped to a child slot through c2i; missing nodes are
// created on the way down, and the node reached at the end is marked unsafe.
void InsertTrieNode(TrieNode *pRoot,char s[])
{
	TrieNode *cur=pRoot;
	for(const char *ch=s;*ch!='\0';++ch)
	{
		TrieNode *&slot=cur->next[c2i[*ch]];
		if(slot==NULL)
			slot=CreateTrieNode();
		cur=slot;
	}
	cur->unsafe=true;
}

// Turns the trie rooted at pRoot into a complete automaton.
// Nodes are visited breadth-first using the global queue `q`. For every node
// and every letter:
//   - an existing child gets its failure link, inherits the unsafe flag of
//     that link's target (for children of non-root nodes), and is enqueued;
//   - a missing child slot is redirected to the state the failure link would
//     lead to, so afterwards every next[] entry is a valid transition.
void Build_AC_Automation(TrieNode *pRoot)
{
	int front=0,back=0;
	pRoot->fail=NULL;
	q[back++]=pRoot;
	while(front!=back)
	{
		TrieNode *cur=q[front++];
		const bool atRoot=(cur==pRoot);
		for(int c=0;c<KIND;++c)
		{
			// State reached on letter c after falling back from cur; the
			// root falls back to itself.
			TrieNode *fallback=atRoot ? pRoot : cur->fail->next[c];
			TrieNode *child=cur->next[c];
			if(child==NULL)
			{
				cur->next[c]=fallback;
				continue;
			}
			child->fail=fallback;
			// A state whose failure target is unsafe ends with a forbidden
			// segment as well.
			if(!atRoot && fallback->unsafe)
				child->unsafe=true;
			q[back++]=child;
		}
	}
}

void MatrixMul(__int64 a[][MAX],__int64 b[][MAX],int sz)
{
	__int64 tmp[MAX][MAX]={0};
	int i,j,k;
	for(i=0;i<sz;i++)
		for(j=0;j<sz;j++)
			for(k=0;k<sz;k++)
			{
				tmp[i][j] += a[i][k]*b[k][j]; //兩個<100000的數相乘會超過int,所以要用64位
				if(tmp[i][j] >=100000)
					tmp[i][j] %= 100000;
			}
	for(i=0;i<sz;i++)
		for(j=0;j<sz;j++)
			a[i][j]=tmp[i][j];
}

void MatrixPow(__int64 t[][MAX],__int64 a[][MAX],int sz,int n)
{
	while(n>0)
	{
		if(1&n)
			MatrixMul(t,a,sz);
		MatrixMul(a,a,sz);
		n >>= 1;
	}
}

int main()
{
	int i,j,k;
	char word[15];
	TrieNode *pRoot;
	c2i['A']=0;   c2i['C']=1;
	c2i['G']=2;   c2i['T']=3;
	while(cin>>m>>n)
	{
		allocp=0;
		pRoot=CreateTrieNode();
		for(i=0;i<m;i++)
		{
			cin>>word;
			InsertTrieNode(pRoot,word);
		}
		Build_AC_Automation(pRoot);
		memset(g,0,sizeof(g));
		for(i=0;i<allocp;i++) //構建矩陣
			for(j=0;j<KIND;j++)
			{
				TrieNode *tmp=memory[i].next[j];
				if(memory[i].unsafe==false && tmp->unsafe==false)//要安全的
				   g[i][tmp->index]++;
			}
		//初始化dp為單位矩陣
		for(i=0;i<allocp;i++)
			for(j=0;j<allocp;j++)
			{
				if(i==j)  dp[i][j]=1;
				else dp[i][j]=0;
			}
		MatrixPow(dp,g,allocp,n);
		//cout<<n<<endl;
		__int64 ans=0;
		for(i=0;i<allocp;i++)
			ans += dp[0][i];
		printf("%I64d\n",ans%100000);
	}
	return 0;
}