1. 程式人生 > >HDU 5769 Substring(字尾陣列)

HDU 5769 Substring(字尾陣列)

題面:

Substring

Time Limit: 2000/1000 MS (Java/Others)    Memory Limit: 65536/65536 K (Java/Others)
Total Submission(s): 539    Accepted Submission(s): 226


Problem Description ?? is practicing his program skill, and now he is given a string, he has to calculate the total number of its distinct substrings.
But ?? thinks that is too easy, he wants to make this problem more interesting.
?? likes a character X very much, so he wants to know the number of distinct substrings which contains at least one X.
However, ?? is unable to solve it, please help him.
Input The first line of the input gives the number of test cases T;T test cases follow.
Each test case is consist of 2 lines:
First line is a character X, and second line is a string S.
X is a lowercase letter, and S contains lowercase letters(‘a’-‘z’) only.

T<=30
1<=|S|<=10^5
The sum of |S| in all the test cases is no more than 700,000.
Output For each test case, output one line containing “Case #x: y”(without quotes), where x is the test case number(starting from 1) and y is the answer you get for that case.

Sample Input
2
a
abc
b
bbb

Sample Output
Case #1: 3
Case #2: 3

Hint
In first case, all distinct substrings containing at least one a: a, ab, abc. In second case, all distinct substrings containing at least one b: b, bb, bbb.
Author FZU
Source

題意:

    給定一個字串,問該字串中包含某一字元的不重複子串的數量。

解題:

   字尾陣列的大致原理是懂的,自己寫是寫不出來的,勉強算是會用吧。這題可以先看一下,如何求某字串的不重複子串的數量。

   ans=sigma(length-sa[i]-height[i]),如何理解呢,sa[i]表示的是字典序排名為i的字尾,它的起始位置,length-sa[i],即為排名i的字尾的長度,height[i],是排名為i的串和它之前那個串的公共字首長度,故length-sa[i]-height[i],(減去和字典序前一個的公共字首)即為不重複子串數量。

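   而針對這題,子串必須包含指定字元 X,故先預處理 pos[j]:從位置 j 起(含 j)向右第一個 X 的位置,若不存在則為 length。對排名為 i 的字尾,新增的不重複子串其結尾位置不小於 sa[i]+height[i];而要包含 X,結尾位置又必須不小於 pos[sa[i]]。因此該字尾的貢獻為 min(length-sa[i]-height[i], length-pos[sa[i]]),全部累加即為答案。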

程式碼:

#include <iostream>
#include <string>
#include <cstring>
#include <cstdio>
// Counted loop: declares `i` and runs it over 0 .. n-1. The bound is
// parenthesised so that an expression argument (for example a conditional
// expression) is compared against `i` as a whole.
#define rep(i,n) for(int i = 0;i < (n); i++)
// Capacity of every global buffer below. The problem limits |S| to 10^5, so
// this leaves room for the string plus the extra terminator symbol.
#define sz 100005
#define LL long long
using namespace std;
// Input string of the current test case (filled by main).
char ori[sz];
// rk     - rank of each suffix (rebuilt by getHeight); scratch inside getSa
// sa     - suffix array: sa[r] is the start of the suffix with rank r
// height - height[r] is the common-prefix length of the suffixes ranked r
//          and r-1 (set by getHeight); scratch inside getSa
// w      - integer copy of the string plus a trailing 0 (set by getSuffix)
// wa     - counting-sort buckets used by getSa
// res    - second rank buffer used by getSa
// pos    - pos[j] is the first index >= j holding the required character,
//          or the string length if there is none (set by main)
int rk[sz],sa[sz],height[sz],w[sz],wa[sz],res[sz],pos[sz];
// Returns the smaller of the two integers.
int min(int a,int b)
{
	if (b < a)
		return b;
	return a;
}
// Returns the larger of the two integers.
int max(int a,int b)
{
	if (a < b)
		return b;
	return a;
}
// Builds the suffix array sa[0..len-1] of the integer string w[0..len-1]
// by prefix doubling, using a counting sort in every round.
//   len - number of symbols in w (getSuffix passes the length including the
//         0 symbol it appends)
//   up  - exclusive upper bound on the symbol values in w
// The global arrays rk, res, height and wa are used as scratch space here;
// after the pointer swaps the final ranks may sit in either rk or res, and
// getHeight rebuilds rk from sa before using it.
void getSa (int len,int up) 
{
	// k: current ranks, r: the other rank buffer, id: suffixes ordered by
	// their second key, cnt: counting-sort buckets.
	int *k = rk,*id = height,*r = res, *cnt = wa;
	// Round 0: counting sort of all positions by their single symbol.
	rep(i,up) cnt[i] = 0;
	rep(i,len) cnt[k[i] = w[i]]++;
	// NOTE(review): this prefix sum also updates cnt[up], one slot past the
	// buckets zeroed above; no bucket lookup uses it while every key is < up.
	rep(i,up) cnt[i+1] += cnt[i];
	// Filling from the back keeps equal keys in increasing index order.
	for(int i = len - 1; i >= 0; i--) 
		sa[--cnt[k[i]]] = i;
	// d: length already sorted by; p: running counter (reused as the number
	// of distinct ranks at the end of each round).
	int d = 1,p = 0;
	while(p < len)
	{
		// Order by second key (rank of position +d). Suffixes starting in the
		// last d positions have no second half, so they come first.
		for(int i = len - d; i < len; i++) id[p++] = i;
		// The rest follow in the order of the current sa, shifted back by d.
		rep(i,len)	if(sa[i] >= d) id[p++] = sa[i] - d;
		// r[i] = first key of the i-th suffix in second-key order.
		rep(i,len) r[i] = k[id[i]];
		// Stable counting sort by first key yields the order by both keys.
		rep(i,up) cnt[i] = 0;
		rep(i,len) cnt[r[i]]++;
		rep(i,up) cnt[i+1] += cnt[i];
		for(int i = len - 1; i >= 0; i--)
			sa[--cnt[r[i]]] = id[i];
		// From here r holds the previous ranks and k receives the new ones.
		swap(k,r);
		p = 0;
		k[sa[0]] = p++;
		rep(i,len-1) 
		{
			// Neighbours share a rank only if both halves are in range and
			// both their first and second keys match.
			if(sa[i]+d < len && sa[i+1]+d <len &&r[sa[i]] == r[sa[i+1]]&& r[sa[i]+d] == r[sa[i+1]+d])
				k[sa[i+1]] = p - 1;
			else k[sa[i+1]] = p++;
		}
		// Every suffix has its own rank: sa is final.
		if(p >= len) return ;
		// Double the compared length; the next round needs p buckets.
		d *= 2,up = p, p = 0;
	}
}
void getHeight(int len) 
{
	rep(i,len) rk[sa[i]] = i;
	height[0] =  0;
	for(int i = 0,p = 0; i < len - 1; i++) 
	{
		int j = sa[rk[i]-1];
		while(i+p < len&& j+p < len&& w[i+p] == w[j+p]) p++;
		height[rk[i]] = p;
		p = max(0,p - 1);
	}
}
// Copies the C string s into w, appends a 0 symbol, then builds sa (getSa)
// and height (getHeight) for the extended sequence.
// Returns the extended length, i.e. strlen(s) + 1.
int getSuffix(char s[])
{
	const int n = strlen(s);
	int largest = 0;
	for (int idx = 0; idx < n; ++idx)
	{
		const int symbol = s[idx];
		w[idx] = symbol;
		if (symbol > largest)
			largest = symbol;
	}
	// Extra trailing symbol 0, counted in the length passed on below.
	w[n] = 0;
	const int total = n + 1;
	getSa(total, largest + 1);
	getHeight(total);
	return total;
}
// Reads T test cases (a character X, then a string S) and prints for each
// one "Case #x: y", where y is the number of distinct substrings of S that
// contain at least one X.
int main()
{
	int t;
	// Stop on missing or malformed input instead of looping on an
	// uninitialised count.
	if(scanf("%d",&t) != 1) return 0;
	for(int ix=1;ix<=t;ix++)
	{
		char c;
		// The field width keeps the token inside ori (sz = 100005 bytes
		// including the terminator); keep it in sync with sz.
		if(scanf(" %c",&c) != 1 || scanf("%100004s",ori) != 1) break;
		getSuffix(ori);
		int l=strlen(ori);
		// pos[j]: first index >= j where ori holds c, or l if there is none.
		pos[l]=l;
		for(int j=l-1;j>=0;j--)
			pos[j] = (ori[j]==c) ? j : pos[j+1];
		// Rank 0 is skipped: getSuffix appends a 0 symbol, which is assumed to
		// sort before every input character. For the suffix of rank i, its new
		// substrings end at or after sa[i]+height[i], and those containing c
		// end at or after pos[sa[i]]; the smaller of the two counts is kept.
		LL ans=0;
		for(int i=1;i<=l;i++)
			ans += min(l-sa[i]-height[i],l-pos[sa[i]]);
		printf("Case #%d: %lld\n",ix,ans);
	}
	return 0;
}