1. 程式人生 > >poj 3415 Common Substrings

poj 3415 Common Substrings

top != accepted lang dep ons pri tro vector

Common Substrings
Time Limit: 5000MS Memory Limit: 65536K
Total Submissions: 12585 Accepted: 4228

Description

A substring of a string T is defined as:

$T(i, k) = T_i T_{i+1} \ldots T_{i+k-1},\quad 1 \le i \le i+k-1 \le |T|.$

Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):

$S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}.$

You are to give the value of |S| for specific A, B and K.

Input

The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.

$1 \le |A|, |B| \le 10^5$
$1 \le K \le \min\{|A|, |B|\}$
Characters of A and B are all Latin letters.

Output

For each case, output an integer |S|.

Sample Input

2
aababaa
abaabaa
1
xx
xx
0

Sample Output

22
5

題意:給定兩個字符串A、B和整數K,求滿足k≥K且A(i,k)=B(j,k)的三元組(i,j,k)的個數,即長度不小於K的公共子串的數量(起始位置不同視為不同)。
思路:其實就是求兩個字符串當中的任意兩個後綴的相同前綴的數量。設lcp是分別取自兩個字符串的兩個後綴的最長公共前綴長度,那麽當lcp≥K時,這兩個後綴的長度不小於K的相同前綴數量為lcp-K+1(lcp<K時為0)。
直接枚舉兩個字符串的所有後綴並累加他們的前綴數量復雜度在O(n^2)行不通。
可以利用單調棧。首先把兩個字符串s1,s2進行合並,中間加一個在兩個字符串中都不會出現的字符(譬如'$')來區別,即s=s1+'$'+s2,求s的後綴數組和高度數組。
首先任意兩個後綴,記它們在後綴數組中位置分別為i,j,則它們的高度lcp可以表示為min(lcp[i],lcp[i+1],...,lcp[j-1]),既然如此,可以用單調棧來維護lcp
對於s2的每一個後綴B,考慮所有字典序在B前面的s1的後綴Ai,計算所有Ai與B的相同前綴的數量和,可以用單調棧優化。對於s1中的每個後綴A,計算Bi與A的相同前綴數量和與之前是類似的。
在高度數組當中把高度大於等於K的連續的序列分成一塊,一塊一塊的用單調棧考慮,具體見代碼:

AC代碼:
#define _CRT_SECURE_NO_DEPRECATE
#include<iostream>
#include<algorithm>
#include<vector>
#include<cstring>
#include<string>
#include<cmath>
using namespace std;
// 64-bit integer type used for the running sums and the final answer.
typedef long long ll;

// Large sentinel value; nothing in this file reads it.
const int INF = 0x3f3f3f3f;
// Capacity reserved for one input string (100000 characters plus slack).
const int N_MAX = 100000 + 20;

// Length of the string currently being indexed and the current doubling
// step; construct_sa sets both and compare_sa reads them.
int n;
int k;

// Work arrays, each 2 * N_MAX entries long.
int Rank[2 * N_MAX];  // rank of the suffix starting at each position
int tmp[2 * N_MAX];   // scratch ranks computed during a doubling round
int sa[2 * N_MAX];    // suffix array: start positions in sorted order
int lcp[2 * N_MAX];   // lcp[i]: common-prefix length of suffixes sa[i], sa[i + 1]
// Strict ordering of two suffix start positions for the current doubling
// step k: compare Rank at the positions themselves, and on a tie compare Rank
// k characters further on. A position beyond n is treated as rank -1.
bool compare_sa(const int& i,const int& j) {
    if (Rank[i] == Rank[j]) {
        const int next_i = (i + k > n) ? -1 : Rank[i + k];
        const int next_j = (j + k > n) ? -1 : Rank[j + k];
        return next_i < next_j;
    }
    return Rank[i] < Rank[j];
}

// Builds the suffix array of S into sa[0 .. S.size()] by prefix doubling.
// The empty suffix (start position S.size()) is included, so S.size() + 1
// entries are written. Uses the globals n, k, Rank and tmp as working state;
// Rank holds the ranks from the last doubling round when the function returns.
void construct_sa(const string& S,int *sa) {
    n = S.size();

    // Round zero: positions in index order, ranked by their first character.
    for (int pos = 0; pos < n; ++pos) {
        sa[pos] = pos;
        Rank[pos] = S[pos];
    }
    // The empty suffix has no first character and gets the sentinel rank.
    sa[n] = n;
    Rank[n] = -1;

    for (k = 1; k <= n; k <<= 1) {
        // Order by (Rank[i], Rank[i + k]), i.e. by the first 2k characters.
        sort(sa, sa + n + 1, compare_sa);

        // Re-rank in sorted order: the rank only increases where a suffix is
        // strictly greater than its predecessor under the current ordering.
        tmp[sa[0]] = 0;
        for (int idx = 1; idx <= n; ++idx) {
            int next_rank = tmp[sa[idx - 1]];
            if (compare_sa(sa[idx - 1], sa[idx])) {
                ++next_rank;
            }
            tmp[sa[idx]] = next_rank;
        }
        for (int pos = 0; pos <= n; ++pos) {
            Rank[pos] = tmp[pos];
        }
    }
}
// Fills lcp[0 .. S.length() - 1] from the suffix array `sa` of S, where
// lcp[r] is the length of the common prefix of the suffixes starting at
// sa[r] and sa[r + 1].
//
// `sa` must hold S.length() + 1 entries (the empty suffix included). The
// global Rank is overwritten with the inverse permutation of `sa`.
// The loop reads sa[Rank[i] - 1], so it relies on no position i < S.length()
// having rank 0 (i.e. on the empty suffix being sa[0]).
void construct_lcp(const string& S,int *sa,int *lcp){
    // Local length; intentionally shadows the global n used by construct_sa.
    const int n = S.length();

    // `lcp` is a pointer parameter, so sizeof(lcp) is only the size of a
    // pointer, not of the caller's array. Clear the n entries this function
    // is responsible for explicitly instead.
    for (int r = 0; r < n; r++) lcp[r] = 0;

    // Invert the suffix array: Rank[p] is the sorted position of suffix p.
    for (int i = 0; i <= n; i++) Rank[sa[i]] = i;

    int h = 0;
    lcp[0] = 0;
    for (int i = 0; i < n; i++) {
        // Suffix that immediately precedes suffix i in sorted order.
        const int j = sa[Rank[i] - 1];

        // Carry the previous match length over, minus one character, and
        // extend it by direct comparison.
        if (h > 0) h--;
        while (j + h < n && i + h < n && S[j + h] == S[i + h]) h++;

        lcp[Rank[i] - 1] = h;
    }
}

// Minimum common-substring length, read from the input for each test case.
int K;

// The two input strings and the combined string built from them in main.
string s1;
string s2;
string s;

// Number of live entries on `stack`, and the running sum that find_num
// maintains while it scans the lcp array.
ll top;
ll accumu;

// Stack of (lcp, count) pairs used by find_num: column 0 holds the lcp
// height, column 1 the number of suffixes merged at that height.
int stack[N_MAX * 2][2];
// Scans the global suffix array `sa` / `lcp` of the global string `s` once
// and returns a sum of (common-prefix length - K + 1) over pairs of suffixes
// (counted, queried) in which the counted suffix comes earlier in sorted
// order than the queried one and the two share at least K characters.
//
//   sz1    index of the separator in `s` (main passes s1.size()); start
//          positions below it belong to s1, positions above it to s2.
//   is_s1  true:  suffixes of s1 are counted, suffixes of s2 are queried;
//          false: the roles are swapped.
//
// main adds the results of both calls to cover both orderings of a pair.
// Uses the globals `stack`, `top` and `accumu` as scratch state and reads
// sa[i + 1] for every i < s.size(), so `sa` must hold s.size() + 1 entries.
ll find_num(int sz1,bool is_s1) {
    ll res = 0;
    top = accumu = 0;

    const int len = static_cast<int>(s.size());
    for (int i = 0; i < len; i++) {
        if (lcp[i] < K) {
            // Suffixes on either side of this gap share fewer than K
            // characters, so nothing collected so far can pair across it.
            top = 0;
            accumu = 0;
            continue;
        }

        // Number of counted suffixes that end up at height lcp[i].
        int size = 0;
        const bool counted = is_s1 ? (sa[i] < sz1) : (sa[i] > sz1);
        if (counted) {
            size++;
            accumu += lcp[i] - K + 1;
        }

        // Entries at least as tall as lcp[i] are cut down to lcp[i]: remove
        // the excess height from the running sum and merge their counts.
        while (top > 0 && lcp[i] <= stack[top - 1][0]) {
            top--;
            // Multiply in 64 bits; count * height difference is otherwise
            // evaluated in int before being widened.
            accumu -= static_cast<ll>(stack[top][1]) * (stack[top][0] - lcp[i]);
            size += stack[top][1];
        }
        if (size) {
            stack[top][0] = lcp[i];
            stack[top][1] = size;
            top++;
        }

        // The next suffix in sorted order is a queried one: it pairs with
        // every counted suffix currently represented in the running sum.
        const bool queried = is_s1 ? (sa[i + 1] > sz1) : (sa[i + 1] < sz1);
        if (queried) {
            res += accumu;
        }
    }
    return res;
}

int main() {
    while (scanf("%d",&K)&&K) {
        cin >> s1 >> s2;
        int sz1 = s1.size();
        int sz2 = s2.size();
        s = s1 + $ + s2;
        construct_sa(s,sa);
        construct_lcp(s,sa,lcp);
        printf("%lld\n",find_num(sz1,1)+find_num(sz1,0));
    }
    return 0;
}



poj 3415 Common Substrings