1. 程式人生 > >字符串hash

字符串hash

cout -- 我們 puts del cstring ons sca mark

似乎沒寫過多少字符串hash
今天補一補

字符串hash重要思想就是把字符串看做一個N進制大整數,進行取模後直接比較
這樣子做的優劣很直觀:很快很簡單,也有取模後蜜汁碰撞的風險

對於i位置的hash值,可以這樣求:

    for (int i = 1; i <= n; i++) H[i] = H[i - 1] * p + s[i];

我們要取出子串[l,r]的hash值時,顯然就是\(H[r] - H[l - 1] * p^{r - l + 1}\)

來道【正解SAM】的例題:
最長公共子串
當然對串a建SAM,用串b在上邊匹配就可以了

SAM太深奧了,我們來看看簡單暴力的字符串hash
我們二分長度len,對A串的所有位置的長度為len的hash排序,再拿B串所有位置長度為len的hash去查找
複雜度O(n log^2 n)【似乎SAM接近O(n)?】

#include<iostream>
#include<cstdio>
#include<cmath>
#include<cstring>
#include<algorithm>
#define LL long long int
#define Redge(u) for (int k = h[u]; k; k = ed[k].nxt)
#define REP(i,n) for (int i = 1; i <= (n); i++)
#define ULL unsigned long long int
using namespace std;

const int maxn = 200005, maxm = 100005, INF = 1000000000;

// Hash values are unsigned, so arithmetic wraps around instead of overflowing;
// that wrap-around acts as the implicit modulus of the polynomial hash.
typedef unsigned long long Hash;

// Base of the polynomial hash (the strings are treated as base-27 numbers).
const Hash BASE = 27;

/**
 * Reads one decimal integer from stdin.
 *
 * Skips every non-digit character; if a '-' is seen while skipping, the
 * result is negated. Consumes the first character after the digits.
 * Returns 0 if EOF is reached before any digit (instead of looping forever).
 */
inline int read() {
    int out = 0, flag = 1;
    int c = getchar();  // int, not char, so EOF can be told apart from data
    while (c != EOF && (c < '0' || c > '9')) {
        if (c == '-') flag = -1;
        c = getchar();
    }
    while (c >= '0' && c <= '9') {
        out = out * 10 + (c - '0');
        c = getchar();
    }
    return out * flag;
}

char A[maxn], B[maxn];    // the two input strings, stored 1-indexed
int lena, lenb, n;        // lengths of A and B; n = number of hashes held in b
Hash Ha[maxn], Hb[maxn];  // Ha[i] / Hb[i] = hash of the prefix of length i
Hash b[maxn];             // scratch: sorted hashes of A's substrings, b[1..n]

/**
 * Tests whether A and B share a common substring of length len
 * (judged by hash equality only, so collisions are possible).
 *
 * Collects the hash of every length-len substring of A, sorts them, and
 * looks up the hash of every length-len substring of B by binary search.
 */
bool check(int len) {
    n = 0;
    Hash P = 1;  // BASE^len, used to strip the prefix before the window
    for (int i = 1; i <= len; i++) P *= BASE;
    for (int i = len; i <= lena; i++) b[++n] = Ha[i] - Ha[i - len] * P;
    sort(b + 1, b + 1 + n);
    for (int i = len; i <= lenb; i++) {
        const Hash temp = Hb[i] - Hb[i - len] * P;
        // binary_search never looks past b[n]; dereferencing the result of
        // lower_bound could read the stale slot b[n + 1] from an earlier call.
        if (binary_search(b + 1, b + 1 + n, temp)) return true;
    }
    return false;
}

/**
 * Reads two strings and prints the length of their longest common substring,
 * found by binary searching the length and testing each candidate with check().
 */
int main() {
    scanf("%s", A + 1);
    lena = static_cast<int>(strlen(A + 1));
    scanf("%s", B + 1);
    lenb = static_cast<int>(strlen(B + 1));
    for (int i = 1; i <= lena; i++) Ha[i] = Ha[i - 1] * BASE + A[i];
    for (int i = 1; i <= lenb; i++) Hb[i] = Hb[i - 1] * BASE + B[i];
    int l = 0, r = min(lena, lenb), mid;
    while (l < r) {
        mid = (l + r + 1) >> 1;  // round up so that l = mid always makes progress
        if (check(mid)) l = mid;
        else r = mid - 1;
    }
    printf("%d\n", l);
    return 0;
}

BZOJ3207
此題K很小,我們用上hash之後,每個位置就對應一個hash值,問題就轉化為了一個區間內是否存在某個值,用可持久化線段樹就可以了

#include<iostream>
#include<cmath>
#include<cstdio>
#include<cstring>
#include<algorithm>
#define LL long long int
#define REP(i,n) for (int i = 1; i <= (n); i++)
#define Redge(u) for (int k = h[u],to; k; k = ed[k].nxt)
// Debug helper: prints s[1..n] separated by spaces, followed by a newline.
// Expands to two statements, so do not use it as the body of an unbraced if/for.
#define BUG(s,n) for (int i = 1; i <= (n); i++) cout<<s[i]<<' '; puts("");
#define inf 18446744073709551615UL
#define uLL unsigned long long int
// Pulls every std name into the global namespace for the rest of the file.
using namespace std;
// maxn bounds the per-position arrays (A, T, H, rt); maxm is the size of the
// segment-tree node arrays (ls, rs, sum).
const int maxn = 100010,maxm = 8000005;
/**
 * Reads one decimal integer from stdin.
 *
 * Skips every non-digit character; if a '-' is seen while skipping, the
 * result is negated. Consumes the first character after the digits.
 * Returns 0 if EOF is reached before any digit (instead of looping forever).
 */
inline int read(){
    int out = 0,flag = 1;
    int c = std::getchar();  // int, not char, so EOF can be told apart from data
    while (c != EOF && (c < '0' || c > '9')) {
        if (c == '-') flag = -1;
        c = std::getchar();
    }
    while (c >= '0' && c <= '9') {
        out = out * 10 + (c - '0');
        c = std::getchar();
    }
    return out * flag;
}
// Node storage of the persistent segment tree: ls/rs hold the left/right child
// index of each node, sum the number of values inserted inside the node's range.
// rt[i] is the root of the version built after processing position i.
// Node 0 is never allocated (cnt is pre-incremented), so it serves as the empty tree.
int ls[maxm],rs[maxm],sum[maxm],rt[maxn];
// n, m and K are read from the input in main (in that order);
// cnt is the number of tree nodes allocated so far.
int n,m,K,cnt;
// A holds the input sequence, 1-indexed. T is not referenced in the visible code.
int A[maxn],T[maxn];
// H[i] is the base-107 polynomial hash of A[1..i]; the unsigned arithmetic wraps on overflow.
uLL H[maxn];
void modify(int& u,int pre,uLL l,uLL r,uLL pos){
    u = ++cnt; sum[u] = sum[pre] + 1; ls[u] = ls[pre]; rs[u] = rs[pre];
    if (l == r) return;
    uLL mid = l / 2 + r / 2;
    if (mid >= pos) modify(ls[u],ls[pre],l,mid,pos);
    else modify(rs[u],rs[pre],mid + 1,r,pos);
}
int query(int u,int v,uLL l,uLL r,uLL pos){
    if (l == r) return sum[u] - sum[v];
    uLL mid = l / 2 + r / 2;
    if (mid >= pos) return query(ls[u],ls[v],l,mid,pos);
    else return query(rs[u],rs[v],mid + 1,r,pos);
}
/**
 * Reads n, m, K and a sequence of n integers, then answers m queries.
 *
 * Every window of K consecutive elements is hashed, and the hash of the window
 * ending at position i is inserted into version i of a persistent segment tree.
 * Each query gives an interval and K integers; it prints "No" if a window with
 * the same hash lies entirely inside the interval and "Yes" otherwise.
 */
int main(){
    n = read(); m = read(); K = read();
    REP(i,n) A[i] = read();
    REP(i,n) H[i] = H[i - 1] * 107 + A[i];
    uLL P = 1; REP(i,K) P *= 107;  // 107^K, strips the prefix before a window
    for (int i = K; i <= n; i++)
        modify(rt[i],rt[i - 1],0,inf,H[i] - H[i - K] * P);
    while (m--){
        // A window inside the queried interval must end in [l, r], where l is
        // the interval's left end shifted by K - 1.
        int l = read() + K - 1,r = read();
        uLL val = 0;
        for (int i = 1; i <= K; i++) val = val * 107 + read();
        // If the interval is shorter than K then l > r and no window fits.
        // Without this guard rt[l - 1] can be a newer version than rt[r]
        // (negative, hence truthy, difference) or an unbuilt root past n,
        // and either case would wrongly report a match.
        if (l <= r && query(rt[r],rt[l - 1],0,inf,val)) puts("No");
        else puts("Yes");
    }
    return 0;
}

字符串hash