1. 程式人生 > >字符串哈希專題

字符串哈希專題

esp cpp tdi mod sig bstr 不同 兩種 維護


layout: post
title: 字符串哈希專題
author: "luowentaoaa"
catalog: true
tags:
- 字符串
mathjax: true


傳送門

摘要 哈希進制轉換

題意

給定一個字符串,其中不同的字符不超過NC個。考慮它所有長度為N的子串,問其中共有多少個互不相同的子串

思路

以nc作為進制,把一個子串化為這個進制下的數,再用哈希判斷

#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor by the iterative Euclidean algorithm.
// Returns a when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result (a*b could overflow int even when the lcm fits).
// Returns 0 when either argument is 0 instead of dividing by gcd(0,0)==0.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
// Multipliers and moduli the Hash constructor can choose from by index.
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
// Prefix polynomial hash of a C string with O(1) substring-hash queries.
//   p[i] = SEED^i mod MOD
//   h[i] = hash of the first i characters (h[0] == 0)
struct Hash{
    ull SEED,MOD;
    vector<ull>p,h;
    // Empty hash; SEED/MOD are zeroed so the object never holds indeterminate values.
    Hash():SEED(0),MOD(0){}
    // s: NUL-terminated text.
    // seed_index / mod_index: positions in Seed_Pool / Mod_Pool (not range-checked).
    Hash(const char* s,const int& seed_index,const int& mod_index){
        SEED=Seed_Pool[seed_index];
        MOD=Mod_Pool[mod_index];
        int n=strlen(s);
        p.resize(n+1),h.resize(n+1);
        p[0]=1;
        for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
        for(int i=1;i<=n;i++){
            // Take the byte as 0..255. A negative plain char would be converted to a
            // value near 2^64, and the addition would then wrap at some positions but
            // not others, giving equal substrings different hashes.
            const ull ch=static_cast<unsigned char>(s[i-1]);
            h[i]=(h[i-1]*SEED%MOD+ch)%MOD;
        }
    }
    // Hash of the half-open range [l,r); adding MOD keeps the difference non-negative.
    ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
    // Hash of the m characters starting at index l.
    ull substr(int l,int m){return get(l,l+m);}
};
// Not referenced by the code of this program.
set<ull>st;
// Input text buffer filled by main.
char s[16000005];
// a[v] becomes true once a window whose base-nc value is v has been counted.
bool a[maxn*2];
// ha[c] is the digit assigned to character code c, or -1 while c has not been seen.
int ha[256];
// Reads n, nc and a text, then prints how many distinct length-n windows the text has.
// Every distinct character gets a digit in order of first appearance; each window is
// read as an n-digit base-nc number and marked in the table a[].
// NOTE(review): assumes nc^n stays within the bounds of a[] (and of int) — not checked here.
int main()
{
    int n,nc;
    cin>>n>>nc;
    // Limit the extraction to the buffer size; an unbounded >> into a char array can overrun it.
    cin.width(sizeof(s));
    cin>>s;
    int cnt=0;
    memset(ha,-1,sizeof(ha));
    memset(a,false,sizeof(a));
    int len=strlen(s);
    for(int i=0;i<len;i++){
        // Index through unsigned char: a negative plain char would be an out-of-range subscript.
        const unsigned char c=static_cast<unsigned char>(s[i]);
        if(ha[c]==-1)ha[c]=cnt++;
    }
    int res=0;
    for(int i=0;i+n<=len;i++){
        int sum=0;
        for(int j=i;j<i+n;j++){
            sum*=nc;
            sum+=ha[static_cast<unsigned char>(s[j])];
        }
        if(!a[sum])res++,a[sum]=true;
    }
    cout<<res<<endl;
    return 0;
}

C.POJ - 2774 Long Long Message

兩個字符串的最長公共子串長度

題意

求兩個字符串的最長公共子串的長度

題解

二分答案長度mid:先把字符串A中所有長度為mid的子串的哈希值存入數組並排序,再對字符串B中每個長度為mid的子串,在該數組中二分查找它的哈希值,找到則說明存在長度為mid的公共子串

復雜度為O(logn×N×logN)

也可以直接用後綴數組的height

#include    <cstring>
#include   <iostream>
#include  <algorithm>
#include     <string>
#include     <vector>
#include        <set>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e6+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor by the iterative Euclidean algorithm.
// Returns a when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result (a*b could overflow int even when the lcm fits).
// Returns 0 when either argument is 0 instead of dividing by gcd(0,0)==0.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
// Multiplier of the polynomial hash below.
const ull seed=19260817;
// Prefix hash of a string; all arithmetic wraps modulo 2^64 through unsigned overflow.
//   p[k] = seed^k
//   h[k] = hash of the first k characters (h[0] == 0)
struct Hash{
    vector<ull>p,h;
    Hash(){}
    Hash(const string& s){
        const int len=s.length();
        p.assign(len+1,0);
        h.assign(len+1,0);
        p[0]=1;
        for(int k=0;k<len;k++){
            p[k+1]=p[k]*seed;
            h[k+1]=h[k]*seed+s[k];
        }
    }
    // Hash of the half-open range [l,r).
    ull get(int l,int r){
        const ull shiftedPrefix=h[l]*p[r-l];
        return h[r]-shiftedPrefix;
    }
    // Hash of the m characters starting at index l.
    ull substr(int l,int m){
        return get(l,l+m);
    }
}A,B;
// Lengths of the two strings hashed in A and B (set by main).
int n,m;
// Returns true when some length-mid substring hashed in A has the same hash value
// as some length-mid substring hashed in B.
bool ok(int mid){
    vector<ull>hashesOfA;
    for(int start=0;start+mid<=n;start++)
        hashesOfA.push_back(A.substr(start,mid));
    // Sorted so that each window of B can be looked up with a binary search.
    sort(hashesOfA.begin(),hashesOfA.end());
    for(int start=0;start+mid<=m;start++){
        const ull wanted=B.substr(start,mid);
        if(binary_search(hashesOfA.begin(),hashesOfA.end(),wanted))
            return true;
    }
    return false;
}
// Reads two strings and prints the largest length for which ok() reports a common
// substring hash, found by binary search over the length.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    string a,b;
    cin>>a>>b;
    n=a.length(),m=b.length();
    // Keep the shorter string in a/A: the answer can never exceed its length.
    if(n>m){
        swap(a,b);
        swap(n,m);
    }
    A=Hash(a);
    B=Hash(b);
    int lo=0,hi=n,best=0;
    while(lo<=hi){
        const int mid=(lo+hi)>>1;
        if(!ok(mid)){
            hi=mid-1;
            continue;
        }
        best=mid;
        lo=mid+1;
    }
    cout<<best<<endl;
    return 0;
}

D.URAL - 1989 Subpalindromes

線段樹/樹狀數組和哈希應用 判斷回文

題意

給定一個字符串(長度<=100000),有兩個操作。 1:改變某個字符。 2:判斷某個子串是否構成回文串。

題解

把字符串的正向哈希和反向哈希分別維護在線段樹(或樹狀數組)中,支持單點修改、區間查詢;如果某個區間的正向哈希值和反向哈希值相同,那麼這個子串就是回文串

//線段樹
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
#define lson (x<<1)
#define rson ((x<<1)|1)
// Greatest common divisor by the iterative Euclidean algorithm.
// Returns a when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result (a*b could overflow int even when the lcm fits).
// Returns 0 when either argument is 0 instead of dividing by gcd(0,0)==0.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
// bit[i] holds 19260817^i modulo 2^64 (filled in main).
ull bit[maxn];
// The text being queried; tree position l corresponds to s[l-1].
string s;
// Segment tree node covering positions [l,r].
//   sum1: sum of bit[i-1] * value(i) over the node's positions
//   sum2: sum of bit[n-i] * value(i) over the node's positions
struct node{
    int l;
    int r;
    ull sum1;
    ull sum2;
}my[maxn<<2];
// Length of s.
int n;
void pushup(int x){
    my[x].sum1=my[lson].sum1+my[rson].sum1;
    my[x].sum2=my[lson].sum2+my[rson].sum2;
}
void build(int x,int l,int r){
    my[x].l=l;my[x].r=r;
    if(my[x].l==my[x].r){
        my[x].sum1=bit[l-1]*(s[l-1]-'a');
        my[x].sum2=bit[n-l]*(s[l-1]-'a');
        return;
    }
    int mid=(l+r)>>1;
    build(lson,l,mid);
    build(rson,mid+1,r);
    pushup(x);
}
// Accumulators that query() adds the covered nodes' sum1 / sum2 into; main zeroes them before each query.
ull one,two;
void update(int x,int pos,int val){
    if(my[x].l==my[x].r){
        my[x].sum1=bit[pos-1]*val;
        my[x].sum2=bit[n-pos]*val;
        return;
    }
    int mid=(my[x].l+my[x].r)>>1;
    if(pos<=mid)
        update(lson,pos,val);
    else
        update(rson,pos,val);
    pushup(x);
}
void query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r){
        one+=my[x].sum1;
        two+=my[x].sum2;
        return;
    }
    int mid=(my[x].l+my[x].r)>>1;
    if(l<=mid)query(lson,l,r);
    if(r>mid)query(rson,l,r);
}
// Reads the text and the number of commands, then processes each command:
//   a word starting with 'p' followed by x y  -> prints Yes/No for whether [x,y] is a palindrome
//   any other word followed by x ch           -> sets position x to ch
// A range is reported as a palindrome when its forward-weighted sum (sum1) and its
// reverse-weighted sum (sum2) agree after being brought to the same power offset.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    int t;
    // Stop on missing input: an empty text would make build(1,1,0) recurse without end.
    if(!(cin>>s>>t))return 0;
    n=s.length();
    bit[0]=1;
    for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
    build(1,1,n);
    while(t--){
        // std::string instead of a fixed char[50]: the read can no longer overrun a buffer,
        // and the name no longer shadows the global text s.
        string cmd;
        if(!(cin>>cmd))break;
        if(cmd[0]=='p'){
            int x,y;
            cin>>x>>y;
            one=0;two=0;
            query(1,x,y);
            // sum1 weights position i by bit[i-1], sum2 by bit[n-i]; scale the side with
            // the smaller exponents so both sides use the same powers.
            if((x-1)>(n-y))two*=bit[(x-1)-(n-y)];
            else one*=bit[(n-y)-(x-1)];
            // '\n' rather than endl: same bytes, without a flush per query.
            if(one==two)cout<<"Yes"<<'\n';
            else cout<<"No"<<'\n';
        }
        else{
            int x;char ch;
            cin>>x>>ch;
            update(1,x,ch-'a');
        }
    }
    return 0;
}
//樹狀數組
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor by the iterative Euclidean algorithm.
// Returns a when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result (a*b could overflow int even when the lcm fits).
// Returns 0 when either argument is 0 instead of dividing by gcd(0,0)==0.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
// bit[i] = 19260817^i modulo 2^64 (filled in main).
// c[.][0] and c[.][1] are two binary indexed trees: main feeds column 0 with the
// string's characters in order and column 1 with them in reverse order.
ull bit[maxn],c[maxn][2];
// n: number of commands of the current case; len: length of the current string.
int n,len;
// Returns the value of the lowest set bit of x (0 when x is 0).
inline int lowbit(int x){
    const int negated=-x;
    return x&negated;
}
// Adds val at index x of the binary indexed tree stored in column flag of c.
// x must be positive: lowbit(0) is 0, so the loop would never advance from 0.
void update(int x,ull val,int flag){
    for(int i=x;i<maxn;i+=lowbit(i))
        c[i][flag]+=val;
}
// Returns the prefix sum over indices 1..x of the binary indexed tree in column flag of c.
ull sum(int x,int flag){
    ull total=0;
    for(int i=x;i!=0;i-=lowbit(i))
        total+=c[i][flag];
    return total;
}
// Command word of the current query (main only inspects its first character).
string s;
// The string being queried and modified.
string str;
// For every input case: reads a string and a command count, then answers
// palindrome queries and applies single-character changes.
// Column 0 of c holds the characters weighted by bit[position-1]; column 1 holds the
// reversed string weighted the same way. A range is reported as a palindrome when
// both range sums agree after being scaled to the same power offset.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    bit[0]=1;
    for(int k=1;k<maxn;k++)
        bit[k]=bit[k-1]*19260817;
    while(cin>>str){
        len=str.length();
        memset(c,0,sizeof(c));
        for(int i=0;i<len;i++){
            update(i+1,(str[i]-'a'+1)*bit[i],0);
            update(i+1,(str[len-i-1]-'a'+1)*bit[i],1);
        }
        cin>>n;
        int l,r;
        while(n--){
            cin>>s;
            if(s[0]!='p'){
                int w;
                char ch;
                cin>>w>>ch;
                // Add the difference between the new and the old character in both trees.
                const ull forwardDelta=(ch-str[w-1])*bit[w-1];
                const ull reverseDelta=(ch-str[w-1])*bit[len-w];
                update(w,forwardDelta,0);
                update(len-w+1,reverseDelta,1);
                str[w-1]=ch;
                continue;
            }
            cin>>l>>r;
            const ull forwardPart=sum(r,0)-sum(l-1,0);
            const ull reversePart=sum(len-l+1,1)-sum(len-r,1);
            ull a=forwardPart*bit[len-r];
            ull b=reversePart*bit[l-1];
            cout<<(a==b?"Yes":"No")<<endl;
        }
    }
    return 0;
}

E.CodeForces - 580E Kefa and Watch

線段樹+哈希

題意

給你一個長度為n的字符串s,有兩種操作:

1 L R C : 把s[l,r]全部變為c;

2 L R d : 詢問s[l,r]是否是周期為d的重復串。

題解

n最大為1e5,且m+k最大也為1e5,這就要求操作1和操作2都要采用logn的算法,所以用線段樹.

對於更新操作,使用區間更新就可解決。

主要是如何在logn的時間內完成詢問操作.

我們采用線段樹維護hash值的方法.

結合於類似KMP的性質,我們發現,字符串[l,r]有長度為w的循環節,只需要使得[l,r-w]=[l+w,r]即可。證明過程看這裏

這題的hash不同於普通的字符串hash,因為涉及到動態修改,所以需要預先處理出所有的base,在修改的時候直接用.

#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor by the iterative Euclidean algorithm.
// Returns a when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result (a*b could overflow int even when the lcm fits).
// Returns 0 when either argument is 0 instead of dividing by gcd(0,0)==0.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
// Multiplier of the polynomial hash.
ull seed=19260817;
//ull seed=10;
// s[i] = seed^i mod `mod` (filled by init()).
ull s[maxn];
// fs[i] = (s[0]+s[1]+...+s[i]) mod `mod` (filled by init()).
ull fs[maxn];
// Input string read by main; build() subtracts '0' from each character.
char ss[maxn];
// Precomputes the power table s[] and its running sums fs[]:
//   s[i]  = seed^i mod `mod`
//   fs[i] = (s[0]+...+s[i]) mod `mod`
void init(){
    s[0]=1;
    fs[0]=1;
    for(int i=1;i<maxn;i++){
        s[i]=(s[i-1]*seed)%mod;
        fs[i]=(fs[i-1]+s[i])%mod;
    }
}
// Segment tree node covering positions [l,r].
struct node{
    int l;
    int r;
    int lazy;   // non-zero while a range assignment has not been pushed to the children
    int ok;     // value of that pending assignment (only read when lazy is set)
    ull num;    // hash of the node's range
}my[maxn<<2];
void pushup(int x){
    int mid=(my[x].l+my[x].r)>>1;
   // printf("x==%d x<<1=%d x<<1|1=%d  my[x<<1].num=%llu my[x<<1|1].num=%llu s==%d   \n",x,x<<1,x<<1|1,my[x<<1].num,my[(x<<1)|1].num,s[my[x].r-mid]);
    my[x].num=(my[x<<1].num*s[my[x].r-mid]+my[(x<<1|1)].num)%mod;
   // cout<<"x=="<<x<<" my[x].num"<<my[x].num<<endl;
}
// Pushes a pending range assignment of node x down to both children.
// A range of length L filled with value v hashes to fs[L-1]*v mod `mod`.
void pushdown(int x){
    if(!my[x].lazy)return;
    const int mid=(my[x].l+my[x].r)>>1;
    const int lc=x<<1;
    const int rc=(x<<1)|1;
    my[lc].lazy=my[rc].lazy=my[x].lazy;
    my[lc].ok=my[rc].ok=my[x].ok;
    my[lc].num=(fs[mid-my[x].l]*my[x].ok)%mod;
    my[rc].num=(fs[my[x].r-mid-1]*my[x].ok)%mod;
    my[x].lazy=0;
}
void build(int x,int l,int r){
    my[x].l=l;my[x].r=r;my[x].lazy=0;
    if(my[x].l==my[x].r){
        my[x].num=ss[l-1]-'0';
       // printf("my[%d].num=%d\n",x,my[x].num);
        return;
    }
    int mid=(l+r)>>1;
    build(x<<1,l,mid);
    build((x<<1)|1,mid+1,r);
    pushup(x);
}
void update(int x,int l,int r,int k){
    if(my[x].l>=l&&my[x].r<=r){
        my[x].num=(fs[my[x].r-my[x].l]*k)%mod;
        my[x].ok=k;
        my[x].lazy=1;
        return;
    }
    pushdown(x);
    int mid=(my[x].l+my[x].r)>>1;
    if(l<=mid)update(x<<1,l,r,k);
    if(r>mid)update(x<<1|1,l,r,k);
    pushup(x);
}
// Returns the hash of the part of [l,r] that lies inside node x's range.
// The former trailing pushup(x) was removed: every branch returns before it, and a
// query does not change any hash, so there is nothing to recompute.
ull query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r)return my[x].num;
    pushdown(x);
    int mid=(my[x].l+my[x].r)>>1;
    if(l>mid)return query(x<<1|1,l,r);
    if(r<=mid)return query(x<<1,l,r);
    // The range straddles mid: shift the left part by the length of the right part.
    ull t1=query(x<<1,l,r);
    ull t2=query(x<<1|1,l,r);
    int k=min(r,my[x].r)-mid;
    return (t1*s[k]+t2)%mod;
}
// Debug helper: prints the hash stored in tree nodes 1..4*n.
void pri(int n){
    for(int i=1;i<=n*4;i++){
        // In this program `ull` is a typedef for (signed) long long, so the value is
        // converted explicitly to the type that %llu expects.
        printf("my[%d].num=%llu\n",i,static_cast<unsigned long long>(my[i].num));
    }
}
// Reads the string and q+t operations, then handles each one:
//   op 1: assign d to every position in [l,r]
//   else: print YES when [l,r] has period d, i.e. when the hashes of [l,r-d] and
//         [l+d,r] are equal (always YES when d equals the range length), otherwise NO
int main()
{
    init();
    int n,q,t;
    scanf("%d%d%d",&n,&q,&t);
    q+=t;
    scanf("%s",ss);
    int len=strlen(ss);
    build(1,1,len);
    for(int i=0;i<q;i++){
        int op,l,r,d;
        scanf("%d%d%d%d",&op,&l,&r,&d);
        if(op==1){
            update(1,l,r,d);
            continue;
        }
        bool periodic=true;
        if(d!=r-l+1){
            const ull head=query(1,l,r-d);
            const ull tail=query(1,l+d,r);
            periodic=(head==tail);
        }
        if(periodic)printf("YES\n");
        else printf("NO\n");
    }

    return 0;
}

H.HDU - 1686 Oulipo

哈希水題,求模式串出現次數

#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor by the iterative Euclidean algorithm.
// Returns a when b is 0, so gcd(0,0) is 0.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple of a and b.
// The division is done before the multiplication so the intermediate value
// never exceeds the result (a*b could overflow int even when the lcm fits).
// Returns 0 when either argument is 0 instead of dividing by gcd(0,0)==0.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
// Multipliers and moduli the Hash constructor can choose from by index.
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
// Prefix polynomial hash of a string with O(1) substring-hash queries.
//   p[i] = SEED^i mod MOD
//   h[i] = hash of the first i characters (h[0] == 0)
struct Hash{
    ull SEED,MOD;
    vector<ull>p,h;
    // Empty hash; SEED/MOD are zeroed so the object never holds indeterminate values.
    Hash():SEED(0),MOD(0){}
    // s: text to hash.
    // seed_index / mod_index: positions in Seed_Pool / Mod_Pool (not range-checked).
    Hash(const string& s,const int& seed_index,const int& mod_index){
        SEED=Seed_Pool[seed_index];
        MOD=Mod_Pool[mod_index];
        int n=s.length();
        p.resize(n+1),h.resize(n+1);
        p[0]=1;
        for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
        for(int i=1;i<=n;i++){
            // Take the byte as 0..255. A negative plain char would be converted to a
            // value near 2^64, and the addition would then wrap at some positions but
            // not others, giving equal substrings different hashes.
            const ull ch=static_cast<unsigned char>(s[i-1]);
            h[i]=(h[i-1]*SEED%MOD+ch)%MOD;
        }
    }
    // Hash of the half-open range [l,r); adding MOD keeps the difference non-negative.
    ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
    // Hash of the m characters starting at index l.
    ull substr(int l,int m){return get(l,l+m);}
};
int main()
{
    int t;
    ios::sync_with_stdio(false);
    cin>>t;
    while(t--){
        string s;
        cin>>s;
        int n=s.length();
        Hash aa=Hash(s,0,0);
        ull a=aa.substr(0,n);
        cin>>s;
        int nn=s.length();
        aa=Hash(s,0,0);
        //cout<<"aa="<<a<<endl;
        int sum=0;
        for(int i=0;i+n<=nn;i++){
            if(aa.substr(i,n)==a){
                //cout<<aa.substr(i,n)<<endl;
                sum++;
            }
        }
        cout<<sum<<endl;
    }
    return 0;
}

字符串哈希專題