字符串哈希專題
阿新 • • 發佈:2019-01-29
esp cpp tdi mod sig bstr 不同 兩種 維護
layout: post
title: 字符串哈希專題
author: "luowentaoaa"
catalog: true
tags:
mathjax: true
- 字符串
傳送門
A.POJ - 1200 A - Crazy Search
摘要 哈希進制轉換
題意
給定一個字符串,考慮其中所有長度為N的子串,且字符串中不同的字符不會超過NC個。問總共有多少個不同的子串
思路
以nc作為進制,把一個子串化為這個進制下的數,再用哈希判斷
// POJ 1200 "Crazy Search": counts the distinct length-n substrings of the
// input text by reading every window as a base-nc number and marking that
// number in a lookup table.
#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;

// Greatest common divisor (iterative Euclid); gcd(a, 0) yields a.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}

// Least common multiple. Returns 0 when either argument is 0, and divides
// before multiplying so the product a*b is never formed.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}

const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};

// Prefix polynomial hash over a C string. Not used by main() below.
struct Hash{
    ull SEED,MOD;
    vector<ull>p,h;   // p[i] = SEED^i mod MOD, h[i] = hash of the first i characters
    Hash(){}
    Hash(const char* s,const int& seed_index,const int& mod_index){
        SEED=Seed_Pool[seed_index];
        MOD=Mod_Pool[mod_index];
        int n=strlen(s);
        p.resize(n+1),h.resize(n+1);
        p[0]=1;
        for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
        for(int i=1;i<=n;i++)h[i]=(h[i-1]*SEED%MOD+s[i-1])%MOD;
    }
    // Hash of the half-open range [l, r).
    ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
    // Hash of the m characters starting at index l.
    ull substr(int l,int m){return get(l,l+m);}
};

set<ull>st;
char s[16000005];
bool a[maxn*2];   // a[v] is true once a window with value v has been seen
int ha[256];      // dense id of each character value, -1 while unseen

int main()
{
    int n,nc;
    cin>>n>>nc>>s;
    int cnt=0;
    memset(ha,-1,sizeof(ha));
    memset(a,false,sizeof(a));
    int len=strlen(s);
    // Give every distinct character an id in order of first appearance.
    for(int i=0;i<len;i++){
        // Plain char may be negative; go through unsigned char so the index
        // always lands inside ha[0..255].
        const unsigned char ch=static_cast<unsigned char>(s[i]);
        if(ha[ch]==-1)ha[ch]=cnt++;
    }
    int res=0;
    for(int i=0;i+n<=len;i++){
        // Value of the window s[i, i+n) read as a base-nc number.
        // NOTE(review): assumes nc^n stays below the size of a[] — confirm
        // against the problem limits.
        int sum=0;
        for(int j=i;j<i+n;j++){
            sum*=nc;
            sum+=ha[static_cast<unsigned char>(s[j])];
        }
        if(!a[sum])res++,a[sum]=true;
    }
    cout<<res<<endl;
    return 0;
}
C.POJ - 2774 Long Long Message
兩個字符串的最長公共子串長度
題意
求兩個字符串的最長公共子串長度
題解
二分長度mid,把字符串A中所有長度為mid的子串的哈希值放入數組並排序,再對字符串B中每個長度為mid的子串的哈希值,在該數組中二分查找是否存在
復雜度為O(logn×N×logN)
也可以直接用後綴數組的height
// POJ 2774 "Long Long Message": binary-searches the answer length and, for
// each candidate length, looks for a substring hash shared by both strings.
#include <cstring>
#include <iostream>
#include <algorithm>
#include <string>
#include <vector>
#include <set>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e6+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;

// Greatest common divisor (iterative Euclid); gcd(a, 0) yields a.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}

// Least common multiple. Returns 0 when either argument is 0, and divides
// before multiplying so the product a*b is never formed.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}

const ull seed=19260817;

// Prefix polynomial hash; all arithmetic wraps modulo 2^64 through unsigned
// overflow, so no explicit modulus is applied.
struct Hash{
    vector<ull>p,h;   // p[i] = seed^i, h[i] = hash of the first i characters
    Hash(){}
    Hash(const string& s){
        int n=s.length();
        p.resize(n+1),h.resize(n+1);
        p[0]=1;
        for(int i=1;i<=n;i++)p[i]=p[i-1]*seed;
        for(int i=1;i<=n;i++)h[i]=(h[i-1]*seed+s[i-1]);
    }
    // Hash of the half-open range [l, r).
    ull get(int l,int r){return(h[r]-h[l]*p[r-l]);}
    // Hash of the m characters starting at index l.
    ull substr(int l,int m){return get(l,l+m);}
}A,B;

int n,m;   // lengths of the strings hashed in A and B

// Returns true when some length-mid substring hash of A also occurs among
// the length-mid substring hashes of B.
bool ok(int mid){
    vector<ull>ve;
    if(mid<=n)ve.reserve(n-mid+1);   // one slot per window of A
    for(int i=0;i<=n-mid;i++){
        ve.push_back(A.substr(i,mid));
    }
    sort(ve.begin(),ve.end());
    for(int i=0;i<=m-mid;i++){
        if(binary_search(ve.begin(),ve.end(),B.substr(i,mid))){
            return true;
        }
    }
    return false;
}

int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    string a,b;
    cin>>a>>b;
    n=a.length(),m=b.length();
    // Keep the shorter string in A so the search range is [0, n].
    if(n>m){swap(a,b);swap(n,m);}
    A=Hash(a);B=Hash(b);
    int l=0,r=n;
    int haha=0;   // largest length found so far for which ok() succeeded
    while(l<=r){
        int mid=(r+l)>>1;
        if(ok(mid)){
            haha=mid;
            l=mid+1;
        }
        else r=mid-1;
    }
    cout<<haha<<'\n';
    return 0;
}
D.URAL - 1989 Subpalindromes
線段樹/樹狀數組和哈希應用 判斷回文
題意
給定一個字符串(長度<=100000),有兩個操作。 1:改變某個字符。 2:判斷某個子串是否構成回文串。
題解
把字符串正向、反向的哈希值分別插入線段樹或樹狀數組中,然後單點修改,區間查值,如果正向和反向的值一樣,那就是回文了
// Segment tree version
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
#define lson (x<<1)
#define rson ((x<<1)|1)

// Greatest common divisor (iterative Euclid); gcd(a, 0) yields a.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}

// Least common multiple. Returns 0 when either argument is 0, and divides
// before multiplying so the product a*b is never formed.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}

ull bit[maxn];   // bit[i] = 19260817^i, wrapping modulo 2^64
string s;

// Segment-tree node over positions [l, r] (1-based).
struct node{
    int l,r;
    ull sum1;   // sum of bit[k-1]*(s[k-1]-'a') over the node's positions k
    ull sum2;   // sum of bit[n-k]*(s[k-1]-'a') over the node's positions k
}my[maxn<<2];
int n;

// Recomputes both sums of node x from its two children.
void pushup(int x){
    my[x].sum1=my[lson].sum1+my[rson].sum1;
    my[x].sum2=my[lson].sum2+my[rson].sum2;
}

// Builds the subtree rooted at x for positions [l, r] from the string s.
void build(int x,int l,int r){
    my[x].l=l;my[x].r=r;
    if(my[x].l==my[x].r){
        my[x].sum1=bit[l-1]*(s[l-1]-'a');
        my[x].sum2=bit[n-l]*(s[l-1]-'a');
        return;
    }
    int mid=(l+r)>>1;
    build(lson,l,mid);
    build(rson,mid+1,r);
    pushup(x);
}

ull one,two;   // accumulators filled by query(): forward and backward sums

// Sets the character value at position pos to val and refreshes the path to the root.
void update(int x,int pos,int val){
    if(my[x].l==my[x].r){
        my[x].sum1=bit[pos-1]*val;
        my[x].sum2=bit[n-pos]*val;
        return;
    }
    int mid=(my[x].l+my[x].r)>>1;
    if(pos<=mid) update(lson,pos,val);
    else update(rson,pos,val);
    pushup(x);
}

// Adds the sums of every node fully inside [l, r] to the globals one/two.
void query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r){
        one+=my[x].sum1;
        two+=my[x].sum2;
        return;
    }
    int mid=(my[x].l+my[x].r)>>1;
    if(l<=mid)query(lson,l,r);
    if(r>mid)query(rson,l,r);
}

int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    cin>>s;n=s.length();
    int t;
    cin>>t;
    bit[0]=1;
    for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
    build(1,1,n);
    while(t--){
        // A std::string avoids both the fixed 50-byte buffer and the
        // shadowing of the global text s that the build step reads.
        string cmd;
        cin>>cmd;
        if(cmd[0]=='p'){
            int x,y;
            cin>>x>>y;
            one=0;two=0;
            query(1,x,y);
            // The forward sum starts at power x-1 and the backward sum at
            // power n-y; scale the lower one up so both start at the same power.
            if((x-1)>(n-y))two*=bit[(x-1)-(n-y)];
            else one*=bit[(n-y)-(x-1)];
            cout<<(one==two?"Yes":"No")<<'\n';
        }
        else{
            int x;char ch;
            cin>>x>>ch;
            update(1,x,ch-'a');
        }
    }
    return 0;
}
//樹狀數組
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor (iterative Euclid); gcd(a, 0) yields a.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple. Returns 0 when either argument is 0 (the previous
// form divided by zero for lcm(0, 0)), and divides before multiplying so the
// product a*b, which can overflow int, is never formed.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
ull bit[maxn],c[maxn][2];
int n,len;
// Isolates the lowest set bit of x; the result is 0 when x is 0.
inline int lowbit(int x){
    const int negated=-x;
    return negated&x;
}
// Fenwick point update: adds val to column `flag` of c at index x and at
// every following index obtained by adding lowbit, while the index is below maxn.
void update(int x,ull val,int flag){
    for(int idx=x;idx<maxn;idx+=lowbit(idx)){
        c[idx][flag]+=val;
    }
}
// Fenwick prefix query: returns the total of column `flag` of c over the
// indices visited by stripping lowbit from x until it reaches 0.
ull sum(int x,int flag){
    ull total=0;
    for(int idx=x;idx!=0;idx-=lowbit(idx)){
        total+=c[idx][flag];
    }
    return total;
}
string s;
string str;
// Driver for the Fenwick-tree solution: keeps weighted sums of the string and
// of its reverse, answers palindrome queries by comparing the two, and
// applies single-character changes.
int main()
{
    std::ios::sync_with_stdio(false);
    std::cin.tie(0);
    std::cout.tie(0);
    // bit[i] = 19260817^i, wrapping modulo 2^64 through unsigned overflow.
    bit[0]=1;
    for(int i=1;i<maxn;i++)bit[i]=bit[i-1]*19260817;
    while(cin>>str){
        len=str.length();
        memset(c,0,sizeof(c));
        for(int i=0;i<len;i++){
            // Column 0 holds the string, column 1 its reverse; index i+1
            // carries the weight bit[i] in both.
            update(i+1,(str[i]-'a'+1)*bit[i],0);
            update(i+1,(str[len-i-1]-'a'+1)*bit[i],1);
        }
        cin>>n;
        int l,r;
        while(n--){
            cin>>s;
            if(s[0]=='p'){
                cin>>l>>r;
                // Scale both range sums so their lowest powers line up before comparing.
                ull a=(sum(r,0)-sum(l-1,0))*bit[len-r];
                ull b=(sum(len-l+1,1)-sum(len-r,1))*bit[l-1];
                // '\n' instead of endl: the answer is not flushed after
                // every query, which matters once the streams are untied for speed.
                cout<<(a==b?"Yes":"No")<<'\n';
            }
            else{
                int w;
                char ch;
                cin>>w>>ch;
                // Apply the difference between the new and old character;
                // a negative difference wraps correctly in unsigned arithmetic.
                update(w,(ch-str[w-1])*bit[w-1],0);
                update(len-w+1,(ch-str[w-1])*bit[len-w],1);
                str[w-1]=ch;
            }
        }
    }
    return 0;
}
E.CodeForces - 580E Kefa and Watch
線段樹+哈希
題意
給你一個長度為n的字符串s,有兩種操作:
1 L R C : 把s[l,r]全部變為c;
2 L R d : 詢問s[l,r]是否是周期為d的重復串。
題解
n最大為1e5,且m+k最大也為1e5,這就要求操作1和操作2都要采用logn的算法,所以用線段樹.
對於更新操作,使用區間更新就可解決。
主要是如何在logn的時間內完成詢問操作.
我們采用線段樹維護hash值的方法.
結合於類似KMP的性質,我們發現,字符串[l,r]有長度為w的循環節,只需要使得[l,r-w]=[l+w,r]即可。證明過程看這裏
這題的hash不同於普通的字符串hash,因為涉及到動態修改,所以需要預先處理出所有的base,在修改的時候直接用.
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e5+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor (iterative Euclid); gcd(a, 0) yields a.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple. Returns 0 when either argument is 0 (the previous
// form divided by zero for lcm(0, 0)), and divides before multiplying so the
// product a*b, which can overflow int, is never formed.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
ull seed=19260817;
//ull seed=10;
ull s[maxn];
ull fs[maxn];
char ss[maxn];
// Precomputes two tables modulo mod: s[i] = seed^i, and fs[i] = the sum of
// s[0..i].
void init(){
    s[0]=1;
    fs[0]=1;
    for(int i=1;i<maxn;i++){
        s[i]=(s[i-1]*seed)%mod;
        fs[i]=(fs[i-1]+s[i])%mod;
    }
}
// One segment-tree node; the tree is stored in the global array my.
struct node{
    int l;      // left end of the covered range
    int r;      // right end of the covered range
    int lazy;   // non-zero while a range assignment still has to reach the children
    int ok;     // value written by that pending range assignment
    ull num;    // hash of the covered range, reduced modulo mod
}my[maxn<<2];
// Rebuilds the hash of node x from its children: the left hash is shifted by
// the length of the right child and the right hash is added.
void pushup(int x){
    const int left=x<<1;
    const int right=left|1;
    const int mid=(my[x].l+my[x].r)>>1;
    const int rightLen=my[x].r-mid;
    my[x].num=(my[left].num*s[rightLen]+my[right].num)%mod;
}
void pushdown(int x){
if(my[x].lazy){
int mid=(my[x].l+my[x].r)>>1;
my[x<<1].lazy=my[(x<<1)|1].lazy=my[x].lazy;
my[x<<1].ok=my[x<<1|1].ok=my[x].ok;
my[x<<1].num=(fs[mid-my[x].l]*my[x].ok)%mod;
my[(x<<1)|1].num=(fs[my[x].r-mid-1]*my[x].ok)%mod;
my[x].lazy=0;
}
}
void build(int x,int l,int r){
my[x].l=l;my[x].r=r;my[x].lazy=0;
if(my[x].l==my[x].r){
my[x].num=ss[l-1]-'0';
// printf("my[%d].num=%d\n",x,my[x].num);
return;
}
int mid=(l+r)>>1;
build(x<<1,l,mid);
build((x<<1)|1,mid+1,r);
pushup(x);
}
void update(int x,int l,int r,int k){
if(my[x].l>=l&&my[x].r<=r){
my[x].num=(fs[my[x].r-my[x].l]*k)%mod;
my[x].ok=k;
my[x].lazy=1;
return;
}
pushdown(x);
int mid=(my[x].l+my[x].r)>>1;
if(l<=mid)update(x<<1,l,r,k);
if(r>mid)update(x<<1|1,l,r,k);
pushup(x);
}
// Returns the hash (modulo mod) of the part of [l, r] that lies inside the
// subtree rooted at x.
ull query(int x,int l,int r){
    if(my[x].l>=l&&my[x].r<=r)return my[x].num;
    pushdown(x);
    int mid=(my[x].l+my[x].r)>>1;
    if(l>mid)return query(x<<1|1,l,r);
    if(r<=mid)return query(x<<1,l,r);
    // The range straddles mid: shift the left part by the number of queried
    // positions on the right side, then append the right part.
    ull t1=query(x<<1,l,r);
    ull t2=query(x<<1|1,l,r);
    int k=min(r,my[x].r)-mid;
    return (t1*s[k]+t2)%mod;
    // The former trailing pushup(x) could never run (every path above
    // returns) and has been removed.
}
// Debug helper: prints the stored hash of nodes 1 .. 4*n, one per line.
void pri(int n){
    for(int i=1;i<=n*4;i++){
        // num is a signed long long in this program, so print it with %lld;
        // the explicit cast keeps the format and the argument type in agreement.
        printf("my[%d].num=%lld\n",i,static_cast<long long>(my[i].num));
    }
}
int main()
{
/* std::ios::sync_with_stdio(false);
std::cin.tie(0);
std::cout.tie(0);*/
init();
int n,q,t;
scanf("%d%d%d",&n,&q,&t);
q+=t;
scanf("%s",ss);
int len=strlen(ss);
build(1,1,len);
// pri(len);
for(int i=0;i<q;i++){
int op,l,r,d;
scanf("%d%d%d%d",&op,&l,&r,&d);
if(op==1)update(1,l,r,d);
else {
if(d==r-l+1){
printf("YES\n");
continue;
}
ull one=query(1,l,r-d);
// cout<<"one="<<one<<endl;
ull two=query(1,l+d,r);
// cout<<"two="<<two<<endl;
if(one==two)printf("YES\n");
else printf("NO\n");
}
}
return 0;
}
H.HDU - 1686 Oulipo
哈希水題,求模式串出現次數
#include<cstdio>
#include<iostream>
#include<cstring>
#include<string>
#include<set>
#include<vector>
using namespace std;
typedef long long ll;
typedef unsigned long long ull;
#define pp pair<int,int>
const ll mod=998244353;
const int maxn=1e7+50;
const ll inf=0x3f3f3f3f3f3f3f3fLL;
// Greatest common divisor (iterative Euclid); gcd(a, 0) yields a.
int gcd(int a,int b){
    while(b){
        int t=a%b;
        a=b;
        b=t;
    }
    return a;
}
// Least common multiple. Returns 0 when either argument is 0 (the previous
// form divided by zero for lcm(0, 0)), and divides before multiplying so the
// product a*b, which can overflow int, is never formed.
int lcm(int a,int b){
    if(a==0||b==0)return 0;
    return a/gcd(a,b)*b;
}
const ull Seed_Pool[]={146527,19260817};
const ull Mod_Pool[]={1000000009,998244353};
struct Hash{
ull SEED,MOD;
vector<ull>p,h;
Hash(){}
Hash(const string& s,const int& seed_index,const int& mod_index){
SEED=Seed_Pool[seed_index];
MOD=Mod_Pool[mod_index];
int n=s.length();
p.resize(n+1),h.resize(n+1);
p[0]=1;
for(int i=1;i<=n;i++)p[i]=p[i-1]*SEED%MOD;
for(int i=1;i<=n;i++)h[i]=(h[i-1]*SEED%MOD+s[i-1])%MOD;
}
ull get(int l,int r){return (h[r]-h[l]*p[r-l]%MOD+MOD)%MOD;}
ull substr(int l,int m){return get(l,l+m);}
};
int main()
{
int t;
ios::sync_with_stdio(false);
cin>>t;
while(t--){
string s;
cin>>s;
int n=s.length();
Hash aa=Hash(s,0,0);
ull a=aa.substr(0,n);
cin>>s;
int nn=s.length();
aa=Hash(s,0,0);
//cout<<"aa="<<a<<endl;
int sum=0;
for(int i=0;i+n<=nn;i++){
if(aa.substr(i,n)==a){
//cout<<aa.substr(i,n)<<endl;
sum++;
}
}
cout<<sum<<endl;
}
return 0;
}
字符串哈希專題