之前我们给的SAM的例题,基本上是一个串建SAM的就能做的

如果要建多个串的SAM应该怎么做呢

首先看题,bzoj2780

我一开始的想法是SA以前的弄法,把串拼起来,中间加分隔符做SAM

这题确实可以这么做,这样根据SAM能识别所有子串的性质

而且每个节点都代表了唯一的一个串

每个询问串我们都能找到最终转移到哪(找不到就是没出现过)

问在多少个串出现过这就等价于在ST(s)的parent树的子树中,出现了多少种不同的权值

这显然可以维护dfs序,用经典的离线做法来搞(更好的写法见文末UPD)


关于广义后缀树(多串SAM)的总结_树状数组关于广义后缀树(多串SAM)的总结_二分_02


1 type node=record
2 po,next:longint;
3 end;
4
5 var go:array[0..300010,1..27] of longint;
6 d,mx,fa,l,r,p,c,wh,w,fir,next:array[0..300010] of longint;
7 ans,a,b:array[0..60010] of longint;
8 e:array[0..300010] of node;
9 h,t,k,last,len,i,j,n,q,x:longint;
10 s,ss:ansistring;
11
12 function lowbit(x:longint):longint;
13 begin
14 exit(x and (-x));
15 end;
16
17 function cmp(a,b:longint):boolean;
18 begin
19 exit(l[a]<l[b]);
20 end;
21
22 procedure swap(var a,b:longint);
23 var c:longint;
24 begin
25 c:=a;
26 a:=b;
27 b:=c;
28 end;
29
30 procedure sort(l,r:longint);
31 var i,j,x:longint;
32 begin
33 i:=l;
34 j:=r;
35 x:=a[(l+r) shr 1];
36 repeat
37 while cmp(a[i],x) do inc(i);
38 while cmp(x,a[j]) do dec(j);
39 if not(i>j) then
40 begin
41 swap(a[i],a[j]);
42 swap(b[i],b[j]);
43 inc(i);
44 dec(j);
45 end;
46 until i>j;
47 if l<j then sort(l,j);
48 if i<r then sort(i,r);
49 end;
50
51 procedure build(x,y:longint);
52 begin
53 inc(len);
54 e[len].po:=y;
55 e[len].next:=p[x];
56 p[x]:=len;
57 end;
58
59 procedure add(c,x:longint);
60 var p,q,np,nq:longint;
61 begin
62 p:=last;
63 inc(t); last:=t; np:=t;
64 w[np]:=x; mx[np]:=mx[p]+1;
65 while (p<>0) and (go[p,c]=0) do
66 begin
67 go[p,c]:=np;
68 p:=fa[p];
69 end;
70 if p=0 then fa[np]:=1
71 else begin
72 q:=go[p,c];
73 if mx[q]=mx[p]+1 then fa[np]:=q
74 else begin
75 inc(t); nq:=t;
76 mx[nq]:=mx[p]+1;
77 go[nq]:=go[q];
78 fa[nq]:=fa[q];
79 fa[q]:=nq; fa[np]:=nq;
80 while go[p,c]=q do
81 begin
82 go[p,c]:=nq;
83 p:=fa[p];
84 end;
85 end;
86 end;
87 end;
88
89 procedure dfs(x:longint);
90 var i:longint;
91 begin
92 inc(h);
93 l[x]:=h;
94 d[h]:=w[x]; //dfs序
95 i:=p[x];
96 while i<>0 do
97 begin
98 dfs(e[i].po);
99 i:=e[i].next;
100 end;
101 r[x]:=h;
102 end;
103
104 procedure work(x:longint);
105 begin
106 while x<=t do
107 begin
108 inc(c[x]);
109 x:=x+lowbit(x);
110 end;
111 end;
112
113 function ask(x:longint):longint;
114 begin
115 ask:=0;
116 while x>0 do
117 begin
118 ask:=ask+c[x];
119 x:=x-lowbit(x);
120 end;
121 end;
122
123 begin
124 readln(n,q);
125 for i:=1 to n do
126 begin
127 readln(ss);
128 len:=length(ss);
129 for j:=1 to len do //拼接
130 begin
131 s:=s+ss[j];
132 inc(t); wh[t]:=i;
133 end;
134 if i<>n then
135 begin
136 s:=s+chr(97+26);
137 inc(t);
138 end;
139 end;
140 len:=length(s);
141 t:=1; last:=1;
142 for i:=len downto 1 do
143 add(ord(s[i])-96,wh[i]);
144
145 len:=0;
146 for i:=2 to t do //构建树
147 build(fa[i],i);
148
149 dfs(1);
150 for i:=1 to q do
151 begin
152 readln(s);
153 j:=1;
154 len:=length(s);
155 for k:=len downto 1 do //每个询问串最终转移到哪
156 begin
157 x:=ord(s[k])-96;
158 if go[j,x]=0 then
159 begin
160 j:=0;
161 break;
162 end
163 else j:=go[j,x];
164 end;
165 a[i]:=j;
166 b[i]:=i;
167 end;
168 for i:=t downto 2 do //经典的离线做法
169 begin
170 next[i]:=fir[d[i]];
171 fir[d[i]]:=i;
172 end;
173 for i:=1 to n do
174 if fir[i]<>0 then work(fir[i]);
175 sort(1,q);
176 j:=1;
177 while a[j]=0 do inc(j);
178 for i:=1 to t do
179 begin
180 while (j<=q) and (l[a[j]]=i) do
181 begin
182 ans[b[j]]:=ask(r[a[j]])-ask(i-1);
183 inc(j);
184 end;
185 if d[i]<>0 then
186 if next[i]<>0 then work(next[i]);
187 end;
188 for i:=1 to q do
189 writeln(ans[i]);
190 end.

2780

然后我看到了bzoj3277 3473(双倍经验)

用我刚才的做法似乎不好搞,因为这题问每个字符串有多少子串出现在至少k个子串中

而刚才那种拼接,每个节点可接受的子串会搞出一堆不存在的子串

这时,我膜拜了wyfcyx的构建广义后缀树的做法,显然这里每个串要反序建SAM(这样才能构造出原串的后缀树)

他的做法是建完一个串的SAM后回到根,对下个串s先匹配

如果转移到的节点在SAM中可接受的最长串长度=当前匹配s的长度,那么这个节点可以代表s

否则的话就像SAM一样新开一个节点,感觉和可持久化的思想很像

这样每个节点可能代表了多个串的子串,并且也没有多出来奇怪的子串

而且每个节点可接受串出现在多少个串中依然=parent树的子树中,出现了多少种不同的权值

这样我们可以像刚才一样,求出每个点出现次数,如果大于等于k,

那么根据之前的性质,这个点p可接受串的长度为[max[fa[p]]+1,max[p]]

那么点p能做出的贡献即为max[p]-max[fa[p]],否则贡献为0

由于子串是某个后缀的前缀,所以每个字符串的答案等于所有这个字符串的后缀节点的从根到该节点的权值和


关于广义后缀树(多串SAM)的总结_树状数组关于广义后缀树(多串SAM)的总结_二分_02


1 type node=record
2 po,next:longint;
3 end;
4
5 var e,w,pr:array[0..400010] of node;
6 go:array[0..400010,'a'..'z'] of longint;
7 sa,r,h,q,p,c,d,cur,a,b,mx,fa:array[0..400010] of longint;
8 g,ans:array[0..400010] of int64;
9 n,k,l,ti,y,f,i,j,len,x,t,last:longint;
10 s:ansistring;
11 ch:char;
12
13 function lowbit(x:longint):longint;
14 begin
15 exit(x and (-x));
16 end;
17
18 procedure swap(var a,b:longint);
19 var c:longint;
20 begin
21 c:=a;
22 a:=b;
23 b:=c;
24 end;
25
26 procedure ins(x,y:longint);
27 begin
28 inc(len);
29 e[len].po:=y;
30 e[len].next:=p[x];
31 p[x]:=len;
32 end;
33
34 procedure add(c:char;x:longint);
35 var p,q,np,nq:longint;
36 begin
37 p:=last;
38 inc(t); last:=t; np:=t;
39 mx[np]:=mx[p]+1;
40 while (p<>0) and (go[p,c]=0) do
41 begin
42 go[p,c]:=np;
43 p:=fa[p];
44 end;
45 if p=0 then fa[np]:=1
46 else begin
47 q:=go[p,c];
48 if mx[q]=mx[p]+1 then fa[np]:=q
49 else begin
50 inc(t); nq:=t;
51 mx[nq]:=mx[p]+1;
52 go[nq]:=go[q];
53 fa[nq]:=fa[q];
54 fa[q]:=nq; fa[np]:=nq;
55 while go[p,c]=q do
56 begin
57 go[p,c]:=nq;
58 p:=fa[p];
59 end;
60 end;
61 end;
62 end;
63
64 procedure change(c:char);
65 var p,np,q:longint;
66 begin
67 p:=go[last,c];
68 if mx[p]=mx[last]+1 then last:=p
69 else begin
70 inc(t); np:=t;
71 mx[np]:=mx[last]+1;
72 go[np]:=go[p];
73 fa[np]:=fa[p];
74 fa[p]:=np;
75 q:=last;
76 while go[q,c]=p do
77 begin
78 go[q,c]:=np;
79 q:=fa[q];
80 end;
81 last:=np;
82 end;
83 end;
84
85 procedure dfs(x:longint);
86 var i:longint;
87 begin
88 inc(ti);
89 sa[ti]:=x; //dfs序上对应哪个点
90 b[x]:=ti;
91 i:=p[x];
92 while i<>0 do
93 begin
94 dfs(e[i].po);
95 i:=e[i].next;
96 end;
97 r[x]:=ti;
98 end;
99
100 procedure dfss(x:longint);
101 var i:longint;
102 begin
103 g[x]:=g[x]+g[fa[x]];
104 i:=p[x];
105 while i<>0 do
106 begin
107 dfss(e[i].po);
108 i:=e[i].next;
109 end;
110 end;
111
112 procedure work(x,w:longint);
113 begin
114 while x<=t do
115 begin
116 inc(c[x],w);
117 x:=x+lowbit(x);
118 end;
119 end;
120
121 function ask(x:longint):longint;
122 begin
123 ask:=0;
124 while x>0 do
125 begin
126 ask:=ask+c[x];
127 x:=x-lowbit(x);
128 end;
129 end;
130
131 procedure put(x,y:longint);
132 begin
133 inc(len); //这个串x所有后缀所在的节点
134 w[len].po:=y;
135 w[len].next:=q[x];
136 q[x]:=len;
137 pr[len].po:=x; //节点代表了哪些串的后缀
138 pr[len].next:=h[y];
139 h[y]:=len;
140 end;
141
142 function cmp(a,b:longint):boolean;
143 begin
144 exit(r[a]<r[b]);
145 end;
146
147 procedure sort(l,r:longint);
148 var i,j,x:longint;
149 begin
150 i:=l;
151 j:=r;
152 x:=a[(l+r) shr 1];
153 repeat
154 while cmp(a[i],x) do inc(i);
155 while cmp(x,a[j]) do dec(j);
156 if not(i>j) then
157 begin
158 swap(a[i],a[j]);
159 inc(i);
160 dec(j);
161 end;
162 until i>j;
163 if l<j then sort(l,j);
164 if i<r then sort(i,r);
165 end;
166
167 begin
168 readln(n,k);
169 last:=1; t:=1;
170 for i:=1 to n do
171 begin
172 readln(s);
173 l:=length(s); last:=1;
174 for j:=l downto 1 do
175 begin
176 if go[last,s[j]]<>0 then change(s[j]) //广义后缀树
177 else add(s[j],i);
178 put(i,last);
179 end;
180 end;
181 len:=0;
182 for i:=2 to t do
183 ins(fa[i],i);
184
185 dfs(1);
186 for i:=1 to t do
187 a[i]:=i;
188 sort(1,t);
189 j:=1; x:=a[1];
190 for i:=1 to t do
191 begin
192 f:=h[sa[i]];
193 while f<>0 do //因为一个节点可能代表了多个穿,插入相对麻烦
194 begin
195 y:=pr[f].po;
196 if cur[y]<>0 then work(cur[y],-1);
197 cur[y]:=i;
198 work(i,1);
199 f:=pr[f].next;
200 end;
201 while (j<=t) and (r[x]=i) do
202 begin
203 len:=ask(i)-ask(b[x]-1);
204 if len<k then g[x]:=0 else g[x]:=mx[x]-mx[fa[x]];
205 inc(j); x:=a[j];
206 end;
207 if j=t+1 then break;
208 end;
209 dfss(1);
210 for i:=1 to n do
211 begin
212 j:=q[i];
213 while j<>0 do
214 begin
215 x:=w[j].po;
216 ans[i]:=ans[i]+g[x];
217 j:=w[j].next;
218 end;
219 end;
220 for i:=1 to n do
221 write(ans[i],' ');
222 writeln;
223 end.

View Code

bzoj2806 第一道小强和阿米巴的题,解锁新成就

构造出标准作文库的SAM后,L0不难想到二分答案吧

然后我们可以求出以询问串每个位置i为结尾的最长子串长度P[i]

不难得到f[i]到i最长熟悉 f[i]=max(f[i-1],f[j]+i-j) (i-j>=l0 且 (i-j<=P[i])

然后这个是明显的单调队列优化吧


关于广义后缀树(多串SAM)的总结_树状数组关于广义后缀树(多串SAM)的总结_二分_02


1 var go:array[0..1200010*2,'0'..'1'] of longint;
2 q,v,f:array[0..1200010] of longint;
3 fa,mx:array[0..1200010*2] of longint;
4 ans,mid,i,n,m,last,t,l,r,j:longint;
5 s:ansistring;
6 c:char;
7
8 function max(a,b:longint):longint;
9 begin
10 if a>b then exit(a) else exit(b);
11 end;
12
13 procedure change(c:char);
14 var q,p,np:longint;
15 begin
16 p:=go[last,c];
17 if mx[p]=mx[last]+1 then last:=p
18 else begin
19 inc(t); np:=t;
20 mx[np]:=mx[last]+1;
21 go[np]:=go[p];
22 fa[np]:=fa[p];
23 fa[p]:=np;
24 q:=last;
25 while go[q,c]=p do
26 begin
27 go[q,c]:=np;
28 q:=fa[q];
29 end;
30 last:=np;
31 end;
32 end;
33
34 procedure add(c:char);
35 var p,q,np,nq:longint;
36 begin
37 p:=last;
38 inc(t); last:=t; np:=t;
39 mx[np]:=mx[p]+1;
40 while (p<>0) and (go[p,c]=0) do
41 begin
42 go[p,c]:=np;
43 p:=fa[p];
44 end;
45 if p=0 then fa[np]:=1
46 else begin
47 q:=go[p,c];
48 if mx[q]=mx[p]+1 then fa[np]:=q
49 else begin
50 inc(t); nq:=t;
51 mx[nq]:=mx[p]+1;
52 go[nq]:=go[q];
53 fa[nq]:=fa[q];
54 fa[q]:=nq; fa[np]:=nq;
55 while go[p,c]=q do
56 begin
57 go[p,c]:=nq;
58 p:=fa[p];
59 end;
60 end;
61 end;
62 end;
63
64 procedure match;
65 var i,j,l,t:longint;
66 begin
67 j:=1; t:=0;
68 l:=length(s);
69 for i:=1 to l do
70 begin
71 if go[j,s[i]]<>0 then
72 begin
73 inc(t);
74 j:=go[j,s[i]];
75 end
76 else begin
77 while (j<>0) and (go[j,s[i]]=0) do j:=fa[j];
78 if j=0 then
79 begin
80 t:=0;
81 j:=1;
82 end
83 else begin
84 t:=mx[j]+1;
85 j:=go[j,s[i]];
86 end;
87 end;
88 v[i]:=t;
89 end;
90 end;
91
92 function cmp(i,j:longint):boolean;
93 begin
94 exit(f[i]-i<f[j]-j);
95 end;
96
97 function check(l0:longint):boolean;
98 var h,t,i,n:longint;
99 begin
100 n:=length(s);
101 for i:=0 to l0-1 do
102 f[i]:=0;
103 h:=1; t:=0;
104 for i:=l0 to n do
105 begin
106 while (h<=t) and (cmp(q[t],i-l0)) do dec(t);
107 inc(t);
108 q[t]:=i-l0;
109 f[i]:=f[i-1];
110 while (h<=t) and (q[h]<i-v[i]) do inc(h);
111 if h<=t then f[i]:=max(f[i],f[q[h]]+i-q[h]);
112 end;
113 if f[n]/n>=0.89999999999 then exit(true) else exit(false);
114 end;
115
116 begin
117 readln(n,m);
118 t:=1;
119 for i:=1 to m do
120 begin
121 readln(s);
122 last:=1;
123 l:=length(s);
124 for j:=1 to l do
125 if go[last,s[j]]<>0 then change(s[j])
126 else add(s[j]);
127 end;
128 for i:=1 to n do
129 begin
130 readln(s);
131 match;
132 l:=0;
133 r:=length(s);
134 while l<=r do
135 begin
136 mid:=(l+r) shr 1;
137 if check(mid) then
138 begin
139 ans:=mid;
140 l:=mid+1;
141 end
142 else r:=mid-1;
143 end;
144 writeln(ans);
145 end;
146 end.

2806

 

UPD:以前写的广义后缀树有点冗长,最近转c++重新写了一份感觉好多了……

以我之前写的2780为例


关于广义后缀树(多串SAM)的总结_树状数组关于广义后缀树(多串SAM)的总结_二分_02


1 #include<iostream>
2 #include<cstring>
3 #include<cstdio>
4 #include<stdlib.h>
5 #include<algorithm>
6 #include<vector>
7
8 using namespace std;
9 vector<int> b[200010],q[10010];
10 //b[]记录每个节点是哪些串的子串,q[]记录每个串所有后缀所在的节点
11 struct way{int po,next;} e[200010];
12 struct node{int w,id;} a[60010];
13 int ans[60010],w[200010],go[200010][26],fa[200010],mx[200010],l[200010],r[200010],p[200010],c[200010];
14 int len,t,last,n,m;
15 char s[100010];
16
17 bool cmp(node a,node b)
18 {
19 return l[a.w]<l[b.w];
20 }
21
22 void work(int c) //比较优美的写法
23 {
24 int np,nq,q,p=last;
25 if (!go[last][c])
26 {
27 np=++t;
28 mx[np]=mx[p]+1;
29 for (;p&&!go[p][c];p=fa[p]) go[p][c]=np;
30 }
31 else np=0;
32 if (!p) fa[np]=1;
33 else {
34 q=go[p][c];
35 if (mx[q]==mx[p]+1) fa[np]=q;
36 else {
37 nq=++t;
38 mx[nq]=mx[p]+1;
39 memcpy(go[nq],go[q],sizeof(go[q]));
40 fa[nq]=fa[q]; fa[q]=fa[np]=nq;
41 for (;go[p][c]==q;p=fa[p]) go[p][c]=nq;
42 }
43 }
44 last=go[last][c];
45 }
46
47 void build(int x,int y)
48 {
49 e[++len].po=y;
50 e[len].next=p[x];
51 p[x]=len;
52 }
53
54 void dfs(int x)
55 {
56 l[x]=++t; w[t]=x;
57 for (int i=p[x];i;i=e[i].next)
58 {
59 dfs(e[i].po);
60 }
61 r[x]=t;
62 }
63
64 void add(int x,int w)
65 {
66 for (int i=x;i<=t;i+=i&-i) c[i]+=w;
67 }
68
69 int ask(int x)
70 {
71 int s=0;
72 for (int i=x;i;i-=i&-i) s+=c[i];
73 return s;
74 }
75 int main()
76 {
77 scanf("%d%d",&n,&m);
78 t=last=1;
79 for (int i=1; i<=n; i++)
80 {
81 scanf("%s",s+1); len=strlen(s+1);
82 last=1;
83 for (int j=1; j<=len;j++)
84 {
85 work(s[j]-'a');
86 b[last].push_back(i);
87 }
88 }
89 len=fa[0]=0;
90 for (int i=2; i<=t; i++) build(fa[i],i);
91 t=0; dfs(1);
92 for (int i=1; i<=m; i++)
93 {
94 scanf("%s",s+1); len=strlen(s+1);
95 int j=1;
96 for (int k=1;k<=len;k++)
97 {
98 if (!go[j][s[k]-'a']) {j=0;break;}
99 j=go[j][s[k]-'a'];
100 }
101 a[i].w=j; a[i].id=i;
102 }
103 sort(a+1,a+1+m,cmp);
104 vector<int>::iterator k;
105 for (int i=1;i<=t;i++)
106 for (k=b[w[i]].begin();k!=b[w[i]].end(); k++) q[*k].push_back(i);
107 vector<int>::iterator cur[10010];
108 for (int i=1; i<=n; i++)
109 {
110 cur[i]=q[i].begin();
111 if (cur[i]!=q[i].end()) {add(*cur[i],1); cur[i]++;}
112 }
113 int j=1;
114 while (!a[j].w) j++;
115 for (int i=1; i<=t; i++)
116 {
117 for (;l[a[j].w]==i; j++) ans[a[j].id]=ask(r[a[j].w])-ask(l[a[j].w]-1);
118 for (k=b[w[i]].begin();k!=b[w[i]].end(); k++)
119 {
120 int x=*k;
121 if (cur[x]!=q[x].end()) {add(*cur[x],1); cur[x]++;}
122 }
123 }
124 for (int i=1; i<=m; i++) printf("%d\n",ans[i]);
125 system("pause");
126 return 0;
127 }

2780(c++更新版)