字符串(所有模板下标均从1开始)
- 字符串
- 0x01 KMP与扩展KMP
- 1. KMP
- 2. 扩展KMP
- 3. 二维循环周期
- 0x02 字符串哈希
- 1. 一维哈希
- 2. 二维哈希
- 0x03 Trie树、Border树、AC自动机
- 0x04 Manacher
- 0x05 PAM
- PAM基础
- 广义PAM
- 0x06 后缀数组
- 倍增求SA
- SAIS
- 0x07 SAM
- SAM 基础
- 更新更加灵活的线段树合并模板
- 关于SAM的匹配问题
- 广义SAM(对字典构造SAM)
字符串
0x01 KMP与扩展KMP
1. KMP
// Knuth-Morris-Pratt matcher over 1-indexed strings.
// The pattern and every text are expected to carry one dummy character at
// index 0 (for example " abc"), so the real characters occupy [1, size() - 1].
struct KMP {
    int n;                 // pattern length, dummy character excluded
    std::string p;         // the pattern, 1-indexed
    std::vector<int> nxt;  // nxt[i] = length of the longest proper border of p[1..i]

    // Builds the failure table for `s`.
    // A completely empty string is treated as an empty pattern (n == 0) instead
    // of producing n == -1, which would make periodic() index nxt[-1].
    KMP(const std::string &s)
        : n(s.empty() ? 0 : static_cast<int>(s.size()) - 1), p(s), nxt(n + 1) {
        for (int i = 2, j = 0; i <= n; i++) {
            while (j && p[i] != p[j + 1]) j = nxt[j];
            if (p[i] == p[j + 1]) j++;
            nxt[i] = j;
        }
    }

    // Longest proper border of the prefix p[1..x]; x must lie in [0, n].
    int board(int x) const {
        return nxt[x];
    }

    // Returns the 1-indexed start positions of every occurrence of the pattern
    // in `s` (overlaps included). With first_only the scan stops after the
    // first hit. An empty pattern yields no matches.
    std::vector<int> match(const std::string &s, bool first_only = false) const {
        std::vector<int> start_pos;
        if (n == 0) return start_pos;
        const int len = static_cast<int>(s.size());
        for (int i = 1, j = 0; i < len; i++) {
            while (j && s[i] != p[j + 1]) j = nxt[j];
            if (s[i] == p[j + 1]) j++;
            if (j == n) {
                start_pos.push_back(i - j + 1);
                if (first_only) return start_pos;
                j = nxt[j];  // fall back to the longest border so overlapping hits are found
            }
        }
        return start_pos;
    }

    // All periods of the pattern in increasing order. Each border of length b
    // contributes the period n - b; the last entry is always n itself.
    std::vector<int> periodic() const {
        std::vector<int> ret;
        int now = n;
        while (now) {
            now = nxt[now];
            ret.push_back(n - now);
        }
        return ret;
    }

    // Periods that divide n, i.e. lengths of blocks whose repetition gives the
    // whole pattern.
    std::vector<int> periodic_loop() const {
        std::vector<int> ret;
        for (int x : periodic()) {
            if (n % x == 0) {
                ret.push_back(x);
            }
        }
        return ret;
    }

    // Smallest repeating block length, or 0 for an empty pattern.
    int min_periodic_loop() const {
        const std::vector<int> loops = periodic_loop();
        return loops.empty() ? 0 : loops.front();
    }
};
2. 扩展KMP
// nxt[i] = LCP(T[i,lent],T)
// extend[i] = LCP(S[i,lens],T)
// Extended KMP (Z-algorithm) over 1-indexed strings (index 0 holds a dummy
// character).
//   nxt[i]    = LCP(p[i..n], p)
//   extend[i] = LCP(s[i..],  p)
struct EXKMP {
    int n;                 // pattern length, dummy character excluded
    std::string p;         // the pattern, 1-indexed
    std::vector<int> nxt;  // Z-array of the pattern, nxt[1] == n

    // Builds the pattern's own Z-array.
    // The table is allocated first and then filled in place: while position i
    // is computed only entries with a smaller index are read, so the array can
    // serve as its own reference table. (Calling exkmp() from the member
    // initializer would read `nxt` before it has been constructed.)
    EXKMP(const std::string &str)
        : n(str.empty() ? 0 : static_cast<int>(str.size()) - 1), p(str), nxt(str.size(), 0) {
        fill_lcp(p, nxt, nxt);
    }

    // Returns extend[] for the text `s`; the result has s.size() entries and
    // entry 0 is unused. An empty `s` gives an empty vector.
    std::vector<int> exkmp(const std::string &s) const {
        std::vector<int> extend(s.size(), 0);
        fill_lcp(s, nxt, extend);
        return extend;
    }

private:
    // Core scan. `z` is the pattern's Z-array and may alias `extend` when the
    // pattern is matched against itself.
    void fill_lcp(const std::string &s, const std::vector<int> &z, std::vector<int> &extend) const {
        const int len = static_cast<int>(s.size());
        // [st, ed] is the match window reaching furthest to the right so far.
        for (int i = 1, st = 0, ed = 0; i < len; i++) {
            int cur = i <= ed ? std::min(z[i - st + 1], ed - i + 1) : 0;
            while (i + cur < len && cur < n && s[i + cur] == p[cur + 1]) {
                cur++;
            }
            extend[i] = cur;
            // Position 1 never becomes the window: against itself it would
            // cover the whole string and make every later lookup self-referential.
            if (i + cur - 1 >= ed && i != 1) {
                st = i;
                ed = i + cur - 1;
            }
        }
    }
};
3. 二维循环周期
// Prints the area p * q of the smallest block whose height p is a period of
// every column and whose width q is a period of every row of the grid.
// The grid is 1-indexed in both directions; its dimensions are read from the
// surrounding variables n (rows) and m (columns).
void KMP2(std::vector<std::string> &s) {
    // Smallest length that is a period of each of `lines` sequences of length
    // `len`; cell(line, pos) yields the pos-th character of a sequence.
    // Falls back to `len` when no shorter period is shared by all of them.
    auto shortest_shared_period = [](int len, int lines, auto cell) {
        std::vector<int> votes(len + 1);  // votes[x] = how many sequences have period x
        for (int line = 1; line <= lines; ++line) {
            std::vector<int> fail(len + 1);
            int matched = 0;
            for (int pos = 2; pos <= len; ++pos) {
                while (matched != 0 && cell(line, pos) != cell(line, matched + 1)) {
                    matched = fail[matched];
                }
                if (cell(line, pos) == cell(line, matched + 1)) {
                    ++matched;
                }
                fail[pos] = matched;
            }
            // Every border of the full sequence corresponds to one period.
            for (int border = fail[len]; border != 0; border = fail[border]) {
                ++votes[len - border];
            }
        }
        for (int cand = 1; cand <= len; ++cand) {
            if (votes[cand] == lines) {
                return cand;
            }
        }
        return len;
    };

    const int q = shortest_shared_period(m, n, [&](int r, int c) { return s[r][c]; });
    const int p = shortest_shared_period(n, m, [&](int c, int r) { return s[r][c]; });
    std::cout << p * q << "\n";
}
0x02 字符串哈希
1. 一维哈希
using ULL = unsigned long long;
// Pools of candidate hashing parameters. Nothing else in the visible snippets
// reads them; they are kept as ready-made values to pick from.
ULL Prime_Pool[] = {
    1998585857ULL,
    23333333333ULL,
};
ULL Seed_Pool[] = {
    911ULL,
    146527ULL,
    19260817ULL,
    91815541ULL,
};
ULL Mod_Pool[] = {
    29123ULL,
    998244353ULL,
    1000000009ULL,
    4294967291ULL,
};
// Moduli of the two independent hash components.
constexpr int P1 = 1000000007, P2 = 1000000009;

// A pair of residues (h1 mod P1, h2 mod P2) with component-wise modular
// arithmetic, used as a double hash value.
struct Hashv {
    int h1, h2;

    // Zero value.
    Hashv() : h1(0), h2(0) {}
    // Conversion from one integer (e.g. a character code): the value goes into
    // BOTH components. With a defaulted second argument it would leave h2 at 0,
    // so `hash * base + ch` would never mix the character into the second
    // component and the double hash would degrade to a single one.
    Hashv(int v) : h1(reduce(v, P1)), h2(reduce(v, P2)) {}
    // Explicit pair of components; each one is reduced into [0, P).
    Hashv(int base1, int base2) : h1(reduce(base1, P1)), h2(reduce(base2, P2)) {}

    // Packs both components into a single 64-bit key (h1 * P2 + h2).
    long long val() const { return 1ll * h1 * P2 + h2; }

    Hashv &operator*=(const Hashv &rhs) {
        h1 = static_cast<int>(1ll * h1 * rhs.h1 % P1);
        h2 = static_cast<int>(1ll * h2 * rhs.h2 % P2);
        return *this;
    }
    Hashv &operator+=(const Hashv &rhs) {
        h1 += rhs.h1; if (h1 >= P1) h1 -= P1;
        h2 += rhs.h2; if (h2 >= P2) h2 -= P2;
        return *this;
    }
    Hashv &operator-=(const Hashv &rhs) {
        h1 -= rhs.h1; if (h1 < 0) h1 += P1;
        h2 -= rhs.h2; if (h2 < 0) h2 += P2;
        return *this;
    }
    friend Hashv operator*(const Hashv &lhs, const Hashv &rhs) {
        Hashv res = lhs;
        res *= rhs;
        return res;
    }
    friend Hashv operator+(const Hashv &lhs, const Hashv &rhs) {
        Hashv res = lhs;
        res += rhs;
        return res;
    }
    friend Hashv operator-(const Hashv &lhs, const Hashv &rhs) {
        Hashv res = lhs;
        res -= rhs;
        return res;
    }
    friend bool operator==(const Hashv &lhs, const Hashv &rhs) {
        return lhs.h1 == rhs.h1 && lhs.h2 == rhs.h2;
    }
    // Lexicographic order on (h1, h2), so values can key ordered containers.
    friend bool operator<(const Hashv &lhs, const Hashv &rhs) {
        return lhs.h1 != rhs.h1 ? lhs.h1 < rhs.h1 : lhs.h2 < rhs.h2;
    }

private:
    // Maps any int (negative ones included) to its residue in [0, mod).
    static int reduce(int v, int mod) {
        v %= mod;
        return v < 0 ? v + mod : v;
    }
};
// Polynomial bases of the two components.
const Hashv base(131, 233);
// using Hashv = unsigned long long;
// const Hashv base = 233;
struct StringHash {
const int n;
vector<Hashv> h1, h2, power;
StringHash(string &s) : n(s.size() - 1), h1(n + 2), h2(n + 2), power(n + 2) {
for (int i = 1; i <= n; i ++ ) h1[i] = h1[i - 1] * base + s[i];
for (int i = n; i >= 1; i -- ) h2[i] = h2[i + 1] * base + s[i];
power[0] = Hashv(1, 1);
// power[0] = 1;
for (int i = 1; i <= n; i ++ ) power[i] = power[i - 1] * base;
}
Hashv hash1(int l, int r) {
return h1[r] - h1[l - 1] * power[r - l + 1];
}
Hashv hash2(int l, int r) {
return h2[l] - h2[r + 1] * power[r - l + 1];
}
};
2. 二维哈希
signed main(signed argc, char const *argv[])
{
ios::sync_with_stdio(false);
cin.tie(nullptr);
int n, m, a, b;
cin >> n >> m >> a >> b;
vector<string> s(n + 1);
for (int i = 1; i <= n; i ++ ) {
cin >> s[i];
s[i] = " " + s[i];
}
vector<Hashv> pow1(max(n, m) + 1);
vector<Hashv> pow2(max(n, m) + 1);
pow1[0] = pow2[0] = 1;
for (int i = 1; i < pow1.size(); i ++ ) {
pow1[i] = pow1[i - 1] * base1;
pow2[i] = pow2[i - 1] * base2;
}
auto get_hash = [&](vector<vector<Hashv>> &h, vector<string> &s) {
int n = h.size() - 1, m = h[0].size() - 1;
for (int i = 1; i <= n; i ++ ) {
for (int j = 1; j <= m; j ++ ) {
h[i][j] = h[i][j - 1] * base1 + s[i][j];
}
}
for (int j = 1; j <= m; j ++ ) {
for (int i = 1; i <= n; i ++ ) {
h[i][j] = h[i - 1][j] * base2 + h[i][j];
}
}
};
vector h(n + 1, vector<Hashv>(m + 1));
get_hash(h, s);
unordered_set<Hashv> S;
for (int i = a; i <= n; i ++ ) {
for (int j = b; j <= m; j ++ ) {
Hashv hs = h[i][j] - h[i - a][j] * pow2[a] - h[i][j - b] * pow1[b] + h[i - a][j - b] * pow2[a] * pow1[b];
S.insert(hs);
}
}
int q;
cin >> q;
while (q -- ) {
vector<string> p(a + 1);
for (int i = 1; i <= a; i ++ ) {
cin >> p[i];
p[i] = " " + p[i];
}
vector hp(a + 1, vector<Hashv>(b + 1));
get_hash(hp, p);
if (S.count(hp[a][b])) {
cout << "1\n";
} else {
cout << "0\n";
}
}
return 0;
}
0x03 Trie树、Border树、AC自动机
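Border树的性质(以 $nxt[i]$ 作为节点 $i$ 的父亲,$0$ 为根):
- 每个前缀 $S[1..i]$ 的所有 Border:节点 $i$ 到根的链上的所有祖先。
- 哪些前缀有长度为 $x$ 的 Border:节点 $x$ 子树内的所有节点(不含 $x$ 自身)。
- 求两个前缀的公共 Border:等价于求两个节点的 LCA。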
AC自动机 = Trie树 + Border树
概念上 AC自动机 和 Trie图 是两个不同的概念,但一般 AC自动机 在建立的过程中顺便就把 Trie图 建立好了,因此实际应用中不做区分。
constexpr int N = 1000010;

// Aho-Corasick automaton over lowercase letters, stored in a static node pool.
// After build() the child table of every node is a complete transition table
// (trie graph), so matching never has to follow fail links explicitly.
struct ACAM {
    struct Node {
        int s[26], fail;  // children / transitions, failure link
    } tr[N];
    int idx, root;        // index of the last allocated node, root node (0)

    ACAM() { clear(); }

    // Resets the automaton to a lone root; further nodes are re-initialised
    // lazily when they are allocated again.
    void clear() {
        std::memset(tr[0].s, 0, sizeof tr[0].s);
        tr[0].fail = 0;
        root = 0;
        idx = 0;
    }

    // Allocates a blank node and returns its index.
    int new_node() {
        Node &fresh = tr[++idx];
        std::memset(fresh.s, 0, sizeof fresh.s);
        fresh.fail = 0;
        return idx;
    }

    // Adds the word `s` (every character, lowercase letters) to the trie.
    // The second argument is accepted but not used by this template.
    void insert(std::string &s, int i) {
        (void)i;
        int cur = root;
        for (char ch : s) {
            int &child = tr[cur].s[ch - 'a'];
            if (child == 0) {
                child = new_node();
            }
            cur = child;
        }
    }

    // Breadth-first computation of the fail links; missing children are
    // redirected to the transition of the fail node.
    void build() {
        std::queue<int> pending;
        for (int child : tr[0].s) {
            if (child != 0) {
                pending.push(child);
            }
        }
        while (!pending.empty()) {
            const int u = pending.front();
            pending.pop();
            const Node &fallback = tr[tr[u].fail];
            for (int c = 0; c < 26; ++c) {
                int &to = tr[u].s[c];
                if (to != 0) {
                    tr[to].fail = fallback.s[c];
                    pending.push(to);
                } else {
                    to = fallback.s[c];
                }
            }
        }
    }
} AC;
0x04 Manacher
回文串性质:
- 本质不同的回文子串的个数至多有 $O(n)$ 个(长度为 $n$ 的串至多 $n$ 个)。
- 回文串
// Manacher's algorithm.
// Works on the transformed string b = "$|" + c1 + "|" + c2 + "|" + ... and
// returns len, where len[i] is the palindrome radius centred at b[i] (the
// centre counts as 1). The palindrome of the original string centred there has
// length len[i] - 1. len[0], the slot of the leading '$', stays 0.
std::vector<int> manacher(const std::string &s)
{
    std::string b = "$|";
    b.reserve(2 * s.size() + 2);
    for (char c : s) {
        b += c;
        b += '|';
    }
    const int m = static_cast<int>(b.size());
    std::vector<int> len(m);
    // maxright: one past the right end of the right-most palindrome found so
    // far; mid: its centre.
    int maxright = 1, mid = 1;
    for (int i = 1; i < m; i++) {
        // Reuse the mirrored centre while i is still inside that palindrome.
        len[i] = i < maxright ? std::min(len[2 * mid - i], maxright - i) : 1;
        // Explicit bounds instead of sentinel characters: the leading '$' is
        // never compared, so inputs that themselves contain '$' cannot run off
        // either end of the buffer.
        while (i - len[i] >= 1 && i + len[i] < m && b[i - len[i]] == b[i + len[i]]) {
            len[i]++;
        }
        if (i + len[i] > maxright) {
            maxright = i + len[i];
            mid = i;
        }
    }
    return len;
}
0x05 PAM
PAM基础
PAM 基础知识:
- 节点:至多有 $n + 2$ 个(含奇根与偶根),每个节点表示一种本质不同的回文串,长度为 $len[v]$。
- 后继边:S(v) = ch S(u) ch,len[v] = len[u] + 2 。
- 失配边:S(v) 是 S(u) 的最大 Border,即 S(u) 的最长回文真后缀。
// Palindromic tree (PAM) over lowercase letters for one 1-indexed string
// (index 0 holds a dummy character).
struct PAM {
    static constexpr int ALPHABET_SIZE = 26, N = 1000010;
    struct Node {
        // basic
        int s[ALPHABET_SIZE];  // s[c]: node of the palindrome c + this + c
        int fail, len, num;    // num: number of palindromes ending where this node's palindrome ends
        // extension
        int cnt;               // occurrences of this palindrome (complete after build())
    } tr[N];
    int idx, last;             // last allocated node, node of the longest palindromic suffix
    std::string s;

    PAM() {
        // Node 0 is the even root (len 0), node 1 the odd root (len -1).
        // Both roots are initialised completely so that an instance without
        // static storage does not start from indeterminate num / cnt values.
        for (int r = 0; r < 2; r++) {
            std::memset(tr[r].s, 0, sizeof tr[r].s);
            tr[r].num = tr[r].cnt = 0;
        }
        tr[0].fail = 1, tr[0].len = 0;
        tr[1].fail = 0, tr[1].len = -1;
        idx = 1, last = 0;
    }

    // Follows fail links from x until the palindrome of x can be extended by
    // s[i] on both sides. The odd root always qualifies, so the loop stops.
    int get_fail(int x, int i) {
        while (s[i - tr[x].len - 1] != s[i]) {
            x = tr[x].fail;
        }
        return x;
    }

    // Allocates the node obtained by wrapping p's palindrome in one character.
    int new_node(int p) {
        int q = ++idx;
        std::memset(tr[q].s, 0, sizeof tr[q].s);
        tr[q].fail = tr[q].cnt = tr[q].num = 0;
        tr[q].len = tr[p].len + 2;
        return q;
    }

    // Feeds the characters str[1..] into the tree.
    void insert(const std::string &str) {
        s = str;
        const int len = static_cast<int>(s.size());
        for (int i = 1; i < len; i++) {
            int u = s[i] - 'a';
            int p = get_fail(last, i);
            if (!tr[p].s[u]) {
                int q = new_node(p);
                // The fail link must be resolved before q is attached to p,
                // otherwise a single-character node could point to itself.
                tr[q].fail = tr[get_fail(tr[p].fail, i)].s[u];
                tr[q].num = tr[tr[q].fail].num + 1;
                tr[p].s[u] = q;
            }
            last = tr[p].s[u];
            tr[last].cnt++;
        }
    }

    // Accumulates cnt along fail links. Nodes are created in an order where a
    // fail target always has a smaller index, so a reverse scan is a valid
    // topological order.
    void build() {
        for (int i = idx; i >= 2; i--) {
            tr[tr[i].fail].cnt += tr[i].cnt;
        }
        tr[0].cnt = tr[1].cnt = 0;
    }
} pam;
广义PAM
0x06 后缀数组
倍增求SA
// Sparse table for idempotent range queries on a fixed array.
// It answers range-maximum queries by default; pass take_min = true to the
// constructor for range-minimum queries (what an LCP query needs).
struct ST {
    int n, m;                          // number of elements, number of levels
    std::vector<std::vector<int>> st;  // st[i][j] covers [i, i + 2^j - 1]
    bool use_min = false;

    ST() : n(0), m(0) {}
    ST(const std::vector<int> &w, bool take_min = false)
        : n(static_cast<int>(w.size())), m(levels(n)), st(n, std::vector<int>(m)), use_min(take_min) {
        for (int j = 0; j < m; j++) {
            for (int i = 0; i + (1 << j) - 1 < n; i++) {
                if (j == 0) {
                    st[i][j] = w[i];
                } else {
                    st[i][j] = pick(st[i][j - 1], st[i + (1 << (j - 1))][j - 1]);
                }
            }
        }
    }

    // Max (or min) over the inclusive range [l, r]; requires l <= r.
    int query(int l, int r) const {
        const int len = r - l + 1;
        int k = 0;  // floor(log2(len)), computed with integers instead of log()
        while ((2LL << k) <= len) k++;
        return pick(st[l][k], st[r - (1 << k) + 1][k]);
    }

private:
    int pick(int a, int b) const { return use_min ? std::min(a, b) : std::max(a, b); }
    // Smallest level count such that 2^levels > count (at least one level).
    static int levels(int count) {
        int lv = 1;
        while ((1LL << lv) <= count) lv++;
        return lv;
    }
};

constexpr int maxm = 127;  // initial number of radix buckets (character codes 1..127)

// Suffix array by prefix doubling over a 1-indexed string (index 0 holds a
// dummy character). Character codes are expected to lie in [1, maxm].
struct SA {
    int n, m;                         // text length, current number of distinct ranks
    std::string s;
    std::vector<int> sa, rk, height;  // sa[i]: start of the i-th smallest suffix; rk: inverse of sa
    std::vector<int> x, y, c;         // first keys, second-key order, radix counters
    ST st;                            // range minimum over height[]

    void get_sa() {
        for (int i = 1; i <= n; i++) c[x[i] = s[i]]++;
        for (int i = 2; i <= m; i++) c[i] += c[i - 1];
        for (int i = n; i >= 1; i--) sa[c[x[i]]--] = i;
        for (int k = 1; k <= n; k *= 2) {
            int num = 0;
            // Suffixes shorter than k have an empty second key and come first.
            for (int i = n - k + 1; i <= n; i++) y[++num] = i;
            for (int i = 1; i <= n; i++) {  // walk the suffixes in their current order
                if (sa[i] > k) {            // suffix sa[i] is the second key of suffix sa[i] - k
                    y[++num] = sa[i] - k;   // y lists the suffixes ordered by second key
                }
            }
            for (int i = 1; i <= m; i++) c[i] = 0;
            for (int i = 1; i <= n; i++) c[x[i]]++;
            for (int i = 1; i <= m; i++) c[i] += c[i - 1];
            for (int i = n; i; i--) sa[c[x[y[i]]]--] = y[i], y[i] = 0;
            std::swap(x, y);  // y now holds the previous ranks, x is rebuilt below
            x[sa[1]] = 1, num = 1;
            for (int i = 2; i <= n; i++) {
                x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? num : ++num;
            }
            if (num == n) break;
            m = num;
        }
    }

    // height[i] = LCP of the suffixes ranked i and i - 1 (height[1] = 0).
    void get_height() {
        for (int i = 1; i <= n; i++) rk[sa[i]] = i;
        for (int i = 1, k = 0; i <= n; i++) {  // iterate by text position
            if (rk[i] == 1) continue;
            // The value for position i is at least the value for i - 1 minus
            // one, so the comparison resumes from k - 1.
            if (k) k--;
            int j = sa[rk[i] - 1];
            while (i + k <= n && j + k <= n && s[i + k] == s[j + k]) k++;
            height[rk[i]] = k;
        }
    }

    SA(const std::string &str)
        : n(str.empty() ? 0 : static_cast<int>(str.size()) - 1), m(maxm), s(str),
          sa(n + m), rk(n + m), height(n + m), x(n + m), y(n + m), c(n + m) {
        get_sa();
        get_height();
        // LCP of two suffixes is the MINIMUM height between their ranks.
        st = ST(height, true);
    }

    // LCP of the suffixes starting at positions x and y (1-indexed).
    int lcp(int x, int y) {
        if (x == y) return n - x + 1;  // a suffix shares its whole length with itself
        int rkx = rk[x];
        int rky = rk[y];
        if (rkx > rky) std::swap(rkx, rky);
        return st.query(rkx + 1, rky);
    }

    // Number of distinct non-empty substrings: sum(n - sa[i] + 1 - height[i]).
    long long get_distinct_substrings() {
        long long ans = 0;
        for (int i = 1; i <= n; i++) {
            ans += n - sa[i] + 1 - height[i];
        }
        return ans;
    }
};
SAIS
#include <bits/stdc++.h>
using namespace std;
using i64 = long long;
// Capacity of the input buffer and template argument of the suffix-array tables below.
const int MAXN=1e6+10;
// n: length of the input string; m: alphabet bound handed to SA.init (both assigned in main).
int n,m;
// Input text; main reads it starting at s[1].
char s[MAXN];
// Suffix-array builder in the SA-IS (induced sorting) style, working on
// fixed-size member tables. init() sorts len + 1 characters, so `size` has to
// cover the text plus that extra trailing character.
template <size_t size>
struct SuffixArray {
// Suffix-type flags: t[i] is true when suffix i is S-type (see sais()).
// Allocated at 2 * size because each recursion level of sais() continues in
// the slice that starts at t + len.
bool t[size<<1];
// sa: suffix array, ht: height (LCP) array, rk: rank array.
// While sais() runs, rk and ht are lent out as scratch buffers (see init()).
int sa[size],ht[size],rk[size];
// True when position i starts an LMS substring: an S-type position whose
// left neighbour is L-type.
inline bool islms(const int i, const bool *t){
return i>0 && t[i]&&!t[i-1];
}
// Induced sort.
//   s, len  : the text and its length
//   sigma   : alphabet size (character values are used as bucket indices)
//   p, sz   : the sz seed positions, consumed from back to front
//   b, cb   : per-character counts and running bucket cursors
// The seeds are placed at the bucket tails; a left-to-right pass then induces
// every position whose type flag is false and a right-to-left pass every
// position whose flag is true.
template<class T>
void sort(T s,int *sa, const int len, const int sigma, const int sz, bool *t, int *b,int *cb, int *p) {
memset(b, 0, sigma*sizeof(int));
// All bytes 0xFF, i.e. every slot reads as -1 ("empty").
memset(sa,-1,len*sizeof(int));
for (int i=0;i<len;i++)b[s[i]]++;
// Inclusive prefix sums: cb[c] is one past the last slot of bucket c.
cb[0]=b[0];
for(int i=1;i<sigma;i++)cb[i]=cb[i-1]+b[i];
// Drop the seeds at the tails of their buckets.
for(int i=sz-1;i>=0;--i)sa[--cb[s[p[i]]]]=p[i];
// Cursors for characters >= 1 become bucket starts; cb[0] keeps whatever the
// seeding loop left in it.
for(int i=1;i<sigma;i++)cb[i]=cb[i-1]+b[i-1];
// Left-to-right pass: place sa[i] - 1 when that position's flag is false.
for (int i=0;i<len;i++)if(sa[i]>0&&!t[sa[i]-1])
sa[cb[s[sa[i]-1]]++]=sa[i]-1;
// Back to bucket tails for the right-to-left pass over positions whose flag is true.
cb[0]=b[0];
for(int i=1;i<sigma;i++)cb[i]=cb[i-1]+b[i];
for(int i=len-1;i>=0;--i)if(sa[i]>0 && t[sa[i]-1])
sa[--cb[s[sa[i]-1]]]=sa[i]-1;
}
// Recursive SA-IS step for the text s[0..len): classifies the suffixes,
// induced-sorts the LMS substrings, names them, recurses on the reduced string
// while names still collide, and finally induces the full order from the
// sorted LMS suffixes.
//   b     : count buffer; the bucket cursors cb live at b + sigma
//   b1    : buffer receiving the LMS positions
template<class T>
void sais(T s,int *sa, const int len, bool *t, int *b, int *b1, const int sigma) {
int i,j,x,p=-1,cnt=0,sz=0,*cb=b+sigma;
// The last position is S-type; position i is S-type when s[i] < s[i+1], or
// when both are equal and i + 1 is S-type.
for(t[len-1]=1,i=len-2;i>=0;--i)t[i]=s[i]<s[i+1]||(s[i]==s[i+1]&&t[i+1]);
// Collect the LMS positions in text order.
for(i=1;i<len;i++)if(t[i]&&!t[i-1])b1[sz++]=i;
sort(s,sa,len,sigma,sz,t,b,cb,b1);
// Keep only the LMS positions (now ordered by LMS substring) at the front of
// sa and mark the remainder as empty.
for(i=sz=0;i<len;i++)if(islms(sa[i],t))sa[sz++]=sa[i];
for(i=sz;i<len;i++)sa[i]=-1;
// Name the LMS substrings: a new name starts whenever the current substring
// differs from the previous one (p) in a character or a type flag. The name of
// the substring starting at x is stored at sa[sz + x / 2].
for(i=0;i<sz;i++){
for(x=sa[i],j=0;j<len;j++){
if(p==-1||s[x+j]!=s[p+j]||t[x+j]!=t[p+j]){
cnt++,p=x;break;
}
else if(j>0&&(islms(x+j,t)||islms(p+j,t)))break;
}
sa[sz+(x>>=1)]=cnt-1;
}
// Pack the names, in text order, into the last sz slots of sa.
for(i=j=len-1;i>=sz;i--)if(sa[i]>=0)sa[j--]=sa[i];
// s1: the reduced string of names; b2: the LMS positions in sorted order.
int *s1=sa+len-sz,*b2=b1+sz;
// Recurse when two LMS substrings share a name; otherwise the names already
// form a permutation and can be inverted directly.
if(cnt<sz)sais(s1,sa,sz,t+len,b,b1+sz,cnt);
else for(i=0;i<sz;i++) sa[s1[i]]=i;
// Translate reduced-string indices back to text positions and run the final
// induced sort seeded with the sorted LMS suffixes.
for(i=0;i<sz;i++)b2[i]=b1[sa[i]];
sort(s,sa,len,sigma,sz,t,b,cb,b2);
}
// Fills ht: for every text position i (0-based) the common prefix with the
// suffix ranked directly before it is extended from the previous value minus
// one and stored at ht[rk[i]]. Reads sa and rk as left by init(). Afterwards
// sa[1..n] is shifted by one so that it reports 1-based positions.
template<class T>
void getHeight(T s,int n){
int j=0,k=0;
for(int i=0;i<n;ht[rk[i++]]=k)
for(k?k--:0,j=sa[rk[i]-1];s[i+k]==s[j+k];k++);
for (int i = 1; i <= n; i ++ ) sa[i] ++ ;
}
// Builds sa and rk for s[0..len). The sort covers len + 1 characters, i.e. it
// includes s[len] (presumably the terminating '\0' — confirm at the call
// site); rk and ht serve as the scratch buffers of sais().
template<class T>
void init(T s,const int len, const int sigma){
sais(s,sa,len+1,t,rk,ht,sigma),rk[0]=0;
for(int i=1;i<=len;i++)rk[sa[i]]=i;
}
};
// Global builder instance used by main.
SuffixArray<MAXN> SA;
signed main()
{
scanf("%s", s + 1);
n = strlen(s + 1);
m = 127;
SA.init(s + 1, n, m);
SA.getHeight(s + 1, n);
for (int i = 1; i <= n; i ++ ) printf("%d ", SA.sa[i]);
puts("");
for (int i = 1; i <= n; i ++ ) printf("%d ", SA.ht[i]);
puts("");
return 0;
}
0x07 SAM
SAM 基础
Right 集合:$Right(s)$ 表示状态 $s$ 所代表的子串在原串中所有出现位置的右端点构成的集合,性质如下:
- 所有节点的 Right 集合 互不相等。
- 任意两个 Right 集合 之间要么互不相交,要么是包含关系。
SC树(后缀链接树): Right 集合 包含关系形成的树,性质如下:
- 每个前缀所在状态两两不同。
- 共有 |S| 个节点分别代表每个前缀,因此后缀链接树最多有 |S| 个叶子节点;加之 Right 集合的性质,SAM 的节点个数为 $O(|S|)$(至多 $2|S| - 1$ 个)。
- 任意串 w 的后缀全部位于 s(w) 的后缀链接树上。
- 每个状态的 Right 集合 等价于他在后缀链接树子树的叶子节点集合。
// ALPHABET_SIZE: number of transitions per state (characters are mapped with `c - 'a'`).
// N: base capacity from which every static array in this section is sized.
constexpr int ALPHABET_SIZE = 26, N = 1000010;
#ifdef RIGHT
// Persistent segment tree ("chairman tree") of counters over positions.
// Version i is reachable through root[i]; query() returns the difference
// between two versions on a range.
struct Chairman_Tree {
    struct Node { int l, r, val; } tr[N * 30];  // children, number of inserted positions
    int root[N * 2];
    int idx = 0;  // last allocated node

    // Forgets all versions; nodes are re-initialised as they are reallocated.
    void init() {
        std::memset(root, 0, sizeof root);
        idx = 0;
    }

    // Hands out the next node. The check is tied to the real pool size, so
    // running out of nodes is reported instead of writing past tr.
    int alloc() {
        assert(idx + 1 < N * 30);
        return ++idx;
    }

    // Builds the empty version over [l, r] and returns its root.
    int build(int l, int r) {
        int q = alloc();
        tr[q].val = 0;
        tr[q].l = tr[q].r = 0;
        if (l == r) return q;
        int mid = (l + r) >> 1;
        tr[q].l = build(l, mid), tr[q].r = build(mid + 1, r);
        return q;
    }

    // Returns the root of a new version equal to version p with v added at
    // position x. Only the nodes on the path to x are copied.
    int update(int p, int l, int r, int x, int v) {
        int q = alloc();
        tr[q].val = tr[p].val + v;
        tr[q].l = tr[q].r = 0;
        if (l == r) return q;
        int mid = (l + r) >> 1;
        if (x <= mid) {
            tr[q].l = update(tr[p].l, l, mid, x, v);
            tr[q].r = tr[p].r;
        } else {
            tr[q].l = tr[p].l;
            tr[q].r = update(tr[p].r, mid + 1, r, x, v);
        }
        return q;
    }

    // Sum over [L, R] in version q minus the same sum in version p.
    int query(int p, int q, int l, int r, int L, int R) {
        if (l > R || r < L) return 0;
        if (L <= l && R >= r) return tr[q].val - tr[p].val;
        int mid = (l + r) >> 1;
        return query(tr[p].l, tr[q].l, l, mid, L, R) + query(tr[p].r, tr[q].r, mid + 1, r, L, R);
    }
} tree;
#endif
int ans[N];  // ans[len]: after build(), the largest occurrence count among substrings of length len
             // (values are only ever raised, never reset, by build())

// Suffix automaton of one lowercase string, with occurrence counts and,
// under RIGHT, range queries on the end-position (Right) sets.
struct SuffixAutomaton {
    // basic
    int n = 0;       // length of the string given to init()
    std::string s;   // copy of that string (0-indexed)
    struct Node {
        int s[ALPHABET_SIZE];  // transitions
        int fa, len;           // suffix link, length of the longest string of the state
    } tr[2 * N];
    int last = 0, idx = 0;     // state of the whole current string, last allocated state
    // extension
    int cntA[N * 2], A[N * 2];  // counting-sort buckets, states ordered by len
    int num[N * 2];             // size of the Right set of each state
#ifdef RIGHT
    std::vector<int> g[N * 2];    // children in the suffix-link tree
    int L[N * 2], R[N * 2], dfn;  // DFS interval of every subtree
    int pos[N * 2];               // state visited at each DFS index
    int end_pos[N * 2];           // end position of the prefix owned by a state (0 for clones)
#endif

    SuffixAutomaton() { clear(); }

    // Resets the automaton to the lone initial state 1.
    void clear() {
        last = idx = 1;
        tr[1].fa = tr[1].len = 0;
        std::fill(tr[1].s, tr[1].s + ALPHABET_SIZE, 0);
    }

    // Appends every character of `str` (lowercase letters, 0-indexed).
    void init(std::string &str) {
        n = str.size();
        s = str;
        for (auto c : s) extend(c - 'a');
    }

    // Standard online extension by the character code c.
    void extend(int c) {
        int p = last;
        int np = last = ++idx;
        tr[np].len = tr[p].len + 1;
        std::memset(tr[np].s, 0, sizeof tr[np].s);
        while (p && !tr[p].s[c]) tr[p].s[c] = np, p = tr[p].fa;
        if (!p) {
            tr[np].fa = 1;
        } else {
            int q = tr[p].s[c];
            if (tr[q].len == tr[p].len + 1) {
                tr[np].fa = q;
            } else {
                // Split q: the clone keeps the strings of length <= len[p] + 1.
                int nq = ++idx;
                tr[nq] = tr[q];
                tr[nq].len = tr[p].len + 1;
                tr[q].fa = tr[np].fa = nq;
                while (tr[p].s[c] == q) tr[p].s[c] = nq, p = tr[p].fa;
            }
        }
    }

    // Computes num[] (Right-set sizes) and folds them into ans[].
    void build() {
        // Only the entries of the states in use are touched.
        std::fill(cntA, cntA + idx + 1, 0);
        std::fill(num, num + idx + 1, 0);
        // Counting sort of the states by len; A[1..idx] is ascending.
        for (int i = 1; i <= idx; i++) cntA[tr[i].len]++;
        for (int i = 1; i <= idx; i++) cntA[i] += cntA[i - 1];
        for (int i = idx; i >= 1; i--) A[cntA[tr[i].len]--] = i;
        // Every prefix of the main string contributes one end position.
        int temp = 1;
        for (int i = 0; i < n; i++) {
            num[temp = tr[temp].s[s[i] - 'a']] = 1;
        }
        /* propagate in decreasing len, i.e. children before their suffix-link parent */
        for (int i = idx; i >= 1; i--) {
            // basic
            int x = A[i];
            num[tr[x].fa] += num[x];
            // special
            ans[tr[x].len] = std::max(ans[tr[x].len], num[x]);
        }
        // special: a shorter length is at least as frequent as any longer one
        for (int i = tr[last].len; i > 1; i--) {
            ans[i - 1] = std::max(ans[i - 1], ans[i]);
        }
    }
#ifdef RIGHT
    // Number of end positions of state u that fall inside [l, r].
    int get_right_between(int u, int l, int r) {
        return tree.query(tree.root[L[u] - 1], tree.root[R[u]], 1, n, l, r);
    }

    // Assigns DFS intervals and accumulates the subtree sizes into num[].
    void dfs(int u, int father) {
        (void)father;
        L[u] = ++dfn;
        pos[dfn] = u;
        for (auto v : g[u]) {
            dfs(v, u);
            num[u] += num[v];
        }
        R[u] = dfn;
    }

    // Rebuilds the suffix-link tree from scratch. Edges, end positions and
    // num[] of all current states are reset first, so the result does not
    // depend on an earlier build() or extract_right() call.
    void build_sc_tree() {
        for (int i = 1; i <= idx; i++) {
            g[i].clear();
            end_pos[i] = 0;
        }
        std::fill(num, num + idx + 1, 0);
        int temp = 1;
        for (int i = 0; i < n; i++) {
            temp = tr[temp].s[s[i] - 'a'];
            end_pos[temp] = i + 1;
            num[temp] = 1;
        }
        for (int i = 2; i <= idx; i++) {
            g[tr[i].fa].push_back(i);
        }
    }

    // One persistent-tree version per DFS index; version i contains the end
    // positions of the first i states in DFS order.
    void build_chairman_tree() {
        tree.root[0] = tree.build(1, n);
        for (int i = 1; i <= idx; i++) {
            int u = pos[i];
            if (end_pos[u]) {
                int x = end_pos[u];
                tree.root[i] = tree.update(tree.root[i - 1], 1, n, x, 1);
            } else {
                tree.root[i] = tree.root[i - 1];
            }
        }
    }

    // Prepares get_right_between(): link tree, DFS order, persistent tree.
    void extract_right() {
        build_sc_tree();
        dfn = 0;
        dfs(1, 0);
        tree.init();  // start from an empty node pool on every extraction
        build_chairman_tree();
    }
#endif
} sam;
更新更加灵活的线段树合并模板
// ALPHABET_SIZE: number of transitions per state (characters are mapped with `c - 'a'`).
// N: base capacity from which every static array in this section is sized.
constexpr int ALPHABET_SIZE = 26, N = 100010;
#ifdef RIGHT
// Dynamically allocated segment trees holding sets of positions, with a
// non-destructive merge. Node 0 is the shared empty tree.
struct SegmentTree {
    struct Node {
        int l, r, val;  // children, number of positions stored below
        void clear() {
            l = r = val = 0;
        }
    } tr[N * 50];
    int root[N * 2], idx = 0;

    // Drops every tree.
    void init() {
        std::memset(root, 0, sizeof root);
        tr[0].clear();
        idx = 0;
    }

    void pushup(int u) {
        tr[u].val = tr[tr[u].l].val + tr[tr[u].r].val;
    }

    // Inserts position x into the tree rooted at u (0 = empty tree) and
    // returns the root. Inserting a position twice keeps it stored once.
    int modify(int u, int l, int r, int x) {
        if (!u) {
            u = ++idx;
            tr[u].clear();
        }
        if (l == r) {
            // Node carries only l / r / val, so only the presence flag is
            // recorded here.
            tr[u].val = 1;
            return u;
        }
        int mid = (l + r) >> 1;
        if (x <= mid) tr[u].l = modify(tr[u].l, l, mid, x);
        else tr[u].r = modify(tr[u].r, mid + 1, r, x);
        pushup(u);
        return u;
    }

    // Returns the union of the trees x and y. Shared parts get fresh nodes,
    // so both inputs stay valid afterwards.
    int merge(int x, int y, int l, int r) {
        if (!x || !y) return x + y;
        int u = ++idx;
        tr[u].clear();
        if (l == r) {
            // A position present in either input is present in the union.
            tr[u].val = (tr[x].val || tr[y].val) ? 1 : 0;
            return u;
        }
        int mid = (l + r) >> 1;
        tr[u].l = merge(tr[x].l, tr[y].l, l, mid);
        tr[u].r = merge(tr[x].r, tr[y].r, mid + 1, r);
        pushup(u);
        return u;
    }

    // Number of stored positions inside [L, R].
    int query(int u, int l, int r, int L, int R) {
        if (R < l || L > r) return 0;
        if (l >= L && r <= R) return tr[u].val;
        int mid = (l + r) >> 1;
        return query(tr[u].l, l, mid, L, R) + query(tr[u].r, mid + 1, r, L, R);
    }
} tree;
#endif
int ans[N];  // ans[len]: after build(), the largest occurrence count among substrings of length len
int n;
std::string s;  // the main string; build() and build_sc_tree() read it from here

// Suffix automaton of one lowercase string. Under RIGHT every state gets its
// Right set as a mergeable segment tree.
struct SuffixAutomaton {
    // basic
    int n;  // length of the string given to init()
    struct Node {
        int s[ALPHABET_SIZE];  // transitions
        int fa, len;           // suffix link, length of the longest string of the state
    } tr[2 * N];
    int last = 0, idx = 0;     // state of the whole current string, last allocated state
    // extension
    int cntA[N * 2], A[N * 2];  // counting-sort buckets, states ordered by len
    int num[N * 2];             // size of the Right set of each state
#ifdef RIGHT
    std::vector<int> g[N * 2];  // children in the suffix-link tree
    int end_pos[N * 2];         // end position of the prefix owned by a state (0 for clones)
#endif

    SuffixAutomaton() { clear(); }

    // Resets the automaton to the lone initial state 1.
    void clear() {
#ifdef RIGHT
        // The RIGHT-only members exist only when RIGHT is defined, so their
        // reset is guarded too. idx starts at 0, which makes the first call
        // from the constructor a no-op here.
        for (int i = 1; i <= idx; i++) {
            g[i].clear();
            end_pos[i] = 0;
        }
#endif
        last = idx = 1;
        tr[1].fa = tr[1].len = 0;
        std::fill(tr[1].s, tr[1].s + ALPHABET_SIZE, 0);
    }

    // Appends every character of `s` (lowercase letters, 0-indexed).
    // The string is not stored: build() and build_sc_tree() read the
    // namespace-scope `s`, which therefore has to hold the same text.
    void init(std::string &s) {
        n = s.size();
        for (auto c : s) extend(c - 'a');
    }

    // Standard online extension by the character code c.
    void extend(int c) {
        int p = last;
        int np = last = ++idx;
        tr[np].len = tr[p].len + 1;
        std::memset(tr[np].s, 0, sizeof tr[np].s);
        while (p && !tr[p].s[c]) tr[p].s[c] = np, p = tr[p].fa;
        if (!p) {
            tr[np].fa = 1;
        } else {
            int q = tr[p].s[c];
            if (tr[q].len == tr[p].len + 1) {
                tr[np].fa = q;
            } else {
                // Split q: the clone keeps the strings of length <= len[p] + 1.
                int nq = ++idx;
                tr[nq] = tr[q];
                tr[nq].len = tr[p].len + 1;
                tr[q].fa = tr[np].fa = nq;
                while (tr[p].s[c] == q) tr[p].s[c] = nq, p = tr[p].fa;
            }
        }
    }

    // Computes num[] (Right-set sizes) and folds them into ans[].
    void build() {
        std::memset(cntA, 0, sizeof cntA);
        std::memset(num, 0, sizeof num);
        // Counting sort of the states by len; A[1..idx] is ascending.
        for (int i = 1; i <= idx; i++) cntA[tr[i].len]++;
        for (int i = 1; i <= idx; i++) cntA[i] += cntA[i - 1];
        for (int i = idx; i >= 1; i--) A[cntA[tr[i].len]--] = i;
        // Every prefix of the main string contributes one end position.
        int temp = 1;
        for (int i = 0; i < n; i++) {
            num[temp = tr[temp].s[s[i] - 'a']] = 1;
        }
        /* propagate in decreasing len, i.e. children before their suffix-link parent */
        for (int i = idx; i >= 1; i--) {
            // basic
            int x = A[i];
            num[tr[x].fa] += num[x];
            // special
            ans[tr[x].len] = std::max(ans[tr[x].len], num[x]);
        }
        // special: a shorter length is at least as frequent as any longer one
        for (int i = tr[last].len; i > 1; i--) {
            ans[i - 1] = std::max(ans[i - 1], ans[i]);
        }
    }
#ifdef RIGHT
    // Number of end positions of state u that fall inside [l, r].
    int get_right_between(int u, int l, int r) {
        return tree.query(tree.root[u], 1, n, l, r);
    }

    // Merges the Right sets of the children into u, bottom-up.
    void dfs(int u, int father) {
        (void)father;
        for (auto v : g[u]) {
            dfs(v, u);
            num[u] += num[v];
            tree.root[u] = tree.merge(tree.root[u], tree.root[v], 1, n);
        }
    }

    // Rebuilds the suffix-link tree from scratch. Edges, end positions and
    // num[] of all current states are reset first, so the result does not
    // depend on an earlier build() or extract_right() call.
    void build_sc_tree() {
        for (int i = 1; i <= idx; i++) {
            g[i].clear();
            end_pos[i] = 0;
        }
        std::fill(num, num + idx + 1, 0);
        int temp = 1;
        for (int i = 0; i < n; i++) {
            temp = tr[temp].s[s[i] - 'a'];
            end_pos[temp] = i + 1;
            num[temp] = 1;
        }
        for (int i = 2; i <= idx; i++) {
            g[tr[i].fa].push_back(i);
        }
    }

    // Prepares get_right_between(): seeds one leaf per prefix state, then
    // merges along the suffix-link tree.
    void extract_right() {
        build_sc_tree();
        tree.init();
        for (int u = 1; u <= idx; u++) {
            if (end_pos[u]) {
                tree.root[u] = tree.modify(tree.root[u], 1, n, end_pos[u]);
            }
        }
        dfs(1, 0);
    }
#endif
} sam;
关于SAM的匹配问题
#ifdef MATCH
// len[u]:  longest match inside state u that every string seen by match() has in common.
// blen[u]: the same quantity for the string currently being scanned.
int len[N * 2];
int blen[N * 2];

// Length of the longest substring shared with all matched strings: the
// largest entry of len[0..idx].
int get_longest_common_substr() {
    int best = len[0];
    for (int state = 1; state <= idx; ++state) {
        if (len[state] > best) {
            best = len[state];
        }
    }
    return best;
}
// Running total filled by the recursive overload below.
i64 num_of_different_common_substr;

// Post-order walk over the suffix-link tree rooted at u. A state first
// inherits the best match of its children, then contributes the strings
// between its parent's length and its own (capped) match length.
void get_num_of_different_common_substr(int u) {
    const std::vector<int> &children = g[u];
    for (std::size_t k = 0; k < children.size(); ++k) {
        const int child = children[k];
        get_num_of_different_common_substr(child);
        if (len[child] > len[u]) {
            len[u] = len[child];
        }
    }
    const int capped = std::min(len[u], tr[u].len);
    const int gain = capped - tr[tr[u].fa].len;
    if (gain > 0) {
        num_of_different_common_substr += gain;
    }
}

// Number of distinct substrings common to the automaton's string and all
// matched strings.
i64 get_num_of_different_common_substr() {
    num_of_different_common_substr = 0;
    get_num_of_different_common_substr(1);
    return num_of_different_common_substr;
}
void match(vector<string> &S) {
fill(len, len + idx + 1, 2e9);
for (auto str : S) {
fill(blen, blen + idx + 1, 0);
int temp = 1, lenth = 0;
for (int i = 0; i < str.size(); i ++ ) {
char c = str[i];
while (temp && !tr[temp].s[c - 'a']) {
temp = tr[temp].fa;
lenth = tr[temp].len;
}
if (!temp) {
lenth = 0;
temp = 1;
} else {
lenth ++ ;
temp = tr[temp].s[c - 'a'];
}
blen[temp] = max(blen[temp], lenth);
}
for (int i = 0; i <= idx; i ++ ) {
len[i] = min(len[i], blen[i]);
}
}
}
#endif
广义SAM(对字典构造SAM)
constexpr int ALPHABET_SIZE = 26, N = 1000010;

// Suffix automaton whose extend() takes the state to extend from, so it can be
// grown along the edges of a trie (generalized SAM).
struct Suffix_Automaton {
    struct Node {
        int s[ALPHABET_SIZE];  // transitions
        int fa, len;           // suffix link, length of the longest string of the state
    } tr[N * 2];
    int last, idx;             // most recently created end state, last allocated state

    Suffix_Automaton() { clear(); }

    // Resets the automaton to the lone initial state 1.
    void clear() {
        idx = 1;
        last = 1;
        Node &start = tr[1];
        start.len = 0;
        start.fa = 0;
        std::memset(start.s, 0, sizeof start.s);
    }

    // Appends the character code c to the strings ending in state `pre` and
    // returns the state that represents the extended string.
    int extend(int pre, int c) {
        const int cur = ++idx;
        last = cur;
        tr[cur].len = tr[pre].len + 1;
        std::memset(tr[cur].s, 0, sizeof tr[cur].s);

        // Add the new transition along the suffix links that lack one.
        int p = pre;
        for (; p != 0 && tr[p].s[c] == 0; p = tr[p].fa) {
            tr[p].s[c] = cur;
        }
        if (p == 0) {
            tr[cur].fa = 1;
            return cur;
        }

        const int q = tr[p].s[c];
        if (tr[q].len == tr[p].len + 1) {
            tr[cur].fa = q;
            return cur;
        }

        // q holds longer strings as well: split off a clone of the right length.
        const int clone = ++idx;
        tr[clone] = tr[q];
        tr[clone].len = tr[p].len + 1;
        tr[q].fa = clone;
        tr[cur].fa = clone;
        for (; tr[p].s[c] == q; p = tr[p].fa) {
            tr[p].s[c] = clone;
        }
        return cur;
    }
} sam;
// Trie of the dictionary words. bfs() feeds it into `sam` level by level,
// extending from the automaton state of each node's parent.
struct Trie_tree {
    struct Node {
        int s[ALPHABET_SIZE];  // children, 0 = absent
    } tr[N];
    int root = 1, idx = 1;     // root node, last allocated node
    int sam_pos[N];            // automaton state reached by each trie node

    // Adds the word `s` (every character, lowercase letters).
    void insert(std::string &s) {
        int cur = root;
        for (char ch : s) {
            int &child = tr[cur].s[ch - 'a'];
            if (child == 0) {
                child = ++idx;
            }
            cur = child;
        }
    }

    // Breadth-first traversal from node 1; every edge triggers one
    // sam.extend() from the parent's state.
    void bfs() {
        std::queue<int> frontier;
        sam_pos[1] = 1;
        frontier.push(1);
        while (!frontier.empty()) {
            const int u = frontier.front();
            frontier.pop();
            for (int c = 0; c < 26; ++c) {
                const int v = tr[u].s[c];
                if (v == 0) {
                    continue;
                }
                sam_pos[v] = sam.extend(sam_pos[u], c);
                frontier.push(v);
            }
        }
    }
} trie;