【模板】后缀数组(仅代码)

1.定义

\(sa_i\):字典序排名为 \(i\) 的后缀,其首字符下标为 \(sa_i\)

\(rk_i\):首字符下标为 \(i\) 的后缀,其字典序排名为 \(rk_i\)。(显然 \(sa_{rk_i} = i\)

\(h_i\)\(sa_i\)\(sa_{i-1}\) 所表示的后缀的最长公共前缀的长度。(满足 \(h_{rk_i} \ge h_{rk_{i-1}}-1\)

2.代码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
#include <bits/stdc++.h>
using namespace std;

const int N = 1e6 + 5, M = 200;
int n;
char a[N];
int sa[N*2], rk[N*2], rk0[N*2], cnt[M], id[N*2], h[N*2];
int pos = 0, ans = 0;

int main() {
ios::sync_with_stdio(false);
cin.tie(0);

cin.getline(a+1, N);
n = strlen(a+1);

int m = 128, i;
for (i = 1; i <= n; i++)
cnt[rk[i] = a[i]]++;
for (i = 1; i <= m; i++)
cnt[i] += cnt[i - 1];
for (i = n; i >= 1; i--)
sa[cnt[rk[i]]--] = i;
int p = 1;
for (int len = 1; p < n; len *= 2, m = p) {
p = 1;
for (i = n - len + 1; i <= n; i++)
id[p++] = i;
for (i = 1; i <= n; i++)
if (sa[i] > len)
id[p++] = sa[i] - len;
for (i = 0; i <= m; i++)
cnt[i] = 0;
for (i = 1; i <= n; i++)
cnt[rk[i]]++;
for (i = 1; i <= m; i++)
cnt[i] += cnt[i - 1];
for (i = n; i >= 1; i--)
sa[cnt[rk[id[i]]]--] = id[i];
for (i = 0; i <= n; i++)
rk0[i] = rk[i];
p = 0;
for (i = 1; i <= n; i++) {
if (rk0[sa[i]] == rk0[sa[i - 1]] && rk0[sa[i] + len] == rk0[sa[i - 1] + len])
rk[sa[i]] = p;
else
rk[sa[i]] = ++p;
}
}

for (int i = 1, k = 0; i <= n; i++) {
if (k) k--;
while (a[i+k] == a[sa[rk[i]-1]+k]) k++;
h[rk[i]] = k;
}

for (int i = 1; i <= n; i++)
cout << sa[i] << ' ';
cout << endl;
for (int i = 1; i <= n; i++)
cout << h[i] << ' ';
return 0;
}

字符串下标从 \(1\) 开始。