Loj 3247 Non-Decreasing Subsequences

矩阵乘法.

dp 显然可以写成转移矩阵的形式.

记 $v$ 为长度为 $K$ ,值全为 $1$ 的列向量, $w$ 为长度为 $K$ ,仅第一个值为 $1$ 的行向量, $T_i$ 表示第 $i$ 个数对应的转移矩阵.

那么询问 $[l,r]$ 的答案就是
$$
(w(\prod_{i=l}^r T_i)v)_{0,0}
$$
注意到转移矩阵 $T_i$ 有逆,且逆是容易直接求出的,于是可以考虑维护转移矩阵前缀积,以及逆矩阵的前缀积.

记 $x_i=T_1T_2T_3\cdots T_iv,y_i=wT_{i}^{-1}\cdots T_{3}^{-1}T_2^{-1}T_1^{-1}$ ,则询问的答案为 $(y_{l-1}x_r)_{0,0}$ .

$T_i$ 和 $T_{i}^{-1}$ 都只有 $O(K)$ 个位置有值,于是右乘 $T_{i}$ 与左乘 $T_{i}^{-1}$ 都可以 $O(K^2)$ 完成.

时间复杂度 $O(nK^2+qK)$ .

参考代码:
//%std
#include<bits/stdc++.h>
using namespace std;
typedef long long ll;
// Reads the next decimal integer from stdin using getchar().
//
// Skips every character that is neither a digit nor '-', accepts one optional
// leading '-', then consumes consecutive digits.
//
// Returns the parsed value; returns 0 if the end of input is reached before
// any digit is seen.
//
// The character is kept in an int and compared against EOF: getchar() returns
// EOF (a negative int) at end of input, and without that check the skip loop
// would never terminate once stdin is exhausted.
inline int read()
{
    int out = 0, fh = 1;
    int jp = getchar();
    while (jp != EOF && (jp > '9' || jp < '0') && jp != '-')
        jp = getchar();
    if (jp == '-')
        fh = -1, jp = getchar();
    while (jp >= '0' && jp <= '9')
        out = out * 10 + (jp - '0'), jp = getchar();
    return out * fh;
}
// Writes the decimal digits of x to stdout with putchar, most significant
// digit first. No sign is emitted here.
void print(int x)
{
    int digits[16];
    int count = 0;
    // Collect digits from least to most significant.
    for (;;)
    {
        digits[count++] = '0' + x % 10;
        if (x < 10)
            break;
        x /= 10;
    }
    // Emit them in reverse so the leading digit comes out first.
    while (count > 0)
        putchar(digits[--count]);
}
// Writes x in decimal to stdout, preceded by '-' when x is negative and
// followed by the single character c.
void write(int x, char c)
{
    const bool negative = x < 0;
    if (negative)
    {
        putchar('-');
        x = -x;
    }
    print(x);
    putchar(c);
}
// Modulus used by add/mul, and the value (P + 1) / 2, which is the modular
// inverse of 2 because 2 * inv2 = P + 1.
const int P = 1000000007;
const int inv2 = (P + 1) / 2;
// Returns a + b, reduced by one subtraction of P when the sum reaches P.
int add(int a, int b)
{
    int sum = a + b;
    if (sum >= P)
        sum -= P;
    return sum;
}
// In-place form of add: stores add(a, b) back into a.
void inc(int &a, int b)
{
    const int updated = add(a, b);
    a = updated;
}
// Returns a * b mod P; the product is formed in 64 bits before reduction.
int mul(int a, int b)
{
    const long long product = static_cast<long long>(a) * b;
    return static_cast<int>(product % P);
}
const int M = 20;     // side length of the storage inside Matrix
const int N = 50010;  // capacity of a[] and of the per-position Matrix arrays
int n;                // sequence length, read in main
int K;                // active matrix dimension, read in main
int Q;                // file-level query counter (main declares its own local)
int a[N];             // sequence values, filled for indices 1..n in main
int op;               // selects the loop nesting used by operator*
// Fixed-size M x M matrix of ints; a freshly constructed Matrix is all zeros.
// Also declares the global arrays x and y (N matrices each) and the scratch
// matrix tmp.
struct Matrix
{
    int v[M][M];
    Matrix()
    {
        for (int r = 0; r < M; ++r)
            for (int c = 0; c < M; ++c)
                v[r][c] = 0;
    }
} x[N], y[N], tmp;
// Modular matrix product of the leading K x K blocks of A and B.
//
// Operands are taken by const reference: a Matrix stores M * M ints, and the
// former by-value signature copied both operands on every call. The result is
// built in a separate local, so an operand may safely alias the assignment
// target (e.g. `m = m * t`).
//
// The global `op` only chooses the loop nesting; both nestings accumulate
// sum over k of A[i][k] * B[k][j] (mod P) and skip zero factors:
//   op == 0 : the outer loops walk B's entries, so a mostly-zero B is cheap;
//   op != 0 : the outer loops walk A's entries, so a mostly-zero A is cheap.
// Entries of the result outside the K x K block stay 0.
Matrix operator * (const Matrix &A, const Matrix &B)
{
    Matrix res;
    if (op == 0)
    {
        for (int k = 0; k < K; ++k)
            for (int j = 0; j < K; ++j)
            {
                const int b = B.v[k][j];
                if (!b)
                    continue;
                for (int i = 0; i < K; ++i)
                    if (A.v[i][k])
                        inc(res.v[i][j], mul(A.v[i][k], b));
            }
    }
    else
    {
        for (int i = 0; i < K; ++i)
            for (int k = 0; k < K; ++k)
            {
                const int a_ik = A.v[i][k];
                if (!a_ik)
                    continue;
                for (int j = 0; j < K; ++j)
                    if (B.v[k][j])
                        inc(res.v[i][j], mul(a_ik, B.v[k][j]));
            }
    }
    return res;
}
int main()
{
n = read(), K = read();
for (int i = 1; i <= n; ++i)
a[i] = read() - 1;
op = 0;
for (int i = 0; i < K; ++i)
x[0].v[i][i] = y[0].v[i][i] = 1;
for (int i = 1; i <= n; ++i)
{
memset(tmp.v, 0, sizeof tmp.v);
for (int j = 0; j < K; ++j)
inc(tmp.v[j][j], 1);
for (int j = 0; j <= a[i]; ++j)
inc(tmp.v[j][a[i]], 1);
x[i] = x[i - 1] * tmp;
}
memset(tmp.v, 0, sizeof tmp.v);
for (int i = 0; i < K; ++i)
tmp.v[i][0] = 1;
for (int i = 1; i <= n; ++i)
x[i] = x[i] * tmp;
op = 1;
for (int i = 1; i <= n; ++i)
{
memset(tmp.v, 0, sizeof tmp.v);
for (int j = 0; j < K; ++j)
inc(tmp.v[j][j], 1);
for (int j = 0; j <= a[i]; ++j)
inc(tmp.v[j][a[i]], P - inv2);
y[i] = tmp * y[i - 1];
}
memset(tmp.v, 0, sizeof tmp.v);
tmp.v[0][0] = 1;
for (int i = 1; i <= n; ++i)
y[i] = tmp * y[i];
int Q = read();
while (Q--)
{
int l = read(), r = read(), ans = 0;
for (int k = 0; k < K; ++k)
inc(ans, mul(y[l - 1].v[0][k], x[r].v[k][0]));
write(ans, '\n');
}
return 0;
}