#include <iostream>
#include <vector>
#include <cassert>
#include <complex>
using ll = long long;
int MOD = 998244353;
#ifdef LOCAL
template<typename T>
struct vector: std::vector<T> {
template<typename... V> vector(V&&... args): std::vector<T>(std::forward<V>(args)...) {}
T& operator [](size_t idx) {return vector::at(idx);}
T const& operator[](size_t idx) const {return vector::at(idx);}
};
#else
using std::vector;
#endif
using std::pair, std::cin, std::cout;
#define rep(i, a, b) for(int i = a; i < (b); ++i)
#define all(x) std::begin(x), std::end(x)
#define sz(x) (int)(x).size()
typedef long long ll;
typedef pair<int, int> pii;
typedef vector<int> vi;
ll euclid(ll a, ll b, ll &x, ll &y) {
if (!b) return x = 1, y = 0, a;
ll d = euclid(b, a % b, y, x);
return y -= a/b * x, d;
}
struct mint {
int v;
explicit operator int() {return v;}
mint(): v(0) {}
mint(int z) {
if(z < -MOD || MOD <= z) z %= MOD;
if (z < 0) z += MOD;
v = z;
}
mint(ll z) {
z %= MOD;
if (z < 0) z += MOD;
v = z;
}
friend mint invert(mint a) {
ll x, y, g = euclid(a.v, MOD, x, y);
assert(g == 1); return mint(x);
}
mint& operator+= (mint const& o) {if((v+=o.v)>=MOD) v-=MOD; return *this;}
mint& operator-= (mint const& o) {if((v-=o.v)<0) v+=MOD; return *this;}
mint& operator*= (mint const& o) {v=(ll)v*o.v%MOD; return *this;}
mint& operator/= (mint const& o) {return *this *= invert(o);}
friend mint operator+ (mint a, mint const& b) {return a+=b;}
friend mint operator- (mint a, mint const& b) {return a-=b;}
friend mint operator* (mint a, mint const& b) {return a*=b;}
friend mint operator/ (mint const& a, mint const& b) {return a*invert(b);}
};
using std::complex, std::imag, std::real, std::polar, std::acos;
using namespace std::complex_literals;
typedef complex<double> C;
typedef vector<double> vd;
void fft(vector<C>& a) {
int n = sz(a), L = 31 - __builtin_clz(n);
static vector<complex<long double>> R(2, 1);
static vector<C> rt(2, 1); // (^ 10% faster if double)
for (static int k = 2; k < n; k *= 2) {
R.resize(n); rt.resize(n);
auto x = polar(1.0L, acos(-1.0L) / k);
rep(i,k,2*k) rt[i] = R[i] = i&1 ? R[i/2] * x : R[i/2];
}
vi rev(n);
rep(i,0,n) rev[i] = (rev[i / 2] | (i & 1) << L) / 2;
rep(i,0,n) if (i < rev[i]) swap(a[i], a[rev[i]]);
for (int k = 1; k < n; k *= 2)
for (int i = 0; i < n; i += 2 * k) rep(j,0,k) {
// C z = rt[j+k] * a[i+j+k]; // (25% faster if hand-rolled) /// include-line
auto x = (double *)&rt[j+k], y = (double *)&a[i+j+k]; /// exclude-line
C z(x[0]*y[0] - x[1]*y[1], x[0]*y[1] + x[1]*y[0]); /// exclude-line
a[i + j + k] = a[i + j] - z;
a[i + j] += z;
}
}
vd conv(const vd& a, const vd& b) {
if (a.empty() || b.empty()) return {};
vd res(sz(a) + sz(b) - 1);
int L = 32 - __builtin_clz(sz(res)), n = 1 << L;
vector<C> in(n), out(n);
copy(all(a), begin(in));
rep(i,0,sz(b)) in[i].imag(b[i]);
fft(in);
for (C& x : in) x *= x;
rep(i,0,n) out[i] = in[-i & (n - 1)] - conj(in[i]);
fft(out);
rep(i,0,sz(res)) res[i] = imag(out[i]) / (4 * n);
return res;
}
typedef vector<ll> vl;
vl convMod(const vl &a, const vl &b) {
if (a.empty() || b.empty()) return {};
vl res(sz(a) + sz(b) - 1);
int B=32-__builtin_clz(sz(res)), n=1<<B, cut=int(sqrt(MOD));
vector<C> L(n), R(n), outs(n), outl(n);
rep(i,0,sz(a)) L[i] = C((int)a[i] / cut, (int)a[i] % cut);
rep(i,0,sz(b)) R[i] = C((int)b[i] / cut, (int)b[i] % cut);
fft(L), fft(R);
rep(i,0,n) {
int j = -i & (n - 1);
outl[j] = (L[i] + conj(L[j])) * R[i] / (2.0 * n);
outs[j] = (L[i] - conj(L[j])) * R[i] / (2.0 * n) / 1i;
}
fft(outl), fft(outs);
rep(i,0,sz(res)) {
ll av = ll(real(outl[i])+.5), cv = ll(imag(outs[i])+.5);
ll bv = ll(imag(outl[i])+.5) + ll(real(outs[i])+.5);
res[i] = ((av % MOD * cut + bv) % MOD * cut + cv) % MOD;
}
return res;
}
vector<mint> multiply(vector<mint> &&a, vector<mint> &&b) {
vl al, bl;
al.resize(a.size());
bl.resize(b.size());
for(int i = 0;i < a.size(); ++i) al[i] = (int)a[i];
for(int i = 0;i < b.size(); ++i) bl[i] = (int)b[i];
vl res = convMod(al, bl);
vector<mint> ans(res.size());
for(int i = 0;i < ans.size(); ++i) ans[i] = mint(res[i]);
return ans;
}
int const maxn = 2e4 + 10;
mint fac[2*maxn],invfac[2*maxn];
int main(){
int N;
cin.tie(0);cin.sync_with_stdio(0);
cin >> N >> MOD;
fac[0] = mint(1);invfac[0] = mint(1);
for(int i=1; i<2*maxn; i++){
fac[i] = (fac[i-1] * mint(i));
invfac[i] = mint(1) / fac[i];
}
vector<mint> dp(N + 1);
vector<mint> sum_dp(N + 1);
dp[0] = mint(1);
sum_dp[0] = mint(1);
auto induce = [&](int l, int m, int r) {
auto apply = [&](vector<mint>& ans, auto &&dep_i, auto &&dep_j, auto &&dep_i_j) {
// ans[i] += fj(j) * fij(i+j)
// => ans[K+i-1] += fj(K-j-1) * fij(i+j)
// => ans[K+i-1 - (off)] += fj(K-j-1) * fij(i+j - off)
int K = m;
int off = l + m;
vector<mint> f1(K - l);
for(int j = l;j < m; ++j) f1[K - j - 1] = dep_j(j);
vector<mint> f2;
for(int ij = l + m;ij <= r + m - 2; ++ij) f2.push_back(dep_i_j(ij));
vector<mint> res = multiply(std::move(f1), std::move(f2));
for(int i = m;i < r; ++i)
ans[i] += res[i + K - 1 - (off)] * dep_i(i);
};
apply(dp,
[&](int i){return mint(1);},
[&](int j){return dp[j]*invfac[2*j];},
[&](int ij){return fac[ij-1];});
apply(sum_dp,
[&](int i){return mint(i);},
[&](int j){return dp[j]*invfac[2*j];},
[&](int ij){return fac[ij-1];});
apply(sum_dp,
[&](int i){return mint(1);},
[&](int j){return dp[j]*invfac[2*j]*mint(-j);},
[&](int ij){return fac[ij-1];});
apply(dp,
[&](int i){return mint(i);},
[&](int j){return sum_dp[j]*invfac[2*j+1];},
[&](int ij){return fac[ij-1];});
apply(dp,
[&](int i){return mint(1);},
[&](int j){return sum_dp[j]*invfac[2*j+1]*mint(-j-1);},
[&](int ij){return fac[ij-1];});
apply(sum_dp,
[&](int i){return mint(i) * mint(i) - mint(i);},
[&](int j){return sum_dp[j]*invfac[2*j+1];},
[&](int ij){return fac[ij-1];});
apply(sum_dp,
[&](int i){return mint(1);},
[&](int j){return sum_dp[j]*invfac[2*j+1] * (mint(j) * mint(j) + mint(j));},
[&](int ij){return fac[ij-1];});
apply(sum_dp,
[&](int i){return mint(-2*i);},
[&](int j){return sum_dp[j]*invfac[2*j+1]*mint(j);},
[&](int ij){return fac[ij-1];});
};
auto solve = [&](auto const& solve, int l, int r) ->void{
if(l + 1 == r) return;
int m = l + (r - l) / 2;
solve(solve, l, m);
induce(l, m, r);
solve(solve, m, r);
};
solve(solve, 0, N + 1);
mint default_ans(1);
for(int i = N * 2 - 1;i >= 1; i -= 2)
default_ans *= mint(i);
#ifdef LOCAL
for(int i = 0;i <= N; ++i)
printf("%d -- %d\n", dp[i], sum_dp[i]);
#endif
printf("%d\n", (int)(default_ans - dp[N]));
return 0;
}
Compilation message
festival2.cpp: In function 'std::vector<mint> multiply(std::vector<mint>&&, std::vector<mint>&&)':
festival2.cpp:128:18: warning: comparison of integer expressions of different signedness: 'int' and 'std::vector<mint>::size_type' {aka 'long unsigned int'} [-Wsign-compare]
128 | for(int i = 0;i < a.size(); ++i) al[i] = (int)a[i];
| ~~^~~~~~~~~~
festival2.cpp:129:18: warning: comparison of integer expressions of different signedness: 'int' and 'std::vector<mint>::size_type' {aka 'long unsigned int'} [-Wsign-compare]
129 | for(int i = 0;i < b.size(); ++i) bl[i] = (int)b[i];
| ~~^~~~~~~~~~
festival2.cpp:132:18: warning: comparison of integer expressions of different signedness: 'int' and 'std::vector<mint>::size_type' {aka 'long unsigned int'} [-Wsign-compare]
132 | for(int i = 0;i < ans.size(); ++i) ans[i] = mint(res[i]);
| ~~^~~~~~~~~~~~
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
5 ms |
604 KB |
Output is correct |
2 |
Correct |
5 ms |
604 KB |
Output is correct |
3 |
Correct |
5 ms |
604 KB |
Output is correct |
4 |
Correct |
5 ms |
604 KB |
Output is correct |
5 |
Correct |
6 ms |
604 KB |
Output is correct |
6 |
Correct |
5 ms |
600 KB |
Output is correct |
7 |
Correct |
5 ms |
604 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
5 ms |
604 KB |
Output is correct |
2 |
Correct |
5 ms |
604 KB |
Output is correct |
3 |
Correct |
5 ms |
604 KB |
Output is correct |
4 |
Correct |
5 ms |
604 KB |
Output is correct |
5 |
Correct |
6 ms |
604 KB |
Output is correct |
6 |
Correct |
5 ms |
600 KB |
Output is correct |
7 |
Correct |
5 ms |
604 KB |
Output is correct |
8 |
Correct |
5 ms |
604 KB |
Output is correct |
9 |
Correct |
5 ms |
600 KB |
Output is correct |
10 |
Correct |
5 ms |
604 KB |
Output is correct |
11 |
Correct |
5 ms |
768 KB |
Output is correct |
12 |
Correct |
5 ms |
768 KB |
Output is correct |
13 |
Correct |
5 ms |
856 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
5 ms |
604 KB |
Output is correct |
2 |
Correct |
5 ms |
604 KB |
Output is correct |
3 |
Correct |
5 ms |
604 KB |
Output is correct |
4 |
Correct |
5 ms |
604 KB |
Output is correct |
5 |
Correct |
6 ms |
604 KB |
Output is correct |
6 |
Correct |
5 ms |
600 KB |
Output is correct |
7 |
Correct |
5 ms |
604 KB |
Output is correct |
8 |
Correct |
5 ms |
604 KB |
Output is correct |
9 |
Correct |
5 ms |
600 KB |
Output is correct |
10 |
Correct |
5 ms |
604 KB |
Output is correct |
11 |
Correct |
5 ms |
768 KB |
Output is correct |
12 |
Correct |
5 ms |
768 KB |
Output is correct |
13 |
Correct |
5 ms |
856 KB |
Output is correct |
14 |
Correct |
5 ms |
772 KB |
Output is correct |
15 |
Correct |
6 ms |
600 KB |
Output is correct |
16 |
Correct |
5 ms |
768 KB |
Output is correct |
17 |
Correct |
5 ms |
604 KB |
Output is correct |
18 |
Correct |
6 ms |
604 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
5 ms |
604 KB |
Output is correct |
2 |
Correct |
5 ms |
604 KB |
Output is correct |
3 |
Correct |
5 ms |
604 KB |
Output is correct |
4 |
Correct |
5 ms |
604 KB |
Output is correct |
5 |
Correct |
6 ms |
604 KB |
Output is correct |
6 |
Correct |
5 ms |
600 KB |
Output is correct |
7 |
Correct |
5 ms |
604 KB |
Output is correct |
8 |
Correct |
5 ms |
604 KB |
Output is correct |
9 |
Correct |
5 ms |
600 KB |
Output is correct |
10 |
Correct |
5 ms |
604 KB |
Output is correct |
11 |
Correct |
5 ms |
768 KB |
Output is correct |
12 |
Correct |
5 ms |
768 KB |
Output is correct |
13 |
Correct |
5 ms |
856 KB |
Output is correct |
14 |
Correct |
5 ms |
772 KB |
Output is correct |
15 |
Correct |
6 ms |
600 KB |
Output is correct |
16 |
Correct |
5 ms |
768 KB |
Output is correct |
17 |
Correct |
5 ms |
604 KB |
Output is correct |
18 |
Correct |
6 ms |
604 KB |
Output is correct |
19 |
Correct |
8 ms |
604 KB |
Output is correct |
20 |
Correct |
8 ms |
860 KB |
Output is correct |
21 |
Correct |
8 ms |
764 KB |
Output is correct |
22 |
Correct |
6 ms |
600 KB |
Output is correct |
23 |
Correct |
6 ms |
604 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
5 ms |
604 KB |
Output is correct |
2 |
Correct |
5 ms |
604 KB |
Output is correct |
3 |
Correct |
5 ms |
604 KB |
Output is correct |
4 |
Correct |
5 ms |
604 KB |
Output is correct |
5 |
Correct |
6 ms |
604 KB |
Output is correct |
6 |
Correct |
5 ms |
600 KB |
Output is correct |
7 |
Correct |
5 ms |
604 KB |
Output is correct |
8 |
Correct |
5 ms |
604 KB |
Output is correct |
9 |
Correct |
5 ms |
600 KB |
Output is correct |
10 |
Correct |
5 ms |
604 KB |
Output is correct |
11 |
Correct |
5 ms |
768 KB |
Output is correct |
12 |
Correct |
5 ms |
768 KB |
Output is correct |
13 |
Correct |
5 ms |
856 KB |
Output is correct |
14 |
Correct |
5 ms |
772 KB |
Output is correct |
15 |
Correct |
6 ms |
600 KB |
Output is correct |
16 |
Correct |
5 ms |
768 KB |
Output is correct |
17 |
Correct |
5 ms |
604 KB |
Output is correct |
18 |
Correct |
6 ms |
604 KB |
Output is correct |
19 |
Correct |
8 ms |
604 KB |
Output is correct |
20 |
Correct |
8 ms |
860 KB |
Output is correct |
21 |
Correct |
8 ms |
764 KB |
Output is correct |
22 |
Correct |
6 ms |
600 KB |
Output is correct |
23 |
Correct |
6 ms |
604 KB |
Output is correct |
24 |
Correct |
64 ms |
1904 KB |
Output is correct |
25 |
Correct |
65 ms |
1896 KB |
Output is correct |
26 |
Correct |
65 ms |
1832 KB |
Output is correct |
27 |
Correct |
8 ms |
860 KB |
Output is correct |
28 |
Correct |
21 ms |
860 KB |
Output is correct |
29 |
Correct |
20 ms |
860 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
5 ms |
604 KB |
Output is correct |
2 |
Correct |
5 ms |
604 KB |
Output is correct |
3 |
Correct |
5 ms |
604 KB |
Output is correct |
4 |
Correct |
5 ms |
604 KB |
Output is correct |
5 |
Correct |
6 ms |
604 KB |
Output is correct |
6 |
Correct |
5 ms |
600 KB |
Output is correct |
7 |
Correct |
5 ms |
604 KB |
Output is correct |
8 |
Correct |
5 ms |
604 KB |
Output is correct |
9 |
Correct |
5 ms |
600 KB |
Output is correct |
10 |
Correct |
5 ms |
604 KB |
Output is correct |
11 |
Correct |
5 ms |
768 KB |
Output is correct |
12 |
Correct |
5 ms |
768 KB |
Output is correct |
13 |
Correct |
5 ms |
856 KB |
Output is correct |
14 |
Correct |
5 ms |
772 KB |
Output is correct |
15 |
Correct |
6 ms |
600 KB |
Output is correct |
16 |
Correct |
5 ms |
768 KB |
Output is correct |
17 |
Correct |
5 ms |
604 KB |
Output is correct |
18 |
Correct |
6 ms |
604 KB |
Output is correct |
19 |
Correct |
8 ms |
604 KB |
Output is correct |
20 |
Correct |
8 ms |
860 KB |
Output is correct |
21 |
Correct |
8 ms |
764 KB |
Output is correct |
22 |
Correct |
6 ms |
600 KB |
Output is correct |
23 |
Correct |
6 ms |
604 KB |
Output is correct |
24 |
Correct |
64 ms |
1904 KB |
Output is correct |
25 |
Correct |
65 ms |
1896 KB |
Output is correct |
26 |
Correct |
65 ms |
1832 KB |
Output is correct |
27 |
Correct |
8 ms |
860 KB |
Output is correct |
28 |
Correct |
21 ms |
860 KB |
Output is correct |
29 |
Correct |
20 ms |
860 KB |
Output is correct |
30 |
Correct |
387 ms |
5920 KB |
Output is correct |
31 |
Correct |
394 ms |
6148 KB |
Output is correct |
32 |
Correct |
388 ms |
5920 KB |
Output is correct |
33 |
Correct |
336 ms |
5644 KB |
Output is correct |
34 |
Correct |
140 ms |
3056 KB |
Output is correct |
35 |
Correct |
128 ms |
3184 KB |
Output is correct |