This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
#include <bits/stdc++.h>
#pragma GCC optimize("O3")
#pragma GCC optimize("Ofast")
#pragma GCC optimize("unroll-loops")
#pragma GCC target("avx,avx2,fma")
using namespace std;
typedef long long ll;
typedef pair<ll, ll> pll;
typedef pair<int, int> pii;
#define MAX 2020
#define MAXS 22
#define INF 1000000000000000001
#define bb ' '
#define ln '\n'
#define Ln '\n'
#define MOD 1000000007
ll fact[MAX * 2];
int A[MAX];
ll dp[MAX][MAX * 2];
ll C[MAX * 2][MAX * 2];
void add(ll& a, ll b) { a = (a + b) % MOD; }
signed main() {
	ios::sync_with_stdio(false), cin.tie(0);
	int N, M;
	cin >> N >> M;
	int i, j, k;
	fact[0] = 1;
	for (i = 1; i <= N * 2; i++) fact[i] = (fact[i - 1] * i) % MOD;
	C[0][0] = 1;
	for (i = 1; i <= N * 2; i++) {
		C[i][0] = C[i][i] = 1;
		for (j = 1; j < i; j++) C[i][j] = (C[i - 1][j] + C[i - 1][j - 1]) % MOD;
	}
	for (i = 1; i <= N * 2; i++) A[i % M]++;
	dp[0][A[0]] = 1;
	int sum = A[0];
	for (i = 1; i < M; i++) {
		for (j = 0; j <= sum; j++) for (k = 0; k <= min(j, A[i]); k++) dp[i][j + A[i] - k * 2] = (dp[i][j + A[i] - k * 2] + dp[i - 1][j] * ((C[A[i]][k] * ((C[j][k] * fact[k]) % MOD)) % MOD)) % MOD;
		sum += A[i];
	}
	ll ans = (dp[M - 1][0] * fact[N]) % MOD;
	for (i = 1; i <= N; i++) {
		ans <<= 1;
		ans %= MOD;
	}
	cout << ans;
}
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... |