#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;
const int N = 200'064;
int rg[N];
short reg[2*N];
char delta[2*N];
int len = 0, len31;
vector<int> A[N];
int n, r, q;
char sum[2*N/32][32][32];
char cnt[2*N/32][32];
char scnt[2*N/32][32];
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,abm,bmi,bmi2")
typedef char c32 __attribute__((vector_size(32),aligned(1)));
typedef short s16 __attribute__((vector_size(32),aligned(1)));
void dfs(int v)
{
reg[len] = rg[v];
delta[len] = 1;
++len;
for (int u : A[v])
dfs(u);
reg[len] = rg[v];
delta[len] = -1;
++len;
}
void init()
{
len31 = len/31+1;
Loop (i,0,len31)
{
int ii = i*31;
Loop (ir1,0,31) {
short r1 = reg[ii + ir1];
Loop (j,ii,ii+31) {
cnt[i][ir1] += reg[j] == r1;
scnt[i][ir1] += delta[j] & -(reg[j] == r1);
}
Loop (ir2,0,31) {
short r2 = reg[ii + ir2];
char ans = 0, pre = 0;
Loop (j,ii,ii+31) {
ans += pre & -(reg[j] == r2);
pre += delta[j] & -(reg[j] == r1);
}
sum[i][ir1][ir2] = ans;
}
}
}
}
int solve(short r1, short r2)
{
int ans = 0, pre = 0;
for (int i = 0, ii = 0; i < len31; ++i, ii += 31) {
s16 rl = *(s16 *)(reg+ii);
s16 rr = *(s16 *)(reg+ii+16);
c32 is_r1 = __builtin_ia32_packsswb256(rl == r1, rr == r1);
c32 is_r2 = __builtin_ia32_packsswb256(rl == r2, rr == r2);
int r1msk = __builtin_ia32_pmovmskb256(is_r1) | INT_MIN;
int r2msk = __builtin_ia32_pmovmskb256(is_r2) | INT_MIN;
int fr1 = __builtin_ctz(r1msk);
int fr2 = __builtin_ctz(r2msk);
ans += pre * cnt[i][fr2];
ans += sum[i][fr1][fr2];
pre += scnt[i][fr1];
}
return ans/2;
}
int main()
{
cin.tie(0) -> sync_with_stdio(false);
cin >> n >> r >> q;
cin >> rg[0];
Loop (i,1,n) {
int p;
cin >> p >> rg[i];
A[p-1].push_back(i);
}
dfs(0);
init();
while (q--) {
int r1, r2;
cin >> r1 >> r2;
cout << solve(r1, r2) << '\n';
cout.flush();
}
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
4 ms |
4944 KB |
Output is correct |
2 |
Incorrect |
3 ms |
4944 KB |
Output isn't correct |
3 |
Incorrect |
6 ms |
4944 KB |
Output isn't correct |
4 |
Incorrect |
7 ms |
4944 KB |
Output isn't correct |
5 |
Incorrect |
10 ms |
5072 KB |
Output isn't correct |
6 |
Incorrect |
27 ms |
5072 KB |
Output isn't correct |
7 |
Incorrect |
38 ms |
5196 KB |
Output isn't correct |
8 |
Incorrect |
48 ms |
5200 KB |
Output isn't correct |
9 |
Incorrect |
100 ms |
5688 KB |
Output isn't correct |
10 |
Incorrect |
156 ms |
5992 KB |
Output isn't correct |
11 |
Incorrect |
212 ms |
6624 KB |
Output isn't correct |
12 |
Incorrect |
283 ms |
7296 KB |
Output isn't correct |
13 |
Incorrect |
349 ms |
7296 KB |
Output isn't correct |
14 |
Incorrect |
422 ms |
8120 KB |
Output isn't correct |
15 |
Incorrect |
593 ms |
10492 KB |
Output isn't correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Incorrect |
4170 ms |
13564 KB |
Output isn't correct |
2 |
Incorrect |
5344 ms |
12872 KB |
Output isn't correct |
3 |
Execution timed out |
8053 ms |
15880 KB |
Time limit exceeded |
4 |
Incorrect |
607 ms |
8152 KB |
Output isn't correct |
5 |
Incorrect |
1004 ms |
9772 KB |
Output isn't correct |
6 |
Incorrect |
1842 ms |
10460 KB |
Output isn't correct |
7 |
Incorrect |
4123 ms |
12440 KB |
Output isn't correct |
8 |
Execution timed out |
8071 ms |
17960 KB |
Time limit exceeded |
9 |
Execution timed out |
8013 ms |
21628 KB |
Time limit exceeded |
10 |
Execution timed out |
8039 ms |
26276 KB |
Time limit exceeded |
11 |
Execution timed out |
8050 ms |
24056 KB |
Time limit exceeded |
12 |
Execution timed out |
8048 ms |
25836 KB |
Time limit exceeded |
13 |
Execution timed out |
8057 ms |
25876 KB |
Time limit exceeded |
14 |
Execution timed out |
8055 ms |
25672 KB |
Time limit exceeded |
15 |
Execution timed out |
8007 ms |
28624 KB |
Time limit exceeded |
16 |
Execution timed out |
8010 ms |
32052 KB |
Time limit exceeded |
17 |
Execution timed out |
8082 ms |
31288 KB |
Time limit exceeded |