Submission #648825

#TimeUsernameProblemLanguageResultExecution timeMemory
648825ymmRegions (IOI09_regions)C++17
1 / 100
8082 ms32052 KiB
#include <bits/stdc++.h> #define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x) #define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x) typedef long long ll; typedef std::pair<int, int> pii; typedef std::pair<ll , ll > pll; using namespace std; const int N = 200'064; int rg[N]; short reg[2*N]; char delta[2*N]; int len = 0, len31; vector<int> A[N]; int n, r, q; char sum[2*N/32][32][32]; char cnt[2*N/32][32]; char scnt[2*N/32][32]; #pragma GCC optimize("O3,unroll-loops") #pragma GCC target("avx2,abm,bmi,bmi2") typedef char c32 __attribute__((vector_size(32),aligned(1))); typedef short s16 __attribute__((vector_size(32),aligned(1))); void dfs(int v) { reg[len] = rg[v]; delta[len] = 1; ++len; for (int u : A[v]) dfs(u); reg[len] = rg[v]; delta[len] = -1; ++len; } void init() { len31 = len/31+1; Loop (i,0,len31) { int ii = i*31; Loop (ir1,0,31) { short r1 = reg[ii + ir1]; Loop (j,ii,ii+31) { cnt[i][ir1] += reg[j] == r1; scnt[i][ir1] += delta[j] & -(reg[j] == r1); } Loop (ir2,0,31) { short r2 = reg[ii + ir2]; char ans = 0, pre = 0; Loop (j,ii,ii+31) { ans += pre & -(reg[j] == r2); pre += delta[j] & -(reg[j] == r1); } sum[i][ir1][ir2] = ans; } } } } int solve(short r1, short r2) { int ans = 0, pre = 0; for (int i = 0, ii = 0; i < len31; ++i, ii += 31) { s16 rl = *(s16 *)(reg+ii); s16 rr = *(s16 *)(reg+ii+16); c32 is_r1 = __builtin_ia32_packsswb256(rl == r1, rr == r1); c32 is_r2 = __builtin_ia32_packsswb256(rl == r2, rr == r2); int r1msk = __builtin_ia32_pmovmskb256(is_r1) | INT_MIN; int r2msk = __builtin_ia32_pmovmskb256(is_r2) | INT_MIN; int fr1 = __builtin_ctz(r1msk); int fr2 = __builtin_ctz(r2msk); ans += pre * cnt[i][fr2]; ans += sum[i][fr1][fr2]; pre += scnt[i][fr1]; } return ans/2; } int main() { cin.tie(0) -> sync_with_stdio(false); cin >> n >> r >> q; cin >> rg[0]; Loop (i,1,n) { int p; cin >> p >> rg[i]; A[p-1].push_back(i); } dfs(0); init(); while (q--) { int r1, r2; cin >> r1 >> r2; cout << solve(r1, r2) << '\n'; cout.flush(); } }
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...