// Submission #648824
//
// # | Time | Username | Problem | Language | Result | Execution time | Memory
// 648824 | ymm | Regions (IOI09_regions) | C++17 | 1 / 100 | 872 ms | 64880 KiB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;
 
// Capacity of the per-node arrays (maximum node count plus some slack).
const int N = 200'064;
// Upper bound on the number of 31-element tour blocks: init() uses
// len/31 + 1 blocks and len never exceeds 2*N.
const int BLOCKS = 2*N/31 + 1;

// rg[v]: region label of node v as read from the input.
int rg[N];
// Euler tour of the tree: every node contributes one entry event and one exit
// event.  reg[k] is the region of the node behind event k, delta[k] is +1 for
// an entry and -1 for an exit.  The 32 extra slots keep the block-wise reads
// (31 elements in init(), 32 in solve()) inside the arrays; they stay zero.
short reg[2*N + 32];
signed char delta[2*N + 32];
// len: number of tour events written so far; len31: number of 31-event blocks.
int len = 0, len31;
// A[v]: children of node v.
std::vector<int> A[N];
int n, r, q;
// Per-block tables, indexed by offsets inside the block.  An offset stands for
// "the region stored at that offset"; offset 31 is never written and therefore
// acts as an all-zero row/column for "region absent from this block".
//   sum[b][o1][o2]: sum over events j of block b whose region is reg[o2's slot]
//                   of the running delta total of region reg[o1's slot] inside
//                   the block before j.  Range is [-465, 465], hence short.
//   cnt[b][o]     : number of events in block b carrying that region.
//   scnt[b][o]    : sum of delta over those events.
short sum[BLOCKS][32][32];
signed char cnt[BLOCKS][32];
signed char scnt[BLOCKS][32];

#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,abm,bmi,bmi2")
typedef char  c32 __attribute__((vector_size(32)));
typedef short s16 __attribute__((vector_size(32)));

// Appends the Euler tour of the subtree rooted at v to reg/delta, advancing
// len by two events per visited node.  Children are visited in A[] order.
// Uses an explicit stack so that very deep trees cannot overflow the call
// stack.
void dfs(int v)
{
	auto record = [](int node, signed char d) {
		reg[len] = static_cast<short>(rg[node]);
		delta[len] = d;
		++len;
	};

	// Each frame holds a node and the index of its next unvisited child.
	std::vector<std::pair<int, std::size_t>> path;
	record(v, 1);
	path.emplace_back(v, std::size_t{0});
	while (!path.empty()) {
		const int u = path.back().first;
		const std::size_t k = path.back().second;
		if (k < A[u].size()) {
			++path.back().second;
			const int child = A[u][k];
			record(child, 1);
			path.emplace_back(child, std::size_t{0});
		} else {
			record(u, -1);
			path.pop_back();
		}
	}
}

// Splits the tour into len/31 + 1 blocks of 31 events and fills cnt, scnt and
// sum for every block.  Entries are assigned (not accumulated), so calling
// init() again after the tour changed recomputes the tables from scratch.
void init()
{
	len31 = len/31 + 1;
	for (int b = 0; b < len31; ++b) {
		const int base = b*31;
		for (int o1 = 0; o1 < 31; ++o1) {
			const short r1 = reg[base + o1];

			int occurrences = 0, net = 0;
			for (int j = base; j < base + 31; ++j) {
				const int hit = -(reg[j] == r1);   // all-ones mask when the region matches
				occurrences -= hit;
				net += delta[j] & hit;
			}
			cnt[b][o1] = static_cast<signed char>(occurrences);
			scnt[b][o1] = static_cast<signed char>(net);

			for (int o2 = 0; o2 < 31; ++o2) {
				const short r2 = reg[base + o2];
				// Wide accumulators: the total can reach +-240 for two
				// distinct regions, which does not fit into 8 bits.
				int total = 0, open = 0;
				for (int j = base; j < base + 31; ++j) {
					total += open & -(reg[j] == r2);
					open += delta[j] & -(reg[j] == r1);
				}
				sum[b][o1][o2] = static_cast<short>(total);
			}
		}
	}
}

int solve(short r1, short r2)
{
	int ans = 0, pre = 0;
	for (int i = 0, ii = 0; i < len31; ++i, ii += 31) {
		s16 rl = *(s16 *)(reg+ii);
		s16 rr = *(s16 *)(reg+ii+16);
		c32 is_r1 = __builtin_ia32_packsswb256(rl == r1, rr == r1);
		c32 is_r2 = __builtin_ia32_packsswb256(rl == r2, rr == r2);
		int r1msk = __builtin_ia32_pmovmskb256(is_r1) | INT_MIN;
		int r2msk = __builtin_ia32_pmovmskb256(is_r2) | INT_MIN;
		int fr1 = __builtin_ctz(r1msk);
		int fr2 = __builtin_ctz(r2msk);
		ans += pre * cnt[i][fr2];
		ans += sum[i][fr1][fr2];
		pre += scnt[i][fr1];
	}
	return ans/2;
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	cin >> n >> r >> q;
	cin >> rg[0];
	Loop (i,1,n) {
		int p;
		cin >> p >> rg[i];
		A[p-1].push_back(i);
	}
	dfs(0);
	init();
	while (q--) {
		int r1, r2;
		cin >> r1 >> r2;
		cout << solve(r1, r2) << '\n';
		cout.flush();
	}
}
// # | Verdict | Execution time | Memory | Grader output
// Fetching results...
// # | Verdict | Execution time | Memory | Grader output
// Fetching results...