답안 #558603

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
558603 2022-05-07T15:20:24 Z DanShaders Sprinkler (JOI22_sprinkler) C++17
3 / 100
4000 ms 646348 KB
#pragma GCC target("avx2")

#include <bits/stdc++.h>
#include <immintrin.h>
#include <ext/pb_ds/assoc_container.hpp>
using namespace std;

// Short alias for the GNU policy-based data structures namespace.
namespace x = __gnu_pbds;
// Order-statistics red-black tree keyed by T (not referenced in the visible part of this file).
template <typename T>
using ordered_set = x::tree<T, x::null_type, less<T>, x::rb_tree_tag, x::tree_order_statistics_node_update>;

// priority_queue ordered with greater<>, i.e. the smallest element is on top
// (not referenced in the visible part of this file).
template <typename T>
using normal_queue = priority_queue<T, vector<T>, greater<>>;

// Expands to the begin/end iterator pair of a container.
#define all(x) begin(x), end(x)
// Container size as int. Function-like: only `sz(` expands, so the array `sz[...]`
// and the member `.sz` declared later in this file are unaffected.
#define sz(x) ((int) (x).size())
// WARNING: from here on every identifier token `x` / `y` in this file (including
// parameter and local names) is rewritten to `first` / `second`.
#define x first
#define y second
using ll = long long;
using ld = long double;

// N: capacity of the per-vertex arrays; EULER: capacity of the Euler-tour arrays;
// LOG: number of levels in sp / where / where2.
const int N = 2e5 + 10, EULER = 2 * N, LOG = 19;
// Size in bytes of the stdin and stdout staging buffers.
const int BUFF = 4096;

// Input staging buffer; the extra byte holds the terminator written by read_char.
char buff[BUFF + 1];
// buff_it: next unread position in buff; buff_len: number of bytes delivered by the last fread.
int buff_it = 0, buff_len = 0;

// Output staging buffer, drained by wflush.
char wbuff[BUFF];
// Number of bytes currently pending in wbuff.
int wbuff_len = 0;

// Returns the next byte of stdin, refilling the staging buffer when it is used up.
//
// Once stdin is exhausted (or fread fails) every call returns '\0' and leaves
// buff_len at 0, so callers can detect the end of input. The cursor is not
// advanced in that case: advancing it past buff_len would make later calls skip
// the refill and serve stale bytes, eventually reading beyond the buffer.
char read_char() {
	if (buff_it >= buff_len) {
		buff_len = (int) fread(buff, 1, BUFF, stdin);
		buff_it = 0;
		// Keep the terminator right after the valid bytes.
		buff[buff_len] = 0;
		if (buff_len == 0) {
			return 0;
		}
	}
	return buff[buff_it++];
}

int read_int() {
	char c = ' ';
	while (!isdigit(c = read_char()));
	int res = 0;
	do {
		res *= 10;
		res += c - '0';
	} while (isdigit(c = read_char()));
	return res;
}

// Writes every byte pending in wbuff to stdout and marks the buffer empty.
void wflush() {
	const size_t pending = (size_t) wbuff_len;
	fwrite(wbuff, sizeof(char), pending, stdout);
	wbuff_len = 0;
}

// Appends one byte to the output buffer, draining the buffer first when it is full.
void write_char(char c) {
	const bool buffer_full = (wbuff_len == BUFF);
	if (buffer_full) {
		wflush();
	}
	wbuff[wbuff_len] = c;
	++wbuff_len;
}

// Appends the decimal representation of the value to the output buffer.
//
// Non-negative values are printed exactly as before. Negative values now get a
// leading '-' followed by their digits; previously the negative remainders were
// turned into non-digit characters. Digits are taken from the signed value
// directly, so the most negative long long is handled without negating it.
void write_int(ll x) {
	// 19 digits are enough for any 64-bit signed value; the sign is written separately.
	static char cbuff[20];
	if (x == 0) {
		write_char('0');
		return;
	}
	const bool negative = x < 0;
	int len = 0;
	// Collect digits least-significant first.
	for (; x != 0; x /= 10) {
		const int digit = int(x % 10);
		cbuff[len++] = char('0' + (negative ? -digit : digit));
	}
	if (negative) {
		write_char('-');
	}
	// Emit them most-significant first.
	while (len > 0) {
		write_char(cbuff[--len]);
	}
}

// Adjacency lists of the input tree; main stores vertices 0-based.
vector<int> g[N];
// h[v]: per-vertex value read in main; sz[v]: subtree size written by dfs_sz.
int h[N], sz[N];
// Non-zero once dfs_centroid has picked the vertex as a centroid; such vertices
// are skipped by the later traversals.
char used[N];

// Number of Arr cells in the static pool handed out by allocate().
const int INT_MEM = 7600380;

// One cell of the binary-indexed (Fenwick-style) arrays used by apply_for / count_for.
struct Arr {
	// Additive counters. a[0..7] are read and written with aligned 256-bit
	// loads/stores (hence the 32-byte alignment); a[8] is handled as a scalar.
	int a[9] __attribute__((aligned(32)));
	// Multiplicative accumulator kept reduced modulo l; main sets it to 1 before use.
	ll i;
} int_mem[INT_MEM];

// Per-vertex record of the centroid decomposition; dfs_centroid fills the entry
// of each vertex it selects as a centroid.
struct CentroidNode {
	// parent: centroid one level up (-1 for the top-level centroid);
	// depth: decomposition level; sz: number of vertices in this centroid's component.
	int parent, depth, sz;
	// ob[d]: BFS position of the first vertex at distance d from this centroid.
	// pb[0] is 0; pb[d] for d >= 1 is the BFS position of the first vertex at
	// distance d from the parent centroid, in the BFS started at the vertex
	// through which the component was entered.
	vector<int> ob, pb;
	// op / pp: sz pool cells each, indexed by the BFS positions described by ob / pb.
	Arr *op, *pp;
} tc[N];

// Index of the first cell of int_mem that has not been handed out yet.
int next_free = 0;

// Reserves the requested number of consecutive cells from the static pool and
// returns a pointer to the first one. Cells are never returned to the pool.
//
// The capacity check runs before the cursor moves and accepts a request that
// ends exactly at the end of the pool (the previous `<` comparison rejected
// that valid case).
Arr *allocate(int x) {
	assert(x >= 0 && next_free <= INT_MEM - x);
	Arr *res = int_mem + next_free;
	next_free += x;
	return res;
}

// where[level][v]: position of v in the BFS started at the centroid of decomposition
// level `level` whose component contains v (index into that centroid's op cells).
// where2[level][v]: position of v in the BFS started at that component's entry
// vertex (index into the pp cells).
int where[LOG][N], where2[LOG][N];

// Computes subtree sizes inside the component of not-yet-used vertices reachable
// from u, treating p as the vertex we arrived from. Stores the size of u's
// subtree in sz[u] and returns it; a used vertex contributes 0 and is not entered.
int dfs_sz(int u, int p = -1) {
	if (used[u]) {
		return 0;
	}
	int total = 1;
	for (int v : g[u]) {
		if (v != p) {
			total += dfs_sz(v, u);
		}
	}
	sz[u] = total;
	return total;
}

// Walks from u towards the heavy side of the component until no unused neighbour
// (other than the one we came from) has a subtree of at least half of csz, and
// returns the vertex where the walk stops. Relies on sz[] as filled by dfs_sz.
int dfs_find_centroid(int u, int csz, int p = -1) {
	for (;;) {
		int heavy = -1;
		for (int v : g[u]) {
			if (v == p || used[v]) {
				continue;
			}
			if (2 * sz[v] >= csz) {
				heavy = v;
				break;
			}
		}
		if (heavy == -1) {
			return u;
		}
		p = u;
		u = heavy;
	}
}

// Top-level centroid (the one whose parent is -1); set by dfs_centroid.
int croot;

// Scratch queue of (distance, vertex, predecessor) triples shared by all
// dfs_centroid calls; each BFS pass drains it completely.
queue<tuple<int, int, int>> bfs;

void dfs_centroid(int root, int parent = -1, int depth = 0) {
	int centroid = dfs_find_centroid(root, dfs_sz(root));
	auto &node = tc[centroid];
	node.parent = parent;
	node.depth = depth;
	node.sz = sz[root];
	if (parent == -1) {
		croot = centroid;
	}

	bfs.push({1, root, -1});
	int prev = -1, at = 0;
	node.pb.push_back(0);
	while (sz(bfs)) {
		auto [d, u, p] = bfs.front();
		bfs.pop();
		where2[depth][u] = at;
		if (prev != d) {
			node.pb.push_back(at);
			prev = d;
		}
		++at;
		for (int v : g[u]) {
			if (!used[v] && v != p) {
				bfs.push({d + 1, v, u});
			}
		}
	}

	bfs.push({0, centroid, -1});
	prev = -1, at = 0;
	while (sz(bfs)) {
		auto [d, u, p] = bfs.front();
		bfs.pop();
		where[depth][u] = at;
		if (prev != d) {
			node.ob.push_back(at);
			prev = d;
		}
		++at;
		for (int v : g[u]) {
			if (!used[v] && v != p) {
				bfs.push({d + 1, v, u});
			}
		}
	}
	
	node.op = allocate(node.sz);
	node.pp = allocate(node.sz);

	used[centroid] = 1;
	for (int v : g[centroid]) {
		if (!used[v]) {
			dfs_centroid(v, centroid, depth + 1);
		}
	}
}

// ipow[len]: floor(log2(len)) for len >= 1, filled in main;
// depth[v]: depth of v in the tree as traversed by dfs_euler.
int ipow[EULER], depth[N];
// sp[0]: Euler tour as (depth, vertex) pairs; sp[k][i]: minimum of the 2^k tour
// entries starting at index i (built in main).
pair<int, int> sp[LOG][EULER];
// order[v]: index of v's first appearance in the tour; timer: number of tour entries written.
int order[N], timer = 0;

void dfs_euler(int u, int d = 0, int p = -1) {
	order[u] = timer;
	depth[u] = d;
	sp[0][timer++] = {d, u};
	for (int v : g[u]) {
		if (v != p) {
			dfs_euler(v, d + 1, u);
			sp[0][timer++] = {d, u};
		}
	}
}

int lca(int u, int v) {
	if (u == v) {
		return u;
	}
	u = order[u];
	v = order[v];
	if (u > v) {
		swap(u, v);
	}
	++v;
	int pw = ipow[v - u];
	return min(sp[pw][u], sp[pw][v - (1 << pw)]).y;
}

// Number of edges on the tree path between u and v, via their lowest common ancestor.
int dist(int u, int v) {
	const int meet = lca(u, v);
	const int up_from_u = depth[u] - depth[meet];
	const int up_from_v = depth[v] - depth[meet];
	return up_from_u + up_from_v;
}

// Prime factorisation by trial division.
//
// Returns (prime, exponent) pairs in increasing prime order; the result is
// empty for 1. The loop bound is written as i <= value / i rather than
// i * i <= value so that it cannot overflow int for values close to INT_MAX.
// Inputs below 1 are not factorised: as before, they yield a single
// (value, 1) entry.
vector<pair<int, int>> factor(int x) {
	vector<pair<int, int>> res;
	for (int i = 2; i <= x / i; ++i) {
		if (x % i != 0) {
			continue;
		}
		int exponent = 0;
		do {
			x /= i;
			++exponent;
		} while (x % i == 0);
		res.push_back({i, exponent});
	}
	// Whatever is left is a single prime larger than the square root of the input.
	if (x != 1) {
		res.push_back({x, 1});
	}
	return res;
}

// Splits the given value along the primes listed in `fact`.
//
// Returns {exponents, rest}: exponents[k] is how many times fact[k].first
// divides the value, and rest is what remains after all those factors have been
// divided out. The exponents stored in `fact` are ignored. The value must be
// non-zero, otherwise the division loop never ends.
pair<vector<int>, int> get_pw(int x, const vector<pair<int, int>> &fact) {
	vector<int> exponents;
	exponents.reserve(fact.size());
	for (const pair<int, int> &entry : fact) {
		const int prime = entry.first;
		int count = 0;
		for (; x % prime == 0; x /= prime) {
			++count;
		}
		exponents.push_back(count);
	}
	return {exponents, x};
}

// Extended Euclidean algorithm.
//
// On return the two output parameters hold coefficients s and t with
// a * s + b * t == gcd(a, b). For b == 0 the result is (1, 0).
// Iterative formulation: (prev_s, prev_t) and (cur_s, cur_t) are the
// coefficient pairs belonging to the two most recent remainders.
void exgcd(int a, int b, ll &x, ll &y) {
	ll prev_s = 1, cur_s = 0;
	ll prev_t = 0, cur_t = 1;
	while (b != 0) {
		const int quotient = a / b;
		const int remainder = a % b;
		a = b;
		b = remainder;

		const ll next_s = prev_s - quotient * cur_s;
		prev_s = cur_s;
		cur_s = next_s;

		const ll next_t = prev_t - quotient * cur_t;
		prev_t = cur_t;
		cur_t = next_t;
	}
	x = prev_s;
	y = prev_t;
}

// Number of distinct prime factors of l (set in main from factor(l)).
int diff;
// Modulus read from input in main; every product in this file is reduced modulo l.
ll l;

// Calls func(i) for every index i on the downward chain
// r, (r & (r + 1)) - 1, ... while the index is non-negative.
// The first and third parameters are unused placeholders.
template <typename T>
void tdo(int, int r, int, T func) {
	while (r >= 0) {
		func(r);
		r = (r & (r + 1)) - 1;
	}
}

// Shared scratch vector of per-prime exponents: main fills it before an update and
// reads it after a query. Entries 0..7 are accessed as one aligned 256-bit vector,
// entry 8 as a scalar; entries 9..11 are only ever zero-filled.
int part[12] __attribute__((aligned(32)));

// Applies one update to the cells of centroid u.
//
// ineq: multiplier applied to the `i` field of the centroid's own cells (main passes
//       the update factor with all prime factors of l removed).
// inv:  multiplier applied to the `i` field of the parent-side cells (main passes the
//       modular inverse of ineq, so these cells cancel what the parent centroid adds).
// u:    centroid whose cells are updated.
// x:    vertex at which the update is centred.
// d:    radius of the update.
//
// The exponent vector to add is taken from the global `part`. Each pass touches the
// prefix of cells whose BFS distance is within the remaining radius, walking the
// index chain produced by tdo. The multiplier == 1 cases skip the modular multiply.
void apply_for(int ineq, int inv, int u, int x, int d) {
	auto &node = tc[u];

	// Radius left after travelling from the update vertex to this centroid.
	int dst = d - dist(u, x);
	// Number of vertices of this component within distance dst of the centroid.
	int bound = dst < 0 ? 0 : (dst + 1 >= sz(node.ob) ? node.sz : node.ob[dst + 1]);

	// part[0..7] as one vector; part[8] is handled separately below.
	__m256i part1 = _mm256_load_si256((__m256i*) (part));
	if (ineq != 1) {
		tdo(0, bound - 1, node.sz, [&](int j) {
			__m256i x1 = _mm256_load_si256((__m256i*) (node.op[j].a));
			_mm256_store_si256((__m256i*) (node.op[j].a), _mm256_add_epi32(x1, part1));
			node.op[j].a[8] += part[8];
 
			(node.op[j].i *= ineq) %= l;
		});
	} else {
		tdo(0, bound - 1, node.sz, [&](int j) {
			__m256i x1 = _mm256_load_si256((__m256i*) (node.op[j].a));
			_mm256_store_si256((__m256i*) (node.op[j].a), _mm256_add_epi32(x1, part1));
			node.op[j].a[8] += part[8];
		});
	}

	// The top-level centroid has no parent-side cells to correct.
	if (node.parent == -1) {
		return;
	}

	// Same prefix computation, but measured from the parent centroid; these cells
	// receive the opposite contribution (subtract exponents, multiply by inv).
	dst = d - dist(node.parent, x);
	bound = dst < 0 ? 0 : (dst + 1 >= sz(node.pb) ? node.sz : node.pb[dst + 1]);
	if (inv != 1) {
		tdo(0, bound - 1, node.sz, [&](int j) {
			__m256i x1 = _mm256_load_si256((__m256i*) (node.pp[j].a));
			_mm256_store_si256((__m256i*) (node.pp[j].a), _mm256_sub_epi32(x1, part1));
			node.pp[j].a[8] -= part[8];

			(node.pp[j].i *= inv) %= l;
		});
	} else {
		tdo(0, bound - 1, node.sz, [&](int j) {
			__m256i x1 = _mm256_load_si256((__m256i*) (node.pp[j].a));
			_mm256_store_si256((__m256i*) (node.pp[j].a), _mm256_sub_epi32(x1, part1));
			node.pp[j].a[8] -= part[8];
		});
	}
}

// Collects the contribution of centroid u for the queried vertex.
//
// ineq: running product (mod l) of the multiplicative parts; multiplied in place.
// u:    centroid whose cells are read.
// x:    queried vertex.
//
// The exponent counters of every visited cell are added into the global `part`.
// The centroid's own cells are read starting at the vertex's position in where[];
// unless u is the top-level centroid, the parent-side cells are read as well,
// starting at its position in where2[].
void count_for(ll &ineq, int u, int x) {
	const CentroidNode &node = tc[u];

	__m256i vec_sum = _mm256_setzero_si256();
	int tail_sum = 0;

	// Follows the upward chain start, start | (start + 1), ... below node.sz and
	// folds every visited cell into the running sums and the running product.
	auto absorb = [&](const Arr *cells, int start) {
		for (int idx = start; idx < node.sz; idx |= idx + 1) {
			const Arr &cell = cells[idx];
			const __m256i lanes = _mm256_load_si256((const __m256i *) cell.a);
			vec_sum = _mm256_add_epi32(lanes, vec_sum);
			tail_sum += cell.a[8];
			ineq = ineq * cell.i % l;
		}
	};

	absorb(node.op, where[node.depth][x]);
	if (node.parent != -1) {
		absorb(node.pp, where2[node.depth][x]);
	}

	// Spill the eight vector lanes and add everything into the shared exponent vector.
	alignas(32) int spilled[8];
	_mm256_store_si256((__m256i *) spilled, vec_sum);
	for (int k = 0; k < 8; ++k) {
		part[k] += spilled[k];
	}
	part[8] += tail_sum;
}

// Computes a raised to the power b modulo the global modulus l by binary
// exponentiation. Returns 1 for b <= 0 (not reduced modulo l), as before.
//
// The exponent is consumed bit by bit with a shift instead of a doubling int
// mask, which overflowed for exponents of 2^30 and above; the base is reduced
// once up front so the first squaring cannot overflow for large bases.
ll fpow(ll a, ll b) {
	ll c = 1;
	a %= l;
	for (; b > 0; b >>= 1) {
		if (b & 1) {
			c = c * a % l;
		}
		a = a * a % l;
	}
	return c;
}

signed main() {
#ifdef DEBUG
	freopen("output.txt", "w", stdout);
#endif
	int n = read_int();
	l = read_int();
	for (int i = 0; i < INT_MEM; ++i) {
		int_mem[i].i = 1;
	}
	for (int i = 1; i < n; ++i) {
		int u = read_int(), v = read_int();
		g[--u].push_back(--v);
		g[v].push_back(u);
	}
	for (int i = 0; i < n; ++i) {
		h[i] = read_int();
	}
	dfs_euler(0);
	for (int i = 1; i < LOG; ++i) {
		for (int j = 0; j <= timer - (1 << i); ++j) {
			sp[i][j] = min(sp[i - 1][j], sp[i - 1][j + (1 << (i - 1))]);
		}
	}
	for (int i = 2; i <= timer; ++i) {
		ipow[i] = ipow[i / 2] + 1;
	}
	dfs_centroid(0);
	int queries = read_int();
	auto fact = factor(int(l));
	diff = sz(fact);

	while (queries--) {
		int type = read_int();
		if (type == 1) {
			int x = read_int(), d = read_int(), w = read_int();
			--x;
			if (!w) {
				w = int(l);
			}
			auto [partr, ineq] = get_pw(w, fact);
			fill(all(part), 0);
			copy_n(begin(partr), sz(partr), begin(part));
			ll inv, tmp;
			exgcd(ineq, int(l), inv, tmp);
			inv = (inv % l + l) % l;

			int curr = x;
			while (curr != -1) {
				apply_for(ineq, int(inv), curr, x, d);
				curr = tc[curr].parent;
			}
		} else {
			int x = read_int() - 1;
			fill(all(part), 0);
			ll ineq = 1;
			int curr = x;
			while (curr != -1) {
				count_for(ineq, curr, x);
				curr = tc[curr].parent;
			}

			ll res = 1;
			for (int i = 0; i < diff; ++i) {
				(res *= fpow(fact[i].x, part[i])) %= l;
			}
			(res *= ineq) %= l;
			write_int((h[x] * res) % l);
			write_char('\n');
			// for (int u : part) {
			// 	cout << u << " ";
			// }
			// cout << ineq << "\n";
		}
	}
	wflush();
	cerr << clock() << endl;
}
# 결과 실행 시간 메모리 Grader output
1 Correct 208 ms 496588 KB Output is correct
2 Correct 179 ms 496672 KB Output is correct
3 Correct 177 ms 496716 KB Output is correct
4 Correct 178 ms 497204 KB Output is correct
5 Correct 187 ms 497208 KB Output is correct
6 Correct 189 ms 497180 KB Output is correct
7 Correct 176 ms 497100 KB Output is correct
8 Correct 174 ms 497104 KB Output is correct
9 Correct 190 ms 496588 KB Output is correct
10 Correct 178 ms 496712 KB Output is correct
11 Correct 176 ms 496624 KB Output is correct
12 Correct 186 ms 496732 KB Output is correct
13 Correct 185 ms 496716 KB Output is correct
14 Correct 176 ms 496596 KB Output is correct
15 Correct 181 ms 496672 KB Output is correct
16 Correct 189 ms 496604 KB Output is correct
17 Correct 186 ms 496716 KB Output is correct
18 Correct 176 ms 496668 KB Output is correct
19 Correct 185 ms 496708 KB Output is correct
20 Correct 183 ms 496648 KB Output is correct
21 Correct 176 ms 496676 KB Output is correct
22 Correct 182 ms 496732 KB Output is correct
23 Correct 182 ms 496652 KB Output is correct
24 Correct 175 ms 496600 KB Output is correct
25 Correct 180 ms 496616 KB Output is correct
26 Correct 186 ms 496716 KB Output is correct
27 Correct 178 ms 496728 KB Output is correct
28 Correct 174 ms 496728 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 186 ms 496588 KB Output is correct
2 Correct 3236 ms 608192 KB Output is correct
3 Correct 2160 ms 606188 KB Output is correct
4 Correct 3475 ms 639164 KB Output is correct
5 Correct 2634 ms 608540 KB Output is correct
6 Correct 2050 ms 603432 KB Output is correct
7 Correct 1721 ms 601500 KB Output is correct
8 Correct 681 ms 584224 KB Output is correct
9 Execution timed out 4089 ms 646348 KB Time limit exceeded
10 Halted 0 ms 0 KB -
# 결과 실행 시간 메모리 Grader output
1 Correct 186 ms 496588 KB Output is correct
2 Correct 3236 ms 608192 KB Output is correct
3 Correct 2160 ms 606188 KB Output is correct
4 Correct 3475 ms 639164 KB Output is correct
5 Correct 2634 ms 608540 KB Output is correct
6 Correct 2050 ms 603432 KB Output is correct
7 Correct 1721 ms 601500 KB Output is correct
8 Correct 681 ms 584224 KB Output is correct
9 Execution timed out 4089 ms 646348 KB Time limit exceeded
10 Halted 0 ms 0 KB -
# 결과 실행 시간 메모리 Grader output
1 Correct 180 ms 496756 KB Output is correct
2 Execution timed out 4090 ms 640936 KB Time limit exceeded
3 Halted 0 ms 0 KB -
# 결과 실행 시간 메모리 Grader output
1 Correct 180 ms 496580 KB Output is correct
2 Execution timed out 4062 ms 638036 KB Time limit exceeded
3 Halted 0 ms 0 KB -
# 결과 실행 시간 메모리 Grader output
1 Correct 208 ms 496588 KB Output is correct
2 Correct 179 ms 496672 KB Output is correct
3 Correct 177 ms 496716 KB Output is correct
4 Correct 178 ms 497204 KB Output is correct
5 Correct 187 ms 497208 KB Output is correct
6 Correct 189 ms 497180 KB Output is correct
7 Correct 176 ms 497100 KB Output is correct
8 Correct 174 ms 497104 KB Output is correct
9 Correct 190 ms 496588 KB Output is correct
10 Correct 178 ms 496712 KB Output is correct
11 Correct 176 ms 496624 KB Output is correct
12 Correct 186 ms 496732 KB Output is correct
13 Correct 185 ms 496716 KB Output is correct
14 Correct 176 ms 496596 KB Output is correct
15 Correct 181 ms 496672 KB Output is correct
16 Correct 189 ms 496604 KB Output is correct
17 Correct 186 ms 496716 KB Output is correct
18 Correct 176 ms 496668 KB Output is correct
19 Correct 185 ms 496708 KB Output is correct
20 Correct 183 ms 496648 KB Output is correct
21 Correct 176 ms 496676 KB Output is correct
22 Correct 182 ms 496732 KB Output is correct
23 Correct 182 ms 496652 KB Output is correct
24 Correct 175 ms 496600 KB Output is correct
25 Correct 180 ms 496616 KB Output is correct
26 Correct 186 ms 496716 KB Output is correct
27 Correct 178 ms 496728 KB Output is correct
28 Correct 174 ms 496728 KB Output is correct
29 Correct 186 ms 496588 KB Output is correct
30 Correct 3236 ms 608192 KB Output is correct
31 Correct 2160 ms 606188 KB Output is correct
32 Correct 3475 ms 639164 KB Output is correct
33 Correct 2634 ms 608540 KB Output is correct
34 Correct 2050 ms 603432 KB Output is correct
35 Correct 1721 ms 601500 KB Output is correct
36 Correct 681 ms 584224 KB Output is correct
37 Execution timed out 4089 ms 646348 KB Time limit exceeded
38 Halted 0 ms 0 KB -