Submission #558569

# Submission time Handle Problem Language Result Execution time Memory
558569 2022-05-07T14:33:47 Z DanShaders Sprinkler (JOI22_sprinkler) C++17
0 / 100
4000 ms 765632 KB
#pragma GCC target("avx2")

#include <bits/stdc++.h>
#include <immintrin.h>
#include <ext/pb_ds/assoc_container.hpp>
using namespace std;

namespace x = __gnu_pbds;
template <typename T>
using ordered_set = x::tree<T, x::null_type, less<T>, x::rb_tree_tag, x::tree_order_statistics_node_update>;

template <typename T>
using normal_queue = priority_queue<T, vector<T>, greater<>>;

#define all(x) begin(x), end(x)
#define sz(x) ((int) (x).size())
#define x first
#define y second
using ll = long long;
using ld = long double;

const int N = 2e5 + 10, EULER = 2 * N, LOG = 19;
const int BUFF = 4096;

char buff[BUFF + 1];
int buff_it = 0, buff_len = 0;

char wbuff[BUFF];
int wbuff_len = 0;

char read_char() {
	if (buff_it == buff_len) {
		buff_len = (int) fread(buff, 1, BUFF, stdin);
		buff[buff_len] = 0;
		buff_it = 0;
	}
	return buff[buff_it++];
}

int read_int() {
	char c = ' ';
	while (!isdigit(c = read_char()));
	int res = 0;
	do {
		res *= 10;
		res += c - '0';
	} while (isdigit(c = read_char()));
	return res;
}

void wflush() {
	fwrite(wbuff, 1, wbuff_len, stdout);
	wbuff_len = 0;
}

void write_char(char c) {
	if (wbuff_len == BUFF) {
		wflush();
	}
	wbuff[wbuff_len++] = c;
}

void write_int(ll x) {
	static char cbuff[20];
	int i = 0;
	if (!x) {
		write_char('0');
		return;
	}
	for (; x; x /= 10) {
		cbuff[i++] = char(x % 10 + '0');
	}
	for (; i--; ) {
		write_char(cbuff[i]);
	}
}

vector<int> g[N];
int h[N], sz[N];
char used[N];

const int INT_MEM = 7600380;

__attribute__((aligned(16))) struct Arr {
	int a[9];
	ll i;
} int_mem[INT_MEM];

struct CentroidNode {
	int parent, depth, sz;
	vector<int> ob, pb;
	Arr *op, *pp;
} tc[N];

int next_free = 0;

Arr *allocate(int x) {
	Arr *res = int_mem + next_free;
	next_free += x;
	assert(next_free < INT_MEM);
	return res;
}

int where[LOG][N], where2[LOG][N];

int dfs_sz(int u, int p = -1) {
	if (used[u]) {
		return 0;
	}
	sz[u] = 1;
	for (int v : g[u]) {
		if (v == p) {
			continue;
		}
		sz[u] += dfs_sz(v, u);
	}
	return sz[u];
}

int dfs_find_centroid(int u, int csz, int p = -1) {
	for (int v : g[u]) {
		if (v != p && !used[v] && 2 * sz[v] >= csz) {
			return dfs_find_centroid(v, csz, u);
		}
	}
	return u;
}

int croot;

queue<tuple<int, int, int>> bfs;

void dfs_centroid(int root, int parent = -1, int depth = 0) {
	int centroid = dfs_find_centroid(root, dfs_sz(root));
	auto &node = tc[centroid];
	node.parent = parent;
	node.depth = depth;
	node.sz = sz[root];
	if (parent == -1) {
		croot = centroid;
	}

	bfs.push({1, root, -1});
	int prev = -1, at = 0;
	node.pb.push_back(0);
	while (sz(bfs)) {
		auto [d, u, p] = bfs.front();
		bfs.pop();
		where2[depth][u] = at;
		if (prev != d) {
			node.pb.push_back(at);
			prev = d;
		}
		++at;
		for (int v : g[u]) {
			if (!used[v] && v != p) {
				bfs.push({d + 1, v, u});
			}
		}
	}

	bfs.push({0, centroid, -1});
	prev = -1, at = 0;
	while (sz(bfs)) {
		auto [d, u, p] = bfs.front();
		bfs.pop();
		where[depth][u] = at;
		if (prev != d) {
			node.ob.push_back(at);
			prev = d;
		}
		++at;
		for (int v : g[u]) {
			if (!used[v] && v != p) {
				bfs.push({d + 1, v, u});
			}
		}
	}
	
	node.op = allocate(node.sz);
	node.pp = allocate(node.sz);

	used[centroid] = 1;
	for (int v : g[centroid]) {
		if (!used[v]) {
			dfs_centroid(v, centroid, depth + 1);
		}
	}
}

int ipow[EULER], depth[N];
pair<int, int> sp[LOG][EULER];
int order[N], timer = 0;

void dfs_euler(int u, int d = 0, int p = -1) {
	order[u] = timer;
	depth[u] = d;
	sp[0][timer++] = {d, u};
	for (int v : g[u]) {
		if (v != p) {
			dfs_euler(v, d + 1, u);
			sp[0][timer++] = {d, u};
		}
	}
}

int lca(int u, int v) {
	if (u == v) {
		return u;
	}
	u = order[u];
	v = order[v];
	if (u > v) {
		swap(u, v);
	}
	++v;
	int pw = ipow[v - u];
	return min(sp[pw][u], sp[pw][v - (1 << pw)]).y;
}

int dist(int u, int v) {
	return depth[u] + depth[v] - 2 * depth[lca(u, v)];
}

vector<pair<int, int>> factor(int x) {
	vector<pair<int, int>> res;
	for (int i = 2; i * i <= x; ++i) {
		if (x % i == 0) {
			res.push_back({i, 0});
			while (x % i == 0) {
				++res.back().y;
				x /= i;
			}
		}
	}
	if (x != 1) {
		res.push_back({x, 1});
	}
	return res;
}

pair<vector<int>, int> get_pw(int x, const vector<pair<int, int>> &fact) {
	vector<int> pw;
	for (auto [prime, _] : fact) {
		pw.push_back(0);
		while (x % prime == 0) {
			x /= prime;
			++pw.back();
		}
	}
	return {pw, x};
}

void exgcd(int a, int b, ll &x, ll &y) {
	if (b == 0) {
		x = 1;
		y = 0;
		return;
	}
	exgcd(b, a % b, x, y);
	ll nw = x - (a / b) * y;
	x = y;
	y = nw;
}

int diff;
ll l;

template <typename T>
void tdo(int, int r, int, T func) {
	for (; r >= 0; r &= r + 1, --r) {
		func(r);
	}
}

int part[12] __attribute__((aligned(16)));

void apply_for(int ineq, int inv, int u, int x, int d) {
	auto &node = tc[u];

	int dst = d - dist(u, x);
	int bound = dst < 0 ? 0 : (dst + 1 >= sz(node.ob) ? node.sz : node.ob[dst + 1]);

	if (ineq != 1) {
		tdo(0, bound - 1, node.sz, [&](int j) {
#pragma GCC ivdep
			for (int i = 0; i < 9; ++i) {
				node.op[j].a[i] += part[i];
			}
			(node.op[j].i *= ineq) %= l;
		});
	} else {
		tdo(0, bound - 1, node.sz, [&](int j) {
#pragma GCC ivdep
			for (int i = 0; i < 9; ++i) {
				node.op[j].a[i] += part[i];
			}
		});
	}

	if (node.parent == -1) {
		return;
	}

	dst = d - dist(node.parent, x);
	bound = dst < 0 ? 0 : (dst + 1 >= sz(node.pb) ? node.sz : node.pb[dst + 1]);
	if (inv != 1) {
		__m256i part1 = _mm256_load_si256((__m256i*) (part)),
			part2 = _mm256_load_si256((__m256i*) (part + 4));
		tdo(0, bound - 1, node.sz, [&](int j) {
			__m256i x1 = _mm256_load_si256((__m256i*) (node.pp[j].a)),
				x2 = _mm256_load_si256((__m256i*) (node.pp[j].a + 4));
			_mm256_store_si256((__m256i*) (node.pp[j].a), _mm256_sub_epi32(x1, part1));
			_mm256_store_si256((__m256i*) (node.pp[j].a + 4), _mm256_sub_epi32(x2, part2));
			node.pp[j].a[8] -= part[8];

			(node.pp[j].i *= inv) %= l;
		});
	} else {
		tdo(0, bound - 1, node.sz, [&](int j) {
#pragma GCC ivdep
			for (int i = 0; i < 9; ++i) {
				node.pp[j].a[i] -= part[i];
			}
		});
	}
}

void count_for(ll &ineq, int u, int x) {
	const auto &node = tc[u];

	int i = where[node.depth][x];
	for (; i < node.sz; i |= i + 1) {
#pragma GCC ivdep
		for (int j = 0; j < 9; ++j) {
			part[j] += node.op[i].a[j];
		}
		(ineq *= node.op[i].i) %= l;
	}

	if (node.parent == -1) {
		return;
	}

	i = where2[node.depth][x];
	for (; i < node.sz; i |= i + 1) {
#pragma GCC ivdep
		for (int j = 0; j < 9; ++j) {
			part[j] += node.pp[i].a[j];
		}
		(ineq *= node.pp[i].i) %= l;
	}
}

ll fpow(ll a, ll b) {
	ll c = 1;
	for (int i = 1; i <= b; i *= 2) {
		if (b & i) {
			(c *= a) %= l;
		}
		(a *= a) %= l;
	}
	return c;
}

signed main() {
#ifdef DEBUG
	freopen("output.txt", "w", stdout);
#endif
	int n = read_int();
	l = read_int();
	for (int i = 0; i < INT_MEM; ++i) {
		int_mem[i].i = 1;
	}
	for (int i = 1; i < n; ++i) {
		int u = read_int(), v = read_int();
		g[--u].push_back(--v);
		g[v].push_back(u);
	}
	for (int i = 0; i < n; ++i) {
		h[i] = read_int();
	}
	dfs_euler(0);
	for (int i = 1; i < LOG; ++i) {
		for (int j = 0; j <= timer - (1 << i); ++j) {
			sp[i][j] = min(sp[i - 1][j], sp[i - 1][j + (1 << (i - 1))]);
		}
	}
	for (int i = 2; i <= timer; ++i) {
		ipow[i] = ipow[i / 2] + 1;
	}
	dfs_centroid(0);
	int queries = read_int();
	auto fact = factor(int(l));
	diff = sz(fact);

	while (queries--) {
		int type = read_int();
		if (type == 1) {
			int x = read_int(), d = read_int(), w = read_int();
			--x;
			if (!w) {
				w = int(l);
			}
			auto [partr, ineq] = get_pw(w, fact);
			fill(all(part), 0);
			copy_n(begin(partr), sz(partr), begin(part));
			ll inv, tmp;
			exgcd(ineq, int(l), inv, tmp);
			inv = (inv % l + l) % l;

			int curr = x;
			while (curr != -1) {
				apply_for(ineq, int(inv), curr, x, d);
				curr = tc[curr].parent;
			}
		} else {
			int x = read_int() - 1;
			fill(all(part), 0);
			ll ineq = 1;
			int curr = x;
			while (curr != -1) {
				count_for(ineq, curr, x);
				curr = tc[curr].parent;
			}

			ll res = 1;
			for (int i = 0; i < diff; ++i) {
				(res *= fpow(fact[i].x, part[i])) %= l;
			}
			(res *= ineq) %= l;
			write_int((h[x] * res) % l);
			write_char('\n');
			// for (int u : part) {
			// 	cout << u << " ";
			// }
			// cout << ineq << "\n";
		}
	}
	wflush();
	cerr << clock() << endl;
}
# Verdict Execution time Memory Grader output
1 Runtime error 507 ms 765372 KB Execution killed with signal 11
2 Halted 0 ms 0 KB -
# Verdict Execution time Memory Grader output
1 Correct 186 ms 377696 KB Output is correct
2 Execution timed out 4053 ms 489436 KB Time limit exceeded
3 Halted 0 ms 0 KB -
# Verdict Execution time Memory Grader output
1 Correct 186 ms 377696 KB Output is correct
2 Execution timed out 4053 ms 489436 KB Time limit exceeded
3 Halted 0 ms 0 KB -
# Verdict Execution time Memory Grader output
1 Correct 149 ms 377708 KB Output is correct
2 Execution timed out 4038 ms 522248 KB Time limit exceeded
3 Halted 0 ms 0 KB -
# Verdict Execution time Memory Grader output
1 Runtime error 467 ms 765632 KB Execution killed with signal 11
2 Halted 0 ms 0 KB -
# Verdict Execution time Memory Grader output
1 Runtime error 507 ms 765372 KB Execution killed with signal 11
2 Halted 0 ms 0 KB -