Submission #558590

#TimeUsernameProblemLanguageResultExecution timeMemory
558590DanShadersSprinkler (JOI22_sprinkler)C++17
0 / 100
4121 ms641580 KiB
//bs:sanitizers #pragma GCC target("avx2") #include <bits/stdc++.h> #include <immintrin.h> #include <ext/pb_ds/assoc_container.hpp> using namespace std; namespace x = __gnu_pbds; template <typename T> using ordered_set = x::tree<T, x::null_type, less<T>, x::rb_tree_tag, x::tree_order_statistics_node_update>; template <typename T> using normal_queue = priority_queue<T, vector<T>, greater<>>; #define all(x) begin(x), end(x) #define sz(x) ((int) (x).size()) #define x first #define y second using ll = long long; using ld = long double; const int N = 2e5 + 10, EULER = 2 * N, LOG = 19; const int BUFF = 4096; char buff[BUFF + 1]; int buff_it = 0, buff_len = 0; char wbuff[BUFF]; int wbuff_len = 0; char read_char() { if (buff_it == buff_len) { buff_len = (int) fread(buff, 1, BUFF, stdin); buff[buff_len] = 0; buff_it = 0; } return buff[buff_it++]; } int read_int() { char c = ' '; while (!isdigit(c = read_char())); int res = 0; do { res *= 10; res += c - '0'; } while (isdigit(c = read_char())); return res; } void wflush() { fwrite(wbuff, 1, wbuff_len, stdout); wbuff_len = 0; } void write_char(char c) { if (wbuff_len == BUFF) { wflush(); } wbuff[wbuff_len++] = c; } void write_int(ll x) { static char cbuff[20]; int i = 0; if (!x) { write_char('0'); return; } for (; x; x /= 10) { cbuff[i++] = char(x % 10 + '0'); } for (; i--; ) { write_char(cbuff[i]); } } vector<int> g[N]; int h[N], sz[N]; char used[N]; const int INT_MEM = 7600380; struct Arr { int a[9] __attribute__((aligned(32))); ll i; } int_mem[INT_MEM]; struct CentroidNode { int parent, depth, sz; vector<int> ob, pb; Arr *op, *pp; } tc[N]; int next_free = 0; Arr *allocate(int x) { Arr *res = int_mem + next_free; next_free += x; assert(next_free < INT_MEM); return res; } int where[LOG][N], where2[LOG][N]; int dfs_sz(int u, int p = -1) { if (used[u]) { return 0; } sz[u] = 1; for (int v : g[u]) { if (v == p) { continue; } sz[u] += dfs_sz(v, u); } return sz[u]; } int dfs_find_centroid(int u, int csz, int p = -1) { for (int v : g[u]) { if (v != p && !used[v] && 2 * sz[v] >= csz) { return dfs_find_centroid(v, csz, u); } } return u; } int croot; queue<tuple<int, int, int>> bfs; void dfs_centroid(int root, int parent = -1, int depth = 0) { int centroid = dfs_find_centroid(root, dfs_sz(root)); auto &node = tc[centroid]; node.parent = parent; node.depth = depth; node.sz = sz[root]; if (parent == -1) { croot = centroid; } bfs.push({1, root, -1}); int prev = -1, at = 0; node.pb.push_back(0); while (sz(bfs)) { auto [d, u, p] = bfs.front(); bfs.pop(); where2[depth][u] = at; if (prev != d) { node.pb.push_back(at); prev = d; } ++at; for (int v : g[u]) { if (!used[v] && v != p) { bfs.push({d + 1, v, u}); } } } bfs.push({0, centroid, -1}); prev = -1, at = 0; while (sz(bfs)) { auto [d, u, p] = bfs.front(); bfs.pop(); where[depth][u] = at; if (prev != d) { node.ob.push_back(at); prev = d; } ++at; for (int v : g[u]) { if (!used[v] && v != p) { bfs.push({d + 1, v, u}); } } } node.op = allocate(node.sz); node.pp = allocate(node.sz); used[centroid] = 1; for (int v : g[centroid]) { if (!used[v]) { dfs_centroid(v, centroid, depth + 1); } } } int ipow[EULER], depth[N]; pair<int, int> sp[LOG][EULER]; int order[N], timer = 0; void dfs_euler(int u, int d = 0, int p = -1) { order[u] = timer; depth[u] = d; sp[0][timer++] = {d, u}; for (int v : g[u]) { if (v != p) { dfs_euler(v, d + 1, u); sp[0][timer++] = {d, u}; } } } int lca(int u, int v) { if (u == v) { return u; } u = order[u]; v = order[v]; if (u > v) { swap(u, v); } ++v; int pw = ipow[v - u]; return min(sp[pw][u], sp[pw][v - (1 << pw)]).y; } int dist(int u, int v) { return depth[u] + depth[v] - 2 * depth[lca(u, v)]; } vector<pair<int, int>> factor(int x) { vector<pair<int, int>> res; for (int i = 2; i * i <= x; ++i) { if (x % i == 0) { res.push_back({i, 0}); while (x % i == 0) { ++res.back().y; x /= i; } } } if (x != 1) { res.push_back({x, 1}); } return res; } pair<vector<int>, int> get_pw(int x, const vector<pair<int, int>> &fact) { vector<int> pw; for (auto [prime, _] : fact) { pw.push_back(0); while (x % prime == 0) { x /= prime; ++pw.back(); } } return {pw, x}; } void exgcd(int a, int b, ll &x, ll &y) { if (b == 0) { x = 1; y = 0; return; } exgcd(b, a % b, x, y); ll nw = x - (a / b) * y; x = y; y = nw; } int diff; ll l; template <typename T> void tdo(int, int r, int, T func) { for (; r >= 0; r &= r + 1, --r) { func(r); } } int part[9] __attribute__((aligned(32))); void apply_for(int ineq, int inv, int u, int x, int d) { auto &node = tc[u]; int dst = d - dist(u, x); if (dst < 0) { return; } int bound = dst < 0 ? 0 : (dst + 1 >= sz(node.ob) ? node.sz : node.ob[dst + 1]); __m256i part1 = _mm256_load_si256((__m256i*) (part)); if (ineq != 1) { tdo(0, bound - 1, node.sz, [&](int j) { // for (int i = 0; i < 9; ++i) { // node.op[j].a[i] += part[i]; // } // __m256i x1 = _mm256_load_si256((__m256i*) (node.op[j].a)); // _mm256_store_si256((__m256i*) (node.op[j].a), _mm256_add_epi32(x1, part1)); // node.op[j].a[8] += part[8]; #pragma GCC ivdep for (int i = 0; i < 9; ++i) { node.op[j].a[i] += part[i]; } (node.op[j].i *= ineq) %= l; }); } else { tdo(0, bound - 1, node.sz, [&](int j) { #pragma GCC ivdep for (int i = 0; i < 9; ++i) { node.op[j].a[i] += part[i]; } }); } if (node.parent == -1) { return; } dst = d - dist(node.parent, x); bound = dst < 0 ? 0 : (dst + 1 >= sz(node.pb) ? node.sz : node.pb[dst + 1]); if (inv != 1) { tdo(0, bound - 1, node.sz, [&](int j) { #pragma GCC ivdep for (int i = 0; i < 9; ++i) { node.op[j].a[i] += part[i]; } (node.pp[j].i *= inv) %= l; }); } else { tdo(0, bound - 1, node.sz, [&](int j) { #pragma GCC ivdep for (int i = 0; i < 9; ++i) { node.pp[j].a[i] -= part[i]; } }); } } void count_for(ll &ineq, int u, int x) { const auto &node = tc[u]; int i = where[node.depth][x]; for (; i < node.sz; i |= i + 1) { #pragma GCC ivdep for (int j = 0; j < 9; ++j) { part[j] += node.op[i].a[j]; } (ineq *= node.op[i].i) %= l; } if (node.parent == -1) { return; } i = where2[node.depth][x]; for (; i < node.sz; i |= i + 1) { #pragma GCC ivdep for (int j = 0; j < 9; ++j) { part[j] += node.pp[i].a[j]; } (ineq *= node.pp[i].i) %= l; } } ll fpow(ll a, ll b) { ll c = 1; for (int i = 1; i <= b; i *= 2) { if (b & i) { (c *= a) %= l; } (a *= a) %= l; } return c; } signed main() { #ifdef DEBUG freopen("output.txt", "w", stdout); #endif int n = read_int(); l = read_int(); for (int i = 0; i < INT_MEM; ++i) { int_mem[i].i = 1; } for (int i = 1; i < n; ++i) { int u = read_int(), v = read_int(); g[--u].push_back(--v); g[v].push_back(u); } for (int i = 0; i < n; ++i) { h[i] = read_int(); } dfs_euler(0); for (int i = 1; i < LOG; ++i) { for (int j = 0; j <= timer - (1 << i); ++j) { sp[i][j] = min(sp[i - 1][j], sp[i - 1][j + (1 << (i - 1))]); } } for (int i = 2; i <= timer; ++i) { ipow[i] = ipow[i / 2] + 1; } dfs_centroid(0); int queries = read_int(); auto fact = factor(int(l)); diff = sz(fact); while (queries--) { int type = read_int(); if (type == 1) { int x = read_int(), d = read_int(), w = read_int(); --x; if (!w) { w = int(l); } auto [partr, ineq] = get_pw(w, fact); fill(all(part), 0); copy_n(begin(partr), sz(partr), begin(part)); ll inv, tmp; exgcd(ineq, int(l), inv, tmp); inv = (inv % l + l) % l; int curr = x; while (curr != -1) { apply_for(ineq, int(inv), curr, x, d); curr = tc[curr].parent; } } else { int x = read_int() - 1; fill(all(part), 0); ll ineq = 1; int curr = x; while (curr != -1) { count_for(ineq, curr, x); curr = tc[curr].parent; } ll res = 1; for (int i = 0; i < diff; ++i) { (res *= fpow(fact[i].x, part[i])) %= l; } (res *= ineq) %= l; write_int((h[x] * res) % l); write_char('\n'); // for (int u : part) { // cout << u << " "; // } // cout << ineq << "\n"; } } wflush(); cerr << clock() << endl; }

Compilation message (stderr)

sprinkler.cpp: In function 'void apply_for(int, int, int, int, int)':
sprinkler.cpp:288:10: warning: unused variable 'part1' [-Wunused-variable]
  288 |  __m256i part1 = _mm256_load_si256((__m256i*) (part));
      |          ^~~~~
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...