제출 #1333781

#제출 시각아이디문제언어결과실행 시간메모리
1333781gelastropodWind Turbines (EGOI25_windturbines)C++20
컴파일 에러
0 ms0 KiB
#pragma GCC optimize("O3,inline")
#include <bits/stdc++.h>
using namespace std;

int cnt = 0, cnt1 = 0, sqrtN;
long long ans1 = 0;
vector<vector<int>> adjlist, p2;
vector<int> p, idx, redness, pre, post;

int fin(int n) {
    if (p[n] == n) return n;
    return p[n] = fin(p[n]);
}

void join(int a, int b, int x) {
    a = fin(a);
    b = fin(b);
    if (a == b) return;
    p[b] = a;
    adjlist[cnt].push_back(idx[a]);
    adjlist[cnt].push_back(idx[b]);
    idx[a] = p2[0][idx[a]] = p2[0][idx[b]] = cnt++;
    redness[idx[a]] = x;
    ans1 += x;
}

void dfs(int n) {
    pre[n] = cnt1++;
    for (int i : adjlist[n]) {
        dfs(i);
    }
    post[n] = cnt1 - 1;
}

bool comp(pair<pair<int, int>, int> a, pair<pair<int, int>, int> b) {
    if (a.first.first / sqrtN == b.first.first / sqrtN) return a.first.second < b.first.second;
    return a.first.first / sqrtN < b.first.first / sqrtN;
}

const int R = 1e8;
alignas(64) int tree[R];

const int B = 32;

int root = 0;   // where the keys of the root start
int n_tree = B; // number of allocated array cells
int H = 1;      // current tree height

typedef __m256i reg;

reg cmp(reg x, int* node) {
    reg y = _mm256_load_si256((reg*)node);
    return _mm256_cmpgt_epi32(x, y);
}

// returns how many keys are less than x
unsigned rank32(reg x, int* node) {
    reg m1 = cmp(x, node);
    reg m2 = cmp(x, node + 8);
    reg m3 = cmp(x, node + 16);
    reg m4 = cmp(x, node + 24);

    // take lower 16 bits from m1/m3 and higher 16 bits from m2/m4
    m1 = _mm256_blend_epi16(m1, m2, 0b01010101);
    m3 = _mm256_blend_epi16(m3, m4, 0b01010101);
    m1 = _mm256_packs_epi16(m1, m3); // can also use blendv here, but packs is simpler

    unsigned mask = _mm256_movemask_epi8(m1);
    return __builtin_popcount(mask);
}

int lower_bound(int _x) {
    unsigned k = root;
    reg x = _mm256_set1_epi32(_x);

    for (int h = 0; h < H - 1; h++) {
        unsigned i = rank32(x, &tree[k]);
        k = tree[k + B + i];
    }

    unsigned i = rank32(x, &tree[k]);

    return tree[k + i];
}

struct Precalc {
    alignas(64) int mask[B][B];

    constexpr Precalc() : mask{} {
        for (int i = 0; i < B; i++)
            for (int j = i; j < B - 1; j++)
                // everything from i to B - 2 inclusive needs to be moved
                mask[i][j] = -1;
    }
};

constexpr Precalc P;

void insert(int* node, int i, int x) {
    // need to iterate right-to-left to not overwrite the first element of the next lane
    for (int j = B - 8; j >= 0; j -= 8) {
        // load the keys
        reg t = _mm256_load_si256((reg*)&node[j]);
        // load the corresponding mask
        reg mask = _mm256_load_si256((reg*)&P.mask[i][j]);
        // mask-write them one position to the right
        _mm256_maskstore_epi32(&node[j + 1], mask, t);
    }
    node[i] = x; // finally, write the element itself
}

// move the second half of a node and fill it with infinities
void move(int* from, int* to) {
    const reg infs = _mm256_set1_epi32(INT_MAX);
    for (int i = 0; i < B / 2; i += 8) {
        reg t = _mm256_load_si256((reg*)&from[B / 2 + i]);
        _mm256_store_si256((reg*)&to[i], t);
        _mm256_store_si256((reg*)&from[B / 2 + i], infs);
    }
}

void insert(int _x) {
    // the beginning of the procedure is the same as in lower_bound,
    // except that we save the path in case we need to update some of our ancestors
    unsigned sk[10], si[10]; // k and i on each iteration
    //           ^------^ We assume that the tree height does not exceed 10
    //                    (which would require at least 16^10 elements)

    unsigned k = root;
    reg x = _mm256_set1_epi32(_x);

    for (int h = 0; h < H - 1; h++) {
        unsigned i = rank32(x, &tree[k]);

        // optionally update the key i right away
        tree[k + i] = (_x > tree[k + i] ? _x : tree[k + i]);
        sk[h] = k, si[h] = i; // and save the path

        k = tree[k + B + i];
    }

    unsigned i = rank32(x, &tree[k]);

    // we can start computing the is-full check before insertion completes
    bool filled = (tree[k + B - 2] != INT_MAX);

    insert(tree + k, i, _x);

    if (filled) {
        // the node needs to be split, so we create a new leaf node
        move(tree + k, tree + n_tree);

        int v = tree[k + B / 2 - 1]; // new key to be inserted
        int p = n_tree;              // pointer to the newly created node

        n_tree += B;

        for (int h = H - 2; h >= 0; h--) {
            // ascend and repeat until we reach the root or find a the node is not split
            k = sk[h], i = si[h];

            filled = (tree[k + B - 3] != INT_MAX);

            // the node already has a correct key (the right one)
            //                  and a correct pointer (the left one)
            insert(tree + k, i, v);
            insert(tree + k + B, i + 1, p);

            if (!filled)
                return; // we're done

            // create a new internal node
            move(tree + k, tree + n_tree);     // move keys
            move(tree + k + B, tree + n_tree + B); // move pointers

            v = tree[k + B / 2 - 1];
            tree[k + B / 2 - 1] = INT_MAX;

            p = n_tree;
            n_tree += 2 * B;
        }

        // if reach here, this means we've reached the root,
        // and it was split into two, so we need a new root
        tree[n_tree] = v;

        tree[n_tree + B] = root;
        tree[n_tree + B + 1] = p;

        root = n_tree;
        n_tree += 2 * B;
        H++;
    }
}

signed main() {
    ios_base::sync_with_stdio(0); cin.tie(0); cout.tie(0);
    int N, M, Q, a, b, c; cin >> N >> M >> Q;
    sqrtN = pow(N, 0.5);
    adjlist.resize(2 * N + 5, vector<int>());
    p2.resize(18, vector<int>(2 * N + 5, -1));
    redness.resize(2 * N + 5, 0);
    pre.resize(2 * N + 5, 0);
    post.resize(2 * N + 5, 0);
    for (int i = 0; i < N; i++) {
        p.push_back(i);
        idx.push_back(i);
    }
    cnt = N;
    vector<pair<int, pair<int, int>>> edges;
    for (int i = 0; i < M; i++) {
        cin >> a >> b >> c;
        edges.push_back({ c, {a, b} });
    }
    sort(edges.begin(), edges.end());
    for (int i = 0; i < M; i++) join(edges[i].second.first, edges[i].second.second, edges[i].first);
    dfs(cnt - 1);
    for (int i = 1; i < 18; i++) {
        for (int j = 0; j < cnt; j++) {
            if (p2[i - 1][j] == -1) p2[i][j] = -1;
            else p2[i][j] = p2[i - 1][p2[i - 1][j]];
        }
    }
    vector<pair<pair<int, int>, int>> quers;
    vector<long long> anss(Q, 0);
    for (int i = 0; i < Q; i++) {
        cin >> a >> b;
        quers.push_back({ {a, b}, i });
    }
    sort(quers.begin(), quers.end(), comp);
    pair<int, int> crntr = { 0, 0 };
    for (int i = 0; i < R; i++)
        tree[i] = INT_MAX;
    set<int> ss;
    long long crntv = 0;
    for (int i = 0; i < Q; i++) {
        while (crntr.second <= quers[i].first.second) {
            if (ss.empty()) {
                ss.insert(pre[crntr.second]);
                crntr.second++;
                continue;
            }
            int aa = crntr.second;
            for (int j = 17; j >= 0; j--) if (p2[j][aa] != -1 && ss.upper_bound(post[p2[j][aa]]) == ss.lower_bound(pre[p2[j][aa]])) aa = p2[j][aa];
            aa = p2[0][aa];
            crntv += redness[aa];
            ss.insert(pre[crntr.second]);
            crntr.second++;
        }
        while (crntr.first > quers[i].first.first) {
            crntr.first--;
            if (ss.empty()) {
                ss.insert(pre[crntr.first]);
                continue;
            }
            int aa = crntr.first;
            for (int j = 17; j >= 0; j--) if (p2[j][aa] != -1 && ss.upper_bound(post[p2[j][aa]]) == ss.lower_bound(pre[p2[j][aa]])) aa = p2[j][aa];
            aa = p2[0][aa];
            crntv += redness[aa];
            ss.insert(pre[crntr.first]);
        }
        while (crntr.second > quers[i].first.second + 1) {
            crntr.second--;
            ss.erase(pre[crntr.second]);
            if (ss.empty()) continue;
            int aa = crntr.second;
            for (int j = 17; j >= 0; j--) if (p2[j][aa] != -1 && ss.upper_bound(post[p2[j][aa]]) == ss.lower_bound(pre[p2[j][aa]])) aa = p2[j][aa];
            aa = p2[0][aa];
            crntv -= redness[aa];
        }
        while (crntr.first < quers[i].first.first) {
            ss.erase(pre[crntr.first]);
            if (ss.empty()) {
                crntr.first++;
                continue;
            }
            int aa = crntr.first;
            for (int j = 17; j >= 0; j--) if (p2[j][aa] != -1 && ss.upper_bound(post[p2[j][aa]]) == ss.lower_bound(pre[p2[j][aa]])) aa = p2[j][aa];
            aa = p2[0][aa];
            crntv -= redness[aa];
            crntr.first++;
        }
        anss[quers[i].second] = crntv;
    }
    for (int i : anss) cout << ans1 - i << '\n';
}

컴파일 시 표준 에러 (stderr) 메시지

Main.cpp:49:9: error: '__m256i' does not name a type
   49 | typedef __m256i reg;
      |         ^~~~~~~
Main.cpp:51:1: error: 'reg' does not name a type
   51 | reg cmp(reg x, int* node) {
      | ^~~
Main.cpp:57:17: error: 'reg' was not declared in this scope
   57 | unsigned rank32(reg x, int* node) {
      |                 ^~~
Main.cpp:57:24: error: expected primary-expression before 'int'
   57 | unsigned rank32(reg x, int* node) {
      |                        ^~~
Main.cpp:57:33: error: expression list treated as compound expression in initializer [-fpermissive]
   57 | unsigned rank32(reg x, int* node) {
      |                                 ^
Main.cpp: In function 'int lower_bound(int)':
Main.cpp:74:5: error: 'reg' was not declared in this scope
   74 |     reg x = _mm256_set1_epi32(_x);
      |     ^~~
Main.cpp:77:29: error: 'x' was not declared in this scope
   77 |         unsigned i = rank32(x, &tree[k]);
      |                             ^
Main.cpp:77:28: error: 'rank32' cannot be used as a function
   77 |         unsigned i = rank32(x, &tree[k]);
      |                      ~~~~~~^~~~~~~~~~~~~
Main.cpp:81:25: error: 'x' was not declared in this scope
   81 |     unsigned i = rank32(x, &tree[k]);
      |                         ^
Main.cpp:81:24: error: 'rank32' cannot be used as a function
   81 |     unsigned i = rank32(x, &tree[k]);
      |                  ~~~~~~^~~~~~~~~~~~~
Main.cpp: In function 'void insert(int*, int, int)':
Main.cpp:103:9: error: 'reg' was not declared in this scope
  103 |         reg t = _mm256_load_si256((reg*)&node[j]);
      |         ^~~
Main.cpp:105:12: error: expected ';' before 'mask'
  105 |         reg mask = _mm256_load_si256((reg*)&P.mask[i][j]);
      |            ^~~~~
      |            ;
Main.cpp:107:46: error: 'mask' was not declared in this scope; did you mean 'std::filesystem::perms::mask'?
  107 |         _mm256_maskstore_epi32(&node[j + 1], mask, t);
      |                                              ^~~~
      |                                              std::filesystem::perms::mask
In file included from /usr/include/c++/13/filesystem:48,
                 from /usr/include/x86_64-linux-gnu/c++/13/bits/stdc++.h:200,
                 from Main.cpp:2:
/usr/include/c++/13/bits/fs_fwd.h:158:7: note: 'std::filesystem::perms::mask' declared here
  158 |       mask              = 07777,
      |       ^~~~
Main.cpp:107:52: error: 't' was not declared in this scope
  107 |         _mm256_maskstore_epi32(&node[j + 1], mask, t);
      |                                                    ^
Main.cpp:107:9: error: '_mm256_maskstore_epi32' was not declared in this scope
  107 |         _mm256_maskstore_epi32(&node[j + 1], mask, t);
      |         ^~~~~~~~~~~~~~~~~~~~~~
Main.cpp: In function 'void move(int*, int*)':
Main.cpp:114:11: error: 'reg' does not name a type
  114 |     const reg infs = _mm256_set1_epi32(INT_MAX);
      |           ^~~
Main.cpp:116:9: error: 'reg' was not declared in this scope
  116 |         reg t = _mm256_load_si256((reg*)&from[B / 2 + i]);
      |         ^~~
Main.cpp:117:33: error: expected primary-expression before ')' token
  117 |         _mm256_store_si256((reg*)&to[i], t);
      |                                 ^
Main.cpp:117:42: error: 't' was not declared in this scope
  117 |         _mm256_store_si256((reg*)&to[i], t);
      |                                          ^
Main.cpp:117:9: error: '_mm256_store_si256' was not declared in this scope
  117 |         _mm256_store_si256((reg*)&to[i], t);
      |         ^~~~~~~~~~~~~~~~~~
Main.cpp:118:33: error: expected primary-expression before ')' token
  118 |         _mm256_store_si256((reg*)&from[B / 2 + i], infs);
      |                                 ^
Main.cpp:118:52: error: 'infs' was not declared in this scope
  118 |         _mm256_store_si256((reg*)&from[B / 2 + i], infs);
      |                                                    ^~~~
Main.cpp: In function 'void insert(int)':
Main.cpp:130:5: error: 'reg' was not declared in this scope
  130 |     reg x = _mm256_set1_epi32(_x);
      |     ^~~
Main.cpp:133:29: error: 'x' was not declared in this scope
  133 |         unsigned i = rank32(x, &tree[k]);
      |                             ^
Main.cpp:133:28: error: 'rank32' cannot be used as a function
  133 |         unsigned i = rank32(x, &tree[k]);
      |                      ~~~~~~^~~~~~~~~~~~~
Main.cpp:142:25: error: 'x' was not declared in this scope
  142 |     unsigned i = rank32(x, &tree[k]);
      |                         ^
Main.cpp:142:24: error: 'rank32' cannot be used as a function
  142 |     unsigned i = rank32(x, &tree[k]);
      |                  ~~~~~~^~~~~~~~~~~~~