Submission #1281814

#TimeUsernameProblemLanguageResultExecution timeMemory
1281814lamCookies (JOI23_cookies)C++20
25 / 100
155 ms279092 KiB
// GPT5-Pro code
// cookies.cpp
#include <bits/stdc++.h>
using namespace std;

using ull = unsigned long long;

struct BitsetVec {
    // dynamic bitset stored as vector<ull>, length in bits = Lbits
    int Lbits, W;
    vector<ull> a;
    BitsetVec() : Lbits(0), W(0) {}
    BitsetVec(int Lbits_) : Lbits(Lbits_) {
        W = (Lbits + 63) >> 6;
        a.assign(W, 0ULL);
    }
    inline void reset() { std::fill(a.begin(), a.end(), 0ULL); }
    inline void set0() { if (!a.empty()) a[0] |= 1ULL; }
    inline bool test(int k) const {
        if (k < 0 || k > Lbits) return false;
        int w = k >> 6, b = k & 63; if (w >= W) return false;
        return (a[w] >> b) & 1ULL;
    }
    inline void trim_to(int limitBits) { // keep bits 0..limitBits
        if (limitBits >= Lbits) return; // nothing to trim
        int last = limitBits >> 6, r = limitBits & 63;
        for (int i = last + 1; i < W; ++i) a[i] = 0ULL;
        ull mask = (r == 63 ? ~0ULL : ((1ULL << (r + 1)) - 1ULL));
        if (last < W) a[last] &= mask;
        for (int i = last + 1; i < W; ++i) a[i] = 0ULL;
    }
};

// OR into dst: (src << shift), truncated to limitBits
static inline void or_shift_trunc(vector<ull>& dst, const vector<ull>& src,
                                  int shift, int limitBits) {
    if (shift == 0) {
        int Wlim = ((limitBits + 63) >> 6);
        for (int i = 0; i < Wlim && i < (int)dst.size() && i < (int)src.size(); ++i)
            dst[i] |= src[i];
        if (Wlim < (int)dst.size()) dst[Wlim] = 0; // will be masked below
    } else {
        int ws = shift >> 6, bs = shift & 63;
        int Wdst = (limitBits + 63) >> 6;
        for (int i = Wdst; i >= 0; --i) {
            ull x = 0;
            int j = i - ws;
            if (j >= 0 && j < (int)src.size()) {
                x = (bs == 0 ? src[j] : (src[j] << bs));
                if (bs != 0 && j - 1 >= 0)
                    x |= (src[j - 1] >> (64 - bs));
            } else if (bs != 0 && j - 1 >= 0 && j - 1 < (int)src.size()) {
                x = (src[j - 1] >> (64 - bs));
            } else {
                x = 0;
            }
            if (i < (int)dst.size()) dst[i] |= x;
        }
    }
    // mask dst to 0..limitBits
    int last = limitBits >> 6, r = limitBits & 63;
    if (last < (int)dst.size()) {
        ull mask = (r == 63 ? ~0ULL : ((1ULL << (r + 1)) - 1ULL));
        dst[last] &= mask;
        for (int i = last + 1; i < (int)dst.size(); ++i) dst[i] = 0ULL;
    }
}

// Same as above, but also return "added bits" via a lambda for backtracking.
template<class RecLambda>
static inline void or_shift_trunc_record(vector<ull>& dst, const vector<ull>& src,
                                         int shift, int limitBits, RecLambda rec) {
    int last = limitBits >> 6, r = limitBits & 63;
    if (shift == 0) {
        for (int i = 0; i <= last && i < (int)dst.size() && i < (int)src.size(); ++i) {
            ull x = src[i];
            if (i == last && r != 63) x &= ((1ULL << (r + 1)) - 1ULL);
            ull old = dst[i];
            ull add = x & (~old);
            if (add) {
                int base = (i << 6);
                while (add) {
                    int b = __builtin_ctzll(add);
                    rec(base + b); // position set newly
                    add &= add - 1;
                }
            }
            dst[i] = old | x;
        }
        // clear above limit
        if (last < (int)dst.size()) {
            for (int i = last + 1; i < (int)dst.size(); ++i) dst[i] = 0ULL;
        }
        return;
    }
    int ws = shift >> 6, bs = shift & 63;
    for (int i = last; i >= 0; --i) {
        ull x = 0;
        int j = i - ws;
        if (j >= 0 && j < (int)src.size()) {
            x = (bs == 0 ? src[j] : (src[j] << bs));
            if (bs != 0 && j - 1 >= 0)
                x |= (src[j - 1] >> (64 - bs));
        } else if (bs != 0 && j - 1 >= 0 && j - 1 < (int)src.size()) {
            x = (src[j - 1] >> (64 - bs));
        } else {
            x = 0;
        }
        if (i == last && r != 63) x &= ((1ULL << (r + 1)) - 1ULL);
        ull old = dst[i];
        ull add = x & (~old);
        if (add) {
            int base = (i << 6);
            while (add) {
                int b = __builtin_ctzll(add);
                rec(base + b);
                add &= add - 1;
            }
        }
        dst[i] = old | x;
    }
    for (int i = last + 1; i < (int)dst.size(); ++i) dst[i] = 0ULL;
}

int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);
    int N; 
    if (!(cin >> N)) return 0;
    vector<int> A(N);
    long long Sll = 0;
    for (int i = 0; i < N; ++i) { cin >> A[i]; Sll += A[i]; }
    int M; cin >> M;
    vector<int> B(M);
    for (int j = 0; j < M; ++j) cin >> B[j];

    int S = (int)Sll;
    // Edge: S==0 (not possible per constraints Ai>=1)
    if (S == 0) { cout << 0 << "\n"; return 0; }

    // Allowed sizes (bounded by N and by S)
    sort(B.begin(), B.end());
    B.erase(unique(B.begin(), B.end()), B.end());
    vector<int> sizes;
    sizes.reserve(B.size());
    for (int x : B) if (x >= 1 && x <= N && x <= S) sizes.push_back(x);
    if (sizes.empty()) { cout << -1 << "\n"; return 0; }
    sort(sizes.begin(), sizes.end(), greater<int>());

    // Compute F(t) = sum_i min(Ai, t) for t=0..S
    vector<int> cnt(S + 1, 0);
    for (int x : A) { if (x <= S) cnt[x]++; else cnt[S]++; } // Ai <= S always, but safe
    vector<int> atLeast(S + 1, 0); // atLeast[t] = #i with Ai >= t
    int cum = 0;
    for (int t = S; t >= 1; --t) { cum += cnt[t]; atLeast[t] = cum; }
    vector<int> F(S + 1, 0);
    for (int t = 1; t <= S; ++t) {
        long long v = (long long)F[t - 1] + atLeast[t];
        if (v > S) v = S; // saturate to S
        F[t] = (int)v;
    }
    int maxA = 0; for (int x : A) maxA = max(maxA, x);

    // Quick necessary conditions (helpful short-circuits)
    // gcd of sizes must divide S
    int g = sizes[0];
    for (int i = 1; i < (int)sizes.size(); ++i) g = std::gcd(g, sizes[i]);
    if (S % g != 0) { cout << -1 << "\n"; return 0; }
    // Also obvious: must have maxA <= #boxes chosen; our DP enforces this via F, so no extra check.

    // Prepare DP bitsets dp[i] over sums (0..S)
    const int Lbits = S; // we manage indices 0..S
    const int W = (Lbits + 63) >> 6;
    vector<BitsetVec> dp(S + 1, BitsetVec(Lbits));
    dp[0].a[0] |= 1ULL;

    // First pass: find minimal x (number of boxes)
    for (int s : sizes) {
        int iLim = S / s; // only these i can get new contributions using size s (see editorial)
        // For i from 1..iLim: dp[i] |= dp[i-1] << s
        for (int i = 1; i <= iLim; ++i) {
            or_shift_trunc(dp[i].a, dp[i - 1].a, s, F[i]);
        }
        // dp[i>iLim] just carry (already trimmed in earlier iterations)
    }

    int x = -1;
    for (int i = maxA; i <= S; ++i) {
        if (dp[i].test(S)) { x = i; break; }
    }
    if (x == -1) { cout << -1 << "\n"; return 0; }

    // Second pass: reconstruct sizes used for (x, S).
    // Only keep dp up to i=x and record parent size for first time a (i,k) becomes reachable.
    vector<BitsetVec> dp2(x + 1, BitsetVec(Lbits));
    dp2[0].a[0] |= 1ULL;

    // parent[i][k] = size j used last to reach k with i boxes (0 if unset)
    vector<vector<unsigned short>> parent(x + 1);
    for (int i = 0; i <= x; ++i) parent[i].assign(F[i] + 1, 0);

    for (int s : sizes) {
        int iLim = min(x, S / s);
        for (int i = 1; i <= iLim; ++i) {
            // incorporate (dp2[i-1] << s) into dp2[i], and record which k newly set
            or_shift_trunc_record(dp2[i].a, dp2[i - 1].a, s, F[i],
                [&](int kpos){
                    if (kpos <= F[i]) parent[i][kpos] = (unsigned short)s;
                }
            );
        }
    }
    if (!dp2[x].test(S)) { // Should never happen
        cout << -1 << "\n"; return 0;
    }

    // Backtrack to get counts of each size
    unordered_map<int,int> cntSize; cntSize.reserve(sizes.size()*2);
    int curI = x, curK = S;
    while (curI > 0) {
        unsigned short s = parent[curI][curK];
        // Because we used "first-touch", s is guaranteed non-zero
        ++cntSize[(int)s];
        curK -= (int)s;
        --curI;
    }

    // Build actual boxes with those sizes and fill them greedily by remaining capacity
    struct Box { int cap, rem, id; vector<int> types; };
    vector<Box> boxes;
    boxes.reserve(x);
    int id = 0;
    for (int s : sizes) {
        int c = cntSize[s];
        for (int t = 0; t < c; ++t) {
            boxes.push_back(Box{s, s, id++, {}});
        }
    }
    // Sanity
    if ((int)boxes.size() != x) {
        cout << -1 << "\n"; return 0;
    }
    // Max-heap by remaining capacity
    struct Node { int rem, idx; bool operator<(const Node& o) const { return rem < o.rem; } };
    priority_queue<Node> pq;
    for (int i = 0; i < x; ++i) pq.push(Node{boxes[i].rem, i});

    // order types by Ai descending to make greedy robust
    vector<int> ord(N);
    iota(ord.begin(), ord.end(), 0);
    sort(ord.begin(), ord.end(), [&](int u, int v){ return A[u] > A[v]; });

    for (int u : ord) {
        int need = A[u];
        if (need == 0) continue;
        if (need > x) { cout << -1 << "\n"; return 0; } // should not happen
        vector<Node> taken;
        taken.reserve(need);
        for (int t = 0; t < need; ++t) {
            if (pq.empty() || pq.top().rem <= 0) { cout << -1 << "\n"; return 0; }
            Node nd = pq.top(); pq.pop();
            taken.push_back(nd);
        }
        for (auto nd : taken) {
            boxes[nd.idx].types.push_back(u + 1); // 1-based type id
            boxes[nd.idx].rem = nd.rem - 1;
            if (boxes[nd.idx].rem > 0) pq.push(Node{boxes[nd.idx].rem, nd.idx});
        }
    }
    // Final sanity
    for (auto &b : boxes) if (b.rem != 0 || (int)b.types.size() != b.cap) {
        cout << -1 << "\n"; return 0;
    }

    // Output
    cout << x << "\n";
    for (auto &b : boxes) {
        cout << b.cap;
        for (int v : b.types) cout << " " << v;
        cout << "\n";
    }
    return 0;
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...