Submission #1330779 (oj.uz)

#       | Username               | Problem                     | Language | Result   | Execution time | Memory
1330779 | TheMiraiTraveller0711  | Languages (IOI10_languages) | C++20    | 94 / 100 | 313 ms         | 57880 KiB
#include "grader.h"
#include <bits/stdc++.h>
using namespace std;

/*
  IOI 2010 "Languages" style interactive solution.

  - Feature hashing for n-grams (1,2,4 heavily weighted).
  - Online perceptron-like updates: +1 to correct, -1 to predicted (if wrong).
  - Uses presence (unique features per excerpt) to reduce domination by repeats.

  Tuneable knobs:
    BINS: hashing bins (power of two)
    W1, W2, W4: gram weights
    USE_3GRAM, W3: optional
*/

// Number of target languages the grader can ask about.
static constexpr int LANGS = 56;

// Hash bins (power of two). 2^17 = 131072.
// Memory per table: BINS * LANGS * 2 bytes ~ 14.7 MB (int16)
// With 3 tables (1,2,4): ~44 MB.
static constexpr int BINS_POW = 17;
static constexpr int BINS = 1 << BINS_POW;
static constexpr uint32_t MASK = BINS - 1;  // low-bit mask for bin selection

// Per-gram-size score weights used in predict_lang (you can tweak).
static constexpr int W1 = 1;
static constexpr int W2 = 2;
static constexpr int W4 = 3;

// Optional 3-gram support (currently ENABLED; set false to drop the 3-gram
// table and its ~14.7 MB).
static constexpr bool USE_3GRAM = true;
static constexpr int W3 = 1;

static constexpr int BIAS_W = 1; // small per-language frequency prior

// Weight tables, stored transposed as [bin][lang] so that scoring one bin
// touches 56 contiguous int16 cells (cache-friendly inner loop).
static vector<int16_t> tab1, tab2, tab3, tab4;
// seen_cnt[l] = number of excerpts whose true language was l so far.
static int seen_cnt[LANGS];
// Total excerpts processed (kept for bookkeeping; not read elsewhere here).
static int total_seen = 0;

// SplitMix64 mixing step: cheap, statistically strong 64-bit scrambler
// (Steele/Lea/Flood generator's output function). Pure function of x.
static inline uint64_t splitmix64(uint64_t x) {
    uint64_t z = x + 0x9e3779b97f4a7c15ULL;   // golden-ratio increment
    z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9ULL;
    z = (z ^ (z >> 27)) * 0x94d049bb133111ebULL;
    z ^= z >> 31;
    return z;
}

// Map an arbitrary 64-bit feature key to a bin index in [0, BINS).
static inline uint32_t h64_to_bin(uint64_t key) {
    const uint64_t mixed = splitmix64(key);
    return static_cast<uint32_t>(mixed & MASK);  // keep low BINS_POW bits
}

// Add `delta` to cell (bin, lang) of a transposed table, saturating the
// result at the int16_t range so repeated updates cannot wrap around.
static inline void sat_inc(vector<int16_t>& tab, uint32_t bin, int lang, int delta) {
    const int idx = (int)bin * LANGS + lang;
    const int updated = std::clamp((int)tab[idx] + delta, -32768, 32767);
    tab[idx] = (int16_t)updated;
}

// Unigram presence features: hash each of the 100 symbols to a bin, then
// deduplicate so a repeated symbol counts only once per excerpt.
static inline void make_features_1(const int E[100], vector<uint32_t>& out) {
    out.clear();
    out.reserve(100);
    for (int pos = 0; pos < 100; ++pos) {
        const uint64_t key = (uint64_t)(uint32_t)E[pos] * 0x9e3779b1u + 0x1234567u;
        out.push_back(h64_to_bin(key));
    }
    sort(out.begin(), out.end());
    auto tail = unique(out.begin(), out.end());
    out.erase(tail, out.end());
}

// Bigram presence features: each adjacent symbol pair is packed losslessly
// into 32 bits (symbols fit in 16 bits), hashed, then deduplicated.
static inline void make_features_2(const int E[100], vector<uint32_t>& out) {
    out.clear();
    out.reserve(99);
    for (int pos = 0; pos + 1 < 100; ++pos) {
        const uint32_t lo = (uint32_t)E[pos];
        const uint32_t hi = (uint32_t)E[pos + 1] << 16;
        const uint64_t key = (uint64_t)(lo | hi) ^ 0xA5A5A5A5ULL;  // domain tag
        out.push_back(h64_to_bin(key));
    }
    sort(out.begin(), out.end());
    auto tail = unique(out.begin(), out.end());
    out.erase(tail, out.end());
}

// Trigram presence features: three 16-bit symbols packed into 48 bits,
// XOR-tagged to keep this key space disjoint from the other gram sizes.
static inline void make_features_3(const int E[100], vector<uint32_t>& out) {
    out.clear();
    out.reserve(98);
    for (int pos = 0; pos + 2 < 100; ++pos) {
        uint64_t packed = (uint64_t)(uint32_t)E[pos];
        packed |= (uint64_t)(uint32_t)E[pos + 1] << 16;
        packed |= (uint64_t)(uint32_t)E[pos + 2] << 32;
        out.push_back(h64_to_bin(packed ^ 0xC3C3C3C3C3ULL));
    }
    sort(out.begin(), out.end());
    auto tail = unique(out.begin(), out.end());
    out.erase(tail, out.end());
}

// 4-gram presence features: four 16-bit symbols fill the full 64-bit key,
// XOR-tagged to separate this key space from the other gram sizes.
static inline void make_features_4(const int E[100], vector<uint32_t>& out) {
    out.clear();
    out.reserve(97);
    for (int pos = 0; pos + 3 < 100; ++pos) {
        uint64_t packed = (uint64_t)(uint32_t)E[pos];
        packed |= (uint64_t)(uint32_t)E[pos + 1] << 16;
        packed |= (uint64_t)(uint32_t)E[pos + 2] << 32;
        packed |= (uint64_t)(uint32_t)E[pos + 3] << 48;
        out.push_back(h64_to_bin(packed ^ 0x5F5F5F5F5F5FULL));
    }
    sort(out.begin(), out.end());
    auto tail = unique(out.begin(), out.end());
    out.erase(tail, out.end());
}

static inline int predict_lang(const vector<uint32_t>& f1,
                               const vector<uint32_t>& f2,
                               const vector<uint32_t>& f3,
                               const vector<uint32_t>& f4) {
    int32_t score[LANGS];
    for (int l = 0; l < LANGS; l++) score[l] = (int32_t)seen_cnt[l] * BIAS_W;

    // Add contributions feature-by-feature, updating all 56 languages contiguously.
    for (uint32_t bin : f1) {
        int base = (int)bin * LANGS;
        for (int l = 0; l < LANGS; l++) score[l] += (int32_t)tab1[base + l] * W1;
    }
    for (uint32_t bin : f2) {
        int base = (int)bin * LANGS;
        for (int l = 0; l < LANGS; l++) score[l] += (int32_t)tab2[base + l] * W2;
    }
    if (USE_3GRAM) {
        for (uint32_t bin : f3) {
            int base = (int)bin * LANGS;
            for (int l = 0; l < LANGS; l++) score[l] += (int32_t)tab3[base + l] * W3;
        }
    }
    for (uint32_t bin : f4) {
        int base = (int)bin * LANGS;
        for (int l = 0; l < LANGS; l++) score[l] += (int32_t)tab4[base + l] * W4;
    }

    // Choose best; tie-break toward less-seen languages (gentle exploration).
    int bestL = 0;
    for (int l = 1; l < LANGS; l++) {
        if (score[l] > score[bestL]) bestL = l;
        else if (score[l] == score[bestL] && seen_cnt[l] < seen_cnt[bestL]) bestL = l;
    }
    return bestL;
}

static inline void update_model(int trueL, int predL,
                                const vector<uint32_t>& f1,
                                const vector<uint32_t>& f2,
                                const vector<uint32_t>& f3,
                                const vector<uint32_t>& f4) {
    // Always reinforce the true language; penalize predicted if wrong.
    for (uint32_t bin : f1) {
        sat_inc(tab1, bin, trueL, +1);
        if (predL != trueL) sat_inc(tab1, bin, predL, -1);
    }
    for (uint32_t bin : f2) {
        sat_inc(tab2, bin, trueL, +1);
        if (predL != trueL) sat_inc(tab2, bin, predL, -1);
    }
    if (USE_3GRAM) {
        for (uint32_t bin : f3) {
            sat_inc(tab3, bin, trueL, +1);
            if (predL != trueL) sat_inc(tab3, bin, predL, -1);
        }
    }
    for (uint32_t bin : f4) {
        sat_inc(tab4, bin, trueL, +1);
        if (predL != trueL) sat_inc(tab4, bin, predL, -1);
    }

    seen_cnt[trueL]++;
    total_seen++;
}

// Grader entry point: called once per 100-symbol excerpt. Predicts a
// language, learns the truth from the interactive language() call, then
// updates the model with that feedback.
void excerpt(int E[100]) {
    // Allocate the weight tables on first use (sidesteps static-init-order
    // issues with the grader).
    static bool ready = false;
    if (!ready) {
        const size_t cells = (size_t)BINS * LANGS;
        tab1.assign(cells, 0);
        tab2.assign(cells, 0);
        tab4.assign(cells, 0);
        if (USE_3GRAM) tab3.assign(cells, 0);
        memset(seen_cnt, 0, sizeof(seen_cnt));
        ready = true;
    }

    // Extract per-gram-size presence features for this excerpt.
    vector<uint32_t> f1, f2, f3, f4;
    make_features_1(E, f1);
    make_features_2(E, f2);
    if (USE_3GRAM) make_features_3(E, f3);
    make_features_4(E, f4);

    const int guess = predict_lang(f1, f2, f3, f4);
    const int truth = language(guess);  // interactive: reveals true language
    update_model(truth, guess, f1, f2, f3, f4);
}
# | Verdict | Execution time | Memory | Grader output
Fetching results...
# | Verdict | Execution time | Memory | Grader output
Fetching results...