Submission #1330812

#TimeUsernameProblemLanguageResultExecution timeMemory
1330812normankr07Languages (IOI10_languages)C++20
99 / 100
2869 ms41240 KiB
#include "lang.h"
#include "grader.h"

#include <cmath>
#include <algorithm>
#include <cstring>
#include <unordered_set>

static const int NL = 56;
static const int MS = 65536;

// ── unigram data (14 MB array for O(1) lookup) ──
static int sym[NL][MS];
static int sym_total[NL];

// ── n-gram data (2-, 3-, 4-grams stored as hash-sets per language) ──
static std::unordered_set<int64_t> ng2[NL];
static std::unordered_set<int64_t> ng3[NL];
static std::unordered_set<int64_t> ng4[NL];

// ── bookkeeping ──
static int cnt[NL];          // excerpts seen per language
static int N = 0;            // total excerpts seen
static int V = 0;            // global vocabulary size
static bool seen[MS];

// ── helpers to build n-gram keys ──
static inline int64_t key2(const int *E, int i) {
    return (int64_t)E[i] * MS + E[i + 1];
}
static inline int64_t key3(const int *E, int i) {
    return ((int64_t)E[i] * MS + E[i + 1]) * MS + E[i + 2];
}
static inline int64_t key4(const int *E, int i) {
    // Use a mixing scheme to stay within 64 bits:
    // 16 bits per symbol → 64 bits total for 4 symbols
    return ((int64_t)E[i] << 48) | ((int64_t)E[i + 1] << 32)
         | ((int64_t)E[i + 2] << 16) | (int64_t)E[i + 3];
}

void excerpt(int *E) {
    int best = 0;
    double best_s = -1e100;

    if (N > 0) {
        const double alpha = 0.5;
        const int Vv = V > 0 ? V : 1;

        // ── Pass 1: unigram Naive-Bayes score for every seen language ──
        double scores[NL];
        int order[NL];
        int nlang = 0;

        for (int L = 0; L < NL; L++) {
            if (!cnt[L]) { scores[L] = -1e100; continue; }

            double s = std::log((double)cnt[L] / N);            // prior
            double ld = std::log(sym_total[L] + alpha * Vv);    // denom

            for (int i = 0; i < 100; i++)
                s += std::log(sym[L][E[i]] + alpha) - ld;

            scores[L] = s;
            order[nlang++] = L;
        }

        // Sort candidates by unigram score (descending)
        std::sort(order, order + nlang,
                  [&](int a, int b) { return scores[a] > scores[b]; });

        // ── Pass 2: add n-gram bonuses for top-K candidates ──
        int K = std::min(nlang, 20);

        for (int k = 0; k < K; k++) {
            int L = order[k];
            int m2 = 0, m3 = 0, m4 = 0;

            for (int i = 0; i < 99; i++)
                if (ng2[L].count(key2(E, i))) m2++;
            for (int i = 0; i < 98; i++)
                if (ng3[L].count(key3(E, i))) m3++;
            for (int i = 0; i < 97; i++)
                if (ng4[L].count(key4(E, i))) m4++;

            scores[L] += 2.0 * m2 + 4.0 * m3 + 6.0 * m4;
        }

        // Pick best among top-K
        best   = order[0];
        best_s = scores[order[0]];
        for (int k = 1; k < K; k++) {
            if (scores[order[k]] > best_s) {
                best_s = scores[order[k]];
                best   = order[k];
            }
        }
    }

    int c = language(best);

    // ── online learning: update stats with correct answer ──
    for (int i = 0; i < 100; i++) {
        if (!seen[E[i]]) { seen[E[i]] = true; V++; }
        sym[c][E[i]]++;
        sym_total[c]++;
    }
    for (int i = 0; i < 99; i++)  ng2[c].insert(key2(E, i));
    for (int i = 0; i < 98; i++)  ng3[c].insert(key3(E, i));
    for (int i = 0; i < 97; i++)  ng4[c].insert(key4(E, i));

    cnt[c]++;
    N++;
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...