Submission #1330793

#TimeUsernameProblemLanguageResultExecution timeMemory
1330793TheMiraiTraveller0711Languages (IOI10_languages)C++20
95 / 100
463 ms56988 KiB
#pragma GCC optimize("O3")
#include "grader.h"
#include <bits/stdc++.h>

using namespace std;

// Carefully tuned memory to sit comfortably at ~215 MiB (Limit: 256 MiB)
static constexpr int MAX_NODES = 1750000;
static constexpr int HASH_CAP = 1 << 22; // 4,194,304

static int head[HASH_CAP];
static int next_node[MAX_NODES];
static uint64_t keys[MAX_NODES];
static int16_t weights[MAX_NODES][56];
static int node_cnt = 0;
static int seen_cnt[56];

// High-quality avalanche for the flat hash map
static inline uint64_t splitmix64(uint64_t x) {
    x += 0x9e3779b97f4a7c15ULL;
    x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9ULL;
    x = (x ^ (x >> 27)) * 0x94d049bb133111ebULL;
    return x ^ (x >> 31);
}

// Retrieves the unique ID for an n-gram
static int get_id(uint64_t key) {
    uint32_t h = splitmix64(key) & (HASH_CAP - 1);
    for (int i = head[h]; i != -1; i = next_node[i]) {
        if (keys[i] == key) return i;
    }
    // Graceful degradation if we approach memory limits
    if (node_cnt >= MAX_NODES) return -1;
    
    int id = node_cnt++;
    keys[id] = key;
    next_node[id] = head[h];
    head[h] = id;
    return id;
}

// Safely update perceptron weights
static inline void sat_add(int16_t& w, int delta) {
    int v = w + delta;
    if (v > 30000) v = 30000;
    else if (v < -30000) v = -30000;
    w = v;
}

void excerpt(int E[100]) {
    static bool inited = false;
    if (!inited) {
        memset(head, -1, sizeof(head));
        inited = true;
    }

    // Count 1-gram frequencies up to a clip limit
    map<int, int> uni_counts;
    vector<int> f2, f3, f4;
    f2.reserve(100); f3.reserve(100); f4.reserve(100);

    // Because E[i] is guaranteed >= 1, upper bits of larger n-grams 
    // will never be zero, making 1, 2, 3, and 4-grams inherently distinct!
    for (int i = 0; i < 100; i++) {
        uint64_t h1 = E[i];
        int id1 = get_id(h1);
        if (id1 != -1) uni_counts[id1]++;

        if (i < 99) {
            uint64_t h2 = ((uint64_t)E[i] << 16) | E[i + 1];
            int id2 = get_id(h2);
            if (id2 != -1) f2.push_back(id2);
        }
        if (i < 98) {
            uint64_t h3 = ((uint64_t)E[i] << 32) | ((uint64_t)E[i + 1] << 16) | E[i + 2];
            int id3 = get_id(h3);
            if (id3 != -1) f3.push_back(id3);
        }
        if (i < 97) {
            uint64_t h4 = ((uint64_t)E[i] << 48) | ((uint64_t)E[i + 1] << 32) | ((uint64_t)E[i + 2] << 16) | E[i + 3];
            int id4 = get_id(h4);
            if (id4 != -1) f4.push_back(id4);
        }
    }

    vector<pair<int, int>> f1;
    // Cap unigram counts to avoid single-character spam skewing the score
    for (auto& kv : uni_counts) f1.push_back({kv.first, min(kv.second, 3)});

    auto make_unique = [](vector<int>& v) {
        sort(v.begin(), v.end());
        v.erase(unique(v.begin(), v.end()), v.end());
    };
    make_unique(f2);
    make_unique(f3);
    make_unique(f4);

    int best_l = 0, second_l = 1;
    int32_t best_score = -2e9, second_score = -2e9;

    // Evaluate scores
    for (int l = 0; l < 56; l++) {
        int32_t score = seen_cnt[l] * 2; // Small bias toward more historically seen languages

        for (auto& p : f1) score += weights[p.first][l] * p.second;
        for (int id : f2) score += weights[id][l] * 2;
        for (int id : f3) score += weights[id][l] * 2;
        for (int id : f4) score += weights[id][l] * 2;

        if (score > best_score) {
            second_score = best_score;
            second_l = best_l;
            best_score = score;
            best_l = l;
        } else if (score > second_score) {
            second_score = score;
            second_l = l;
        }
    }

    int guess = best_l;
    int correct = language(guess);

    // Multi-class Perceptron update logic
    auto update = [&](int pos_l, int neg_l, int multiplier) {
        for (auto& p : f1) {
            sat_add(weights[p.first][pos_l], p.second * multiplier);
            sat_add(weights[p.first][neg_l], -p.second * multiplier);
        }
        for (int id : f2) {
            sat_add(weights[id][pos_l], 2 * multiplier);
            sat_add(weights[id][neg_l], -2 * multiplier);
        }
        for (int id : f3) {
            sat_add(weights[id][pos_l], 2 * multiplier);
            sat_add(weights[id][neg_l], -2 * multiplier);
        }
        for (int id : f4) {
            sat_add(weights[id][pos_l], 2 * multiplier);
            sat_add(weights[id][neg_l], -2 * multiplier);
        }
    };

    if (correct != guess) {
        // Punish incorrect guess heavily
        update(correct, guess, 1);
    } else {
        // Margin-sharpening: if it was right but too close for comfort, sharpen boundary
        if (best_score - second_score < 40) {
            update(correct, second_l, 1);
        }
    }
    
    seen_cnt[correct]++;
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...