제출 #1330784

#제출 시각아이디문제언어결과실행 시간메모리
1330784TheMiraiTraveller0711Languages (IOI10_languages)C++20
91 / 100
2153 ms15796 KiB
/*
 * IOI 2010 - Language
 * N-gram approach (unigrams through 4-grams) with log-frequency scoring.
 * No hashing — all flat arrays.
 *
 * Alphabet reduction: symbols are folded to 4 bits (0–15) via E[i] & 0xF
 * so that 4-gram tables fit in memory.
 *
 * Memory:
 * - freq1[56][16]              =      3.6 KB
 * - freq2[56][16][16]          =     57.3 KB
 * - freq3[56][16][16][16]      =    917 KB
 * - freq4[56][16][16][16][16]  =    ~14 MB
 * Total: ~15 MB
 */

#include "grader.h"
#include <bits/stdc++.h>
using namespace std;

static const int NLANGS = 56;
static const int ALPHA  = 16;   // 4-bit folded alphabet

static int freq1[NLANGS][ALPHA];
static int freq2[NLANGS][ALPHA][ALPHA];
static int freq3[NLANGS][ALPHA][ALPHA][ALPHA];
static int freq4[NLANGS][ALPHA][ALPHA][ALPHA][ALPHA];

static int lang_count[NLANGS];

static const double W1 =  1.0;
static const double W2 =  3.0;
static const double W3 =  6.0;
static const double W4 = 10.0;

void excerpt(int *E) {

    // Fold each symbol to 4 bits
    int B[100];
    for (int i = 0; i < 100; i++) B[i] = E[i] & 0xF;

    // ── Score each language ─────────────────────────────────────────────────

    double best_score = -1e30;
    int    best_lang  = 0;

    for (int l = 0; l < NLANGS; l++) {

        if (lang_count[l] == 0) {
            double s = -1e10 + l * 1e-6;
            if (s > best_score) { best_score = s; best_lang = l; }
            continue;
        }

        double score = 0.0;

        // Unigrams
        bool seen1[ALPHA] = {};
        for (int i = 0; i < 100; i++) {
            int a = B[i];
            if (!seen1[a]) {
                seen1[a] = true;
                int f = freq1[l][a];
                if (f) score += W1 * log(1.0 + f);
            }
        }

        // Bigrams
        bool seen2[ALPHA][ALPHA] = {};
        for (int i = 0; i < 99; i++) {
            int a = B[i], b = B[i+1];
            if (!seen2[a][b]) {
                seen2[a][b] = true;
                int f = freq2[l][a][b];
                if (f) score += W2 * log(1.0 + f);
            }
        }

        // Trigrams
        bool seen3[ALPHA][ALPHA][ALPHA] = {};
        for (int i = 0; i < 98; i++) {
            int a = B[i], b = B[i+1], c = B[i+2];
            if (!seen3[a][b][c]) {
                seen3[a][b][c] = true;
                int f = freq3[l][a][b][c];
                if (f) score += W3 * log(1.0 + f);
            }
        }

        // 4-grams
        bool seen4[ALPHA][ALPHA][ALPHA][ALPHA] = {};
        for (int i = 0; i < 97; i++) {
            int a = B[i], b = B[i+1], c = B[i+2], d = B[i+3];
            if (!seen4[a][b][c][d]) {
                seen4[a][b][c][d] = true;
                int f = freq4[l][a][b][c][d];
                if (f) score += W4 * log(1.0 + f);
            }
        }

        if (score > best_score) { best_score = score; best_lang = l; }
    }

    // ── Guess and receive correct label ─────────────────────────────────────

    int correct = language(best_lang);

    // ── Update model ─────────────────────────────────────────────────────────

    for (int i = 0; i < 100; i++)
        freq1[correct][B[i]]++;

    for (int i = 0; i < 99; i++)
        freq2[correct][B[i]][B[i+1]]++;

    for (int i = 0; i < 98; i++)
        freq3[correct][B[i]][B[i+1]][B[i+2]]++;

    for (int i = 0; i < 97; i++)
        freq4[correct][B[i]][B[i+1]][B[i+2]][B[i+3]]++;

    lang_count[correct]++;
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...