#pragma GCC optimize("O3")
#include "grader.h"
#include <bits/stdc++.h>
using namespace std;
// Static feature store sized against the 256 MiB limit. The dominant cost is
// `weights` (1,750,000 * 56 * 2 B ~= 187 MiB); adding keys (~13 MiB),
// chain links (~7 MiB) and bucket heads (16 MiB) the total is roughly
// 223 MiB on a typical 64-bit target (Limit: 256 MiB).
static constexpr int MAX_NODES = 1750000;
static constexpr int HASH_CAP = 1 << 22; // 4,194,304 buckets; power of two -> cheap mask
static int head[HASH_CAP];               // bucket -> first node id in chain, or -1 if empty
static int next_node[MAX_NODES];         // per-node link to the next node in the same bucket
static uint64_t keys[MAX_NODES];         // packed n-gram key interned at each node
static int16_t weights[MAX_NODES][56];   // perceptron weight of each feature per class
static int node_cnt = 0;                 // number of interned feature nodes so far
static int seen_cnt[56];                 // per class: how often it was the true answer
// SplitMix64 finalizer (Vigna): one additive golden-gamma step followed by
// two xor-shift/multiply rounds. Gives strong bit avalanche, so the low
// bits are safe to use directly as the hash-table bucket index.
static inline uint64_t splitmix64(uint64_t x) {
    uint64_t z = x + 0x9e3779b97f4a7c15ULL;
    z ^= z >> 30;
    z *= 0xbf58476d1ce4e5b9ULL;
    z ^= z >> 27;
    z *= 0x94d049bb133111ebULL;
    return z ^ (z >> 31);
}
// Interns an n-gram key: returns its existing node id, or allocates a fresh
// one at the front of its hash bucket. Returns -1 once the node pool is
// exhausted so callers can simply drop the feature (graceful degradation).
static int get_id(uint64_t key) {
    const uint32_t bucket = splitmix64(key) & (HASH_CAP - 1);
    int cur = head[bucket];
    while (cur != -1) {
        if (keys[cur] == key) return cur;
        cur = next_node[cur];
    }
    if (node_cnt >= MAX_NODES) return -1; // pool full: refuse new features
    const int fresh = node_cnt++;
    keys[fresh] = key;
    next_node[fresh] = head[bucket]; // push-front into the bucket chain
    head[bucket] = fresh;
    return fresh;
}
// Saturating accumulate: adds `delta` to a perceptron weight, clamping the
// result to [-30000, 30000] so the int16_t cell can never wrap around.
static inline void sat_add(int16_t& w, int delta) {
    w = (int16_t)std::clamp(w + delta, -30000, 30000);
}
// Handles one 100-symbol excerpt: extracts 1..4-gram features, scores all 56
// classes with a shared multi-class perceptron, submits the best class via
// the grader callback language(), and learns online from the returned true
// label. NOTE(review): assumes language() returns the correct class id in
// [0, 56) per the grader.h contract — confirm against the grader.
void excerpt(int E[100]) {
// One-time setup on the first call: mark every hash bucket empty (-1).
static bool inited = false;
if (!inited) {
memset(head, -1, sizeof(head));
inited = true;
}
// Feature extraction: unigram counts (clipped later) plus raw 2/3/4-gram id
// lists; n-grams are packed 16 bits per symbol into a single uint64_t key.
map<int, int> uni_counts;
vector<int> f2, f3, f4;
f2.reserve(100); f3.reserve(100); f4.reserve(100);
// Because E[i] is guaranteed >= 1, upper bits of larger n-grams
// will never be zero, making 1, 2, 3, and 4-grams inherently distinct!
// (NOTE(review): the 16-bit packing also assumes E[i] < 65536 — confirm.)
for (int i = 0; i < 100; i++) {
uint64_t h1 = E[i];
int id1 = get_id(h1);
if (id1 != -1) uni_counts[id1]++;
if (i < 99) {
uint64_t h2 = ((uint64_t)E[i] << 16) | E[i + 1];
int id2 = get_id(h2);
if (id2 != -1) f2.push_back(id2);
}
if (i < 98) {
uint64_t h3 = ((uint64_t)E[i] << 32) | ((uint64_t)E[i + 1] << 16) | E[i + 2];
int id3 = get_id(h3);
if (id3 != -1) f3.push_back(id3);
}
if (i < 97) {
uint64_t h4 = ((uint64_t)E[i] << 48) | ((uint64_t)E[i + 1] << 32) | ((uint64_t)E[i + 2] << 16) | E[i + 3];
int id4 = get_id(h4);
if (id4 != -1) f4.push_back(id4);
}
}
vector<pair<int, int>> f1;
// Cap unigram counts at 3 to avoid single-character spam skewing the score
for (auto& kv : uni_counts) f1.push_back({kv.first, min(kv.second, 3)});
// Deduplicate the 2/3/4-gram lists: each distinct n-gram contributes once,
// regardless of how many times it occurred in the excerpt.
auto make_unique = [](vector<int>& v) {
sort(v.begin(), v.end());
v.erase(unique(v.begin(), v.end()), v.end());
};
make_unique(f2);
make_unique(f3);
make_unique(f4);
// Track both the best and second-best class; the runner-up is needed for
// the margin-sharpening update below.
int best_l = 0, second_l = 1;
int32_t best_score = -2e9, second_score = -2e9;
// Evaluate scores: linear model, unigrams weighted by their clipped count,
// higher n-grams by a fixed factor of 2 (mirrored exactly in update()).
for (int l = 0; l < 56; l++) {
int32_t score = seen_cnt[l] * 2; // Small bias toward more historically seen languages
for (auto& p : f1) score += weights[p.first][l] * p.second;
for (int id : f2) score += weights[id][l] * 2;
for (int id : f3) score += weights[id][l] * 2;
for (int id : f4) score += weights[id][l] * 2;
if (score > best_score) {
second_score = best_score;
second_l = best_l;
best_score = score;
best_l = l;
} else if (score > second_score) {
second_score = score;
second_l = l;
}
}
int guess = best_l;
int correct = language(guess);
// Multi-class Perceptron update: reward class pos_l and penalize class
// neg_l with the same per-feature magnitudes used during scoring, so the
// update directly shifts the score difference between the two classes.
auto update = [&](int pos_l, int neg_l, int multiplier) {
for (auto& p : f1) {
sat_add(weights[p.first][pos_l], p.second * multiplier);
sat_add(weights[p.first][neg_l], -p.second * multiplier);
}
for (int id : f2) {
sat_add(weights[id][pos_l], 2 * multiplier);
sat_add(weights[id][neg_l], -2 * multiplier);
}
for (int id : f3) {
sat_add(weights[id][pos_l], 2 * multiplier);
sat_add(weights[id][neg_l], -2 * multiplier);
}
for (int id : f4) {
sat_add(weights[id][pos_l], 2 * multiplier);
sat_add(weights[id][neg_l], -2 * multiplier);
}
};
if (correct != guess) {
// Classic perceptron step: boost the true class, punish the wrong guess
update(correct, guess, 1);
} else {
// Margin-sharpening: if it was right but too close for comfort (margin
// under 40), push the true class away from the runner-up anyway.
if (best_score - second_score < 40) {
update(correct, second_l, 1);
}
}
// Record the true label so the seen_cnt prior reflects class frequency.
seen_cnt[correct]++;
}