#include "grader.h"
#include <bits/stdc++.h>
using namespace std;
static const int LANGS = 56;
static const int SZ = 100;
// Power-of-two table -> use & mask instead of % (fast)
static const int MOD = 1 << 20; // 1,048,576
static const uint32_t MASK = MOD - 1;
// 56 * 1,048,576 bytes ~= 56 MB
static uint8_t freq[LANGS][MOD];
static uint16_t cnt[LANGS];
static float lut[256];
static bool inited = false;
static inline void init_lut() {
if (inited) return;
for (int i = 0; i < 256; i++) {
float x = (float)i;
lut[i] = x / (x + 1.1f); // same "hyperb" shape used in popular solutions
}
inited = true;
}
static inline void inc_sat(uint8_t &v) { if (v != 255) v++; }
void excerpt(int *E) {
init_lut();
// Precompute hashed 1/2/3/4-grams for positions 0..96
uint32_t si[SZ - 3], bi[SZ - 3], tr[SZ - 3], qu[SZ - 3];
for (int i = 0; i < SZ - 3; i++) {
uint32_t s = (uint32_t)E[i] & MASK;
uint32_t b = (((uint32_t)E[i] << 16) + (uint32_t)E[i + 1]) & MASK;
uint32_t t = ((b << 16) + (uint32_t)E[i + 2]) & MASK;
uint32_t q = ((t << 16) + (uint32_t)E[i + 3]) & MASK;
si[i] = s; bi[i] = b; tr[i] = t; qu[i] = q;
}
int best = 0;
float best_sim = -1e30f;
for (int l = 0; l < LANGS; l++) {
if (!cnt[l]) continue; // unseen langs won't score well anyway
const uint8_t *F = freq[l];
float sim = 0.0f;
// Weights: tuned-ish (taken from a known good template)
for (int i = 0; i < SZ - 3; i++) {
sim += 9.0f * lut[F[qu[i]]];
sim += 1.0f * lut[F[tr[i]]];
sim += 6.0f * lut[F[bi[i]]];
sim += 7.0f * lut[F[si[i]]];
}
// Mild normalization (also common)
sim /= logf((float)cnt[l] + 2.0f);
if (sim > best_sim) best_sim = sim, best = l;
}
int ans = language(best);
cnt[ans]++;
uint8_t *F = freq[ans];
for (int i = 0; i < SZ - 3; i++) {
inc_sat(F[qu[i]]);
inc_sat(F[tr[i]]);
inc_sat(F[bi[i]]);
inc_sat(F[si[i]]);
}
}