#include "grader.h"
#include "lang.h"
#include <bits/stdc++.h>
//#include <ext/pb_ds/tree_policy.hpp>
//#include <ext/pb_ds/assoc_container.hpp>
using namespace std;
//using namespace __gnu_pbds;
#define FOR(i,start,end) for(int i=start;i<(int)(end);i++)
#define FORE(i,start,end) for(int i=start;i<=(int)end;i++)
#define RFORE(i,start,end) for(int i = start; i>=end; i--)
#define all(a) a.begin(), a.end()
#define v vector
typedef long long ll;
typedef pair<int, int > pii;
//template <class T> using ordered_set = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;
template<class T> void minn(T &a, T b) { a = min(a, b); }
template<class T> void maxx(T &a, T b) { a = max(a, b); }
const int LEN = 100, NLANGS = 56, MXSYMB = 1e6, MAXBUFFER = 152;
template<class Mt_T, class Int_T> struct Rand{
Mt_T mt;
Rand(){
mt = Mt_T(chrono::steady_clock::now().time_since_epoch().count());
}
inline Int_T operator()(Int_T a, Int_T b){
return uniform_int_distribution<Int_T>(a,b)(mt);
}
};
Rand<mt19937_64, ll> rng;
unordered_map<ll,int> symbols;
int getSymbol(ll rep){
auto iter = symbols.find(rep);
if(iter != symbols.end()) return iter->second;
int tmp = symbols.size();
if(tmp >= MXSYMB){
assert(0);
}
return symbols[rep] = tmp;
}
struct LangStats{
int totalCnt;
int pattCnt[MXSYMB];
set<pii> highestOcc; // {cnt, pattern}
LangStats(){
totalCnt = 0;
// memset(pattCnt, 0, sizeof pattCnt);
}
void add(int pattern, int freq){
totalCnt += freq;
highestOcc.erase({pattCnt[pattern], pattern});
pattCnt[pattern] += freq;
highestOcc.insert({pattCnt[pattern], pattern});
while(highestOcc.size() > MAXBUFFER)
highestOcc.erase(highestOcc.begin());
}
double getfreq(int pattern){
if(totalCnt == 0) return 0;
return 1.0*pattCnt[pattern]/totalCnt;
}
} ls[NLANGS];
int myDistr[MXSYMB];
const int ALPHA[3] = {1,1,1};
const int mytotal = (ALPHA[0]*LEN+ALPHA[1]*(LEN-1)+ALPHA[2]*(LEN-2));
void excerpt(int *E) {
//unordered_set<int> occPatts;
v<int> occPatts;
FOR(i,0,LEN) {
int tmp = getSymbol(E[i]);
//occPatts.insert(tmp);
occPatts.push_back(tmp);
myDistr[tmp] += ALPHA[0];
if(i+1<LEN){
tmp = getSymbol(E[i]*65536LL+E[i+1]);
//occPatts.insert(tmp);
occPatts.push_back(tmp);
myDistr[tmp] += ALPHA[1];
}
if(i+2 < LEN){
tmp = getSymbol(E[i]*65536LL*65536LL+E[i+1]*65536LL+E[i+2]);
//occPatts.insert(tmp);
occPatts.push_back(tmp);
myDistr[tmp] += ALPHA[2];
}
}
sort(all(occPatts));
occPatts.resize(unique(all(occPatts))-occPatts.begin());
pair<double, int> best = {1e50, rng(0,NLANGS-1)};
FOR(l,0,NLANGS) if(ls[l].totalCnt > 0){
double error = 0;
for(auto p : occPatts) {
error += abs(1.0*myDistr[p]/mytotal - ls[l].getfreq(p));
}
for(auto pp : ls[l].highestOcc)
if(myDistr[pp.second] == 0)
error+= ls[l].getfreq(pp.second);
minn(best, {error, l});
}
int ans = language(best.second);
for(auto pp : occPatts) {
ls[ans].add(pp, myDistr[pp]);
myDistr[pp] = 0;
}
}
# |
Verdict |
Execution time |
Memory |
Grader output |
1 |
Correct |
4537 ms |
46804 KB |
Output is correct |
# |
Verdict |
Execution time |
Memory |
Grader output |
1 |
Partially correct |
4913 ms |
46732 KB |
Output is partially correct - 87.39% |