#include <stdlib.h>
#include <stdio.h>
#include <iostream>
#include "grader.h"
#include "lang.h"
#include <map>
#include <set>
#include <vector>
using namespace std;
#define SZ 100
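// Online language identification: the grader feeds 100-letter excerpts to
// excerpt(); each call submits a guess via language(), whose return value is
// taken to be the correct language and is used to keep refining the
// statistics below. The guess combines, per candidate language:
//   * letters, digraphs, trigraphs and 4-graphs seen only in that language,
//   * a cosine-style similarity of letter-frequency vectors,
//   * whole words seen only in that language.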
// Each letter / n-gram / word maps to the single language it has been seen
// in, or to -1 once it has been observed in more than one language.
map<int, int> uniq_letter_to_lang;
map<unsigned long long, int> uniq_digraph_to_lang;
map<unsigned long long, int> uniq_trigraph_to_lang;
map<unsigned long long, int> uniq_quatgraph_to_lang;
map<vector<int>, int> uniq_word_to_lang;
// Letter frequencies accumulated per language over all answered excerpts.
int lang_to_letter_to_frequency[56][65536];
// Number of excerpts each letter has occurred in (picks the word separator).
int letter_to_text_frequency[65536];
// Most frequent letter so far; used as the word separator when splitting.
int most_frequent_letter = -1;
// Squared Euclidean norm of each language's letter-frequency vector.
double norm_f[56];
// Pack consecutive letters (16-bit values) into one key. 64-bit unsigned
// arithmetic is used because e.g. E[0] * 65536 can overflow a 32-bit int.
unsigned long long get_digraph(int* E){
    return ((unsigned long long) E[0] << 16) | (unsigned long long) E[1];
}
unsigned long long get_trigraph(int* E){
    return ((unsigned long long) E[0] << 32) | ((unsigned long long) E[1] << 16)
         | (unsigned long long) E[2];
}
unsigned long long get_quatgraph(int* E){
    return ((unsigned long long) E[0] << 48) | ((unsigned long long) E[1] << 32)
         | ((unsigned long long) E[2] << 16) | (unsigned long long) E[3];
}
// Squared Euclidean norm of a sparse frequency vector.
double norm(map<int, int>& m){
    double w = 0;
    for(auto ite : m){
        w += ite.second * ite.second;
    }
    return w;
}
// Cosine-style similarity between the excerpt's letter frequencies and those
// accumulated for language b, normalised by the product of squared norms.
double get_dot_product(map<int, int>& a, int b){
    double dot_product = 0.;
    for(auto &ite : a){
        dot_product += ite.second * lang_to_letter_to_frequency[b][ite.first];
    }
    double denom = norm(a) * norm_f[b];
    if(denom == 0.){
        return 0.; // no data for language b yet; avoid dividing by zero
    }
    return dot_product / denom;
}
int test_cases = 0;
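// Called once per excerpt E of SZ letters. Assumes the grader's language()
// returns the true language id (the code below trains on that reply).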
void excerpt(int *E) {
    if(test_cases == 0){
        // One-time initialisation (redundant for zero-initialised globals,
        // kept to make the intent explicit).
        for(int i = 0; i < 56; i++){
            norm_f[i] = 0.;
            for(int j = 0; j < 65536; j++){
                lang_to_letter_to_frequency[i][j] = 0;
            }
        }
        for(int j = 0; j < 65536; j++){
            letter_to_text_frequency[j] = 0;
        }
    }
    test_cases++;

    // Per-language feature counters for this excerpt.
    int languages_to_unique_letters_shared[56];
    int languages_to_unique_digraphs_shared[56];
    int languages_to_unique_trigraphs_shared[56];
    int languages_to_unique_quatgraphs_shared[56];
    int languages_to_unique_words_shared[56];
    for(int i = 0; i < 56; i++){
        languages_to_unique_letters_shared[i] =
            languages_to_unique_digraphs_shared[i] =
            languages_to_unique_trigraphs_shared[i] =
            languages_to_unique_quatgraphs_shared[i] =
            languages_to_unique_words_shared[i] = 0;
    }
    // Split the excerpt into words, treating the globally most frequent
    // letter (in practice the space character) as the separator.
    vector<vector<int>> words;
    vector<int> curr_word;
    for(int i = 0; i <= SZ; i++){
        if(i == SZ or E[i] == most_frequent_letter){
            words.push_back(curr_word);
            curr_word.clear();
            continue;
        }
        curr_word.push_back(E[i]);
    }
    // For every letter / n-gram of the excerpt that is currently unique to a
    // single language, credit that language (-1 marks keys already seen in
    // more than one language).
    for(int i = 0; i < SZ; i++){
        auto ite = uniq_letter_to_lang.find(E[i]);
        if(ite == uniq_letter_to_lang.end() or ite->second == -1){
            continue;
        }
        languages_to_unique_letters_shared[ite->second]++;
    }
    for(int i = 0; i < SZ - 1; i++){
        auto ite = uniq_digraph_to_lang.find(get_digraph(&E[i]));
        if(ite == uniq_digraph_to_lang.end() or ite->second == -1){
            continue;
        }
        languages_to_unique_digraphs_shared[ite->second]++;
    }
    for(int i = 0; i < SZ - 2; i++){
        auto ite = uniq_trigraph_to_lang.find(get_trigraph(&E[i]));
        if(ite == uniq_trigraph_to_lang.end() or ite->second == -1){
            continue;
        }
        languages_to_unique_trigraphs_shared[ite->second]++;
    }
    for(int i = 0; i < SZ - 3; i++){
        auto ite = uniq_quatgraph_to_lang.find(get_quatgraph(&E[i]));
        if(ite == uniq_quatgraph_to_lang.end() or ite->second == -1){
            continue;
        }
        languages_to_unique_quatgraphs_shared[ite->second]++;
    }
    // Letter-frequency vector of this excerpt. A distinct name is used so the
    // global letter_to_text_frequency array (which tracks per-excerpt letter
    // occurrence for choosing the separator) is not shadowed.
    map<int, int> excerpt_letter_frequency;
    for(int i = 0; i < SZ; i++){
        excerpt_letter_frequency[E[i]]++;
    }

    // Credit languages for words that are so far unique to them.
    for(vector<int>& word : words){
        auto ite = uniq_word_to_lang.find(word);
        if(ite == uniq_word_to_lang.end() or ite->second == -1){
            continue;
        }
        languages_to_unique_words_shared[ite->second]++;
    }
    // Score every language and keep the best one.
    int best_lang = 0;
    double best_score = -1.;
    for(int i = 0; i < 56; i++){
        int uniq_letter_score = languages_to_unique_letters_shared[i];
        int uniq_digraph_score = languages_to_unique_digraphs_shared[i];
        int uniq_trigraph_score = languages_to_unique_trigraphs_shared[i];
        int uniq_quatgraph_score = languages_to_unique_quatgraphs_shared[i];
        int uniq_word_score = languages_to_unique_words_shared[i];
        double dot_product = get_dot_product(excerpt_letter_frequency, i);
        // Each n-gram count is normalised by the number of n-grams in a
        // 100-letter excerpt; the word feature gets a small fixed weight.
        double curr_score = (
            uniq_letter_score / 100. +
            uniq_digraph_score / 99. +
            uniq_trigraph_score / 98. +
            uniq_quatgraph_score / 97. +
            dot_product +
            ((double) uniq_word_score) / words.size() / 32
        );
        if(best_score < curr_score){
            best_lang = i;
            best_score = curr_score;
        }
    }
    // Submit the guess; the grader's reply is the correct language, which
    // drives all of the updates below.
    int resp = language(best_lang);

    // Record each key as belonging to resp, or demote it to -1 (ambiguous)
    // if it was previously recorded for a different language.
    for(int i = 0; i < SZ; i++){
        auto ite = uniq_letter_to_lang.find(E[i]);
        if(ite == uniq_letter_to_lang.end()){
            uniq_letter_to_lang[E[i]] = resp;
        } else if(ite->second != resp){
            ite->second = -1;
        }
    }
    for(int i = 0; i < SZ - 1; i++){
        auto ite = uniq_digraph_to_lang.find(get_digraph(&E[i]));
        if(ite == uniq_digraph_to_lang.end()){
            uniq_digraph_to_lang[get_digraph(&E[i])] = resp;
        } else if(ite->second != resp){
            ite->second = -1;
        }
    }
    for(int i = 0; i < SZ - 2; i++){
        auto ite = uniq_trigraph_to_lang.find(get_trigraph(&E[i]));
        if(ite == uniq_trigraph_to_lang.end()){
            uniq_trigraph_to_lang[get_trigraph(&E[i])] = resp;
        } else if(ite->second != resp){
            ite->second = -1;
        }
    }
    for(int i = 0; i < SZ - 3; i++){
        auto ite = uniq_quatgraph_to_lang.find(get_quatgraph(&E[i]));
        if(ite == uniq_quatgraph_to_lang.end()){
            uniq_quatgraph_to_lang[get_quatgraph(&E[i])] = resp;
        } else if(ite->second != resp){
            ite->second = -1;
        }
    }
    // Update resp's letter frequencies and, incrementally, the squared norm
    // used by get_dot_product.
    for(int i = 0; i < SZ; i++){
        norm_f[resp] -= lang_to_letter_to_frequency[resp][E[i]] * lang_to_letter_to_frequency[resp][E[i]];
        lang_to_letter_to_frequency[resp][E[i]]++;
        norm_f[resp] += lang_to_letter_to_frequency[resp][E[i]] * lang_to_letter_to_frequency[resp][E[i]];
    }

    // Update the word separator: the letter that has occurred in the most
    // excerpts so far. When it changes, words learned with the old separator
    // are discarded.
    set<int> distinct_letters;
    for(int i = 0; i < SZ; i++){
        distinct_letters.insert(E[i]);
    }
    for(int letter : distinct_letters){
        letter_to_text_frequency[letter]++;
        if(most_frequent_letter == -1 or letter_to_text_frequency[most_frequent_letter] < letter_to_text_frequency[letter]){
            most_frequent_letter = letter;
            uniq_word_to_lang.clear();
            words.clear();
        }
    }

    // Learn which whole words are unique to a single language.
    for(vector<int>& word : words){
        auto ite = uniq_word_to_lang.find(word);
        if(ite == uniq_word_to_lang.end()){
            uniq_word_to_lang[word] = resp;
        } else if(ite->second != resp){
            ite->second = -1;
        }
    }
}