Submission #1259214

#TimeUsernameProblemLanguageResultExecution timeMemory
1259214SortingTriple Peaks (IOI25_triples)C++20
70 / 100
1732 ms15292 KiB
#include <iostream> #include <algorithm> #include <vector> #include <map> #include <set> #include <array> #include <iomanip> #include <queue> #include <stack> #include <numeric> #include <cassert> #include <cmath> #include <random> #include <ctime> #include <chrono> #include <x86intrin.h> #pragma GCC optimize("O3") #pragma GCC target("avx2") using namespace std; typedef long long ll; typedef vector<int> vi; typedef vector<ll> vll; typedef pair<int, int> pii; typedef pair<ll, ll> pll; #define all(x) (x).begin(), (x).end() #define sz(x) ((int)x.size()) #define rep(i, a, b) for(int i = a; i < b; ++i) template<typename C> struct rge{C l, r;}; template<typename C> rge<C> range(C i, C j) { return rge<C>{i, j}; } template<typename C> ostream& operator<<(ostream &os, rge<C> r) { os << '{'; for(auto it = r.l; it != r.r; it++) os << "," + (it == r.l) << *it; os << '}'; return os; } template<typename A, typename B> ostream& operator<<(ostream &os, const pair<A, B> &p) { return os << '{' << p.first << "," << p.second << '}'; } template<typename T_container, typename T = typename enable_if<!is_same<T_container, string>::value, typename T_container::value_type>::type> ostream& operator<<(ostream &os, const T_container &v) { os << '{'; string sep; for (const T &x : v) os << sep << x, sep = ","; return os << '}'; } void dbg_out() { cerr << ']' << endl; } template<typename A> void dbg_out(A H) { cerr << H; dbg_out(); } template<typename A, typename B, typename... C> void dbg_out(A H, B G, C... T) { cerr << H << ","; dbg_out(G, T...); } #ifdef DEBUG #define debug(...) cerr << "[" << #__VA_ARGS__ << "] = [", dbg_out(__VA_ARGS__) #else #define debug(...) #endif template <typename T> void remove_duplicates(vector<T> &v){ sort(all(v)); v.resize(unique(all(v)) - v.begin()); } template <typename T> void chmin(T &a, T b){ a = (b < a) ? b : a; } template <typename T> void chmax(T &a, T b){ a = (b > a) ? b : a; } const int N = 2e5 + 3; ll n; alignas(64) int h[N]; ll count_easy() { set<array<int, 3>> triples; auto test3 = [&](int a, int b, int c) { if (c < 0 || c >= n) { return; } int arr[3]{a, b, c}; sort(arr, arr + 3); a = arr[0]; b = arr[1]; c = arr[2]; int diffs[3]{b - a, c - b, c - a}; int vals[3]{h[a], h[b], h[c]}; sort(diffs, diffs + 3); sort(vals, vals + 3); for (int i = 0; i < 3; ++i) { if (diffs[i] != vals[i]) { return; } } triples.insert({a, b, c}); }; auto test2 = [&](int a, int b) { if (b < 0 || b >= n) { return; } test3(a, b, a + h[b]); test3(a, b, a - h[b]); test3(a, b, b + h[b]); test3(a, b, b - h[b]); }; for (int i = 0; i < n; ++i) { test2(i, i - h[i]); test2(i, i + h[i]); } return triples.size(); } static inline __m256i seq8_from(int i) { const __m256i base = _mm256_set1_epi32(i); // [i,i,i,i,i,i,i,i] const __m256i inc = _mm256_setr_epi32(0,1,2,3,4,5,6,7); // [0..7] in ascending lane order return _mm256_add_epi32(base, inc); // [i..i+7] } int hsum(__m256i x) { __m128i l = _mm256_extracti128_si256(x, 0); __m128i h = _mm256_extracti128_si256(x, 1); l = _mm_add_epi32(l, h); l = _mm_hadd_epi32(l, l); return _mm_extract_epi32(l, 0) + _mm_extract_epi32(l, 1); } ll count_triples(vector<int> H) { n = H.size(); for (int i = 0; i < n; ++i) { h[i] = H[i]; } ll ans = count_easy(); for (int i = 0; i < n; ++i) { int j = i + h[i]; int k = j + h[i]; if (k >= n) { continue; } ans -= h[k] == h[i] && h[j] == 2 * h[i]; } ll mx = *max_element(h, h + n); for (int i = 0; i < n; ++i) { int j = i + 1; int k = j + h[i]; __m256i kdiff = seq8_from(k - i); __m256i jdiff = seq8_from(j - i); __m256i vans = _mm256_set1_epi32(0); const __m256i ones = _mm256_set1_epi32(1); const __m256i eights = _mm256_set1_epi32(8); ll C = min(n, (ll)i + mx + 1); for (; k + 8 < C; k += 8, j += 8) { __m256i hj = _mm256_loadu_si256((const __m256i*)&h[j]); __m256i hk = _mm256_loadu_si256((const __m256i*)&h[k]); // vector compares -> 0xFFFFFFFF per lane on match, else 0x0 __m256i mj = _mm256_cmpeq_epi32(hj, kdiff); __m256i mk = _mm256_cmpeq_epi32(hk, jdiff); // lanes where both match __m256i m = _mm256_and_si256(mj, mk); vans = _mm256_add_epi32(vans, m); kdiff = _mm256_add_epi32(kdiff, eights); jdiff = _mm256_add_epi32(jdiff, eights); } ans -= hsum(vans); for (; k < C; ++j, ++k) { ans += (h[j] == (k - i)) && (h[k] == (j - i)); } } return ans; } std::vector<int> construct_range(int M, int K) { return {};} #ifdef LOCAL_TEST int main() { vector<int> H = {4,1,4,3,2,6,1}; cout << count_triples(H) << "\n"; // 3 } #endif
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...