Submission #802784

#TimeUsernameProblemLanguageResultExecution timeMemory
802784ymmGiraffes (JOI22_giraffes)C++17
100 / 100
6854 ms64508 KiB
#include <bits/stdc++.h> #define Loop(x,l,r) for (ll x = (l); x < (r); ++x) #define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x) typedef long long ll; using namespace std; typedef unsigned char u8; const int N = 8192; u8 dp[N][N]; short a[N]; int n; #define MAX(x,y) ((x)>(y)?(x):(y)) short pos[N]; __attribute__((optimize("O3,unroll-loops"),target("avx2"))) void up(short len, int l1, int r1) { l1 = max(l1, 0); r1 = min(r1, n-len); int vec[4], vec2[4]; Loop (i,l1,r1) { int r2 = n-len; short iil = i+len; short ii = i; vec[0] = pos[iil]; vec[1] = pos[iil]-len; vec[2] = pos[ii]; vec[3] = pos[ii]-len; Loop (jj,0,4) { int j = vec[jj]; if (j < 0 || r2 <= j) continue; u8 x1 = dp[i][j] + (a[j+len] == iil); u8 x2 = dp[i][j+1] + (a[j] == iil); u8 x3 = dp[i+1][j] + (a[j+len] == ii); u8 x4 = dp[i+1][j+1] + (a[j] == ii); u8 x12 = MAX(x1, x2); u8 x34 = MAX(x3, x4); u8 x1234 = MAX(x12, x34); vec2[jj] = x1234; } typedef u8 xmm __attribute__((vector_size(16),aligned(16))); xmm *dp00 = (xmm *)(void *)(dp[i+0]+0); xmm *dp10 = (xmm *)(void *)(dp[i+1]+0); int rr2 = (r2+15)/16; xmm dard0 = MAX(dp00[0], dp10[0]); xmm dard1 = MAX(dp00[1], dp10[1]); for (int j = 0; j < rr2; j += 2) { xmm marg; marg = __builtin_shuffle(dard0, dard1, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); dp00[j+0] = MAX(dard0, marg); dard0 = MAX(dp00[j+2], dp10[j+2]); marg = __builtin_shuffle(dard1, dard0, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}); dp00[j+1] = MAX(dard1, marg); dard1 = MAX(dp00[j+3], dp10[j+3]); } Loop (jj,0,4) { int j = vec[jj]; if (j < 0 || r2 <= j) continue; dp[i][j] = vec2[jj]; } } } const int S = 64; void Do(int len) { for (int i = 0; i < n-len; i += S) { for (int k = 0; k < S; k += 1) up(len+1+k, i-k, i+S-k); } } int main() { cin.tie(0) -> sync_with_stdio(false); cin >> n; Loop (i,0,n) { cin >> a[i]; --a[i]; } Loop (i,0,n) pos[a[i]] = i; Loop (i,0,n) Loop (j,0,n) dp[i][j] = a[j] == i; for (int i = 0; i < n-1; i += S) Do(i); cout << n-dp[0][0] << '\n'; }
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...