This submission was migrated from a previous version of oj.uz, which used a different machine for grading. It may produce a different result if resubmitted.
#include <bits/stdc++.h>
// Loop(x,l,r): for-loop header that runs an `ll` counter x upward over the
// half-open range [l, r).
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
// LoopR(x,l,r): for-loop header that runs an `ll` counter x downward from
// r-1 to l inclusive.
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
// Shorthand for long long; this is the counter type of the loop macros above.
typedef long long ll;
using namespace std;
// Unsigned byte; the cell type of dp.
typedef unsigned char u8;
// Compile-time capacity: dp is N x N, and a and pos each hold N entries.
const int N = 8192;
// Byte table updated in place by up(). main seeds dp[i][j] with (a[j] == i)
// and finally prints n - dp[0][0].
// Cells are u8, so any value stored here wraps modulo 256.
// NOTE(review): confirm the intended inputs keep every cell below 256.
// NOTE(review): up() accesses rows through a 16-byte-aligned vector type;
// that assumes this array starts on a 16-byte boundary, which is not
// requested explicitly here — TODO confirm.
u8 dp[N][N];
// Input values; main stores them 0-based (each value is decremented on read).
short a[N];
// Number of input elements, read by main.
int n;
// Function-like maximum. The selected argument is evaluated twice, so the
// arguments must be free of side effects. up() also applies it to GCC vector
// operands, where > and ?: act element-wise.
#define MAX(x,y) ((x)>(y)?(x):(y))
// main sets pos[a[i]] = i, i.e. the index at which each (0-based) value occurs.
short pos[N];
// Updates rows [max(l1,0), min(r1,n-len)) of dp in place, one row at a time
// in increasing row order.
//
// For a row i the new value of column j is
//   max(dp[i][j]     + (a[j+len] == i+len),
//       dp[i][j+1]   + (a[j]     == i+len),
//       dp[i+1][j]   + (a[j+len] == i),
//       dp[i+1][j+1] + (a[j]     == i))
// where all reads refer to the values present before row i is rewritten.
// The +1 terms are evaluated only at the four candidate columns pos[i+len],
// pos[i+len]-len, pos[i] and pos[i]-len; every other column receives the
// plain four-way max from the vector pass. This presumably relies on a
// holding distinct values so that those are the only columns where a bonus
// can apply — TODO confirm.
//
// len : offset used in the i+len / j+len lookups; n-len bounds the columns.
// l1  : first row (clamped to 0).
// r1  : one past the last row (clamped to n-len).
//
// The GCC attribute compiles this one function with O3, loop unrolling and
// AVX2 code generation.
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(short len, int l1, int r1)
{
l1 = max(l1, 0);
r1 = min(r1, n-len);
// vec: the four candidate columns of the current row; vec2: their new values.
int vec[4], vec2[4];
Loop (i,l1,r1) {
// Number of valid columns for this len.
int r2 = n-len;
short iil = i+len;
short ii = i;
vec[0] = pos[iil];
vec[1] = pos[iil]-len;
vec[2] = pos[ii];
vec[3] = pos[ii]-len;
// Scalar pass: evaluate the full recurrence at the candidate columns. It must
// run before the vector pass because that pass overwrites dp[i][*].
Loop (jj,0,4) {
int j = vec[jj];
// Candidates outside [0, r2) are skipped; their vec2 slot stays unset and is
// never read, because the write-back loop below applies the same test.
if (j < 0 || r2 <= j)
continue;
// Each sum is narrowed to u8, so it wraps modulo 256.
u8 x1 = dp[i][j] + (a[j+len] == iil);
u8 x2 = dp[i][j+1] + (a[j] == iil);
u8 x3 = dp[i+1][j] + (a[j+len] == ii);
u8 x4 = dp[i+1][j+1] + (a[j] == ii);
u8 x12 = MAX(x1, x2);
u8 x34 = MAX(x3, x4);
u8 x1234 = MAX(x12, x34);
vec2[jj] = x1234;
}
// GCC vector type: 16 unsigned bytes, declared 16-byte aligned.
typedef u8 xmm __attribute__((vector_size(16),aligned(16)));
// Rows i and i+1 viewed as arrays of 16-byte blocks.
// NOTE(review): assumes dp itself is 16-byte aligned (rows are N bytes apart,
// a multiple of 16) — TODO confirm.
xmm *dp00 = (xmm *)(void *)(dp[i+0]+0);
xmm *dp10 = (xmm *)(void *)(dp[i+1]+0);
// Number of 16-byte blocks needed to cover r2 columns.
int rr2 = (r2+15)/16;
// dard0 / dard1: element-wise max of rows i and i+1 for the current block and
// the block after it.
xmm dard0 = MAX(dp00[0], dp10[0]);
xmm dard1 = MAX(dp00[1], dp10[1]);
// Vector pass, two blocks per iteration. Every block is loaded before it is
// stored: block j+1 is already held in dard1 and block j+2 is loaded right
// after block j is written. Stores can cover columns at or beyond r2 inside
// the last processed block(s).
// NOTE(review): the loop loads blocks j+2 and j+3 ahead, i.e. as far as block
// rr2+2. When that index reaches N/16 the loads run past the end of the row
// (and, for the last row of dp, past the array) — confirm n stays far enough
// below N for the intended inputs.
for (int j = 0; j < rr2; j += 2) {
xmm marg;
// Indices 1..16 over the pair (dard0, dard1) pick dard0[1..15] followed by
// dard1[0]: the same data shifted one column to the left.
marg = __builtin_shuffle(dard0, dard1, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
dp00[j+0] = MAX(dard0, marg);
dard0 = MAX(dp00[j+2], dp10[j+2]);
marg = __builtin_shuffle(dard1, dard0, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
dp00[j+1] = MAX(dard1, marg);
dard1 = MAX(dp00[j+3], dp10[j+3]);
}
// Write-back: replace the vector result at the candidate columns with the
// values computed by the scalar pass.
Loop (jj,0,4) {
int j = vec[jj];
if (j < 0 || r2 <= j)
continue;
dp[i][j] = vec2[jj];
}
}
}
// Strip width: rows are processed S at a time, and each Do() call issues S
// consecutive up() levels.
const int S = 64;

// Issues up() for the S levels len+1 .. len+S.
//
// Rows are walked in strips of S starting at 0 while the strip start is below
// n-len. Inside a strip, level len+1+k is applied to the row window
// [base-k, base+S-k), so each successive level covers a window shifted one row
// lower than the previous one. up() itself clamps every window to the valid
// row range.
// NOTE(review): the skewed windows presumably keep each row's dependency on
// row i+1 of the previous level satisfied while a strip is still cache-hot —
// TODO confirm.
void Do(int len)
{
	const int row_limit = n - len;
	for (int base = 0; base < row_limit; base += S) {
		int k = 0;
		while (k < S) {
			up(len + 1 + k, base - k, base + S - k);
			++k;
		}
	}
}
int main()
{
cin.tie(0) -> sync_with_stdio(false);
cin >> n;
Loop (i,0,n) {
cin >> a[i];
--a[i];
}
Loop (i,0,n)
pos[a[i]] = i;
Loop (i,0,n) Loop (j,0,n)
dp[i][j] = a[j] == i;
for (int i = 0; i < n-1; i += S)
Do(i);
cout << n-dp[0][0] << '\n';
}
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |