Submission #802784

#TimeUsernameProblemLanguageResultExecution timeMemory
802784ymmGiraffes (JOI22_giraffes)C++17
100 / 100
6854 ms64508 KiB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
typedef long long ll;
using namespace std;

typedef unsigned char u8;
const int N = 8192;
u8 dp[N][N];
short a[N];
int n;

#define MAX(x,y) ((x)>(y)?(x):(y))

short pos[N];
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(short len, int l1, int r1)
{
	l1 = max(l1, 0);
	r1 = min(r1, n-len);
	int vec[4], vec2[4];
	Loop (i,l1,r1) {
		int r2 = n-len;
		short iil = i+len;
		short ii = i;
		vec[0] = pos[iil];
		vec[1] = pos[iil]-len;
		vec[2] = pos[ii];
		vec[3] = pos[ii]-len;
		Loop (jj,0,4) {
			int j = vec[jj];
			if (j < 0 || r2 <= j)
				continue;
			u8 x1 = dp[i][j] + (a[j+len] == iil);
			u8 x2 = dp[i][j+1] + (a[j] == iil);
			u8 x3 = dp[i+1][j] + (a[j+len] == ii);
			u8 x4 = dp[i+1][j+1] + (a[j] == ii);
			u8 x12 = MAX(x1, x2);
			u8 x34 = MAX(x3, x4);
			u8 x1234 = MAX(x12, x34);
			vec2[jj] = x1234;
		}
		typedef u8 xmm __attribute__((vector_size(16),aligned(16)));
		xmm *dp00 = (xmm *)(void *)(dp[i+0]+0);
		xmm *dp10 = (xmm *)(void *)(dp[i+1]+0);
		int rr2 = (r2+15)/16;
		xmm dard0 = MAX(dp00[0], dp10[0]);
		xmm dard1 = MAX(dp00[1], dp10[1]);
		for (int j = 0; j < rr2; j += 2) {
			xmm marg;
			marg = __builtin_shuffle(dard0, dard1, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
			dp00[j+0] = MAX(dard0, marg);
			dard0 = MAX(dp00[j+2], dp10[j+2]);
			marg = __builtin_shuffle(dard1, dard0, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
			dp00[j+1] = MAX(dard1, marg);
			dard1 = MAX(dp00[j+3], dp10[j+3]);
		}
		Loop (jj,0,4) {
			int j = vec[jj];
			if (j < 0 || r2 <= j)
				continue;
			dp[i][j] = vec2[jj];
		}
	}
}

const int S = 64;
void Do(int len)
{
	for (int i = 0; i < n-len; i += S) {
		for (int k = 0; k < S; k += 1)
			up(len+1+k, i-k, i+S-k);
	}
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	cin >> n;
	Loop (i,0,n) {
		cin >> a[i];
		--a[i];
	}
	Loop (i,0,n)
		pos[a[i]] = i;
	Loop (i,0,n) Loop (j,0,n)
		dp[i][j] = a[j] == i;
	for (int i = 0; i < n-1; i += S)
		Do(i);
	cout << n-dp[0][0] << '\n';
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...