답안 #802784

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
802784 2023-08-02T14:26:01 Z ymm Giraffes (JOI22_giraffes) C++17
100 / 100
6854 ms 64508 KB
#include <bits/stdc++.h>
// Loop(x,l,r): ascending for-loop header with a long long index x over [l, r).
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
// LoopR(x,l,r): descending for-loop header, x runs from (r)-1 down to l.
// Not used in the visible part of this file.
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
typedef long long ll;
using namespace std;

// One-byte unsigned cell type used for the dp table.
typedef unsigned char u8;
// Capacity of every array below (rows/columns of dp, entries of a and pos).
const int N = 8192;
// N*N one-byte cells (64 MiB at N = 8192). main() seeds dp[i][j] with
// (a[j] == i); up() then rewrites rows in place, and the final answer is read
// from dp[0][0].
// NOTE(review): cells are u8, so the code presumably relies on values never
// exceeding 255 -- confirm against the problem constraints.
u8 dp[N][N];
// a[j]: the j-th input value, shifted to 0-based in main().
short a[N];
// Number of elements, read from stdin in main().
int n;

// Ternary maximum. Used in up() both on u8 scalars and on 16-byte vector
// values (the latter relies on the compiler's vector-extension support for
// `>` and `?:`). Being a macro, it may evaluate an argument more than once.
#define MAX(x,y) ((x)>(y)?(x):(y))

// pos[v]: the index j with a[j] == v (filled in main()), i.e. the inverse of a.
short pos[N];
// Advances rows [l1, r1) of dp, in place, to extent `len`.
//
// For every row i in the clamped range and every column c, the row is
// rewritten as
//     dp[i][c] = max(dp[i][c], dp[i][c+1], dp[i+1][c], dp[i+1][c+1])
// except at up to four "special" columns where one of a[j] / a[j+len] equals
// i or i+len; there the matching neighbour gets +1 before the maximum is
// taken.
// NOTE(review): this looks like the square-window DP where dp[i][j] describes
// values [i, i+len] over positions [j, j+len] -- confirm against the problem.
//
// Parameters:
//   len - extent being produced; row i is paired with value i+len and column
//         j with position j+len.
//   l1  - first row to update (clamped up to 0).
//   r1  - one past the last row to update (clamped down to n-len).
//
// The attribute asks GCC to compile this function with O3, loop unrolling and
// the AVX2 target regardless of the global flags.
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(short len, int l1, int r1)
{
	l1 = max(l1, 0);
	r1 = min(r1, n-len);
	// vec: candidate special columns for the current row; vec2: their new values.
	int vec[4], vec2[4];
	Loop (i,l1,r1) {
		// Columns j are meaningful only for j < r2, so that j+len < n.
		int r2 = n-len;
		short iil = i+len;
		short ii = i;
		// Columns where a[j] or a[j+len] equals i+len or i: the positions of
		// those two values, and the same positions shifted left by len.
		vec[0] = pos[iil];
		vec[1] = pos[iil]-len;
		vec[2] = pos[ii];
		vec[3] = pos[ii]-len;
		// Evaluate the special columns first, while row i still holds the
		// values the vector sweep below is about to overwrite.
		Loop (jj,0,4) {
			int j = vec[jj];
			if (j < 0 || r2 <= j)
				continue;	// candidate falls outside the valid column range
			// Each neighbour earns +1 when the corresponding corner matches.
			u8 x1 = dp[i][j] + (a[j+len] == iil);
			u8 x2 = dp[i][j+1] + (a[j] == iil);
			u8 x3 = dp[i+1][j] + (a[j+len] == ii);
			u8 x4 = dp[i+1][j+1] + (a[j] == ii);
			u8 x12 = MAX(x1, x2);
			u8 x34 = MAX(x3, x4);
			u8 x1234 = MAX(x12, x34);
			vec2[jj] = x1234;
		}
		// Bonus-free sweep over the whole row, 16 one-byte cells at a time,
		// using GCC vector extensions.
		// NOTE(review): the casts assume each dp row is 16-byte aligned. The
		// row stride N is a multiple of 16, but the alignment of dp itself is
		// not stated in the source -- TODO confirm.
		typedef u8 xmm __attribute__((vector_size(16),aligned(16)));
		xmm *dp00 = (xmm *)(void *)(dp[i+0]+0);
		xmm *dp10 = (xmm *)(void *)(dp[i+1]+0);
		// Number of 16-byte chunks needed to cover columns [0, r2).
		int rr2 = (r2+15)/16;
		// dardK: element-wise max of rows i and i+1 for a chunk (vertical max).
		xmm dard0 = MAX(dp00[0], dp10[0]);
		xmm dard1 = MAX(dp00[1], dp10[1]);
		// Two chunks per iteration. Chunks j+2 and j+3 are loaded before any
		// store reaches them, so the sweep always reads pre-update values.
		// NOTE(review): loads reach chunk j+3 and, when rr2 is odd, a store
		// lands on chunk rr2, i.e. on columns >= r2. This presumably relies on
		// those cells being unused padding (N larger than n) -- confirm.
		for (int j = 0; j < rr2; j += 2) {
			xmm marg;
			// Indices 1..16 select lanes 1..15 of the first operand followed by
			// lane 0 of the second: the vertical max shifted left by one column.
			marg = __builtin_shuffle(dard0, dard1, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
			dp00[j+0] = MAX(dard0, marg);
			dard0 = MAX(dp00[j+2], dp10[j+2]);
			marg = __builtin_shuffle(dard1, dard0, xmm{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16});
			dp00[j+1] = MAX(dard1, marg);
			dard1 = MAX(dp00[j+3], dp10[j+3]);
		}
		// Put back the special-column results, replacing the bonus-free values
		// the sweep just stored there.
		Loop (jj,0,4) {
			int j = vec[jj];
			if (j < 0 || r2 <= j)
				continue;
			dp[i][j] = vec2[jj];
		}
	}
}

// Batch size: number of consecutive extents handled per Do() call, and also
// the height (in rows) of each band.
const int S = 64;

// Runs up() for the S extents len+1 .. len+S.
//
// Rows are visited in bands of S starting at 0, S, 2S, ... while the band
// start is below n-len. Within a band, the step-th extent is applied to a row
// window moved `step` rows towards zero: [start-step, start+S-step).
// up() itself clamps the window to the valid row range.
void Do(int len)
{
	const int row_limit = n - len;
	int start = 0;
	while (start < row_limit) {
		int step = 0;
		while (step < S) {
			const int first_row = start - step;
			const int end_row = start + S - step;
			up(len + 1 + step, first_row, end_row);
			++step;
		}
		start += S;
	}
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	cin >> n;
	Loop (i,0,n) {
		cin >> a[i];
		--a[i];
	}
	Loop (i,0,n)
		pos[a[i]] = i;
	Loop (i,0,n) Loop (j,0,n)
		dp[i][j] = a[j] == i;
	for (int i = 0; i < n-1; i += S)
		Do(i);
	cout << n-dp[0][0] << '\n';
}
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 328 KB Output is correct
2 Correct 1 ms 212 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 328 KB Output is correct
6 Correct 1 ms 324 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 336 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 328 KB Output is correct
2 Correct 1 ms 212 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 328 KB Output is correct
6 Correct 1 ms 324 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 336 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
11 Correct 1 ms 332 KB Output is correct
12 Correct 1 ms 340 KB Output is correct
13 Correct 1 ms 340 KB Output is correct
14 Correct 1 ms 340 KB Output is correct
15 Correct 1 ms 328 KB Output is correct
16 Correct 1 ms 328 KB Output is correct
17 Correct 1 ms 340 KB Output is correct
18 Correct 1 ms 340 KB Output is correct
19 Correct 1 ms 340 KB Output is correct
20 Correct 1 ms 340 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 328 KB Output is correct
2 Correct 1 ms 212 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 328 KB Output is correct
6 Correct 1 ms 324 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 336 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
11 Correct 1 ms 332 KB Output is correct
12 Correct 1 ms 340 KB Output is correct
13 Correct 1 ms 340 KB Output is correct
14 Correct 1 ms 340 KB Output is correct
15 Correct 1 ms 328 KB Output is correct
16 Correct 1 ms 328 KB Output is correct
17 Correct 1 ms 340 KB Output is correct
18 Correct 1 ms 340 KB Output is correct
19 Correct 1 ms 340 KB Output is correct
20 Correct 1 ms 340 KB Output is correct
21 Correct 1 ms 328 KB Output is correct
22 Correct 1 ms 596 KB Output is correct
23 Correct 1 ms 968 KB Output is correct
24 Correct 2 ms 1308 KB Output is correct
25 Correct 2 ms 1492 KB Output is correct
26 Correct 2 ms 1492 KB Output is correct
27 Correct 2 ms 1492 KB Output is correct
28 Correct 3 ms 1492 KB Output is correct
29 Correct 3 ms 1492 KB Output is correct
30 Correct 2 ms 1492 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 328 KB Output is correct
2 Correct 1 ms 212 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 328 KB Output is correct
6 Correct 1 ms 324 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 336 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
11 Correct 1 ms 332 KB Output is correct
12 Correct 1 ms 340 KB Output is correct
13 Correct 1 ms 340 KB Output is correct
14 Correct 1 ms 340 KB Output is correct
15 Correct 1 ms 328 KB Output is correct
16 Correct 1 ms 328 KB Output is correct
17 Correct 1 ms 340 KB Output is correct
18 Correct 1 ms 340 KB Output is correct
19 Correct 1 ms 340 KB Output is correct
20 Correct 1 ms 340 KB Output is correct
21 Correct 1 ms 328 KB Output is correct
22 Correct 1 ms 596 KB Output is correct
23 Correct 1 ms 968 KB Output is correct
24 Correct 2 ms 1308 KB Output is correct
25 Correct 2 ms 1492 KB Output is correct
26 Correct 2 ms 1492 KB Output is correct
27 Correct 2 ms 1492 KB Output is correct
28 Correct 3 ms 1492 KB Output is correct
29 Correct 3 ms 1492 KB Output is correct
30 Correct 2 ms 1492 KB Output is correct
31 Correct 199 ms 9556 KB Output is correct
32 Correct 2706 ms 47112 KB Output is correct
33 Correct 6589 ms 64448 KB Output is correct
34 Correct 6421 ms 64332 KB Output is correct
35 Correct 6513 ms 64460 KB Output is correct
36 Correct 6502 ms 64476 KB Output is correct
37 Correct 6431 ms 64476 KB Output is correct
38 Correct 6854 ms 64488 KB Output is correct
39 Correct 6760 ms 64492 KB Output is correct
40 Correct 6608 ms 64508 KB Output is correct