Submission #644726

# Submission time Handle Problem Language Result Execution time Memory
644726 2022-09-25T06:47:35 Z ymm Fortune Telling 2 (JOI14_fortune_telling2) C++17
35 / 100
2766 ms 4204 KB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 2000*100;
const int S0 = 2000*16;
const int S1 = 2000;
pii ab[N];
unsigned short sa[N], sb[N];
int q[N+32];
unsigned short sq[N];
int n;

/*
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(unsigned short x, unsigned short y, unsigned short z, int l, int r)
{
	l *= S1; r *= S1;
	Loop (i,l,r) {
		unsigned short v = sa[i], u = sb[i];
		v ^= v <= x? u: 0;
		v ^= v <= y? u: 0;
		v ^= v <= z? u: 0;
		sa[i] = v;
	}
}
*/
void up(unsigned short x, unsigned short y, unsigned short z, int l, int r);

asm("\n"
"	.p2align 4\n"
"	.globl	_Z2uptttii\n"
"	.type	_Z2uptttii, @function\n"
"_Z2uptttii:\n"
".myLFB9901:\n"
"	.cfi_startproc\n"
"	movl	%edi, %eax\n"
"	imull	$2000, %ecx, %r10d\n"
"	movl	%esi, %edi\n"
"	movl	%edx, %r9d\n"
"	imull	$2000, %r8d, %esi\n"
"	cmpl	%r8d, %ecx\n"
"	jge	.myL103\n"
"	movslq	%esi, %rsi\n"
"	movslq	%r10d, %r10\n"
"	vmovd	%edi, %xmm5\n"
"	movslq	%ecx, %rdx\n"
"	vmovd	%eax, %xmm6\n"
"	subq	%r10, %rsi\n"
"	leaq	sa(%rip), %rcx\n"
"	xorl	%eax, %eax\n"
"	vmovd	%r9d, %xmm4\n"
"	vpbroadcastw	%xmm6, %ymm6\n"
"	vpxor	%xmm3, %xmm3, %xmm3\n"
"	addq	%rsi, %rsi\n"
"	imulq	$4000, %rdx, %rdx\n"
"	leaq	-32(%rsi), %rdi\n"
"	vpbroadcastw	%xmm5, %ymm5\n"
"	shrq	$5, %rdi\n"
"	leaq	sb(%rip), %r8\n"
"	vpbroadcastw	%xmm4, %ymm4\n"
"	addq	$1, %rdi\n"
"	addq	%rdx, %rcx\n"
"	addq	%r8, %rdx\n"
"	andl	$3, %edi\n"
"	je	.myL85\n"
"	cmpq	$1, %rdi\n"
"	je	.myL97\n"
"	cmpq	$2, %rdi\n"
"	je	.myL98\n"
"	vmovdqa	(%rcx), %ymm0\n"
"	vmovdqa	(%rdx), %ymm1\n"
"	movl	$32, %eax\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx)\n"
".myL98:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx,%rax)\n"
"	addq	$32, %rax\n"
".myL97:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx,%rax)\n"
"	addq	$32, %rax\n"
"	cmpq	%rsi, %rax\n"
"	je	.myL104\n"
".myL85:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rdx,%rax), %ymm1\n"
"	leaq	32(%rax), %rdi\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx,%rax)\n"
"	vmovdqa	32(%rcx,%rax), %ymm0\n"
"	vmovdqa	32(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 32(%rcx,%rax)\n"
"	vmovdqa	64(%rcx,%rax), %ymm0\n"
"	vmovdqa	64(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 64(%rcx,%rax)\n"
"	vmovdqa	64(%rcx,%rdi), %ymm0\n"
"	vmovdqa	64(%rdx,%rdi), %ymm1\n"
"	leaq	96(%rdi), %rax\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 64(%rcx,%rdi)\n"
"	cmpq	%rsi, %rax\n"
"	jne	.myL85\n"
".myL104:\n"
"	vzeroupper\n"
".myL103:\n"
"	ret\n"
"	.cfi_endproc\n"
".myLFE9901:\n"
"	.size	_Z2uptttii, .-_Z2uptttii\n"
);

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	int k;
	cin >> n >> k;
	Loop (i,0,n)
		cin >> ab[i].first >> ab[i].second;
	Loop (i,0,k)
		cin >> q[i];
	mt19937_64 rd(time(0));
	shuffle(ab, ab+N, rd);
	ll ans = 0;
	for (int l0 = 0; l0 < N; l0 += S0) {
		int r0 = min(N, l0+S0);
		vector<int> vec = {0};
		Loop (i,l0,r0) {
			vec.push_back(ab[i].first);
			vec.push_back(ab[i].second);
		}
		sort(vec.begin(), vec.end());
		vec.resize(unique(vec.begin(), vec.end()) - vec.begin());
		Loop (i,l0,r0) {
			sa[i] = lower_bound(vec.begin(), vec.end(), ab[i].first) - vec.begin();
			sb[i] = lower_bound(vec.begin(), vec.end(), ab[i].second) - vec.begin();
			sb[i] ^= sa[i];
		}
		Loop (i,0,k)
			sq[i] = upper_bound(vec.begin(), vec.end(), q[i]) - vec.begin() - 1;
		for (int l1 = l0; l1 < r0; l1 += S1) {
			int r1 = min(r0, l1+S1);
			for (int i = 0; i < k; i += 3)
				up(sq[i+0], sq[i+1], sq[i+2], l1/S1, r1/S1);
		}
		Loop (i,l0,r0)
			ans += vec[sa[i]];
	}
	cout << ans << '\n';
}
# Verdict Execution time Memory Grader output
1 Correct 27 ms 3048 KB Output is correct
2 Correct 30 ms 3052 KB Output is correct
3 Correct 29 ms 3072 KB Output is correct
4 Correct 28 ms 3052 KB Output is correct
5 Correct 27 ms 3084 KB Output is correct
6 Correct 29 ms 3064 KB Output is correct
7 Correct 30 ms 3048 KB Output is correct
8 Correct 27 ms 3072 KB Output is correct
9 Correct 28 ms 3156 KB Output is correct
10 Correct 28 ms 3072 KB Output is correct
11 Correct 30 ms 3052 KB Output is correct
12 Correct 27 ms 3052 KB Output is correct
13 Correct 28 ms 3164 KB Output is correct
# Verdict Execution time Memory Grader output
1 Correct 27 ms 3048 KB Output is correct
2 Correct 30 ms 3052 KB Output is correct
3 Correct 29 ms 3072 KB Output is correct
4 Correct 28 ms 3052 KB Output is correct
5 Correct 27 ms 3084 KB Output is correct
6 Correct 29 ms 3064 KB Output is correct
7 Correct 30 ms 3048 KB Output is correct
8 Correct 27 ms 3072 KB Output is correct
9 Correct 28 ms 3156 KB Output is correct
10 Correct 28 ms 3072 KB Output is correct
11 Correct 30 ms 3052 KB Output is correct
12 Correct 27 ms 3052 KB Output is correct
13 Correct 28 ms 3164 KB Output is correct
14 Correct 163 ms 3212 KB Output is correct
15 Correct 293 ms 3144 KB Output is correct
16 Correct 453 ms 3328 KB Output is correct
17 Correct 583 ms 3396 KB Output is correct
18 Correct 606 ms 3400 KB Output is correct
19 Correct 581 ms 3380 KB Output is correct
20 Correct 623 ms 3284 KB Output is correct
21 Correct 574 ms 3408 KB Output is correct
22 Correct 590 ms 3384 KB Output is correct
23 Correct 592 ms 3280 KB Output is correct
24 Correct 584 ms 3392 KB Output is correct
25 Correct 619 ms 3284 KB Output is correct
26 Correct 569 ms 3296 KB Output is correct
27 Correct 599 ms 3388 KB Output is correct
28 Correct 583 ms 3412 KB Output is correct
29 Correct 587 ms 3292 KB Output is correct
# Verdict Execution time Memory Grader output
1 Correct 27 ms 3048 KB Output is correct
2 Correct 30 ms 3052 KB Output is correct
3 Correct 29 ms 3072 KB Output is correct
4 Correct 28 ms 3052 KB Output is correct
5 Correct 27 ms 3084 KB Output is correct
6 Correct 29 ms 3064 KB Output is correct
7 Correct 30 ms 3048 KB Output is correct
8 Correct 27 ms 3072 KB Output is correct
9 Correct 28 ms 3156 KB Output is correct
10 Correct 28 ms 3072 KB Output is correct
11 Correct 30 ms 3052 KB Output is correct
12 Correct 27 ms 3052 KB Output is correct
13 Correct 28 ms 3164 KB Output is correct
14 Correct 163 ms 3212 KB Output is correct
15 Correct 293 ms 3144 KB Output is correct
16 Correct 453 ms 3328 KB Output is correct
17 Correct 583 ms 3396 KB Output is correct
18 Correct 606 ms 3400 KB Output is correct
19 Correct 581 ms 3380 KB Output is correct
20 Correct 623 ms 3284 KB Output is correct
21 Correct 574 ms 3408 KB Output is correct
22 Correct 590 ms 3384 KB Output is correct
23 Correct 592 ms 3280 KB Output is correct
24 Correct 584 ms 3392 KB Output is correct
25 Correct 619 ms 3284 KB Output is correct
26 Correct 569 ms 3296 KB Output is correct
27 Correct 599 ms 3388 KB Output is correct
28 Correct 583 ms 3412 KB Output is correct
29 Correct 587 ms 3292 KB Output is correct
30 Incorrect 2766 ms 4204 KB Output isn't correct
31 Halted 0 ms 0 KB -