답안 #644642

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
644642 2022-09-25T05:09:38 Z ymm 운세 보기 2 (JOI14_fortune_telling2) C++17
35 / 100
3000 ms 2636 KB
#include <bits/stdc++.h>
// Loop(x,l,r): ascending half-open loop, x = l, l+1, ..., r-1 (x is a long long).
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
// LoopR(x,l,r): the same range visited in descending order, x = r-1, ..., l.
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

// Capacity of the global arrays below.
// NOTE(review): presumably the task's upper bound plus a little slack — confirm
// against the problem statement.
const int N = 200'032;
// Tile width used by main(): cards are processed in chunks [l, l+S), and each
// chunk is swept by every threshold before the next chunk is touched.
const int S = 1024;
// a[i]: value currently showing on card i; rewritten in place by up().
// b[i]: after input parsing holds (first value XOR second value), so
//       "a[i] ^= b[i]" switches card i to its other value.
// Both arrays are referenced by symbol name ("a(%rip)", "b(%rip)") from the
// assembly that implements up(), so they must remain non-static globals with
// exactly these names.
int a[N], b[N];
// q[j]: the thresholds, in input order.
int q[N];
// Number of cards (set in main()).
int n;

// Reference C++ implementation of up(). It is commented out; the symbol that
// is actually linked is defined by the asm block further down.
// For every i in [l, r) it applies the four thresholds x, y, z, w in that
// order: whenever the current value a[i] is <= the threshold, a[i] is XOR-ed
// with b[i].
/*
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(int x, int y, int z, int w, int l, int r)
{
	Loop (i,l,r) {
		int v = a[i], u = b[i];
		v ^= v <= x? u: 0;
		v ^= v <= y? u: 0;
		v ^= v <= z? u: 0;
		v ^= v <= w? u: 0;
		a[i] = v;
	}
}
*/
// Declaration of the routine defined by the asm block below
// (_Z2upiiiiii is the mangled name of up(int,int,int,int,int,int)).
// NOTE(review): the assembly looks like compiler output for the reference
// implementation above (compiler-style local labels, CFI directives) —
// presumably pasted in so the AVX2 code does not depend on the judge's
// compiler flags; confirm with the author.
// As written it needs an x86-64 ELF target (AT&T syntax, .type/.size
// directives), the System V argument registers, and a CPU with AVX2.
void up(int,int,int,int,int,int);

asm("\n"
"	.text\n"
"	.p2align 4\n"
"	.globl	_Z2upiiiiii\n"
"	.type	_Z2upiiiiii, @function\n"
"_Z2upiiiiii:\n"
".myLFB9703:\n"
"	.cfi_startproc\n"
// --- Prologue -------------------------------------------------------------
// Incoming arguments: x=%edi, y=%esi, z=%edx, w=%ecx, l=%r8d, r=%r9d.
// Saves the callee-saved registers it uses, sign-extends l into %r8 and r
// into %rbx, and aligns %rsp down to a 32-byte boundary.
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movslq	%r8d, %r8\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r15\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 15, -24\n"
"	.cfi_offset 14, -32\n"
"	.cfi_offset 13, -40\n"
"	.cfi_offset 12, -48\n"
"	.cfi_offset 3, -56\n"
"	movslq	%r9d, %rbx\n"
"	andq	$-32, %rsp\n"
// y is spilled just below %rsp and reloaded from there later. This is only
// safe because the routine contains no call instruction (leaf function).
"	movl	%esi, -4(%rsp)\n"
// Empty range (l >= r): nothing to do.
"	cmpq	%rbx, %r8\n"
"	jge	.myL42\n"
// %r11d = w, %r10d = z, %r15 = l, %rcx = r - l (element count).
"	movl	%ecx, %r11d\n"
"	movq	%rbx, %rcx\n"
"	movl	%edx, %r10d\n"
"	movq	%r8, %r15\n"
"	subq	%r8, %rcx\n"
// Fewer than 8 elements: skip the 256-bit pass entirely.
"	leaq	-1(%rcx), %rax\n"
"	cmpq	$6, %rax\n"
"	jbe	.myL23\n"
// --- Setup for the 8-lane (256-bit) pass -----------------------------------
// %r9 = &a[0], %r14 = &b[0], %rdx = &a[l], %rsi = &b[l], %rax = byte offset.
// %r13 = (count / 8) * 32 = number of bytes covered by whole 8-int vectors.
// %ymm7/%ymm6/%ymm5/%ymm4 = x/y/z/w broadcast to every lane.
"	movq	%rcx, %r13\n"
"	leaq	a(%rip), %r9\n"
"	vmovd	%edi, %xmm7\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r13\n"
"	leaq	0(,%r8,4), %rsi\n"
"	leaq	b(%rip), %r14\n"
"	vpbroadcastd	-4(%rsp), %ymm6\n"
"	vmovd	%r10d, %xmm5\n"
"	salq	$5, %r13\n"
"	vmovd	%r11d, %xmm4\n"
"	leaq	(%r9,%rsi), %rdx\n"
"	leaq	-32(%r13), %r12\n"
"	addq	%r14, %rsi\n"
"	vpbroadcastd	%xmm7, %ymm7\n"
"	shrq	$5, %r12\n"
"	vpbroadcastd	%xmm5, %ymm5\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
// %r12 = (number of 8-lane steps) mod 4. The main loop below is unrolled
// four times, so 1, 2 or 3 leading steps are peeled off first:
// remainder 0 -> .myL4, 1 -> .myL34, 2 -> .myL35, 3 -> fall through.
"	addq	$1, %r12\n"
"	andl	$3, %r12d\n"
"	je	.myL4\n"
"	cmpq	$1, %r12\n"
"	je	.myL34\n"
"	cmpq	$2, %r12\n"
"	je	.myL35\n"
// One 8-lane step (this pattern repeats in every vector step below):
//   load v = a[...] and u = b[...];
//   for each threshold t in x, y, z, w:
//     mask = (v > t); v = mask ? v : (v ^ u)   (vpcmpgtd + vpxor + vpblendvb)
//   store v back to a[...].
"	vmovdqu	(%rdx), %ymm2\n"
"	vmovdqu	(%rsi), %ymm0\n"
"	movl	$32, %eax\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx)\n"
// Peeled step entered when two leading steps remain.
".myL35:\n"
"	vmovdqu	(%rdx,%rax), %ymm2\n"
"	vmovdqu	(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	addq	$32, %rax\n"
// Peeled step entered when one leading step remains.
".myL34:\n"
"	vmovdqu	(%rdx,%rax), %ymm2\n"
"	vmovdqu	(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	addq	$32, %rax\n"
// If the peeled steps already covered every whole vector, skip the main loop.
"	cmpq	%r13, %rax\n"
"	je	.myL40\n"
// --- Main 8-lane loop: four steps (32 ints, 128 bytes) per iteration -------
// Offsets handled: %rax, %rax+32, %rax+64 and %rax+96 (addressed as 64(%r12)
// with %r12 = %rax+32); %rax then advances by 128 until it reaches %r13.
".myL4:\n"
"	vmovdqu	(%rdx,%rax), %ymm2\n"
"	vmovdqu	(%rsi,%rax), %ymm0\n"
"	leaq	32(%rax), %r12\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	32(%rdx,%rax), %ymm2\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	vmovdqu	32(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	64(%rdx,%rax), %ymm2\n"
"	vmovdqu	%ymm0, 32(%rdx,%rax)\n"
"	vmovdqu	64(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, 64(%rdx,%rax)\n"
"	vmovdqu	64(%rdx,%r12), %ymm2\n"
"	leaq	96(%r12), %rax\n"
"	vmovdqu	64(%rsi,%r12), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, 64(%rdx,%r12)\n"
"	cmpq	%r13, %rax\n"
"	jne	.myL4\n"
// --- After the 8-lane pass --------------------------------------------------
// %rax = count rounded down to a multiple of 8 (elements already done),
// %r8 = index of the first unprocessed element. If nothing is left, return
// through .myL45; otherwise continue with the remainder handling.
".myL40:\n"
"	movq	%rcx, %rax\n"
"	andq	$-8, %rax\n"
"	addq	%rax, %r8\n"
"	cmpq	%rax, %rcx\n"
"	je	.myL45\n"
"	vzeroupper\n"
// --- Remainder: at most 7 elements ------------------------------------------
// %rcx = elements still to do. With 3 or fewer go straight to the scalar
// tail; otherwise do one 4-lane (128-bit) step at a[l + %rax] first.
".myL3:\n"
"	subq	%rax, %rcx\n"
"	leaq	-1(%rcx), %rdx\n"
"	cmpq	$2, %rdx\n"
"	jbe	.myL8\n"
"	vmovd	%edi, %xmm4\n"
"	vmovd	%r10d, %xmm5\n"
"	vmovd	%r11d, %xmm6\n"
"	addq	%r15, %rax\n"
"	leaq	(%r9,%rax,4), %rdx\n"
"	vpshufd	$0, %xmm4, %xmm2\n"
"	vmovdqu	(%r14,%rax,4), %xmm0\n"
"	movq	%rcx, %rax\n"
"	vmovdqu	(%rdx), %xmm3\n"
"	andq	$-4, %rax\n"
"	addq	%rax, %r8\n"
"	vpcmpgtd	%xmm2, %xmm3, %xmm2\n"
"	vpxor	%xmm0, %xmm3, %xmm1\n"
"	vpblendvb	%xmm2, %xmm3, %xmm1, %xmm1\n"
"	vbroadcastss	-4(%rsp), %xmm3\n"
"	vpxor	%xmm1, %xmm0, %xmm2\n"
"	vpcmpgtd	%xmm3, %xmm1, %xmm3\n"
"	vpblendvb	%xmm3, %xmm1, %xmm2, %xmm2\n"
"	vpshufd	$0, %xmm5, %xmm3\n"
"	vpcmpgtd	%xmm3, %xmm2, %xmm3\n"
"	vpxor	%xmm2, %xmm0, %xmm1\n"
"	vpblendvb	%xmm3, %xmm2, %xmm1, %xmm1\n"
"	vpshufd	$0, %xmm6, %xmm2\n"
"	vpcmpgtd	%xmm2, %xmm1, %xmm2\n"
"	vpxor	%xmm1, %xmm0, %xmm0\n"
"	vpblendvb	%xmm2, %xmm1, %xmm0, %xmm0\n"
"	vmovdqu	%xmm0, (%rdx)\n"
// Exactly four elements were left: done.
"	cmpq	%rax, %rcx\n"
"	je	.myL42\n"
// --- Scalar tail: up to three elements, fully unrolled ----------------------
// For each element: %eax = a[i], %edx = b[i]; for every threshold the XOR-ed
// candidate is computed and selected with cmov when a[i] <= threshold.
// %r15 is reused here to hold y (reloaded from the stack slot).
".myL8:\n"
"	movl	(%r9,%r8,4), %eax\n"
"	movl	(%r14,%r8,4), %edx\n"
"	movl	-4(%rsp), %r15d\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%edi, %eax\n"
"	cmovle	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r15d\n"
"	cmovge	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r10d\n"
"	cmovge	%ecx, %eax\n"
"	leaq	1(%r8), %rcx\n"
"	xorl	%eax, %edx\n"
"	cmpl	%eax, %r11d\n"
"	cmovge	%edx, %eax\n"
"	movl	%eax, (%r9,%r8,4)\n"
// Second tail element, only if index %r8+1 is still below r.
"	cmpq	%rcx, %rbx\n"
"	jle	.myL42\n"
"	movl	(%r9,%rcx,4), %eax\n"
"	movl	(%r14,%rcx,4), %edx\n"
"	movl	%eax, %esi\n"
"	xorl	%edx, %esi\n"
"	cmpl	%eax, %edi\n"
"	cmovge	%esi, %eax\n"
"	movl	%eax, %esi\n"
"	xorl	%edx, %esi\n"
"	cmpl	%eax, %r15d\n"
"	cmovge	%esi, %eax\n"
"	movl	%eax, %esi\n"
"	xorl	%edx, %esi\n"
"	cmpl	%eax, %r10d\n"
"	cmovge	%esi, %eax\n"
"	xorl	%eax, %edx\n"
"	cmpl	%eax, %r11d\n"
"	cmovge	%edx, %eax\n"
"	addq	$2, %r8\n"
"	movl	%eax, (%r9,%rcx,4)\n"
// Third tail element, only if index %r8+2 is still below r.
"	cmpq	%r8, %rbx\n"
"	jle	.myL42\n"
"	movl	(%r9,%r8,4), %eax\n"
"	movl	(%r14,%r8,4), %edx\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %edi\n"
"	cmovge	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r15d\n"
"	cmovge	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r10d\n"
"	cmovge	%ecx, %eax\n"
"	xorl	%eax, %edx\n"
"	cmpl	%eax, %r11d\n"
"	cmovge	%edx, %eax\n"
"	movl	%eax, (%r9,%r8,4)\n"
// --- Epilogue (paths that do not need vzeroupper here) ----------------------
// Restores %rsp from the frame pointer and pops the saved registers.
".myL42:\n"
"	leaq	-40(%rbp), %rsp\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// --- Epilogue taken straight from the 256-bit pass --------------------------
// Same as above, but executes vzeroupper first because the ymm registers
// were just used.
".myL45:\n"
"	.cfi_restore_state\n"
"	vzeroupper\n"
"	leaq	-40(%rbp), %rsp\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
// --- Entry for ranges shorter than 8 elements -------------------------------
// Nothing has been processed yet (%rax = 0); load the array bases and join
// the remainder handling.
".myL23:\n"
"	.cfi_restore_state\n"
"	xorl	%eax, %eax\n"
"	leaq	a(%rip), %r9\n"
"	leaq	b(%rip), %r14\n"
"	jmp	.myL3\n"
"	.cfi_endproc\n"
".myLFE9703:\n"
"	.size	_Z2upiiiiii, .-_Z2upiiiiii\n"
);

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	int k;
	cin >> n >> k;
	Loop (i,0,n) {
		cin >> a[i] >> b[i];
		b[i] ^= a[i];
	}
	Loop (i,0,k)
		cin >> q[i];
	for (int l = 0; l < n; l += S) {
		int r = min(n, l+S);
		for (int i = 0; i < k; i += 4)
			up(q[i+0], q[i+1], q[i+2], q[i+3], l, r);
	}
	ll ans = 0;
	Loop (i,0,n)
		ans += a[i];
	cout << ans << '\n';
}
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 1 ms 340 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 280 KB Output is correct
6 Correct 1 ms 340 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 340 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
11 Correct 1 ms 340 KB Output is correct
12 Correct 1 ms 340 KB Output is correct
13 Correct 1 ms 340 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 1 ms 340 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 280 KB Output is correct
6 Correct 1 ms 340 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 340 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
11 Correct 1 ms 340 KB Output is correct
12 Correct 1 ms 340 KB Output is correct
13 Correct 1 ms 340 KB Output is correct
14 Correct 13 ms 340 KB Output is correct
15 Correct 49 ms 552 KB Output is correct
16 Correct 99 ms 672 KB Output is correct
17 Correct 181 ms 724 KB Output is correct
18 Correct 169 ms 784 KB Output is correct
19 Correct 176 ms 976 KB Output is correct
20 Correct 168 ms 788 KB Output is correct
21 Correct 169 ms 788 KB Output is correct
22 Correct 167 ms 796 KB Output is correct
23 Correct 165 ms 788 KB Output is correct
24 Correct 166 ms 724 KB Output is correct
25 Correct 166 ms 784 KB Output is correct
26 Correct 145 ms 736 KB Output is correct
27 Correct 168 ms 724 KB Output is correct
28 Correct 173 ms 788 KB Output is correct
29 Correct 172 ms 796 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 1 ms 340 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 280 KB Output is correct
6 Correct 1 ms 340 KB Output is correct
7 Correct 1 ms 340 KB Output is correct
8 Correct 1 ms 340 KB Output is correct
9 Correct 1 ms 340 KB Output is correct
10 Correct 1 ms 340 KB Output is correct
11 Correct 1 ms 340 KB Output is correct
12 Correct 1 ms 340 KB Output is correct
13 Correct 1 ms 340 KB Output is correct
14 Correct 13 ms 340 KB Output is correct
15 Correct 49 ms 552 KB Output is correct
16 Correct 99 ms 672 KB Output is correct
17 Correct 181 ms 724 KB Output is correct
18 Correct 169 ms 784 KB Output is correct
19 Correct 176 ms 976 KB Output is correct
20 Correct 168 ms 788 KB Output is correct
21 Correct 169 ms 788 KB Output is correct
22 Correct 167 ms 796 KB Output is correct
23 Correct 165 ms 788 KB Output is correct
24 Correct 166 ms 724 KB Output is correct
25 Correct 166 ms 784 KB Output is correct
26 Correct 145 ms 736 KB Output is correct
27 Correct 168 ms 724 KB Output is correct
28 Correct 173 ms 788 KB Output is correct
29 Correct 172 ms 796 KB Output is correct
30 Correct 221 ms 1172 KB Output is correct
31 Correct 1018 ms 1480 KB Output is correct
32 Correct 1990 ms 1880 KB Output is correct
33 Execution timed out 3082 ms 2636 KB Time limit exceeded
34 Halted 0 ms 0 KB -