Submission #644661

# | Time | Username | Problem | Language | Result | Execution time | Memory
644661 | ymm | Fortune Telling 2 (JOI14_fortune_telling2) | C++17
35 / 100
3068 ms | 2652 KiB
#include <bits/stdc++.h>
// Loop(x,l,r): runs x upward over the half-open range [l, r) with a long long
// index. Being a macro, the bound expression r is re-evaluated every iteration.
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
// LoopR(x,l,r): visits the same half-open range [l, r) from r-1 down to l.
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
// ll is named inside the two macros above; that works because the macros are
// only expanded after this typedef has been seen.
typedef long long ll;
// Pair shorthands.
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

// Capacity, in elements, of the static arrays a, b and q declared below.
const int N = 200'032;
// Block length, in elements, for tiled passes over a[] / b[].
const int S = 512;
// Per-card data filled in by main. Both arrays are also addressed by symbol
// name (a(%rip), b(%rip)) from the assembly definition of up(), so they have
// to remain file-scope, externally visible int arrays with these exact names.
int a[N], b[N];
// Threshold (query) values read by main. Static storage, so entries that are
// never written stay 0.
int q[N];
// Number of cards, read by main.
int n;

// Reference C++ for up(), kept as documentation only; the definition that is
// actually linked is the asm block that follows the prototype.
//
// Contract, as written in the reference: for every i in [l, r), take
// v = a[i] and u = b[i]; for t = x, y, z, w in that order replace v by v ^ u
// whenever v <= t; finally store v back into a[i]. b[] is only read.
/*
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(int x, int y, int z, int w, int l, int r)
{
	Loop (i,l,r) {
		int v = a[i], u = b[i];
		v ^= v <= x? u: 0;
		v ^= v <= y? u: 0;
		v ^= v <= z? u: 0;
		v ^= v <= w? u: 0;
		a[i] = v;
	}
}
*/
// Prototype only: parameters are (x, y, z, w, l, r) as in the reference above.
// The body is supplied in assembly under the mangled name _Z2upiiiiii.
void up(int,int,int,int,int,int);

// Out-of-line definition of up(int,int,int,int,int,int), written as a
// file-scope asm block (AT&T syntax, ELF directives) and exported under the
// mangled name _Z2upiiiiii. It is x86-64 only and uses 256-bit integer
// instructions (vpbroadcastd / vpcmpgtd / vpblendvb on ymm registers), so the
// CPU must support AVX2; nothing here checks for that at run time.
// NOTE(review): this looks like compiler output for the commented-out C++
// reference kept next to the prototype -- confirm before editing by hand.
//
// Incoming arguments (System V AMD64 calling convention):
//   edi = x, esi = y, edx = z, ecx = w, r8d = l, r9d = r.
// The arrays a and b are reached RIP-relative by symbol name.
//
// One "step" for a threshold t, used everywhere below:
//   mask    = v > t          (vpcmpgtd)
//   toggled = v ^ u          (vpxor)
//   v       = mask ? v : toggled   (vpblendvb)
// i.e. v ^= u exactly when v <= t. Four steps (x, y, z, w) are chained per group.
asm("\n"
"	.text\n"
"	.p2align 4\n"
"	.globl	_Z2upiiiiii\n"
"	.type	_Z2upiiiiii, @function\n"
"_Z2upiiiiii:\n"
".myLFB9703:\n"
"	.cfi_startproc\n"
// Prologue: build an rbp frame, sign-extend l into r8, save callee-saved registers.
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movslq	%r8d, %r8\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r15\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 15, -24\n"
"	.cfi_offset 14, -32\n"
"	.cfi_offset 13, -40\n"
"	.cfi_offset 12, -48\n"
"	.cfi_offset 3, -56\n"
// rbx = r; align rsp down to 32 bytes; spill y to -4(%rsp); return at once if l >= r.
"	movslq	%r9d, %rbx\n"
"	andq	$-32, %rsp\n"
"	movl	%esi, -4(%rsp)\n"
"	cmpq	%rbx, %r8\n"
"	jge	.myL42\n"
// r11d = w, r10d = z, r15 = l, rcx = r - l (element count).
// With 7 or fewer elements skip the 256-bit path and enter at .myL23.
"	movl	%ecx, %r11d\n"
"	movq	%rbx, %rcx\n"
"	movl	%edx, %r10d\n"
"	movq	%r8, %r15\n"
"	subq	%r8, %rcx\n"
"	leaq	-1(%rcx), %rax\n"
"	cmpq	$6, %rax\n"
"	jbe	.myL23\n"
// Vector setup: ymm7 / ymm6 / ymm5 / ymm4 = x / y / z / w broadcast to 8 lanes;
// rdx = &a[l], rsi = &b[l]; r13 = byte length covered by whole 8-int groups;
// rax = byte offset of the next group (starts at 0).
// r12 = (number of groups) & 3 decides how many peeled groups run before the
// four-times-unrolled loop: 0 -> .myL4, 1 -> .myL34, 2 -> .myL35, 3 -> fall through.
"	movq	%rcx, %r13\n"
"	leaq	a(%rip), %r9\n"
"	vmovd	%edi, %xmm7\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r13\n"
"	leaq	0(,%r8,4), %rsi\n"
"	leaq	b(%rip), %r14\n"
"	vpbroadcastd	-4(%rsp), %ymm6\n"
"	vmovd	%r10d, %xmm5\n"
"	salq	$5, %r13\n"
"	vmovd	%r11d, %xmm4\n"
"	leaq	(%r9,%rsi), %rdx\n"
"	leaq	-32(%r13), %r12\n"
"	addq	%r14, %rsi\n"
"	vpbroadcastd	%xmm7, %ymm7\n"
"	shrq	$5, %r12\n"
"	vpbroadcastd	%xmm5, %ymm5\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
"	addq	$1, %r12\n"
"	andl	$3, %r12d\n"
"	je	.myL4\n"
"	cmpq	$1, %r12\n"
"	je	.myL34\n"
"	cmpq	$2, %r12\n"
"	je	.myL35\n"
// Peeled group #1 (only when three groups are left over): offset 0.
"	vmovdqu	(%rdx), %ymm2\n"
"	vmovdqu	(%rsi), %ymm0\n"
"	movl	$32, %eax\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx)\n"
// Peeled group #2: one 8-int group at byte offset rax, then rax += 32.
".myL35:\n"
"	vmovdqu	(%rdx,%rax), %ymm2\n"
"	vmovdqu	(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	addq	$32, %rax\n"
// Peeled group #3: one more group, then leave via .myL40 if that was the last one.
".myL34:\n"
"	vmovdqu	(%rdx,%rax), %ymm2\n"
"	vmovdqu	(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	addq	$32, %rax\n"
"	cmpq	%r13, %rax\n"
"	je	.myL40\n"
// Main loop: four 8-int groups (128 bytes) per iteration, at offsets
// rax, rax+32, rax+64 and r12+64 (r12 = rax+32); loops until rax == r13.
".myL4:\n"
"	vmovdqu	(%rdx,%rax), %ymm2\n"
"	vmovdqu	(%rsi,%rax), %ymm0\n"
"	leaq	32(%rax), %r12\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	32(%rdx,%rax), %ymm2\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	vmovdqu	32(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	64(%rdx,%rax), %ymm2\n"
"	vmovdqu	%ymm0, 32(%rdx,%rax)\n"
"	vmovdqu	64(%rsi,%rax), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, 64(%rdx,%rax)\n"
"	vmovdqu	64(%rdx,%r12), %ymm2\n"
"	leaq	96(%r12), %rax\n"
"	vmovdqu	64(%rsi,%r12), %ymm0\n"
"	vpcmpgtd	%ymm7, %ymm2, %ymm3\n"
"	vpxor	%ymm0, %ymm2, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm6, %ymm1, %ymm3\n"
"	vpxor	%ymm1, %ymm0, %ymm2\n"
"	vpblendvb	%ymm3, %ymm1, %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm5, %ymm2, %ymm3\n"
"	vpxor	%ymm2, %ymm0, %ymm1\n"
"	vpblendvb	%ymm3, %ymm2, %ymm1, %ymm1\n"
"	vpcmpgtd	%ymm4, %ymm1, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, 64(%rdx,%r12)\n"
"	cmpq	%r13, %rax\n"
"	jne	.myL4\n"
// After the 256-bit path: rax = count rounded down to a multiple of 8,
// r8 = index of the first unprocessed element. Nothing left -> .myL45.
".myL40:\n"
"	movq	%rcx, %rax\n"
"	andq	$-8, %rax\n"
"	addq	%rax, %r8\n"
"	cmpq	%rax, %rcx\n"
"	je	.myL45\n"
"	vzeroupper\n"
// Remainder: rcx = elements still to do. Three or fewer go straight to the
// scalar code at .myL8; otherwise one 4-int step sequence runs in xmm registers
// first, and the routine returns if that finished the range.
".myL3:\n"
"	subq	%rax, %rcx\n"
"	leaq	-1(%rcx), %rdx\n"
"	cmpq	$2, %rdx\n"
"	jbe	.myL8\n"
"	vmovd	%edi, %xmm4\n"
"	vmovd	%r10d, %xmm5\n"
"	vmovd	%r11d, %xmm6\n"
"	addq	%r15, %rax\n"
"	leaq	(%r9,%rax,4), %rdx\n"
"	vpshufd	$0, %xmm4, %xmm2\n"
"	vmovdqu	(%r14,%rax,4), %xmm0\n"
"	movq	%rcx, %rax\n"
"	vmovdqu	(%rdx), %xmm3\n"
"	andq	$-4, %rax\n"
"	addq	%rax, %r8\n"
"	vpcmpgtd	%xmm2, %xmm3, %xmm2\n"
"	vpxor	%xmm0, %xmm3, %xmm1\n"
"	vpblendvb	%xmm2, %xmm3, %xmm1, %xmm1\n"
"	vbroadcastss	-4(%rsp), %xmm3\n"
"	vpxor	%xmm1, %xmm0, %xmm2\n"
"	vpcmpgtd	%xmm3, %xmm1, %xmm3\n"
"	vpblendvb	%xmm3, %xmm1, %xmm2, %xmm2\n"
"	vpshufd	$0, %xmm5, %xmm3\n"
"	vpcmpgtd	%xmm3, %xmm2, %xmm3\n"
"	vpxor	%xmm2, %xmm0, %xmm1\n"
"	vpblendvb	%xmm3, %xmm2, %xmm1, %xmm1\n"
"	vpshufd	$0, %xmm6, %xmm2\n"
"	vpcmpgtd	%xmm2, %xmm1, %xmm2\n"
"	vpxor	%xmm1, %xmm0, %xmm0\n"
"	vpblendvb	%xmm2, %xmm1, %xmm0, %xmm0\n"
"	vmovdqu	%xmm0, (%rdx)\n"
"	cmpq	%rax, %rcx\n"
"	je	.myL42\n"
// Scalar tail: up to three elements starting at index r8. For each one,
// eax = a[i], edx = b[i], and four compare + cmov pairs apply x, y (reloaded
// into r15d), z, w in turn before the result is stored back to a[i].
".myL8:\n"
"	movl	(%r9,%r8,4), %eax\n"
"	movl	(%r14,%r8,4), %edx\n"
"	movl	-4(%rsp), %r15d\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%edi, %eax\n"
"	cmovle	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r15d\n"
"	cmovge	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r10d\n"
"	cmovge	%ecx, %eax\n"
"	leaq	1(%r8), %rcx\n"
"	xorl	%eax, %edx\n"
"	cmpl	%eax, %r11d\n"
"	cmovge	%edx, %eax\n"
"	movl	%eax, (%r9,%r8,4)\n"
"	cmpq	%rcx, %rbx\n"
"	jle	.myL42\n"
"	movl	(%r9,%rcx,4), %eax\n"
"	movl	(%r14,%rcx,4), %edx\n"
"	movl	%eax, %esi\n"
"	xorl	%edx, %esi\n"
"	cmpl	%eax, %edi\n"
"	cmovge	%esi, %eax\n"
"	movl	%eax, %esi\n"
"	xorl	%edx, %esi\n"
"	cmpl	%eax, %r15d\n"
"	cmovge	%esi, %eax\n"
"	movl	%eax, %esi\n"
"	xorl	%edx, %esi\n"
"	cmpl	%eax, %r10d\n"
"	cmovge	%esi, %eax\n"
"	xorl	%eax, %edx\n"
"	cmpl	%eax, %r11d\n"
"	cmovge	%edx, %eax\n"
"	addq	$2, %r8\n"
"	movl	%eax, (%r9,%rcx,4)\n"
"	cmpq	%r8, %rbx\n"
"	jle	.myL42\n"
"	movl	(%r9,%r8,4), %eax\n"
"	movl	(%r14,%r8,4), %edx\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %edi\n"
"	cmovge	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r15d\n"
"	cmovge	%ecx, %eax\n"
"	movl	%eax, %ecx\n"
"	xorl	%edx, %ecx\n"
"	cmpl	%eax, %r10d\n"
"	cmovge	%ecx, %eax\n"
"	xorl	%eax, %edx\n"
"	cmpl	%eax, %r11d\n"
"	cmovge	%edx, %eax\n"
"	movl	%eax, (%r9,%r8,4)\n"
// Epilogue: restore rsp from the frame, pop the saved registers, return.
".myL42:\n"
"	leaq	-40(%rbp), %rsp\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// Same epilogue preceded by vzeroupper; reached when the 256-bit path consumed
// the whole range and the vzeroupper before .myL3 was therefore skipped.
".myL45:\n"
"	.cfi_restore_state\n"
"	vzeroupper\n"
"	leaq	-40(%rbp), %rsp\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
// Entry for ranges of 7 or fewer elements: rax = 0 elements done so far,
// r9 = a, r14 = b, then join the remainder code.
".myL23:\n"
"	.cfi_restore_state\n"
"	xorl	%eax, %eax\n"
"	leaq	a(%rip), %r9\n"
"	leaq	b(%rip), %r14\n"
"	jmp	.myL3\n"
"	.cfi_endproc\n"
".myLFE9703:\n"
"	.size	_Z2upiiiiii, .-_Z2upiiiiii\n"
);

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	int k;
	cin >> n >> k;
	Loop (i,0,n) {
		cin >> a[i] >> b[i];
		b[i] ^= a[i];
	}
	Loop (i,0,k)
		cin >> q[i];
	for (int l = 0; l < n; l += S) {
		int r = min(n, l+S);
		for (int i = 0; i < k; i += 4)
			up(q[i+0], q[i+1], q[i+2], q[i+3], l, r);
	}
	ll ans = 0;
	Loop (i,0,n)
		ans += a[i];
	cout << ans << '\n';
}
# | Verdict | Execution time | Memory | Grader output
Fetching results...
# | Verdict | Execution time | Memory | Grader output
Fetching results...
# | Verdict | Execution time | Memory | Grader output
Fetching results...