답안 #644659

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
644659 2022-09-25T05:25:21 Z ymm 운세 보기 2 (JOI14_fortune_telling2) C++17
35 / 100
3000 ms 2444 KB
#include <bits/stdc++.h>
// Short aliases used throughout the file.
using ll  = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;

// Loop(var, lo, hi):  ascending  iteration of `var` over [lo, hi), counter type ll.
// LoopR(var, lo, hi): descending iteration of `var` over [lo, hi), counter type ll.
#define Loop(var, lo, hi)  for (ll var = (lo); var < (hi); ++var)
#define LoopR(var, lo, hi) for (ll var = (hi)-1; var >= (lo); --var)

using namespace std;

// Capacity, in ints, of both the card buffer `a` and the threshold buffer `q`.
const int N = 400'000;
// Number of ints of `a` covered by one call to up() (main passes [l, l+S)).
const int S = 2000;

// up() consumes `a` in pairs of 8-int vectors (16 ints per step), and main
// advances the chunk start by S until it reaches N.
static_assert(S % 16 == 0, "a chunk must hold a whole number of vector pairs");
static_assert(N % S == 0, "the buffer must hold a whole number of chunks");

// Card storage. Cards are packed in groups of 8: within each run of 16 ints the
// first 8 hold the current values and the next 8 hold the XOR of both values of
// the same cards (see main). up() accesses this array with aligned 32-byte
// loads/stores (vmovdqa), which fault on a misaligned address, so the alignment
// is requested explicitly instead of being left to the toolchain's defaults.
alignas(32) int a[N];
// Thresholds, in input order. Entries past the ones read stay zero.
int q[N];
// Number of cards read from the input.
int n;

/*
 * up(x, y, z, l, r) -- apply three thresholds to a slice of the global `a`.
 *
 * `a` is viewed as an array b[] of 8-int vectors. Starting at vector index
 * i = l/8 and stepping by 2 while i < r/8, each pair (b[i], b[i+1]) is handled
 * lane by lane: with v = b[i] and u = b[i+1], every threshold t taken in the
 * order x, y, z performs `if (v <= t) v ^= u` (signed 32-bit compare, each
 * compare seeing the result of the previous one). Only b[i] is written back;
 * b[i+1] is only read. Nothing happens when l/8 >= r/8.
 *
 * The function body is supplied by the top-level asm block below under the
 * mangled name _Z2upiiiii. It uses AVX2 instructions and aligned 32-byte
 * loads/stores (vmovdqa), which fault on addresses that are not 32-byte
 * aligned, so `a` must sit on a 32-byte boundary.
 *
 * Reference implementation (presumably the source the assembly was generated
 * from -- confirm before relying on it):
 *
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(int x, int y, int z, int l, int r)
{
	typedef int ymm __attribute((vector_size(32),aligned(32)));
	ymm *const b = (ymm*)a;
	for (int i = l/8; i < r/8; i += 2) {
		auto v = b[i], u = b[i+1];
		v ^= v <= x? u: 0;
		v ^= v <= y? u: 0;
		v ^= v <= z? u: 0;
		b[i] = v;
	}
}
*/
// Prototype for the assembly routine defined right below.
void up(int,int,int,int,int);
asm("\n"
"	.text\n"
"	.p2align 4\n"
"	.globl	_Z2upiiiii\n"
"	.type	_Z2upiiiii, @function\n"
"_Z2upiiiii:\n"
".myLFB9897:\n"
"	.cfi_startproc\n"
// Prologue. On entry: edi = x, esi = y, edx = z, ecx = l, r8d = r.
// Computes ecx = l/8 and edx = r/8 with truncation toward zero (7 is added
// first when the operand is negative), and returns if l/8 >= r/8.
"	movl	%edx, %eax\n"
"	movl	%ecx, %edx\n"
"	leal	7(%rcx), %ecx\n"
"	testl	%edx, %edx\n"
"	cmovns	%edx, %ecx\n"
"	leal	7(%r8), %edx\n"
"	sarl	$3, %ecx\n"
"	testl	%r8d, %r8d\n"
"	cmovns	%r8d, %edx\n"
"	sarl	$3, %edx\n"
"	cmpl	%edx, %ecx\n"
"	jge	.myL21\n"
// Loop setup.
//   ymm5 / ymm4 / ymm3 = x / y / z broadcast to all 8 lanes.
//   rax = address of b[l/8] (current pair).
//   rdx = end address: 64 bytes past the last pair to process, where the last
//         pair starts at vector index l/8 + ((r/8 - 1 - l/8) & ~1).
"	vmovd	%esi, %xmm4\n"
"	subl	$1, %edx\n"
"	vmovd	%edi, %xmm5\n"
"	movslq	%ecx, %rsi\n"
"	subq	%rcx, %rdx\n"
"	leaq	a(%rip), %rdi\n"
"	vmovd	%eax, %xmm3\n"
"	movq	%rsi, %rax\n"
"	andl	$4294967294, %edx\n"
"	leaq	64(%rdi), %rcx\n"
"	salq	$5, %rax\n"
"	vpbroadcastd	%xmm5, %ymm5\n"
"	addq	%rsi, %rdx\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
"	vpbroadcastd	%xmm3, %ymm3\n"
"	addq	%rdi, %rax\n"
"	salq	$5, %rdx\n"
"	addq	%rcx, %rdx\n"
// rcx = number of 64-byte pairs to process, reduced modulo 4. The remainder
// (1, 2 or 3 pairs) is handled by falling through the peeled copies below;
// a remainder of 0 jumps straight to the 4x unrolled loop.
"	movq	%rdx, %rcx\n"
"	subq	%rax, %rcx\n"
"	subq	$64, %rcx\n"
"	shrq	$6, %rcx\n"
"	addq	$1, %rcx\n"
"	andl	$3, %ecx\n"
"	je	.myL3\n"
"	cmpq	$1, %rcx\n"
"	je	.myL15\n"
"	cmpq	$2, %rcx\n"
"	je	.myL16\n"
// One pair (this pattern repeats in every copy below):
//   ymm0 = v, ymm2 = u;
//   vpcmpgtd -> ymm1 = (v > t) per lane;
//   vpandn   -> ymm1 = ~ymm1 & u, i.e. u where v <= t and 0 elsewhere;
//   vpxor    -> v ^= ymm1.
// Done for t = x, then y, then z, after which v is stored back.
// Peeled copy executed only when the remainder is 3.
"	vmovdqa	(%rax), %ymm0\n"
"	vmovdqa	32(%rax), %ymm2\n"
"	addq	$64, %rax\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, -64(%rax)\n"
// Peeled copy executed when the remainder is 2 or 3.
".myL16:\n"
"	vmovdqa	(%rax), %ymm0\n"
"	vmovdqa	32(%rax), %ymm2\n"
"	addq	$64, %rax\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, -64(%rax)\n"
// Peeled copy executed when the remainder is 1, 2 or 3; afterwards the routine
// finishes if the end address has been reached.
".myL15:\n"
"	vmovdqa	(%rax), %ymm0\n"
"	vmovdqa	32(%rax), %ymm2\n"
"	addq	$64, %rax\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, -64(%rax)\n"
"	cmpq	%rax, %rdx\n"
"	je	.myL22\n"
// Main loop, unrolled four times: pairs at byte offsets 0, 64, 128 and 192
// from rax (the fourth one is addressed through rcx = rax + 64). rax advances
// by 256 bytes per trip and the loop ends when it equals the end address rdx.
".myL3:\n"
"	vmovdqa	(%rax), %ymm0\n"
"	vmovdqa	32(%rax), %ymm2\n"
"	leaq	64(%rax), %rcx\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vmovdqa	96(%rax), %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rax)\n"
"	vmovdqa	64(%rax), %ymm0\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vmovdqa	160(%rax), %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 64(%rax)\n"
"	vmovdqa	128(%rax), %ymm0\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 128(%rax)\n"
"	vmovdqa	128(%rcx), %ymm0\n"
"	vmovdqa	160(%rcx), %ymm2\n"
"	leaq	192(%rcx), %rax\n"
"	vpcmpgtd	%ymm5, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm4, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm1\n"
"	vpandn	%ymm2, %ymm1, %ymm1\n"
"	vpxor	%ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 128(%rcx)\n"
"	cmpq	%rax, %rdx\n"
"	jne	.myL3\n"
// Epilogue: clear the upper halves of the ymm registers after the AVX code,
// then return. The early-exit path (.myL21) skips vzeroupper because no ymm
// register has been touched at that point.
".myL22:\n"
"	vzeroupper\n"
".myL21:\n"
"	ret\n"
"	.cfi_endproc\n"
".myLFE9897:\n"
"	.size	_Z2upiiiii, .-_Z2upiiiii\n"
);

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	int k;
	cin >> n >> k;
	Loop (i,0,n) {
		int x, y;
		cin >> x >> y;
		y ^= x;
		a[i/8*16 + i%8] = x;
		a[i/8*16 + i%8 + 8] = y;
	}
	Loop (i,0,k)
		cin >> q[i];
	for (int l = 0; l < N; l += S) {
		for (int i = 0; i < k; i += 3)
			up(q[i+0], q[i+1], q[i+2], l, l+S);
	}
	ll ans = 0;
	Loop (i,0,n)
		ans += a[i/8*16 + i%8];
	cout << ans << '\n';
}
# 결과 실행 시간 메모리 Grader output
1 Correct 18 ms 1876 KB Output is correct
2 Correct 18 ms 1800 KB Output is correct
3 Correct 18 ms 1836 KB Output is correct
4 Correct 19 ms 1876 KB Output is correct
5 Correct 18 ms 1876 KB Output is correct
6 Correct 18 ms 1784 KB Output is correct
7 Correct 18 ms 1876 KB Output is correct
8 Correct 18 ms 1780 KB Output is correct
9 Correct 18 ms 1876 KB Output is correct
10 Correct 18 ms 1800 KB Output is correct
11 Correct 23 ms 1880 KB Output is correct
12 Correct 20 ms 1784 KB Output is correct
13 Correct 20 ms 1792 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 18 ms 1876 KB Output is correct
2 Correct 18 ms 1800 KB Output is correct
3 Correct 18 ms 1836 KB Output is correct
4 Correct 19 ms 1876 KB Output is correct
5 Correct 18 ms 1876 KB Output is correct
6 Correct 18 ms 1784 KB Output is correct
7 Correct 18 ms 1876 KB Output is correct
8 Correct 18 ms 1780 KB Output is correct
9 Correct 18 ms 1876 KB Output is correct
10 Correct 18 ms 1800 KB Output is correct
11 Correct 23 ms 1880 KB Output is correct
12 Correct 20 ms 1784 KB Output is correct
13 Correct 20 ms 1792 KB Output is correct
14 Correct 173 ms 1868 KB Output is correct
15 Correct 350 ms 1948 KB Output is correct
16 Correct 526 ms 1908 KB Output is correct
17 Correct 695 ms 2132 KB Output is correct
18 Correct 693 ms 2000 KB Output is correct
19 Correct 672 ms 1948 KB Output is correct
20 Correct 679 ms 1996 KB Output is correct
21 Correct 687 ms 1952 KB Output is correct
22 Correct 688 ms 2028 KB Output is correct
23 Correct 687 ms 1996 KB Output is correct
24 Correct 679 ms 1988 KB Output is correct
25 Correct 684 ms 1916 KB Output is correct
26 Correct 687 ms 1960 KB Output is correct
27 Correct 676 ms 2016 KB Output is correct
28 Correct 679 ms 2032 KB Output is correct
29 Correct 677 ms 1940 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 18 ms 1876 KB Output is correct
2 Correct 18 ms 1800 KB Output is correct
3 Correct 18 ms 1836 KB Output is correct
4 Correct 19 ms 1876 KB Output is correct
5 Correct 18 ms 1876 KB Output is correct
6 Correct 18 ms 1784 KB Output is correct
7 Correct 18 ms 1876 KB Output is correct
8 Correct 18 ms 1780 KB Output is correct
9 Correct 18 ms 1876 KB Output is correct
10 Correct 18 ms 1800 KB Output is correct
11 Correct 23 ms 1880 KB Output is correct
12 Correct 20 ms 1784 KB Output is correct
13 Correct 20 ms 1792 KB Output is correct
14 Correct 173 ms 1868 KB Output is correct
15 Correct 350 ms 1948 KB Output is correct
16 Correct 526 ms 1908 KB Output is correct
17 Correct 695 ms 2132 KB Output is correct
18 Correct 693 ms 2000 KB Output is correct
19 Correct 672 ms 1948 KB Output is correct
20 Correct 679 ms 1996 KB Output is correct
21 Correct 687 ms 1952 KB Output is correct
22 Correct 688 ms 2028 KB Output is correct
23 Correct 687 ms 1996 KB Output is correct
24 Correct 679 ms 1988 KB Output is correct
25 Correct 684 ms 1916 KB Output is correct
26 Correct 687 ms 1960 KB Output is correct
27 Correct 676 ms 2016 KB Output is correct
28 Correct 679 ms 2032 KB Output is correct
29 Correct 677 ms 1940 KB Output is correct
30 Execution timed out 3075 ms 2444 KB Time limit exceeded
31 Halted 0 ms 0 KB -