Submission #643048

# Submission time Handle Problem Language Result Execution time Memory
643048 2022-09-21T05:25:29 Z ymm Examination (JOI19_examination) C++17
100 / 100
2300 ms 1924 KB
#include <bits/stdc++.h>
// Integer and pair shorthands. `ll` is also the index type of the loop macros.
using ll  = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;

// Loop(x, l, r): x takes the values l, l+1, ..., r-1 in ascending order.
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
// LoopR(x, l, r): x takes the values r-1, r-2, ..., l in descending order.
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)

using namespace std;

// Capacity of the point arrays.
const int N = 100'010;
// a[i], b[i]: the two values of the i-th input pair, for i in [0, n).
// cnt() reads both arrays with aligned vector loads (vmovdqa on ymm/xmm
// registers), which fault on addresses that are not suitably aligned, so each
// array is explicitly placed on a 32-byte boundary instead of relying on the
// toolchain's default placement (sizeof(a) is not a multiple of 32).
alignas(32) int a[N], b[N];
// n: number of pairs stored in a/b; q: number of queries.
int n, q;

__attribute__((naked))
int cnt(int x, int y, int z) {
asm(
"	movl	%edi, %r9d\n"
"	movslq	n(%rip), %rdi\n"
"	testq	%rdi, %rdi\n"
"	jle	.myL9\n"
"	leaq	-1(%rdi), %rax\n"
"	movl	%esi, %r10d\n"
"	movl	%edx, %r11d\n"
"	cmpq	$6, %rax\n"
"	jbe	.myL10\n"
"	movq	%rdi, %r8\n"
"	vmovd	%edx, %xmm5\n"
"	vmovd	%esi, %xmm4\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r8\n"
"	vmovd	%r9d, %xmm6\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
"	salq	$5, %r8\n"
"	vpbroadcastd	%xmm6, %ymm6\n"
"	vpbroadcastd	%xmm5, %ymm5\n"
"	leaq	-32(%r8), %rdx\n"
"	vpxor	%xmm3, %xmm3, %xmm3\n"
"	leaq	a(%rip), %rcx\n"
"	shrq	$5, %rdx\n"
"	leaq	b(%rip), %rsi\n"
"	addq	$1, %rdx\n"
"	andl	$3, %edx\n"
"	je	.myL4\n"
"	cmpq	$1, %rdx\n"
"	je	.myL21\n"
"	cmpq	$2, %rdx\n"
"	je	.myL22\n"
"	vmovdqa	(%rcx), %ymm0\n"
"	vmovdqa	(%rsi), %ymm1\n"
"	movl	$32, %eax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
".myL22:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rsi,%rax), %ymm1\n"
"	addq	$32, %rax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
".myL21:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rsi,%rax), %ymm1\n"
"	addq	$32, %rax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	cmpq	%r8, %rax\n"
"	je	.myL27\n"
".myL4:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rsi,%rax), %ymm1\n"
"	leaq	32(%rax), %rdx\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vmovdqa	32(%rsi,%rax), %ymm1\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqa	32(%rcx,%rax), %ymm0\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vmovdqa	64(%rsi,%rax), %ymm1\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqa	64(%rcx,%rax), %ymm0\n"
"	leaq	96(%rdx), %rax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vmovdqa	64(%rsi,%rdx), %ymm1\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqa	64(%rcx,%rdx), %ymm0\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	cmpq	%r8, %rax\n"
"	jne	.myL4\n"
".myL27:\n"
"	vmovdqa	%xmm3, %xmm0\n"
"	vextracti128	$0x1, %ymm3, %xmm3\n"
"	movq	%rdi, %rax\n"
"	vpaddd	%xmm3, %xmm0, %xmm0\n"
"	andq	$-8, %rax\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	movq	%rax, %rdx\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %r8d\n"
"	cmpq	%rdi, %rax\n"
"	je	.myL33\n"
"	vzeroupper\n"
".myL3:\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 13, -24\n"
"	.cfi_offset 12, -32\n"
"	.cfi_offset 3, -40\n"
"	movq	%rdi, %rbx\n"
"	subq	%rax, %rbx\n"
"	leaq	-1(%rbx), %r12\n"
"	cmpq	$2, %r12\n"
"	jbe	.myL7\n"
"	vmovdqa	(%rcx,%rax,4), %xmm2\n"
"	vmovdqa	(%rsi,%rax,4), %xmm1\n"
"	vmovd	%r10d, %xmm7\n"
"	vpshufd	$0, %xmm7, %xmm0\n"
"	vmovd	%r9d, %xmm7\n"
"	vpaddd	%xmm1, %xmm2, %xmm3\n"
"	vpminsd	%xmm1, %xmm0, %xmm1\n"
"	vpcmpeqd	%xmm1, %xmm0, %xmm1\n"
"	vpshufd	$0, %xmm7, %xmm0\n"
"	vmovd	%r11d, %xmm7\n"
"	vpcmpgtd	%xmm2, %xmm0, %xmm0\n"
"	vpandn	%xmm1, %xmm0, %xmm0\n"
"	vpshufd	$0, %xmm7, %xmm1\n"
"	vpcmpgtd	%xmm3, %xmm1, %xmm1\n"
"	vpandn	%xmm0, %xmm1, %xmm1\n"
"	vpxor	%xmm0, %xmm0, %xmm0\n"
"	vpsubd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm0\n"
"	vpsubd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	addl	%eax, %r8d\n"
"	movq	%rbx, %rax\n"
"	andq	$-4, %rax\n"
"	addq	%rax, %rdx\n"
"	cmpq	%rax, %rbx\n"
"	je	.myL1\n"
".myL7:\n"
"	movl	(%rcx,%rdx,4), %ebx\n"
"	movl	(%rsi,%rdx,4), %r13d\n"
"	xorl	%eax, %eax\n"
"	cmpl	%r9d, %ebx\n"
"	setge	%al\n"
"	xorl	%r12d, %r12d\n"
"	negl	%eax\n"
"	cmpl	%r10d, %r13d\n"
"	setge	%r12b\n"
"	addl	%r13d, %ebx\n"
"	negl	%r12d\n"
"	andl	%r12d, %eax\n"
"	cmpl	%r11d, %ebx\n"
"	setge	%bl\n"
"	movzbl	%bl, %ebx\n"
"	negl	%ebx\n"
"	andl	%ebx, %eax\n"
"	subl	%eax, %r8d\n"
"	leaq	1(%rdx), %rax\n"
"	cmpq	%rdi, %rax\n"
"	jge	.myL1\n"
"	movl	(%rsi,%rax,4), %ebx\n"
"	movl	(%rcx,%rax,4), %r12d\n"
"	xorl	%eax, %eax\n"
"	cmpl	%ebx, %r10d\n"
"	setle	%al\n"
"	xorl	%r13d, %r13d\n"
"	negl	%eax\n"
"	cmpl	%r12d, %r9d\n"
"	setle	%r13b\n"
"	addl	%r12d, %ebx\n"
"	negl	%r13d\n"
"	andl	%r13d, %eax\n"
"	cmpl	%ebx, %r11d\n"
"	setle	%bl\n"
"	addq	$2, %rdx\n"
"	movzbl	%bl, %ebx\n"
"	negl	%ebx\n"
"	andl	%ebx, %eax\n"
"	subl	%eax, %r8d\n"
"	cmpq	%rdx, %rdi\n"
"	jle	.myL1\n"
"	movl	(%rcx,%rdx,4), %ecx\n"
"	movl	(%rsi,%rdx,4), %esi\n"
"	xorl	%eax, %eax\n"
"	cmpl	%ecx, %r9d\n"
"	setle	%al\n"
"	xorl	%edx, %edx\n"
"	negl	%eax\n"
"	cmpl	%esi, %r10d\n"
"	setle	%dl\n"
"	addl	%esi, %ecx\n"
"	negl	%edx\n"
"	andl	%edx, %eax\n"
"	xorl	%edx, %edx\n"
"	cmpl	%ecx, %r11d\n"
"	setle	%dl\n"
"	negl	%edx\n"
"	andl	%edx, %eax\n"
"	subl	%eax, %r8d\n"
".myL1:\n"
"	popq	%rbx\n"
"	movl	%r8d, %eax\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%rbp\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL9:\n"
"	.cfi_restore 3\n"
"	.cfi_restore 6\n"
"	.cfi_restore 12\n"
"	.cfi_restore 13\n"
"	xorl	%r8d, %r8d\n"
".myL30:\n"
"	movl	%r8d, %eax\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL33:\n"
"	vzeroupper\n"
"	jmp	.myL30\n"
".myL10:\n"
"	xorl	%eax, %eax\n"
"	xorl	%edx, %edx\n"
"	leaq	a(%rip), %rcx\n"
"	xorl	%r8d, %r8d\n"
"	leaq	b(%rip), %rsi\n"
"	jmp	.myL3\n"
);
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	cin >> n >> q;
	Loop (i,0,n)
		cin >> a[i] >> b[i];
	Loop (i,0,q) {
		int x, y, z;
		cin >> x >> y >> z;
		cout << cnt(x, y, z) << '\n';
	}
}
# Verdict Execution time Memory Grader output
1 Correct 1 ms 212 KB Output is correct
2 Correct 0 ms 212 KB Output is correct
3 Correct 0 ms 212 KB Output is correct
4 Correct 0 ms 212 KB Output is correct
5 Correct 0 ms 212 KB Output is correct
6 Correct 0 ms 212 KB Output is correct
7 Correct 4 ms 340 KB Output is correct
8 Correct 4 ms 340 KB Output is correct
9 Correct 4 ms 340 KB Output is correct
10 Correct 4 ms 340 KB Output is correct
11 Correct 4 ms 340 KB Output is correct
12 Correct 3 ms 340 KB Output is correct
13 Correct 4 ms 340 KB Output is correct
14 Correct 4 ms 340 KB Output is correct
15 Correct 4 ms 340 KB Output is correct
16 Correct 3 ms 340 KB Output is correct
17 Correct 5 ms 340 KB Output is correct
18 Correct 3 ms 340 KB Output is correct
# Verdict Execution time Memory Grader output
1 Correct 2179 ms 1628 KB Output is correct
2 Correct 2243 ms 1576 KB Output is correct
3 Correct 2202 ms 1800 KB Output is correct
4 Correct 2263 ms 1656 KB Output is correct
5 Correct 2300 ms 1660 KB Output is correct
6 Correct 2247 ms 1648 KB Output is correct
7 Correct 2286 ms 1688 KB Output is correct
8 Correct 2189 ms 1720 KB Output is correct
9 Correct 2163 ms 1736 KB Output is correct
10 Correct 2221 ms 1440 KB Output is correct
11 Correct 2225 ms 1704 KB Output is correct
12 Correct 2153 ms 1428 KB Output is correct
# Verdict Execution time Memory Grader output
1 Correct 2179 ms 1628 KB Output is correct
2 Correct 2243 ms 1576 KB Output is correct
3 Correct 2202 ms 1800 KB Output is correct
4 Correct 2263 ms 1656 KB Output is correct
5 Correct 2300 ms 1660 KB Output is correct
6 Correct 2247 ms 1648 KB Output is correct
7 Correct 2286 ms 1688 KB Output is correct
8 Correct 2189 ms 1720 KB Output is correct
9 Correct 2163 ms 1736 KB Output is correct
10 Correct 2221 ms 1440 KB Output is correct
11 Correct 2225 ms 1704 KB Output is correct
12 Correct 2153 ms 1428 KB Output is correct
13 Correct 2270 ms 1608 KB Output is correct
14 Correct 2192 ms 1612 KB Output is correct
15 Correct 2234 ms 1616 KB Output is correct
16 Correct 2242 ms 1544 KB Output is correct
17 Correct 2131 ms 1596 KB Output is correct
18 Correct 2141 ms 1628 KB Output is correct
19 Correct 2170 ms 1612 KB Output is correct
20 Correct 2114 ms 1868 KB Output is correct
21 Correct 2104 ms 1856 KB Output is correct
22 Correct 2093 ms 1520 KB Output is correct
23 Correct 2118 ms 1544 KB Output is correct
24 Correct 2089 ms 1528 KB Output is correct
# Verdict Execution time Memory Grader output
1 Correct 1 ms 212 KB Output is correct
2 Correct 0 ms 212 KB Output is correct
3 Correct 0 ms 212 KB Output is correct
4 Correct 0 ms 212 KB Output is correct
5 Correct 0 ms 212 KB Output is correct
6 Correct 0 ms 212 KB Output is correct
7 Correct 4 ms 340 KB Output is correct
8 Correct 4 ms 340 KB Output is correct
9 Correct 4 ms 340 KB Output is correct
10 Correct 4 ms 340 KB Output is correct
11 Correct 4 ms 340 KB Output is correct
12 Correct 3 ms 340 KB Output is correct
13 Correct 4 ms 340 KB Output is correct
14 Correct 4 ms 340 KB Output is correct
15 Correct 4 ms 340 KB Output is correct
16 Correct 3 ms 340 KB Output is correct
17 Correct 5 ms 340 KB Output is correct
18 Correct 3 ms 340 KB Output is correct
19 Correct 2179 ms 1628 KB Output is correct
20 Correct 2243 ms 1576 KB Output is correct
21 Correct 2202 ms 1800 KB Output is correct
22 Correct 2263 ms 1656 KB Output is correct
23 Correct 2300 ms 1660 KB Output is correct
24 Correct 2247 ms 1648 KB Output is correct
25 Correct 2286 ms 1688 KB Output is correct
26 Correct 2189 ms 1720 KB Output is correct
27 Correct 2163 ms 1736 KB Output is correct
28 Correct 2221 ms 1440 KB Output is correct
29 Correct 2225 ms 1704 KB Output is correct
30 Correct 2153 ms 1428 KB Output is correct
31 Correct 2270 ms 1608 KB Output is correct
32 Correct 2192 ms 1612 KB Output is correct
33 Correct 2234 ms 1616 KB Output is correct
34 Correct 2242 ms 1544 KB Output is correct
35 Correct 2131 ms 1596 KB Output is correct
36 Correct 2141 ms 1628 KB Output is correct
37 Correct 2170 ms 1612 KB Output is correct
38 Correct 2114 ms 1868 KB Output is correct
39 Correct 2104 ms 1856 KB Output is correct
40 Correct 2093 ms 1520 KB Output is correct
41 Correct 2118 ms 1544 KB Output is correct
42 Correct 2089 ms 1528 KB Output is correct
43 Correct 2107 ms 1580 KB Output is correct
44 Correct 2139 ms 1576 KB Output is correct
45 Correct 2130 ms 1720 KB Output is correct
46 Correct 2144 ms 1684 KB Output is correct
47 Correct 2142 ms 1548 KB Output is correct
48 Correct 2230 ms 1428 KB Output is correct
49 Correct 2127 ms 1924 KB Output is correct
50 Correct 2110 ms 1728 KB Output is correct
51 Correct 2186 ms 1624 KB Output is correct
52 Correct 2174 ms 1568 KB Output is correct
53 Correct 2157 ms 1720 KB Output is correct