Submission #643048

| # | Time | Username | Problem | Language | Result | Execution time | Memory |
| 643048 | | ymm | Examination (JOI19_examination) | C++17 | 100 / 100 | 2300 ms | 1924 KiB |
#include <bits/stdc++.h>
// Short integer / pair aliases used throughout the solution.
using ll  = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;

// Counting loops over the half-open range [l, r) with a `ll` index named x:
// Loop walks upward from l to r-1, LoopR walks downward from r-1 to l.
#define Loop(x, l, r)  for (ll x = (l); x < (r); ++x)
#define LoopR(x, l, r) for (ll x = (r) - 1; x >= (l); --x)

using namespace std;

// Capacity of the a/b arrays.
const int N = 100'010;
// Input arrays scanned by cnt(). Its assembly reads them with 32-byte aligned
// AVX2 loads (vmovdqa on ymm registers) and addresses a, b and n by symbol
// name, so the arrays are explicitly over-aligned instead of relying on the
// compiler's default placement, and the names must remain global.
alignas(32) int a[N];
alignas(32) int b[N];
// n: number of (a[i], b[i]) pairs read by main; q: number of queries.
int n, q;

// Counts the indices i in [0, n) for which a[i] >= x, b[i] >= y and
// a[i] + b[i] >= z (signed 32-bit comparisons; the sum is a 32-bit add) and
// returns that count.
//
// The function is naked: the whole body is x86-64 AT&T assembly, with the
// arguments taken from %edi (x), %esi (y), %edx (z) and the result left in
// %eax. The globals n, a and b are addressed by symbol name.
//
// Layout of the work:
//   1. whole groups of 8 ints with 256-bit AVX2 instructions (loop unrolled
//      four times, with entry points for the 1/2/3 leftover vectors first);
//   2. if at least 4 ints remain, one 128-bit step over 4 ints;
//   3. up to 3 remaining ints with scalar code.
//
// The vector loads are vmovdqa (aligned), so a and b must be 32-byte aligned,
// and the CPU must support AVX2.
__attribute__((naked))
int cnt(int x, int y, int z) {
asm(
// r9d = x, rdi = n (sign-extended). n <= 0 returns 0 through .myL9.
"	movl	%edi, %r9d\n"
"	movslq	n(%rip), %rdi\n"
"	testq	%rdi, %rdi\n"
"	jle	.myL9\n"
"	leaq	-1(%rdi), %rax\n"
// r10d = y, r11d = z: copies kept for the tail code.
"	movl	%esi, %r10d\n"
"	movl	%edx, %r11d\n"
// n - 1 <= 6 (n <= 7): no full 8-int vector, go to the small-n entry.
"	cmpq	$6, %rax\n"
"	jbe	.myL10\n"
// Vector setup:
//   r8   = (n / 8) * 32, the byte length covered by whole 8-int vectors
//   ymm6 = x, ymm4 = y, ymm5 = z broadcast to every lane
//   ymm3 = 0, the per-lane match counters
//   rcx  = &a, rsi = &b, rax = 0 (byte offset into both arrays)
//   rdx  = (number of whole vectors) mod 4, selecting how many single
//          vector steps run before the 4x unrolled loop
"	movq	%rdi, %r8\n"
"	vmovd	%edx, %xmm5\n"
"	vmovd	%esi, %xmm4\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r8\n"
"	vmovd	%r9d, %xmm6\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
"	salq	$5, %r8\n"
"	vpbroadcastd	%xmm6, %ymm6\n"
"	vpbroadcastd	%xmm5, %ymm5\n"
"	leaq	-32(%r8), %rdx\n"
"	vpxor	%xmm3, %xmm3, %xmm3\n"
"	leaq	a(%rip), %rcx\n"
"	shrq	$5, %rdx\n"
"	leaq	b(%rip), %rsi\n"
"	addq	$1, %rdx\n"
"	andl	$3, %edx\n"
"	je	.myL4\n"
"	cmpq	$1, %rdx\n"
"	je	.myL21\n"
"	cmpq	$2, %rdx\n"
"	je	.myL22\n"
// One vector step (this same 10-instruction pattern repeats below):
//   ymm0 = 8 values of a, ymm1 = 8 values of b
//   ymm2 = a + b
//   ymm1 = (min(b, y) == y), i.e. b >= y
//   ymm0 = (x > a); vpandn turns it into (a >= x) & (b >= y)
//   ymm1 = (z > a + b); vpandn leaves lanes where all three tests hold
//   matching lanes are -1, so ymm3 - ymm0 adds 1 to their counters
// Leftover step 1 of 3 (offset 0); rax becomes 32 afterwards.
"	vmovdqa	(%rcx), %ymm0\n"
"	vmovdqa	(%rsi), %ymm1\n"
"	movl	$32, %eax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
// Entry when two leftover vector steps are needed.
".myL22:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rsi,%rax), %ymm1\n"
"	addq	$32, %rax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
// Entry when one leftover vector step is needed.
".myL21:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rsi,%rax), %ymm1\n"
"	addq	$32, %rax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
// All whole vectors consumed already? Then skip the unrolled loop.
"	cmpq	%r8, %rax\n"
"	je	.myL27\n"
// Main loop: four vector steps per iteration at byte offsets
// rax, rax+32, rax+64 and rax+96 (the last one addressed as 64(rdx) with
// rdx = rax+32); rax advances by 128 until it reaches r8.
".myL4:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rsi,%rax), %ymm1\n"
"	leaq	32(%rax), %rdx\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vmovdqa	32(%rsi,%rax), %ymm1\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqa	32(%rcx,%rax), %ymm0\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vmovdqa	64(%rsi,%rax), %ymm1\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqa	64(%rcx,%rax), %ymm0\n"
"	leaq	96(%rdx), %rax\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vmovdqa	64(%rsi,%rdx), %ymm1\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqa	64(%rcx,%rdx), %ymm0\n"
"	vpaddd	%ymm1, %ymm0, %ymm2\n"
"	vpminsd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpgtd	%ymm0, %ymm6, %ymm0\n"
"	vpcmpeqd	%ymm1, %ymm4, %ymm1\n"
"	vpandn	%ymm1, %ymm0, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm1\n"
"	vpandn	%ymm0, %ymm1, %ymm0\n"
"	vpsubd	%ymm0, %ymm3, %ymm3\n"
"	cmpq	%r8, %rax\n"
"	jne	.myL4\n"
// Horizontal sum of the 8 lane counters into r8d (the running result).
// rax = rdx = n rounded down to a multiple of 8 = elements done so far;
// if that equals n there is no tail and we return through .myL33.
".myL27:\n"
"	vmovdqa	%xmm3, %xmm0\n"
"	vextracti128	$0x1, %ymm3, %xmm3\n"
"	movq	%rdi, %rax\n"
"	vpaddd	%xmm3, %xmm0, %xmm0\n"
"	andq	$-8, %rax\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	movq	%rax, %rdx\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %r8d\n"
"	cmpq	%rdi, %rax\n"
"	je	.myL33\n"
"	vzeroupper\n"
// Tail entry. Expects: rax = elements already processed, rdx = next index,
// r8d = count so far, rcx = &a, rsi = &b, r9d/r10d/r11d = x/y/z.
// rbp, r13, r12 and rbx are pushed here and popped again at .myL1; the
// .cfi_* directives are unwind annotations for those pushes.
".myL3:\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 13, -24\n"
"	.cfi_offset 12, -32\n"
"	.cfi_offset 3, -40\n"
// rbx = remaining element count; with 3 or fewer left go straight to the
// scalar code at .myL7.
"	movq	%rdi, %rbx\n"
"	subq	%rax, %rbx\n"
"	leaq	-1(%rbx), %r12\n"
"	cmpq	$2, %r12\n"
"	jbe	.myL7\n"
// One 4-lane (128-bit) step over a[rax..rax+3] / b[rax..rax+3], using the
// same three tests as the 256-bit step; xmm1 ends up as the match mask.
"	vmovdqa	(%rcx,%rax,4), %xmm2\n"
"	vmovdqa	(%rsi,%rax,4), %xmm1\n"
"	vmovd	%r10d, %xmm7\n"
"	vpshufd	$0, %xmm7, %xmm0\n"
"	vmovd	%r9d, %xmm7\n"
"	vpaddd	%xmm1, %xmm2, %xmm3\n"
"	vpminsd	%xmm1, %xmm0, %xmm1\n"
"	vpcmpeqd	%xmm1, %xmm0, %xmm1\n"
"	vpshufd	$0, %xmm7, %xmm0\n"
"	vmovd	%r11d, %xmm7\n"
"	vpcmpgtd	%xmm2, %xmm0, %xmm0\n"
"	vpandn	%xmm1, %xmm0, %xmm0\n"
"	vpshufd	$0, %xmm7, %xmm1\n"
"	vpcmpgtd	%xmm3, %xmm1, %xmm1\n"
"	vpandn	%xmm0, %xmm1, %xmm1\n"
// Horizontal sum of the 4 lanes: negate the mask, fold the upper half onto
// the lower half, then fold lane 1 onto lane 0; the total lands in eax.
"	vpxor	%xmm0, %xmm0, %xmm0\n"
"	vpsubd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm0\n"
"	vpsubd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	addl	%eax, %r8d\n"
// Advance the index by the 4 elements just handled; done if none remain.
"	movq	%rbx, %rax\n"
"	andq	$-4, %rax\n"
"	addq	%rax, %rdx\n"
"	cmpq	%rax, %rbx\n"
"	je	.myL1\n"
// Scalar tail, element rdx: each test yields 0 or -1 (setcc + neg), the
// three masks are ANDed and subtracted from r8d, adding 1 on a full match.
".myL7:\n"
"	movl	(%rcx,%rdx,4), %ebx\n"
"	movl	(%rsi,%rdx,4), %r13d\n"
"	xorl	%eax, %eax\n"
"	cmpl	%r9d, %ebx\n"
"	setge	%al\n"
"	xorl	%r12d, %r12d\n"
"	negl	%eax\n"
"	cmpl	%r10d, %r13d\n"
"	setge	%r12b\n"
"	addl	%r13d, %ebx\n"
"	negl	%r12d\n"
"	andl	%r12d, %eax\n"
"	cmpl	%r11d, %ebx\n"
"	setge	%bl\n"
"	movzbl	%bl, %ebx\n"
"	negl	%ebx\n"
"	andl	%ebx, %eax\n"
"	subl	%eax, %r8d\n"
// Scalar tail, element rdx + 1 (skipped when that index is >= n).
"	leaq	1(%rdx), %rax\n"
"	cmpq	%rdi, %rax\n"
"	jge	.myL1\n"
"	movl	(%rsi,%rax,4), %ebx\n"
"	movl	(%rcx,%rax,4), %r12d\n"
"	xorl	%eax, %eax\n"
"	cmpl	%ebx, %r10d\n"
"	setle	%al\n"
"	xorl	%r13d, %r13d\n"
"	negl	%eax\n"
"	cmpl	%r12d, %r9d\n"
"	setle	%r13b\n"
"	addl	%r12d, %ebx\n"
"	negl	%r13d\n"
"	andl	%r13d, %eax\n"
"	cmpl	%ebx, %r11d\n"
"	setle	%bl\n"
"	addq	$2, %rdx\n"
"	movzbl	%bl, %ebx\n"
"	negl	%ebx\n"
"	andl	%ebx, %eax\n"
"	subl	%eax, %r8d\n"
// Scalar tail, third element (rdx was advanced by 2 above; skipped when
// that index is >= n). rcx/rsi/rdx are reused as scratch from here on.
"	cmpq	%rdx, %rdi\n"
"	jle	.myL1\n"
"	movl	(%rcx,%rdx,4), %ecx\n"
"	movl	(%rsi,%rdx,4), %esi\n"
"	xorl	%eax, %eax\n"
"	cmpl	%ecx, %r9d\n"
"	setle	%al\n"
"	xorl	%edx, %edx\n"
"	negl	%eax\n"
"	cmpl	%esi, %r10d\n"
"	setle	%dl\n"
"	addl	%esi, %ecx\n"
"	negl	%edx\n"
"	andl	%edx, %eax\n"
"	xorl	%edx, %edx\n"
"	cmpl	%ecx, %r11d\n"
"	setle	%dl\n"
"	negl	%edx\n"
"	andl	%edx, %eax\n"
"	subl	%eax, %r8d\n"
// Tail exit: restore the pushed registers in reverse order and return the
// count (r8d) in eax.
".myL1:\n"
"	popq	%rbx\n"
"	movl	%r8d, %eax\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%rbp\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// n <= 0: the result is 0.
".myL9:\n"
"	.cfi_restore 3\n"
"	.cfi_restore 6\n"
"	.cfi_restore 12\n"
"	.cfi_restore 13\n"
"	xorl	%r8d, %r8d\n"
// Return r8d without touching the stack (nothing was pushed on this path).
".myL30:\n"
"	movl	%r8d, %eax\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// n is a multiple of 8: no tail; clear the upper ymm halves and return.
".myL33:\n"
"	vzeroupper\n"
"	jmp	.myL30\n"
// Small-n entry (1 <= n <= 7): nothing processed yet, so rax = rdx = 0 and
// r8d = 0; load the array addresses and run only the tail.
".myL10:\n"
"	xorl	%eax, %eax\n"
"	xorl	%edx, %edx\n"
"	leaq	a(%rip), %rcx\n"
"	xorl	%r8d, %r8d\n"
"	leaq	b(%rip), %rsi\n"
"	jmp	.myL3\n"
);
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	cin >> n >> q;
	Loop (i,0,n)
		cin >> a[i] >> b[i];
	Loop (i,0,q) {
		int x, y, z;
		cin >> x >> y >> z;
		cout << cnt(x, y, z) << '\n';
	}
}
| # | Verdict | Execution time | Memory | Grader output |
Fetching results...
| # | Verdict | Execution time | Memory | Grader output |
Fetching results...
| # | Verdict | Execution time | Memory | Grader output |
Fetching results...
| # | Verdict | Execution time | Memory | Grader output |
Fetching results...