답안 #652332

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
652332 2022-10-22T06:53:07 Z ymm Lottery (CEOI18_lot) C++17
45 / 100
3000 ms 764 KB
#include <bits/stdc++.h>
// Ascending half-open loop: x takes the values l, l+1, ..., r-1 as a long long.
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
// Descending counterpart: x takes the values r-1, r-2, ..., l as a long long.
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
// Short aliases used by the macros above and available to the rest of the file.
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

// Capacity of every per-position array.
const int N = 10016;
// up_from_other2 reinterprets each row of ansc as an array of 32-byte vectors,
// so a row length that is a multiple of 32 keeps every row as aligned as the
// first one.
static_assert(N % 32 == 0, "rows of ansc must stay 32-byte aligned");
// Capacity for the number of queries.
const int Q = 100;
// NOTE(review): mylist is not referenced anywhere in the visible code.
int mylist[N];
// ans[k][i]: running 16-bit answer for query k at window start i.
short ans[Q][N];
// ansc[k][i]: 8-bit pending increments for ans[k][i]; flush_ansc folds them
// into ans and resets them. Explicitly 32-byte aligned because
// up_from_other2 accesses the rows through aligned(32) vector pointers.
alignas(32) char ansc[Q][N];
// query[k]: threshold read from the input for query k.
short query[Q];
// n values are read, l is the window length, q is the number of queries.
int n, q, l;

// Raw input values, before coordinate compression.
int noncmp_a[N];
// Compressed values (rank among the distinct inputs). The symbol name `a` is
// referenced directly by the assembly implementation of get_sim4, so it must
// remain a plain global named `a`.
short a[N];

// Distance between two windows of `a`: the number of offsets k in [0, l) at
// which a[i+k] and a[j+k] differ, computed as l minus the number of equal
// positions.
__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
	const short *lhs = a + i;
	const short *rhs = a + j;
	short equal = 0;
	for (int k = 0; k < l; ++k) {
		equal += (lhs[k] == rhs[k]);
	}
	return l - equal;
}
// Four-at-once variant of get_sim. Only the declaration lives in C++; the
// definition is provided by the file-scope asm statement that follows, under
// the mangled name _Z8get_sim4iiiiii.
// Per the reference body kept in the comment below, the result holds, in
// order, the number of differing positions between the length-l window of `a`
// starting at i and the windows starting at j0, j1, j2 and j3.
// The attribute line is commented out because no C++ body is compiled here.
//__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short,short> get_sim4(int i, int j0, int j1, int j2, int j3, int l);
/*{
	short ans0 = 0, ans1 = 0, ans2 = 0, ans3 = 0;
	for (int k = 0; k < l; ++k) {
		ans0 += a[i+k] == a[j0+k];
		ans1 += a[i+k] == a[j1+k];
		ans2 += a[i+k] == a[j2+k];
		ans3 += a[i+k] == a[j3+k];
	}
	return {l-ans0, l-ans1, l-ans2, l-ans3};
}*/
// Hand-placed x86-64 assembly that defines the global symbol
// _Z8get_sim4iiiiii, i.e. get_sim4(int, int, int, int, int, int).
// NOTE(review): this looks like compiler output for the commented-out C++
// body above with labels renamed to .my* — confirm before editing by hand.
// It uses 256-bit integer instructions (vpcmpeqw on ymm registers,
// vextracti128) with no run-time CPU feature check in the visible code.
// Register use, as read from the code: the result is written through the
// pointer received in %rdi, i/j0/j1/j2/j3 arrive in %esi/%edx/%ecx/%r8d/%r9d,
// and l is read from 16(%rbp). The array `a` is addressed by symbol name.
asm("\n"
"	.p2align 4\n"
"	.globl	_Z8get_sim4iiiiii\n"
"	.type	_Z8get_sim4iiiiii, @function\n"
"_Z8get_sim4iiiiii:\n"
".myLFB9901:\n"
"	.cfi_startproc\n"
// Prologue: save callee-saved registers, keep the result pointer in %r12,
// align %rsp to 32 bytes, spill i and j0 below %rsp, load l into %ecx.
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r15\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	.cfi_offset 15, -24\n"
"	.cfi_offset 14, -32\n"
"	.cfi_offset 13, -40\n"
"	movl	%ecx, %r13d\n"
"	pushq	%r12\n"
"	.cfi_offset 12, -48\n"
"	movq	%rdi, %r12\n"
"	pushq	%rbx\n"
"	andq	$-32, %rsp\n"
"	.cfi_offset 3, -56\n"
"	movl	%esi, -4(%rsp)\n"
"	movl	16(%rbp), %ecx\n"
"	movl	%edx, -8(%rsp)\n"
// l <= 0: nothing to compare (.myL129). l <= 15: skip the 256-bit loop (.myL130).
"	testl	%ecx, %ecx\n"
"	jle	.myL129\n"
"	leal	-1(%rcx), %eax\n"
"	cmpl	$14, %eax\n"
"	jbe	.myL130\n"
// Window base pointers: %r15 = a+i, %r14 = a+j0, %rbx = a+j1, %r11 = a+j2,
// %r10 = a+j3. %rdi = 32 * (l / 16) = number of bytes covered by full
// 16-element steps. ymm2/ymm4/ymm3/ymm1 accumulate per-lane match counts for
// j0/j1/j2/j3 (vpsubw of an all-ones compare mask adds 1 to the lane).
"	leaq	a(%rip), %rax\n"
"	movl	%edx, %edi\n"
"	vpxor	%xmm1, %xmm1, %xmm1\n"
"	movslq	%esi, %rdx\n"
"	leaq	(%rax,%rdx,2), %r15\n"
"	vmovdqa	%ymm1, %ymm3\n"
"	vmovdqa	%ymm1, %ymm4\n"
"	movslq	%edi, %rdx\n"
"	movl	%ecx, %edi\n"
"	leaq	(%rax,%rdx,2), %r14\n"
"	vmovdqa	%ymm1, %ymm2\n"
"	movslq	%r13d, %rdx\n"
"	shrl	$4, %edi\n"
"	leaq	(%rax,%rdx,2), %rbx\n"
"	movslq	%r8d, %rdx\n"
"	salq	$5, %rdi\n"
"	leaq	(%rax,%rdx,2), %r11\n"
"	movslq	%r9d, %rdx\n"
"	leaq	-32(%rdi), %rsi\n"
"	leaq	(%rax,%rdx,2), %r10\n"
// Peel (number of 32-byte steps) mod 4 steps so the main loop can run
// four steps per iteration.
"	xorl	%edx, %edx\n"
"	shrq	$5, %rsi\n"
"	addq	$1, %rsi\n"
"	andl	$3, %esi\n"
"	je	.myL125\n"
"	cmpq	$1, %rsi\n"
"	je	.myL141\n"
"	cmpq	$2, %rsi\n"
"	je	.myL142\n"
"	vmovdqu	(%r15), %ymm0\n"
"	movl	$32, %edx\n"
"	vpcmpeqw	(%r14), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm1, %ymm2\n"
"	vpcmpeqw	(%rbx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm1, %ymm4\n"
"	vpcmpeqw	(%r11), %ymm0, %ymm5\n"
"	vpcmpeqw	(%r10), %ymm0, %ymm0\n"
"	vpsubw	%ymm5, %ymm1, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
".myL142:\n"
"	vmovdqu	(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	(%r14,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm2, %ymm2\n"
"	vpcmpeqw	(%rbx,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm4, %ymm4\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm5\n"
"	vpcmpeqw	(%r10,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vpsubw	%ymm5, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
".myL141:\n"
"	vmovdqu	(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	(%r14,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm2, %ymm2\n"
"	vpcmpeqw	(%rbx,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm4, %ymm4\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm5\n"
"	vpcmpeqw	(%r10,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vpsubw	%ymm5, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rdx, %rdi\n"
"	je	.myL147\n"
// Main loop: four 16-element steps (128 bytes of each window) per iteration.
".myL125:\n"
"	vmovdqu	(%r15,%rdx), %ymm0\n"
"	leaq	32(%rdx), %rsi\n"
"	vpcmpeqw	(%r14,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm2, %ymm2\n"
"	vpcmpeqw	(%rbx,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm4, %ymm4\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm5\n"
"	vpcmpeqw	(%r10,%rdx), %ymm0, %ymm0\n"
"	vpsubw	%ymm5, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	32(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	32(%r14,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm2, %ymm2\n"
"	vpcmpeqw	32(%rbx,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm4, %ymm4\n"
"	vpcmpeqw	32(%r11,%rdx), %ymm0, %ymm5\n"
"	vpcmpeqw	32(%r10,%rdx), %ymm0, %ymm0\n"
"	vpsubw	%ymm5, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	64(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	64(%r14,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm2, %ymm2\n"
"	vpcmpeqw	64(%rbx,%rdx), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm4, %ymm4\n"
"	vpcmpeqw	64(%r11,%rdx), %ymm0, %ymm5\n"
"	vpcmpeqw	64(%r10,%rdx), %ymm0, %ymm0\n"
"	leaq	96(%rsi), %rdx\n"
"	vpsubw	%ymm5, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	64(%r15,%rsi), %ymm0\n"
"	vpcmpeqw	64(%r14,%rsi), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm2, %ymm2\n"
"	vpcmpeqw	64(%rbx,%rsi), %ymm0, %ymm5\n"
"	vpsubw	%ymm5, %ymm4, %ymm4\n"
"	vpcmpeqw	64(%r11,%rsi), %ymm0, %ymm5\n"
"	vpcmpeqw	64(%r10,%rsi), %ymm0, %ymm0\n"
"	vpsubw	%ymm5, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rdx, %rdi\n"
"	jne	.myL125\n"
// Horizontal sums of the 16-bit lanes: ymm1 -> %esi (j3), ymm3 -> %edi (j2),
// ymm4 -> %r10d (j1), ymm2 -> %r11d (j0). %ebx = %edx = l rounded down to a
// multiple of 16, i.e. the number of elements already handled.
".myL147:\n"
"	vmovdqa	%xmm1, %xmm0\n"
"	vextracti128	$0x1, %ymm1, %xmm1\n"
"	movl	%ecx, %ebx\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	andl	$-16, %ebx\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	movl	%ebx, %edx\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %esi\n"
"	vmovdqa	%xmm3, %xmm0\n"
"	vextracti128	$0x1, %ymm3, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edi\n"
"	vmovdqa	%xmm4, %xmm0\n"
"	vextracti128	$0x1, %ymm4, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r10d\n"
"	vextracti128	$0x1, %ymm2, %xmm0\n"
"	vpaddw	%xmm2, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r11d\n"
"	cmpl	%ecx, %ebx\n"
"	je	.myL153\n"
"	vzeroupper\n"
// Tail: %r15d = elements left. With 8 or more left, handle 8 of them with
// one 128-bit step and add the lane sums to the scalar counters.
".myL124:\n"
"	movl	%ecx, %r15d\n"
"	subl	%ebx, %r15d\n"
"	leal	-1(%r15), %r14d\n"
"	cmpl	$6, %r14d\n"
"	jbe	.myL127\n"
"	movslq	-4(%rsp), %r14\n"
"	vpcmpeqw	%xmm4, %xmm4, %xmm4\n"
"	vpxor	%xmm0, %xmm0, %xmm0\n"
"	vpsubw	%xmm4, %xmm0, %xmm4\n"
"	addq	%rbx, %r14\n"
"	vmovdqu	(%rax,%r14,2), %xmm0\n"
"	movslq	-8(%rsp), %r14\n"
"	addq	%rbx, %r14\n"
"	vpcmpeqw	(%rax,%r14,2), %xmm0, %xmm2\n"
"	movslq	%r13d, %r14\n"
"	addq	%rbx, %r14\n"
"	vpcmpeqw	(%rax,%r14,2), %xmm0, %xmm3\n"
"	movslq	%r8d, %r14\n"
"	addq	%rbx, %r14\n"
"	vpand	%xmm4, %xmm2, %xmm2\n"
"	vpcmpeqw	(%rax,%r14,2), %xmm0, %xmm1\n"
"	movslq	%r9d, %r14\n"
"	addq	%rbx, %r14\n"
"	vpand	%xmm4, %xmm3, %xmm3\n"
"	vpcmpeqw	(%rax,%r14,2), %xmm0, %xmm0\n"
"	vpand	%xmm4, %xmm1, %xmm1\n"
"	vpand	%xmm4, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %ebx\n"
"	vpsrldq	$8, %xmm1, %xmm0\n"
"	vpaddw	%xmm0, %xmm1, %xmm0\n"
"	addl	%ebx, %esi\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %ebx\n"
"	vpsrldq	$8, %xmm3, %xmm0\n"
"	vpaddw	%xmm0, %xmm3, %xmm0\n"
"	addl	%ebx, %edi\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %ebx\n"
"	vpsrldq	$8, %xmm2, %xmm0\n"
"	vpaddw	%xmm0, %xmm2, %xmm0\n"
"	addl	%ebx, %r10d\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %ebx\n"
"	addl	%ebx, %r11d\n"
"	movl	%r15d, %ebx\n"
"	andl	$-8, %ebx\n"
"	addl	%ebx, %edx\n"
"	cmpl	%ebx, %r15d\n"
"	je	.myL126\n"
// Scalar tail: at most seven remaining elements, fully unrolled; %edx is the
// index of the first element still to compare.
".myL127:\n"
"	movl	-4(%rsp), %r15d\n"
"	movl	-8(%rsp), %r14d\n"
"	leal	(%r15,%rdx), %ebx\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	movslq	%ebx, %rbx\n"
"	movzwl	(%rax,%rbx,2), %ebx\n"
"	cmpw	%bx, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	leal	0(%r13,%rdx), %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%bx, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r10d\n"
"	leal	(%r8,%rdx), %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%bx, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %edi\n"
"	leal	(%r9,%rdx), %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%bx, (%rax,%r14,2)\n"
"	sete	%bl\n"
"	movzbl	%bl, %ebx\n"
"	addl	%ebx, %esi\n"
"	leal	1(%rdx), %ebx\n"
"	cmpl	%ebx, %ecx\n"
"	jle	.myL126\n"
"	leal	(%r15,%rbx), %r14d\n"
"	movl	-8(%rsp), %r15d\n"
"	movslq	%r14d, %r14\n"
"	addl	%ebx, %r15d\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r11d\n"
"	leal	0(%r13,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r10d\n"
"	leal	(%r8,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r9d, %ebx\n"
"	movslq	%ebx, %rbx\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %edi\n"
"	cmpw	%r14w, (%rax,%rbx,2)\n"
"	sete	%bl\n"
"	movzbl	%bl, %ebx\n"
"	addl	%ebx, %esi\n"
"	leal	2(%rdx), %ebx\n"
"	cmpl	%ebx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r15d\n"
"	leal	(%r15,%rbx), %r14d\n"
"	movl	-8(%rsp), %r15d\n"
"	movslq	%r14d, %r14\n"
"	addl	%ebx, %r15d\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r11d\n"
"	leal	0(%r13,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r10d\n"
"	leal	(%r8,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r9d, %ebx\n"
"	movslq	%ebx, %rbx\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %edi\n"
"	cmpw	%r14w, (%rax,%rbx,2)\n"
"	sete	%bl\n"
"	movzbl	%bl, %ebx\n"
"	addl	%ebx, %esi\n"
"	leal	3(%rdx), %ebx\n"
"	cmpl	%ebx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r15d\n"
"	leal	(%r15,%rbx), %r14d\n"
"	movl	-8(%rsp), %r15d\n"
"	movslq	%r14d, %r14\n"
"	addl	%ebx, %r15d\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r11d\n"
"	leal	0(%r13,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r10d\n"
"	leal	(%r8,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r9d, %ebx\n"
"	movslq	%ebx, %rbx\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %edi\n"
"	cmpw	%r14w, (%rax,%rbx,2)\n"
"	sete	%bl\n"
"	movzbl	%bl, %ebx\n"
"	addl	%ebx, %esi\n"
"	leal	4(%rdx), %ebx\n"
"	cmpl	%ebx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r15d\n"
"	leal	(%r15,%rbx), %r14d\n"
"	movl	-8(%rsp), %r15d\n"
"	movslq	%r14d, %r14\n"
"	addl	%ebx, %r15d\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r11d\n"
"	leal	0(%r13,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r10d\n"
"	leal	(%r8,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r9d, %ebx\n"
"	movslq	%ebx, %rbx\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %edi\n"
"	cmpw	%r14w, (%rax,%rbx,2)\n"
"	sete	%bl\n"
"	movzbl	%bl, %ebx\n"
"	addl	%ebx, %esi\n"
"	leal	5(%rdx), %ebx\n"
"	cmpl	%ebx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r15d\n"
"	leal	(%r15,%rbx), %r14d\n"
"	movl	-8(%rsp), %r15d\n"
"	movslq	%r14d, %r14\n"
"	addl	%ebx, %r15d\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r11d\n"
"	leal	0(%r13,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %r10d\n"
"	leal	(%r8,%rbx), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r9d, %ebx\n"
"	movslq	%ebx, %rbx\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %edi\n"
"	cmpw	%r14w, (%rax,%rbx,2)\n"
"	sete	%bl\n"
"	addl	$6, %edx\n"
"	movzbl	%bl, %ebx\n"
"	addl	%ebx, %esi\n"
"	cmpl	%edx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %ebx\n"
"	movl	-8(%rsp), %r14d\n"
"	addl	%edx, %ebx\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	movslq	%ebx, %rbx\n"
"	movzwl	(%rax,%rbx,2), %ebx\n"
"	cmpw	%bx, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	addl	%edx, %r13d\n"
"	movslq	%r13d, %r13\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	cmpw	%bx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	addl	%edx, %r8d\n"
"	movslq	%r8d, %r8\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r10d\n"
"	cmpw	%bx, (%rax,%r8,2)\n"
"	sete	%r8b\n"
"	addl	%r9d, %edx\n"
"	movzbl	%r8b, %r8d\n"
"	movslq	%edx, %rdx\n"
"	addl	%r8d, %edi\n"
"	cmpw	%bx, (%rax,%rdx,2)\n"
"	sete	%al\n"
"	movzbl	%al, %eax\n"
"	addl	%eax, %esi\n"
// Turn match counts into mismatch counts: each result is l minus the
// sign-extended 16-bit counter.
".myL126:\n"
"	movl	%ecx, %eax\n"
"	movl	%ecx, %edx\n"
"	movl	%ecx, %r8d\n"
"	movswl	%r11w, %r11d\n"
"	movswl	%r10w, %r10d\n"
"	movswl	%di, %edi\n"
"	movswl	%si, %esi\n"
"	subl	%r11d, %eax\n"
"	subl	%r10d, %edx\n"
"	subl	%edi, %r8d\n"
"	subl	%esi, %ecx\n"
// Store the four 16-bit results through %r12: the j0 result at byte offset 6,
// j1 at 4, j2 at 2, j3 at 0 (NOTE(review): presumably the in-memory element
// order of the standard library's tuple — confirm), return the pointer in
// %rax, and restore the saved registers.
".myL123:\n"
"	movw	%ax, 6(%r12)\n"
"	movq	%r12, %rax\n"
"	movw	%cx, (%r12)\n"
"	movw	%r8w, 2(%r12)\n"
"	movw	%dx, 4(%r12)\n"
"	leaq	-40(%rbp), %rsp\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// l <= 0: no comparisons are made, so all four results are l itself.
".myL129:\n"
"	.cfi_restore_state\n"
"	movl	%ecx, %r8d\n"
"	movl	%ecx, %edx\n"
"	movl	%ecx, %eax\n"
"	jmp	.myL123\n"
// 1 <= l <= 15: zero the counters and the processed-element index, then go
// straight to the tail code.
".myL130:\n"
"	xorl	%ebx, %ebx\n"
"	xorl	%edx, %edx\n"
"	xorl	%esi, %esi\n"
"	xorl	%edi, %edi\n"
"	xorl	%r10d, %r10d\n"
"	xorl	%r11d, %r11d\n"
"	leaq	a(%rip), %rax\n"
"	jmp	.myL124\n"
".myL153:\n"
"	vzeroupper\n"
"	jmp	.myL126\n"
"	.cfi_endproc\n"
".myLFE9901:\n"
"	.size	_Z8get_sim4iiiiii, .-_Z8get_sim4iiiiii\n"
);

// Scratch buffers refilled by every call to process(i).
// sim_cnt[d]: number of windows j > i whose distance to window i equals d.
short sim_cnt[N+1];
// sim_pre[d+1]: sum of sim_cnt[0..d], i.e. how many of those windows have
// distance at most d.
short sim_pre[N+2];
// sim[j]: distance between window i and window j, for j > i.
// up_from_other2 reads this array through pointers to 32-byte vectors declared
// aligned(32), so the alignment is requested explicitly rather than left to
// the compiler's default placement of globals.
alignas(32) short sim[N];

__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up_from_other2(int qr0, int qr1, int st)
{
	char *ans0 = ::ansc[qr0];
	char *ans1 = ::ansc[qr1];
	short val0 = query[qr0]+1;
	short val1 = query[qr1]+1;
	while (st%32) {
		ans0[st] += sim[st] < val0;
		ans1[st] += sim[st] < val1;
		++st;
	}
	typedef short ymms __attribute__((vector_size(32),aligned(32)));
	typedef char ymmc __attribute__((vector_size(32),aligned(32)));
	ymmc *vans0 = (ymmc *)ans0;
	ymmc *vans1 = (ymmc *)ans1;
	ymms *vsim = (ymms *)sim;
	for (int i = st/32; i < N/32; ++i) {
		ymms tmp0 = vsim[i*2], tmp1 = vsim[i*2+1];
		ymms t00 = tmp0 < val0;
		ymms t10 = tmp1 < val0;
		ymms t01 = tmp0 < val1;
		ymms t11 = tmp1 < val1;
		ymmc x0 = __builtin_shuffle((ymmc)t00, (ymmc)t10, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
		ymmc x1 = __builtin_shuffle((ymmc)t01, (ymmc)t11, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
		vans0[i] -= x0;
		vans1[i] -= x1;
	}
}

__attribute__((optimize("O3,unroll-loops"),target("avx")))
void flush_ansc()
{
	Loop (i,0,q) Loop (j,0,n) {
		ans[i][j] += (unsigned char)ansc[i][j];
		ansc[i][j] = 0;
	}
}

void process(int i)
{
	if (i%255 == 0)
		flush_ansc();
	memset(sim_cnt, 0, sizeof(sim_cnt));
	for (int j = i+1; j+4 <= n-l+1; j += 4) {
		auto [t0, t1, t2, t3] = get_sim4(i, j, j+1, j+2, j+3, l);
		sim[j+0] = t0; sim[j+1] = t1; sim[j+2] = t2; sim[j+3] = t3;
		sim_cnt[t0]++; sim_cnt[t1]++; sim_cnt[t2]++; sim_cnt[t3]++;
	}
	Loop (j, n-l+1 - (n-l+1 - (i+1))%4, n-l+1) {
		sim[j] = get_sim(i, j, l);
		sim_cnt[sim[j]]++;
	}
	sim_pre[0] = 0;
	Loop (j,0,l+1)
		sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
	Loop (j,0,q)
		ans[j][i] += sim_pre[query[j]+1];
	for (int j = 0; j < q; j += 2)
		up_from_other2(j, j+1, i+1);
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	vector<int> cmper;
	cin >> n >> l;
	Loop (i,0,n) {
		cin >> noncmp_a[i];
		cmper.push_back(noncmp_a[i]);
	}
	cin >> q;
	Loop (i,0,q)
		cin >> query[i];
	sort(cmper.begin(), cmper.end());
	cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
	Loop (i,0,n) {
		a[i] = lower_bound(cmper.begin(), cmper.end(),
		                   noncmp_a[i]) - cmper.begin();
	}
	Loop (i,0,n-l+1)
		process(i);
	flush_ansc();
	Loop (i,0,q) {
		Loop (j,0,n-l+1)
			cout << ans[i][j] << ' ';
		cout << '\n';
	}
}

# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 2 ms 596 KB Output is correct
3 Correct 2 ms 584 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
6 Correct 2 ms 468 KB Output is correct
7 Correct 1 ms 460 KB Output is correct
8 Correct 4 ms 580 KB Output is correct
9 Correct 3 ms 596 KB Output is correct
10 Correct 6 ms 580 KB Output is correct
11 Correct 7 ms 596 KB Output is correct
12 Correct 5 ms 596 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 2 ms 596 KB Output is correct
3 Correct 2 ms 584 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
6 Correct 2 ms 468 KB Output is correct
7 Correct 1 ms 460 KB Output is correct
8 Correct 4 ms 580 KB Output is correct
9 Correct 3 ms 596 KB Output is correct
10 Correct 6 ms 580 KB Output is correct
11 Correct 7 ms 596 KB Output is correct
12 Correct 5 ms 596 KB Output is correct
13 Correct 21 ms 596 KB Output is correct
14 Correct 51 ms 764 KB Output is correct
15 Correct 42 ms 608 KB Output is correct
16 Correct 54 ms 764 KB Output is correct
17 Correct 56 ms 756 KB Output is correct
18 Correct 57 ms 752 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 257 ms 596 KB Output is correct
2 Correct 378 ms 596 KB Output is correct
3 Correct 256 ms 596 KB Output is correct
4 Correct 663 ms 668 KB Output is correct
5 Correct 2563 ms 664 KB Output is correct
6 Correct 1484 ms 664 KB Output is correct
7 Correct 2514 ms 660 KB Output is correct
8 Execution timed out 3004 ms 660 KB Time limit exceeded
9 Halted 0 ms 0 KB -
# 결과 실행 시간 메모리 Grader output
1 Correct 257 ms 596 KB Output is correct
2 Correct 378 ms 596 KB Output is correct
3 Correct 256 ms 596 KB Output is correct
4 Correct 663 ms 668 KB Output is correct
5 Correct 2563 ms 664 KB Output is correct
6 Correct 1484 ms 664 KB Output is correct
7 Correct 2514 ms 660 KB Output is correct
8 Execution timed out 3004 ms 660 KB Time limit exceeded
9 Halted 0 ms 0 KB -
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 2 ms 596 KB Output is correct
3 Correct 2 ms 584 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
6 Correct 2 ms 468 KB Output is correct
7 Correct 1 ms 460 KB Output is correct
8 Correct 4 ms 580 KB Output is correct
9 Correct 3 ms 596 KB Output is correct
10 Correct 6 ms 580 KB Output is correct
11 Correct 7 ms 596 KB Output is correct
12 Correct 5 ms 596 KB Output is correct
13 Correct 21 ms 596 KB Output is correct
14 Correct 51 ms 764 KB Output is correct
15 Correct 42 ms 608 KB Output is correct
16 Correct 54 ms 764 KB Output is correct
17 Correct 56 ms 756 KB Output is correct
18 Correct 57 ms 752 KB Output is correct
19 Correct 257 ms 596 KB Output is correct
20 Correct 378 ms 596 KB Output is correct
21 Correct 256 ms 596 KB Output is correct
22 Correct 663 ms 668 KB Output is correct
23 Correct 2563 ms 664 KB Output is correct
24 Correct 1484 ms 664 KB Output is correct
25 Correct 2514 ms 660 KB Output is correct
26 Execution timed out 3004 ms 660 KB Time limit exceeded
27 Halted 0 ms 0 KB -