Submission #652334

# | Time | Username | Problem | Language | Result | Execution time | Memory
652334 | — | ymm | Lottery (CEOI18_lot) | C++17 | 100 / 100 | 2741 ms | 7444 KiB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 10016;
const int Q = 100;
int mylist[N];
short ans[Q][N];
char ansc[Q][N];
short query[Q];
int n, q, l;

int noncmp_a[N];
short a[N];

__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
	short ans = 0;
	for (int k = 0; k < l; ++k)
		ans += a[i+k] == a[j+k];
	return l-ans;
}
//__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short,short,short,short,short,short> get_sim8(int i, int j0, int j1, int j2, int j3, int j4, int j5, int j6, int j7, int l);
/*{
	short ans0 = 0, ans1 = 0, ans2 = 0, ans3 = 0, ans4 = 0, ans5 = 0, ans6 = 0, ans7 = 0;
	for (int k = 0; k < l; ++k) {
		ans0 += a[i+k] == a[j0+k];
		ans1 += a[i+k] == a[j1+k];
		ans2 += a[i+k] == a[j2+k];
		ans3 += a[i+k] == a[j3+k];
		ans4 += a[i+k] == a[j4+k];
		ans5 += a[i+k] == a[j5+k];
		ans6 += a[i+k] == a[j6+k];
		ans7 += a[i+k] == a[j7+k];
	}
	return {l-ans0, l-ans1, l-ans2, l-ans3, l-ans4, l-ans5, l-ans6, l-ans7};
}*/
asm("\n"
"	.p2align 4\n"
"	.globl	_Z8get_sim8iiiiiiiiii\n"
"	.type	_Z8get_sim8iiiiiiiiii, @function\n"
"_Z8get_sim8iiiiiiiiii:\n"
".myLFB9901:\n"
"	.cfi_startproc\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r15\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 15, -24\n"
"	.cfi_offset 14, -32\n"
"	.cfi_offset 13, -40\n"
"	.cfi_offset 12, -48\n"
"	.cfi_offset 3, -56\n"
"	movl	%ecx, %ebx\n"
"	andq	$-32, %rsp\n"
"	movl	%ecx, -12(%rsp)\n"
"	movl	48(%rbp), %ecx\n"
"	movq	%rdi, -32(%rsp)\n"
"	movl	%esi, -4(%rsp)\n"
"	movl	%edx, -8(%rsp)\n"
"	movl	%r8d, -16(%rsp)\n"
"	movl	%r9d, -20(%rsp)\n"
"	testl	%ecx, %ecx\n"
"	jle	.myL129\n"
"	leal	-1(%rcx), %eax\n"
"	cmpl	$14, %eax\n"
"	jbe	.myL130\n"
"	leaq	a(%rip), %rax\n"
"	movl	%edx, %edi\n"
"	movslq	%esi, %rdx\n"
"	movl	%ecx, %r14d\n"
"	leaq	(%rax,%rdx,2), %r13\n"
"	movslq	%edi, %rdx\n"
"	shrl	$4, %r14d\n"
"	vpxor	%xmm1, %xmm1, %xmm1\n"
"	leaq	(%rax,%rdx,2), %r12\n"
"	movslq	%ebx, %rdx\n"
"	vmovdqa	%ymm1, %ymm4\n"
"	movq	%r14, %r15\n"
"	leaq	(%rax,%rdx,2), %rbx\n"
"	movslq	%r8d, %rdx\n"
"	salq	$5, %r15\n"
"	vmovdqa	%ymm1, %ymm5\n"
"	leaq	(%rax,%rdx,2), %r11\n"
"	vmovdqa	%ymm1, %ymm6\n"
"	vmovdqa	%ymm1, %ymm7\n"
"	movslq	%r9d, %rdx\n"
"	leaq	(%rax,%rdx,2), %r10\n"
"	movslq	16(%rbp), %rdx\n"
"	vmovdqa	%ymm1, %ymm8\n"
"	vmovdqa	%ymm1, %ymm2\n"
"	vmovdqa	%ymm1, %ymm3\n"
"	leaq	(%rax,%rdx,2), %r9\n"
"	movslq	24(%rbp), %rdx\n"
"	leaq	(%rax,%rdx,2), %r8\n"
"	movslq	32(%rbp), %rdx\n"
"	leaq	(%rax,%rdx,2), %rdi\n"
"	movslq	40(%rbp), %rdx\n"
"	leaq	(%rax,%rdx,2), %rsi\n"
"	xorl	%edx, %edx\n"
"	andl	$1, %r14d\n"
"	je	.myL125\n"
"	vmovdqu	0(%r13), %ymm0\n"
"	movl	$32, %edx\n"
"	vpcmpeqw	(%r12), %ymm0, %ymm3\n"
"	vpcmpeqw	(%rbx), %ymm0, %ymm2\n"
"	vpcmpeqw	(%r11), %ymm0, %ymm8\n"
"	vpcmpeqw	(%r10), %ymm0, %ymm7\n"
"	vpcmpeqw	(%r9), %ymm0, %ymm6\n"
"	vpcmpeqw	(%r8), %ymm0, %ymm5\n"
"	vpcmpeqw	(%rdi), %ymm0, %ymm4\n"
"	vpcmpeqw	(%rsi), %ymm0, %ymm0\n"
"	vpsubw	%ymm3, %ymm1, %ymm3\n"
"	vpsubw	%ymm2, %ymm1, %ymm2\n"
"	vpsubw	%ymm8, %ymm1, %ymm8\n"
"	vpsubw	%ymm7, %ymm1, %ymm7\n"
"	vpsubw	%ymm6, %ymm1, %ymm6\n"
"	vpsubw	%ymm5, %ymm1, %ymm5\n"
"	vpsubw	%ymm4, %ymm1, %ymm4\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	cmpq	$32, %r15\n"
"	je	.myL137\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL125:\n"
"	vmovdqu	0(%r13,%rdx), %ymm0\n"
"	vpcmpeqw	(%r12,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm3, %ymm3\n"
"	vpcmpeqw	(%rbx,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm2, %ymm2\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm8, %ymm8\n"
"	vpcmpeqw	(%r10,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm7, %ymm7\n"
"	vpcmpeqw	(%r9,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm6, %ymm6\n"
"	vpcmpeqw	(%r8,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm5, %ymm5\n"
"	vpcmpeqw	(%rdi,%rdx), %ymm0, %ymm9\n"
"	vpcmpeqw	(%rsi,%rdx), %ymm0, %ymm0\n"
"	vpsubw	%ymm9, %ymm4, %ymm4\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	32(%r13,%rdx), %ymm0\n"
"	vpcmpeqw	32(%r12,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm3, %ymm3\n"
"	vpcmpeqw	32(%rbx,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm2, %ymm2\n"
"	vpcmpeqw	32(%r11,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm8, %ymm8\n"
"	vpcmpeqw	32(%r10,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm7, %ymm7\n"
"	vpcmpeqw	32(%r9,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm6, %ymm6\n"
"	vpcmpeqw	32(%r8,%rdx), %ymm0, %ymm9\n"
"	vpsubw	%ymm9, %ymm5, %ymm5\n"
"	vpcmpeqw	32(%rdi,%rdx), %ymm0, %ymm9\n"
"	vpcmpeqw	32(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$64, %rdx\n"
"	vpsubw	%ymm9, %ymm4, %ymm4\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rdx, %r15\n"
"	jne	.myL125\n"
".myL137:\n"
"	vmovdqa	%xmm1, %xmm0\n"
"	vextracti128	$0x1, %ymm1, %xmm1\n"
"	movl	%ecx, %edx\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	andl	$-16, %edx\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	movl	%edx, %esi\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edi\n"
"	vmovdqa	%xmm4, %xmm0\n"
"	vextracti128	$0x1, %ymm4, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r15d\n"
"	vmovdqa	%xmm5, %xmm0\n"
"	vextracti128	$0x1, %ymm5, %xmm5\n"
"	vpaddw	%xmm5, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r8d\n"
"	vmovdqa	%xmm6, %xmm0\n"
"	vextracti128	$0x1, %ymm6, %xmm6\n"
"	vpaddw	%xmm6, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r9d\n"
"	vmovdqa	%xmm7, %xmm0\n"
"	vextracti128	$0x1, %ymm7, %xmm7\n"
"	vpaddw	%xmm7, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r10d\n"
"	vextracti128	$0x1, %ymm8, %xmm0\n"
"	vpaddw	%xmm8, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r11d\n"
"	vextracti128	$0x1, %ymm2, %xmm0\n"
"	vpaddw	%xmm2, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %ebx\n"
"	vextracti128	$0x1, %ymm3, %xmm0\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r12d\n"
"	cmpl	%ecx, %edx\n"
"	je	.myL142\n"
"	vzeroupper\n"
".myL124:\n"
"	movl	%ecx, %r14d\n"
"	subl	%edx, %r14d\n"
"	leal	-1(%r14), %r13d\n"
"	cmpl	$6, %r13d\n"
"	jbe	.myL127\n"
"	movslq	-4(%rsp), %r13\n"
"	vpcmpeqw	%xmm8, %xmm8, %xmm8\n"
"	vpxor	%xmm0, %xmm0, %xmm0\n"
"	vpsubw	%xmm8, %xmm0, %xmm8\n"
"	addq	%rdx, %r13\n"
"	vmovdqu	(%rax,%r13,2), %xmm0\n"
"	movslq	-8(%rsp), %r13\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm1\n"
"	movslq	-12(%rsp), %r13\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm2\n"
"	movslq	-16(%rsp), %r13\n"
"	vpand	%xmm8, %xmm1, %xmm1\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm3\n"
"	movslq	-20(%rsp), %r13\n"
"	vpand	%xmm8, %xmm2, %xmm2\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm4\n"
"	movslq	16(%rbp), %r13\n"
"	vpand	%xmm8, %xmm3, %xmm3\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm5\n"
"	movslq	24(%rbp), %r13\n"
"	vpand	%xmm8, %xmm4, %xmm4\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm6\n"
"	movslq	32(%rbp), %r13\n"
"	vpand	%xmm8, %xmm5, %xmm5\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm7\n"
"	movslq	40(%rbp), %r13\n"
"	vpand	%xmm8, %xmm6, %xmm6\n"
"	addq	%rdx, %r13\n"
"	vpcmpeqw	(%rax,%r13,2), %xmm0, %xmm0\n"
"	vpand	%xmm8, %xmm7, %xmm7\n"
"	vpand	%xmm8, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm8\n"
"	vpaddw	%xmm8, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm8\n"
"	vpaddw	%xmm8, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm8\n"
"	vpaddw	%xmm8, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm7, %xmm0\n"
"	vpaddw	%xmm0, %xmm7, %xmm0\n"
"	addl	%edx, %edi\n"
"	vpsrldq	$4, %xmm0, %xmm7\n"
"	vpaddw	%xmm7, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm7\n"
"	vpaddw	%xmm7, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm6, %xmm0\n"
"	vpaddw	%xmm0, %xmm6, %xmm0\n"
"	addl	%edx, %r15d\n"
"	vpsrldq	$4, %xmm0, %xmm6\n"
"	vpaddw	%xmm6, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm6\n"
"	vpaddw	%xmm6, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm5, %xmm0\n"
"	vpaddw	%xmm0, %xmm5, %xmm0\n"
"	addl	%edx, %r8d\n"
"	vpsrldq	$4, %xmm0, %xmm5\n"
"	vpaddw	%xmm5, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm5\n"
"	vpaddw	%xmm5, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm4, %xmm0\n"
"	vpaddw	%xmm0, %xmm4, %xmm0\n"
"	addl	%edx, %r9d\n"
"	vpsrldq	$4, %xmm0, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm4\n"
"	vpaddw	%xmm4, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm3, %xmm0\n"
"	vpaddw	%xmm0, %xmm3, %xmm0\n"
"	addl	%edx, %r10d\n"
"	vpsrldq	$4, %xmm0, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm2, %xmm0\n"
"	vpaddw	%xmm0, %xmm2, %xmm0\n"
"	addl	%edx, %r11d\n"
"	vpsrldq	$4, %xmm0, %xmm2\n"
"	vpaddw	%xmm2, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm2\n"
"	vpaddw	%xmm2, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vpsrldq	$8, %xmm1, %xmm0\n"
"	vpaddw	%xmm0, %xmm1, %xmm0\n"
"	addl	%edx, %ebx\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	addl	%edx, %r12d\n"
"	movl	%r14d, %edx\n"
"	andl	$-8, %edx\n"
"	addl	%edx, %esi\n"
"	cmpl	%edx, %r14d\n"
"	je	.myL126\n"
".myL127:\n"
"	movl	-4(%rsp), %r14d\n"
"	movl	-8(%rsp), %r13d\n"
"	leal	(%r14,%rsi), %edx\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	movslq	%edx, %rdx\n"
"	movzwl	(%rax,%rdx,2), %edx\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r12d\n"
"	movl	-12(%rsp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %ebx\n"
"	movl	-16(%rsp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r11d\n"
"	movl	-20(%rsp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r10d\n"
"	movl	16(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r9d\n"
"	movl	24(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r8d\n"
"	movl	32(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r15d\n"
"	movl	40(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%dl\n"
"	movzbl	%dl, %edx\n"
"	addl	%edx, %edi\n"
"	leal	1(%rsi), %edx\n"
"	cmpl	%edx, %ecx\n"
"	jle	.myL126\n"
"	leal	(%r14,%rdx), %r13d\n"
"	movl	-8(%rsp), %r14d\n"
"	movslq	%r13d, %r13\n"
"	addl	%edx, %r14d\n"
"	movzwl	(%rax,%r13,2), %r13d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r12d\n"
"	movl	-12(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %ebx\n"
"	movl	-16(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	movl	-20(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r10d\n"
"	movl	16(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r9d\n"
"	movl	24(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r8d\n"
"	movl	32(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	addl	40(%rbp), %edx\n"
"	movslq	%edx, %rdx\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r15d\n"
"	cmpw	%r13w, (%rax,%rdx,2)\n"
"	sete	%dl\n"
"	movzbl	%dl, %edx\n"
"	addl	%edx, %edi\n"
"	leal	2(%rsi), %edx\n"
"	cmpl	%edx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r14d\n"
"	leal	(%r14,%rdx), %r13d\n"
"	movl	-8(%rsp), %r14d\n"
"	movslq	%r13d, %r13\n"
"	addl	%edx, %r14d\n"
"	movzwl	(%rax,%r13,2), %r13d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r12d\n"
"	movl	-12(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %ebx\n"
"	movl	-16(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	movl	-20(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r10d\n"
"	movl	16(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r9d\n"
"	movl	24(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r8d\n"
"	movl	32(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	addl	40(%rbp), %edx\n"
"	movslq	%edx, %rdx\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r15d\n"
"	cmpw	%r13w, (%rax,%rdx,2)\n"
"	sete	%dl\n"
"	movzbl	%dl, %edx\n"
"	addl	%edx, %edi\n"
"	leal	3(%rsi), %edx\n"
"	cmpl	%edx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r14d\n"
"	leal	(%r14,%rdx), %r13d\n"
"	movl	-8(%rsp), %r14d\n"
"	movslq	%r13d, %r13\n"
"	addl	%edx, %r14d\n"
"	movzwl	(%rax,%r13,2), %r13d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r12d\n"
"	movl	-12(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %ebx\n"
"	movl	-16(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	movl	-20(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r10d\n"
"	movl	16(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r9d\n"
"	movl	24(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r8d\n"
"	movl	32(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	addl	40(%rbp), %edx\n"
"	movslq	%edx, %rdx\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r15d\n"
"	cmpw	%r13w, (%rax,%rdx,2)\n"
"	sete	%dl\n"
"	movzbl	%dl, %edx\n"
"	addl	%edx, %edi\n"
"	leal	4(%rsi), %edx\n"
"	cmpl	%edx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r14d\n"
"	leal	(%r14,%rdx), %r13d\n"
"	movl	-8(%rsp), %r14d\n"
"	movslq	%r13d, %r13\n"
"	addl	%edx, %r14d\n"
"	movzwl	(%rax,%r13,2), %r13d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r12d\n"
"	movl	-12(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %ebx\n"
"	movl	-16(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	movl	-20(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r10d\n"
"	movl	16(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r9d\n"
"	movl	24(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r8d\n"
"	movl	32(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	addl	40(%rbp), %edx\n"
"	movslq	%edx, %rdx\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r15d\n"
"	cmpw	%r13w, (%rax,%rdx,2)\n"
"	sete	%dl\n"
"	movzbl	%dl, %edx\n"
"	addl	%edx, %edi\n"
"	leal	5(%rsi), %edx\n"
"	cmpl	%edx, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %r14d\n"
"	leal	(%r14,%rdx), %r13d\n"
"	movl	-8(%rsp), %r14d\n"
"	movslq	%r13d, %r13\n"
"	addl	%edx, %r14d\n"
"	movzwl	(%rax,%r13,2), %r13d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r12d\n"
"	movl	-12(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %ebx\n"
"	movl	-16(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r11d\n"
"	movl	-20(%rsp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r10d\n"
"	movl	16(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r9d\n"
"	movl	24(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r8d\n"
"	movl	32(%rbp), %r14d\n"
"	addl	%edx, %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r13w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	addl	40(%rbp), %edx\n"
"	movslq	%edx, %rdx\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %r15d\n"
"	cmpw	%r13w, (%rax,%rdx,2)\n"
"	sete	%dl\n"
"	addl	$6, %esi\n"
"	movzbl	%dl, %edx\n"
"	addl	%edx, %edi\n"
"	cmpl	%esi, %ecx\n"
"	jle	.myL126\n"
"	movl	-4(%rsp), %edx\n"
"	movl	-8(%rsp), %r13d\n"
"	addl	%esi, %edx\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	movslq	%edx, %rdx\n"
"	movzwl	(%rax,%rdx,2), %edx\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r12d\n"
"	movl	-12(%rsp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %ebx\n"
"	movl	-16(%rsp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r11d\n"
"	movl	-20(%rsp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r10d\n"
"	movl	16(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r9d\n"
"	movl	24(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	movzbl	%r13b, %r13d\n"
"	addl	%r13d, %r8d\n"
"	movl	32(%rbp), %r13d\n"
"	addl	%esi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	cmpw	%dx, (%rax,%r13,2)\n"
"	sete	%r13b\n"
"	addl	40(%rbp), %esi\n"
"	movzbl	%r13b, %r13d\n"
"	movslq	%esi, %rsi\n"
"	addl	%r13d, %r15d\n"
"	cmpw	%dx, (%rax,%rsi,2)\n"
"	sete	%al\n"
"	movzbl	%al, %eax\n"
"	addl	%eax, %edi\n"
".myL126:\n"
"	movswl	%r11w, %r11d\n"
"	movl	%ecx, %esi\n"
"	movswl	%r10w, %r10d\n"
"	movswl	%r9w, %r9d\n"
"	subl	%r11d, %esi\n"
"	movl	%ecx, %r11d\n"
"	movswl	%r8w, %r8d\n"
"	movl	%ecx, %eax\n"
"	subl	%r10d, %r11d\n"
"	movl	%ecx, %r10d\n"
"	movl	%ecx, %edx\n"
"	movswl	%r12w, %r12d\n"
"	subl	%r9d, %r10d\n"
"	movl	%ecx, %r9d\n"
"	movswl	%bx, %ebx\n"
"	movswl	%r15w, %r15d\n"
"	subl	%r8d, %r9d\n"
"	movswl	%di, %edi\n"
"	movl	%ecx, %r8d\n"
"	subl	%r12d, %eax\n"
"	subl	%ebx, %edx\n"
"	subl	%r15d, %r8d\n"
"	subl	%edi, %ecx\n"
".myL123:\n"
"	movq	-32(%rsp), %rbx\n"
"	movw	%ax, 14(%rbx)\n"
"	movq	%rbx, %rax\n"
"	movw	%cx, (%rbx)\n"
"	movw	%r8w, 2(%rbx)\n"
"	movw	%r9w, 4(%rbx)\n"
"	movw	%r10w, 6(%rbx)\n"
"	movw	%r11w, 8(%rbx)\n"
"	movw	%si, 10(%rbx)\n"
"	movw	%dx, 12(%rbx)\n"
"	leaq	-40(%rbp), %rsp\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL129:\n"
"	.cfi_restore_state\n"
"	movl	%ecx, %r8d\n"
"	movl	%ecx, %r9d\n"
"	movl	%ecx, %r10d\n"
"	movl	%ecx, %r11d\n"
"	movl	%ecx, %esi\n"
"	movl	%ecx, %edx\n"
"	movl	%ecx, %eax\n"
"	jmp	.myL123\n"
".myL130:\n"
"	xorl	%edx, %edx\n"
"	xorl	%esi, %esi\n"
"	xorl	%edi, %edi\n"
"	xorl	%r15d, %r15d\n"
"	xorl	%r8d, %r8d\n"
"	xorl	%r9d, %r9d\n"
"	xorl	%r10d, %r10d\n"
"	xorl	%r11d, %r11d\n"
"	xorl	%ebx, %ebx\n"
"	xorl	%r12d, %r12d\n"
"	leaq	a(%rip), %rax\n"
"	jmp	.myL124\n"
".myL142:\n"
"	vzeroupper\n"
"	jmp	.myL126\n"
"	.cfi_endproc\n"
".myLFE9901:\n"
"	.size	_Z8get_sim8iiiiiiiiii, .-_Z8get_sim8iiiiiiiiii\n"
);

short sim_cnt[N+1];
short sim_pre[N+2];
short sim[N];

__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up_from_other2(int qr0, int qr1, int st)
{
	char *ans0 = ::ansc[qr0];
	char *ans1 = ::ansc[qr1];
	short val0 = query[qr0]+1;
	short val1 = query[qr1]+1;
	while (st%32) {
		ans0[st] += sim[st] < val0;
		ans1[st] += sim[st] < val1;
		++st;
	}
	typedef short ymms __attribute__((vector_size(32),aligned(32)));
	typedef char ymmc __attribute__((vector_size(32),aligned(32)));
	ymmc *vans0 = (ymmc *)ans0;
	ymmc *vans1 = (ymmc *)ans1;
	ymms *vsim = (ymms *)sim;
	for (int i = st/32; i < N/32; ++i) {
		ymms tmp0 = vsim[i*2], tmp1 = vsim[i*2+1];
		ymms t00 = tmp0 < val0;
		ymms t10 = tmp1 < val0;
		ymms t01 = tmp0 < val1;
		ymms t11 = tmp1 < val1;
		ymmc x0 = __builtin_shuffle((ymmc)t00, (ymmc)t10, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
		ymmc x1 = __builtin_shuffle((ymmc)t01, (ymmc)t11, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
		vans0[i] -= x0;
		vans1[i] -= x1;
	}
}

__attribute__((optimize("O3,unroll-loops"),target("avx")))
void flush_ansc()
{
	Loop (i,0,q) Loop (j,0,n) {
		ans[i][j] += (unsigned char)ansc[i][j];
		ansc[i][j] = 0;
	}
}

void process(int i)
{
	if (i%255 == 0)
		flush_ansc();
	memset(sim_cnt, 0, sizeof(sim_cnt));
	for (int j = i+1; j+8 <= n-l+1; j += 8) {
		auto [t0, t1, t2, t3, t4, t5, t6, t7] = get_sim8(i, j, j+1, j+2, j+3, j+4, j+5, j+6, j+7, l);
		sim[j+0] = t0; sim[j+1] = t1; sim[j+2] = t2; sim[j+3] = t3;
		sim[j+4] = t4; sim[j+5] = t5; sim[j+6] = t6; sim[j+7] = t7;
		sim_cnt[t0]++; sim_cnt[t1]++; sim_cnt[t2]++; sim_cnt[t3]++;
		sim_cnt[t4]++; sim_cnt[t5]++; sim_cnt[t6]++; sim_cnt[t7]++;
	}
	Loop (j, n-l+1 - (n-l+1 - (i+1))%8, n-l+1) {
		sim[j] = get_sim(i, j, l);
		sim_cnt[sim[j]]++;
	}
	sim_pre[0] = 0;
	Loop (j,0,l+1)
		sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
	Loop (j,0,q)
		ans[j][i] += sim_pre[query[j]+1];
	for (int j = 0; j < q; j += 2)
		up_from_other2(j, j+1, i+1);
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	vector<int> cmper;
	cin >> n >> l;
	Loop (i,0,n) {
		cin >> noncmp_a[i];
		cmper.push_back(noncmp_a[i]);
	}
	cin >> q;
	Loop (i,0,q)
		cin >> query[i];
	sort(cmper.begin(), cmper.end());
	cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
	Loop (i,0,n) {
		a[i] = lower_bound(cmper.begin(), cmper.end(),
		                   noncmp_a[i]) - cmper.begin();
	}
	Loop (i,0,n-l+1)
		process(i);
	flush_ansc();
	Loop (i,0,q) {
		Loop (j,0,n-l+1)
			cout << ans[i][j] << ' ';
		cout << '\n';
	}
}

#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...