답안 #652264

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
652264 2022-10-21T22:05:38 Z ymm Lottery (CEOI18_lot) C++17
컴파일 오류
0 ms 0 KB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 10016;
const int Q = 100;
int mylist[N];
short ans[Q][N];
char ansc[Q][N];
short query[Q];
int n, q, l;

int noncmp_a[N];
short a[N];

__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
	short ans = 0;
	for (int k = 0; k < l; ++k)
		ans += a[i+k] == a[j+k];
	return l-ans;
}
//__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short> get_sim3(int i, int j0, int j1, int j2, int l);
/*{
	short ans0 = 0, ans1 = 0, ans2 = 0;
	for (int k = 0; k < l; ++k) {
		ans0 += a[i+k] == a[j0+k];
		ans1 += a[i+k] == a[j1+k];
		ans2 += a[i+k] == a[j2+k];
	}
	return {l-ans0, l-ans1, l-ans2};
}*/
asm("\n"
"	.p2align 4\n"
"	.globl	_Z8get_sim3iiiii\n"
"	.type	_Z8get_sim3iiiii, @function\n"
"_Z8get_sim3iiiii:\n"
".myLFB9918:\n"
"	.cfi_startproc\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movq	%rdi, %r10\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r15\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 15, -24\n"
"	.cfi_offset 14, -32\n"
"	.cfi_offset 13, -40\n"
"	.cfi_offset 12, -48\n"
"	.cfi_offset 3, -56\n"
"	testl	%r9d, %r9d\n"
"	jle	.myL148\n"
"	leal	-1(%r9), %eax\n"
"	movl	%esi, %ebx\n"
"	movl	%edx, %r12d\n"
"	movl	%ecx, %r13d\n"
"	cmpl	$14, %eax\n"
"	jbe	.myL149\n"
"	movslq	%esi, %rdx\n"
"	movl	%r9d, %esi\n"
"	leaq	a(%rip), %rax\n"
"	shrl	$4, %esi\n"
"	leaq	(%rax,%rdx,2), %r15\n"
"	vpxor	%xmm1, %xmm1, %xmm1\n"
"	movslq	%r12d, %rdx\n"
"	leaq	(%rax,%rdx,2), %r14\n"
"	salq	$5, %rsi\n"
"	vmovdqa	%ymm1, %ymm3\n"
"	movslq	%ecx, %rdx\n"
"	leaq	-32(%rsi), %rcx\n"
"	leaq	(%rax,%rdx,2), %r11\n"
"	vmovdqa	%ymm1, %ymm2\n"
"	movslq	%r8d, %rdx\n"
"	shrq	$5, %rcx\n"
"	leaq	(%rax,%rdx,2), %rdi\n"
"	xorl	%edx, %edx\n"
"	addq	$1, %rcx\n"
"	andl	$3, %ecx\n"
"	je	.myL144\n"
"	cmpq	$1, %rcx\n"
"	je	.myL160\n"
"	cmpq	$2, %rcx\n"
"	je	.myL161\n"
"	vmovdqu	(%r15), %ymm0\n"
"	movl	$32, %edx\n"
"	vpcmpeqw	(%r14), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm1, %ymm2\n"
"	vpcmpeqw	(%r11), %ymm0, %ymm4\n"
"	vpcmpeqw	(%rdi), %ymm0, %ymm0\n"
"	vpsubw	%ymm4, %ymm1, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
".myL161:\n"
"	vmovdqu	(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	(%r14,%rdx), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm2, %ymm2\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm4\n"
"	vpcmpeqw	(%rdi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vpsubw	%ymm4, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
".myL160:\n"
"	vmovdqu	(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	(%r14,%rdx), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm2, %ymm2\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm4\n"
"	vpcmpeqw	(%rdi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vpsubw	%ymm4, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rdx, %rsi\n"
"	je	.myL166\n"
".myL144:\n"
"	vmovdqu	(%r15,%rdx), %ymm0\n"
"	leaq	32(%rdx), %rcx\n"
"	vpcmpeqw	(%r14,%rdx), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm2, %ymm2\n"
"	vpcmpeqw	(%r11,%rdx), %ymm0, %ymm4\n"
"	vpcmpeqw	(%rdi,%rdx), %ymm0, %ymm0\n"
"	vpsubw	%ymm4, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	32(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	32(%r14,%rdx), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm2, %ymm2\n"
"	vpcmpeqw	32(%r11,%rdx), %ymm0, %ymm4\n"
"	vpcmpeqw	32(%rdi,%rdx), %ymm0, %ymm0\n"
"	vpsubw	%ymm4, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	64(%r15,%rdx), %ymm0\n"
"	vpcmpeqw	64(%r14,%rdx), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm2, %ymm2\n"
"	vpcmpeqw	64(%r11,%rdx), %ymm0, %ymm4\n"
"	vpcmpeqw	64(%rdi,%rdx), %ymm0, %ymm0\n"
"	leaq	96(%rcx), %rdx\n"
"	vpsubw	%ymm4, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	64(%r15,%rcx), %ymm0\n"
"	vpcmpeqw	64(%r14,%rcx), %ymm0, %ymm4\n"
"	vpsubw	%ymm4, %ymm2, %ymm2\n"
"	vpcmpeqw	64(%r11,%rcx), %ymm0, %ymm4\n"
"	vpcmpeqw	64(%rdi,%rcx), %ymm0, %ymm0\n"
"	vpsubw	%ymm4, %ymm3, %ymm3\n"
"	vpsubw	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rdx, %rsi\n"
"	jne	.myL144\n"
".myL166:\n"
"	vmovdqa	%xmm1, %xmm0\n"
"	vextracti128	$0x1, %ymm1, %xmm1\n"
"	movl	%r9d, %r11d\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	andl	$-16, %r11d\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	movl	%r11d, %edi\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %edx\n"
"	vmovdqa	%xmm3, %xmm0\n"
"	vextracti128	$0x1, %ymm3, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %ecx\n"
"	vextracti128	$0x1, %ymm2, %xmm0\n"
"	vpaddw	%xmm2, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %esi\n"
"	cmpl	%r9d, %r11d\n"
"	je	.myL169\n"
"	vzeroupper\n"
".myL143:\n"
"	movl	%r9d, %r14d\n"
"	subl	%r11d, %r14d\n"
"	leal	-1(%r14), %r15d\n"
"	cmpl	$6, %r15d\n"
"	jbe	.myL146\n"
"	movslq	%ebx, %r15\n"
"	vpcmpeqw	%xmm3, %xmm3, %xmm3\n"
"	vpxor	%xmm0, %xmm0, %xmm0\n"
"	vpsubsw	%xmm3, %xmm0, %xmm3\n"
"	addq	%r11, %r15\n"
"	vmovdqu	(%rax,%r15,2), %xmm0\n"
"	movslq	%r12d, %r15\n"
"	addq	%r11, %r15\n"
"	vpcmpeqw	(%rax,%r15,2), %xmm0, %xmm2\n"
"	movslq	%r13d, %r15\n"
"	addq	%r11, %r15\n"
"	vpcmpeqw	(%rax,%r15,2), %xmm0, %xmm1\n"
"	movslq	%r8d, %r15\n"
"	addq	%r15, %r11\n"
"	vpand	%xmm3, %xmm2, %xmm2\n"
"	vpcmpeqw	(%rax,%r11,2), %xmm0, %xmm0\n"
"	vpand	%xmm3, %xmm1, %xmm1\n"
"	vpand	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$8, %xmm0, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm3\n"
"	vpaddw	%xmm3, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r11d\n"
"	vpsrldq	$8, %xmm1, %xmm0\n"
"	vpaddw	%xmm0, %xmm1, %xmm0\n"
"	addl	%r11d, %edx\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r11d\n"
"	vpsrldq	$8, %xmm2, %xmm0\n"
"	vpaddw	%xmm0, %xmm2, %xmm0\n"
"	addl	%r11d, %ecx\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$2, %xmm0, %xmm1\n"
"	vpaddw	%xmm1, %xmm0, %xmm0\n"
"	vpextrw	$0, %xmm0, %r11d\n"
"	addl	%r11d, %esi\n"
"	movl	%r14d, %r11d\n"
"	andl	$-8, %r11d\n"
"	addl	%r11d, %edi\n"
"	cmpl	%r11d, %r14d\n"
"	je	.myL145\n"
".myL146:\n"
"	leal	(%rbx,%rdi), %r11d\n"
"	leal	(%r12,%rdi), %r14d\n"
"	movslq	%r14d, %r14\n"
"	movslq	%r11d, %r11\n"
"	movzwl	(%rax,%r11,2), %r11d\n"
"	cmpw	%r11w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %esi\n"
"	leal	0(%r13,%rdi), %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r11w, (%rax,%r14,2)\n"
"	sete	%r14b\n"
"	movzbl	%r14b, %r14d\n"
"	addl	%r14d, %ecx\n"
"	leal	(%r8,%rdi), %r14d\n"
"	movslq	%r14d, %r14\n"
"	cmpw	%r11w, (%rax,%r14,2)\n"
"	sete	%r11b\n"
"	movzbl	%r11b, %r11d\n"
"	addl	%r11d, %edx\n"
"	leal	1(%rdi), %r11d\n"
"	cmpl	%r11d, %r9d\n"
"	jle	.myL145\n"
"	leal	(%rbx,%r11), %r14d\n"
"	leal	(%r12,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	movslq	%r14d, %r14\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %esi\n"
"	leal	0(%r13,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r8d, %r11d\n"
"	movslq	%r11d, %r11\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %ecx\n"
"	cmpw	%r14w, (%rax,%r11,2)\n"
"	sete	%r11b\n"
"	movzbl	%r11b, %r11d\n"
"	addl	%r11d, %edx\n"
"	leal	2(%rdi), %r11d\n"
"	cmpl	%r11d, %r9d\n"
"	jle	.myL145\n"
"	leal	(%rbx,%r11), %r14d\n"
"	leal	(%r12,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	movslq	%r14d, %r14\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %esi\n"
"	leal	0(%r13,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r8d, %r11d\n"
"	movslq	%r11d, %r11\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %ecx\n"
"	cmpw	%r14w, (%rax,%r11,2)\n"
"	sete	%r11b\n"
"	movzbl	%r11b, %r11d\n"
"	addl	%r11d, %edx\n"
"	leal	3(%rdi), %r11d\n"
"	cmpl	%r11d, %r9d\n"
"	jle	.myL145\n"
"	leal	(%rbx,%r11), %r14d\n"
"	leal	(%r12,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	movslq	%r14d, %r14\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %esi\n"
"	leal	0(%r13,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r8d, %r11d\n"
"	movslq	%r11d, %r11\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %ecx\n"
"	cmpw	%r14w, (%rax,%r11,2)\n"
"	sete	%r11b\n"
"	movzbl	%r11b, %r11d\n"
"	addl	%r11d, %edx\n"
"	leal	4(%rdi), %r11d\n"
"	cmpl	%r11d, %r9d\n"
"	jle	.myL145\n"
"	leal	(%rbx,%r11), %r14d\n"
"	leal	(%r12,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	movslq	%r14d, %r14\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %esi\n"
"	leal	0(%r13,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r8d, %r11d\n"
"	movslq	%r11d, %r11\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %ecx\n"
"	cmpw	%r14w, (%rax,%r11,2)\n"
"	sete	%r11b\n"
"	movzbl	%r11b, %r11d\n"
"	addl	%r11d, %edx\n"
"	leal	5(%rdi), %r11d\n"
"	cmpl	%r11d, %r9d\n"
"	jle	.myL145\n"
"	leal	(%rbx,%r11), %r14d\n"
"	leal	(%r12,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	movslq	%r14d, %r14\n"
"	movzwl	(%rax,%r14,2), %r14d\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %esi\n"
"	leal	0(%r13,%r11), %r15d\n"
"	movslq	%r15d, %r15\n"
"	cmpw	%r14w, (%rax,%r15,2)\n"
"	sete	%r15b\n"
"	addl	%r8d, %r11d\n"
"	movslq	%r11d, %r11\n"
"	movzbl	%r15b, %r15d\n"
"	addl	%r15d, %ecx\n"
"	cmpw	%r14w, (%rax,%r11,2)\n"
"	sete	%r11b\n"
"	addl	$6, %edi\n"
"	movzbl	%r11b, %r11d\n"
"	addl	%r11d, %edx\n"
"	cmpl	%edi, %r9d\n"
"	jle	.myL145\n"
"	addl	%edi, %ebx\n"
"	addl	%edi, %r12d\n"
"	movslq	%ebx, %rbx\n"
"	movslq	%r12d, %r12\n"
"	movzwl	(%rax,%rbx,2), %r11d\n"
"	xorl	%ebx, %ebx\n"
"	cmpw	%r11w, (%rax,%r12,2)\n"
"	sete	%bl\n"
"	addl	%edi, %r13d\n"
"	movslq	%r13d, %r13\n"
"	addl	%ebx, %esi\n"
"	xorl	%ebx, %ebx\n"
"	cmpw	%r11w, (%rax,%r13,2)\n"
"	sete	%bl\n"
"	addl	%r8d, %edi\n"
"	movslq	%edi, %rdi\n"
"	addl	%ebx, %ecx\n"
"	cmpw	%r11w, (%rax,%rdi,2)\n"
"	sete	%al\n"
"	movzbl	%al, %eax\n"
"	addl	%eax, %edx\n"
".myL145:\n"
"	movswl	%si, %esi\n"
"	movl	%r9d, %eax\n"
"	movswl	%cx, %ecx\n"
"	movswl	%dx, %edx\n"
"	subl	%esi, %eax\n"
"	movl	%r9d, %esi\n"
"	subl	%edx, %r9d\n"
"	subl	%ecx, %esi\n"
".myL142:\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	movw	%ax, 4(%r10)\n"
"	movq	%r10, %rax\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	movw	%r9w, (%r10)\n"
"	popq	%r15\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	movw	%si, 2(%r10)\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL148:\n"
"	.cfi_restore_state\n"
"	movl	%r9d, %esi\n"
"	movl	%r9d, %eax\n"
"	jmp	.myL142\n"
".myL149:\n"
"	xorl	%r11d, %r11d\n"
"	xorl	%edi, %edi\n"
"	xorl	%edx, %edx\n"
"	xorl	%ecx, %ecx\n"
"	xorl	%esi, %esi\n"
"	leaq	a(%rip), %rax\n"
"	jmp	.myL143\n"
".myL169:\n"
"	vzeroupper\n"
"	jmp	.myL145\n"
"	.cfi_endproc\n"
".myLFE9918:\n"
"	.size	_Z8get_sim3iiiii, .-_Z8get_sim3iiiii\n"
);

short sim_cnt[N+1];
short sim_pre[N+2];
short sim[N];

__attribute__((optimize("O3,unroll-loops"),target("avx")))
void up_from_other2(int qr0, int qr1, int st)
{
	char *ans0 = ::ansc[qr0];
	char *ans1 = ::ansc[qr1];
	short val0 = query[qr0]+1;
	short val1 = query[qr1]+1;
	Loop (i,st,n) {
		ans0[i] += sim[i] < val0;
		ans1[i] += sim[i] < val1;
	}
	return;
	typedef short ymms __attribute__((vector_size(32),aligned(32)));
	typedef char ymmc __attribute__((vector_size(32),aligned(32)));
	ymmc *vans0 = (ymmc *)ans0;
	ymmc *vans1 = (ymmc *)ans1;
	ymms *vsim = (ymms *)sim;
	for (int i = st/32; i < (n-1)/32+1; ++i) {
		ymms tmp0 = vsim[i*2], tmp1 = vsim[i*2+1];
		ymms t00 = tmp0 < val0;
		ymms t10 = tmp1 < val0;
		ymms t01 = tmp0 < val1;
		ymms t11 = tmp1 < val1;
		ymmc x0 = __builtin_ia32_packsswb256(t00, t10);
		ymmc x1 = __builtin_ia32_packsswb256(t01, t11);
		vans0[i] -= x0;
		vans1[i] -= x1;
	}
}

__attribute__((optimize("O3,unroll-loops"),target("avx")))
void flush_ansc()
{
	Loop (i,0,q) Loop (j,0,n) {
		ans[i][j] += (unsigned char)ansc[i][j];
		ansc[i][j] = 0;
	}
}

void process(int i)
{
	if (i%255 == 0)
		flush_ansc();
	memset(sim_cnt, 0, sizeof(sim_cnt));
	short h3[3];
	Loop (j,i+1,n-l+1) {
		if (j%3 == (i+1)%3) {
			if (j+3 <= n-l+1) {
				auto tmp = get_sim3(i, j, j+1, j+2, l);
				tie(h3[0], h3[1], h3[2]) = tmp;
			} else {
				Loop (k,j,n-l+1)
					h3[k-j] = get_sim(i, k, l);
			}
		} else {
			h3[0] = h3[1];
			h3[1] = h3[2];
		}
		sim[j] = h3[0];
		sim_cnt[sim[j]]++;
	}
	sim_pre[0] = 0;
	Loop (j,0,l+1)
		sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
	Loop (j,0,q)
		ans[j][i] += sim_pre[query[j]+1];
	for (int j = 0; j < q; j += 2)
		up_from_other2(j, j+1, i+1);
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	vector<int> cmper;
	cin >> n >> l;
	Loop (i,0,n) {
		cin >> noncmp_a[i];
		cmper.push_back(noncmp_a[i]);
	}
	cin >> q;
	Loop (i,0,q)
		cin >> query[i];
	sort(cmper.begin(), cmper.end());
	cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
	Loop (i,0,n) {
		a[i] = lower_bound(cmper.begin(), cmper.end(),
		                   noncmp_a[i]) - cmper.begin();
	}
	Loop (i,0,n-l+1)
		process(i);
	flush_ansc();
	Loop (i,0,q) {
		Loop (j,0,n-l+1)
			cout << ans[i][j] << ' ';
		cout << '\n';
	}
}

Compilation message

lot.cpp: In function 'void up_from_other2(int, int, int)':
lot.cpp:484:13: error: '__builtin_ia32_packsswb256' was not declared in this scope; did you mean '__builtin_ia32_packsswb128'?
  484 |   ymmc x0 = __builtin_ia32_packsswb256(t00, t10);
      |             ^~~~~~~~~~~~~~~~~~~~~~~~~~
      |             __builtin_ia32_packsswb128