#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;
// Capacity of every per-position array. up_from_other2 walks these arrays in
// N/32 blocks of 32 elements and treats each row of `ansc` as an array of
// 32-byte vectors, so N has to stay a multiple of 32.
const int N = 10016;
static_assert(N % 32 == 0, "N must be a multiple of 32 for the vector loops");
// Capacity of the per-query arrays.
const int Q = 100;
// Not referenced by any code visible in this file.
int mylist[N];
// ans[k][p]: 16-bit running total for query k and window start p; this is
// what main prints.
short ans[Q][N];
// ansc[k][p]: 8-bit pending increments for ans[k][p]. up_from_other2 bumps
// them and flush_ansc folds them into `ans` and clears them.
// up_from_other2 accesses each row through a pointer to a 32-byte-aligned
// vector type, so the storage is aligned explicitly instead of relying on the
// compiler's default placement (rows stay aligned because N % 32 == 0).
alignas(32) char ansc[Q][N];
// query[k]: threshold of query k as read from the input.
short query[Q];
// Read in main: n = number of input values, l = number of consecutive values
// compared per window, q = number of queries.
int n, q, l;
// Input values exactly as read.
int noncmp_a[N];
// Input values replaced by their index in the sorted list of distinct values
// (filled in main). The assembly body of get_sim4 addresses this symbol by
// name, so it must remain a global called `a`.
short a[N];
// Number of offsets k in [0, l) at which a[i+k] and a[j+k] differ.
// Implemented as l minus the number of equal pairs.
__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
    short matches = 0;
    int k = 0;
    while (k < l) {
        // The comparison yields 0 or 1, so this counts equal pairs.
        matches += (a[i + k] == a[j + k]);
        ++k;
    }
    return static_cast<short>(l - matches);
}
// get_sim4: the four-candidates-at-once counterpart of get_sim. For the same
// left window start `i` and four right window starts j0..j3 it yields, per the
// reference body kept in the comment below, {l - matches(i, j0),
// l - matches(i, j1), l - matches(i, j2), l - matches(i, j3)}, where
// matches(i, j) is the number of k in [0, l) with a[i+k] == a[j+k].
//
// Only the prototype is C++; the definition is the top-level asm statement that
// follows, which emits the symbol _Z8get_sim4iiiiii (presumably the mangled
// name of this declaration under the Itanium C++ ABI -- confirm if the
// signature is ever changed).
//__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short,short> get_sim4(int i, int j0, int j1, int j2, int j3, int l);
/* Reference implementation (disabled; kept to document what the assembly computes):
{
short ans0 = 0, ans1 = 0, ans2 = 0, ans3 = 0;
for (int k = 0; k < l; ++k) {
ans0 += a[i+k] == a[j0+k];
ans1 += a[i+k] == a[j1+k];
ans2 += a[i+k] == a[j2+k];
ans3 += a[i+k] == a[j3+k];
}
return {l-ans0, l-ans1, l-ans2, l-ans3};
}*/
// Out-of-line definition of get_sim4, supplied as raw x86-64 assembly under the
// symbol _Z8get_sim4iiiiii. It reads the global array `a` directly. The
// instruction stream looks like compiler output with labels renamed to ".my*"
// -- presumably pasted in so the 256-bit integer code is used regardless of the
// flags the file is built with (TODO confirm).
//
// Register roles as used below (consistent with the System V x86-64 convention
// for a function that returns through a hidden pointer -- confirm against ABI):
//   rdi -> r12  : address the four 16-bit results are stored to (returned in rax)
//   esi         : i   (spilled to -4(%rsp))
//   edx         : j0  (spilled to -8(%rsp))
//   ecx -> r13d : j1
//   r8d, r9d    : j2, j3
//   16(%rbp)    : l   (loaded into ecx)
// Match counters: r11d <-> j0, r10d <-> j1, edi <-> j2, esi <-> j3.
asm("\n"
" .p2align 4\n"
" .globl _Z8get_sim4iiiiii\n"
" .type _Z8get_sim4iiiiii, @function\n"
"_Z8get_sim4iiiiii:\n"
".myLFB9901:\n"
" .cfi_startproc\n"
// Prologue: save the registers this routine overwrites, keep j1 in r13d and
// the result address in r12, then align the stack pointer to 32 bytes.
" pushq %rbp\n"
" .cfi_def_cfa_offset 16\n"
" .cfi_offset 6, -16\n"
" movq %rsp, %rbp\n"
" .cfi_def_cfa_register 6\n"
" pushq %r15\n"
" pushq %r14\n"
" pushq %r13\n"
" .cfi_offset 15, -24\n"
" .cfi_offset 14, -32\n"
" .cfi_offset 13, -40\n"
" movl %ecx, %r13d\n"
" pushq %r12\n"
" .cfi_offset 12, -48\n"
" movq %rdi, %r12\n"
" pushq %rbx\n"
" andq $-32, %rsp\n"
" .cfi_offset 3, -56\n"
// Spill i and j0 just below rsp and load l from the stack argument slot.
" movl %esi, -4(%rsp)\n"
" movl 16(%rbp), %ecx\n"
" movl %edx, -8(%rsp)\n"
// l <= 0: nothing to compare.
" testl %ecx, %ecx\n"
" jle .myL129\n"
// l <= 15: too short for a 16-lane step, use the tail code only.
" leal -1(%rcx), %eax\n"
" cmpl $14, %eax\n"
" jbe .myL130\n"
// Set up base pointers r15 = &a[i], r14 = &a[j0], rbx = &a[j1], r11 = &a[j2],
// r10 = &a[j3]; zero the 16-lane accumulators ymm2 (j0), ymm4 (j1), ymm3 (j2),
// ymm1 (j3). rdi = (l / 16) * 32 is the byte length covered by the 16-lane
// code and rdx the running byte offset. rsi = (number of 16-lane chunks) % 4
// selects how many chunks are peeled before the 4-chunk main loop.
" leaq a(%rip), %rax\n"
" movl %edx, %edi\n"
" vpxor %xmm1, %xmm1, %xmm1\n"
" movslq %esi, %rdx\n"
" leaq (%rax,%rdx,2), %r15\n"
" vmovdqa %ymm1, %ymm3\n"
" vmovdqa %ymm1, %ymm4\n"
" movslq %edi, %rdx\n"
" movl %ecx, %edi\n"
" leaq (%rax,%rdx,2), %r14\n"
" vmovdqa %ymm1, %ymm2\n"
" movslq %r13d, %rdx\n"
" shrl $4, %edi\n"
" leaq (%rax,%rdx,2), %rbx\n"
" movslq %r8d, %rdx\n"
" salq $5, %rdi\n"
" leaq (%rax,%rdx,2), %r11\n"
" movslq %r9d, %rdx\n"
" leaq -32(%rdi), %rsi\n"
" leaq (%rax,%rdx,2), %r10\n"
" xorl %edx, %edx\n"
" shrq $5, %rsi\n"
" addq $1, %rsi\n"
" andl $3, %esi\n"
" je .myL125\n"
" cmpq $1, %rsi\n"
" je .myL141\n"
" cmpq $2, %rsi\n"
" je .myL142\n"
// Peeled chunk, reached only when chunk count % 4 == 3. vpcmpeqw sets equal
// lanes to -1, so subtracting the mask adds 1 to that lane's match count.
" vmovdqu (%r15), %ymm0\n"
" movl $32, %edx\n"
" vpcmpeqw (%r14), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm1, %ymm2\n"
" vpcmpeqw (%rbx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm1, %ymm4\n"
" vpcmpeqw (%r11), %ymm0, %ymm5\n"
" vpcmpeqw (%r10), %ymm0, %ymm0\n"
" vpsubw %ymm5, %ymm1, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
// Peeled chunk (entry point when chunk count % 4 == 2).
".myL142:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw (%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw (%r10,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
// Peeled chunk (entry point when chunk count % 4 == 1); afterwards skip the
// main loop if the 16-lane range is already exhausted.
".myL141:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw (%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw (%r10,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" cmpq %rdx, %rdi\n"
" je .myL147\n"
// Main loop: four 16-lane chunks (byte offsets rdx+0, +32, +64, +96) per
// iteration; rdx advances by 128 until it reaches rdi.
".myL125:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" leaq 32(%rdx), %rsi\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw (%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw (%r10,%rdx), %ymm0, %ymm0\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" vmovdqu 32(%r15,%rdx), %ymm0\n"
" vpcmpeqw 32(%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw 32(%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw 32(%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw 32(%r10,%rdx), %ymm0, %ymm0\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" vmovdqu 64(%r15,%rdx), %ymm0\n"
" vpcmpeqw 64(%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw 64(%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw 64(%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw 64(%r10,%rdx), %ymm0, %ymm0\n"
" leaq 96(%rsi), %rdx\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" vmovdqu 64(%r15,%rsi), %ymm0\n"
" vpcmpeqw 64(%r14,%rsi), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw 64(%rbx,%rsi), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw 64(%r11,%rsi), %ymm0, %ymm5\n"
" vpcmpeqw 64(%r10,%rsi), %ymm0, %ymm0\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" cmpq %rdx, %rdi\n"
" jne .myL125\n"
// Horizontal add of the 16 lanes of each accumulator into a scalar counter:
// ymm1 -> esi, ymm3 -> edi, ymm4 -> r10d, ymm2 -> r11d. Meanwhile
// ebx = edx = l rounded down to a multiple of 16 (elements already handled).
".myL147:\n"
" vmovdqa %xmm1, %xmm0\n"
" vextracti128 $0x1, %ymm1, %xmm1\n"
" movl %ecx, %ebx\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" andl $-16, %ebx\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" movl %ebx, %edx\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %esi\n"
" vmovdqa %xmm3, %xmm0\n"
" vextracti128 $0x1, %ymm3, %xmm3\n"
" vpaddw %xmm3, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %edi\n"
" vmovdqa %xmm4, %xmm0\n"
" vextracti128 $0x1, %ymm4, %xmm4\n"
" vpaddw %xmm4, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %r10d\n"
" vextracti128 $0x1, %ymm2, %xmm0\n"
" vpaddw %xmm2, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %r11d\n"
// If the 16-lane code covered all of l, finish; otherwise fall into the tail.
" cmpl %ecx, %ebx\n"
" je .myL153\n"
" vzeroupper\n"
// Tail: r15d = elements still to compare (l - ebx). With 7 or fewer left go
// straight to the scalar code; otherwise do one 8-lane xmm step first.
".myL124:\n"
" movl %ecx, %r15d\n"
" subl %ebx, %r15d\n"
" leal -1(%r15), %r14d\n"
" cmpl $6, %r14d\n"
" jbe .myL127\n"
// xmm4 = 1 in every 16-bit lane; compare masks are ANDed with it so each lane
// holds 0 or 1 before the horizontal adds.
" movslq -4(%rsp), %r14\n"
" vpcmpeqw %xmm4, %xmm4, %xmm4\n"
" vpxor %xmm0, %xmm0, %xmm0\n"
" vpsubw %xmm4, %xmm0, %xmm4\n"
" addq %rbx, %r14\n"
" vmovdqu (%rax,%r14,2), %xmm0\n"
" movslq -8(%rsp), %r14\n"
" addq %rbx, %r14\n"
" vpcmpeqw (%rax,%r14,2), %xmm0, %xmm2\n"
" movslq %r13d, %r14\n"
" addq %rbx, %r14\n"
" vpcmpeqw (%rax,%r14,2), %xmm0, %xmm3\n"
" movslq %r8d, %r14\n"
" addq %rbx, %r14\n"
" vpand %xmm4, %xmm2, %xmm2\n"
" vpcmpeqw (%rax,%r14,2), %xmm0, %xmm1\n"
" movslq %r9d, %r14\n"
" addq %rbx, %r14\n"
" vpand %xmm4, %xmm3, %xmm3\n"
" vpcmpeqw (%rax,%r14,2), %xmm0, %xmm0\n"
" vpand %xmm4, %xmm1, %xmm1\n"
" vpand %xmm4, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm4\n"
" vpaddw %xmm4, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm4\n"
" vpaddw %xmm4, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm4\n"
" vpaddw %xmm4, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %ebx\n"
" vpsrldq $8, %xmm1, %xmm0\n"
" vpaddw %xmm0, %xmm1, %xmm0\n"
" addl %ebx, %esi\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %ebx\n"
" vpsrldq $8, %xmm3, %xmm0\n"
" vpaddw %xmm0, %xmm3, %xmm0\n"
" addl %ebx, %edi\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %ebx\n"
" vpsrldq $8, %xmm2, %xmm0\n"
" vpaddw %xmm0, %xmm2, %xmm0\n"
" addl %ebx, %r10d\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %ebx\n"
" addl %ebx, %r11d\n"
// Advance the element index edx by the elements just handled (r15d & -8);
// done if nothing is left over.
" movl %r15d, %ebx\n"
" andl $-8, %ebx\n"
" addl %ebx, %edx\n"
" cmpl %ebx, %r15d\n"
" je .myL126\n"
// Scalar tail, unrolled for up to 7 elements starting at index edx: load
// a[i+k] once, compare it with a[j0+k], a[j1+k], a[j2+k], a[j3+k] and add the
// 0/1 outcome to r11d, r10d, edi, esi respectively. After each element the
// next index is checked against l (ecx).
".myL127:\n"
" movl -4(%rsp), %r15d\n"
" movl -8(%rsp), %r14d\n"
" leal (%r15,%rdx), %ebx\n"
" addl %edx, %r14d\n"
" movslq %r14d, %r14\n"
" movslq %ebx, %rbx\n"
" movzwl (%rax,%rbx,2), %ebx\n"
" cmpw %bx, (%rax,%r14,2)\n"
" sete %r14b\n"
" movzbl %r14b, %r14d\n"
" addl %r14d, %r11d\n"
" leal 0(%r13,%rdx), %r14d\n"
" movslq %r14d, %r14\n"
" cmpw %bx, (%rax,%r14,2)\n"
" sete %r14b\n"
" movzbl %r14b, %r14d\n"
" addl %r14d, %r10d\n"
" leal (%r8,%rdx), %r14d\n"
" movslq %r14d, %r14\n"
" cmpw %bx, (%rax,%r14,2)\n"
" sete %r14b\n"
" movzbl %r14b, %r14d\n"
" addl %r14d, %edi\n"
" leal (%r9,%rdx), %r14d\n"
" movslq %r14d, %r14\n"
" cmpw %bx, (%rax,%r14,2)\n"
" sete %bl\n"
" movzbl %bl, %ebx\n"
" addl %ebx, %esi\n"
" leal 1(%rdx), %ebx\n"
" cmpl %ebx, %ecx\n"
" jle .myL126\n"
" leal (%r15,%rbx), %r14d\n"
" movl -8(%rsp), %r15d\n"
" movslq %r14d, %r14\n"
" addl %ebx, %r15d\n"
" movzwl (%rax,%r14,2), %r14d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r11d\n"
" leal 0(%r13,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r10d\n"
" leal (%r8,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r9d, %ebx\n"
" movslq %ebx, %rbx\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %edi\n"
" cmpw %r14w, (%rax,%rbx,2)\n"
" sete %bl\n"
" movzbl %bl, %ebx\n"
" addl %ebx, %esi\n"
" leal 2(%rdx), %ebx\n"
" cmpl %ebx, %ecx\n"
" jle .myL126\n"
" movl -4(%rsp), %r15d\n"
" leal (%r15,%rbx), %r14d\n"
" movl -8(%rsp), %r15d\n"
" movslq %r14d, %r14\n"
" addl %ebx, %r15d\n"
" movzwl (%rax,%r14,2), %r14d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r11d\n"
" leal 0(%r13,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r10d\n"
" leal (%r8,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r9d, %ebx\n"
" movslq %ebx, %rbx\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %edi\n"
" cmpw %r14w, (%rax,%rbx,2)\n"
" sete %bl\n"
" movzbl %bl, %ebx\n"
" addl %ebx, %esi\n"
" leal 3(%rdx), %ebx\n"
" cmpl %ebx, %ecx\n"
" jle .myL126\n"
" movl -4(%rsp), %r15d\n"
" leal (%r15,%rbx), %r14d\n"
" movl -8(%rsp), %r15d\n"
" movslq %r14d, %r14\n"
" addl %ebx, %r15d\n"
" movzwl (%rax,%r14,2), %r14d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r11d\n"
" leal 0(%r13,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r10d\n"
" leal (%r8,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r9d, %ebx\n"
" movslq %ebx, %rbx\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %edi\n"
" cmpw %r14w, (%rax,%rbx,2)\n"
" sete %bl\n"
" movzbl %bl, %ebx\n"
" addl %ebx, %esi\n"
" leal 4(%rdx), %ebx\n"
" cmpl %ebx, %ecx\n"
" jle .myL126\n"
" movl -4(%rsp), %r15d\n"
" leal (%r15,%rbx), %r14d\n"
" movl -8(%rsp), %r15d\n"
" movslq %r14d, %r14\n"
" addl %ebx, %r15d\n"
" movzwl (%rax,%r14,2), %r14d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r11d\n"
" leal 0(%r13,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r10d\n"
" leal (%r8,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r9d, %ebx\n"
" movslq %ebx, %rbx\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %edi\n"
" cmpw %r14w, (%rax,%rbx,2)\n"
" sete %bl\n"
" movzbl %bl, %ebx\n"
" addl %ebx, %esi\n"
" leal 5(%rdx), %ebx\n"
" cmpl %ebx, %ecx\n"
" jle .myL126\n"
" movl -4(%rsp), %r15d\n"
" leal (%r15,%rbx), %r14d\n"
" movl -8(%rsp), %r15d\n"
" movslq %r14d, %r14\n"
" addl %ebx, %r15d\n"
" movzwl (%rax,%r14,2), %r14d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r11d\n"
" leal 0(%r13,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %r10d\n"
" leal (%r8,%rbx), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r9d, %ebx\n"
" movslq %ebx, %rbx\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %edi\n"
" cmpw %r14w, (%rax,%rbx,2)\n"
" sete %bl\n"
" addl $6, %edx\n"
" movzbl %bl, %ebx\n"
" addl %ebx, %esi\n"
" cmpl %edx, %ecx\n"
" jle .myL126\n"
" movl -4(%rsp), %ebx\n"
" movl -8(%rsp), %r14d\n"
" addl %edx, %ebx\n"
" addl %edx, %r14d\n"
" movslq %r14d, %r14\n"
" movslq %ebx, %rbx\n"
" movzwl (%rax,%rbx,2), %ebx\n"
" cmpw %bx, (%rax,%r14,2)\n"
" sete %r14b\n"
" addl %edx, %r13d\n"
" movslq %r13d, %r13\n"
" movzbl %r14b, %r14d\n"
" addl %r14d, %r11d\n"
" cmpw %bx, (%rax,%r13,2)\n"
" sete %r13b\n"
" addl %edx, %r8d\n"
" movslq %r8d, %r8\n"
" movzbl %r13b, %r13d\n"
" addl %r13d, %r10d\n"
" cmpw %bx, (%rax,%r8,2)\n"
" sete %r8b\n"
" addl %r9d, %edx\n"
" movzbl %r8b, %r8d\n"
" movslq %edx, %rdx\n"
" addl %r8d, %edi\n"
" cmpw %bx, (%rax,%rdx,2)\n"
" sete %al\n"
" movzbl %al, %eax\n"
" addl %eax, %esi\n"
// Turn match counts into mismatch counts: each result is l minus the counter
// (counters sign-extended from 16 bits first).
".myL126:\n"
" movl %ecx, %eax\n"
" movl %ecx, %edx\n"
" movl %ecx, %r8d\n"
" movswl %r11w, %r11d\n"
" movswl %r10w, %r10d\n"
" movswl %di, %edi\n"
" movswl %si, %esi\n"
" subl %r11d, %eax\n"
" subl %r10d, %edx\n"
" subl %edi, %r8d\n"
" subl %esi, %ecx\n"
// Store the four 16-bit results through r12: the j0 result at byte offset 6,
// j1 at 4, j2 at 2, j3 at 0 (presumably the in-memory element order of the
// returned tuple in the standard library in use -- confirm). rax returns the
// result address; then restore the saved registers and return.
".myL123:\n"
" movw %ax, 6(%r12)\n"
" movq %r12, %rax\n"
" movw %cx, (%r12)\n"
" movw %r8w, 2(%r12)\n"
" movw %dx, 4(%r12)\n"
" leaq -40(%rbp), %rsp\n"
" popq %rbx\n"
" popq %r12\n"
" popq %r13\n"
" popq %r14\n"
" popq %r15\n"
" popq %rbp\n"
" .cfi_remember_state\n"
" .cfi_def_cfa 7, 8\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
// l <= 0: every result is l itself (zero matches counted).
".myL129:\n"
" .cfi_restore_state\n"
" movl %ecx, %r8d\n"
" movl %ecx, %edx\n"
" movl %ecx, %eax\n"
" jmp .myL123\n"
// 1 <= l <= 15: zero the element index and all four counters, load &a into
// rax and run only the tail code.
".myL130:\n"
" xorl %ebx, %ebx\n"
" xorl %edx, %edx\n"
" xorl %esi, %esi\n"
" xorl %edi, %edi\n"
" xorl %r10d, %r10d\n"
" xorl %r11d, %r11d\n"
" leaq a(%rip), %rax\n"
" jmp .myL124\n"
// The 16-lane code consumed all of l: clear the upper ymm halves and finish.
".myL153:\n"
" vzeroupper\n"
" jmp .myL126\n"
" .cfi_endproc\n"
".myLFE9901:\n"
" .size _Z8get_sim4iiiiii, .-_Z8get_sim4iiiiii\n"
);
// Scratch tables rebuilt by every process(i) call.
// sim_cnt[d]: how many windows compared in the current call had mismatch
// count d.
short sim_cnt[N+1];
// sim_pre[d+1]: prefix sum sim_cnt[0] + ... + sim_cnt[d]; sim_pre[0] is 0.
short sim_pre[N+2];
// sim[j]: mismatch count between the current window and window j.
// up_from_other2 reads this array through a pointer to a 32-byte-aligned
// vector type, so the alignment is requested explicitly rather than left to
// the compiler's default placement.
alignas(32) short sim[N];
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up_from_other2(int qr0, int qr1, int st)
{
char *ans0 = ::ansc[qr0];
char *ans1 = ::ansc[qr1];
short val0 = query[qr0]+1;
short val1 = query[qr1]+1;
while (st%32) {
ans0[st] += sim[st] < val0;
ans1[st] += sim[st] < val1;
++st;
}
typedef short ymms __attribute__((vector_size(32),aligned(32)));
typedef char ymmc __attribute__((vector_size(32),aligned(32)));
ymmc *vans0 = (ymmc *)ans0;
ymmc *vans1 = (ymmc *)ans1;
ymms *vsim = (ymms *)sim;
for (int i = st/32; i < N/32; ++i) {
ymms tmp0 = vsim[i*2], tmp1 = vsim[i*2+1];
ymms t00 = tmp0 < val0;
ymms t10 = tmp1 < val0;
ymms t01 = tmp0 < val1;
ymms t11 = tmp1 < val1;
ymmc x0 = __builtin_shuffle((ymmc)t00, (ymmc)t10, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
ymmc x1 = __builtin_shuffle((ymmc)t01, (ymmc)t11, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
vans0[i] -= x0;
vans1[i] -= x1;
}
}
__attribute__((optimize("O3,unroll-loops"),target("avx")))
void flush_ansc()
{
Loop (i,0,q) Loop (j,0,n) {
ans[i][j] += (unsigned char)ansc[i][j];
ansc[i][j] = 0;
}
}
void process(int i)
{
if (i%255 == 0)
flush_ansc();
memset(sim_cnt, 0, sizeof(sim_cnt));
for (int j = i+1; j+4 <= n-l+1; j += 4) {
auto [t0, t1, t2, t3] = get_sim4(i, j, j+1, j+2, j+3, l);
sim[j+0] = t0; sim[j+1] = t1; sim[j+2] = t2; sim[j+3] = t3;
sim_cnt[t0]++; sim_cnt[t1]++; sim_cnt[t2]++; sim_cnt[t3]++;
}
Loop (j, n-l+1 - (n-l+1 - (i+1))%4, n-l+1) {
sim[j] = get_sim(i, j, l);
sim_cnt[sim[j]]++;
}
sim_pre[0] = 0;
Loop (j,0,l+1)
sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
Loop (j,0,q)
ans[j][i] += sim_pre[query[j]+1];
for (int j = 0; j < q; j += 2)
up_from_other2(j, j+1, i+1);
}
int main()
{
cin.tie(0) -> sync_with_stdio(false);
vector<int> cmper;
cin >> n >> l;
Loop (i,0,n) {
cin >> noncmp_a[i];
cmper.push_back(noncmp_a[i]);
}
cin >> q;
Loop (i,0,q)
cin >> query[i];
sort(cmper.begin(), cmper.end());
cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
Loop (i,0,n) {
a[i] = lower_bound(cmper.begin(), cmper.end(),
noncmp_a[i]) - cmper.begin();
}
Loop (i,0,n-l+1)
process(i);
flush_ansc();
Loop (i,0,q) {
Loop (j,0,n-l+1)
cout << ans[i][j] << ' ';
cout << '\n';
}
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Correct |
2 ms |
596 KB |
Output is correct |
3 |
Correct |
2 ms |
584 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
6 |
Correct |
2 ms |
468 KB |
Output is correct |
7 |
Correct |
1 ms |
460 KB |
Output is correct |
8 |
Correct |
4 ms |
580 KB |
Output is correct |
9 |
Correct |
3 ms |
596 KB |
Output is correct |
10 |
Correct |
6 ms |
580 KB |
Output is correct |
11 |
Correct |
7 ms |
596 KB |
Output is correct |
12 |
Correct |
5 ms |
596 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Correct |
2 ms |
596 KB |
Output is correct |
3 |
Correct |
2 ms |
584 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
6 |
Correct |
2 ms |
468 KB |
Output is correct |
7 |
Correct |
1 ms |
460 KB |
Output is correct |
8 |
Correct |
4 ms |
580 KB |
Output is correct |
9 |
Correct |
3 ms |
596 KB |
Output is correct |
10 |
Correct |
6 ms |
580 KB |
Output is correct |
11 |
Correct |
7 ms |
596 KB |
Output is correct |
12 |
Correct |
5 ms |
596 KB |
Output is correct |
13 |
Correct |
21 ms |
596 KB |
Output is correct |
14 |
Correct |
51 ms |
764 KB |
Output is correct |
15 |
Correct |
42 ms |
608 KB |
Output is correct |
16 |
Correct |
54 ms |
764 KB |
Output is correct |
17 |
Correct |
56 ms |
756 KB |
Output is correct |
18 |
Correct |
57 ms |
752 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
257 ms |
596 KB |
Output is correct |
2 |
Correct |
378 ms |
596 KB |
Output is correct |
3 |
Correct |
256 ms |
596 KB |
Output is correct |
4 |
Correct |
663 ms |
668 KB |
Output is correct |
5 |
Correct |
2563 ms |
664 KB |
Output is correct |
6 |
Correct |
1484 ms |
664 KB |
Output is correct |
7 |
Correct |
2514 ms |
660 KB |
Output is correct |
8 |
Execution timed out |
3004 ms |
660 KB |
Time limit exceeded |
9 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
257 ms |
596 KB |
Output is correct |
2 |
Correct |
378 ms |
596 KB |
Output is correct |
3 |
Correct |
256 ms |
596 KB |
Output is correct |
4 |
Correct |
663 ms |
668 KB |
Output is correct |
5 |
Correct |
2563 ms |
664 KB |
Output is correct |
6 |
Correct |
1484 ms |
664 KB |
Output is correct |
7 |
Correct |
2514 ms |
660 KB |
Output is correct |
8 |
Execution timed out |
3004 ms |
660 KB |
Time limit exceeded |
9 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Correct |
2 ms |
596 KB |
Output is correct |
3 |
Correct |
2 ms |
584 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
6 |
Correct |
2 ms |
468 KB |
Output is correct |
7 |
Correct |
1 ms |
460 KB |
Output is correct |
8 |
Correct |
4 ms |
580 KB |
Output is correct |
9 |
Correct |
3 ms |
596 KB |
Output is correct |
10 |
Correct |
6 ms |
580 KB |
Output is correct |
11 |
Correct |
7 ms |
596 KB |
Output is correct |
12 |
Correct |
5 ms |
596 KB |
Output is correct |
13 |
Correct |
21 ms |
596 KB |
Output is correct |
14 |
Correct |
51 ms |
764 KB |
Output is correct |
15 |
Correct |
42 ms |
608 KB |
Output is correct |
16 |
Correct |
54 ms |
764 KB |
Output is correct |
17 |
Correct |
56 ms |
756 KB |
Output is correct |
18 |
Correct |
57 ms |
752 KB |
Output is correct |
19 |
Correct |
257 ms |
596 KB |
Output is correct |
20 |
Correct |
378 ms |
596 KB |
Output is correct |
21 |
Correct |
256 ms |
596 KB |
Output is correct |
22 |
Correct |
663 ms |
668 KB |
Output is correct |
23 |
Correct |
2563 ms |
664 KB |
Output is correct |
24 |
Correct |
1484 ms |
664 KB |
Output is correct |
25 |
Correct |
2514 ms |
660 KB |
Output is correct |
26 |
Execution timed out |
3004 ms |
660 KB |
Time limit exceeded |
27 |
Halted |
0 ms |
0 KB |
- |