#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;
const int N = 10000;
const int Q = 100;
int mylist[N];
short ans[Q][N];
short query[Q];
int n, q, l;
int noncmp_a[N];
short a[N];
__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
short ans = 0;
for (int k = 0; k < l; ++k)
ans += a[i+k] == a[j+k];
return l-ans;
}
//__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short> get_sim3(int i, int j0, int j1, int j2, int l);
/*{
short ans0 = 0, ans1 = 0, ans2 = 0;
for (int k = 0; k < l; ++k) {
ans0 += a[i+k] == a[j0+k];
ans1 += a[i+k] == a[j1+k];
ans2 += a[i+k] == a[j2+k];
}
return {l-ans0, l-ans1, l-ans2};
}*/
asm("\n"
" .p2align 4\n"
" .globl _Z8get_sim3iiiii\n"
" .type _Z8get_sim3iiiii, @function\n"
"_Z8get_sim3iiiii:\n"
".myLFB9918:\n"
" .cfi_startproc\n"
" pushq %rbp\n"
" .cfi_def_cfa_offset 16\n"
" .cfi_offset 6, -16\n"
" movq %rdi, %r10\n"
" movq %rsp, %rbp\n"
" .cfi_def_cfa_register 6\n"
" pushq %r15\n"
" pushq %r14\n"
" pushq %r13\n"
" pushq %r12\n"
" pushq %rbx\n"
" .cfi_offset 15, -24\n"
" .cfi_offset 14, -32\n"
" .cfi_offset 13, -40\n"
" .cfi_offset 12, -48\n"
" .cfi_offset 3, -56\n"
" testl %r9d, %r9d\n"
" jle .myL148\n"
" leal -1(%r9), %eax\n"
" movl %esi, %ebx\n"
" movl %edx, %r12d\n"
" movl %ecx, %r13d\n"
" cmpl $14, %eax\n"
" jbe .myL149\n"
" movslq %esi, %rdx\n"
" movl %r9d, %esi\n"
" leaq a(%rip), %rax\n"
" shrl $4, %esi\n"
" leaq (%rax,%rdx,2), %r15\n"
" vpxor %xmm1, %xmm1, %xmm1\n"
" movslq %r12d, %rdx\n"
" leaq (%rax,%rdx,2), %r14\n"
" salq $5, %rsi\n"
" vmovdqa %ymm1, %ymm3\n"
" movslq %ecx, %rdx\n"
" leaq -32(%rsi), %rcx\n"
" leaq (%rax,%rdx,2), %r11\n"
" vmovdqa %ymm1, %ymm2\n"
" movslq %r8d, %rdx\n"
" shrq $5, %rcx\n"
" leaq (%rax,%rdx,2), %rdi\n"
" xorl %edx, %edx\n"
" addq $1, %rcx\n"
" andl $3, %ecx\n"
" je .myL144\n"
" cmpq $1, %rcx\n"
" je .myL160\n"
" cmpq $2, %rcx\n"
" je .myL161\n"
" vmovdqu (%r15), %ymm0\n"
" movl $32, %edx\n"
" vpcmpeqw (%r14), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm1, %ymm2\n"
" vpcmpeqw (%r11), %ymm0, %ymm4\n"
" vpcmpeqw (%rdi), %ymm0, %ymm0\n"
" vpsubw %ymm4, %ymm1, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
".myL161:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm2, %ymm2\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm4\n"
" vpcmpeqw (%rdi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vpsubw %ymm4, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
".myL160:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm2, %ymm2\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm4\n"
" vpcmpeqw (%rdi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vpsubw %ymm4, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" cmpq %rdx, %rsi\n"
" je .myL166\n"
".myL144:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" leaq 32(%rdx), %rcx\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm2, %ymm2\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm4\n"
" vpcmpeqw (%rdi,%rdx), %ymm0, %ymm0\n"
" vpsubw %ymm4, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" vmovdqu 32(%r15,%rdx), %ymm0\n"
" vpcmpeqw 32(%r14,%rdx), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm2, %ymm2\n"
" vpcmpeqw 32(%r11,%rdx), %ymm0, %ymm4\n"
" vpcmpeqw 32(%rdi,%rdx), %ymm0, %ymm0\n"
" vpsubw %ymm4, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" vmovdqu 64(%r15,%rdx), %ymm0\n"
" vpcmpeqw 64(%r14,%rdx), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm2, %ymm2\n"
" vpcmpeqw 64(%r11,%rdx), %ymm0, %ymm4\n"
" vpcmpeqw 64(%rdi,%rdx), %ymm0, %ymm0\n"
" leaq 96(%rcx), %rdx\n"
" vpsubw %ymm4, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" vmovdqu 64(%r15,%rcx), %ymm0\n"
" vpcmpeqw 64(%r14,%rcx), %ymm0, %ymm4\n"
" vpsubw %ymm4, %ymm2, %ymm2\n"
" vpcmpeqw 64(%r11,%rcx), %ymm0, %ymm4\n"
" vpcmpeqw 64(%rdi,%rcx), %ymm0, %ymm0\n"
" vpsubw %ymm4, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" cmpq %rdx, %rsi\n"
" jne .myL144\n"
".myL166:\n"
" vmovdqa %xmm1, %xmm0\n"
" vextracti128 $0x1, %ymm1, %xmm1\n"
" movl %r9d, %r11d\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" andl $-16, %r11d\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" movl %r11d, %edi\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %edx\n"
" vmovdqa %xmm3, %xmm0\n"
" vextracti128 $0x1, %ymm3, %xmm3\n"
" vpaddw %xmm3, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %ecx\n"
" vextracti128 $0x1, %ymm2, %xmm0\n"
" vpaddw %xmm2, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %esi\n"
" cmpl %r9d, %r11d\n"
" je .myL169\n"
" vzeroupper\n"
".myL143:\n"
" movl %r9d, %r14d\n"
" subl %r11d, %r14d\n"
" leal -1(%r14), %r15d\n"
" cmpl $6, %r15d\n"
" jbe .myL146\n"
" movslq %ebx, %r15\n"
" vpcmpeqw %xmm3, %xmm3, %xmm3\n"
" vpxor %xmm0, %xmm0, %xmm0\n"
" vpsubsw %xmm3, %xmm0, %xmm3\n"
" addq %r11, %r15\n"
" vmovdqu (%rax,%r15,2), %xmm0\n"
" movslq %r12d, %r15\n"
" addq %r11, %r15\n"
" vpcmpeqw (%rax,%r15,2), %xmm0, %xmm2\n"
" movslq %r13d, %r15\n"
" addq %r11, %r15\n"
" vpcmpeqw (%rax,%r15,2), %xmm0, %xmm1\n"
" movslq %r8d, %r15\n"
" addq %r15, %r11\n"
" vpand %xmm3, %xmm2, %xmm2\n"
" vpcmpeqw (%rax,%r11,2), %xmm0, %xmm0\n"
" vpand %xmm3, %xmm1, %xmm1\n"
" vpand %xmm3, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm3\n"
" vpaddw %xmm3, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm3\n"
" vpaddw %xmm3, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm3\n"
" vpaddw %xmm3, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %r11d\n"
" vpsrldq $8, %xmm1, %xmm0\n"
" vpaddw %xmm0, %xmm1, %xmm0\n"
" addl %r11d, %edx\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %r11d\n"
" vpsrldq $8, %xmm2, %xmm0\n"
" vpaddw %xmm0, %xmm2, %xmm0\n"
" addl %r11d, %ecx\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpsrldq $2, %xmm0, %xmm1\n"
" vpaddw %xmm1, %xmm0, %xmm0\n"
" vpextrw $0, %xmm0, %r11d\n"
" addl %r11d, %esi\n"
" movl %r14d, %r11d\n"
" andl $-8, %r11d\n"
" addl %r11d, %edi\n"
" cmpl %r11d, %r14d\n"
" je .myL145\n"
".myL146:\n"
" leal (%rbx,%rdi), %r11d\n"
" leal (%r12,%rdi), %r14d\n"
" movslq %r14d, %r14\n"
" movslq %r11d, %r11\n"
" movzwl (%rax,%r11,2), %r11d\n"
" cmpw %r11w, (%rax,%r14,2)\n"
" sete %r14b\n"
" movzbl %r14b, %r14d\n"
" addl %r14d, %esi\n"
" leal 0(%r13,%rdi), %r14d\n"
" movslq %r14d, %r14\n"
" cmpw %r11w, (%rax,%r14,2)\n"
" sete %r14b\n"
" movzbl %r14b, %r14d\n"
" addl %r14d, %ecx\n"
" leal (%r8,%rdi), %r14d\n"
" movslq %r14d, %r14\n"
" cmpw %r11w, (%rax,%r14,2)\n"
" sete %r11b\n"
" movzbl %r11b, %r11d\n"
" addl %r11d, %edx\n"
" leal 1(%rdi), %r11d\n"
" cmpl %r11d, %r9d\n"
" jle .myL145\n"
" leal (%rbx,%r11), %r14d\n"
" leal (%r12,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" movslq %r14d, %r14\n"
" movzwl (%rax,%r14,2), %r14d\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %esi\n"
" leal 0(%r13,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r8d, %r11d\n"
" movslq %r11d, %r11\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %ecx\n"
" cmpw %r14w, (%rax,%r11,2)\n"
" sete %r11b\n"
" movzbl %r11b, %r11d\n"
" addl %r11d, %edx\n"
" leal 2(%rdi), %r11d\n"
" cmpl %r11d, %r9d\n"
" jle .myL145\n"
" leal (%rbx,%r11), %r14d\n"
" leal (%r12,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" movslq %r14d, %r14\n"
" movzwl (%rax,%r14,2), %r14d\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %esi\n"
" leal 0(%r13,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r8d, %r11d\n"
" movslq %r11d, %r11\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %ecx\n"
" cmpw %r14w, (%rax,%r11,2)\n"
" sete %r11b\n"
" movzbl %r11b, %r11d\n"
" addl %r11d, %edx\n"
" leal 3(%rdi), %r11d\n"
" cmpl %r11d, %r9d\n"
" jle .myL145\n"
" leal (%rbx,%r11), %r14d\n"
" leal (%r12,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" movslq %r14d, %r14\n"
" movzwl (%rax,%r14,2), %r14d\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %esi\n"
" leal 0(%r13,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r8d, %r11d\n"
" movslq %r11d, %r11\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %ecx\n"
" cmpw %r14w, (%rax,%r11,2)\n"
" sete %r11b\n"
" movzbl %r11b, %r11d\n"
" addl %r11d, %edx\n"
" leal 4(%rdi), %r11d\n"
" cmpl %r11d, %r9d\n"
" jle .myL145\n"
" leal (%rbx,%r11), %r14d\n"
" leal (%r12,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" movslq %r14d, %r14\n"
" movzwl (%rax,%r14,2), %r14d\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %esi\n"
" leal 0(%r13,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r8d, %r11d\n"
" movslq %r11d, %r11\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %ecx\n"
" cmpw %r14w, (%rax,%r11,2)\n"
" sete %r11b\n"
" movzbl %r11b, %r11d\n"
" addl %r11d, %edx\n"
" leal 5(%rdi), %r11d\n"
" cmpl %r11d, %r9d\n"
" jle .myL145\n"
" leal (%rbx,%r11), %r14d\n"
" leal (%r12,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" movslq %r14d, %r14\n"
" movzwl (%rax,%r14,2), %r14d\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %esi\n"
" leal 0(%r13,%r11), %r15d\n"
" movslq %r15d, %r15\n"
" cmpw %r14w, (%rax,%r15,2)\n"
" sete %r15b\n"
" addl %r8d, %r11d\n"
" movslq %r11d, %r11\n"
" movzbl %r15b, %r15d\n"
" addl %r15d, %ecx\n"
" cmpw %r14w, (%rax,%r11,2)\n"
" sete %r11b\n"
" addl $6, %edi\n"
" movzbl %r11b, %r11d\n"
" addl %r11d, %edx\n"
" cmpl %edi, %r9d\n"
" jle .myL145\n"
" addl %edi, %ebx\n"
" addl %edi, %r12d\n"
" movslq %ebx, %rbx\n"
" movslq %r12d, %r12\n"
" movzwl (%rax,%rbx,2), %r11d\n"
" xorl %ebx, %ebx\n"
" cmpw %r11w, (%rax,%r12,2)\n"
" sete %bl\n"
" addl %edi, %r13d\n"
" movslq %r13d, %r13\n"
" addl %ebx, %esi\n"
" xorl %ebx, %ebx\n"
" cmpw %r11w, (%rax,%r13,2)\n"
" sete %bl\n"
" addl %r8d, %edi\n"
" movslq %edi, %rdi\n"
" addl %ebx, %ecx\n"
" cmpw %r11w, (%rax,%rdi,2)\n"
" sete %al\n"
" movzbl %al, %eax\n"
" addl %eax, %edx\n"
".myL145:\n"
" movswl %si, %esi\n"
" movl %r9d, %eax\n"
" movswl %cx, %ecx\n"
" movswl %dx, %edx\n"
" subl %esi, %eax\n"
" movl %r9d, %esi\n"
" subl %edx, %r9d\n"
" subl %ecx, %esi\n"
".myL142:\n"
" popq %rbx\n"
" popq %r12\n"
" movw %ax, 4(%r10)\n"
" movq %r10, %rax\n"
" popq %r13\n"
" popq %r14\n"
" movw %r9w, (%r10)\n"
" popq %r15\n"
" popq %rbp\n"
" .cfi_remember_state\n"
" .cfi_def_cfa 7, 8\n"
" movw %si, 2(%r10)\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
".myL148:\n"
" .cfi_restore_state\n"
" movl %r9d, %esi\n"
" movl %r9d, %eax\n"
" jmp .myL142\n"
".myL149:\n"
" xorl %r11d, %r11d\n"
" xorl %edi, %edi\n"
" xorl %edx, %edx\n"
" xorl %ecx, %ecx\n"
" xorl %esi, %esi\n"
" leaq a(%rip), %rax\n"
" jmp .myL143\n"
".myL169:\n"
" vzeroupper\n"
" jmp .myL145\n"
" .cfi_endproc\n"
".myLFE9918:\n"
" .size _Z8get_sim3iiiii, .-_Z8get_sim3iiiii\n"
);
short sim_cnt[N+1];
short sim_pre[N+2];
short sim[N];
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up_from_other(int qr, int st)
{
short *ans = ::ans[qr];
short val = query[qr];
while (st%16 && st < n) {
ans[st] += sim[st] <= val;
++st;
}
if (st == n)
return;
typedef short ymm __attribute__((vector_size(32),aligned(32)));
ymm *vans = (ymm *)ans;
ymm *vsim = (ymm *)sim;
for (int i = st/16; i < n/16; ++i)
vans[i] += vsim[i] <= val;
for (int i = n-n%16; i < n; ++i)
ans[i] += sim[i] <= val;
}
void process(int i)
{
memset(sim_cnt, 0, sizeof(sim_cnt));
short h3[3];
Loop (j,i+1,n-l+1) {
if (j%3 == (i+1)%3) {
if (j+3 <= n-l+1) {
auto tmp = get_sim3(i, j, j+1, j+2, l);
tie(h3[0], h3[1], h3[2]) = tmp;
} else {
Loop (k,j,n-l+1)
h3[k-j] = get_sim(i, k, l);
}
} else {
h3[0] = h3[1];
h3[1] = h3[2];
}
sim[j] = h3[0];
sim_cnt[sim[j]]++;
}
sim_pre[0] = 0;
Loop (j,0,l+1)
sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
Loop (j,0,q) {
ans[j][i] += sim_pre[query[j]+1];
up_from_other(j, i+1);
}
}
int main()
{
cin.tie(0) -> sync_with_stdio(false);
vector<int> cmper;
cin >> n >> l;
Loop (i,0,n) {
cin >> noncmp_a[i];
cmper.push_back(noncmp_a[i]);
}
cin >> q;
Loop (i,0,q)
cin >> query[i];
sort(cmper.begin(), cmper.end());
cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
Loop (i,0,n) {
a[i] = lower_bound(cmper.begin(), cmper.end(),
noncmp_a[i]) - cmper.begin();
}
Loop (i,0,n-l+1)
process(i);
Loop (i,0,q) {
Loop (j,0,n-l+1)
cout << ans[i][j] << ' ';
cout << '\n';
}
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Incorrect |
1 ms |
340 KB |
Output isn't correct |
3 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Incorrect |
1 ms |
340 KB |
Output isn't correct |
3 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Incorrect |
354 ms |
540 KB |
Output isn't correct |
2 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Incorrect |
354 ms |
540 KB |
Output isn't correct |
2 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Incorrect |
1 ms |
340 KB |
Output isn't correct |
3 |
Halted |
0 ms |
0 KB |
- |