Submission #652332

#      | Submission time | User | Problem              | Language | Result   | Execution time | Memory
652332 |                 | ymm  | Lottery (CEOI18_lot) | C++17    | 45 / 100 | 3004 ms        | 764 KiB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 10016;
const int Q = 100;

int mylist[N];
short ans[Q][N];   // per-query answers, 16-bit accumulators
char ansc[Q][N];   // per-query answers, 8-bit accumulators (periodically flushed into ans)
short query[Q];
int n, q, l;
int noncmp_a[N];   // raw input values
short a[N];        // coordinate-compressed values

// Number of positions in which the length-l windows starting at i and j differ.
__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
    short ans = 0;
    for (int k = 0; k < l; ++k)
        ans += a[i+k] == a[j+k];
    return l-ans;
}

// get_sim4 compares window i against four windows j0..j3 at once. Its body is
// the AVX2 assembly block below (apparently compiler output for the
// commented-out reference implementation, with labels renamed).
//__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short,short> get_sim4(int i, int j0, int j1, int j2, int j3, int l);
/*{
    short ans0 = 0, ans1 = 0, ans2 = 0, ans3 = 0;
    for (int k = 0; k < l; ++k) {
        ans0 += a[i+k] == a[j0+k];
        ans1 += a[i+k] == a[j1+k];
        ans2 += a[i+k] == a[j2+k];
        ans3 += a[i+k] == a[j3+k];
    }
    return {l-ans0, l-ans1, l-ans2, l-ans3};
}*/

// Assembly definition of get_sim4 (mangled name _Z8get_sim4iiiiii).
asm("\n"
" .p2align 4\n"
" .globl _Z8get_sim4iiiiii\n"
" .type _Z8get_sim4iiiiii, @function\n"
"_Z8get_sim4iiiiii:\n"
".myLFB9901:\n"
" .cfi_startproc\n"
" pushq %rbp\n"
" .cfi_def_cfa_offset 16\n"
" .cfi_offset 6, -16\n"
" movq %rsp, %rbp\n"
" .cfi_def_cfa_register 6\n"
" pushq %r15\n"
" pushq %r14\n"
" pushq %r13\n"
" .cfi_offset 15, -24\n"
" .cfi_offset 14, -32\n"
" .cfi_offset 13, -40\n"
" movl %ecx, %r13d\n"
" pushq %r12\n"
" .cfi_offset 12, -48\n"
" movq %rdi, %r12\n"
" pushq %rbx\n"
" andq $-32, %rsp\n"
" .cfi_offset 3, -56\n"
" movl %esi, -4(%rsp)\n"
" movl 16(%rbp), %ecx\n"
" movl %edx, -8(%rsp)\n"
" testl %ecx, %ecx\n"
" jle .myL129\n"
" leal -1(%rcx), %eax\n"
" cmpl $14, %eax\n"
" jbe .myL130\n"
" leaq a(%rip), %rax\n"
" movl %edx, %edi\n"
" vpxor %xmm1, %xmm1, %xmm1\n"
" movslq %esi, %rdx\n"
" leaq (%rax,%rdx,2), %r15\n"
" vmovdqa %ymm1, %ymm3\n"
" vmovdqa %ymm1, %ymm4\n"
" movslq %edi, %rdx\n"
" movl %ecx, %edi\n"
" leaq (%rax,%rdx,2), %r14\n"
" vmovdqa %ymm1, %ymm2\n"
" movslq %r13d, %rdx\n"
" shrl $4, %edi\n"
" leaq (%rax,%rdx,2), %rbx\n"
" movslq %r8d, %rdx\n"
" salq $5, %rdi\n"
" leaq (%rax,%rdx,2), %r11\n"
" movslq %r9d, %rdx\n"
" leaq -32(%rdi), %rsi\n"
" leaq (%rax,%rdx,2), %r10\n"
" xorl %edx, %edx\n"
" shrq $5, %rsi\n"
" addq $1, %rsi\n"
" andl $3, %esi\n"
" je .myL125\n"
" cmpq $1, %rsi\n"
" je .myL141\n"
" cmpq $2, %rsi\n"
" je .myL142\n"
" vmovdqu (%r15), %ymm0\n"
" movl $32, %edx\n"
" vpcmpeqw (%r14), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm1, %ymm2\n"
" vpcmpeqw (%rbx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm1, %ymm4\n"
" vpcmpeqw (%r11), %ymm0, %ymm5\n"
" vpcmpeqw (%r10), %ymm0, %ymm0\n"
" vpsubw %ymm5, %ymm1, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
".myL142:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw (%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw (%r10,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
".myL141:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw (%rbx,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm4, %ymm4\n"
" vpcmpeqw (%r11,%rdx), %ymm0, %ymm5\n"
" vpcmpeqw (%r10,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vpsubw %ymm5, %ymm3, %ymm3\n"
" vpsubw %ymm0, %ymm1, %ymm1\n"
" cmpq %rdx, %rdi\n"
" je .myL147\n"
".myL125:\n"
" vmovdqu (%r15,%rdx), %ymm0\n"
" leaq 32(%rdx), %rsi\n"
" vpcmpeqw (%r14,%rdx), %ymm0, %ymm5\n"
" vpsubw %ymm5, %ymm2, %ymm2\n"
" vpcmpeqw 
(%rbx,%rdx), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm4, %ymm4\n" " vpcmpeqw (%r11,%rdx), %ymm0, %ymm5\n" " vpcmpeqw (%r10,%rdx), %ymm0, %ymm0\n" " vpsubw %ymm5, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " vmovdqu 32(%r15,%rdx), %ymm0\n" " vpcmpeqw 32(%r14,%rdx), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm2, %ymm2\n" " vpcmpeqw 32(%rbx,%rdx), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm4, %ymm4\n" " vpcmpeqw 32(%r11,%rdx), %ymm0, %ymm5\n" " vpcmpeqw 32(%r10,%rdx), %ymm0, %ymm0\n" " vpsubw %ymm5, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " vmovdqu 64(%r15,%rdx), %ymm0\n" " vpcmpeqw 64(%r14,%rdx), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm2, %ymm2\n" " vpcmpeqw 64(%rbx,%rdx), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm4, %ymm4\n" " vpcmpeqw 64(%r11,%rdx), %ymm0, %ymm5\n" " vpcmpeqw 64(%r10,%rdx), %ymm0, %ymm0\n" " leaq 96(%rsi), %rdx\n" " vpsubw %ymm5, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " vmovdqu 64(%r15,%rsi), %ymm0\n" " vpcmpeqw 64(%r14,%rsi), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm2, %ymm2\n" " vpcmpeqw 64(%rbx,%rsi), %ymm0, %ymm5\n" " vpsubw %ymm5, %ymm4, %ymm4\n" " vpcmpeqw 64(%r11,%rsi), %ymm0, %ymm5\n" " vpcmpeqw 64(%r10,%rsi), %ymm0, %ymm0\n" " vpsubw %ymm5, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " cmpq %rdx, %rdi\n" " jne .myL125\n" ".myL147:\n" " vmovdqa %xmm1, %xmm0\n" " vextracti128 $0x1, %ymm1, %xmm1\n" " movl %ecx, %ebx\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " andl $-16, %ebx\n" " vpsrldq $8, %xmm0, %xmm1\n" " movl %ebx, %edx\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %esi\n" " vmovdqa %xmm3, %xmm0\n" " vextracti128 $0x1, %ymm3, %xmm3\n" " vpaddw %xmm3, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %edi\n" " vmovdqa %xmm4, %xmm0\n" " vextracti128 $0x1, %ymm4, %xmm4\n" " vpaddw %xmm4, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %r10d\n" " vextracti128 $0x1, %ymm2, %xmm0\n" " vpaddw %xmm2, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %r11d\n" " cmpl %ecx, %ebx\n" " je .myL153\n" " vzeroupper\n" ".myL124:\n" " movl %ecx, %r15d\n" " subl %ebx, %r15d\n" " leal -1(%r15), %r14d\n" " cmpl $6, %r14d\n" " jbe .myL127\n" " movslq -4(%rsp), %r14\n" " vpcmpeqw %xmm4, %xmm4, %xmm4\n" " vpxor %xmm0, %xmm0, %xmm0\n" " vpsubw %xmm4, %xmm0, %xmm4\n" " addq %rbx, %r14\n" " vmovdqu (%rax,%r14,2), %xmm0\n" " movslq -8(%rsp), %r14\n" " addq %rbx, %r14\n" " vpcmpeqw (%rax,%r14,2), %xmm0, %xmm2\n" " movslq %r13d, %r14\n" " addq %rbx, %r14\n" " vpcmpeqw (%rax,%r14,2), %xmm0, %xmm3\n" " movslq %r8d, %r14\n" " addq %rbx, %r14\n" " vpand %xmm4, %xmm2, %xmm2\n" " vpcmpeqw (%rax,%r14,2), %xmm0, %xmm1\n" " movslq %r9d, %r14\n" " addq %rbx, %r14\n" " vpand %xmm4, %xmm3, %xmm3\n" " vpcmpeqw (%rax,%r14,2), %xmm0, %xmm0\n" " vpand %xmm4, %xmm1, %xmm1\n" " vpand %xmm4, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm4\n" " vpaddw %xmm4, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm4\n" " vpaddw %xmm4, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm4\n" " vpaddw %xmm4, %xmm0, 
%xmm0\n" " vpextrw $0, %xmm0, %ebx\n" " vpsrldq $8, %xmm1, %xmm0\n" " vpaddw %xmm0, %xmm1, %xmm0\n" " addl %ebx, %esi\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %ebx\n" " vpsrldq $8, %xmm3, %xmm0\n" " vpaddw %xmm0, %xmm3, %xmm0\n" " addl %ebx, %edi\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %ebx\n" " vpsrldq $8, %xmm2, %xmm0\n" " vpaddw %xmm0, %xmm2, %xmm0\n" " addl %ebx, %r10d\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %ebx\n" " addl %ebx, %r11d\n" " movl %r15d, %ebx\n" " andl $-8, %ebx\n" " addl %ebx, %edx\n" " cmpl %ebx, %r15d\n" " je .myL126\n" ".myL127:\n" " movl -4(%rsp), %r15d\n" " movl -8(%rsp), %r14d\n" " leal (%r15,%rdx), %ebx\n" " addl %edx, %r14d\n" " movslq %r14d, %r14\n" " movslq %ebx, %rbx\n" " movzwl (%rax,%rbx,2), %ebx\n" " cmpw %bx, (%rax,%r14,2)\n" " sete %r14b\n" " movzbl %r14b, %r14d\n" " addl %r14d, %r11d\n" " leal 0(%r13,%rdx), %r14d\n" " movslq %r14d, %r14\n" " cmpw %bx, (%rax,%r14,2)\n" " sete %r14b\n" " movzbl %r14b, %r14d\n" " addl %r14d, %r10d\n" " leal (%r8,%rdx), %r14d\n" " movslq %r14d, %r14\n" " cmpw %bx, (%rax,%r14,2)\n" " sete %r14b\n" " movzbl %r14b, %r14d\n" " addl %r14d, %edi\n" " leal (%r9,%rdx), %r14d\n" " movslq %r14d, %r14\n" " cmpw %bx, (%rax,%r14,2)\n" " sete %bl\n" " movzbl %bl, %ebx\n" " addl %ebx, %esi\n" " leal 1(%rdx), %ebx\n" " cmpl %ebx, %ecx\n" " jle .myL126\n" " leal (%r15,%rbx), %r14d\n" " movl -8(%rsp), %r15d\n" " movslq %r14d, %r14\n" " addl %ebx, %r15d\n" " movzwl (%rax,%r14,2), %r14d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r11d\n" " leal 0(%r13,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r10d\n" " leal (%r8,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r9d, %ebx\n" " movslq %ebx, %rbx\n" " movzbl %r15b, %r15d\n" " addl %r15d, %edi\n" " cmpw %r14w, (%rax,%rbx,2)\n" " sete %bl\n" " movzbl %bl, %ebx\n" " addl %ebx, %esi\n" " leal 2(%rdx), %ebx\n" " cmpl %ebx, %ecx\n" " jle .myL126\n" " movl -4(%rsp), %r15d\n" " leal (%r15,%rbx), %r14d\n" " movl -8(%rsp), %r15d\n" " movslq %r14d, %r14\n" " addl %ebx, %r15d\n" " movzwl (%rax,%r14,2), %r14d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r11d\n" " leal 0(%r13,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r10d\n" " leal (%r8,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r9d, %ebx\n" " movslq %ebx, %rbx\n" " movzbl %r15b, %r15d\n" " addl %r15d, %edi\n" " cmpw %r14w, (%rax,%rbx,2)\n" " sete %bl\n" " movzbl %bl, %ebx\n" " addl %ebx, %esi\n" " leal 3(%rdx), %ebx\n" " cmpl %ebx, %ecx\n" " jle .myL126\n" " movl -4(%rsp), %r15d\n" " leal (%r15,%rbx), %r14d\n" " movl -8(%rsp), %r15d\n" " movslq %r14d, %r14\n" " addl %ebx, %r15d\n" " movzwl (%rax,%r14,2), %r14d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r11d\n" " leal 0(%r13,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, 
%r15d\n" " addl %r15d, %r10d\n" " leal (%r8,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r9d, %ebx\n" " movslq %ebx, %rbx\n" " movzbl %r15b, %r15d\n" " addl %r15d, %edi\n" " cmpw %r14w, (%rax,%rbx,2)\n" " sete %bl\n" " movzbl %bl, %ebx\n" " addl %ebx, %esi\n" " leal 4(%rdx), %ebx\n" " cmpl %ebx, %ecx\n" " jle .myL126\n" " movl -4(%rsp), %r15d\n" " leal (%r15,%rbx), %r14d\n" " movl -8(%rsp), %r15d\n" " movslq %r14d, %r14\n" " addl %ebx, %r15d\n" " movzwl (%rax,%r14,2), %r14d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r11d\n" " leal 0(%r13,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r10d\n" " leal (%r8,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r9d, %ebx\n" " movslq %ebx, %rbx\n" " movzbl %r15b, %r15d\n" " addl %r15d, %edi\n" " cmpw %r14w, (%rax,%rbx,2)\n" " sete %bl\n" " movzbl %bl, %ebx\n" " addl %ebx, %esi\n" " leal 5(%rdx), %ebx\n" " cmpl %ebx, %ecx\n" " jle .myL126\n" " movl -4(%rsp), %r15d\n" " leal (%r15,%rbx), %r14d\n" " movl -8(%rsp), %r15d\n" " movslq %r14d, %r14\n" " addl %ebx, %r15d\n" " movzwl (%rax,%r14,2), %r14d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r11d\n" " leal 0(%r13,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %r10d\n" " leal (%r8,%rbx), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r9d, %ebx\n" " movslq %ebx, %rbx\n" " movzbl %r15b, %r15d\n" " addl %r15d, %edi\n" " cmpw %r14w, (%rax,%rbx,2)\n" " sete %bl\n" " addl $6, %edx\n" " movzbl %bl, %ebx\n" " addl %ebx, %esi\n" " cmpl %edx, %ecx\n" " jle .myL126\n" " movl -4(%rsp), %ebx\n" " movl -8(%rsp), %r14d\n" " addl %edx, %ebx\n" " addl %edx, %r14d\n" " movslq %r14d, %r14\n" " movslq %ebx, %rbx\n" " movzwl (%rax,%rbx,2), %ebx\n" " cmpw %bx, (%rax,%r14,2)\n" " sete %r14b\n" " addl %edx, %r13d\n" " movslq %r13d, %r13\n" " movzbl %r14b, %r14d\n" " addl %r14d, %r11d\n" " cmpw %bx, (%rax,%r13,2)\n" " sete %r13b\n" " addl %edx, %r8d\n" " movslq %r8d, %r8\n" " movzbl %r13b, %r13d\n" " addl %r13d, %r10d\n" " cmpw %bx, (%rax,%r8,2)\n" " sete %r8b\n" " addl %r9d, %edx\n" " movzbl %r8b, %r8d\n" " movslq %edx, %rdx\n" " addl %r8d, %edi\n" " cmpw %bx, (%rax,%rdx,2)\n" " sete %al\n" " movzbl %al, %eax\n" " addl %eax, %esi\n" ".myL126:\n" " movl %ecx, %eax\n" " movl %ecx, %edx\n" " movl %ecx, %r8d\n" " movswl %r11w, %r11d\n" " movswl %r10w, %r10d\n" " movswl %di, %edi\n" " movswl %si, %esi\n" " subl %r11d, %eax\n" " subl %r10d, %edx\n" " subl %edi, %r8d\n" " subl %esi, %ecx\n" ".myL123:\n" " movw %ax, 6(%r12)\n" " movq %r12, %rax\n" " movw %cx, (%r12)\n" " movw %r8w, 2(%r12)\n" " movw %dx, 4(%r12)\n" " leaq -40(%rbp), %rsp\n" " popq %rbx\n" " popq %r12\n" " popq %r13\n" " popq %r14\n" " popq %r15\n" " popq %rbp\n" " .cfi_remember_state\n" " .cfi_def_cfa 7, 8\n" " ret\n" " .p2align 4,,10\n" " .p2align 3\n" ".myL129:\n" " .cfi_restore_state\n" " movl %ecx, %r8d\n" " movl %ecx, %edx\n" " movl %ecx, %eax\n" " jmp .myL123\n" ".myL130:\n" " xorl %ebx, %ebx\n" " xorl %edx, %edx\n" " xorl %esi, %esi\n" " xorl %edi, %edi\n" " xorl %r10d, %r10d\n" " xorl %r11d, %r11d\n" " leaq a(%rip), %rax\n" " jmp .myL124\n" ".myL153:\n" " vzeroupper\n" " jmp .myL126\n" " .cfi_endproc\n" ".myLFE9901:\n" " .size 
_Z8get_sim4iiiiii, .-_Z8get_sim4iiiiii\n" );

short sim_cnt[N+1];  // sim_cnt[d]: number of windows at distance exactly d from the current window
short sim_pre[N+2];  // prefix sums of sim_cnt
short sim[N];        // sim[j]: distance of window j to the current window

// For the two queries qr0 and qr1, add 1 to ansc[qr][j] for every index j >= st
// whose stored distance sim[j] is at most query[qr] (i.e. sim[j] < query[qr]+1).
// Uses GCC 256-bit vector extensions to handle 32 positions per iteration.
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up_from_other2(int qr0, int qr1, int st)
{
    char *ans0 = ::ansc[qr0];
    char *ans1 = ::ansc[qr1];
    short val0 = query[qr0]+1;
    short val1 = query[qr1]+1;
    // Scalar prologue until st is 32-aligned.
    while (st%32) {
        ans0[st] += sim[st] < val0;
        ans1[st] += sim[st] < val1;
        ++st;
    }
    typedef short ymms __attribute__((vector_size(32),aligned(32)));
    typedef char  ymmc __attribute__((vector_size(32),aligned(32)));
    ymmc *vans0 = (ymmc *)ans0;
    ymmc *vans1 = (ymmc *)ans1;
    ymms *vsim = (ymms *)sim;
    for (int i = st/32; i < N/32; ++i) {
        ymms tmp0 = vsim[i*2], tmp1 = vsim[i*2+1];
        ymms t00 = tmp0 < val0;
        ymms t10 = tmp1 < val0;
        ymms t01 = tmp0 < val1;
        ymms t11 = tmp1 < val1;
        // Narrow the two 16-bit comparison masks down to one 8-bit mask per query,
        // then subtract (masks are -1 where true, so this adds 1 per hit).
        ymmc x0 = __builtin_shuffle((ymmc)t00, (ymmc)t10, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
        ymmc x1 = __builtin_shuffle((ymmc)t01, (ymmc)t11, ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
        vans0[i] -= x0;
        vans1[i] -= x1;
    }
}

// ansc holds per-query counts in 8-bit cells; flush them into the 16-bit ans
// array before they can overflow.
__attribute__((optimize("O3,unroll-loops"),target("avx")))
void flush_ansc()
{
    Loop (i,0,q) Loop (j,0,n) {
        ans[i][j] += (unsigned char)ansc[i][j];
        ansc[i][j] = 0;
    }
}

// Compare window i against every window j > i: bucket the distances in sim_cnt,
// answer the queries for position i via prefix sums, and credit each position j
// through up_from_other2.
void process(int i)
{
    if (i%255 == 0)
        flush_ansc();
    memset(sim_cnt, 0, sizeof(sim_cnt));
    for (int j = i+1; j+4 <= n-l+1; j += 4) {
        auto [t0, t1, t2, t3] = get_sim4(i, j, j+1, j+2, j+3, l);
        sim[j+0] = t0; sim[j+1] = t1; sim[j+2] = t2; sim[j+3] = t3;
        sim_cnt[t0]++; sim_cnt[t1]++; sim_cnt[t2]++; sim_cnt[t3]++;
    }
    // Leftover windows (fewer than four) at the end.
    Loop (j, n-l+1 - (n-l+1 - (i+1))%4, n-l+1) {
        sim[j] = get_sim(i, j, l);
        sim_cnt[sim[j]]++;
    }
    sim_pre[0] = 0;
    Loop (j,0,l+1) sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
    Loop (j,0,q) ans[j][i] += sim_pre[query[j]+1];
    for (int j = 0; j < q; j += 2)
        up_from_other2(j, j+1, i+1);
}

int main()
{
    cin.tie(0) -> sync_with_stdio(false);
    vector<int> cmper;
    cin >> n >> l;
    Loop (i,0,n) {
        cin >> noncmp_a[i];
        cmper.push_back(noncmp_a[i]);
    }
    cin >> q;
    Loop (i,0,q) cin >> query[i];
    // Coordinate-compress the ticket values so windows can be compared as shorts.
    sort(cmper.begin(), cmper.end());
    cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
    Loop (i,0,n) {
        a[i] = lower_bound(cmper.begin(), cmper.end(), noncmp_a[i]) - cmper.begin();
    }
    Loop (i,0,n-l+1) process(i);
    flush_ansc();
    Loop (i,0,q) {
        Loop (j,0,n-l+1) cout << ans[i][j] << ' ';
        cout << '\n';
    }
}
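For readers who do not want to trace the embedded AVX2 assembly: the commented-out block in the source above is the reference implementation that the assembly stands in for. Below is a minimal, self-contained sketch of that routine under illustrative assumptions; the small stand-in array a, the name get_sim4_ref, and the indices used in main are made up for demonstration, and only the counting logic mirrors the submission.

#include <cstdio>
#include <tuple>

// Illustrative stand-in for the submission's global, coordinate-compressed array.
static short a[16] = {1, 2, 3, 1, 2, 3, 1, 9, 1, 2, 0, 1, 2, 3, 1, 2};

// Portable sketch of get_sim4: for one window starting at i, count mismatches
// against four windows starting at j0..j3 over length l.
std::tuple<short, short, short, short>
get_sim4_ref(int i, int j0, int j1, int j2, int j3, int l)
{
    short eq0 = 0, eq1 = 0, eq2 = 0, eq3 = 0;
    for (int k = 0; k < l; ++k) {
        eq0 += a[i + k] == a[j0 + k];
        eq1 += a[i + k] == a[j1 + k];
        eq2 += a[i + k] == a[j2 + k];
        eq3 += a[i + k] == a[j3 + k];
    }
    // Like the submission, return the number of differing positions, not matches.
    return {short(l - eq0), short(l - eq1), short(l - eq2), short(l - eq3)};
}

int main()
{
    auto [d0, d1, d2, d3] = get_sim4_ref(0, 3, 5, 8, 10, 5);
    std::printf("%d %d %d %d\n", d0, d1, d2, d3);
    return 0;
}

The inline assembly in the submission appears to be compiler output for exactly this loop (note the GCC-style .cfi directives and the renamed .myL* labels), embedded directly rather than relying on target("avx2") attribute support at build time.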