제출 #652334

#제출 시각 · 아이디 · 문제 · 언어 · 결과 · 실행 시간 · 메모리
652334 · ymm · Lottery (CEOI18_lot) · C++17
100 / 100
2741 ms7444 KiB
// CEOI 2018 "Lottery" (lot).
//
// Input: a sequence a[0..n-1] and a window length l.  For every pair of
// windows (i, j) let sim(i, j) = number of positions k in [0, l) where
// a[i+k] != a[j+k]  (the mismatch count).  For each query value query[t] and
// each window i, output how many other windows j have sim(i, j) <= query[t].
//
// Strategy (O((n-l+1)^2 * l / SIMD-width) brute force, heavily vectorized):
// for each window i, compute sim(i, j) for all j > i eight-at-a-time
// (get_sim8), bucket the counts into a prefix sum to answer all queries for
// row i at once, and symmetrically credit the j-rows through a vectorized
// byte accumulator (up_from_other2 + flush_ansc).
//
// NOTE(review): the original submission implemented get_sim8 as ~12 KB of
// hand-pasted GCC assembly (tied to the SysV AMD64 ABI, GNU as syntax and
// the exact Itanium-mangled symbol).  The equivalent C++ source was present
// in a comment right next to it; it is restored below with the same
// optimize/target attributes so the compiler emits comparable AVX2 code.
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 10016;   // max n, padded to a multiple of 32 for the vector loops
const int Q = 100;     // max number of queries

short ans [Q][N];      // final answers: ans[t][i] = #windows j with sim <= query[t]
char  ansc[Q][N];      // per-window byte accumulator, periodically flushed into ans
short query[Q];        // query thresholds (zero-initialized; see process() for odd q)
int n, q, l;
int   noncmp_a[N];     // raw input values
short a[N];            // input compressed to ranks so elements fit in 16 bits

// Mismatch count between windows starting at i and j (scalar fallback used
// for the < 8 trailing windows of each row).
__attribute__((optimize("O3,unroll-loops"),target("avx")))
short get_sim(int i, int j, int l)
{
	short matches = 0;  // renamed: the original local `ans` shadowed the global array
	for (int k = 0; k < l; ++k)
		matches += a[i+k] == a[j+k];
	return l - matches;
}

// Mismatch counts of window i against eight windows j0..j7 in one pass.
// Comparing eight streams against the same a[i+k] amortizes the load of the
// reference window; with target("avx2") the compiler vectorizes the 16-bit
// compares across all eight accumulators.
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
tuple<short,short,short,short,short,short,short,short>
get_sim8(int i, int j0, int j1, int j2, int j3, int j4, int j5, int j6, int j7, int l)
{
	short ans0 = 0, ans1 = 0, ans2 = 0, ans3 = 0,
	      ans4 = 0, ans5 = 0, ans6 = 0, ans7 = 0;
	for (int k = 0; k < l; ++k) {
		ans0 += a[i+k] == a[j0+k];
		ans1 += a[i+k] == a[j1+k];
		ans2 += a[i+k] == a[j2+k];
		ans3 += a[i+k] == a[j3+k];
		ans4 += a[i+k] == a[j4+k];
		ans5 += a[i+k] == a[j5+k];
		ans6 += a[i+k] == a[j6+k];
		ans7 += a[i+k] == a[j7+k];
	}
	return {l-ans0, l-ans1, l-ans2, l-ans3, l-ans4, l-ans5, l-ans6, l-ans7};
}

short sim_cnt[N+1];  // histogram of sim values for the current row i
short sim_pre[N+2];  // prefix sums of sim_cnt (sim_pre[k] = #j with sim < k)
short sim[N];        // sim[j] = sim(i, j) for the current row i

// Symmetric update: for two queries qr0/qr1, bump ansc[qr][j] for every
// j >= st whose sim[j] is strictly below query[qr]+1 (i.e. sim <= query).
// Scalar loop aligns st to 32, then 32 shorts are compared per iteration and
// the 0/-1 compare masks are byte-packed (even bytes = low byte of each
// 16-bit lane) so `vans -= mask` adds 1 per hit.  Entries past n-l are stale
// but the corresponding ansc columns are never printed.
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up_from_other2(int qr0, int qr1, int st)
{
	char *ans0 = ::ansc[qr0];
	char *ans1 = ::ansc[qr1];
	short val0 = query[qr0]+1;
	short val1 = query[qr1]+1;
	while (st%32) {
		ans0[st] += sim[st] < val0;
		ans1[st] += sim[st] < val1;
		++st;
	}
	typedef short ymms __attribute__((vector_size(32),aligned(32)));
	typedef char  ymmc __attribute__((vector_size(32),aligned(32)));
	ymmc *vans0 = (ymmc *)ans0;
	ymmc *vans1 = (ymmc *)ans1;
	ymms *vsim  = (ymms *)sim;
	for (int i = st/32; i < N/32; ++i) {
		ymms tmp0 = vsim[i*2], tmp1 = vsim[i*2+1];
		ymms t00 = tmp0 < val0;
		ymms t10 = tmp1 < val0;
		ymms t01 = tmp0 < val1;
		ymms t11 = tmp1 < val1;
		ymmc x0 = __builtin_shuffle((ymmc)t00, (ymmc)t10,
			ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
			     32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
		ymmc x1 = __builtin_shuffle((ymmc)t01, (ymmc)t11,
			ymmc{0,2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,
			     32,34,36,38,40,42,44,46,48,50,52,54,56,58,60,62});
		vans0[i] -= x0;   // mask byte is -1 on hit, so subtracting adds 1
		vans1[i] -= x1;
	}
}

// Drain the byte accumulators into the 16-bit answers and reset them.
// Called every 255 rows (see process), so each ansc cell holds at most 255
// increments and cannot wrap.
__attribute__((optimize("O3,unroll-loops"),target("avx")))
void flush_ansc()
{
	Loop (i,0,q) Loop (j,0,n) {
		ans[i][j] += (unsigned char)ansc[i][j];
		ansc[i][j] = 0;
	}
}

// Process row i: compute sim(i, j) for all j > i, answer row i's queries via
// a histogram prefix sum, and credit the symmetric pairs (j, i).
void process(int i)
{
	if (i%255 == 0) flush_ansc();  // keep ansc within unsigned-char range
	memset(sim_cnt, 0, sizeof(sim_cnt));
	// Bulk of the row, eight windows at a time.
	for (int j = i+1; j+8 <= n-l+1; j += 8) {
		auto [t0, t1, t2, t3, t4, t5, t6, t7] =
			get_sim8(i, j, j+1, j+2, j+3, j+4, j+5, j+6, j+7, l);
		sim[j+0] = t0; sim[j+1] = t1; sim[j+2] = t2; sim[j+3] = t3;
		sim[j+4] = t4; sim[j+5] = t5; sim[j+6] = t6; sim[j+7] = t7;
		sim_cnt[t0]++; sim_cnt[t1]++; sim_cnt[t2]++; sim_cnt[t3]++;
		sim_cnt[t4]++; sim_cnt[t5]++; sim_cnt[t6]++; sim_cnt[t7]++;
	}
	// Remaining < 8 windows, one at a time.
	Loop (j, n-l+1 - (n-l+1 - (i+1))%8, n-l+1) {
		sim[j] = get_sim(i, j, l);
		sim_cnt[sim[j]]++;
	}
	// sim_pre[k+1] = number of j > i with sim(i,j) <= k; answers row i.
	sim_pre[0] = 0;
	Loop (j,0,l+1) sim_pre[j+1] = sim_pre[j] + sim_cnt[j];
	Loop (j,0,q) ans[j][i] += sim_pre[query[j]+1];
	// Symmetric credit, two queries per pass.  If q is odd the last call
	// touches row q with query[q] == 0 (zero-initialized, within the Q=100
	// bound); that row is never printed, so this is harmless.
	for (int j = 0; j < q; j += 2)
		up_from_other2(j, j+1, i+1);
}

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	vector<int> cmper;
	cin >> n >> l;
	Loop (i,0,n) {
		cin >> noncmp_a[i];
		cmper.push_back(noncmp_a[i]);
	}
	cin >> q;
	Loop (i,0,q) cin >> query[i];
	// Coordinate-compress the values so each fits in a short (only equality
	// between elements matters for sim()).
	sort(cmper.begin(), cmper.end());
	cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin());
	Loop (i,0,n) {
		a[i] = lower_bound(cmper.begin(), cmper.end(), noncmp_a[i]) - cmper.begin();
	}
	Loop (i,0,n-l+1) process(i);
	flush_ansc();  // drain any increments accumulated since the last flush
	Loop (i,0,q) {
		Loop (j,0,n-l+1) cout << ans[i][j] << ' ';
		cout << '\n';
	}
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...