Submission #652244

#TimeUsernameProblemLanguageResultExecution timeMemory
652244ymmLottery (CEOI18_lot)C++17
0 / 100
354 ms540 KiB
#include <bits/stdc++.h> #define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x) #define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x) typedef long long ll; typedef std::pair<int, int> pii; typedef std::pair<ll , ll > pll; using namespace std; const int N = 10000; const int Q = 100; int mylist[N]; short ans[Q][N]; short query[Q]; int n, q, l; int noncmp_a[N]; short a[N]; __attribute__((optimize("O3,unroll-loops"),target("avx"))) short get_sim(int i, int j, int l) { short ans = 0; for (int k = 0; k < l; ++k) ans += a[i+k] == a[j+k]; return l-ans; } //__attribute__((optimize("O3,unroll-loops"),target("avx2"))) tuple<short,short,short> get_sim3(int i, int j0, int j1, int j2, int l); /*{ short ans0 = 0, ans1 = 0, ans2 = 0; for (int k = 0; k < l; ++k) { ans0 += a[i+k] == a[j0+k]; ans1 += a[i+k] == a[j1+k]; ans2 += a[i+k] == a[j2+k]; } return {l-ans0, l-ans1, l-ans2}; }*/ asm("\n" " .p2align 4\n" " .globl _Z8get_sim3iiiii\n" " .type _Z8get_sim3iiiii, @function\n" "_Z8get_sim3iiiii:\n" ".myLFB9918:\n" " .cfi_startproc\n" " pushq %rbp\n" " .cfi_def_cfa_offset 16\n" " .cfi_offset 6, -16\n" " movq %rdi, %r10\n" " movq %rsp, %rbp\n" " .cfi_def_cfa_register 6\n" " pushq %r15\n" " pushq %r14\n" " pushq %r13\n" " pushq %r12\n" " pushq %rbx\n" " .cfi_offset 15, -24\n" " .cfi_offset 14, -32\n" " .cfi_offset 13, -40\n" " .cfi_offset 12, -48\n" " .cfi_offset 3, -56\n" " testl %r9d, %r9d\n" " jle .myL148\n" " leal -1(%r9), %eax\n" " movl %esi, %ebx\n" " movl %edx, %r12d\n" " movl %ecx, %r13d\n" " cmpl $14, %eax\n" " jbe .myL149\n" " movslq %esi, %rdx\n" " movl %r9d, %esi\n" " leaq a(%rip), %rax\n" " shrl $4, %esi\n" " leaq (%rax,%rdx,2), %r15\n" " vpxor %xmm1, %xmm1, %xmm1\n" " movslq %r12d, %rdx\n" " leaq (%rax,%rdx,2), %r14\n" " salq $5, %rsi\n" " vmovdqa %ymm1, %ymm3\n" " movslq %ecx, %rdx\n" " leaq -32(%rsi), %rcx\n" " leaq (%rax,%rdx,2), %r11\n" " vmovdqa %ymm1, %ymm2\n" " movslq %r8d, %rdx\n" " shrq $5, %rcx\n" " leaq (%rax,%rdx,2), %rdi\n" " xorl %edx, %edx\n" " addq $1, %rcx\n" " andl $3, %ecx\n" " je .myL144\n" " cmpq $1, %rcx\n" " je .myL160\n" " cmpq $2, %rcx\n" " je .myL161\n" " vmovdqu (%r15), %ymm0\n" " movl $32, %edx\n" " vpcmpeqw (%r14), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm1, %ymm2\n" " vpcmpeqw (%r11), %ymm0, %ymm4\n" " vpcmpeqw (%rdi), %ymm0, %ymm0\n" " vpsubw %ymm4, %ymm1, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" ".myL161:\n" " vmovdqu (%r15,%rdx), %ymm0\n" " vpcmpeqw (%r14,%rdx), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm2, %ymm2\n" " vpcmpeqw (%r11,%rdx), %ymm0, %ymm4\n" " vpcmpeqw (%rdi,%rdx), %ymm0, %ymm0\n" " addq $32, %rdx\n" " vpsubw %ymm4, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" ".myL160:\n" " vmovdqu (%r15,%rdx), %ymm0\n" " vpcmpeqw (%r14,%rdx), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm2, %ymm2\n" " vpcmpeqw (%r11,%rdx), %ymm0, %ymm4\n" " vpcmpeqw (%rdi,%rdx), %ymm0, %ymm0\n" " addq $32, %rdx\n" " vpsubw %ymm4, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " cmpq %rdx, %rsi\n" " je .myL166\n" ".myL144:\n" " vmovdqu (%r15,%rdx), %ymm0\n" " leaq 32(%rdx), %rcx\n" " vpcmpeqw (%r14,%rdx), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm2, %ymm2\n" " vpcmpeqw (%r11,%rdx), %ymm0, %ymm4\n" " vpcmpeqw (%rdi,%rdx), %ymm0, %ymm0\n" " vpsubw %ymm4, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " vmovdqu 32(%r15,%rdx), %ymm0\n" " vpcmpeqw 32(%r14,%rdx), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm2, %ymm2\n" " vpcmpeqw 32(%r11,%rdx), %ymm0, %ymm4\n" " vpcmpeqw 32(%rdi,%rdx), %ymm0, %ymm0\n" " vpsubw %ymm4, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " vmovdqu 64(%r15,%rdx), %ymm0\n" " vpcmpeqw 64(%r14,%rdx), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm2, %ymm2\n" " vpcmpeqw 64(%r11,%rdx), %ymm0, %ymm4\n" " vpcmpeqw 64(%rdi,%rdx), %ymm0, %ymm0\n" " leaq 96(%rcx), %rdx\n" " vpsubw %ymm4, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " vmovdqu 64(%r15,%rcx), %ymm0\n" " vpcmpeqw 64(%r14,%rcx), %ymm0, %ymm4\n" " vpsubw %ymm4, %ymm2, %ymm2\n" " vpcmpeqw 64(%r11,%rcx), %ymm0, %ymm4\n" " vpcmpeqw 64(%rdi,%rcx), %ymm0, %ymm0\n" " vpsubw %ymm4, %ymm3, %ymm3\n" " vpsubw %ymm0, %ymm1, %ymm1\n" " cmpq %rdx, %rsi\n" " jne .myL144\n" ".myL166:\n" " vmovdqa %xmm1, %xmm0\n" " vextracti128 $0x1, %ymm1, %xmm1\n" " movl %r9d, %r11d\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " andl $-16, %r11d\n" " vpsrldq $8, %xmm0, %xmm1\n" " movl %r11d, %edi\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %edx\n" " vmovdqa %xmm3, %xmm0\n" " vextracti128 $0x1, %ymm3, %xmm3\n" " vpaddw %xmm3, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %ecx\n" " vextracti128 $0x1, %ymm2, %xmm0\n" " vpaddw %xmm2, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %esi\n" " cmpl %r9d, %r11d\n" " je .myL169\n" " vzeroupper\n" ".myL143:\n" " movl %r9d, %r14d\n" " subl %r11d, %r14d\n" " leal -1(%r14), %r15d\n" " cmpl $6, %r15d\n" " jbe .myL146\n" " movslq %ebx, %r15\n" " vpcmpeqw %xmm3, %xmm3, %xmm3\n" " vpxor %xmm0, %xmm0, %xmm0\n" " vpsubsw %xmm3, %xmm0, %xmm3\n" " addq %r11, %r15\n" " vmovdqu (%rax,%r15,2), %xmm0\n" " movslq %r12d, %r15\n" " addq %r11, %r15\n" " vpcmpeqw (%rax,%r15,2), %xmm0, %xmm2\n" " movslq %r13d, %r15\n" " addq %r11, %r15\n" " vpcmpeqw (%rax,%r15,2), %xmm0, %xmm1\n" " movslq %r8d, %r15\n" " addq %r15, %r11\n" " vpand %xmm3, %xmm2, %xmm2\n" " vpcmpeqw (%rax,%r11,2), %xmm0, %xmm0\n" " vpand %xmm3, %xmm1, %xmm1\n" " vpand %xmm3, %xmm0, %xmm0\n" " vpsrldq $8, %xmm0, %xmm3\n" " vpaddw %xmm3, %xmm0, %xmm0\n" " vpsrldq $4, %xmm0, %xmm3\n" " vpaddw %xmm3, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm3\n" " vpaddw %xmm3, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %r11d\n" " vpsrldq $8, %xmm1, %xmm0\n" " vpaddw %xmm0, %xmm1, %xmm0\n" " addl %r11d, %edx\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %r11d\n" " vpsrldq $8, %xmm2, %xmm0\n" " vpaddw %xmm0, %xmm2, %xmm0\n" " addl %r11d, %ecx\n" " vpsrldq $4, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpsrldq $2, %xmm0, %xmm1\n" " vpaddw %xmm1, %xmm0, %xmm0\n" " vpextrw $0, %xmm0, %r11d\n" " addl %r11d, %esi\n" " movl %r14d, %r11d\n" " andl $-8, %r11d\n" " addl %r11d, %edi\n" " cmpl %r11d, %r14d\n" " je .myL145\n" ".myL146:\n" " leal (%rbx,%rdi), %r11d\n" " leal (%r12,%rdi), %r14d\n" " movslq %r14d, %r14\n" " movslq %r11d, %r11\n" " movzwl (%rax,%r11,2), %r11d\n" " cmpw %r11w, (%rax,%r14,2)\n" " sete %r14b\n" " movzbl %r14b, %r14d\n" " addl %r14d, %esi\n" " leal 0(%r13,%rdi), %r14d\n" " movslq %r14d, %r14\n" " cmpw %r11w, (%rax,%r14,2)\n" " sete %r14b\n" " movzbl %r14b, %r14d\n" " addl %r14d, %ecx\n" " leal (%r8,%rdi), %r14d\n" " movslq %r14d, %r14\n" " cmpw %r11w, (%rax,%r14,2)\n" " sete %r11b\n" " movzbl %r11b, %r11d\n" " addl %r11d, %edx\n" " leal 1(%rdi), %r11d\n" " cmpl %r11d, %r9d\n" " jle .myL145\n" " leal (%rbx,%r11), %r14d\n" " leal (%r12,%r11), %r15d\n" " movslq %r15d, %r15\n" " movslq %r14d, %r14\n" " movzwl (%rax,%r14,2), %r14d\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %esi\n" " leal 0(%r13,%r11), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r8d, %r11d\n" " movslq %r11d, %r11\n" " movzbl %r15b, %r15d\n" " addl %r15d, %ecx\n" " cmpw %r14w, (%rax,%r11,2)\n" " sete %r11b\n" " movzbl %r11b, %r11d\n" " addl %r11d, %edx\n" " leal 2(%rdi), %r11d\n" " cmpl %r11d, %r9d\n" " jle .myL145\n" " leal (%rbx,%r11), %r14d\n" " leal (%r12,%r11), %r15d\n" " movslq %r15d, %r15\n" " movslq %r14d, %r14\n" " movzwl (%rax,%r14,2), %r14d\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %esi\n" " leal 0(%r13,%r11), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r8d, %r11d\n" " movslq %r11d, %r11\n" " movzbl %r15b, %r15d\n" " addl %r15d, %ecx\n" " cmpw %r14w, (%rax,%r11,2)\n" " sete %r11b\n" " movzbl %r11b, %r11d\n" " addl %r11d, %edx\n" " leal 3(%rdi), %r11d\n" " cmpl %r11d, %r9d\n" " jle .myL145\n" " leal (%rbx,%r11), %r14d\n" " leal (%r12,%r11), %r15d\n" " movslq %r15d, %r15\n" " movslq %r14d, %r14\n" " movzwl (%rax,%r14,2), %r14d\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %esi\n" " leal 0(%r13,%r11), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r8d, %r11d\n" " movslq %r11d, %r11\n" " movzbl %r15b, %r15d\n" " addl %r15d, %ecx\n" " cmpw %r14w, (%rax,%r11,2)\n" " sete %r11b\n" " movzbl %r11b, %r11d\n" " addl %r11d, %edx\n" " leal 4(%rdi), %r11d\n" " cmpl %r11d, %r9d\n" " jle .myL145\n" " leal (%rbx,%r11), %r14d\n" " leal (%r12,%r11), %r15d\n" " movslq %r15d, %r15\n" " movslq %r14d, %r14\n" " movzwl (%rax,%r14,2), %r14d\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %esi\n" " leal 0(%r13,%r11), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r8d, %r11d\n" " movslq %r11d, %r11\n" " movzbl %r15b, %r15d\n" " addl %r15d, %ecx\n" " cmpw %r14w, (%rax,%r11,2)\n" " sete %r11b\n" " movzbl %r11b, %r11d\n" " addl %r11d, %edx\n" " leal 5(%rdi), %r11d\n" " cmpl %r11d, %r9d\n" " jle .myL145\n" " leal (%rbx,%r11), %r14d\n" " leal (%r12,%r11), %r15d\n" " movslq %r15d, %r15\n" " movslq %r14d, %r14\n" " movzwl (%rax,%r14,2), %r14d\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " movzbl %r15b, %r15d\n" " addl %r15d, %esi\n" " leal 0(%r13,%r11), %r15d\n" " movslq %r15d, %r15\n" " cmpw %r14w, (%rax,%r15,2)\n" " sete %r15b\n" " addl %r8d, %r11d\n" " movslq %r11d, %r11\n" " movzbl %r15b, %r15d\n" " addl %r15d, %ecx\n" " cmpw %r14w, (%rax,%r11,2)\n" " sete %r11b\n" " addl $6, %edi\n" " movzbl %r11b, %r11d\n" " addl %r11d, %edx\n" " cmpl %edi, %r9d\n" " jle .myL145\n" " addl %edi, %ebx\n" " addl %edi, %r12d\n" " movslq %ebx, %rbx\n" " movslq %r12d, %r12\n" " movzwl (%rax,%rbx,2), %r11d\n" " xorl %ebx, %ebx\n" " cmpw %r11w, (%rax,%r12,2)\n" " sete %bl\n" " addl %edi, %r13d\n" " movslq %r13d, %r13\n" " addl %ebx, %esi\n" " xorl %ebx, %ebx\n" " cmpw %r11w, (%rax,%r13,2)\n" " sete %bl\n" " addl %r8d, %edi\n" " movslq %edi, %rdi\n" " addl %ebx, %ecx\n" " cmpw %r11w, (%rax,%rdi,2)\n" " sete %al\n" " movzbl %al, %eax\n" " addl %eax, %edx\n" ".myL145:\n" " movswl %si, %esi\n" " movl %r9d, %eax\n" " movswl %cx, %ecx\n" " movswl %dx, %edx\n" " subl %esi, %eax\n" " movl %r9d, %esi\n" " subl %edx, %r9d\n" " subl %ecx, %esi\n" ".myL142:\n" " popq %rbx\n" " popq %r12\n" " movw %ax, 4(%r10)\n" " movq %r10, %rax\n" " popq %r13\n" " popq %r14\n" " movw %r9w, (%r10)\n" " popq %r15\n" " popq %rbp\n" " .cfi_remember_state\n" " .cfi_def_cfa 7, 8\n" " movw %si, 2(%r10)\n" " ret\n" " .p2align 4,,10\n" " .p2align 3\n" ".myL148:\n" " .cfi_restore_state\n" " movl %r9d, %esi\n" " movl %r9d, %eax\n" " jmp .myL142\n" ".myL149:\n" " xorl %r11d, %r11d\n" " xorl %edi, %edi\n" " xorl %edx, %edx\n" " xorl %ecx, %ecx\n" " xorl %esi, %esi\n" " leaq a(%rip), %rax\n" " jmp .myL143\n" ".myL169:\n" " vzeroupper\n" " jmp .myL145\n" " .cfi_endproc\n" ".myLFE9918:\n" " .size _Z8get_sim3iiiii, .-_Z8get_sim3iiiii\n" ); short sim_cnt[N+1]; short sim_pre[N+2]; short sim[N]; __attribute__((optimize("O3,unroll-loops"),target("avx2"))) void up_from_other(int qr, int st) { short *ans = ::ans[qr]; short val = query[qr]; while (st%16 && st < n) { ans[st] += sim[st] <= val; ++st; } if (st == n) return; typedef short ymm __attribute__((vector_size(32),aligned(32))); ymm *vans = (ymm *)ans; ymm *vsim = (ymm *)sim; for (int i = st/16; i < n/16; ++i) vans[i] += vsim[i] <= val; for (int i = n-n%16; i < n; ++i) ans[i] += sim[i] <= val; } void process(int i) { memset(sim_cnt, 0, sizeof(sim_cnt)); short h3[3]; Loop (j,i+1,n-l+1) { if (j%3 == (i+1)%3) { if (j+3 <= n-l+1) { auto tmp = get_sim3(i, j, j+1, j+2, l); tie(h3[0], h3[1], h3[2]) = tmp; } else { Loop (k,j,n-l+1) h3[k-j] = get_sim(i, k, l); } } else { h3[0] = h3[1]; h3[1] = h3[2]; } sim[j] = h3[0]; sim_cnt[sim[j]]++; } sim_pre[0] = 0; Loop (j,0,l+1) sim_pre[j+1] = sim_pre[j] + sim_cnt[j]; Loop (j,0,q) { ans[j][i] += sim_pre[query[j]+1]; up_from_other(j, i+1); } } int main() { cin.tie(0) -> sync_with_stdio(false); vector<int> cmper; cin >> n >> l; Loop (i,0,n) { cin >> noncmp_a[i]; cmper.push_back(noncmp_a[i]); } cin >> q; Loop (i,0,q) cin >> query[i]; sort(cmper.begin(), cmper.end()); cmper.resize(unique(cmper.begin(), cmper.end()) - cmper.begin()); Loop (i,0,n) { a[i] = lower_bound(cmper.begin(), cmper.end(), noncmp_a[i]) - cmper.begin(); } Loop (i,0,n-l+1) process(i); Loop (i,0,q) { Loop (j,0,n-l+1) cout << ans[i][j] << ' '; cout << '\n'; } }
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...