제출 #644659

#제출 시각아이디문제언어결과실행 시간메모리
644659ymm운세 보기 2 (JOI14_fortune_telling2)C++17
35 / 100
3075 ms2444 KiB
#include <bits/stdc++.h> #define Loop(x,l,r) for (ll x = (l); x < (r); ++x) #define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x) typedef long long ll; typedef std::pair<int, int> pii; typedef std::pair<ll , ll > pll; using namespace std; const int N = 400'000; const int S = 2000; int a[N]; int q[N]; int n; /* __attribute__((optimize("O3,unroll-loops"),target("avx2"))) void up(int x, int y, int z, int l, int r) { typedef int ymm __attribute((vector_size(32),aligned(32))); ymm *const b = (ymm*)a; for (int i = l/8; i < r/8; i += 2) { auto v = b[i], u = b[i+1]; v ^= v <= x? u: 0; v ^= v <= y? u: 0; v ^= v <= z? u: 0; b[i] = v; } } */ void up(int,int,int,int,int); asm("\n" " .text\n" " .p2align 4\n" " .globl _Z2upiiiii\n" " .type _Z2upiiiii, @function\n" "_Z2upiiiii:\n" ".myLFB9897:\n" " .cfi_startproc\n" " movl %edx, %eax\n" " movl %ecx, %edx\n" " leal 7(%rcx), %ecx\n" " testl %edx, %edx\n" " cmovns %edx, %ecx\n" " leal 7(%r8), %edx\n" " sarl $3, %ecx\n" " testl %r8d, %r8d\n" " cmovns %r8d, %edx\n" " sarl $3, %edx\n" " cmpl %edx, %ecx\n" " jge .myL21\n" " vmovd %esi, %xmm4\n" " subl $1, %edx\n" " vmovd %edi, %xmm5\n" " movslq %ecx, %rsi\n" " subq %rcx, %rdx\n" " leaq a(%rip), %rdi\n" " vmovd %eax, %xmm3\n" " movq %rsi, %rax\n" " andl $4294967294, %edx\n" " leaq 64(%rdi), %rcx\n" " salq $5, %rax\n" " vpbroadcastd %xmm5, %ymm5\n" " addq %rsi, %rdx\n" " vpbroadcastd %xmm4, %ymm4\n" " vpbroadcastd %xmm3, %ymm3\n" " addq %rdi, %rax\n" " salq $5, %rdx\n" " addq %rcx, %rdx\n" " movq %rdx, %rcx\n" " subq %rax, %rcx\n" " subq $64, %rcx\n" " shrq $6, %rcx\n" " addq $1, %rcx\n" " andl $3, %ecx\n" " je .myL3\n" " cmpq $1, %rcx\n" " je .myL15\n" " cmpq $2, %rcx\n" " je .myL16\n" " vmovdqa (%rax), %ymm0\n" " vmovdqa 32(%rax), %ymm2\n" " addq $64, %rax\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, -64(%rax)\n" ".myL16:\n" " vmovdqa (%rax), %ymm0\n" " vmovdqa 32(%rax), %ymm2\n" " addq $64, %rax\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, -64(%rax)\n" ".myL15:\n" " vmovdqa (%rax), %ymm0\n" " vmovdqa 32(%rax), %ymm2\n" " addq $64, %rax\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, -64(%rax)\n" " cmpq %rax, %rdx\n" " je .myL22\n" ".myL3:\n" " vmovdqa (%rax), %ymm0\n" " vmovdqa 32(%rax), %ymm2\n" " leaq 64(%rax), %rcx\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vmovdqa 96(%rax), %ymm2\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, (%rax)\n" " vmovdqa 64(%rax), %ymm0\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vmovdqa 160(%rax), %ymm2\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, 64(%rax)\n" " vmovdqa 128(%rax), %ymm0\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, 128(%rax)\n" " vmovdqa 128(%rcx), %ymm0\n" " vmovdqa 160(%rcx), %ymm2\n" " leaq 192(%rcx), %rax\n" " vpcmpgtd %ymm5, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm4, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vpcmpgtd %ymm3, %ymm0, %ymm1\n" " vpandn %ymm2, %ymm1, %ymm1\n" " vpxor %ymm1, %ymm0, %ymm0\n" " vmovdqa %ymm0, 128(%rcx)\n" " cmpq %rax, %rdx\n" " jne .myL3\n" ".myL22:\n" " vzeroupper\n" ".myL21:\n" " ret\n" " .cfi_endproc\n" ".myLFE9897:\n" " .size _Z2upiiiii, .-_Z2upiiiii\n" ); int main() { cin.tie(0) -> sync_with_stdio(false); int k; cin >> n >> k; Loop (i,0,n) { int x, y; cin >> x >> y; y ^= x; a[i/8*16 + i%8] = x; a[i/8*16 + i%8 + 8] = y; } Loop (i,0,k) cin >> q[i]; for (int l = 0; l < N; l += S) { for (int i = 0; i < k; i += 3) up(q[i+0], q[i+1], q[i+2], l, l+S); } ll ans = 0; Loop (i,0,n) ans += a[i/8*16 + i%8]; cout << ans << '\n'; }
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...