#include <bits/stdc++.h>
// Short aliases for the 64-bit integer type and the two pair types.
using ll = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using namespace std;
// Loop(x, l, r)  : x walks upward through the half-open range [l, r).
// LoopR(x, l, r) : x walks downward through the same range, from r-1 to l.
// In both macros the loop variable is declared as an ll.
#define Loop(x, l, r) for (ll x = (l); x < (r); ++x)
#define LoopR(x, l, r) for (ll x = (r) - 1; x >= (l); --x)
// Number of ints in each of the two global buffers below.
const int N = 400'000;
// Length, in ints, of one slice of `a` handed to a single up() call.
const int S = 2000;
// Working buffer read and written by the assembly routine up(). That routine
// uses the aligned load/store form (vmovdqa) on 32-byte vectors, which faults
// on an address that is not a multiple of 32, so the alignment is requested
// explicitly instead of being left to the compiler's default for int arrays.
alignas(32) int a[N];
// Threshold values, in input order; zero-initialised like every global.
int q[N];
// Number of (x, y) pairs read from the input.
int n;
/*
Reference C++ source for up(). It is kept only as documentation (it sits
inside this comment and is never compiled); the routine that actually runs is
the assembly in the asm() statement further down, which implements the same
loop with 256-bit integer vector instructions.

What it does: `a` is viewed as an array of 8-int vectors b[]. For every even
step i in [l/8, r/8), b[i] holds eight current values v and b[i+1] holds eight
toggle masks u. For each threshold t = x, then y, then z, every lane with
v <= t is replaced by v ^ u; lanes with v > t are left alone.

__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(int x, int y, int z, int l, int r)
{
typedef int ymm __attribute((vector_size(32),aligned(32)));
ymm *const b = (ymm*)a;
for (int i = l/8; i < r/8; i += 2) {
auto v = b[i], u = b[i+1];
v ^= v <= x? u: 0;
v ^= v <= y? u: 0;
v ^= v <= z? u: 0;
b[i] = v;
}
}
*/
// Applies three thresholds (x, y, z, in that order) to the slice of the global
// buffer `a` covering vector indices [l/8, r/8), two vectors per step.
//   x, y, z : thresholds, compared as signed 32-bit integers.
//   l, r    : bounds in int elements; each is divided by 8 (truncating) to
//             get a vector index. Nothing is done when l/8 >= r/8.
// The body is supplied by the asm() statement below under the mangled name
// _Z2upiiiii. It needs a CPU with AVX2 and a 32-byte-aligned `a`, because all
// loads and stores use vmovdqa.
// NOTE(review): the labels (.myLFB9897, .myL3, ...) look like renamed compiler
// output for the reference source above - confirm before hand-editing.
void up(int,int,int,int,int);
asm("\n"
" .text\n"
" .p2align 4\n"
" .globl _Z2upiiiii\n"
" .type _Z2upiiiii, @function\n"
"_Z2upiiiii:\n"
".myLFB9897:\n"
" .cfi_startproc\n"
// Arguments arrive as edi=x, esi=y, edx=z, ecx=l, r8d=r.
// Compute ecx = l/8 and edx = r/8 (adding 7 first when negative so the
// arithmetic shift truncates toward zero) and return early if l/8 >= r/8.
" movl %edx, %eax\n"
" movl %ecx, %edx\n"
" leal 7(%rcx), %ecx\n"
" testl %edx, %edx\n"
" cmovns %edx, %ecx\n"
" leal 7(%r8), %edx\n"
" sarl $3, %ecx\n"
" testl %r8d, %r8d\n"
" cmovns %r8d, %edx\n"
" sarl $3, %edx\n"
" cmpl %edx, %ecx\n"
" jge .myL21\n"
// Set-up: broadcast x into ymm5, y into ymm4 and z into ymm3;
// rax = address of vector l/8 inside `a`; rdx = address just past the last
// vector pair to process; rcx = (number of pairs) mod 4, which selects how
// many single iterations run before the 4x-unrolled loop.
" vmovd %esi, %xmm4\n"
" subl $1, %edx\n"
" vmovd %edi, %xmm5\n"
" movslq %ecx, %rsi\n"
" subq %rcx, %rdx\n"
" leaq a(%rip), %rdi\n"
" vmovd %eax, %xmm3\n"
" movq %rsi, %rax\n"
" andl $4294967294, %edx\n"
" leaq 64(%rdi), %rcx\n"
" salq $5, %rax\n"
" vpbroadcastd %xmm5, %ymm5\n"
" addq %rsi, %rdx\n"
" vpbroadcastd %xmm4, %ymm4\n"
" vpbroadcastd %xmm3, %ymm3\n"
" addq %rdi, %rax\n"
" salq $5, %rdx\n"
" addq %rcx, %rdx\n"
" movq %rdx, %rcx\n"
" subq %rax, %rcx\n"
" subq $64, %rcx\n"
" shrq $6, %rcx\n"
" addq $1, %rcx\n"
" andl $3, %ecx\n"
" je .myL3\n"
" cmpq $1, %rcx\n"
" je .myL15\n"
" cmpq $2, %rcx\n"
" je .myL16\n"
// Remainder 3: first of three single iterations (falls through to the next
// two). Pattern used by every iteration in this routine:
//   ymm0 = v (values), ymm2 = u (toggle masks);
//   vpcmpgtd -> lanes where v > t;  vpandn -> u in the lanes where v <= t;
//   vpxor    -> v ^= that, done for t = x (ymm5), y (ymm4), z (ymm3) in turn;
//   the result is stored back over v.
" vmovdqa (%rax), %ymm0\n"
" vmovdqa 32(%rax), %ymm2\n"
" addq $64, %rax\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, -64(%rax)\n"
// Remainder 2 enters here.
".myL16:\n"
" vmovdqa (%rax), %ymm0\n"
" vmovdqa 32(%rax), %ymm2\n"
" addq $64, %rax\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, -64(%rax)\n"
// Remainder 1 enters here; afterwards leave if that was the final pair.
".myL15:\n"
" vmovdqa (%rax), %ymm0\n"
" vmovdqa 32(%rax), %ymm2\n"
" addq $64, %rax\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, -64(%rax)\n"
" cmpq %rax, %rdx\n"
" je .myL22\n"
// Main loop: four vector pairs (256 bytes of `a`) per trip, until rax
// reaches the end address held in rdx.
".myL3:\n"
" vmovdqa (%rax), %ymm0\n"
" vmovdqa 32(%rax), %ymm2\n"
" leaq 64(%rax), %rcx\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vmovdqa 96(%rax), %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, (%rax)\n"
" vmovdqa 64(%rax), %ymm0\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vmovdqa 160(%rax), %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, 64(%rax)\n"
" vmovdqa 128(%rax), %ymm0\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, 128(%rax)\n"
" vmovdqa 128(%rcx), %ymm0\n"
" vmovdqa 160(%rcx), %ymm2\n"
" leaq 192(%rcx), %rax\n"
" vpcmpgtd %ymm5, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm4, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm3, %ymm0, %ymm1\n"
" vpandn %ymm2, %ymm1, %ymm1\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, 128(%rcx)\n"
" cmpq %rax, %rdx\n"
" jne .myL3\n"
// Exit after vector work: clear the upper ymm halves before returning.
".myL22:\n"
" vzeroupper\n"
// Early-out exit (no vector register was touched on this path).
".myL21:\n"
" ret\n"
" .cfi_endproc\n"
".myLFE9897:\n"
" .size _Z2upiiiii, .-_Z2upiiiii\n"
);
/**
 * Reads `n` cards, each with a face-up value and a hidden value, followed by
 * `k` thresholds. The thresholds are applied in input order; a threshold t
 * turns over every card whose face-up value is <= t. Prints the sum of the
 * face-up values after the last threshold.
 *
 * Input : n k, then n lines "front back", then k thresholds.
 * Output: one line with the sum (64-bit).
 *
 * This used to be a brute-force sweep that applied every threshold to every
 * card through the AVX2 helper up(); that is O(n * k) work and exceeded the
 * time limit on the largest tests. It also padded an incomplete last triple
 * with zero thresholds and had no bound checks on n or k. The version below
 * answers each card independently in O((n + k) log k) and needs neither up()
 * nor the fixed-size global buffers.
 *
 * Per card, with lo = min(front, back) and hi = max(front, back):
 *   - t <  lo       never turns the card;
 *   - lo <= t < hi  leaves the card showing hi, whatever it showed before;
 *   - t >= hi       always turns the card.
 * So the state right after the LAST "lo <= t < hi" threshold is known (hi),
 * or, if there is none, the state at the start is known (front). From there
 * only the parity of the later "t >= hi" thresholds matters.
 */
int main()
{
    std::cin.tie(nullptr)->sync_with_stdio(false);

    int k = 0;
    std::cin >> n >> k;
    // A failed or nonsensical header is treated as an empty instance.
    n = std::max(n, 0);
    k = std::max(k, 0);

    std::vector<int> front(n), back(n);
    for (int i = 0; i < n; ++i)
        std::cin >> front[i] >> back[i];

    std::vector<int> threshold(k);
    for (int &t : threshold)
        std::cin >> t;

    // Rank the thresholds by value: byValue[r] is the input position of the
    // r-th smallest threshold, rankOf is the inverse permutation.
    std::vector<int> byValue(k);
    std::iota(byValue.begin(), byValue.end(), 0);
    std::sort(byValue.begin(), byValue.end(),
              [&](int lhs, int rhs) { return threshold[lhs] < threshold[rhs]; });
    std::vector<int> sortedThreshold(k), rankOf(k);
    for (int r = 0; r < k; ++r) {
        sortedThreshold[r] = threshold[byValue[r]];
        rankOf[byValue[r]] = r;
    }
    // Number of thresholds strictly below `value`, i.e. the first rank whose
    // threshold is >= value.
    const auto firstRankAtLeast = [&](int value) {
        return static_cast<int>(
            std::lower_bound(sortedThreshold.begin(), sortedThreshold.end(), value) -
            sortedThreshold.begin());
    };

    // Iterative max segment tree over ranks. Leaves hold the 1-based input
    // position of each threshold; 0 therefore means "no threshold".
    std::vector<int> latest(2 * static_cast<std::size_t>(k), 0);
    for (int r = 0; r < k; ++r)
        latest[k + r] = byValue[r] + 1;
    for (int v = k - 1; v >= 1; --v)
        latest[v] = std::max(latest[2 * v], latest[2 * v + 1]);
    // Largest 1-based input position among ranks [lo, hi), or 0 if empty.
    const auto latestInRanks = [&](int lo, int hi) {
        int best = 0;
        for (lo += k, hi += k; lo < hi; lo >>= 1, hi >>= 1) {
            if (lo & 1) best = std::max(best, latest[lo++]);
            if (hi & 1) best = std::max(best, latest[--hi]);
        }
        return best;
    };

    // For every card: the rank from which thresholds always turn it (`cut`)
    // and the 1-based position of the last threshold that forces it to show
    // its larger side (`lastForcing`, 0 if there is none).
    std::vector<int> cut(n), lastForcing(n);
    for (int i = 0; i < n; ++i) {
        const int lo = std::min(front[i], back[i]);
        const int hi = std::max(front[i], back[i]);
        cut[i] = firstRankAtLeast(hi);
        lastForcing[i] = latestInRanks(firstRankAtLeast(lo), cut[i]);
    }

    // Fenwick tree over ranks counting the thresholds inserted so far.
    std::vector<int> fenwick(static_cast<std::size_t>(k) + 1, 0);
    const auto insertRank = [&](int rank) {
        for (int p = rank + 1; p <= k; p += p & -p)
            ++fenwick[p];
    };
    // Number of inserted thresholds whose rank is < `rank`.
    const auto countBelow = [&](int rank) {
        int total = 0;
        for (int p = rank; p > 0; p -= p & -p)
            total += fenwick[p];
        return total;
    };

    // Visit cards by decreasing lastForcing while inserting thresholds from
    // the end of the input backwards, so that when a card is handled the tree
    // holds exactly the thresholds that come after its lastForcing position.
    std::vector<int> cardOrder(n);
    std::iota(cardOrder.begin(), cardOrder.end(), 0);
    std::sort(cardOrder.begin(), cardOrder.end(),
              [&](int lhs, int rhs) { return lastForcing[lhs] > lastForcing[rhs]; });

    long long ans = 0;
    int firstInserted = k;  // 0-based input positions [firstInserted, k) are in the tree
    for (const int card : cardOrder) {
        while (firstInserted > lastForcing[card]) {
            --firstInserted;
            insertRank(rankOf[firstInserted]);
        }
        const int turns = (k - firstInserted) - countBelow(cut[card]);
        const bool turnedOver = (turns & 1) != 0;

        int shown;
        if (lastForcing[card] > 0) {
            // Known to show the larger side right after the forcing threshold.
            shown = turnedOver ? std::min(front[card], back[card])
                               : std::max(front[card], back[card]);
        } else {
            // Never forced: parity is relative to the initial face.
            shown = turnedOver ? back[card] : front[card];
        }
        ans += shown;
    }

    std::cout << ans << '\n';
}
# |
Result |
Execution time |
Memory |
Grader output |
1 |
Correct |
18 ms |
1876 KB |
Output is correct |
2 |
Correct |
18 ms |
1800 KB |
Output is correct |
3 |
Correct |
18 ms |
1836 KB |
Output is correct |
4 |
Correct |
19 ms |
1876 KB |
Output is correct |
5 |
Correct |
18 ms |
1876 KB |
Output is correct |
6 |
Correct |
18 ms |
1784 KB |
Output is correct |
7 |
Correct |
18 ms |
1876 KB |
Output is correct |
8 |
Correct |
18 ms |
1780 KB |
Output is correct |
9 |
Correct |
18 ms |
1876 KB |
Output is correct |
10 |
Correct |
18 ms |
1800 KB |
Output is correct |
11 |
Correct |
23 ms |
1880 KB |
Output is correct |
12 |
Correct |
20 ms |
1784 KB |
Output is correct |
13 |
Correct |
20 ms |
1792 KB |
Output is correct |
# |
Result |
Execution time |
Memory |
Grader output |
1 |
Correct |
18 ms |
1876 KB |
Output is correct |
2 |
Correct |
18 ms |
1800 KB |
Output is correct |
3 |
Correct |
18 ms |
1836 KB |
Output is correct |
4 |
Correct |
19 ms |
1876 KB |
Output is correct |
5 |
Correct |
18 ms |
1876 KB |
Output is correct |
6 |
Correct |
18 ms |
1784 KB |
Output is correct |
7 |
Correct |
18 ms |
1876 KB |
Output is correct |
8 |
Correct |
18 ms |
1780 KB |
Output is correct |
9 |
Correct |
18 ms |
1876 KB |
Output is correct |
10 |
Correct |
18 ms |
1800 KB |
Output is correct |
11 |
Correct |
23 ms |
1880 KB |
Output is correct |
12 |
Correct |
20 ms |
1784 KB |
Output is correct |
13 |
Correct |
20 ms |
1792 KB |
Output is correct |
14 |
Correct |
173 ms |
1868 KB |
Output is correct |
15 |
Correct |
350 ms |
1948 KB |
Output is correct |
16 |
Correct |
526 ms |
1908 KB |
Output is correct |
17 |
Correct |
695 ms |
2132 KB |
Output is correct |
18 |
Correct |
693 ms |
2000 KB |
Output is correct |
19 |
Correct |
672 ms |
1948 KB |
Output is correct |
20 |
Correct |
679 ms |
1996 KB |
Output is correct |
21 |
Correct |
687 ms |
1952 KB |
Output is correct |
22 |
Correct |
688 ms |
2028 KB |
Output is correct |
23 |
Correct |
687 ms |
1996 KB |
Output is correct |
24 |
Correct |
679 ms |
1988 KB |
Output is correct |
25 |
Correct |
684 ms |
1916 KB |
Output is correct |
26 |
Correct |
687 ms |
1960 KB |
Output is correct |
27 |
Correct |
676 ms |
2016 KB |
Output is correct |
28 |
Correct |
679 ms |
2032 KB |
Output is correct |
29 |
Correct |
677 ms |
1940 KB |
Output is correct |
# |
Result |
Execution time |
Memory |
Grader output |
1 |
Correct |
18 ms |
1876 KB |
Output is correct |
2 |
Correct |
18 ms |
1800 KB |
Output is correct |
3 |
Correct |
18 ms |
1836 KB |
Output is correct |
4 |
Correct |
19 ms |
1876 KB |
Output is correct |
5 |
Correct |
18 ms |
1876 KB |
Output is correct |
6 |
Correct |
18 ms |
1784 KB |
Output is correct |
7 |
Correct |
18 ms |
1876 KB |
Output is correct |
8 |
Correct |
18 ms |
1780 KB |
Output is correct |
9 |
Correct |
18 ms |
1876 KB |
Output is correct |
10 |
Correct |
18 ms |
1800 KB |
Output is correct |
11 |
Correct |
23 ms |
1880 KB |
Output is correct |
12 |
Correct |
20 ms |
1784 KB |
Output is correct |
13 |
Correct |
20 ms |
1792 KB |
Output is correct |
14 |
Correct |
173 ms |
1868 KB |
Output is correct |
15 |
Correct |
350 ms |
1948 KB |
Output is correct |
16 |
Correct |
526 ms |
1908 KB |
Output is correct |
17 |
Correct |
695 ms |
2132 KB |
Output is correct |
18 |
Correct |
693 ms |
2000 KB |
Output is correct |
19 |
Correct |
672 ms |
1948 KB |
Output is correct |
20 |
Correct |
679 ms |
1996 KB |
Output is correct |
21 |
Correct |
687 ms |
1952 KB |
Output is correct |
22 |
Correct |
688 ms |
2028 KB |
Output is correct |
23 |
Correct |
687 ms |
1996 KB |
Output is correct |
24 |
Correct |
679 ms |
1988 KB |
Output is correct |
25 |
Correct |
684 ms |
1916 KB |
Output is correct |
26 |
Correct |
687 ms |
1960 KB |
Output is correct |
27 |
Correct |
676 ms |
2016 KB |
Output is correct |
28 |
Correct |
679 ms |
2032 KB |
Output is correct |
29 |
Correct |
677 ms |
1940 KB |
Output is correct |
30 |
Execution timed out |
3075 ms |
2444 KB |
Time limit exceeded |
31 |
Halted |
0 ms |
0 KB |
- |