#include <bits/stdc++.h>
// Counting loops over the half-open range [l, r) with a 64-bit index `x`:
// Loop walks upward from l to r-1, LoopR walks downward from r-1 to l.
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
// Short aliases used throughout the file.
using ll = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using namespace std;
// Capacity (in elements) of the global arrays below.
constexpr int N = 200'032;
// Block length, in elements.
constexpr int S = 1024;
// Number of entries of a[] / b[] in use; read from input in main().
int n;
// Per-entry value arrays. The assembly routine up() addresses both of them
// by symbol name, so renaming either one requires updating the assembly.
int a[N], b[N];
// Threshold values read from input in main().
int q[N];
/*
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(int x, int y, int z, int w, int l, int r)
{
Loop (i,l,r) {
int v = a[i], u = b[i];
v ^= v <= x? u: 0;
v ^= v <= y? u: 0;
v ^= v <= z? u: 0;
v ^= v <= w? u: 0;
a[i] = v;
}
}
*/
/*
 * up(x, y, z, w, l, r): for every index i in [l, r), applies the four
 * thresholds x, y, z, w in that order to a[i]:
 *     if (a[i] <= t) a[i] ^= b[i];
 * which is the loop shown in the commented-out C++ version above.
 *
 * The definition is file-scope assembly (x86-64, AT&T syntax) emitted under
 * the Itanium-mangled name _Z2upiiiiii so the declaration below links to it.
 * It uses AVX2 instructions (256-bit integer compare/blend) and ELF/GAS
 * directives (.type, .cfi_*), so it needs a matching toolchain and CPU.
 * The label names suggest this is adapted compiler output for the C++
 * version above -- NOTE(review): not confirmed from this file.
 *
 * Register use on entry: edi=x, esi=y, edx=z, ecx=w, r8d=l, r9d=r.
 * The arrays a and b are addressed RIP-relative by symbol name.
 * All vector loads/stores are unaligned (vmovdqu), so l need not be aligned.
 */
void up(int,int,int,int,int,int);
asm("\n"
" .text\n"
" .p2align 4\n"
" .globl _Z2upiiiiii\n"
" .type _Z2upiiiiii, @function\n"
"_Z2upiiiiii:\n"
".myLFB9703:\n"
" .cfi_startproc\n"
// Prologue: set up a frame, save callee-saved registers, sign-extend l and r
// (r8 = l, rbx = r), align rsp to 32 bytes and spill y just below it.
" pushq %rbp\n"
" .cfi_def_cfa_offset 16\n"
" .cfi_offset 6, -16\n"
" movslq %r8d, %r8\n"
" movq %rsp, %rbp\n"
" .cfi_def_cfa_register 6\n"
" pushq %r15\n"
" pushq %r14\n"
" pushq %r13\n"
" pushq %r12\n"
" pushq %rbx\n"
" .cfi_offset 15, -24\n"
" .cfi_offset 14, -32\n"
" .cfi_offset 13, -40\n"
" .cfi_offset 12, -48\n"
" .cfi_offset 3, -56\n"
" movslq %r9d, %rbx\n"
" andq $-32, %rsp\n"
" movl %esi, -4(%rsp)\n"
// Empty range (l >= r): nothing to do.
" cmpq %rbx, %r8\n"
" jge .myL42\n"
// r11d = w, r10d = z, r15 = l, rcx = r - l (element count).
// With 7 or fewer elements the 256-bit path is skipped entirely.
" movl %ecx, %r11d\n"
" movq %rbx, %rcx\n"
" movl %edx, %r10d\n"
" movq %r8, %r15\n"
" subq %r8, %rcx\n"
" leaq -1(%rcx), %rax\n"
" cmpq $6, %rax\n"
" jbe .myL23\n"
// 256-bit setup: rdx = &a[l], rsi = &b[l], rax = running byte offset,
// r13 = byte length covered by whole groups of 8 ints.
// ymm7/ymm6/ymm5/ymm4 hold x/y/z/w broadcast to all lanes.
// r12 = (number of 8-int groups) mod 4 selects the entry point into the
// peeled steps below so that the main loop can run unrolled four times.
" movq %rcx, %r13\n"
" leaq a(%rip), %r9\n"
" vmovd %edi, %xmm7\n"
" xorl %eax, %eax\n"
" shrq $3, %r13\n"
" leaq 0(,%r8,4), %rsi\n"
" leaq b(%rip), %r14\n"
" vpbroadcastd -4(%rsp), %ymm6\n"
" vmovd %r10d, %xmm5\n"
" salq $5, %r13\n"
" vmovd %r11d, %xmm4\n"
" leaq (%r9,%rsi), %rdx\n"
" leaq -32(%r13), %r12\n"
" addq %r14, %rsi\n"
" vpbroadcastd %xmm7, %ymm7\n"
" shrq $5, %r12\n"
" vpbroadcastd %xmm5, %ymm5\n"
" vpbroadcastd %xmm4, %ymm4\n"
" addq $1, %r12\n"
" andl $3, %r12d\n"
" je .myL4\n"
" cmpq $1, %r12\n"
" je .myL34\n"
" cmpq $2, %r12\n"
" je .myL35\n"
// One 8-int step. For each threshold t: mask = (v > t); the blend keeps v
// where the mask is set and takes v ^ u elsewhere, i.e. v ^= (v <= t ? u : 0).
// The same 15-instruction pattern repeats in every step below.
" vmovdqu (%rdx), %ymm2\n"
" vmovdqu (%rsi), %ymm0\n"
" movl $32, %eax\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu %ymm0, (%rdx)\n"
".myL35:\n"
" vmovdqu (%rdx,%rax), %ymm2\n"
" vmovdqu (%rsi,%rax), %ymm0\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu %ymm0, (%rdx,%rax)\n"
" addq $32, %rax\n"
".myL34:\n"
" vmovdqu (%rdx,%rax), %ymm2\n"
" vmovdqu (%rsi,%rax), %ymm0\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu %ymm0, (%rdx,%rax)\n"
" addq $32, %rax\n"
" cmpq %r13, %rax\n"
" je .myL40\n"
// Main loop: four 8-int steps (128 bytes) per iteration until rax == r13.
".myL4:\n"
" vmovdqu (%rdx,%rax), %ymm2\n"
" vmovdqu (%rsi,%rax), %ymm0\n"
" leaq 32(%rax), %r12\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu 32(%rdx,%rax), %ymm2\n"
" vmovdqu %ymm0, (%rdx,%rax)\n"
" vmovdqu 32(%rsi,%rax), %ymm0\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu 64(%rdx,%rax), %ymm2\n"
" vmovdqu %ymm0, 32(%rdx,%rax)\n"
" vmovdqu 64(%rsi,%rax), %ymm0\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu %ymm0, 64(%rdx,%rax)\n"
" vmovdqu 64(%rdx,%r12), %ymm2\n"
" leaq 96(%r12), %rax\n"
" vmovdqu 64(%rsi,%r12), %ymm0\n"
" vpcmpgtd %ymm7, %ymm2, %ymm3\n"
" vpxor %ymm0, %ymm2, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm6, %ymm1, %ymm3\n"
" vpxor %ymm1, %ymm0, %ymm2\n"
" vpblendvb %ymm3, %ymm1, %ymm2, %ymm2\n"
" vpcmpgtd %ymm5, %ymm2, %ymm3\n"
" vpxor %ymm2, %ymm0, %ymm1\n"
" vpblendvb %ymm3, %ymm2, %ymm1, %ymm1\n"
" vpcmpgtd %ymm4, %ymm1, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm0\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqu %ymm0, 64(%rdx,%r12)\n"
" cmpq %r13, %rax\n"
" jne .myL4\n"
// 256-bit part finished: rax = count rounded down to a multiple of 8,
// r8 advances past the processed elements. Return if nothing is left.
".myL40:\n"
" movq %rcx, %rax\n"
" andq $-8, %rax\n"
" addq %rax, %r8\n"
" cmpq %rax, %rcx\n"
" je .myL45\n"
" vzeroupper\n"
// Tail: rcx = elements still to process (at most 7). With 4 or more, one
// 128-bit step handles four of them; otherwise go straight to scalar code.
".myL3:\n"
" subq %rax, %rcx\n"
" leaq -1(%rcx), %rdx\n"
" cmpq $2, %rdx\n"
" jbe .myL8\n"
" vmovd %edi, %xmm4\n"
" vmovd %r10d, %xmm5\n"
" vmovd %r11d, %xmm6\n"
" addq %r15, %rax\n"
" leaq (%r9,%rax,4), %rdx\n"
" vpshufd $0, %xmm4, %xmm2\n"
" vmovdqu (%r14,%rax,4), %xmm0\n"
" movq %rcx, %rax\n"
" vmovdqu (%rdx), %xmm3\n"
" andq $-4, %rax\n"
" addq %rax, %r8\n"
" vpcmpgtd %xmm2, %xmm3, %xmm2\n"
" vpxor %xmm0, %xmm3, %xmm1\n"
" vpblendvb %xmm2, %xmm3, %xmm1, %xmm1\n"
" vbroadcastss -4(%rsp), %xmm3\n"
" vpxor %xmm1, %xmm0, %xmm2\n"
" vpcmpgtd %xmm3, %xmm1, %xmm3\n"
" vpblendvb %xmm3, %xmm1, %xmm2, %xmm2\n"
" vpshufd $0, %xmm5, %xmm3\n"
" vpcmpgtd %xmm3, %xmm2, %xmm3\n"
" vpxor %xmm2, %xmm0, %xmm1\n"
" vpblendvb %xmm3, %xmm2, %xmm1, %xmm1\n"
" vpshufd $0, %xmm6, %xmm2\n"
" vpcmpgtd %xmm2, %xmm1, %xmm2\n"
" vpxor %xmm1, %xmm0, %xmm0\n"
" vpblendvb %xmm2, %xmm1, %xmm0, %xmm0\n"
" vmovdqu %xmm0, (%rdx)\n"
" cmpq %rax, %rcx\n"
" je .myL42\n"
// Scalar tail: up to three remaining elements, one at a time starting at
// index r8. Each threshold is applied with a compare plus conditional move
// that replaces v by v ^ u when v <= t.
".myL8:\n"
" movl (%r9,%r8,4), %eax\n"
" movl (%r14,%r8,4), %edx\n"
" movl -4(%rsp), %r15d\n"
" movl %eax, %ecx\n"
" xorl %edx, %ecx\n"
" cmpl %edi, %eax\n"
" cmovle %ecx, %eax\n"
" movl %eax, %ecx\n"
" xorl %edx, %ecx\n"
" cmpl %eax, %r15d\n"
" cmovge %ecx, %eax\n"
" movl %eax, %ecx\n"
" xorl %edx, %ecx\n"
" cmpl %eax, %r10d\n"
" cmovge %ecx, %eax\n"
" leaq 1(%r8), %rcx\n"
" xorl %eax, %edx\n"
" cmpl %eax, %r11d\n"
" cmovge %edx, %eax\n"
" movl %eax, (%r9,%r8,4)\n"
" cmpq %rcx, %rbx\n"
" jle .myL42\n"
" movl (%r9,%rcx,4), %eax\n"
" movl (%r14,%rcx,4), %edx\n"
" movl %eax, %esi\n"
" xorl %edx, %esi\n"
" cmpl %eax, %edi\n"
" cmovge %esi, %eax\n"
" movl %eax, %esi\n"
" xorl %edx, %esi\n"
" cmpl %eax, %r15d\n"
" cmovge %esi, %eax\n"
" movl %eax, %esi\n"
" xorl %edx, %esi\n"
" cmpl %eax, %r10d\n"
" cmovge %esi, %eax\n"
" xorl %eax, %edx\n"
" cmpl %eax, %r11d\n"
" cmovge %edx, %eax\n"
" addq $2, %r8\n"
" movl %eax, (%r9,%rcx,4)\n"
" cmpq %r8, %rbx\n"
" jle .myL42\n"
" movl (%r9,%r8,4), %eax\n"
" movl (%r14,%r8,4), %edx\n"
" movl %eax, %ecx\n"
" xorl %edx, %ecx\n"
" cmpl %eax, %edi\n"
" cmovge %ecx, %eax\n"
" movl %eax, %ecx\n"
" xorl %edx, %ecx\n"
" cmpl %eax, %r15d\n"
" cmovge %ecx, %eax\n"
" movl %eax, %ecx\n"
" xorl %edx, %ecx\n"
" cmpl %eax, %r10d\n"
" cmovge %ecx, %eax\n"
" xorl %eax, %edx\n"
" cmpl %eax, %r11d\n"
" cmovge %edx, %eax\n"
" movl %eax, (%r9,%r8,4)\n"
// Common epilogue: restore rsp from the frame pointer, pop the saved
// registers and return.
".myL42:\n"
" leaq -40(%rbp), %rsp\n"
" popq %rbx\n"
" popq %r12\n"
" popq %r13\n"
" popq %r14\n"
" popq %r15\n"
" popq %rbp\n"
" .cfi_remember_state\n"
" .cfi_def_cfa 7, 8\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
// Epilogue used when the 256-bit path consumed the whole range: identical to
// the one above except that it issues vzeroupper first.
".myL45:\n"
" .cfi_restore_state\n"
" vzeroupper\n"
" leaq -40(%rbp), %rsp\n"
" popq %rbx\n"
" popq %r12\n"
" popq %r13\n"
" popq %r14\n"
" popq %r15\n"
" popq %rbp\n"
" .cfi_remember_state\n"
" .cfi_def_cfa 7, 8\n"
" ret\n"
// Short-range entry (7 or fewer elements): nothing processed yet (rax = 0);
// load the array base addresses and continue with the tail code.
".myL23:\n"
" .cfi_restore_state\n"
" xorl %eax, %eax\n"
" leaq a(%rip), %r9\n"
" leaq b(%rip), %r14\n"
" jmp .myL3\n"
" .cfi_endproc\n"
".myLFE9703:\n"
" .size _Z2upiiiiii, .-_Z2upiiiiii\n"
);
int main()
{
cin.tie(0) -> sync_with_stdio(false);
int k;
cin >> n >> k;
Loop (i,0,n) {
cin >> a[i] >> b[i];
b[i] ^= a[i];
}
Loop (i,0,k)
cin >> q[i];
for (int l = 0; l < n; l += S) {
int r = min(n, l+S);
for (int i = 0; i < k; i += 4)
up(q[i+0], q[i+1], q[i+2], q[i+3], l, r);
}
ll ans = 0;
Loop (i,0,n)
ans += a[i];
cout << ans << '\n';
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Correct |
1 ms |
340 KB |
Output is correct |
3 |
Correct |
1 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
280 KB |
Output is correct |
6 |
Correct |
1 ms |
340 KB |
Output is correct |
7 |
Correct |
1 ms |
340 KB |
Output is correct |
8 |
Correct |
1 ms |
340 KB |
Output is correct |
9 |
Correct |
1 ms |
340 KB |
Output is correct |
10 |
Correct |
1 ms |
340 KB |
Output is correct |
11 |
Correct |
1 ms |
340 KB |
Output is correct |
12 |
Correct |
1 ms |
340 KB |
Output is correct |
13 |
Correct |
1 ms |
340 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Correct |
1 ms |
340 KB |
Output is correct |
3 |
Correct |
1 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
280 KB |
Output is correct |
6 |
Correct |
1 ms |
340 KB |
Output is correct |
7 |
Correct |
1 ms |
340 KB |
Output is correct |
8 |
Correct |
1 ms |
340 KB |
Output is correct |
9 |
Correct |
1 ms |
340 KB |
Output is correct |
10 |
Correct |
1 ms |
340 KB |
Output is correct |
11 |
Correct |
1 ms |
340 KB |
Output is correct |
12 |
Correct |
1 ms |
340 KB |
Output is correct |
13 |
Correct |
1 ms |
340 KB |
Output is correct |
14 |
Correct |
13 ms |
340 KB |
Output is correct |
15 |
Correct |
49 ms |
552 KB |
Output is correct |
16 |
Correct |
99 ms |
672 KB |
Output is correct |
17 |
Correct |
181 ms |
724 KB |
Output is correct |
18 |
Correct |
169 ms |
784 KB |
Output is correct |
19 |
Correct |
176 ms |
976 KB |
Output is correct |
20 |
Correct |
168 ms |
788 KB |
Output is correct |
21 |
Correct |
169 ms |
788 KB |
Output is correct |
22 |
Correct |
167 ms |
796 KB |
Output is correct |
23 |
Correct |
165 ms |
788 KB |
Output is correct |
24 |
Correct |
166 ms |
724 KB |
Output is correct |
25 |
Correct |
166 ms |
784 KB |
Output is correct |
26 |
Correct |
145 ms |
736 KB |
Output is correct |
27 |
Correct |
168 ms |
724 KB |
Output is correct |
28 |
Correct |
173 ms |
788 KB |
Output is correct |
29 |
Correct |
172 ms |
796 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
340 KB |
Output is correct |
2 |
Correct |
1 ms |
340 KB |
Output is correct |
3 |
Correct |
1 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
280 KB |
Output is correct |
6 |
Correct |
1 ms |
340 KB |
Output is correct |
7 |
Correct |
1 ms |
340 KB |
Output is correct |
8 |
Correct |
1 ms |
340 KB |
Output is correct |
9 |
Correct |
1 ms |
340 KB |
Output is correct |
10 |
Correct |
1 ms |
340 KB |
Output is correct |
11 |
Correct |
1 ms |
340 KB |
Output is correct |
12 |
Correct |
1 ms |
340 KB |
Output is correct |
13 |
Correct |
1 ms |
340 KB |
Output is correct |
14 |
Correct |
13 ms |
340 KB |
Output is correct |
15 |
Correct |
49 ms |
552 KB |
Output is correct |
16 |
Correct |
99 ms |
672 KB |
Output is correct |
17 |
Correct |
181 ms |
724 KB |
Output is correct |
18 |
Correct |
169 ms |
784 KB |
Output is correct |
19 |
Correct |
176 ms |
976 KB |
Output is correct |
20 |
Correct |
168 ms |
788 KB |
Output is correct |
21 |
Correct |
169 ms |
788 KB |
Output is correct |
22 |
Correct |
167 ms |
796 KB |
Output is correct |
23 |
Correct |
165 ms |
788 KB |
Output is correct |
24 |
Correct |
166 ms |
724 KB |
Output is correct |
25 |
Correct |
166 ms |
784 KB |
Output is correct |
26 |
Correct |
145 ms |
736 KB |
Output is correct |
27 |
Correct |
168 ms |
724 KB |
Output is correct |
28 |
Correct |
173 ms |
788 KB |
Output is correct |
29 |
Correct |
172 ms |
796 KB |
Output is correct |
30 |
Correct |
221 ms |
1172 KB |
Output is correct |
31 |
Correct |
1018 ms |
1480 KB |
Output is correct |
32 |
Correct |
1990 ms |
1880 KB |
Output is correct |
33 |
Execution timed out |
3082 ms |
2636 KB |
Time limit exceeded |
34 |
Halted |
0 ms |
0 KB |
- |