This submission was migrated from a previous version of oj.uz, which used a different machine for grading. It may produce a different result if resubmitted.
#include <bits/stdc++.h>
// Half-open counting loops over [l, r); the index is always a long long.
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
using ll = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using namespace std;

// Capacity of every static buffer below (cards and thresholds alike).
const int N = 2000*100;
// Number of cards that share one coordinate compression in main().
const int S0 = 2000*16;
// Number of cards handed to one up() call; the assembly hard-codes this 2000.
const int S1 = 2000;

// up() walks whole S1-sized chunks, and main() walks whole S0-sized groups of chunks.
static_assert(N % S1 == 0 && S0 % S1 == 0, "buffers must consist of whole S1-sized chunks");
// up() has no scalar tail and uses aligned 32-byte accesses, so every chunk
// must start on a 32-byte boundary and span a whole number of 32-byte vectors.
static_assert((S1 * sizeof(unsigned short)) % 32 == 0, "an S1 chunk must be a whole number of 32-byte vectors");
// main() compresses 2 values per card plus a 0 sentinel into unsigned short indices.
static_assert(2 * S0 + 1 <= 65536, "compressed indices of one S0 group must fit in 16 bits");

// The two faces (first, second) of every card as read from the input.
pii ab[N];
// sa[i]: compressed index of the value card i currently shows.
// sb[i]: XOR of the compressed indices of both faces, so sa[i] ^= sb[i] flips the card.
// Both arrays are accessed by up() with vmovdqa, which faults on addresses
// that are not 32-byte aligned, hence the explicit alignment.
alignas(32) unsigned short sa[N];
alignas(32) unsigned short sb[N];
// Thresholds as read from the input.
int q[N+32];
// Thresholds translated into compressed indices.
unsigned short sq[N];
// Number of cards read from the input.
int n;
/*
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(unsigned short x, unsigned short y, unsigned short z, int l, int r)
{
l *= S1; r *= S1;
Loop (i,l,r) {
unsigned short v = sa[i], u = sb[i];
v ^= v <= x? u: 0;
v ^= v <= y? u: 0;
v ^= v <= z? u: 0;
sa[i] = v;
}
}
*/
void up(unsigned short x, unsigned short y, unsigned short z, int l, int r);
asm("\n"
" .p2align 4\n"
" .globl _Z2uptttii\n"
" .type _Z2uptttii, @function\n"
"_Z2uptttii:\n"
".myLFB9901:\n"
" .cfi_startproc\n"
" movl %edi, %eax\n"
" imull $2000, %ecx, %r10d\n"
" movl %esi, %edi\n"
" movl %edx, %r9d\n"
" imull $2000, %r8d, %esi\n"
" cmpl %r8d, %ecx\n"
" jge .myL103\n"
" movslq %esi, %rsi\n"
" movslq %r10d, %r10\n"
" vmovd %edi, %xmm5\n"
" movslq %ecx, %rdx\n"
" vmovd %eax, %xmm6\n"
" subq %r10, %rsi\n"
" leaq sa(%rip), %rcx\n"
" xorl %eax, %eax\n"
" vmovd %r9d, %xmm4\n"
" vpbroadcastw %xmm6, %ymm6\n"
" vpxor %xmm3, %xmm3, %xmm3\n"
" addq %rsi, %rsi\n"
" imulq $4000, %rdx, %rdx\n"
" leaq -32(%rsi), %rdi\n"
" vpbroadcastw %xmm5, %ymm5\n"
" shrq $5, %rdi\n"
" leaq sb(%rip), %r8\n"
" vpbroadcastw %xmm4, %ymm4\n"
" addq $1, %rdi\n"
" addq %rdx, %rcx\n"
" addq %r8, %rdx\n"
" andl $3, %edi\n"
" je .myL85\n"
" cmpq $1, %rdi\n"
" je .myL97\n"
" cmpq $2, %rdi\n"
" je .myL98\n"
" vmovdqa (%rcx), %ymm0\n"
" vmovdqa (%rdx), %ymm1\n"
" movl $32, %eax\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, (%rcx)\n"
".myL98:\n"
" vmovdqa (%rcx,%rax), %ymm0\n"
" vmovdqa (%rdx,%rax), %ymm1\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, (%rcx,%rax)\n"
" addq $32, %rax\n"
".myL97:\n"
" vmovdqa (%rcx,%rax), %ymm0\n"
" vmovdqa (%rdx,%rax), %ymm1\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, (%rcx,%rax)\n"
" addq $32, %rax\n"
" cmpq %rsi, %rax\n"
" je .myL104\n"
".myL85:\n"
" vmovdqa (%rcx,%rax), %ymm0\n"
" vmovdqa (%rdx,%rax), %ymm1\n"
" leaq 32(%rax), %rdi\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, (%rcx,%rax)\n"
" vmovdqa 32(%rcx,%rax), %ymm0\n"
" vmovdqa 32(%rdx,%rax), %ymm1\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, 32(%rcx,%rax)\n"
" vmovdqa 64(%rcx,%rax), %ymm0\n"
" vmovdqa 64(%rdx,%rax), %ymm1\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, 64(%rcx,%rax)\n"
" vmovdqa 64(%rcx,%rdi), %ymm0\n"
" vmovdqa 64(%rdx,%rdi), %ymm1\n"
" leaq 96(%rdi), %rax\n"
" vpsubusw %ymm6, %ymm0, %ymm2\n"
" vpxor %ymm1, %ymm0, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm5, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm7\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm7, %ymm0, %ymm0\n"
" vpsubusw %ymm4, %ymm0, %ymm2\n"
" vpxor %ymm0, %ymm1, %ymm1\n"
" vpcmpeqw %ymm3, %ymm2, %ymm2\n"
" vpblendvb %ymm2, %ymm1, %ymm0, %ymm0\n"
" vmovdqa %ymm0, 64(%rcx,%rdi)\n"
" cmpq %rsi, %rax\n"
" jne .myL85\n"
".myL104:\n"
" vzeroupper\n"
".myL103:\n"
" ret\n"
" .cfi_endproc\n"
".myLFE9901:\n"
" .size _Z2uptttii, .-_Z2uptttii\n"
);
int main()
{
cin.tie(0) -> sync_with_stdio(false);
int k;
cin >> n >> k;
Loop (i,0,n)
cin >> ab[i].first >> ab[i].second;
Loop (i,0,k)
cin >> q[i];
mt19937_64 rd(time(0));
shuffle(ab, ab+N, rd);
ll ans = 0;
for (int l0 = 0; l0 < N; l0 += S0) {
int r0 = min(N, l0+S0);
vector<int> vec = {0};
Loop (i,l0,r0) {
vec.push_back(ab[i].first);
vec.push_back(ab[i].second);
}
sort(vec.begin(), vec.end());
vec.resize(unique(vec.begin(), vec.end()) - vec.begin());
Loop (i,l0,r0) {
sa[i] = lower_bound(vec.begin(), vec.end(), ab[i].first) - vec.begin();
sb[i] = lower_bound(vec.begin(), vec.end(), ab[i].second) - vec.begin();
sb[i] ^= sa[i];
}
Loop (i,0,k)
sq[i] = upper_bound(vec.begin(), vec.end(), q[i]) - vec.begin() - 1;
for (int l1 = l0; l1 < r0; l1 += S1) {
int r1 = min(r0, l1+S1);
for (int i = 0; i < k; i += 3)
up(sq[i+0], sq[i+1], sq[i+2], l1/S1, r1/S1);
}
Loop (i,l0,r0)
ans += vec[sa[i]];
}
cout << ans << '\n';
}
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |