제출 #644726

#제출 시각아이디문제언어결과실행 시간메모리
644726ymmFortune Telling 2 (JOI14_fortune_telling2)C++17
35 / 100
2766 ms4204 KiB
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 2000*100;
const int S0 = 2000*16;
const int S1 = 2000;
pii ab[N];
unsigned short sa[N], sb[N];
int q[N+32];
unsigned short sq[N];
int n;

/*
__attribute__((optimize("O3,unroll-loops"),target("avx2")))
void up(unsigned short x, unsigned short y, unsigned short z, int l, int r)
{
	l *= S1; r *= S1;
	Loop (i,l,r) {
		unsigned short v = sa[i], u = sb[i];
		v ^= v <= x? u: 0;
		v ^= v <= y? u: 0;
		v ^= v <= z? u: 0;
		sa[i] = v;
	}
}
*/
void up(unsigned short x, unsigned short y, unsigned short z, int l, int r);

asm("\n"
"	.p2align 4\n"
"	.globl	_Z2uptttii\n"
"	.type	_Z2uptttii, @function\n"
"_Z2uptttii:\n"
".myLFB9901:\n"
"	.cfi_startproc\n"
"	movl	%edi, %eax\n"
"	imull	$2000, %ecx, %r10d\n"
"	movl	%esi, %edi\n"
"	movl	%edx, %r9d\n"
"	imull	$2000, %r8d, %esi\n"
"	cmpl	%r8d, %ecx\n"
"	jge	.myL103\n"
"	movslq	%esi, %rsi\n"
"	movslq	%r10d, %r10\n"
"	vmovd	%edi, %xmm5\n"
"	movslq	%ecx, %rdx\n"
"	vmovd	%eax, %xmm6\n"
"	subq	%r10, %rsi\n"
"	leaq	sa(%rip), %rcx\n"
"	xorl	%eax, %eax\n"
"	vmovd	%r9d, %xmm4\n"
"	vpbroadcastw	%xmm6, %ymm6\n"
"	vpxor	%xmm3, %xmm3, %xmm3\n"
"	addq	%rsi, %rsi\n"
"	imulq	$4000, %rdx, %rdx\n"
"	leaq	-32(%rsi), %rdi\n"
"	vpbroadcastw	%xmm5, %ymm5\n"
"	shrq	$5, %rdi\n"
"	leaq	sb(%rip), %r8\n"
"	vpbroadcastw	%xmm4, %ymm4\n"
"	addq	$1, %rdi\n"
"	addq	%rdx, %rcx\n"
"	addq	%r8, %rdx\n"
"	andl	$3, %edi\n"
"	je	.myL85\n"
"	cmpq	$1, %rdi\n"
"	je	.myL97\n"
"	cmpq	$2, %rdi\n"
"	je	.myL98\n"
"	vmovdqa	(%rcx), %ymm0\n"
"	vmovdqa	(%rdx), %ymm1\n"
"	movl	$32, %eax\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx)\n"
".myL98:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx,%rax)\n"
"	addq	$32, %rax\n"
".myL97:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx,%rax)\n"
"	addq	$32, %rax\n"
"	cmpq	%rsi, %rax\n"
"	je	.myL104\n"
".myL85:\n"
"	vmovdqa	(%rcx,%rax), %ymm0\n"
"	vmovdqa	(%rdx,%rax), %ymm1\n"
"	leaq	32(%rax), %rdi\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, (%rcx,%rax)\n"
"	vmovdqa	32(%rcx,%rax), %ymm0\n"
"	vmovdqa	32(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 32(%rcx,%rax)\n"
"	vmovdqa	64(%rcx,%rax), %ymm0\n"
"	vmovdqa	64(%rdx,%rax), %ymm1\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 64(%rcx,%rax)\n"
"	vmovdqa	64(%rcx,%rdi), %ymm0\n"
"	vmovdqa	64(%rdx,%rdi), %ymm1\n"
"	leaq	96(%rdi), %rax\n"
"	vpsubusw	%ymm6, %ymm0, %ymm2\n"
"	vpxor	%ymm1, %ymm0, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm5, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm7\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm7, %ymm0, %ymm0\n"
"	vpsubusw	%ymm4, %ymm0, %ymm2\n"
"	vpxor	%ymm0, %ymm1, %ymm1\n"
"	vpcmpeqw	%ymm3, %ymm2, %ymm2\n"
"	vpblendvb	%ymm2, %ymm1, %ymm0, %ymm0\n"
"	vmovdqa	%ymm0, 64(%rcx,%rdi)\n"
"	cmpq	%rsi, %rax\n"
"	jne	.myL85\n"
".myL104:\n"
"	vzeroupper\n"
".myL103:\n"
"	ret\n"
"	.cfi_endproc\n"
".myLFE9901:\n"
"	.size	_Z2uptttii, .-_Z2uptttii\n"
);

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	int k;
	cin >> n >> k;
	Loop (i,0,n)
		cin >> ab[i].first >> ab[i].second;
	Loop (i,0,k)
		cin >> q[i];
	mt19937_64 rd(time(0));
	shuffle(ab, ab+N, rd);
	ll ans = 0;
	for (int l0 = 0; l0 < N; l0 += S0) {
		int r0 = min(N, l0+S0);
		vector<int> vec = {0};
		Loop (i,l0,r0) {
			vec.push_back(ab[i].first);
			vec.push_back(ab[i].second);
		}
		sort(vec.begin(), vec.end());
		vec.resize(unique(vec.begin(), vec.end()) - vec.begin());
		Loop (i,l0,r0) {
			sa[i] = lower_bound(vec.begin(), vec.end(), ab[i].first) - vec.begin();
			sb[i] = lower_bound(vec.begin(), vec.end(), ab[i].second) - vec.begin();
			sb[i] ^= sa[i];
		}
		Loop (i,0,k)
			sq[i] = upper_bound(vec.begin(), vec.end(), q[i]) - vec.begin() - 1;
		for (int l1 = l0; l1 < r0; l1 += S1) {
			int r1 = min(r0, l1+S1);
			for (int i = 0; i < k; i += 3)
				up(sq[i+0], sq[i+1], sq[i+2], l1/S1, r1/S1);
		}
		Loop (i,l0,r0)
			ans += vec[sa[i]];
	}
	cout << ans << '\n';
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...