Submission #650315

#TimeUsernameProblemLanguageResultExecution timeMemory
650315ymmBubble Sort 2 (JOI18_bubblesort2)C++17
60 / 100
9030 ms22340 KiB
#include "bubblesort2.h"
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 500'032;
const int Sa = 2048, Sq = 8192;
int cnt_gt[N];
int val[N];
int mx_qrange[Sq];
int added_to[N];
vector<int> ans;
int n, q, qx[N], qy[N];

asm("\n"
"	.section	.rodata.cst32,\"aM\",@progbits,32\n"
"	.align 32\n"
".myLC0:\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.long	-2147483648\n"
"	.section	.rodata.cst16,\"aM\",@progbits,16\n"
"	.align 16\n"
".myLC1:\n"
"	.long	1\n"
"	.long	1\n"
"	.long	1\n"
"	.long	1\n"
"	.align 16\n"
".myLC2:\n"
"	.long	-2147483648\n"
"	.long	0\n"
"	.long	0\n"
"	.long	0\n"
"	.section	.text\n"
);


int fen[N];
void fen_add(int i, int x)
{
	++i;
	while (i < N) {
		fen[i] += x;
		i += i & -i;
	}
}
int fen_get(int r)
{
	int ans = 0;
	while (r > 0) {
		ans += fen[r];
		r -= r & -r;
	}
	return ans;
}

void init()
{
	vector<int> cmp(val, val+n);
	sort(cmp.begin(), cmp.end());
	cmp.resize(unique(cmp.begin(), cmp.end()) - cmp.begin());
	int mx = cmp.size() - 1;
	Loop (i,0,n) {
		int j =   lower_bound(cmp.begin(), cmp.end(), val[i])
		        - cmp.begin();
		cnt_gt[i] = fen_get(mx-j);
		fen_add(mx-j, 1);
	}
}

pii do_cnt_gt(int l, int r, int x);
int do_sub_inrange(int l, int r, int xl, int xr);
int do_add_inrange(int l, int r, int xl, int xr);
asm("\n"
"	.p2align 4\n"
"	.globl	_Z9do_cnt_gtiii\n"
"	.type	_Z9do_cnt_gtiii, @function\n"
"_Z9do_cnt_gtiii:\n"
".myLFB9910:\n"
"	.cfi_startproc\n"
"	movslq	%esi, %r10\n"
"	movslq	%edi, %rsi\n"
"	cmpq	%r10, %rsi\n"
"	jge	.myL24\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movq	%r10, %rcx\n"
"	movl	%edx, %r9d\n"
"	subq	%rsi, %rcx\n"
"	leaq	-1(%rcx), %rax\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	.cfi_offset 13, -24\n"
"	.cfi_offset 12, -32\n"
"	movq	%rsi, %r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 3, -40\n"
"	cmpq	$6, %rax\n"
"	jbe	.myL25\n"
"	movq	%rcx, %r13\n"
"	leaq	val(%rip), %r11\n"
"	vmovd	%r9d, %xmm3\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r13\n"
"	leaq	0(,%rsi,4), %rdx\n"
"	leaq	cnt_gt(%rip), %rbx\n"
"	vmovdqa	.myLC0(%rip), %ymm2\n"
"	salq	$5, %r13\n"
"	leaq	(%r11,%rdx), %r8\n"
"	vpbroadcastd	%xmm3, %ymm3\n"
"	addq	%rbx, %rdx\n"
"	leaq	-32(%r13), %rdi\n"
"	vpxor	%xmm1, %xmm1, %xmm1\n"
"	shrq	$5, %rdi\n"
"	addq	$1, %rdi\n"
"	andl	$7, %edi\n"
"	je	.myL19\n"
"	cmpq	$1, %rdi\n"
"	je	.myL48\n"
"	cmpq	$2, %rdi\n"
"	je	.myL49\n"
"	cmpq	$3, %rdi\n"
"	je	.myL50\n"
"	cmpq	$4, %rdi\n"
"	je	.myL51\n"
"	cmpq	$5, %rdi\n"
"	je	.myL52\n"
"	cmpq	$6, %rdi\n"
"	jne	.myL69\n"
".myL53:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
".myL52:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
".myL51:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
".myL50:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
".myL49:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
".myL48:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rax, %r13\n"
"	je	.myL62\n"
".myL19:\n"
"	vmovdqu	(%r8,%rax), %ymm0\n"
"	leaq	32(%rax), %rdi\n"
"	vpmaxsd	(%rdx,%rax), %ymm2, %ymm2\n"
"	vpmaxsd	32(%rdx,%rax), %ymm2, %ymm2\n"
"	vpmaxsd	64(%rdx,%rax), %ymm2, %ymm2\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpmaxsd	64(%rdx,%rdi), %ymm2, %ymm2\n"
"	vpmaxsd	96(%rdx,%rdi), %ymm2, %ymm2\n"
"	vpmaxsd	128(%rdx,%rdi), %ymm2, %ymm2\n"
"	vpmaxsd	160(%rdx,%rdi), %ymm2, %ymm2\n"
"	vpmaxsd	192(%rdx,%rdi), %ymm2, %ymm2\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	32(%r8,%rax), %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	64(%r8,%rax), %ymm0\n"
"	leaq	224(%rdi), %rax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	64(%r8,%rdi), %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	96(%r8,%rdi), %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	128(%r8,%rdi), %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	160(%r8,%rdi), %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	vmovdqu	192(%r8,%rdi), %ymm0\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	cmpq	%rax, %r13\n"
"	jne	.myL19\n"
".myL62:\n"
"	vextracti128	$0x1, %ymm2, %xmm0\n"
"	movq	%rcx, %rdi\n"
"	vpmaxsd	%xmm2, %xmm0, %xmm0\n"
"	andq	$-8, %rdi\n"
"	vpsrldq	$8, %xmm0, %xmm2\n"
"	addq	%rdi, %rsi\n"
"	vpmaxsd	%xmm2, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm2\n"
"	vpmaxsd	%xmm2, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %edx\n"
"	vmovdqa	%xmm1, %xmm0\n"
"	vextracti128	$0x1, %ymm1, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%edx, %xmm2\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	cmpq	%rdi, %rcx\n"
"	je	.myL70\n"
"	vzeroupper\n"
".myL18:\n"
"	subq	%rdi, %rcx\n"
"	movq	%rcx, %r8\n"
"	leaq	-1(%rcx), %rcx\n"
"	cmpq	$2, %rcx\n"
"	jbe	.myL22\n"
"	vmovd	%edx, %xmm4\n"
"	addq	%r12, %rdi\n"
"	vmovd	%r9d, %xmm5\n"
"	vpshufd	$0, %xmm4, %xmm1\n"
"	vpmaxsd	(%rbx,%rdi,4), %xmm1, %xmm1\n"
"	vmovdqu	(%r11,%rdi,4), %xmm6\n"
"	vpshufd	$0, %xmm5, %xmm0\n"
"	vpsrldq	$8, %xmm1, %xmm2\n"
"	vpcmpgtd	%xmm0, %xmm6, %xmm0\n"
"	vpand	.myLC1(%rip), %xmm0, %xmm0\n"
"	vpmaxsd	%xmm2, %xmm1, %xmm1\n"
"	vpsrldq	$4, %xmm1, %xmm2\n"
"	vpmaxsd	%xmm2, %xmm1, %xmm1\n"
"	vmovd	%xmm1, %edx\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%edx, %xmm2\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpaddd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %ecx\n"
"	addl	%ecx, %eax\n"
"	movq	%r8, %rcx\n"
"	andq	$-4, %rcx\n"
"	addq	%rcx, %rsi\n"
"	cmpq	%rcx, %r8\n"
"	je	.myL17\n"
".myL22:\n"
"	xorl	%edx, %edx\n"
"	vmovd	(%rbx,%rsi,4), %xmm0\n"
"	leaq	1(%rsi), %rcx\n"
"	cmpl	%r9d, (%r11,%rsi,4)\n"
"	setg	%dl\n"
"	vpmaxsd	%xmm0, %xmm2, %xmm2\n"
"	addl	%edx, %eax\n"
"	vmovd	%xmm2, %edx\n"
"	cmpq	%r10, %rcx\n"
"	jge	.myL17\n"
"	xorl	%edx, %edx\n"
"	vmovd	(%rbx,%rcx,4), %xmm0\n"
"	cmpl	(%r11,%rcx,4), %r9d\n"
"	setl	%dl\n"
"	vpmaxsd	%xmm0, %xmm2, %xmm2\n"
"	addq	$2, %rsi\n"
"	addl	%edx, %eax\n"
"	vmovd	%xmm2, %edx\n"
"	cmpq	%rsi, %r10\n"
"	jle	.myL17\n"
"	xorl	%edx, %edx\n"
"	vmovd	(%rbx,%rsi,4), %xmm0\n"
"	cmpl	(%r11,%rsi,4), %r9d\n"
"	setl	%dl\n"
"	vpmaxsd	%xmm0, %xmm2, %xmm2\n"
"	addl	%edx, %eax\n"
"	vmovd	%xmm2, %edx\n"
".myL17:\n"
"	salq	$32, %rdx\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	orq	%rdx, %rax\n"
"	popq	%r13\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL69:\n"
"	.cfi_restore_state\n"
"	vmovdqu	(%r8), %ymm0\n"
"	vpmaxsd	(%rdx), %ymm2, %ymm2\n"
"	movl	$32, %eax\n"
"	vpcmpgtd	%ymm3, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm1\n"
"	jmp	.myL53\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL24:\n"
"	.cfi_def_cfa 7, 8\n"
"	.cfi_restore 3\n"
"	.cfi_restore 6\n"
"	.cfi_restore 12\n"
"	.cfi_restore 13\n"
"	movl	$-2147483648, %edx\n"
"	xorl	%eax, %eax\n"
"	salq	$32, %rdx\n"
"	orq	%rdx, %rax\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL70:\n"
"	.cfi_def_cfa 6, 16\n"
"	.cfi_offset 3, -40\n"
"	.cfi_offset 6, -16\n"
"	.cfi_offset 12, -32\n"
"	.cfi_offset 13, -24\n"
"	vzeroupper\n"
"	salq	$32, %rdx\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	orq	%rdx, %rax\n"
"	popq	%r13\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
".myL25:\n"
"	.cfi_restore_state\n"
"	vmovdqa	.myLC2(%rip), %xmm2\n"
"	xorl	%edi, %edi\n"
"	xorl	%eax, %eax\n"
"	leaq	val(%rip), %r11\n"
"	leaq	cnt_gt(%rip), %rbx\n"
"	vmovd	%xmm2, %edx\n"
"	jmp	.myL18\n"
"	.cfi_endproc\n"
".myLFE9910:\n"
"	.size	_Z9do_cnt_gtiii, .-_Z9do_cnt_gtiii\n"
"	.p2align 4\n"
"	.globl	_Z14do_sub_inrangeiiii\n"
"	.type	_Z14do_sub_inrangeiiii, @function\n"
"_Z14do_sub_inrangeiiii:\n"
".myLFB9919:\n"
"	.cfi_startproc\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movl	%ecx, %r10d\n"
"	movslq	%edi, %rcx\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 14, -24\n"
"	.cfi_offset 13, -32\n"
"	.cfi_offset 12, -40\n"
"	.cfi_offset 3, -48\n"
"	movslq	%esi, %rbx\n"
"	cmpq	%rbx, %rcx\n"
"	jge	.myL79\n"
"	movq	%rbx, %rsi\n"
"	movl	%edx, %r9d\n"
"	movq	%rcx, %r13\n"
"	subq	%rcx, %rsi\n"
"	leaq	-1(%rsi), %rax\n"
"	cmpq	$6, %rax\n"
"	jbe	.myL80\n"
"	movq	%rsi, %r14\n"
"	leaq	cnt_gt(%rip), %r11\n"
"	vmovd	%r9d, %xmm3\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r14\n"
"	leaq	0(,%rcx,4), %rdi\n"
"	vmovd	%r10d, %xmm4\n"
"	vmovdqa	.myLC0(%rip), %ymm2\n"
"	salq	$5, %r14\n"
"	leaq	val(%rip), %r12\n"
"	leaq	(%r11,%rdi), %rdx\n"
"	leaq	-32(%r14), %r8\n"
"	vpbroadcastd	%xmm3, %ymm3\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
"	addq	%r12, %rdi\n"
"	shrq	$5, %r8\n"
"	addq	$1, %r8\n"
"	andl	$3, %r8d\n"
"	je	.myL74\n"
"	cmpq	$1, %r8\n"
"	je	.myL91\n"
"	cmpq	$2, %r8\n"
"	je	.myL92\n"
"	vmovdqu	(%rdi), %ymm1\n"
"	movl	$32, %eax\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	(%rdx), %ymm0, %ymm0\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	vmovdqu	%ymm0, (%rdx)\n"
".myL92:\n"
"	vmovdqu	(%rdi,%rax), %ymm1\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	(%rdx,%rax), %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	addq	$32, %rax\n"
".myL91:\n"
"	vmovdqu	(%rdi,%rax), %ymm1\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	(%rdx,%rax), %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	addq	$32, %rax\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	cmpq	%r14, %rax\n"
"	je	.myL97\n"
".myL74:\n"
"	vmovdqu	(%rdi,%rax), %ymm1\n"
"	leaq	32(%rax), %r8\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	(%rdx,%rax), %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	vmovdqu	32(%rdi,%rax), %ymm1\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	32(%rdx,%rax), %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, 32(%rdx,%rax)\n"
"	vmovdqu	64(%rdi,%rax), %ymm1\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	64(%rdx,%rax), %ymm0, %ymm0\n"
"	vmovdqu	%ymm0, 64(%rdx,%rax)\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	leaq	96(%r8), %rax\n"
"	vmovdqu	64(%rdi,%r8), %ymm1\n"
"	vpminsd	%ymm1, %ymm3, %ymm0\n"
"	vpcmpgtd	%ymm1, %ymm4, %ymm1\n"
"	vpcmpeqd	%ymm0, %ymm3, %ymm0\n"
"	vpand	%ymm1, %ymm0, %ymm0\n"
"	vpaddd	64(%rdx,%r8), %ymm0, %ymm0\n"
"	vpmaxsd	%ymm0, %ymm2, %ymm2\n"
"	vmovdqu	%ymm0, 64(%rdx,%r8)\n"
"	cmpq	%r14, %rax\n"
"	jne	.myL74\n"
".myL97:\n"
"	vextracti128	$0x1, %ymm2, %xmm0\n"
"	movq	%rsi, %rdx\n"
"	vpmaxsd	%xmm2, %xmm0, %xmm0\n"
"	andq	$-8, %rdx\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	addq	%rdx, %rcx\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	cmpq	%rdx, %rsi\n"
"	je	.myL100\n"
"	vzeroupper\n"
".myL73:\n"
"	subq	%rdx, %rsi\n"
"	leaq	-1(%rsi), %rdi\n"
"	cmpq	$2, %rdi\n"
"	jbe	.myL77\n"
"	vmovd	%r9d, %xmm5\n"
"	vmovd	%r10d, %xmm6\n"
"	vmovd	%eax, %xmm7\n"
"	addq	%r13, %rdx\n"
"	vmovdqu	(%r12,%rdx,4), %xmm2\n"
"	vpshufd	$0, %xmm5, %xmm1\n"
"	leaq	(%r11,%rdx,4), %rdi\n"
"	movq	%rsi, %rdx\n"
"	andq	$-4, %rdx\n"
"	vpminsd	%xmm2, %xmm1, %xmm0\n"
"	addq	%rdx, %rcx\n"
"	vpcmpeqd	%xmm0, %xmm1, %xmm1\n"
"	vpshufd	$0, %xmm6, %xmm0\n"
"	vpcmpgtd	%xmm2, %xmm0, %xmm0\n"
"	vpand	%xmm0, %xmm1, %xmm1\n"
"	vpaddd	(%rdi), %xmm1, %xmm1\n"
"	vpshufd	$0, %xmm7, %xmm0\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vmovdqu	%xmm1, (%rdi)\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	cmpq	%rdx, %rsi\n"
"	je	.myL71\n"
".myL77:\n"
"	movl	(%r12,%rcx,4), %edx\n"
"	cmpl	%r9d, %edx\n"
"	setge	%dil\n"
"	xorl	%esi, %esi\n"
"	cmpl	%r10d, %edx\n"
"	movl	(%r11,%rcx,4), %edx\n"
"	setl	%sil\n"
"	andl	%edi, %esi\n"
"	subl	%esi, %edx\n"
"	leaq	1(%rcx), %rsi\n"
"	cmpl	%edx, %eax\n"
"	movl	%edx, (%r11,%rcx,4)\n"
"	cmovl	%edx, %eax\n"
"	cmpq	%rsi, %rbx\n"
"	jle	.myL71\n"
"	movl	(%r12,%rsi,4), %edx\n"
"	cmpl	%r10d, %edx\n"
"	setl	%r8b\n"
"	xorl	%edi, %edi\n"
"	cmpl	%r9d, %edx\n"
"	movl	(%r11,%rsi,4), %edx\n"
"	setge	%dil\n"
"	andl	%r8d, %edi\n"
"	subl	%edi, %edx\n"
"	cmpl	%edx, %eax\n"
"	movl	%edx, (%r11,%rsi,4)\n"
"	cmovl	%edx, %eax\n"
"	addq	$2, %rcx\n"
"	cmpq	%rcx, %rbx\n"
"	jle	.myL71\n"
"	movl	(%r12,%rcx,4), %edx\n"
"	cmpl	%edx, %r9d\n"
"	setle	%dil\n"
"	xorl	%esi, %esi\n"
"	cmpl	%edx, %r10d\n"
"	movl	(%r11,%rcx,4), %edx\n"
"	setg	%sil\n"
"	andl	%edi, %esi\n"
"	subl	%esi, %edx\n"
"	cmpl	%edx, %eax\n"
"	movl	%edx, (%r11,%rcx,4)\n"
"	cmovl	%edx, %eax\n"
".myL71:\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL79:\n"
"	.cfi_restore_state\n"
"	popq	%rbx\n"
"	movl	$-2147483648, %eax\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL100:\n"
"	.cfi_restore_state\n"
"	vzeroupper\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
".myL80:\n"
"	.cfi_restore_state\n"
"	xorl	%edx, %edx\n"
"	movl	$-2147483648, %eax\n"
"	leaq	cnt_gt(%rip), %r11\n"
"	leaq	val(%rip), %r12\n"
"	jmp	.myL73\n"
"	.cfi_endproc\n"
".myLFE9919:\n"
"	.size	_Z14do_sub_inrangeiiii, .-_Z14do_sub_inrangeiiii\n"
"	.p2align 4\n"
"	.globl	_Z14do_add_inrangeiiii\n"
"	.type	_Z14do_add_inrangeiiii, @function\n"
"_Z14do_add_inrangeiiii:\n"
".myLFB9920:\n"
"	.cfi_startproc\n"
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	movl	%ecx, %r10d\n"
"	movslq	%edi, %rcx\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%r14\n"
"	pushq	%r13\n"
"	pushq	%r12\n"
"	pushq	%rbx\n"
"	.cfi_offset 14, -24\n"
"	.cfi_offset 13, -32\n"
"	.cfi_offset 12, -40\n"
"	.cfi_offset 3, -48\n"
"	movslq	%esi, %rbx\n"
"	cmpq	%rbx, %rcx\n"
"	jge	.myL109\n"
"	movq	%rbx, %rsi\n"
"	movl	%edx, %r9d\n"
"	movq	%rcx, %r13\n"
"	subq	%rcx, %rsi\n"
"	leaq	-1(%rsi), %rax\n"
"	cmpq	$6, %rax\n"
"	jbe	.myL110\n"
"	movq	%rsi, %r14\n"
"	leaq	cnt_gt(%rip), %r11\n"
"	vmovd	%r9d, %xmm4\n"
"	xorl	%eax, %eax\n"
"	shrq	$3, %r14\n"
"	leaq	0(,%rcx,4), %rdi\n"
"	vmovd	%r10d, %xmm5\n"
"	vmovdqa	.myLC0(%rip), %ymm3\n"
"	salq	$5, %r14\n"
"	leaq	val(%rip), %r12\n"
"	leaq	(%r11,%rdi), %rdx\n"
"	leaq	-32(%r14), %r8\n"
"	vpbroadcastd	%xmm4, %ymm4\n"
"	vpbroadcastd	%xmm5, %ymm5\n"
"	addq	%r12, %rdi\n"
"	shrq	$5, %r8\n"
"	addq	$1, %r8\n"
"	andl	$3, %r8d\n"
"	je	.myL104\n"
"	cmpq	$1, %r8\n"
"	je	.myL121\n"
"	cmpq	$2, %r8\n"
"	je	.myL122\n"
"	vmovdqu	(%rdi), %ymm2\n"
"	vmovdqu	(%rdx), %ymm1\n"
"	movl	$32, %eax\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqu	%ymm0, (%rdx)\n"
".myL122:\n"
"	vmovdqu	(%rdi,%rax), %ymm2\n"
"	vmovdqu	(%rdx,%rax), %ymm1\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	addq	$32, %rax\n"
".myL121:\n"
"	vmovdqu	(%rdi,%rax), %ymm2\n"
"	vmovdqu	(%rdx,%rax), %ymm1\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	addq	$32, %rax\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	cmpq	%r14, %rax\n"
"	je	.myL127\n"
".myL104:\n"
"	vmovdqu	(%rdi,%rax), %ymm2\n"
"	vmovdqu	(%rdx,%rax), %ymm1\n"
"	leaq	32(%rax), %r8\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vmovdqu	32(%rdx,%rax), %ymm1\n"
"	vmovdqu	%ymm0, (%rdx,%rax)\n"
"	vmovdqu	32(%rdi,%rax), %ymm2\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vmovdqu	64(%rdx,%rax), %ymm1\n"
"	vmovdqu	%ymm0, 32(%rdx,%rax)\n"
"	vmovdqu	64(%rdi,%rax), %ymm2\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vmovdqu	%ymm0, 64(%rdx,%rax)\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	leaq	96(%r8), %rax\n"
"	vmovdqu	64(%rdi,%r8), %ymm2\n"
"	vmovdqu	64(%rdx,%r8), %ymm1\n"
"	vpminsd	%ymm2, %ymm4, %ymm0\n"
"	vpcmpgtd	%ymm2, %ymm5, %ymm2\n"
"	vpcmpeqd	%ymm0, %ymm4, %ymm0\n"
"	vpand	%ymm2, %ymm0, %ymm0\n"
"	vpsubd	%ymm0, %ymm1, %ymm0\n"
"	vpmaxsd	%ymm0, %ymm3, %ymm3\n"
"	vmovdqu	%ymm0, 64(%rdx,%r8)\n"
"	cmpq	%r14, %rax\n"
"	jne	.myL104\n"
".myL127:\n"
"	vextracti128	$0x1, %ymm3, %xmm0\n"
"	movq	%rsi, %rdx\n"
"	vpmaxsd	%xmm3, %xmm0, %xmm0\n"
"	andq	$-8, %rdx\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	addq	%rdx, %rcx\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	cmpq	%rdx, %rsi\n"
"	je	.myL130\n"
"	vzeroupper\n"
".myL103:\n"
"	subq	%rdx, %rsi\n"
"	leaq	-1(%rsi), %rdi\n"
"	cmpq	$2, %rdi\n"
"	jbe	.myL107\n"
"	addq	%r13, %rdx\n"
"	vmovd	%r9d, %xmm6\n"
"	vmovd	%r10d, %xmm7\n"
"	vmovdqu	(%r12,%rdx,4), %xmm2\n"
"	vpshufd	$0, %xmm6, %xmm0\n"
"	leaq	(%r11,%rdx,4), %rdi\n"
"	movq	%rsi, %rdx\n"
"	vmovdqu	(%rdi), %xmm6\n"
"	andq	$-4, %rdx\n"
"	vpminsd	%xmm2, %xmm0, %xmm1\n"
"	addq	%rdx, %rcx\n"
"	vpcmpeqd	%xmm1, %xmm0, %xmm0\n"
"	vpshufd	$0, %xmm7, %xmm1\n"
"	vmovd	%eax, %xmm7\n"
"	vpcmpgtd	%xmm2, %xmm1, %xmm1\n"
"	vpand	%xmm1, %xmm0, %xmm0\n"
"	vpsubd	%xmm0, %xmm6, %xmm1\n"
"	vpshufd	$0, %xmm7, %xmm0\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vmovdqu	%xmm1, (%rdi)\n"
"	vpsrldq	$8, %xmm0, %xmm1\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vpsrldq	$4, %xmm0, %xmm1\n"
"	vpmaxsd	%xmm1, %xmm0, %xmm0\n"
"	vmovd	%xmm0, %eax\n"
"	cmpq	%rdx, %rsi\n"
"	je	.myL101\n"
".myL107:\n"
"	movl	(%r12,%rcx,4), %edx\n"
"	cmpl	%r9d, %edx\n"
"	setge	%sil\n"
"	cmpl	%r10d, %edx\n"
"	setl	%dl\n"
"	movzbl	%dl, %edx\n"
"	andl	%esi, %edx\n"
"	addl	(%r11,%rcx,4), %edx\n"
"	leaq	1(%rcx), %rsi\n"
"	cmpl	%edx, %eax\n"
"	movl	%edx, (%r11,%rcx,4)\n"
"	cmovl	%edx, %eax\n"
"	cmpq	%rsi, %rbx\n"
"	jle	.myL101\n"
"	movl	(%r12,%rsi,4), %edx\n"
"	cmpl	%r10d, %edx\n"
"	setl	%dil\n"
"	cmpl	%r9d, %edx\n"
"	setge	%dl\n"
"	movzbl	%dl, %edx\n"
"	andl	%edi, %edx\n"
"	addl	(%r11,%rsi,4), %edx\n"
"	cmpl	%edx, %eax\n"
"	movl	%edx, (%r11,%rsi,4)\n"
"	cmovl	%edx, %eax\n"
"	addq	$2, %rcx\n"
"	cmpq	%rcx, %rbx\n"
"	jle	.myL101\n"
"	movl	(%r12,%rcx,4), %edx\n"
"	cmpl	%edx, %r9d\n"
"	setle	%sil\n"
"	cmpl	%edx, %r10d\n"
"	setg	%dl\n"
"	movzbl	%dl, %edx\n"
"	andl	%esi, %edx\n"
"	addl	(%r11,%rcx,4), %edx\n"
"	cmpl	%edx, %eax\n"
"	movl	%edx, (%r11,%rcx,4)\n"
"	cmovl	%edx, %eax\n"
".myL101:\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL109:\n"
"	.cfi_restore_state\n"
"	popq	%rbx\n"
"	movl	$-2147483648, %eax\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
".myL130:\n"
"	.cfi_restore_state\n"
"	vzeroupper\n"
"	popq	%rbx\n"
"	popq	%r12\n"
"	popq	%r13\n"
"	popq	%r14\n"
"	popq	%rbp\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
".myL110:\n"
"	.cfi_restore_state\n"
"	xorl	%edx, %edx\n"
"	movl	$-2147483648, %eax\n"
"	leaq	cnt_gt(%rip), %r11\n"
"	leaq	val(%rip), %r12\n"
"	jmp	.myL103\n"
"	.cfi_endproc\n"
".myLFE9920:\n"
"	.size	_Z14do_add_inrangeiiii, .-_Z14do_add_inrangeiiii\n"
);

int apply(int al, int ar, int qi)
{
	int i = qx[qi], x = qy[qi];
	int mx = INT_MIN;
	if (al < i) {
		int cr = min(ar, i);
		auto [tmp, new_mx] = do_cnt_gt(al, cr, x);
		added_to[qi] += tmp;
		mx = max(mx, new_mx);
	}
	if (i+1 < ar) {
		int cl = max(al, i+1);
		if (val[i] < x) {
			auto tmp = do_add_inrange(cl, ar, val[i], x);
			mx = max(mx, tmp);
		} else {
			auto tmp = do_sub_inrange(cl, ar, x, val[i]);
			mx = max(mx, tmp);
		}
	}
	if (al <= i && i < ar) {
		cnt_gt[i] = added_to[qi];
		mx = max(mx, cnt_gt[i]);
	}
	val[i] = x;
	return mx;
}

void solve_qrange(int ql, int qr)
{
	vector<int> ch;
	Loop (i,ql,qr)
		ch.push_back(val[qx[i]]);
	memset(mx_qrange, 0, sizeof(mx_qrange));
	for (int al = 0; al < n; al += Sa) {
		int ar = min(al + Sa, n);
		Loop (qi,ql,qr) {
			int x = apply(al, ar, qi);
			mx_qrange[qi-ql] = max(mx_qrange[qi-ql], x);
		}
		Loop (i,ql,qr)
			val[qx[i]] = ch[i-ql];
	}
	Loop (i,ql,qr)
		val[qx[i]] = qy[i];
	Loop (i,ql,qr)
		ans.push_back(mx_qrange[i-ql]);
}

vector<int> countScans(vector<int> A, vector<int> X, vector<int> V)
{
	n = A.size();
	Loop (i,0,n)
		val[i] = A[i];
	q = X.size();
	Loop (i,0,q) {
		qx[i] = X[i];
		qy[i] = V[i];
	}
	init();
	for (int ql = 0; ql < q; ql += Sq) {
		int qr = min(ql+Sq, q);
		solve_qrange(ql, qr);
	}
	return ans;
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...