답안 #831078

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
831078 2023-08-19T17:04:15 Z NothingXD 사탕 분배 (IOI21_candies) C++17
100 / 100
1386 ms 11712 KB
#include "candies.h"
#include <bits/stdc++.h>

#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2")

using namespace std;

typedef long long ll;
typedef pair<int,int> pii;
typedef pair<ll,ll> pll;

// Zero-argument case: terminates the current debug line on stderr (endl also flushes).
void debug_out(){
	cerr << endl;
}
// Writes every argument to stderr, each followed by a single space,
// then finishes the line through the zero-argument overload.
template<typename Head, typename... Tail>
void debug_out(Head H, Tail... T){
	cerr << H << ' ';
	((cerr << T << ' '), ...);	// left-to-right expansion over the remaining arguments
	debug_out();
}

// debug(e1, e2, ...) prints "(e1, e2, ...): " to stderr and then hands the
// values to debug_out, which prints them space-separated on one line.
#define debug(...) cerr << "(" << #__VA_ARGS__ << "): ", debug_out(__VA_ARGS__)
// Shorthands for pair members.
#define F first
// NOTE(review): this macro rewrites EVERY later use of the identifier S,
// including the constant declared below and its uses, so that constant is
// effectively named `second`. It works only because the rewrite is consistent.
#define S second
// Expands to the begin/end iterator pair of a container.
#define all(x) x.begin(), x.end()
#define MP(x, y) make_pair(x, y)

// Capacity of the global arrays below (2e5 plus a margin of 10 elements).
const int maxn = 2e5 + 10;
// Number of consecutive indices handled per outer sweep in distribute_candies.
const int S = 2048;

// n, q : element count and query count, set by distribute_candies.
// a    : current value per index (zero-initialised as a global).
// c    : per-index upper bound used by the clamping helpers.
// l, r, v : per-query range start, range end and delta.
// a and c are referenced by symbol name from the asm block below, so their
// names and external linkage must stay exactly as they are.
int n, q, a[maxn], c[maxn], l[maxn], r[maxn], v[maxn];

// Range helpers; their bodies are provided by the asm block that follows.
// add1: a[i] = min(a[i] + x, c[i])             for i in [l, r]
// add2: a[i] = max(a[i] + x, 0)                for i in [l, r]
// add3: add1 with x, then add2 with y, per element
// add4: add2 with x, then add1 with y, per element
// All four do nothing when l > r.
void add1(int l, int r, int x);
void add2(int l, int r, int x);
void add3(int l, int r, int x, int y);
void add4(int l, int r, int x, int y);

asm(R"dard(
	# -----------------------------------------------------------------
	# add1(l, r, x)  -- exported as _Z4add1iii (AT&T syntax, x86-64, AVX2)
	# In:     edi = l, esi = r, edx = x (signed 32-bit values)
	# Effect: for every index i with l <= i <= r
	#             a[i] = min(a[i] + x, c[i])     (vpaddd, signed vpminsd)
	#         Nothing happens when l > r. No return value.
	# Memory: reads and writes the global array a, reads the global array c,
	#         and spills x into the 4 bytes just below the realigned rsp.
	# Saves and restores rbp, rbx and r12; makes no calls.
	# Shape:  ymm groups of eight ints, then at most one xmm group of four
	#         ints, then at most three single ints.
	# -----------------------------------------------------------------
	.p2align 4
	.globl	_Z4add1iii
	.type	_Z4add1iii, @function
_Z4add1iii:
.myLFB9900:
	.cfi_startproc
	endbr64
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	# xmm1 lane 0 = x, rcx = l (sign-extended to 64 bits)
	vmovd	%edx, %xmm1
	movslq	%edi, %rcx
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	# keep a copy of x in memory; the xmm tail reloads it with vbroadcastss
	vmovd	%xmm1, -4(%rsp)
	# empty range: straight to the epilogue
	cmpl	%esi, %ecx
	jg	.myL57
	# r11d = r - l, ebx = element count, r9d = r (kept for the scalar tail)
	movl	%esi, %r11d
	movl	%esi, %r9d
	subl	%ecx, %r11d
	leal	1(%r11), %ebx
	# fewer than eight elements (unsigned compare): skip the ymm part
	cmpl	$6, %r11d
	jbe	.myL18
	# Set-up for the ymm part:
	#   r12 = (count / 8) * 32 = byte length covered by whole ymm groups
	#   rdx = &a[l], rsi = &c[l], rax = running byte offset (starts at 0)
	#   ymm2 = x broadcast to all eight lanes
	movl	%ebx, %r12d
	movslq	%ecx, %rsi
	leaq	a(%rip), %r8
	xorl	%eax, %eax
	shrl	$3, %r12d
	salq	$2, %rsi
	vpbroadcastd	%xmm1, %ymm2
	leaq	c(%rip), %r10
	salq	$5, %r12
	leaq	(%r8,%rsi), %rdx
	addq	%r10, %rsi
	# rdi = (number of ymm groups) mod 8. That many single groups are peeled
	# off first so the main loop can always do eight groups per pass.
	# Remainder k in 1..6 enters at the label that leaves k groups to run;
	# remainder 7 goes through .myL59, remainder 0 goes directly to .myL14.
	leaq	-32(%r12), %rdi
	shrq	$5, %rdi
	addq	$1, %rdi
	andl	$7, %edi
	je	.myL14
	cmpq	$1, %rdi
	je	.myL41
	cmpq	$2, %rdi
	je	.myL42
	cmpq	$3, %rdi
	je	.myL43
	cmpq	$4, %rdi
	je	.myL44
	cmpq	$5, %rdi
	je	.myL45
	cmpq	$6, %rdi
	jne	.myL59
	# Peeled groups: each stanza handles one ymm group and advances rax by 32,
	# falling through into the next stanza.
.myL46:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL45:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL44:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL43:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL42:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL41:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
	# all whole groups already done by the peeled part?
	cmpq	%r12, %rax
	je	.myL55
	# Main loop: eight ymm groups (256 bytes) per pass. rdi = rax + 32 is
	# used as a second base so the offsets cover rax+0 .. rax+224.
.myL14:
	vpaddd	(%rdx,%rax), %ymm2, %ymm0
	vpminsd	(%rsi,%rax), %ymm0, %ymm0
	leaq	32(%rax), %rdi
	vmovdqu	%ymm0, (%rdx,%rax)
	vpaddd	32(%rdx,%rax), %ymm2, %ymm0
	vpminsd	32(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, 32(%rdx,%rax)
	vpaddd	64(%rdx,%rax), %ymm2, %ymm0
	vpminsd	64(%rsi,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, 64(%rdx,%rax)
	vpaddd	64(%rdx,%rdi), %ymm2, %ymm0
	vpminsd	64(%rsi,%rdi), %ymm0, %ymm0
	leaq	224(%rdi), %rax
	vmovdqu	%ymm0, 64(%rdx,%rdi)
	vpaddd	96(%rdx,%rdi), %ymm2, %ymm0
	vpminsd	96(%rsi,%rdi), %ymm0, %ymm0
	vmovdqu	%ymm0, 96(%rdx,%rdi)
	vpaddd	128(%rdx,%rdi), %ymm2, %ymm0
	vpminsd	128(%rsi,%rdi), %ymm0, %ymm0
	vmovdqu	%ymm0, 128(%rdx,%rdi)
	vpaddd	160(%rdx,%rdi), %ymm2, %ymm0
	vpminsd	160(%rsi,%rdi), %ymm0, %ymm0
	vmovdqu	%ymm0, 160(%rdx,%rdi)
	vpaddd	192(%rdx,%rdi), %ymm2, %ymm0
	vpminsd	192(%rsi,%rdi), %ymm0, %ymm0
	vmovdqu	%ymm0, 192(%rdx,%rdi)
	cmpq	%r12, %rax
	jne	.myL14
	# After the ymm part: edx = elements already handled (count rounded down
	# to a multiple of eight), eax = next index. Nothing left -> .myL60.
.myL55:
	movl	%ebx, %edx
	andl	$-8, %edx
	leal	(%rdx,%rcx), %eax
	cmpl	%edx, %ebx
	je	.myL60
	vzeroupper
	# Tail entry. On arrival: edx = elements already handled, eax = next
	# index, r11d = r - l. Afterwards esi = elements still to do.
.myL13:
	subl	%edx, %r11d
	leal	1(%r11), %esi
	# three or fewer left: scalar only
	cmpl	$2, %r11d
	jbe	.myL16
	# one xmm group of four ints, x reloaded from the stack copy
	addq	%rdx, %rcx
	vbroadcastss	-4(%rsp), %xmm0
	leaq	(%r8,%rcx,4), %rdx
	vpaddd	(%rdx), %xmm0, %xmm0
	vpminsd	(%r10,%rcx,4), %xmm0, %xmm0
	vmovdqu	%xmm0, (%rdx)
	movl	%esi, %edx
	andl	$-4, %edx
	addl	%edx, %eax
	cmpl	%edx, %esi
	je	.myL57
	# Scalar tail: up to three elements at indices eax, eax+1, eax+2,
	# each guarded by a comparison against r (held in r9d).
.myL16:
	movslq	%eax, %rdx
	vmovd	(%r8,%rdx,4), %xmm0
	vmovd	(%r10,%rdx,4), %xmm2
	vpaddd	%xmm0, %xmm1, %xmm0
	vpminsd	%xmm2, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	leal	1(%rax), %edx
	cmpl	%r9d, %edx
	jg	.myL57
	movslq	%edx, %rdx
	addl	$2, %eax
	vmovd	(%r8,%rdx,4), %xmm0
	vmovd	(%r10,%rdx,4), %xmm2
	vpaddd	%xmm0, %xmm1, %xmm0
	vpminsd	%xmm2, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	cmpl	%eax, %r9d
	jl	.myL57
	cltq
	vmovd	(%r8,%rax,4), %xmm0
	vpaddd	%xmm0, %xmm1, %xmm1
	vmovd	(%r10,%rax,4), %xmm0
	vpminsd	%xmm0, %xmm1, %xmm1
	vmovd	%xmm1, (%r8,%rax,4)
	# Common epilogue: undo the stack realignment and restore rbx, r12, rbp.
.myL57:
	leaq	-16(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
	# Remainder of seven peeled groups: do the first one here (offset 0),
	# then run the six stanzas starting at .myL46 with rax = 32.
.myL59:
	.cfi_restore_state
	vpaddd	(%rdx), %ymm2, %ymm0
	vpminsd	(%rsi), %ymm0, %ymm0
	movl	$32, %eax
	vmovdqu	%ymm0, (%rdx)
	jmp	.myL46
	.p2align 4,,10
	.p2align 3
	# Exit used when the ymm part covered the whole range.
.myL60:
	vzeroupper
	leaq	-16(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	# Short-range entry (fewer than eight elements): nothing handled yet,
	# so eax = l, edx = 0; load the array bases and go to the tail.
.myL18:
	.cfi_restore_state
	movl	%ecx, %eax
	xorl	%edx, %edx
	leaq	a(%rip), %r8
	leaq	c(%rip), %r10
	jmp	.myL13
	.cfi_endproc
.myLFE9900:
	.size	_Z4add1iii, .-_Z4add1iii
	# -----------------------------------------------------------------
	# add2(l, r, x)  -- exported as _Z4add2iii (AT&T syntax, x86-64, AVX2)
	# In:     edi = l, esi = r, edx = x (signed 32-bit values)
	# Effect: for every index i with l <= i <= r
	#             a[i] = max(a[i] + x, 0)     (vpaddd, signed vpmaxsd vs zero)
	#         Nothing happens when l > r. No return value.
	# Memory: reads and writes the global array a, and spills x into the
	#         4 bytes just below the realigned rsp. Array c is not used.
	# Shape:  ymm groups of eight ints walked by pointer, then at most one
	#         xmm group of four ints, then at most three single ints.
	# -----------------------------------------------------------------
	.p2align 4
	.globl	_Z4add2iii
	.type	_Z4add2iii, @function
_Z4add2iii:
.myLFB9901:
	.cfi_startproc
	endbr64
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	# xmm1 lane 0 = x, rcx = l (sign-extended to 64 bits)
	vmovd	%edx, %xmm1
	movslq	%edi, %rcx
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	andq	$-32, %rsp
	# keep a copy of x in memory; the xmm tail reloads it with vbroadcastss
	vmovd	%xmm1, -4(%rsp)
	# empty range: straight to the epilogue
	cmpl	%esi, %ecx
	jg	.myL106
	# r9d = r - l, r10d = element count, edi = r (kept for the scalar tail)
	movl	%esi, %r9d
	movl	%esi, %edi
	subl	%ecx, %r9d
	leal	1(%r9), %r10d
	# fewer than eight elements (unsigned compare): skip the ymm part
	cmpl	$6, %r9d
	jbe	.myL68
	# Set-up for the ymm part:
	#   ymm3 = x broadcast, ymm2 = all zero
	#   rax  = address of the current group (starts at &a[l])
	#   rdx  = rax + 32 (address of the following group)
	#   r11  = &a[l] + 32 * (count / 8), one past the last whole group
	#   esi  = ((count / 8) - 1) mod 8 = groups to peel before .myL64
	movl	%r10d, %edx
	vpbroadcastd	%xmm1, %ymm3
	vpxor	%xmm2, %xmm2, %xmm2
	movslq	%ecx, %rax
	shrl	$3, %edx
	leaq	a(%rip), %r8
	leal	-1(%rdx), %esi
	leaq	(%r8,%rax,4), %rax
	salq	$5, %rsi
	leaq	32(%rax), %rdx
	leaq	(%rsi,%rdx), %r11
	shrq	$5, %rsi
	andl	$7, %esi
	je	.myL64
	# first peeled group; the remaining peel count picks the entry below
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
	cmpq	$1, %rsi
	je	.myL64
	cmpq	$2, %rsi
	je	.myL91
	cmpq	$3, %rsi
	je	.myL92
	cmpq	$4, %rsi
	je	.myL93
	cmpq	$5, %rsi
	je	.myL94
	cmpq	$6, %rsi
	jne	.myL108
	# Peeled groups: each stanza handles the group at rax, then advances
	# rax and rdx by one group and falls through into the next stanza.
.myL95:
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
.myL94:
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
.myL93:
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
.myL92:
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
.myL91:
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
	jmp	.myL64
	.p2align 4,,10
	.p2align 3
	# Main loop body: seven groups at old rdx+0 .. old rdx+192 are handled
	# here (rdx is advanced by 256 early, hence the negative offsets); the
	# eighth group, at rax = old rdx + 224, is handled by .myL64 below.
.myL109:
	vpaddd	(%rdx), %ymm3, %ymm0
	leaq	224(%rdx), %rax
	addq	$256, %rdx
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -256(%rdx)
	vpaddd	-224(%rdx), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -224(%rdx)
	vpaddd	-192(%rdx), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -192(%rdx)
	vpaddd	-160(%rdx), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -160(%rdx)
	vpaddd	-128(%rdx), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -128(%rdx)
	vpaddd	-96(%rdx), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -96(%rdx)
	vpaddd	-64(%rdx), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, -64(%rdx)
	# One group at rax; the ymm part is finished once rdx reaches r11.
.myL64:
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	cmpq	%r11, %rdx
	jne	.myL109
	# edx = elements already handled (count rounded down to a multiple of
	# eight), eax = next index. Nothing left -> .myL110.
	movl	%r10d, %edx
	andl	$-8, %edx
	leal	(%rdx,%rcx), %eax
	cmpl	%edx, %r10d
	je	.myL110
	vzeroupper
	# Tail entry. On arrival: edx = elements already handled, eax = next
	# index, r9d = r - l. Afterwards esi = elements still to do.
.myL63:
	subl	%edx, %r9d
	leal	1(%r9), %esi
	# three or fewer left: scalar only
	cmpl	$2, %r9d
	jbe	.myL66
	# one xmm group of four ints, x reloaded from the stack copy
	addq	%rdx, %rcx
	vbroadcastss	-4(%rsp), %xmm0
	vpxor	%xmm2, %xmm2, %xmm2
	leaq	(%r8,%rcx,4), %rdx
	vpaddd	(%rdx), %xmm0, %xmm0
	vpmaxsd	%xmm2, %xmm0, %xmm0
	vmovdqu	%xmm0, (%rdx)
	movl	%esi, %edx
	andl	$-4, %edx
	addl	%edx, %eax
	cmpl	%edx, %esi
	je	.myL106
	# Scalar tail: up to three elements at indices eax, eax+1, eax+2,
	# each guarded by a comparison against r (held in edi).
.myL66:
	movslq	%eax, %rdx
	vpxor	%xmm2, %xmm2, %xmm2
	vmovd	(%r8,%rdx,4), %xmm0
	vpaddd	%xmm0, %xmm1, %xmm0
	vpmaxsd	%xmm2, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	leal	1(%rax), %edx
	cmpl	%edx, %edi
	jl	.myL106
	movslq	%edx, %rdx
	addl	$2, %eax
	vpinsrd	$0, (%r8,%rdx,4), %xmm2, %xmm0
	vpaddd	%xmm0, %xmm1, %xmm0
	vpmaxsd	%xmm2, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	cmpl	%eax, %edi
	jl	.myL106
	cltq
	vpinsrd	$0, (%r8,%rax,4), %xmm2, %xmm0
	vpaddd	%xmm0, %xmm1, %xmm1
	vpmaxsd	%xmm2, %xmm1, %xmm1
	vmovd	%xmm1, (%r8,%rax,4)
	# Common epilogue: leave restores rsp and rbp.
.myL106:
	leave
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
	# Peel count of seven: one extra group here, then the five stanzas
	# starting at .myL95.
.myL108:
	.cfi_restore_state
	vpaddd	(%rax), %ymm3, %ymm0
	vpmaxsd	%ymm2, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rax)
	movq	%rdx, %rax
	addq	$32, %rdx
	jmp	.myL95
	.p2align 4,,10
	.p2align 3
	# Exit used when the ymm part covered the whole range.
.myL110:
	vzeroupper
	leave
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	# Short-range entry (fewer than eight elements): nothing handled yet,
	# so eax = l, edx = 0; load the base of a and go to the tail.
.myL68:
	.cfi_restore_state
	movl	%ecx, %eax
	xorl	%edx, %edx
	leaq	a(%rip), %r8
	jmp	.myL63
	.cfi_endproc
.myLFE9901:
	.size	_Z4add2iii, .-_Z4add2iii
	# -----------------------------------------------------------------
	# add3(l, r, x, y)  -- exported as _Z4add3iiii (AT&T syntax, x86-64, AVX2)
	# In:     edi = l, esi = r, edx = x, ecx = y (signed 32-bit values)
	# Effect: for every index i with l <= i <= r
	#             a[i] = max(min(a[i] + x, c[i]) + y, 0)
	#         i.e. add x and clamp from above by c[i], then add y and clamp
	#         from below by zero. Nothing happens when l > r. No return value.
	# Memory: reads and writes the global array a, reads the global array c,
	#         and spills x and y into the 8 bytes just below the realigned rsp.
	# Saves and restores rbp, rbx and r12; makes no calls.
	# Shape:  ymm groups of eight ints, then at most one xmm group of four
	#         ints, then at most three single ints.
	# -----------------------------------------------------------------
	.p2align 4
	.globl	_Z4add3iiii
	.type	_Z4add3iiii, @function
_Z4add3iiii:
.myLFB9902:
	.cfi_startproc
	endbr64
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	# xmm1 lane 0 = x, xmm2 lane 0 = y
	vmovd	%edx, %xmm1
	vmovd	%ecx, %xmm2
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	# memory copies of x and y; the xmm tail reloads them with vbroadcastss
	vmovd	%xmm1, -4(%rsp)
	vmovd	%xmm2, -8(%rsp)
	# empty range: straight to the epilogue
	cmpl	%esi, %edi
	jg	.myL157
	# r11d = r - l, ebx = element count, r9d = r (kept for the scalar tail)
	movl	%esi, %r11d
	movl	%esi, %r9d
	subl	%edi, %r11d
	leal	1(%r11), %ebx
	# fewer than eight elements (unsigned compare): skip the ymm part
	cmpl	$6, %r11d
	jbe	.myL118
	# Set-up for the ymm part:
	#   r12 = (count / 8) * 32 = byte length covered by whole ymm groups
	#   rdx = &a[l], rcx = &c[l], rax = running byte offset (starts at 0)
	#   ymm5 = x broadcast, ymm4 = y broadcast, ymm3 = all zero
	movl	%ebx, %r12d
	movslq	%edi, %rcx
	leaq	a(%rip), %r8
	xorl	%eax, %eax
	shrl	$3, %r12d
	salq	$2, %rcx
	vpbroadcastd	%xmm1, %ymm5
	leaq	c(%rip), %r10
	salq	$5, %r12
	leaq	(%r8,%rcx), %rdx
	vpbroadcastd	%xmm2, %ymm4
	addq	%r10, %rcx
	# rsi = (number of ymm groups) mod 8. That many single groups are peeled
	# off first so the main loop can always do eight groups per pass.
	# Remainder k in 1..6 enters at the label that leaves k groups to run;
	# remainder 7 goes through .myL159, remainder 0 goes directly to .myL114.
	leaq	-32(%r12), %rsi
	vpxor	%xmm3, %xmm3, %xmm3
	shrq	$5, %rsi
	addq	$1, %rsi
	andl	$7, %esi
	je	.myL114
	cmpq	$1, %rsi
	je	.myL141
	cmpq	$2, %rsi
	je	.myL142
	cmpq	$3, %rsi
	je	.myL143
	cmpq	$4, %rsi
	je	.myL144
	cmpq	$5, %rsi
	je	.myL145
	cmpq	$6, %rsi
	jne	.myL159
	# Peeled groups: each stanza handles one ymm group and advances rax by 32,
	# falling through into the next stanza.
.myL146:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL145:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL144:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL143:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL142:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL141:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
	# all whole groups already done by the peeled part?
	cmpq	%r12, %rax
	je	.myL155
	# Main loop: eight ymm groups (256 bytes) per pass. rsi = rax + 32 is
	# used as a second base so the offsets cover rax+0 .. rax+224.
.myL114:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	leaq	32(%rax), %rsi
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	vpaddd	32(%rdx,%rax), %ymm5, %ymm0
	vpminsd	32(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 32(%rdx,%rax)
	vpaddd	64(%rdx,%rax), %ymm5, %ymm0
	vpminsd	64(%rcx,%rax), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 64(%rdx,%rax)
	vpaddd	64(%rdx,%rsi), %ymm5, %ymm0
	vpminsd	64(%rcx,%rsi), %ymm0, %ymm0
	leaq	224(%rsi), %rax
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 64(%rdx,%rsi)
	vpaddd	96(%rdx,%rsi), %ymm5, %ymm0
	vpminsd	96(%rcx,%rsi), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 96(%rdx,%rsi)
	vpaddd	128(%rdx,%rsi), %ymm5, %ymm0
	vpminsd	128(%rcx,%rsi), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 128(%rdx,%rsi)
	vpaddd	160(%rdx,%rsi), %ymm5, %ymm0
	vpminsd	160(%rcx,%rsi), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 160(%rdx,%rsi)
	vpaddd	192(%rdx,%rsi), %ymm5, %ymm0
	vpminsd	192(%rcx,%rsi), %ymm0, %ymm0
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, 192(%rdx,%rsi)
	cmpq	%r12, %rax
	jne	.myL114
	# After the ymm part: edx = elements already handled (count rounded down
	# to a multiple of eight), eax = next index. Nothing left -> .myL160.
.myL155:
	movl	%ebx, %edx
	andl	$-8, %edx
	leal	(%rdx,%rdi), %eax
	cmpl	%edx, %ebx
	je	.myL160
	vzeroupper
	# Tail entry. On arrival: edx = elements already handled, eax = next
	# index, r11d = r - l. Afterwards ecx = elements still to do.
.myL113:
	subl	%edx, %r11d
	leal	1(%r11), %ecx
	# three or fewer left: scalar only
	cmpl	$2, %r11d
	jbe	.myL116
	# one xmm group of four ints; x and y reloaded from the stack copies
	# (xmm3 first holds y, then is zeroed for the lower clamp)
	movslq	%edi, %rdi
	vbroadcastss	-8(%rsp), %xmm3
	vbroadcastss	-4(%rsp), %xmm0
	addq	%rdx, %rdi
	leaq	(%r8,%rdi,4), %rdx
	vpaddd	(%rdx), %xmm0, %xmm0
	vpminsd	(%r10,%rdi,4), %xmm0, %xmm0
	vpaddd	%xmm3, %xmm0, %xmm0
	vpxor	%xmm3, %xmm3, %xmm3
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vmovdqu	%xmm0, (%rdx)
	movl	%ecx, %edx
	andl	$-4, %edx
	addl	%edx, %eax
	cmpl	%edx, %ecx
	je	.myL157
	# Scalar tail: up to three elements at indices eax, eax+1, eax+2,
	# each guarded by a comparison against r (held in r9d).
.myL116:
	movslq	%eax, %rdx
	vmovd	(%r8,%rdx,4), %xmm0
	vmovd	(%r10,%rdx,4), %xmm3
	vpaddd	%xmm0, %xmm1, %xmm0
	vpminsd	%xmm3, %xmm0, %xmm0
	vpxor	%xmm3, %xmm3, %xmm3
	vpaddd	%xmm2, %xmm0, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	leal	1(%rax), %edx
	cmpl	%edx, %r9d
	jl	.myL157
	movslq	%edx, %rdx
	addl	$2, %eax
	vpinsrd	$0, (%r8,%rdx,4), %xmm3, %xmm0
	vpinsrd	$0, (%r10,%rdx,4), %xmm3, %xmm3
	vpaddd	%xmm0, %xmm1, %xmm0
	vpminsd	%xmm3, %xmm0, %xmm0
	vpxor	%xmm3, %xmm3, %xmm3
	vpaddd	%xmm2, %xmm0, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	cmpl	%eax, %r9d
	jl	.myL157
	cltq
	vpinsrd	$0, (%r8,%rax,4), %xmm3, %xmm0
	vpaddd	%xmm0, %xmm1, %xmm0
	vpinsrd	$0, (%r10,%rax,4), %xmm3, %xmm1
	vpminsd	%xmm1, %xmm0, %xmm0
	vpaddd	%xmm2, %xmm0, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rax,4)
	# Common epilogue: undo the stack realignment and restore rbx, r12, rbp.
.myL157:
	leaq	-16(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
	# Remainder of seven peeled groups: do the first one here (offset 0),
	# then run the six stanzas starting at .myL146 with rax = 32.
.myL159:
	.cfi_restore_state
	vpaddd	(%rdx), %ymm5, %ymm0
	vpminsd	(%rcx), %ymm0, %ymm0
	movl	$32, %eax
	vpaddd	%ymm4, %ymm0, %ymm0
	vpmaxsd	%ymm3, %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx)
	jmp	.myL146
	.p2align 4,,10
	.p2align 3
	# Exit used when the ymm part covered the whole range.
.myL160:
	vzeroupper
	leaq	-16(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	# Short-range entry (fewer than eight elements): nothing handled yet,
	# so eax = l, edx = 0; load the array bases and go to the tail.
.myL118:
	.cfi_restore_state
	movl	%edi, %eax
	xorl	%edx, %edx
	leaq	a(%rip), %r8
	leaq	c(%rip), %r10
	jmp	.myL113
	.cfi_endproc
.myLFE9902:
	.size	_Z4add3iiii, .-_Z4add3iiii
	# -----------------------------------------------------------------
	# add4(l, r, x, y)  -- exported as _Z4add4iiii (AT&T syntax, x86-64, AVX2)
	# In:     edi = l, esi = r, edx = x, ecx = y (signed 32-bit values)
	# Effect: for every index i with l <= i <= r
	#             a[i] = min(max(a[i] + x, 0) + y, c[i])
	#         i.e. add x and clamp from below by zero, then add y and clamp
	#         from above by c[i]. Nothing happens when l > r. No return value.
	# Memory: reads and writes the global array a, reads the global array c,
	#         and spills x and y into the 8 bytes just below the realigned rsp.
	# Saves and restores rbp, rbx and r12; makes no calls.
	# Shape:  ymm groups of eight ints, then at most one xmm group of four
	#         ints, then at most three single ints.
	# -----------------------------------------------------------------
	.p2align 4
	.globl	_Z4add4iiii
	.type	_Z4add4iiii, @function
_Z4add4iiii:
.myLFB9903:
	.cfi_startproc
	endbr64
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	# xmm1 lane 0 = x, xmm2 lane 0 = y
	vmovd	%edx, %xmm1
	vmovd	%ecx, %xmm2
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	andq	$-32, %rsp
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	# memory copies of x and y; the xmm tail reloads them with vbroadcastss
	vmovd	%xmm1, -4(%rsp)
	vmovd	%xmm2, -8(%rsp)
	# empty range: straight to the epilogue
	cmpl	%esi, %edi
	jg	.myL207
	# r11d = r - l, ebx = element count, r9d = r (kept for the scalar tail)
	movl	%esi, %r11d
	movl	%esi, %r9d
	subl	%edi, %r11d
	leal	1(%r11), %ebx
	# fewer than eight elements (unsigned compare): skip the ymm part
	cmpl	$6, %r11d
	jbe	.myL168
	# Set-up for the ymm part:
	#   r12 = (count / 8) * 32 = byte length covered by whole ymm groups
	#   rdx = &a[l], rcx = &c[l], rax = running byte offset (starts at 0)
	#   ymm5 = x broadcast, ymm3 = y broadcast, ymm4 = all zero
	movl	%ebx, %r12d
	movslq	%edi, %rcx
	leaq	a(%rip), %r8
	xorl	%eax, %eax
	shrl	$3, %r12d
	salq	$2, %rcx
	vpbroadcastd	%xmm1, %ymm5
	leaq	c(%rip), %r10
	salq	$5, %r12
	leaq	(%r8,%rcx), %rdx
	vpbroadcastd	%xmm2, %ymm3
	addq	%r10, %rcx
	# rsi = (number of ymm groups) mod 8. That many single groups are peeled
	# off first so the main loop can always do eight groups per pass.
	# Remainder k in 1..6 enters at the label that leaves k groups to run;
	# remainder 7 goes through .myL209, remainder 0 goes directly to .myL164.
	leaq	-32(%r12), %rsi
	vpxor	%xmm4, %xmm4, %xmm4
	shrq	$5, %rsi
	addq	$1, %rsi
	andl	$7, %esi
	je	.myL164
	cmpq	$1, %rsi
	je	.myL191
	cmpq	$2, %rsi
	je	.myL192
	cmpq	$3, %rsi
	je	.myL193
	cmpq	$4, %rsi
	je	.myL194
	cmpq	$5, %rsi
	je	.myL195
	cmpq	$6, %rsi
	jne	.myL209
	# Peeled groups: each stanza handles one ymm group and advances rax by 32,
	# falling through into the next stanza.
.myL196:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL195:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL194:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL193:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL192:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
.myL191:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	addq	$32, %rax
	# all whole groups already done by the peeled part?
	cmpq	%r12, %rax
	je	.myL205
	# Main loop: eight ymm groups (256 bytes) per pass. rsi = rax + 32 is
	# used as a second base so the offsets cover rax+0 .. rax+224.
.myL164:
	vpaddd	(%rdx,%rax), %ymm5, %ymm0
	leaq	32(%rax), %rsi
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx,%rax)
	vpaddd	32(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	32(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, 32(%rdx,%rax)
	vpaddd	64(%rdx,%rax), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	64(%rcx,%rax), %ymm0, %ymm0
	vmovdqu	%ymm0, 64(%rdx,%rax)
	vpaddd	64(%rdx,%rsi), %ymm5, %ymm0
	leaq	224(%rsi), %rax
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	64(%rcx,%rsi), %ymm0, %ymm0
	vmovdqu	%ymm0, 64(%rdx,%rsi)
	vpaddd	96(%rdx,%rsi), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	96(%rcx,%rsi), %ymm0, %ymm0
	vmovdqu	%ymm0, 96(%rdx,%rsi)
	vpaddd	128(%rdx,%rsi), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	128(%rcx,%rsi), %ymm0, %ymm0
	vmovdqu	%ymm0, 128(%rdx,%rsi)
	vpaddd	160(%rdx,%rsi), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	160(%rcx,%rsi), %ymm0, %ymm0
	vmovdqu	%ymm0, 160(%rdx,%rsi)
	vpaddd	192(%rdx,%rsi), %ymm5, %ymm0
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	192(%rcx,%rsi), %ymm0, %ymm0
	vmovdqu	%ymm0, 192(%rdx,%rsi)
	cmpq	%r12, %rax
	jne	.myL164
	# After the ymm part: edx = elements already handled (count rounded down
	# to a multiple of eight), eax = next index. Nothing left -> .myL210.
.myL205:
	movl	%ebx, %edx
	andl	$-8, %edx
	leal	(%rdx,%rdi), %eax
	cmpl	%edx, %ebx
	je	.myL210
	vzeroupper
	# Tail entry. On arrival: edx = elements already handled, eax = next
	# index, r11d = r - l. Afterwards ecx = elements still to do.
.myL163:
	subl	%edx, %r11d
	leal	1(%r11), %ecx
	# three or fewer left: scalar only
	cmpl	$2, %r11d
	jbe	.myL166
	# one xmm group of four ints; x and y reloaded from the stack copies
	# (xmm3 first holds zero for the lower clamp, then y)
	movslq	%edi, %rdi
	vbroadcastss	-4(%rsp), %xmm0
	vpxor	%xmm3, %xmm3, %xmm3
	addq	%rdx, %rdi
	leaq	(%r8,%rdi,4), %rdx
	vpaddd	(%rdx), %xmm0, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vbroadcastss	-8(%rsp), %xmm3
	vpaddd	%xmm3, %xmm0, %xmm0
	vpminsd	(%r10,%rdi,4), %xmm0, %xmm0
	vmovdqu	%xmm0, (%rdx)
	movl	%ecx, %edx
	andl	$-4, %edx
	addl	%edx, %eax
	cmpl	%edx, %ecx
	je	.myL207
	# Scalar tail: up to three elements at indices eax, eax+1, eax+2,
	# each guarded by a comparison against r (held in r9d).
.myL166:
	movslq	%eax, %rdx
	vpxor	%xmm3, %xmm3, %xmm3
	vmovd	(%r8,%rdx,4), %xmm0
	vpaddd	%xmm0, %xmm1, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vpinsrd	$0, (%r10,%rdx,4), %xmm3, %xmm3
	vpaddd	%xmm2, %xmm0, %xmm0
	vpminsd	%xmm3, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	leal	1(%rax), %edx
	cmpl	%edx, %r9d
	jl	.myL207
	movslq	%edx, %rdx
	vpxor	%xmm3, %xmm3, %xmm3
	addl	$2, %eax
	vmovd	(%r8,%rdx,4), %xmm0
	vpaddd	%xmm0, %xmm1, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm0
	vpinsrd	$0, (%r10,%rdx,4), %xmm3, %xmm3
	vpaddd	%xmm2, %xmm0, %xmm0
	vpminsd	%xmm3, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rdx,4)
	cmpl	%eax, %r9d
	jl	.myL207
	cltq
	vmovd	(%r8,%rax,4), %xmm0
	vpaddd	%xmm0, %xmm1, %xmm0
	vpxor	%xmm1, %xmm1, %xmm1
	vpmaxsd	%xmm1, %xmm0, %xmm0
	vpinsrd	$0, (%r10,%rax,4), %xmm1, %xmm1
	vpaddd	%xmm2, %xmm0, %xmm0
	vpminsd	%xmm1, %xmm0, %xmm0
	vmovd	%xmm0, (%r8,%rax,4)
	# Common epilogue: undo the stack realignment and restore rbx, r12, rbp.
.myL207:
	leaq	-16(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
	# Remainder of seven peeled groups: do the first one here (offset 0),
	# then run the six stanzas starting at .myL196 with rax = 32.
.myL209:
	.cfi_restore_state
	vpaddd	(%rdx), %ymm5, %ymm0
	movl	$32, %eax
	vpmaxsd	%ymm4, %ymm0, %ymm0
	vpaddd	%ymm3, %ymm0, %ymm0
	vpminsd	(%rcx), %ymm0, %ymm0
	vmovdqu	%ymm0, (%rdx)
	jmp	.myL196
	.p2align 4,,10
	.p2align 3
	# Exit used when the ymm part covered the whole range.
.myL210:
	vzeroupper
	leaq	-16(%rbp), %rsp
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	# Short-range entry (fewer than eight elements): nothing handled yet,
	# so eax = l, edx = 0; load the array bases and go to the tail.
.myL168:
	.cfi_restore_state
	movl	%edi, %eax
	xorl	%edx, %edx
	leaq	a(%rip), %r8
	leaq	c(%rip), %r10
	jmp	.myL163
	.cfi_endproc
.myLFE9903:
	.size	_Z4add4iiii, .-_Z4add4iiii
)dard");
/*
void add1(int l, int r, int x){
//	debug(1, l, r, x);
	for (int i = l; i <= r; i++){
		a[i] = (a[i]+x > c[i]? c[i]: a[i]+x);
	}
}

void add2(int l, int r, int x){
//	debug(2, l, r, x);
	for (int i = l; i <= r; i++){
		a[i] = (a[i]+x < 0? 0: a[i]+x);
	}
}

void add3(int l, int r, int x, int y){
//	debug(3, l, r, x, y);
	for (int i = l; i <= r; i++){
		a[i] = (a[i]+x > c[i]? c[i]: a[i]+x);
//		debug(i, a[i]);
		a[i] = (a[i]+y < 0? 0: a[i]+y);
//		debug(i, a[i]);
	}
}

void add4(int l, int r, int x, int y){
	//debug(4, l, r, x, y);
	for (int i = l; i <= r; i++){
		a[i] = (a[i]+x < 0? 0: a[i]+x);
		a[i] = (a[i]+y > c[i]? c[i]: a[i]+y);
	}
}*/

/*
 * Applies the q range updates (L[j], R[j], V[j]) in order to n values that
 * start at zero and are capped per index by C, and returns the final values.
 *
 * A positive V[j] adds and clamps to C[i] (add1); a negative V[j] adds and
 * clamps to zero (add2).
 *
 * Strategy: the index space is swept in consecutive blocks of S elements, and
 * inside one block the queries are consumed two at a time. For a pair, the
 * indices covered by only one of the two queries get that query's single
 * update, while the indices covered by both get a fused two-step update
 * (add1 with the summed delta, add2 with the summed delta, add3 or add4,
 * depending on the two signs). Every helper ignores ranges with left > right,
 * so empty pieces need no special-casing here.
 *
 * Parameters: C - capacities; L, R - inclusive range ends; V - deltas.
 * Assumes n and q do not exceed maxn and 0 <= L[j] <= R[j] < n; none of this
 * is validated here. The capped sums below assume |V[j]| <= 1e9 so that
 * v[i] + v[i+1] fits in int -- TODO confirm against the task limits.
 * Returns: vector of n final values.
 */
vector<int> distribute_candies(vector<int> C, vector<int> L, vector<int> R, vector<int> V) {
	n = C.size();
	q = L.size();
	// a[] is a global that persists between calls; start from empty boxes so
	// a repeated call does not continue from the previous call's result.
	fill(a, a + n, 0);
	for (int i = 0; i < n; i++){
		c[i] = C[i];
	}
	for (int i = 0; i < q; i++){
		l[i] = L[i];
		r[i] = R[i];
		v[i] = V[i];
	}
	for (int st = 0; st < n; st += S){
		// Last index of the current block; it may lie beyond n-1, in which
		// case the clamp against r[] below keeps ranges inside the data.
		const int blockLast = st + S - 1;
		for (int i = 0; i < q; i += 2){
			// Query i restricted to this block (empty when lo1 > hi1).
			const int lo1 = max(st, l[i]);
			const int hi1 = min(blockLast, r[i]);
			if (i + 1 == q){
				// Odd query count: the final query has no partner.
				if (v[i] > 0) add1(lo1, hi1, v[i]);
				if (v[i] < 0) add2(lo1, hi1, v[i]);
				break;
			}
			// Query i+1 restricted to this block.
			const int lo2 = max(st, l[i+1]);
			const int hi2 = min(blockLast, r[i+1]);
			if (v[i] == 0 && v[i+1] == 0) continue;
			// In every branch the five calls cover, in this order:
			//   part of query i left of query i+1, part of query i+1 left of
			//   query i, the overlap, part of query i right of query i+1,
			//   part of query i+1 right of query i.
			if (v[i] >= 0 && v[i+1] >= 0){
				add1(lo1, min(hi1, lo2-1), v[i]);
				add1(lo2, min(hi2, lo1-1), v[i+1]);
				// Two capped additions equal one capped addition of the sum;
				// the sum itself is capped at 1e9.
				add1(max(lo1, lo2), min(hi1, hi2), min(v[i]+v[i+1],(int)1e9));
				add1(max(lo1, hi2+1), hi1, v[i]);
				add1(max(lo2, hi1+1), hi2, v[i+1]);
			}
			else if (v[i] <= 0 && v[i+1] <= 0){
				add2(lo1, min(hi1, lo2-1), v[i]);
				add2(lo2, min(hi2, lo1-1), v[i+1]);
				// Same idea for two removals, with the sum capped at -1e9.
				add2(max(lo1, lo2), min(hi1, hi2), max(v[i]+v[i+1],(int)-1e9));
				add2(max(lo1, hi2+1), hi1, v[i]);
				add2(max(lo2, hi1+1), hi2, v[i+1]);
			}
			else if (v[i] >= 0 && v[i+1] <= 0){
				add1(lo1, min(hi1, lo2-1), v[i]);
				add2(lo2, min(hi2, lo1-1), v[i+1]);
				// Overlap: addition first, then removal.
				add3(max(lo1, lo2), min(hi1, hi2), v[i], v[i+1]);
				add1(max(lo1, hi2+1), hi1, v[i]);
				add2(max(lo2, hi1+1), hi2, v[i+1]);
			}
			else{
				add2(lo1, min(hi1, lo2-1), v[i]);
				add1(lo2, min(hi2, lo1-1), v[i+1]);
				// Overlap: removal first, then addition.
				add4(max(lo1, lo2), min(hi1, hi2), v[i], v[i+1]);
				add2(max(lo1, hi2+1), hi1, v[i]);
				add1(max(lo2, hi1+1), hi2, v[i+1]);
			}
		}
	}
	// Copy the first n results out in one allocation.
	return vector<int>(a, a + n);
}
# 결과 실행 시간 메모리 Grader output
1 Correct 0 ms 212 KB Output is correct
2 Correct 0 ms 212 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 939 ms 11560 KB Output is correct
2 Correct 910 ms 11708 KB Output is correct
3 Correct 917 ms 11620 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 340 KB Output is correct
2 Correct 56 ms 7404 KB Output is correct
3 Correct 47 ms 5544 KB Output is correct
4 Correct 862 ms 11616 KB Output is correct
5 Correct 867 ms 11584 KB Output is correct
6 Correct 862 ms 11532 KB Output is correct
7 Correct 812 ms 11712 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1 ms 212 KB Output is correct
2 Correct 0 ms 340 KB Output is correct
3 Correct 54 ms 7408 KB Output is correct
4 Correct 49 ms 4664 KB Output is correct
5 Correct 1333 ms 11624 KB Output is correct
6 Correct 1329 ms 11552 KB Output is correct
7 Correct 1333 ms 11632 KB Output is correct
8 Correct 1386 ms 11596 KB Output is correct
9 Correct 1383 ms 11712 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 0 ms 212 KB Output is correct
2 Correct 0 ms 212 KB Output is correct
3 Correct 1 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
6 Correct 939 ms 11560 KB Output is correct
7 Correct 910 ms 11708 KB Output is correct
8 Correct 917 ms 11620 KB Output is correct
9 Correct 1 ms 340 KB Output is correct
10 Correct 56 ms 7404 KB Output is correct
11 Correct 47 ms 5544 KB Output is correct
12 Correct 862 ms 11616 KB Output is correct
13 Correct 867 ms 11584 KB Output is correct
14 Correct 862 ms 11532 KB Output is correct
15 Correct 812 ms 11712 KB Output is correct
16 Correct 1 ms 212 KB Output is correct
17 Correct 0 ms 340 KB Output is correct
18 Correct 54 ms 7408 KB Output is correct
19 Correct 49 ms 4664 KB Output is correct
20 Correct 1333 ms 11624 KB Output is correct
21 Correct 1329 ms 11552 KB Output is correct
22 Correct 1333 ms 11632 KB Output is correct
23 Correct 1386 ms 11596 KB Output is correct
24 Correct 1383 ms 11712 KB Output is correct
25 Correct 0 ms 212 KB Output is correct
26 Correct 44 ms 4560 KB Output is correct
27 Correct 56 ms 7412 KB Output is correct
28 Correct 874 ms 11628 KB Output is correct
29 Correct 902 ms 11668 KB Output is correct
30 Correct 886 ms 11632 KB Output is correct
31 Correct 937 ms 11608 KB Output is correct
32 Correct 901 ms 11628 KB Output is correct