Submission #831345

| # | Submission time | Handle | Problem | Language | Result | Execution time | Memory |
|---|---|---|---|---|---|---|---|
| 831345 | 2023-08-20T06:26:05 Z | ymm | Distributing Candies (IOI21_candies) | C++17 | 100 / 100 | 1071 ms | 8960 KB |

#include "candies.h"
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (r); ++x)
typedef long long ll;
using namespace std;

#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2")

const int inf = 1e9+1; // larger than any capacity or query value; safe saturation bound
const int N = 200'010; // array size (n <= 200'000, plus slack)
const int S = 2048;    // block size for the query-batching pass in distribute_candies
int a[N], c[N];        // current candy counts and box capacities

asm(R"dard(
_Z3addiii:
.myLFB9795:
	.cfi_startproc
	vmovd	%edx, %xmm1
	movslq	%esi, %r10
	movslq	%edi, %rdx
	cmpq	%r10, %rdx
	jge	.myL55
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%r10, %r8
	vmovdqa	%xmm1, %xmm0
	subq	%rdx, %r8
	leaq	-1(%r8), %rax
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	movq	%rdx, %rbx
	cmpq	$6, %rax
	jbe	.myL8
	movq	%r8, %r12
	leaq	a(%rip), %r9
	xorl	%eax, %eax
	shrq	$3, %r12
	leaq	0(,%rdx,4), %rdi
	vpbroadcastd	%xmm1, %ymm2
	salq	$5, %r12
	leaq	c(%rip), %r11
	leaq	(%r9,%rdi), %rcx
	leaq	-32(%r12), %rsi
	addq	%r11, %rdi
	shrq	$5, %rsi
	addq	$1, %rsi
	andl	$7, %esi
	je	.myL4
	cmpq	$1, %rsi
	je	.myL35
	cmpq	$2, %rsi
	je	.myL36
	cmpq	$3, %rsi
	je	.myL37
	cmpq	$4, %rsi
	je	.myL38
	cmpq	$5, %rsi
	je	.myL39
	cmpq	$6, %rsi
	jne	.myL59
.myL40:
	vpaddd	(%rcx,%rax), %ymm2, %ymm5
	vpminsd	(%rdi,%rax), %ymm5, %ymm6
	vmovdqu	%ymm6, (%rcx,%rax)
	addq	$32, %rax
.myL39:
	vpaddd	(%rcx,%rax), %ymm2, %ymm7
	vpminsd	(%rdi,%rax), %ymm7, %ymm8
	vmovdqu	%ymm8, (%rcx,%rax)
	addq	$32, %rax
.myL38:
	vpaddd	(%rcx,%rax), %ymm2, %ymm9
	vpminsd	(%rdi,%rax), %ymm9, %ymm10
	vmovdqu	%ymm10, (%rcx,%rax)
	addq	$32, %rax
.myL37:
	vpaddd	(%rcx,%rax), %ymm2, %ymm11
	vpminsd	(%rdi,%rax), %ymm11, %ymm12
	vmovdqu	%ymm12, (%rcx,%rax)
	addq	$32, %rax
.myL36:
	vpaddd	(%rcx,%rax), %ymm2, %ymm13
	vpminsd	(%rdi,%rax), %ymm13, %ymm14
	vmovdqu	%ymm14, (%rcx,%rax)
	addq	$32, %rax
.myL35:
	vpaddd	(%rcx,%rax), %ymm2, %ymm15
	vpminsd	(%rdi,%rax), %ymm15, %ymm3
	vmovdqu	%ymm3, (%rcx,%rax)
	addq	$32, %rax
	cmpq	%rax, %r12
	je	.myL51
.myL4:
	vpaddd	(%rcx,%rax), %ymm2, %ymm4
	vpminsd	(%rdi,%rax), %ymm4, %ymm5
	vpaddd	32(%rcx,%rax), %ymm2, %ymm6
	vpaddd	64(%rcx,%rax), %ymm2, %ymm8
	vmovdqu	%ymm5, (%rcx,%rax)
	vpminsd	32(%rdi,%rax), %ymm6, %ymm7
	vpaddd	96(%rcx,%rax), %ymm2, %ymm10
	vpaddd	128(%rcx,%rax), %ymm2, %ymm12
	vpaddd	160(%rcx,%rax), %ymm2, %ymm14
	vpaddd	192(%rcx,%rax), %ymm2, %ymm3
	vmovdqu	%ymm7, 32(%rcx,%rax)
	vpminsd	64(%rdi,%rax), %ymm8, %ymm9
	vpaddd	224(%rcx,%rax), %ymm2, %ymm5
	vmovdqu	%ymm9, 64(%rcx,%rax)
	vpminsd	96(%rdi,%rax), %ymm10, %ymm11
	vmovdqu	%ymm11, 96(%rcx,%rax)
	vpminsd	128(%rdi,%rax), %ymm12, %ymm13
	vmovdqu	%ymm13, 128(%rcx,%rax)
	vpminsd	160(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, 160(%rcx,%rax)
	vpminsd	192(%rdi,%rax), %ymm3, %ymm4
	vmovdqu	%ymm4, 192(%rcx,%rax)
	vpminsd	224(%rdi,%rax), %ymm5, %ymm6
	vmovdqu	%ymm6, 224(%rcx,%rax)
	addq	$256, %rax
	cmpq	%rax, %r12
	jne	.myL4
.myL51:
	movq	%r8, %rdi
	andq	$-8, %rdi
	addq	%rdi, %rdx
	testb	$7, %r8b
	je	.myL60
	vzeroupper
.myL3:
	subq	%rdi, %r8
	leaq	-1(%r8), %rcx
	cmpq	$2, %rcx
	jbe	.myL6
	addq	%rbx, %rdi
	movq	%r8, %r12
	vpshufd	$0, %xmm0, %xmm0
	leaq	(%r9,%rdi,4), %rbx
	andq	$-4, %r12
	vpaddd	(%rbx), %xmm0, %xmm2
	vpminsd	(%r11,%rdi,4), %xmm2, %xmm7
	addq	%r12, %rdx
	andl	$3, %r8d
	vmovdqu	%xmm7, (%rbx)
	je	.myL53
.myL6:
	vmovd	(%r9,%rdx,4), %xmm8
	vmovd	(%r11,%rdx,4), %xmm10
	leaq	1(%rdx), %r8
	vpaddd	%xmm8, %xmm1, %xmm9
	vpminsd	%xmm10, %xmm9, %xmm11
	vmovd	%xmm11, (%r9,%rdx,4)
	cmpq	%r10, %r8
	jge	.myL53
	vmovd	(%r9,%r8,4), %xmm12
	vmovd	(%r11,%r8,4), %xmm14
	addq	$2, %rdx
	vpaddd	%xmm12, %xmm1, %xmm13
	vpminsd	%xmm14, %xmm13, %xmm15
	vmovd	%xmm15, (%r9,%r8,4)
	cmpq	%rdx, %r10
	jle	.myL53
	vmovd	(%r9,%rdx,4), %xmm3
	vpaddd	%xmm3, %xmm1, %xmm4
	vmovd	(%r11,%rdx,4), %xmm1
	vpminsd	%xmm1, %xmm4, %xmm5
	vmovd	%xmm5, (%r9,%rdx,4)
.myL53:
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
.myL59:
	.cfi_restore_state
	vpaddd	(%rcx), %ymm2, %ymm3
	vpminsd	(%rdi), %ymm3, %ymm4
	movl	$32, %eax
	vmovdqu	%ymm4, (%rcx)
	jmp	.myL40
	.p2align 4,,10
	.p2align 3
.myL55:
	.cfi_def_cfa 7, 8
	.cfi_restore 3
	.cfi_restore 6
	.cfi_restore 12
	ret
	.p2align 4,,10
	.p2align 3
.myL60:
	.cfi_def_cfa 6, 16
	.cfi_offset 3, -32
	.cfi_offset 6, -16
	.cfi_offset 12, -24
	vzeroupper
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
.myL8:
	.cfi_restore_state
	xorl	%edi, %edi
	leaq	a(%rip), %r9
	leaq	c(%rip), %r11
	jmp	.myL3
	.cfi_endproc
.myLFE9795:
	.size	_Z3addiii, .-_Z3addiii
	.p2align 4
	.globl	_Z3subiii
	.type	_Z3subiii, @function
_Z3subiii:
.myLFB9796:
	.cfi_startproc
	vmovd	%edx, %xmm1
	movslq	%esi, %r8
	movslq	%edi, %rdx
	vmovdqa	%xmm1, %xmm3
	cmpq	%r8, %rdx
	jge	.myL113
	movq	%r8, %rsi
	movq	%rdx, %r9
	subq	%rdx, %rsi
	leaq	-1(%rsi), %rax
	cmpq	$6, %rax
	jbe	.myL68
	movq	%rsi, %rcx
	leaq	a(%rip), %rdi
	vpbroadcastd	%xmm1, %ymm2
	shrq	$3, %rcx
	leaq	(%rdi,%rdx,4), %r11
	vpxor	%xmm0, %xmm0, %xmm0
	salq	$5, %rcx
	leaq	(%rcx,%r11), %r10
	subq	$32, %rcx
	shrq	$5, %rcx
	addq	$1, %rcx
	andl	$7, %ecx
	je	.myL64
	cmpq	$1, %rcx
	je	.myL95
	cmpq	$2, %rcx
	je	.myL96
	cmpq	$3, %rcx
	je	.myL97
	cmpq	$4, %rcx
	je	.myL98
	cmpq	$5, %rcx
	je	.myL99
	cmpq	$6, %rcx
	jne	.myL114
.myL100:
	vmovdqu	(%r11), %ymm6
	addq	$32, %r11
	vpsubd	%ymm2, %ymm6, %ymm8
	vpmaxsd	%ymm0, %ymm8, %ymm9
	vmovdqu	%ymm9, -32(%r11)
.myL99:
	vmovdqu	(%r11), %ymm10
	addq	$32, %r11
	vpsubd	%ymm2, %ymm10, %ymm11
	vpmaxsd	%ymm0, %ymm11, %ymm12
	vmovdqu	%ymm12, -32(%r11)
.myL98:
	vmovdqu	(%r11), %ymm13
	addq	$32, %r11
	vpsubd	%ymm2, %ymm13, %ymm14
	vpmaxsd	%ymm0, %ymm14, %ymm15
	vmovdqu	%ymm15, -32(%r11)
.myL97:
	vmovdqu	(%r11), %ymm7
	addq	$32, %r11
	vpsubd	%ymm2, %ymm7, %ymm4
	vpmaxsd	%ymm0, %ymm4, %ymm5
	vmovdqu	%ymm5, -32(%r11)
.myL96:
	vmovdqu	(%r11), %ymm6
	addq	$32, %r11
	vpsubd	%ymm2, %ymm6, %ymm8
	vpmaxsd	%ymm0, %ymm8, %ymm9
	vmovdqu	%ymm9, -32(%r11)
.myL95:
	vmovdqu	(%r11), %ymm10
	addq	$32, %r11
	vpsubd	%ymm2, %ymm10, %ymm11
	vpmaxsd	%ymm0, %ymm11, %ymm12
	vmovdqu	%ymm12, -32(%r11)
	cmpq	%r11, %r10
	je	.myL111
.myL64:
	vmovdqu	(%r11), %ymm13
	vmovdqu	32(%r11), %ymm7
	addq	$256, %r11
	vmovdqu	-192(%r11), %ymm6
	vmovdqu	-160(%r11), %ymm10
	vpsubd	%ymm2, %ymm13, %ymm14
	vpsubd	%ymm2, %ymm7, %ymm4
	vmovdqu	-128(%r11), %ymm13
	vmovdqu	-96(%r11), %ymm7
	vpsubd	%ymm2, %ymm6, %ymm8
	vpsubd	%ymm2, %ymm10, %ymm11
	vmovdqu	-64(%r11), %ymm6
	vmovdqu	-32(%r11), %ymm10
	vpmaxsd	%ymm0, %ymm14, %ymm15
	vpmaxsd	%ymm0, %ymm4, %ymm5
	vpmaxsd	%ymm0, %ymm8, %ymm9
	vpmaxsd	%ymm0, %ymm11, %ymm12
	vmovdqu	%ymm15, -256(%r11)
	vpsubd	%ymm2, %ymm13, %ymm14
	vpsubd	%ymm2, %ymm7, %ymm4
	vmovdqu	%ymm5, -224(%r11)
	vpsubd	%ymm2, %ymm6, %ymm8
	vpsubd	%ymm2, %ymm10, %ymm11
	vmovdqu	%ymm9, -192(%r11)
	vmovdqu	%ymm12, -160(%r11)
	vpmaxsd	%ymm0, %ymm14, %ymm15
	vpmaxsd	%ymm0, %ymm4, %ymm5
	vpmaxsd	%ymm0, %ymm8, %ymm9
	vpmaxsd	%ymm0, %ymm11, %ymm12
	vmovdqu	%ymm15, -128(%r11)
	vmovdqu	%ymm5, -96(%r11)
	vmovdqu	%ymm9, -64(%r11)
	vmovdqu	%ymm12, -32(%r11)
	cmpq	%r11, %r10
	jne	.myL64
.myL111:
	movq	%rsi, %rax
	andq	$-8, %rax
	addq	%rax, %rdx
	testb	$7, %sil
	je	.myL115
	vzeroupper
.myL63:
	subq	%rax, %rsi
	leaq	-1(%rsi), %r11
	cmpq	$2, %r11
	jbe	.myL66
	addq	%r9, %rax
	vpshufd	$0, %xmm3, %xmm3
	vpxor	%xmm13, %xmm13, %xmm13
	movq	%rsi, %rcx
	leaq	(%rdi,%rax,4), %r9
	andq	$-4, %rcx
	vmovdqu	(%r9), %xmm2
	addq	%rcx, %rdx
	andl	$3, %esi
	vpsubd	%xmm3, %xmm2, %xmm0
	vpmaxsd	%xmm13, %xmm0, %xmm14
	vmovdqu	%xmm14, (%r9)
	je	.myL113
.myL66:
	vmovd	(%rdi,%rdx,4), %xmm15
	vpxor	%xmm4, %xmm4, %xmm4
	leaq	1(%rdx), %rsi
	vpsubd	%xmm1, %xmm15, %xmm7
	vpmaxsd	%xmm4, %xmm7, %xmm5
	vmovd	%xmm5, (%rdi,%rdx,4)
	cmpq	%rsi, %r8
	jle	.myL113
	vpinsrd	$0, (%rdi,%rsi,4), %xmm4, %xmm6
	addq	$2, %rdx
	vpsubd	%xmm1, %xmm6, %xmm8
	vpmaxsd	%xmm4, %xmm8, %xmm9
	vmovd	%xmm9, (%rdi,%rsi,4)
	cmpq	%rdx, %r8
	jle	.myL113
	vpinsrd	$0, (%rdi,%rdx,4), %xmm4, %xmm10
	vpsubd	%xmm1, %xmm10, %xmm11
	vpmaxsd	%xmm4, %xmm11, %xmm1
	vmovd	%xmm1, (%rdi,%rdx,4)
.myL113:
	ret
	.p2align 4,,10
	.p2align 3
.myL114:
	vmovdqu	(%r11), %ymm7
	addq	$32, %r11
	vpsubd	%ymm2, %ymm7, %ymm4
	vpmaxsd	%ymm0, %ymm4, %ymm5
	vmovdqu	%ymm5, -32(%r11)
	jmp	.myL100
	.p2align 4,,10
	.p2align 3
.myL115:
	vzeroupper
	ret
.myL68:
	xorl	%eax, %eax
	leaq	a(%rip), %rdi
	jmp	.myL63
	.cfi_endproc
.myLFE9796:
	.size	_Z3subiii, .-_Z3subiii
	.p2align 4
	.globl	_Z6addsubiiii
	.type	_Z6addsubiiii, @function
_Z6addsubiiii:
.myLFB9797:
	.cfi_startproc
	vmovd	%edx, %xmm1
	movslq	%esi, %r10
	movslq	%edi, %rdx
	cmpq	%r10, %rdx
	jge	.myL170
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%r10, %rdi
	vmovd	%ecx, %xmm2
	vmovdqa	%xmm1, %xmm0
	subq	%rdx, %rdi
	vmovdqa	%xmm2, %xmm6
	leaq	-1(%rdi), %rax
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	movq	%rdx, %rbx
	cmpq	$6, %rax
	jbe	.myL123
	movq	%rdi, %r12
	leaq	a(%rip), %r9
	vpxor	%xmm3, %xmm3, %xmm3
	xorl	%eax, %eax
	shrq	$3, %r12
	vpbroadcastd	%xmm1, %ymm5
	vpbroadcastd	%xmm2, %ymm4
	salq	$5, %r12
	leaq	0(,%rdx,4), %rsi
	leaq	c(%rip), %r11
	leaq	-32(%r12), %r8
	leaq	(%r9,%rsi), %rcx
	addq	%r11, %rsi
	shrq	$5, %r8
	addq	$1, %r8
	andl	$7, %r8d
	je	.myL119
	cmpq	$1, %r8
	je	.myL150
	cmpq	$2, %r8
	je	.myL151
	cmpq	$3, %r8
	je	.myL152
	cmpq	$4, %r8
	je	.myL153
	cmpq	$5, %r8
	je	.myL154
	cmpq	$6, %r8
	jne	.myL173
.myL155:
	vpaddd	(%rcx,%rax), %ymm5, %ymm11
	vpminsd	(%rsi,%rax), %ymm11, %ymm12
	vpsubd	%ymm4, %ymm12, %ymm13
	vpmaxsd	%ymm3, %ymm13, %ymm14
	vmovdqu	%ymm14, (%rcx,%rax)
	addq	$32, %rax
.myL154:
	vpaddd	(%rcx,%rax), %ymm5, %ymm15
	vpminsd	(%rsi,%rax), %ymm15, %ymm7
	vpsubd	%ymm4, %ymm7, %ymm8
	vpmaxsd	%ymm3, %ymm8, %ymm9
	vmovdqu	%ymm9, (%rcx,%rax)
	addq	$32, %rax
.myL153:
	vpaddd	(%rcx,%rax), %ymm5, %ymm10
	vpminsd	(%rsi,%rax), %ymm10, %ymm11
	vpsubd	%ymm4, %ymm11, %ymm12
	vpmaxsd	%ymm3, %ymm12, %ymm13
	vmovdqu	%ymm13, (%rcx,%rax)
	addq	$32, %rax
.myL152:
	vpaddd	(%rcx,%rax), %ymm5, %ymm14
	vpminsd	(%rsi,%rax), %ymm14, %ymm15
	vpsubd	%ymm4, %ymm15, %ymm7
	vpmaxsd	%ymm3, %ymm7, %ymm8
	vmovdqu	%ymm8, (%rcx,%rax)
	addq	$32, %rax
.myL151:
	vpaddd	(%rcx,%rax), %ymm5, %ymm9
	vpminsd	(%rsi,%rax), %ymm9, %ymm10
	vpsubd	%ymm4, %ymm10, %ymm11
	vpmaxsd	%ymm3, %ymm11, %ymm12
	vmovdqu	%ymm12, (%rcx,%rax)
	addq	$32, %rax
.myL150:
	vpaddd	(%rcx,%rax), %ymm5, %ymm13
	vpminsd	(%rsi,%rax), %ymm13, %ymm14
	vpsubd	%ymm4, %ymm14, %ymm15
	vpmaxsd	%ymm3, %ymm15, %ymm7
	vmovdqu	%ymm7, (%rcx,%rax)
	addq	$32, %rax
	cmpq	%rax, %r12
	je	.myL166
.myL119:
	vpaddd	(%rcx,%rax), %ymm5, %ymm8
	vpminsd	(%rsi,%rax), %ymm8, %ymm9
	vpaddd	32(%rcx,%rax), %ymm5, %ymm12
	vpaddd	64(%rcx,%rax), %ymm5, %ymm7
	vpsubd	%ymm4, %ymm9, %ymm10
	vpmaxsd	%ymm3, %ymm10, %ymm11
	vmovdqu	%ymm11, (%rcx,%rax)
	vpminsd	32(%rsi,%rax), %ymm12, %ymm13
	vpaddd	96(%rcx,%rax), %ymm5, %ymm11
	vpsubd	%ymm4, %ymm13, %ymm14
	vpmaxsd	%ymm3, %ymm14, %ymm15
	vmovdqu	%ymm15, 32(%rcx,%rax)
	vpminsd	64(%rsi,%rax), %ymm7, %ymm8
	vpaddd	128(%rcx,%rax), %ymm5, %ymm15
	vpsubd	%ymm4, %ymm8, %ymm9
	vpmaxsd	%ymm3, %ymm9, %ymm10
	vmovdqu	%ymm10, 64(%rcx,%rax)
	vpminsd	96(%rsi,%rax), %ymm11, %ymm12
	vpaddd	160(%rcx,%rax), %ymm5, %ymm10
	vpsubd	%ymm4, %ymm12, %ymm13
	vpmaxsd	%ymm3, %ymm13, %ymm14
	vmovdqu	%ymm14, 96(%rcx,%rax)
	vpminsd	128(%rsi,%rax), %ymm15, %ymm7
	vpaddd	192(%rcx,%rax), %ymm5, %ymm14
	vpsubd	%ymm4, %ymm7, %ymm8
	vpmaxsd	%ymm3, %ymm8, %ymm9
	vmovdqu	%ymm9, 128(%rcx,%rax)
	vpminsd	160(%rsi,%rax), %ymm10, %ymm11
	vpaddd	224(%rcx,%rax), %ymm5, %ymm9
	vpsubd	%ymm4, %ymm11, %ymm12
	vpmaxsd	%ymm3, %ymm12, %ymm13
	vmovdqu	%ymm13, 160(%rcx,%rax)
	vpminsd	192(%rsi,%rax), %ymm14, %ymm15
	vpsubd	%ymm4, %ymm15, %ymm7
	vpmaxsd	%ymm3, %ymm7, %ymm8
	vmovdqu	%ymm8, 192(%rcx,%rax)
	vpminsd	224(%rsi,%rax), %ymm9, %ymm10
	vpsubd	%ymm4, %ymm10, %ymm11
	vpmaxsd	%ymm3, %ymm11, %ymm12
	vmovdqu	%ymm12, 224(%rcx,%rax)
	addq	$256, %rax
	cmpq	%rax, %r12
	jne	.myL119
.myL166:
	movq	%rdi, %rsi
	andq	$-8, %rsi
	addq	%rsi, %rdx
	testb	$7, %dil
	je	.myL174
	vzeroupper
.myL118:
	subq	%rsi, %rdi
	leaq	-1(%rdi), %rcx
	cmpq	$2, %rcx
	jbe	.myL121
	addq	%rbx, %rsi
	vpshufd	$0, %xmm0, %xmm0
	vpshufd	$0, %xmm6, %xmm6
	movq	%rdi, %r12
	leaq	(%r9,%rsi,4), %rbx
	andq	$-4, %r12
	vpxor	%xmm3, %xmm3, %xmm3
	vpaddd	(%rbx), %xmm0, %xmm5
	vpminsd	(%r11,%rsi,4), %xmm5, %xmm4
	addq	%r12, %rdx
	andl	$3, %edi
	vpsubd	%xmm6, %xmm4, %xmm13
	vpmaxsd	%xmm3, %xmm13, %xmm14
	vmovdqu	%xmm14, (%rbx)
	je	.myL168
.myL121:
	vmovd	(%r9,%rdx,4), %xmm15
	vmovd	(%r11,%rdx,4), %xmm8
	vpxor	%xmm11, %xmm11, %xmm11
	vpaddd	%xmm15, %xmm1, %xmm7
	leaq	1(%rdx), %rdi
	vpminsd	%xmm8, %xmm7, %xmm9
	vpsubd	%xmm2, %xmm9, %xmm10
	vpmaxsd	%xmm11, %xmm10, %xmm12
	vmovd	%xmm12, (%r9,%rdx,4)
	cmpq	%rdi, %r10
	jle	.myL168
	vpinsrd	$0, (%r9,%rdi,4), %xmm11, %xmm0
	vpinsrd	$0, (%r11,%rdi,4), %xmm11, %xmm4
	vpxor	%xmm3, %xmm3, %xmm3
	addq	$2, %rdx
	vpaddd	%xmm0, %xmm1, %xmm5
	vpminsd	%xmm4, %xmm5, %xmm6
	vpsubd	%xmm2, %xmm6, %xmm13
	vpmaxsd	%xmm3, %xmm13, %xmm14
	vmovd	%xmm14, (%r9,%rdi,4)
	cmpq	%rdx, %r10
	jle	.myL168
	vpinsrd	$0, (%r9,%rdx,4), %xmm3, %xmm15
	vpaddd	%xmm15, %xmm1, %xmm7
	vpinsrd	$0, (%r11,%rdx,4), %xmm3, %xmm1
	vpminsd	%xmm1, %xmm7, %xmm8
	vpsubd	%xmm2, %xmm8, %xmm2
	vpmaxsd	%xmm3, %xmm2, %xmm9
	vmovd	%xmm9, (%r9,%rdx,4)
.myL168:
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
.myL173:
	.cfi_restore_state
	vpaddd	(%rcx), %ymm5, %ymm7
	vpminsd	(%rsi), %ymm7, %ymm8
	movl	$32, %eax
	vpsubd	%ymm4, %ymm8, %ymm9
	vpmaxsd	%ymm3, %ymm9, %ymm10
	vmovdqu	%ymm10, (%rcx)
	jmp	.myL155
	.p2align 4,,10
	.p2align 3
.myL170:
	.cfi_def_cfa 7, 8
	.cfi_restore 3
	.cfi_restore 6
	.cfi_restore 12
	ret
	.p2align 4,,10
	.p2align 3
.myL174:
	.cfi_def_cfa 6, 16
	.cfi_offset 3, -32
	.cfi_offset 6, -16
	.cfi_offset 12, -24
	vzeroupper
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
.myL123:
	.cfi_restore_state
	xorl	%esi, %esi
	leaq	a(%rip), %r9
	leaq	c(%rip), %r11
	jmp	.myL118
	.cfi_endproc
.myLFE9797:
	.size	_Z6addsubiiii, .-_Z6addsubiiii
	.p2align 4
	.globl	_Z6subaddiiii
	.type	_Z6subaddiiii, @function
_Z6subaddiiii:
.myLFB9798:
	.cfi_startproc
	vmovd	%edx, %xmm1
	movslq	%esi, %r10
	movslq	%edi, %rdx
	cmpq	%r10, %rdx
	jge	.myL229
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%r10, %rdi
	vmovd	%ecx, %xmm2
	vmovdqa	%xmm1, %xmm6
	subq	%rdx, %rdi
	vmovdqa	%xmm2, %xmm7
	leaq	-1(%rdi), %rax
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	movq	%rdx, %rbx
	cmpq	$6, %rax
	jbe	.myL182
	movq	%rdi, %r12
	leaq	a(%rip), %r9
	vpxor	%xmm3, %xmm3, %xmm3
	xorl	%eax, %eax
	shrq	$3, %r12
	vpbroadcastd	%xmm1, %ymm4
	vpbroadcastd	%xmm2, %ymm0
	salq	$5, %r12
	leaq	0(,%rdx,4), %rsi
	leaq	c(%rip), %r11
	leaq	-32(%r12), %r8
	leaq	(%r9,%rsi), %rcx
	addq	%r11, %rsi
	shrq	$5, %r8
	addq	$1, %r8
	andl	$7, %r8d
	je	.myL178
	cmpq	$1, %r8
	je	.myL209
	cmpq	$2, %r8
	je	.myL210
	cmpq	$3, %r8
	je	.myL211
	cmpq	$4, %r8
	je	.myL212
	cmpq	$5, %r8
	je	.myL213
	cmpq	$6, %r8
	jne	.myL232
.myL214:
	vmovdqu	(%rcx,%rax), %ymm12
	vpsubd	%ymm4, %ymm12, %ymm13
	vpmaxsd	%ymm3, %ymm13, %ymm14
	vpaddd	%ymm0, %ymm14, %ymm15
	vpminsd	(%rsi,%rax), %ymm15, %ymm5
	vmovdqu	%ymm5, (%rcx,%rax)
	addq	$32, %rax
.myL213:
	vmovdqu	(%rcx,%rax), %ymm8
	vpsubd	%ymm4, %ymm8, %ymm9
	vpmaxsd	%ymm3, %ymm9, %ymm10
	vpaddd	%ymm0, %ymm10, %ymm11
	vpminsd	(%rsi,%rax), %ymm11, %ymm12
	vmovdqu	%ymm12, (%rcx,%rax)
	addq	$32, %rax
.myL212:
	vmovdqu	(%rcx,%rax), %ymm13
	vpsubd	%ymm4, %ymm13, %ymm14
	vpmaxsd	%ymm3, %ymm14, %ymm15
	vpaddd	%ymm0, %ymm15, %ymm5
	vpminsd	(%rsi,%rax), %ymm5, %ymm8
	vmovdqu	%ymm8, (%rcx,%rax)
	addq	$32, %rax
.myL211:
	vmovdqu	(%rcx,%rax), %ymm9
	vpsubd	%ymm4, %ymm9, %ymm10
	vpmaxsd	%ymm3, %ymm10, %ymm11
	vpaddd	%ymm0, %ymm11, %ymm12
	vpminsd	(%rsi,%rax), %ymm12, %ymm13
	vmovdqu	%ymm13, (%rcx,%rax)
	addq	$32, %rax
.myL210:
	vmovdqu	(%rcx,%rax), %ymm14
	vpsubd	%ymm4, %ymm14, %ymm15
	vpmaxsd	%ymm3, %ymm15, %ymm5
	vpaddd	%ymm0, %ymm5, %ymm8
	vpminsd	(%rsi,%rax), %ymm8, %ymm9
	vmovdqu	%ymm9, (%rcx,%rax)
	addq	$32, %rax
.myL209:
	vmovdqu	(%rcx,%rax), %ymm10
	vpsubd	%ymm4, %ymm10, %ymm11
	vpmaxsd	%ymm3, %ymm11, %ymm12
	vpaddd	%ymm0, %ymm12, %ymm13
	vpminsd	(%rsi,%rax), %ymm13, %ymm14
	vmovdqu	%ymm14, (%rcx,%rax)
	addq	$32, %rax
	cmpq	%rax, %r12
	je	.myL225
.myL178:
	vmovdqu	(%rcx,%rax), %ymm15
	vmovdqu	32(%rcx,%rax), %ymm11
	vpsubd	%ymm4, %ymm15, %ymm5
	vpsubd	%ymm4, %ymm11, %ymm12
	vpmaxsd	%ymm3, %ymm5, %ymm8
	vmovdqu	64(%rcx,%rax), %ymm5
	vpmaxsd	%ymm3, %ymm12, %ymm13
	vmovdqu	96(%rcx,%rax), %ymm12
	vpaddd	%ymm0, %ymm8, %ymm9
	vpminsd	(%rsi,%rax), %ymm9, %ymm10
	vpaddd	%ymm0, %ymm13, %ymm14
	vpsubd	%ymm4, %ymm5, %ymm8
	vmovdqu	%ymm10, (%rcx,%rax)
	vpminsd	32(%rsi,%rax), %ymm14, %ymm15
	vpmaxsd	%ymm3, %ymm8, %ymm9
	vmovdqu	128(%rcx,%rax), %ymm8
	vpaddd	%ymm0, %ymm9, %ymm10
	vpsubd	%ymm4, %ymm12, %ymm13
	vmovdqu	%ymm15, 32(%rcx,%rax)
	vpmaxsd	%ymm3, %ymm13, %ymm14
	vpsubd	%ymm4, %ymm8, %ymm9
	vmovdqu	160(%rcx,%rax), %ymm13
	vpminsd	64(%rsi,%rax), %ymm10, %ymm11
	vpaddd	%ymm0, %ymm14, %ymm15
	vpmaxsd	%ymm3, %ymm9, %ymm10
	vpsubd	%ymm4, %ymm13, %ymm14
	vmovdqu	192(%rcx,%rax), %ymm9
	vmovdqu	%ymm11, 64(%rcx,%rax)
	vpminsd	96(%rsi,%rax), %ymm15, %ymm5
	vpaddd	%ymm0, %ymm10, %ymm11
	vpmaxsd	%ymm3, %ymm14, %ymm15
	vpsubd	%ymm4, %ymm9, %ymm10
	vmovdqu	224(%rcx,%rax), %ymm14
	vmovdqu	%ymm5, 96(%rcx,%rax)
	vpaddd	%ymm0, %ymm15, %ymm5
	vpminsd	128(%rsi,%rax), %ymm11, %ymm12
	vpmaxsd	%ymm3, %ymm10, %ymm11
	vpsubd	%ymm4, %ymm14, %ymm15
	vmovdqu	%ymm12, 128(%rcx,%rax)
	vpaddd	%ymm0, %ymm11, %ymm12
	vpminsd	160(%rsi,%rax), %ymm5, %ymm8
	vpmaxsd	%ymm3, %ymm15, %ymm5
	vmovdqu	%ymm8, 160(%rcx,%rax)
	vpaddd	%ymm0, %ymm5, %ymm8
	vpminsd	192(%rsi,%rax), %ymm12, %ymm13
	vmovdqu	%ymm13, 192(%rcx,%rax)
	vpminsd	224(%rsi,%rax), %ymm8, %ymm9
	vmovdqu	%ymm9, 224(%rcx,%rax)
	addq	$256, %rax
	cmpq	%rax, %r12
	jne	.myL178
.myL225:
	movq	%rdi, %rsi
	andq	$-8, %rsi
	addq	%rsi, %rdx
	testb	$7, %dil
	je	.myL233
	vzeroupper
.myL177:
	subq	%rsi, %rdi
	leaq	-1(%rdi), %rcx
	cmpq	$2, %rcx
	jbe	.myL180
	addq	%rbx, %rsi
	vpshufd	$0, %xmm6, %xmm6
	vpxor	%xmm3, %xmm3, %xmm3
	movq	%rdi, %r12
	leaq	(%r9,%rsi,4), %rbx
	vpshufd	$0, %xmm7, %xmm7
	andq	$-4, %r12
	vmovdqu	(%rbx), %xmm4
	addq	%r12, %rdx
	andl	$3, %edi
	vpsubd	%xmm6, %xmm4, %xmm0
	vpmaxsd	%xmm3, %xmm0, %xmm10
	vpaddd	%xmm7, %xmm10, %xmm11
	vpminsd	(%r11,%rsi,4), %xmm11, %xmm12
	vmovdqu	%xmm12, (%rbx)
	je	.myL227
.myL180:
	vmovd	(%r9,%rdx,4), %xmm13
	vpxor	%xmm15, %xmm15, %xmm15
	leaq	1(%rdx), %rdi
	vpsubd	%xmm1, %xmm13, %xmm14
	vpinsrd	$0, (%r11,%rdx,4), %xmm15, %xmm9
	vpmaxsd	%xmm15, %xmm14, %xmm5
	vpaddd	%xmm2, %xmm5, %xmm8
	vpminsd	%xmm9, %xmm8, %xmm6
	vmovd	%xmm6, (%r9,%rdx,4)
	cmpq	%rdi, %r10
	jle	.myL227
	vmovd	(%r9,%rdi,4), %xmm4
	vpxor	%xmm3, %xmm3, %xmm3
	addq	$2, %rdx
	vpsubd	%xmm1, %xmm4, %xmm0
	vpinsrd	$0, (%r11,%rdi,4), %xmm3, %xmm11
	vpmaxsd	%xmm3, %xmm0, %xmm10
	vpaddd	%xmm2, %xmm10, %xmm7
	vpminsd	%xmm11, %xmm7, %xmm12
	vmovd	%xmm12, (%r9,%rdi,4)
	cmpq	%rdx, %r10
	jle	.myL227
	vmovd	(%r9,%rdx,4), %xmm13
	vpsubd	%xmm1, %xmm13, %xmm14
	vpxor	%xmm1, %xmm1, %xmm1
	vpinsrd	$0, (%r11,%rdx,4), %xmm1, %xmm5
	vpmaxsd	%xmm1, %xmm14, %xmm15
	vpaddd	%xmm2, %xmm15, %xmm2
	vpminsd	%xmm5, %xmm2, %xmm8
	vmovd	%xmm8, (%r9,%rdx,4)
.myL227:
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
.myL232:
	.cfi_restore_state
	vmovdqu	(%rcx), %ymm5
	movl	$32, %eax
	vpsubd	%ymm4, %ymm5, %ymm8
	vpmaxsd	%ymm3, %ymm8, %ymm9
	vpaddd	%ymm0, %ymm9, %ymm10
	vpminsd	(%rsi), %ymm10, %ymm11
	vmovdqu	%ymm11, (%rcx)
	jmp	.myL214
	.p2align 4,,10
	.p2align 3
.myL229:
	.cfi_def_cfa 7, 8
	.cfi_restore 3
	.cfi_restore 6
	.cfi_restore 12
	ret
	.p2align 4,,10
	.p2align 3
.myL233:
	.cfi_def_cfa 6, 16
	.cfi_offset 3, -32
	.cfi_offset 6, -16
	.cfi_offset 12, -24
	vzeroupper
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
.myL182:
	.cfi_restore_state
	xorl	%esi, %esi
	leaq	a(%rip), %r9
	leaq	c(%rip), %r11
	jmp	.myL177
	.cfi_endproc
.myLFE9798:
	.size	_Z6subaddiiii, .-_Z6subaddiiii
	.p2align 4
	.globl	_Z9addsubaddiiiii
	.type	_Z9addsubaddiiiii, @function
_Z9addsubaddiiiii:
.myLFB9799:
	.cfi_startproc
	vmovd	%edx, %xmm2
	movslq	%esi, %r10
	movslq	%edi, %rdx
	cmpq	%r10, %rdx
	jge	.myL268
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%r10, %rsi
	vmovd	%ecx, %xmm3
	vmovd	%r8d, %xmm4
	vmovdqa	%xmm2, %xmm0
	vmovdqa	%xmm3, %xmm6
	vmovdqa	%xmm4, %xmm5
	subq	%rdx, %rsi
	leaq	-1(%rsi), %rax
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	movq	%rdx, %rbx
	cmpq	$6, %rax
	jbe	.myL241
	movq	%rsi, %r12
	vpbroadcastd	%xmm2, %ymm10
	vpbroadcastd	%xmm3, %ymm9
	xorl	%eax, %eax
	shrq	$3, %r12
	vpbroadcastd	%xmm4, %ymm7
	vpxor	%xmm8, %xmm8, %xmm8
	salq	$5, %r12
	leaq	0(,%rdx,4), %rdi
	leaq	a(%rip), %r9
	leaq	-32(%r12), %r8
	leaq	c(%rip), %r11
	shrq	$5, %r8
	leaq	(%r9,%rdi), %rcx
	addq	%r11, %rdi
	addq	$1, %r8
	andl	$3, %r8d
	je	.myL237
	cmpq	$1, %r8
	je	.myL256
	cmpq	$2, %r8
	je	.myL257
	vpaddd	(%rcx), %ymm10, %ymm1
	vpminsd	(%rdi), %ymm1, %ymm11
	movl	$32, %eax
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	(%rdi), %ymm14, %ymm15
	vmovdqu	%ymm15, (%rcx)
.myL257:
	vpaddd	(%rcx,%rax), %ymm10, %ymm1
	vpminsd	(%rdi,%rax), %ymm1, %ymm11
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, (%rcx,%rax)
	addq	$32, %rax
.myL256:
	vpaddd	(%rcx,%rax), %ymm10, %ymm1
	vpminsd	(%rdi,%rax), %ymm1, %ymm11
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, (%rcx,%rax)
	addq	$32, %rax
	cmpq	%rax, %r12
	je	.myL264
.myL237:
	vpaddd	(%rcx,%rax), %ymm10, %ymm1
	vpminsd	(%rdi,%rax), %ymm1, %ymm11
	vpaddd	32(%rcx,%rax), %ymm10, %ymm1
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, (%rcx,%rax)
	vpminsd	32(%rdi,%rax), %ymm1, %ymm11
	vpaddd	64(%rcx,%rax), %ymm10, %ymm1
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	32(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, 32(%rcx,%rax)
	vpminsd	64(%rdi,%rax), %ymm1, %ymm11
	vpaddd	96(%rcx,%rax), %ymm10, %ymm1
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	64(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, 64(%rcx,%rax)
	vpminsd	96(%rdi,%rax), %ymm1, %ymm11
	vpsubd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm7, %ymm13, %ymm14
	vpminsd	96(%rdi,%rax), %ymm14, %ymm15
	vmovdqu	%ymm15, 96(%rcx,%rax)
	subq	$-128, %rax
	cmpq	%rax, %r12
	jne	.myL237
.myL264:
	movq	%rsi, %rdi
	andq	$-8, %rdi
	addq	%rdi, %rdx
	testb	$7, %sil
	je	.myL271
	vzeroupper
.myL236:
	subq	%rdi, %rsi
	leaq	-1(%rsi), %rcx
	cmpq	$2, %rcx
	jbe	.myL239
	addq	%rbx, %rdi
	vpshufd	$0, %xmm0, %xmm0
	vpshufd	$0, %xmm6, %xmm6
	movq	%rsi, %r12
	leaq	(%r9,%rdi,4), %rbx
	vmovdqu	(%r11,%rdi,4), %xmm10
	vpxor	%xmm1, %xmm1, %xmm1
	vpshufd	$0, %xmm5, %xmm5
	vpaddd	(%rbx), %xmm0, %xmm9
	andq	$-4, %r12
	addq	%r12, %rdx
	andl	$3, %esi
	vpminsd	%xmm10, %xmm9, %xmm7
	vpsubd	%xmm6, %xmm7, %xmm8
	vpmaxsd	%xmm1, %xmm8, %xmm11
	vpaddd	%xmm5, %xmm11, %xmm12
	vpminsd	%xmm10, %xmm12, %xmm13
	vmovdqu	%xmm13, (%rbx)
	je	.myL266
.myL239:
	vmovd	(%r9,%rdx,4), %xmm15
	vmovd	(%r11,%rdx,4), %xmm14
	vpxor	%xmm7, %xmm7, %xmm7
	vpaddd	%xmm15, %xmm2, %xmm10
	leaq	1(%rdx), %rsi
	vpminsd	%xmm14, %xmm10, %xmm0
	vpsubd	%xmm3, %xmm0, %xmm9
	vpmaxsd	%xmm7, %xmm9, %xmm6
	vpaddd	%xmm4, %xmm6, %xmm8
	vpminsd	%xmm14, %xmm8, %xmm1
	vmovd	%xmm1, (%r9,%rdx,4)
	cmpq	%rsi, %r10
	jle	.myL266
	vpinsrd	$0, (%r9,%rsi,4), %xmm7, %xmm5
	vpinsrd	$0, (%r11,%rsi,4), %xmm7, %xmm11
	addq	$2, %rdx
	vpaddd	%xmm5, %xmm2, %xmm12
	vpminsd	%xmm11, %xmm12, %xmm13
	vpsubd	%xmm3, %xmm13, %xmm14
	vpmaxsd	%xmm7, %xmm14, %xmm15
	vpaddd	%xmm4, %xmm15, %xmm10
	vpminsd	%xmm11, %xmm10, %xmm0
	vmovd	%xmm0, (%r9,%rsi,4)
	cmpq	%rdx, %r10
	jle	.myL266
	vpinsrd	$0, (%r9,%rdx,4), %xmm7, %xmm6
	vpinsrd	$0, (%r11,%rdx,4), %xmm7, %xmm9
	vpaddd	%xmm6, %xmm2, %xmm2
	vpminsd	%xmm9, %xmm2, %xmm8
	vpsubd	%xmm3, %xmm8, %xmm3
	vpmaxsd	%xmm7, %xmm3, %xmm7
	vpaddd	%xmm4, %xmm7, %xmm4
	vpminsd	%xmm9, %xmm4, %xmm1
	vmovd	%xmm1, (%r9,%rdx,4)
.myL266:
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
.myL268:
	.cfi_restore 3
	.cfi_restore 6
	.cfi_restore 12
	ret
	.p2align 4,,10
	.p2align 3
.myL271:
	.cfi_def_cfa 6, 16
	.cfi_offset 3, -32
	.cfi_offset 6, -16
	.cfi_offset 12, -24
	vzeroupper
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
.myL241:
	.cfi_restore_state
	xorl	%edi, %edi
	leaq	a(%rip), %r9
	leaq	c(%rip), %r11
	jmp	.myL236
	.cfi_endproc
.myLFE9799:
	.size	_Z9addsubaddiiiii, .-_Z9addsubaddiiiii
	.p2align 4
	.globl	_Z9subaddsubiiiii
	.type	_Z9subaddsubiiiii, @function
_Z9subaddsubiiiii:
.myLFB9800:
	.cfi_startproc
	vmovd	%edx, %xmm2
	movslq	%esi, %r10
	movslq	%edi, %rdx
	cmpq	%r10, %rdx
	jge	.myL306
	pushq	%rbp
	.cfi_def_cfa_offset 16
	.cfi_offset 6, -16
	movq	%r10, %rsi
	vmovd	%ecx, %xmm3
	vmovd	%r8d, %xmm4
	vmovdqa	%xmm2, %xmm5
	vmovdqa	%xmm3, %xmm7
	vmovdqa	%xmm4, %xmm6
	subq	%rdx, %rsi
	leaq	-1(%rsi), %rax
	movq	%rsp, %rbp
	.cfi_def_cfa_register 6
	pushq	%r12
	pushq	%rbx
	.cfi_offset 12, -24
	.cfi_offset 3, -32
	movq	%rdx, %rbx
	cmpq	$6, %rax
	jbe	.myL279
	movq	%rsi, %r12
	vpbroadcastd	%xmm2, %ymm10
	vpbroadcastd	%xmm3, %ymm9
	xorl	%eax, %eax
	shrq	$3, %r12
	vpbroadcastd	%xmm4, %ymm8
	vpxor	%xmm1, %xmm1, %xmm1
	salq	$5, %r12
	leaq	0(,%rdx,4), %rdi
	leaq	a(%rip), %r9
	leaq	-32(%r12), %r8
	leaq	c(%rip), %r11
	shrq	$5, %r8
	leaq	(%r9,%rdi), %rcx
	addq	%r11, %rdi
	addq	$1, %r8
	andl	$3, %r8d
	je	.myL275
	cmpq	$1, %r8
	je	.myL294
	cmpq	$2, %r8
	je	.myL295
	vmovdqu	(%rcx), %ymm0
	movl	$32, %eax
	vpsubd	%ymm10, %ymm0, %ymm11
	vpmaxsd	%ymm1, %ymm11, %ymm12
	vpaddd	%ymm9, %ymm12, %ymm13
	vpminsd	(%rdi), %ymm13, %ymm14
	vpsubd	%ymm8, %ymm14, %ymm15
	vpmaxsd	%ymm1, %ymm15, %ymm0
	vmovdqu	%ymm0, (%rcx)
.myL295:
	vmovdqu	(%rcx,%rax), %ymm11
	vpsubd	%ymm10, %ymm11, %ymm12
	vpmaxsd	%ymm1, %ymm12, %ymm13
	vpaddd	%ymm9, %ymm13, %ymm14
	vpminsd	(%rdi,%rax), %ymm14, %ymm15
	vpsubd	%ymm8, %ymm15, %ymm0
	vpmaxsd	%ymm1, %ymm0, %ymm11
	vmovdqu	%ymm11, (%rcx,%rax)
	addq	$32, %rax
.myL294:
	vmovdqu	(%rcx,%rax), %ymm12
	vpsubd	%ymm10, %ymm12, %ymm13
	vpmaxsd	%ymm1, %ymm13, %ymm14
	vpaddd	%ymm9, %ymm14, %ymm15
	vpminsd	(%rdi,%rax), %ymm15, %ymm0
	vpsubd	%ymm8, %ymm0, %ymm11
	vpmaxsd	%ymm1, %ymm11, %ymm12
	vmovdqu	%ymm12, (%rcx,%rax)
	addq	$32, %rax
	cmpq	%rax, %r12
	je	.myL302
.myL275:
	vmovdqu	(%rcx,%rax), %ymm13
	vpsubd	%ymm10, %ymm13, %ymm14
	vpmaxsd	%ymm1, %ymm14, %ymm15
	vmovdqu	32(%rcx,%rax), %ymm14
	vpaddd	%ymm9, %ymm15, %ymm0
	vpminsd	(%rdi,%rax), %ymm0, %ymm11
	vpsubd	%ymm10, %ymm14, %ymm15
	vpsubd	%ymm8, %ymm11, %ymm12
	vpmaxsd	%ymm1, %ymm15, %ymm0
	vmovdqu	64(%rcx,%rax), %ymm15
	vpmaxsd	%ymm1, %ymm12, %ymm13
	vpaddd	%ymm9, %ymm0, %ymm11
	vmovdqu	%ymm13, (%rcx,%rax)
	vpminsd	32(%rdi,%rax), %ymm11, %ymm12
	vpsubd	%ymm10, %ymm15, %ymm0
	vpmaxsd	%ymm1, %ymm0, %ymm11
	vmovdqu	96(%rcx,%rax), %ymm0
	vpsubd	%ymm8, %ymm12, %ymm13
	vpaddd	%ymm9, %ymm11, %ymm12
	vpmaxsd	%ymm1, %ymm13, %ymm14
	vpsubd	%ymm10, %ymm0, %ymm11
	vmovdqu	%ymm14, 32(%rcx,%rax)
	vpminsd	64(%rdi,%rax), %ymm12, %ymm13
	vpmaxsd	%ymm1, %ymm11, %ymm12
	vpsubd	%ymm8, %ymm13, %ymm14
	vpaddd	%ymm9, %ymm12, %ymm13
	vpmaxsd	%ymm1, %ymm14, %ymm15
	vmovdqu	%ymm15, 64(%rcx,%rax)
	vpminsd	96(%rdi,%rax), %ymm13, %ymm14
	vpsubd	%ymm8, %ymm14, %ymm15
	vpmaxsd	%ymm1, %ymm15, %ymm0
	vmovdqu	%ymm0, 96(%rcx,%rax)
	subq	$-128, %rax
	cmpq	%rax, %r12
	jne	.myL275
.myL302:
	movq	%rsi, %rdi
	andq	$-8, %rdi
	addq	%rdi, %rdx
	testb	$7, %sil
	je	.myL309
	vzeroupper
.myL274:
	subq	%rdi, %rsi
	leaq	-1(%rsi), %rcx
	cmpq	$2, %rcx
	jbe	.myL277
	addq	%rbx, %rdi
	vpshufd	$0, %xmm5, %xmm5
	vpxor	%xmm8, %xmm8, %xmm8
	movq	%rsi, %r12
	leaq	(%r9,%rdi,4), %rbx
	vpshufd	$0, %xmm7, %xmm7
	vpshufd	$0, %xmm6, %xmm6
	andq	$-4, %r12
	vmovdqu	(%rbx), %xmm10
	addq	%r12, %rdx
	andl	$3, %esi
	vpsubd	%xmm5, %xmm10, %xmm9
	vpmaxsd	%xmm8, %xmm9, %xmm1
	vpaddd	%xmm7, %xmm1, %xmm11
	vpminsd	(%r11,%rdi,4), %xmm11, %xmm12
	vpsubd	%xmm6, %xmm12, %xmm13
	vpmaxsd	%xmm8, %xmm13, %xmm14
	vmovdqu	%xmm14, (%rbx)
	je	.myL304
.myL277:
	vmovd	(%r9,%rdx,4), %xmm15
	vpxor	%xmm5, %xmm5, %xmm5
	vpinsrd	$0, (%r11,%rdx,4), %xmm5, %xmm8
	vpsubd	%xmm2, %xmm15, %xmm0
	vpxor	%xmm11, %xmm11, %xmm11
	leaq	1(%rdx), %rsi
	vpmaxsd	%xmm5, %xmm0, %xmm10
	vpaddd	%xmm3, %xmm10, %xmm9
	vpminsd	%xmm8, %xmm9, %xmm1
	vpsubd	%xmm4, %xmm1, %xmm7
	vpmaxsd	%xmm11, %xmm7, %xmm12
	vmovd	%xmm12, (%r9,%rdx,4)
	cmpq	%rsi, %r10
	jle	.myL304
	vpinsrd	$0, (%r9,%rsi,4), %xmm11, %xmm6
	vpinsrd	$0, (%r11,%rsi,4), %xmm11, %xmm0
	vpxor	%xmm9, %xmm9, %xmm9
	addq	$2, %rdx
	vpsubd	%xmm2, %xmm6, %xmm13
	vpmaxsd	%xmm11, %xmm13, %xmm14
	vpaddd	%xmm3, %xmm14, %xmm15
	vpminsd	%xmm0, %xmm15, %xmm5
	vpsubd	%xmm4, %xmm5, %xmm10
	vpmaxsd	%xmm9, %xmm10, %xmm8
	vmovd	%xmm8, (%r9,%rsi,4)
	cmpq	%rdx, %r10
	jle	.myL304
	vpinsrd	$0, (%r9,%rdx,4), %xmm9, %xmm1
	vpinsrd	$0, (%r11,%rdx,4), %xmm9, %xmm11
	vpxor	%xmm6, %xmm6, %xmm6
	vpsubd	%xmm2, %xmm1, %xmm2
	vpmaxsd	%xmm9, %xmm2, %xmm7
	vpaddd	%xmm3, %xmm7, %xmm3
	vpminsd	%xmm11, %xmm3, %xmm12
	vpsubd	%xmm4, %xmm12, %xmm4
	vpmaxsd	%xmm6, %xmm4, %xmm13
	vmovd	%xmm13, (%r9,%rdx,4)
.myL304:
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_def_cfa 7, 8
	ret
	.p2align 4,,10
	.p2align 3
.myL306:
	.cfi_restore 3
	.cfi_restore 6
	.cfi_restore 12
	ret
	.p2align 4,,10
	.p2align 3
.myL309:
	.cfi_def_cfa 6, 16
	.cfi_offset 3, -32
	.cfi_offset 6, -16
	.cfi_offset 12, -24
	vzeroupper
	popq	%rbx
	popq	%r12
	popq	%rbp
	.cfi_remember_state
	.cfi_def_cfa 7, 8
	ret
.myL279:
	.cfi_restore_state
	xorl	%edi, %edi
	leaq	a(%rip), %r9
	leaq	c(%rip), %r11
	jmp	.myL274
	.cfi_endproc
.myLFE9800:
	.size	_Z9subaddsubiiiii, .-_Z9subaddsubiiiii
)dard");

void add(int l, int r, int x);
void sub(int l, int r, int x);
void addsub(int l, int r, int x, int y);
void subadd(int l, int r, int x, int y);
void addsubadd(int l, int r, int x, int y, int z);
void subaddsub(int l, int r, int x, int y, int z);
//void add(int l, int r, int x)
//{
//	Loop (i,l,r)
//		a[i] = (a[i] + x > c[i]? c[i]: a[i] + x);
//}
//void sub(int l, int r, int x)
//{
//	Loop (i,l,r)
//		a[i] = (a[i] - x < 0? 0: a[i] - x);
//}
//void addsub(int l, int r, int x, int y)
//{
//	Loop (i,l,r) {
//		a[i] = (a[i] + x > c[i]? c[i]: a[i] + x);
//		a[i] = (a[i] - y < 0? 0: a[i] - y);
//	}
//}
//void subadd(int l, int r, int x, int y)
//{
//	Loop (i,l,r) {
//		a[i] = (a[i] - x < 0? 0: a[i] - x);
//		a[i] = (a[i] + y > c[i]? c[i]: a[i] + y);
//	}
//}
//void addsubadd(int l, int r, int x, int y, int z)
//{
//	Loop (i,l,r) {
//		a[i] = (a[i] + x > c[i]? c[i]: a[i] + x);
//		a[i] = (a[i] - y < 0? 0: a[i] - y);
//		a[i] = (a[i] + z > c[i]? c[i]: a[i] + z);
//	}
//}
//void subaddsub(int l, int r, int x, int y, int z)
//{
//	Loop (i,l,r) {
//		a[i] = (a[i] - x < 0? 0: a[i] - x);
//		a[i] = (a[i] + y > c[i]? c[i]: a[i] + y);
//		a[i] = (a[i] - z < 0? 0: a[i] - z);
//	}
//}
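// Note: the asm block above defines _Z3addiii, _Z3subiii, _Z6addsubiiii,
// _Z6subaddiiii, _Z9addsubaddiiiii and _Z9subaddsubiiiii, i.e. the mangled
// names of add, sub, addsub, subadd, addsubadd and subaddsub declared above,
// so those declarations bind directly to the hand-pasted AVX2 code. The
// listing looks like GCC's own vectorized output for the commented reference
// loops, with local labels renamed (.L... -> .myL...) so everything fits in a
// single global asm string. (Observation only; how it was actually generated
// is not stated in the submission.)
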
// Apply one query with value x to the whole range [l, r): positive x adds and
// clamps at the capacity, non-positive x subtracts and clamps at 0.
void up(int l, int r, int x)
{
	if (x > 0)
		add(l, r, x);
	else
		sub(l, r, -x);
}
// Apply two consecutive queries x, then y, to [l, r) in one pass. Two
// same-sign queries collapse into a single add/sub; capping the merged amount
// at inf (which exceeds every capacity) leaves the result unchanged while
// keeping the kernel's intermediate sums in int range. Mixed signs use the
// fused addsub/subadd kernels.
void upup(int l, int r, int x, int y)
{
	if (x >  0 && y >  0)
		add(l, r, min(inf, x+y));
	if (x <= 0 && y <= 0)
		sub(l, r, min(inf, -(x+y)));
	if (x >  0 && y <= 0)
		addsub(l, r, x, -y);
	if (x <= 0 && y >  0)
		subadd(l, r, -x, y);
}
// Apply three consecutive queries x, y, z to [l, r). If two adjacent queries
// share a sign they are merged first (clamping the sum to +-inf); otherwise
// the signs alternate and one of the fused triple kernels applies.
void upupup(int l, int r, int x, int y, int z)
{
	if ((x > 0) == (y > 0))
		return upup(l, r, clamp(x+y, -inf, inf), z);
	if ((y > 0) == (z > 0))
		return upup(l, r, x, clamp(y+z, -inf, inf));
	if (x > 0)
		addsubadd(l, r, x, -y, z);
	if (x <= 0)
		subaddsub(l, r, -x, y, -z);
}
// Apply cnt (0..3) consecutive query values, given by the array a, to [l, r).
// The parameter a shadows the global array a[].
void upv(int l, int r, int *a, int cnt)
{
	switch (cnt) {
	case 0: return;
	case 1: return up(l, r, a[0]);
	case 2: return upup(l, r, a[0], a[1]);
	case 3: return upupup(l, r, a[0], a[1], a[2]);
	}
}

std::vector<int> distribute_candies(std::vector<int> _c, std::vector<int> ql,
                                    std::vector<int> qr, std::vector<int> qv) {
    int n = _c.size();
    int q = ql.size();
    // Make the query ranges half-open: [ql, qr).
    for (int &x : qr)
	    ++x;
    Loop (i,0,n)
	    c[i] = _c[i];
    // The global a[] is zero-initialized, so every box starts empty.
    // Process the boxes in blocks of S indices. For each block, walk the
    // queries in order: up to three consecutive queries that fully cover the
    // block are fused into one vectorized pass (upv); a query that only
    // partially overlaps the block is applied alone to the overlapping part.
    for (int L = 0; L < n; L += S) {
	    int R = min<int>(L+S, n);
	    for (int i = 0; i < q;) {
		    int cnt;
		    for (cnt = 0; cnt < 3 && i+cnt < q; ++cnt) {
			    if (!(ql[i+cnt] <= L && R <= qr[i+cnt]))
				    break;
		    }
		    upv(L, R, &qv[i], cnt);
		    i += cnt;
		    if (cnt != 3 && i != q) {
			    up(max(ql[i], L), min(qr[i], R), qv[i]);
			    i += 1;
		    }
	    }
    }
    return vector<int>(a, a+n);
}
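
For local testing (the official grader provides main() and calls distribute_candies itself), a small driver along the following lines can be used. Everything below is a hypothetical add-on, not part of the submission: the brute-force reference simply applies each query directly with the clamping rules from the statement, and the input is the first sample from the task statement, whose expected answer is 0 4 13.

// Hypothetical local driver -- not part of the submission.
#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

std::vector<int> distribute_candies(std::vector<int>, std::vector<int>,
                                    std::vector<int>, std::vector<int>);

// Brute-force reference: apply each query to its range directly,
// clamping to [0, c[i]] exactly as the statement describes.
static std::vector<int> distribute_candies_slow(const std::vector<int> &c,
                                                const std::vector<int> &l,
                                                const std::vector<int> &r,
                                                const std::vector<int> &v)
{
	std::vector<int> s(c.size(), 0);
	for (int q = 0; q < (int)l.size(); ++q)
		for (int i = l[q]; i <= r[q]; ++i)   // r[] is inclusive here
			s[i] = std::clamp(s[i] + v[q], 0, c[i]);
	return s;
}

int main()
{
	// Sample 1 from the task statement; expected output: 0 4 13
	std::vector<int> c = {10, 15, 13}, l = {0, 0}, r = {2, 1}, v = {20, -11};
	std::vector<int> fast = distribute_candies(c, l, r, v);
	assert(fast == distribute_candies_slow(c, l, r, v));
	for (int x : fast)
		std::printf("%d ", x);
	std::printf("\n");
}
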
| # | Verdict | Execution time | Memory | Grader output |
|---|---------|----------------|--------|---------------|
| 1 | Correct | 0 ms | 212 KB | Output is correct |
| 2 | Correct | 0 ms | 212 KB | Output is correct |
| 3 | Correct | 1 ms | 340 KB | Output is correct |
| 4 | Correct | 1 ms | 340 KB | Output is correct |
| 5 | Correct | 1 ms | 340 KB | Output is correct |

| # | Verdict | Execution time | Memory | Grader output |
|---|---------|----------------|--------|---------------|
| 1 | Correct | 739 ms | 8888 KB | Output is correct |
| 2 | Correct | 741 ms | 8824 KB | Output is correct |
| 3 | Correct | 747 ms | 8908 KB | Output is correct |

| # | Verdict | Execution time | Memory | Grader output |
|---|---------|----------------|--------|---------------|
| 1 | Correct | 1 ms | 212 KB | Output is correct |
| 2 | Correct | 71 ms | 4964 KB | Output is correct |
| 3 | Correct | 49 ms | 5320 KB | Output is correct |
| 4 | Correct | 800 ms | 8840 KB | Output is correct |
| 5 | Correct | 794 ms | 8956 KB | Output is correct |
| 6 | Correct | 796 ms | 8960 KB | Output is correct |
| 7 | Correct | 752 ms | 8868 KB | Output is correct |

| # | Verdict | Execution time | Memory | Grader output |
|---|---------|----------------|--------|---------------|
| 1 | Correct | 0 ms | 212 KB | Output is correct |
| 2 | Correct | 1 ms | 212 KB | Output is correct |
| 3 | Correct | 49 ms | 4972 KB | Output is correct |
| 4 | Correct | 50 ms | 4272 KB | Output is correct |
| 5 | Correct | 1063 ms | 8916 KB | Output is correct |
| 6 | Correct | 1071 ms | 8904 KB | Output is correct |
| 7 | Correct | 1034 ms | 8896 KB | Output is correct |
| 8 | Correct | 1058 ms | 8908 KB | Output is correct |
| 9 | Correct | 892 ms | 8844 KB | Output is correct |

| # | Verdict | Execution time | Memory | Grader output |
|---|---------|----------------|--------|---------------|
| 1 | Correct | 0 ms | 212 KB | Output is correct |
| 2 | Correct | 0 ms | 212 KB | Output is correct |
| 3 | Correct | 1 ms | 340 KB | Output is correct |
| 4 | Correct | 1 ms | 340 KB | Output is correct |
| 5 | Correct | 1 ms | 340 KB | Output is correct |
| 6 | Correct | 739 ms | 8888 KB | Output is correct |
| 7 | Correct | 741 ms | 8824 KB | Output is correct |
| 8 | Correct | 747 ms | 8908 KB | Output is correct |
| 9 | Correct | 1 ms | 212 KB | Output is correct |
| 10 | Correct | 71 ms | 4964 KB | Output is correct |
| 11 | Correct | 49 ms | 5320 KB | Output is correct |
| 12 | Correct | 800 ms | 8840 KB | Output is correct |
| 13 | Correct | 794 ms | 8956 KB | Output is correct |
| 14 | Correct | 796 ms | 8960 KB | Output is correct |
| 15 | Correct | 752 ms | 8868 KB | Output is correct |
| 16 | Correct | 0 ms | 212 KB | Output is correct |
| 17 | Correct | 1 ms | 212 KB | Output is correct |
| 18 | Correct | 49 ms | 4972 KB | Output is correct |
| 19 | Correct | 50 ms | 4272 KB | Output is correct |
| 20 | Correct | 1063 ms | 8916 KB | Output is correct |
| 21 | Correct | 1071 ms | 8904 KB | Output is correct |
| 22 | Correct | 1034 ms | 8896 KB | Output is correct |
| 23 | Correct | 1058 ms | 8908 KB | Output is correct |
| 24 | Correct | 892 ms | 8844 KB | Output is correct |
| 25 | Correct | 0 ms | 212 KB | Output is correct |
| 26 | Correct | 49 ms | 4212 KB | Output is correct |
| 27 | Correct | 56 ms | 5044 KB | Output is correct |
| 28 | Correct | 833 ms | 8856 KB | Output is correct |
| 29 | Correct | 785 ms | 8856 KB | Output is correct |
| 30 | Correct | 776 ms | 8876 KB | Output is correct |
| 31 | Correct | 791 ms | 8900 KB | Output is correct |
| 32 | Correct | 781 ms | 8816 KB | Output is correct |