답안 #647802

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
647802 2022-10-04T07:15:01 Z ymm Building Bridges (CEOI17_building) C++17
100 / 100
1548 ms 4312 KB
#include <bits/stdc++.h>
// Shorthand aliases for the wide integer type and for pairs of integers.
using ll  = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using namespace std;

// Loop(x, l, r): x ascends through the half-open range [l, r).
#define Loop(x, l, r) for (ll x = (l); x < (ll)(r); ++x)
// LoopR(x, l, r): x descends through [l, r), starting at r-1 and ending at l.
#define LoopR(x, l, r) for (ll x = (r)-1; x >= (ll)(l); --x)

// Capacity of every per-index array below.
const int N = 100'032;
// h[] and w[] hold the two input sequences read in main().
// NOTE: the assembly definition of find_min addresses `h` and `dsw` by symbol
// name (h(%rip), dsw(%rip)), so those two must keep their names and remain
// global arrays of double.
double h[N], w[N];
// dp[i]      = find_min(0, i, h[i]) - suf_sum[i], with dp[0] = 0 (assigned in main()).
// suf_sum[i] = w[i] + w[i+1] + ... + w[n-1].
double dp[N], suf_sum[N];
// dsw[i] = dp[i] + suf_sum[i] - w[i]; find_min adds dsw[j] to the squared
// difference (h - h[j])^2 for each candidate j.
double dsw[N];
// Number of entries in use, read from input in main().
int n;

// find_min(l, r, h): minimum over j in [l, r) of (h - ::h[j])^2 + dsw[j];
// returns 1e100 when the range is empty.
//
// Disabled C++ reference implementation. The assembly below follows the same
// computation and appears to be compiler output for it with renamed labels:
//
//__attribute__((optimize("Ofast,unroll-loops"),target("avx2")))
//double find_min(int l, int r, double h)
//{
//	double ans = 1e100;
//	Loop (j,l,r) {
//		double tmp = h-::h[j];
//		tmp = tmp*tmp;
//		tmp += dsw[j];
//		ans = ans < tmp? ans: tmp;
//	}
//	return ans;
//}
//
// The live definition is the top-level asm blob below (x86-64, AT&T syntax,
// ELF directives, VEX-encoded vector instructions on ymm registers). It reads
// the global arrays `h` and `dsw` directly by symbol name.
// Argument registers are taken to follow the System V x86-64 convention:
// l in %edi, r in %esi, h in %xmm0, result in %xmm0.
// NOTE(review): the blob's last section directive selects .rodata.cst32 and
// nothing switches back afterwards — confirm the toolchain re-selects the
// section for the code that follows.
double find_min(int, int, double);
asm("\n"
"	.p2align 4\n"
// _Z8find_miniid is the mangled name of find_min(int, int, double).
"	.globl	_Z8find_miniid\n"
"	.type	_Z8find_miniid, @function\n"
"_Z8find_miniid:\n"
".myLFB9897:\n"
"	.cfi_startproc\n"
"	movslq	%edi, %rax\n"  // rax = l
"	movslq	%esi, %rcx\n"  // rcx = r
"	cmpq	%rcx, %rax\n"
"	jge	.myL63\n"  // l >= r: empty range, return the 1e100 constant
"	pushq	%rbp\n"
"	.cfi_def_cfa_offset 16\n"
"	.cfi_offset 6, -16\n"
"	subq	%rax, %rcx\n"  // rcx = r - l = element count
"	vmovsd	%xmm0, %xmm0, %xmm1\n"  // xmm1 keeps the scalar h for the tail code
"	movq	%rax, %r11\n"  // r11 = l
"	leaq	-1(%rcx), %rdx\n"
"	movq	%rsp, %rbp\n"
"	.cfi_def_cfa_register 6\n"
"	pushq	%rbx\n"  // rbx is saved here and reloaded from -8(%rbp) before each ret
"	.cfi_offset 3, -24\n"
"	cmpq	$2, %rdx\n"
"	jbe	.myL64\n"  // count <= 3: no full 4-wide vector, go straight to the tail
"	movq	%rcx, %rbx\n"
"	leaq	h(%rip), %r10\n"  // r10 = &h[0]
"	leaq	0(,%rax,8), %rsi\n"  // rsi = l * 8 (byte offset of element l)
"	vmovapd	.myLC0(%rip), %ymm2\n"  // ymm2 = four running minima, each 1e100
"	shrq	$2, %rbx\n"
"	leaq	(%r10,%rsi), %r8\n"  // r8 = &h[l]
"	vbroadcastsd	%xmm0, %ymm3\n"  // ymm3 = h in all four lanes
"	xorl	%edx, %edx\n"  // rdx = byte offset into the vectorised part
"	salq	$5, %rbx\n"  // rbx = (count / 4) * 32 = bytes covered by full vectors
"	leaq	dsw(%rip), %r9\n"  // r9 = &dsw[0]
"	leaq	-32(%rbx), %rdi\n"
"	addq	%r9, %rsi\n"  // rsi = &dsw[l]
"	shrq	$5, %rdi\n"
"	addq	$1, %rdi\n"  // rdi = number of 4-wide vector steps
"	andl	$7, %edi\n"  // rdi = steps mod 8; the main loop is unrolled 8x
// Dispatch on the remainder: run that many single steps first (entry points
// .myL94 .. .myL89 fall through into each other), then the 8x loop at .myL58.
"	je	.myL58\n"
"	cmpq	$1, %rdi\n"
"	je	.myL89\n"
"	cmpq	$2, %rdi\n"
"	je	.myL90\n"
"	cmpq	$3, %rdi\n"
"	je	.myL91\n"
"	cmpq	$4, %rdi\n"
"	je	.myL92\n"
"	cmpq	$5, %rdi\n"
"	je	.myL93\n"
"	cmpq	$6, %rdi\n"
"	jne	.myL110\n"  // remainder 7: one step at .myL110, then six more from .myL94
// One vector step: ymm0 = (h - h[j..j+3])^2 + dsw[j..j+3]; ymm2 = min(ymm2, ymm0).
".myL94:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
".myL93:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
".myL92:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
".myL91:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
".myL90:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
".myL89:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	addq	$32, %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	cmpq	%rdx, %rbx\n"
"	je	.myL104\n"  // the peeled steps already covered every full vector
// Main loop: eight vector steps per pass. rdi = rdx + 32 is used as a second
// base, and rdx advances by 256 bytes (rdi + 224) per pass.
".myL58:\n"
"	vsubpd	(%r8,%rdx), %ymm3, %ymm0\n"
"	leaq	32(%rdx), %rdi\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi,%rdx), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	32(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	32(%rsi,%rdx), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	64(%r8,%rdx), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	64(%rsi,%rdx), %ymm0, %ymm0\n"
"	leaq	224(%rdi), %rdx\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	64(%r8,%rdi), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	64(%rsi,%rdi), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	96(%r8,%rdi), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	96(%rsi,%rdi), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	128(%r8,%rdi), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	128(%rsi,%rdi), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	160(%r8,%rdi), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	160(%rsi,%rdi), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	vsubpd	192(%r8,%rdi), %ymm3, %ymm0\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	192(%rsi,%rdi), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	cmpq	%rdx, %rbx\n"
"	jne	.myL58\n"
// Horizontal reduction: fold the four lane minima of ymm2 into the low lane
// of xmm2, then check whether any elements remain after the full vectors.
".myL104:\n"
"	vextractf128	$0x1, %ymm2, %xmm0\n"
"	movq	%rcx, %rdx\n"
"	vminpd	%xmm2, %xmm0, %xmm0\n"
"	andq	$-4, %rdx\n"  // rdx = count rounded down to a multiple of 4
"	addq	%rdx, %rax\n"  // rax = index of the first element not yet processed
"	vunpckhpd	%xmm0, %xmm0, %xmm2\n"
"	vminpd	%xmm0, %xmm2, %xmm2\n"
"	cmpq	%rdx, %rcx\n"
"	je	.myL111\n"  // count was a multiple of 4: done
"	vzeroupper\n"
// Tail: 1 to 3 elements remain. rdx = number already processed, rax = next index.
".myL57:\n"
"	subq	%rdx, %rcx\n"  // rcx = remaining element count
"	cmpq	$1, %rcx\n"
"	je	.myL61\n"
"	addq	%r11, %rdx\n"  // rdx = l + processed = index of the next two elements
"	vmovddup	%xmm1, %xmm0\n"  // h in both 128-bit lanes
"	vmovddup	%xmm2, %xmm2\n"  // current minimum in both lanes
"	vsubpd	(%r10,%rdx,8), %xmm0, %xmm0\n"
"	vmulpd	%xmm0, %xmm0, %xmm0\n"
"	vaddpd	(%r9,%rdx,8), %xmm0, %xmm0\n"
"	movq	%rcx, %rdx\n"
"	andq	$-2, %rdx\n"
"	addq	%rdx, %rax\n"
"	vminpd	%xmm2, %xmm0, %xmm0\n"
"	vunpckhpd	%xmm0, %xmm0, %xmm2\n"
"	vminpd	%xmm0, %xmm2, %xmm2\n"
"	cmpq	%rdx, %rcx\n"
"	je	.myL55\n"  // exactly two elements remained: done
// Final single element at index rax, handled with scalar instructions.
".myL61:\n"
"	vsubsd	(%r10,%rax,8), %xmm1, %xmm0\n"
"	vmulsd	%xmm0, %xmm0, %xmm0\n"
"	vaddsd	(%r9,%rax,8), %xmm0, %xmm0\n"
"	vminsd	%xmm0, %xmm2, %xmm2\n"
// Common exit: restore rbx, move the result into xmm0, tear down the frame.
".myL55:\n"
"	movq	-8(%rbp), %rbx\n"
"	vmovsd	%xmm2, %xmm2, %xmm0\n"
"	leave\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// Exit used when the vector loop consumed every element (no tail).
".myL111:\n"
"	.cfi_restore_state\n"
"	vzeroupper\n"
"	movq	-8(%rbp), %rbx\n"
"	vmovsd	%xmm2, %xmm2, %xmm0\n"
"	leave\n"
"	.cfi_remember_state\n"
"	.cfi_def_cfa 7, 8\n"
"	ret\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// Remainder-7 entry: performs the first vector step at offset 0, sets rdx to
// 32, then joins the peeled chain at .myL94 for the other six steps.
".myL110:\n"
"	.cfi_restore_state\n"
"	vsubpd	(%r8), %ymm3, %ymm0\n"
"	movl	$32, %edx\n"
"	vmulpd	%ymm0, %ymm0, %ymm0\n"
"	vaddpd	(%rsi), %ymm0, %ymm0\n"
"	vminpd	%ymm0, %ymm2, %ymm2\n"
"	jmp	.myL94\n"
"	.p2align 4,,10\n"
"	.p2align 3\n"
// Empty-range exit (taken before any register was pushed): return 1e100.
".myL63:\n"
"	.cfi_def_cfa 7, 8\n"
"	.cfi_restore 3\n"
"	.cfi_restore 6\n"
"	vmovsd	.myLC1(%rip), %xmm2\n"
"	vmovsd	%xmm2, %xmm2, %xmm0\n"
"	ret\n"
// Short-range entry (count <= 3): nothing processed yet, minimum starts at
// 1e100, array bases are loaded, then the tail code at .myL57 does the work.
".myL64:\n"
"	.cfi_def_cfa 6, 16\n"
"	.cfi_offset 3, -24\n"
"	.cfi_offset 6, -16\n"
"	xorl	%edx, %edx\n"
"	vmovsd	.myLC1(%rip), %xmm2\n"
"	leaq	h(%rip), %r10\n"
"	leaq	dsw(%rip), %r9\n"
"	jmp	.myL57\n"
"	.cfi_endproc\n"
".myLFE9897:\n"
"	.size	_Z8find_miniid, .-_Z8find_miniid\n"
"	.section	.rodata.cst32,\"aM\",@progbits,32\n"
"	.align 32\n"
// Four copies of the double 1e100 (bit pattern 0x54B249AD2594C37D, stored as
// low word 630506365 then high word 1420970413). .myLC1 aliases the first
// copy for the scalar loads above.
".myLC0:\n"
"	.long	630506365\n"
"	.long	1420970413\n"
"	.long	630506365\n"
"	.long	1420970413\n"
"	.long	630506365\n"
"	.long	1420970413\n"
"	.long	630506365\n"
"	.long	1420970413\n"
"	.set	.myLC1,.myLC0\n"
);

int main()
{
	cin.tie(0) -> sync_with_stdio(false);
	cin >> n;
	Loop (i,0,n) {
		ll x;
		cin >> x;
		h[i] = x;
	}
	Loop (i,0,n) {
		ll x;
		cin >> x;
		w[i] = x;
	}
	suf_sum[n-1] = w[n-1];
	LoopR (i,0,n-1)
		suf_sum[i] = suf_sum[i+1] + w[i];
	dp[0] = 0;
	dsw[0] = dp[0] + suf_sum[0] - w[0];
	Loop (i,1,n) {
		dp[i] = find_min(0,i,h[i]) - suf_sum[i];
		dsw[i] = dp[i] + suf_sum[i] - w[i];
	}
	cout << (ll)dp[n-1] << '\n';
}
# 결과 실행 시간 메모리 Grader output
1 Correct 0 ms 340 KB Output is correct
2 Correct 0 ms 340 KB Output is correct
3 Correct 0 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 1496 ms 4300 KB Output is correct
2 Correct 1534 ms 4180 KB Output is correct
3 Correct 1469 ms 4204 KB Output is correct
4 Correct 1513 ms 4212 KB Output is correct
5 Correct 1493 ms 4312 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 0 ms 340 KB Output is correct
2 Correct 0 ms 340 KB Output is correct
3 Correct 0 ms 340 KB Output is correct
4 Correct 1 ms 340 KB Output is correct
5 Correct 1 ms 340 KB Output is correct
6 Correct 1496 ms 4300 KB Output is correct
7 Correct 1534 ms 4180 KB Output is correct
8 Correct 1469 ms 4204 KB Output is correct
9 Correct 1513 ms 4212 KB Output is correct
10 Correct 1493 ms 4312 KB Output is correct
11 Correct 1445 ms 4184 KB Output is correct
12 Correct 1527 ms 4224 KB Output is correct
13 Correct 1450 ms 4156 KB Output is correct
14 Correct 1412 ms 4192 KB Output is correct
15 Correct 1499 ms 4196 KB Output is correct
16 Correct 1440 ms 4312 KB Output is correct
17 Correct 1533 ms 4184 KB Output is correct
18 Correct 1548 ms 4156 KB Output is correct