#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;
const int N = 100'032;
double h[N], w[N];
double dp[N], suf_sum[N];
double dsw[N];
int n;
//__attribute__((optimize("Ofast,unroll-loops"),target("avx2")))
//double find_min(int l, int r, double h)
//{
// double ans = 1e100;
// Loop (j,l,r) {
// double tmp = h-::h[j];
// tmp = tmp*tmp;
// tmp += dsw[j];
// ans = ans < tmp? ans: tmp;
// }
// return ans;
//}
double find_min(int, int, double);
asm("\n"
" .p2align 4\n"
" .globl _Z8find_miniid\n"
" .type _Z8find_miniid, @function\n"
"_Z8find_miniid:\n"
".myLFB9897:\n"
" .cfi_startproc\n"
" movslq %edi, %rax\n"
" movslq %esi, %rcx\n"
" cmpq %rcx, %rax\n"
" jge .myL63\n"
" pushq %rbp\n"
" .cfi_def_cfa_offset 16\n"
" .cfi_offset 6, -16\n"
" subq %rax, %rcx\n"
" vmovsd %xmm0, %xmm0, %xmm1\n"
" movq %rax, %r11\n"
" leaq -1(%rcx), %rdx\n"
" movq %rsp, %rbp\n"
" .cfi_def_cfa_register 6\n"
" pushq %rbx\n"
" .cfi_offset 3, -24\n"
" cmpq $2, %rdx\n"
" jbe .myL64\n"
" movq %rcx, %rbx\n"
" leaq h(%rip), %r10\n"
" leaq 0(,%rax,8), %rsi\n"
" vmovapd .myLC0(%rip), %ymm2\n"
" shrq $2, %rbx\n"
" leaq (%r10,%rsi), %r8\n"
" vbroadcastsd %xmm0, %ymm3\n"
" xorl %edx, %edx\n"
" salq $5, %rbx\n"
" leaq dsw(%rip), %r9\n"
" leaq -32(%rbx), %rdi\n"
" addq %r9, %rsi\n"
" shrq $5, %rdi\n"
" addq $1, %rdi\n"
" andl $7, %edi\n"
" je .myL58\n"
" cmpq $1, %rdi\n"
" je .myL89\n"
" cmpq $2, %rdi\n"
" je .myL90\n"
" cmpq $3, %rdi\n"
" je .myL91\n"
" cmpq $4, %rdi\n"
" je .myL92\n"
" cmpq $5, %rdi\n"
" je .myL93\n"
" cmpq $6, %rdi\n"
" jne .myL110\n"
".myL94:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
".myL93:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
".myL92:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
".myL91:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
".myL90:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
".myL89:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" addq $32, %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" cmpq %rdx, %rbx\n"
" je .myL104\n"
".myL58:\n"
" vsubpd (%r8,%rdx), %ymm3, %ymm0\n"
" leaq 32(%rdx), %rdi\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi,%rdx), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 32(%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 32(%rsi,%rdx), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 64(%r8,%rdx), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 64(%rsi,%rdx), %ymm0, %ymm0\n"
" leaq 224(%rdi), %rdx\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 64(%r8,%rdi), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 64(%rsi,%rdi), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 96(%r8,%rdi), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 96(%rsi,%rdi), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 128(%r8,%rdi), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 128(%rsi,%rdi), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 160(%r8,%rdi), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 160(%rsi,%rdi), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" vsubpd 192(%r8,%rdi), %ymm3, %ymm0\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd 192(%rsi,%rdi), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" cmpq %rdx, %rbx\n"
" jne .myL58\n"
".myL104:\n"
" vextractf128 $0x1, %ymm2, %xmm0\n"
" movq %rcx, %rdx\n"
" vminpd %xmm2, %xmm0, %xmm0\n"
" andq $-4, %rdx\n"
" addq %rdx, %rax\n"
" vunpckhpd %xmm0, %xmm0, %xmm2\n"
" vminpd %xmm0, %xmm2, %xmm2\n"
" cmpq %rdx, %rcx\n"
" je .myL111\n"
" vzeroupper\n"
".myL57:\n"
" subq %rdx, %rcx\n"
" cmpq $1, %rcx\n"
" je .myL61\n"
" addq %r11, %rdx\n"
" vmovddup %xmm1, %xmm0\n"
" vmovddup %xmm2, %xmm2\n"
" vsubpd (%r10,%rdx,8), %xmm0, %xmm0\n"
" vmulpd %xmm0, %xmm0, %xmm0\n"
" vaddpd (%r9,%rdx,8), %xmm0, %xmm0\n"
" movq %rcx, %rdx\n"
" andq $-2, %rdx\n"
" addq %rdx, %rax\n"
" vminpd %xmm2, %xmm0, %xmm0\n"
" vunpckhpd %xmm0, %xmm0, %xmm2\n"
" vminpd %xmm0, %xmm2, %xmm2\n"
" cmpq %rdx, %rcx\n"
" je .myL55\n"
".myL61:\n"
" vsubsd (%r10,%rax,8), %xmm1, %xmm0\n"
" vmulsd %xmm0, %xmm0, %xmm0\n"
" vaddsd (%r9,%rax,8), %xmm0, %xmm0\n"
" vminsd %xmm0, %xmm2, %xmm2\n"
".myL55:\n"
" movq -8(%rbp), %rbx\n"
" vmovsd %xmm2, %xmm2, %xmm0\n"
" leave\n"
" .cfi_remember_state\n"
" .cfi_def_cfa 7, 8\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
".myL111:\n"
" .cfi_restore_state\n"
" vzeroupper\n"
" movq -8(%rbp), %rbx\n"
" vmovsd %xmm2, %xmm2, %xmm0\n"
" leave\n"
" .cfi_remember_state\n"
" .cfi_def_cfa 7, 8\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
".myL110:\n"
" .cfi_restore_state\n"
" vsubpd (%r8), %ymm3, %ymm0\n"
" movl $32, %edx\n"
" vmulpd %ymm0, %ymm0, %ymm0\n"
" vaddpd (%rsi), %ymm0, %ymm0\n"
" vminpd %ymm0, %ymm2, %ymm2\n"
" jmp .myL94\n"
" .p2align 4,,10\n"
" .p2align 3\n"
".myL63:\n"
" .cfi_def_cfa 7, 8\n"
" .cfi_restore 3\n"
" .cfi_restore 6\n"
" vmovsd .myLC1(%rip), %xmm2\n"
" vmovsd %xmm2, %xmm2, %xmm0\n"
" ret\n"
".myL64:\n"
" .cfi_def_cfa 6, 16\n"
" .cfi_offset 3, -24\n"
" .cfi_offset 6, -16\n"
" xorl %edx, %edx\n"
" vmovsd .myLC1(%rip), %xmm2\n"
" leaq h(%rip), %r10\n"
" leaq dsw(%rip), %r9\n"
" jmp .myL57\n"
" .cfi_endproc\n"
".myLFE9897:\n"
" .size _Z8find_miniid, .-_Z8find_miniid\n"
" .section .rodata.cst32,\"aM\",@progbits,32\n"
" .align 32\n"
".myLC0:\n"
" .long 630506365\n"
" .long 1420970413\n"
" .long 630506365\n"
" .long 1420970413\n"
" .long 630506365\n"
" .long 1420970413\n"
" .long 630506365\n"
" .long 1420970413\n"
" .set .myLC1,.myLC0\n"
);
int main()
{
cin.tie(0) -> sync_with_stdio(false);
cin >> n;
Loop (i,0,n) {
ll x;
cin >> x;
h[i] = x;
}
Loop (i,0,n) {
ll x;
cin >> x;
w[i] = x;
}
suf_sum[n-1] = w[n-1];
LoopR (i,0,n-1)
suf_sum[i] = suf_sum[i+1] + w[i];
dp[0] = 0;
dsw[0] = dp[0] + suf_sum[0] - w[0];
Loop (i,1,n) {
dp[i] = find_min(0,i,h[i]) - suf_sum[i];
dsw[i] = dp[i] + suf_sum[i] - w[i];
}
cout << (ll)dp[n-1] << '\n';
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
0 ms |
340 KB |
Output is correct |
2 |
Correct |
0 ms |
340 KB |
Output is correct |
3 |
Correct |
0 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1496 ms |
4300 KB |
Output is correct |
2 |
Correct |
1534 ms |
4180 KB |
Output is correct |
3 |
Correct |
1469 ms |
4204 KB |
Output is correct |
4 |
Correct |
1513 ms |
4212 KB |
Output is correct |
5 |
Correct |
1493 ms |
4312 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
0 ms |
340 KB |
Output is correct |
2 |
Correct |
0 ms |
340 KB |
Output is correct |
3 |
Correct |
0 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
6 |
Correct |
1496 ms |
4300 KB |
Output is correct |
7 |
Correct |
1534 ms |
4180 KB |
Output is correct |
8 |
Correct |
1469 ms |
4204 KB |
Output is correct |
9 |
Correct |
1513 ms |
4212 KB |
Output is correct |
10 |
Correct |
1493 ms |
4312 KB |
Output is correct |
11 |
Correct |
1445 ms |
4184 KB |
Output is correct |
12 |
Correct |
1527 ms |
4224 KB |
Output is correct |
13 |
Correct |
1450 ms |
4156 KB |
Output is correct |
14 |
Correct |
1412 ms |
4192 KB |
Output is correct |
15 |
Correct |
1499 ms |
4196 KB |
Output is correct |
16 |
Correct |
1440 ms |
4312 KB |
Output is correct |
17 |
Correct |
1533 ms |
4184 KB |
Output is correct |
18 |
Correct |
1548 ms |
4156 KB |
Output is correct |