#pragma GCC optimize("O2,unroll-loops,Ofast")
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
const int N = 100'000;
const int K = 200;
const ll INF = 1'000'000'000'000'000'000;
const int Q = 300'000;
int n, k, q;
ll a[N + 10], sum[N + 10];
ll dp[2][N + 10], val, valSum;
ll s2[N + 10];
int res[K + 2][N + 10];
int l, r, t, row;
int pntL[Q + 10], pntR[Q + 10];
int minOpt[Q + 10], maxOpt[Q + 10];
mt19937 rng(1234);
void calcS2() {
    for (int i = 1; i <= n; i++)
        s2[i] = s2[i - 1] + sum[i - 1] * a[i];
}
ll getVal(const int &nl, const int &nr) {
    return s2[nr] - s2[nl - 1] - sum[nl - 1] * (sum[nr] - sum[nl - 1]);
    while (nl < l) {
        val += a[l - 1] * valSum;
        valSum += a[l - 1];
        l--;
    }
    while (r < nr) {
        val += a[r + 1] * valSum;
        valSum += a[r + 1];
        r++;
    }
    while (l < nl) {
        valSum -= a[l];
        val -= a[l] * valSum;
        l++;
    }
    while (nr < r) {
        valSum -= a[r];
        val -= a[r] * (valSum);
        r--;
    }
    return val;
}
inline void addQu(int l, int r, int x, int y) {
    q++;
    pntL[q] = l;
    pntR[q] = r;
    minOpt[q] = x;
    maxOpt[q] = y;
}
void solve(int i) {
    int mid = (pntL[i] + pntR[i]) >> 1;
    dp[t][mid] = INF;
    int opt = minOpt[i];
    for (int j = minOpt[i]; j <= maxOpt[i] && j < mid; j++) {
        ll tmp = dp[1 - t][j] + getVal(j + 1, mid);
        if (tmp <= dp[t][mid]) {
            dp[t][mid] = tmp;
            opt = j;
        }
    }
    res[row][mid] = opt;
    if (pntL[i] < mid)
        addQu(pntL[i], mid - 1, minOpt[i], opt);
    if (mid < pntR[i])
        addQu(mid + 1, pntR[i], opt, maxOpt[i]);
}
void initVal() {
    l = r = 1;
    valSum = a[1];
    val = 0;
}
void calcDP() {
    initVal();
    for (int i = 1; i <= n; i++)
        dp[1][i] = getVal(1, i);
    for (int j = 2; j <= k; j++) {
        q = 1;
        pntL[1] = minOpt[1] = 1;
        pntR[1] = maxOpt[1] = n;
        t = j % 2;
        row = j;
        for (int i = 1; i <= q; i++)
            solve(i);
    }
}
void calcAns() {
    int pnt = n;
    vector<int> ans;
    for (int i = k; i >= 2; i--) {
        ans.push_back(res[i][pnt]);
        pnt = res[i][pnt];
    }
    sort(ans.begin(), ans.end());
    cout << getVal(1, n) - dp[k % 2][n] << '\n';
    for (auto x: ans)
        cout << x << ' ';
    cout.flush();
}
void readInput() {
    cin >> n >> k;
    for (int i = 1; i <= n; i++) {
        cin >> a[i];
        sum[i] = sum[i - 1] + a[i];
    }
    k++;   
}
int main() {
    ios_base::sync_with_stdio(false); cin.tie(0); cout.tie(0);
    readInput();
    calcS2();
    calcDP();
    calcAns();
    return 0;
}
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... |