This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
#include <bits/stdc++.h>
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
using namespace std;
using ll = long long;
using pii = pair<int, int>;
#define pb push_back
#define ff first
#define ss second
#define ins insert
#define arr3 array<int, 3>
const int A = 3e5;
struct FT{
    ll bit[A + 1], a[A + 1], s;
    int n;
    FT(int ns){
        n = ns;
        for (int i = 1; i <= n; i++){
            bit[i] = a[i] = 0;
        }
        s = 0;
    }
    void upd(int v, ll x){
        s += x; a[v] += x;
        while (v <= n){
            bit[v] += x;
            v |= (v + 1);
        }
    }
    ll get(int v){
        ll out = 0;
        while (v > 0){
            out += bit[v];
            v = (v & (v + 1)) - 1;
        }
        return out;
    }
    ll get(int l, int r){
        if (l > r) return 0;
        return get(r) - get(l - 1);
    }
};
int main(){
    ios_base::sync_with_stdio(0);
    cin.tie(0);
    cout.tie(0);
    
    int n, q; cin>>n>>q;
    vector<int> a(n + 1);
    for (int i = 1; i <= n; i++){
        cin>>a[i];
    }
    
    vector<arr3> qs;
    for (int i = 1; i <= q; i++){
        int l, r; cin>>l>>r;
        qs.pb({l, r, i});
    }
    
    const int S = 2000;
    auto cmp = [&](arr3 x, arr3 y){
        int x1 = x[0] / S, y1 = y[0] / S;
        if (x1 != y1){
            return x[0] < y[0];
        }
        return (x1 & 1) ? (x[1] < y[1]) : (x[1] > y[1]);
    };
    sort(qs.begin(), qs.end(), cmp);
    
    vector<int> p(n + 1);
    for (int i = 1; i <= n; i++){
        if (i % 2){
            p[i] = (i + 1) / 2;
        }
        else {
            p[i] = n - i / 2 + 1;
        }
    }
    
    ll sum = 0, s1, s2;
    vector<int> f(n + 1), cnt(A + 1);
    pii rr[n + 1]; rr[0] = {1, n};
    for (int i = 1; i <= n; i++) rr[i] = {0, 0};
    FT T1(n), T2(n);
    auto add = [&](int x){
        x = a[x];
        int k = cnt[x], j1 = rr[k].ss, j = p[j1];
        sum += (f[j1] + 1);
        s1 = T1.get(1, j - 1); s2 = T2.get(1, j - 1);
        sum += (1LL * j * s1);
        sum -= (s2 - s1);
        sum += (T2.s - s2 - T2.a[j]);
        sum -= 1LL * (j - 1) * (T1.s - s1 - T1.a[j]);
        
        if (rr[k].ff == rr[k].ss) rr[k] = {0, 0};
        else rr[k].ss--;
        
        cnt[x]++; f[j1]++; k++;
        T1.upd(j, 1);
        T2.upd(j, j);
        
        if (!rr[k].ff) rr[k] = {j1, j1};
        else rr[k].ff--;
    };
    auto rem = [&](int x){
        x = a[x];
        int k = cnt[x], j1 = rr[k].ff, j = p[j1];
        sum -= f[j1];
        s1 = T1.get(1, j - 1); s2 = T2.get(1, j - 1);
        sum -= (1LL * j * s1);
        sum += (s2 - s1);
        sum -= (T2.s - s2 - T2.a[j]);
        sum += 1LL * (j - 1) * (T1.s - s1 - T1.a[j]);
        
        if (rr[k].ff == rr[k].ss) rr[k] = {0, 0};
        else rr[k].ff++;
        
        cnt[x]--; f[j1]--; k--;
        T1.upd(j, -1);
        T2.upd(j, -j);
        
        if (!rr[k].ff) rr[k] = {j1, j1};
        else rr[k].ss++;
    };
    
    vector<ll> out(q + 1);
    int lc = 1, rc = 0;
    for (auto [l, r, ii]: qs){
        while (lc > l) add(--lc);
        while (rc < r) add(++rc);
        while (lc < l) rem(lc++);
        while (rc > r) rem(rc--);
        
        out[ii] = sum;
    }
    
    for (int i = 1; i <= q; i++){
        cout<<out[i]<<"\n";
    }
}
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... |