제출 #650307

#제출 시각아이디문제언어결과실행 시간메모리
650307ymmBubble Sort 2 (JOI18_bubblesort2)C++17
60 / 100
9014 ms21696 KiB
#include "bubblesort2.h"
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;

const int N = 500'032;
const int Sa = 4096, Sq = 4096;
int cnt_gt[N];
int val[N];
int mx_qrange[Sq];
int added_to[N];
vector<int> ans;
int n, q, qx[N], qy[N];


int fen[N];
void fen_add(int i, int x)
{
	++i;
	while (i < N) {
		fen[i] += x;
		i += i & -i;
	}
}
int fen_get(int r)
{
	int ans = 0;
	while (r > 0) {
		ans += fen[r];
		r -= r & -r;
	}
	return ans;
}

void init()
{
	vector<int> cmp(val, val+n);
	sort(cmp.begin(), cmp.end());
	cmp.resize(unique(cmp.begin(), cmp.end()) - cmp.begin());
	int mx = cmp.size() - 1;
	Loop (i,0,n) {
		int j =   lower_bound(cmp.begin(), cmp.end(), val[i])
		        - cmp.begin();
		cnt_gt[i] = fen_get(mx-j);
		fen_add(mx-j, 1);
	}
}

#define OPTIM __attribute__((optimize("O3,unroll-loops"),target("avx")))

OPTIM pii do_cnt_gt(int l, int r, int x)
{
	int ans = 0;
	int mx = INT_MIN;
	Loop (i,l,r) {
		ans += val[i] > x;
		mx = cnt_gt[i] > mx? cnt_gt[i]: mx;
	}
	return {ans, mx};
}

OPTIM int do_sub_inrange(int l, int r, int xl, int xr)
{
	int mx = INT_MIN;
	Loop (i,l,r) {
		int cnt = cnt_gt[i];
		int val = ::val[i];
		cnt -= xl <= val && val < xr;
		mx = cnt > mx? cnt: mx;
		cnt_gt[i] = cnt;
	}
	return mx;
}

OPTIM int do_add_inrange(int l, int r, int xl, int xr)
{
	int mx = INT_MIN;
	Loop (i,l,r) {
		int cnt = cnt_gt[i];
		int val = ::val[i];
		cnt += xl <= val && val < xr;
		mx = cnt > mx? cnt: mx;
		cnt_gt[i] = cnt;
	}
	return mx;
}



int apply(int al, int ar, int qi)
{
	int i = qx[qi], x = qy[qi];
	int mx = INT_MIN;
	if (al < i) {
		int cr = min(ar, i);
		auto [tmp, new_mx] = do_cnt_gt(al, cr, x);
		added_to[qi] += tmp;
		mx = max(mx, new_mx);
	}
	if (i+1 < ar) {
		int cl = max(al, i+1);
		if (val[i] < x) {
			auto tmp = do_add_inrange(cl, ar, val[i], x);
			mx = max(mx, tmp);
		} else {
			auto tmp = do_sub_inrange(cl, ar, x, val[i]);
			mx = max(mx, tmp);
		}
	}
	if (al <= i && i < ar) {
		cnt_gt[i] = added_to[qi];
		mx = max(mx, cnt_gt[i]);
	}
	val[i] = x;
	return mx;
}

void solve_qrange(int ql, int qr)
{
	vector<int> ch;
	Loop (i,ql,qr)
		ch.push_back(val[qx[i]]);
	memset(mx_qrange, 0, sizeof(mx_qrange));
	for (int al = 0; al < n; al += Sa) {
		int ar = min(al + Sa, n);
		Loop (qi,ql,qr) {
			int x = apply(al, ar, qi);
			mx_qrange[qi-ql] = max(mx_qrange[qi-ql], x);
		}
		Loop (i,ql,qr)
			val[qx[i]] = ch[i-ql];
	}
	Loop (i,ql,qr)
		val[qx[i]] = qy[i];
	Loop (i,ql,qr)
		ans.push_back(mx_qrange[i-ql]);
}

vector<int> countScans(vector<int> A, vector<int> X, vector<int> V)
{
	n = A.size();
	Loop (i,0,n)
		val[i] = A[i];
	q = X.size();
	Loop (i,0,q) {
		qx[i] = X[i];
		qy[i] = V[i];
	}
	init();
	for (int ql = 0; ql < q; ql += Sq) {
		int qr = min(ql+Sq, q);
		solve_qrange(ql, qr);
	}
	return ans;
}
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...