답안 #650421

# 제출 시각 아이디 문제 언어 결과 실행 시간 메모리
650421 2022-10-13T17:29:47 Z ymm Bubble Sort 2 (JOI18_bubblesort2) C++17
60 / 100
9000 ms 34228 KB
#include "bubblesort2.h"
#include <bits/stdc++.h>
// Wide signed integer used for every macro-generated loop counter.
using ll = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using namespace std;

// Loop(var, first, last): `var` counts upward over the half-open range
// [first, last).
#define Loop(var, first, last) \
	for (ll var = (first); var < static_cast<ll>(last); ++var)
// LoopR(var, first, last): `var` counts downward over the same half-open
// range, starting at last-1 and stopping after first.
#define LoopR(var, first, last) \
	for (ll var = (last)-1; var >= static_cast<ll>(first); --var)

// GCC-specific tuning for the functions defined after this point:
// request O3-level optimisation with loop unrolling, and allow the
// compiler to emit AVX2 instructions (mov_right / mov_left below move
// data through 32-byte vector types).
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2")

// Number of consecutive array positions handled as one block. Every
// per-block helper (segment trees, sorted windows) works on exactly S slots.
const int S = 2048;
// Capacity of the flat arrays; N/S is the number of block slots reserved.
const int N = 500'032 + S;

// One bottom-up segment tree per block, leaves at indices [S, 2S) in the
// block's sorted order.
//   seg_cnt[t][i]: for a leaf, the element's own count; for an inner node,
//                  an addition that applies to the node's whole subtree.
//   seg_mx[t][i] : maximum over the subtree of i, including seg_cnt[t][i]
//                  but not the additions stored in i's ancestors.
int seg_mx[N/S][S*2];
int seg_cnt[N/S][S*2];

// val[i]: current value at position i (positions >= n hold a large padding
//         value written by init).
// cnt[i]: number of earlier positions holding a strictly greater value.
//         While range additions are pending in a block's tree this copy is
//         stale until flush_seg writes the tree back.
int val[N], cnt[N];

// val_srt: the values of each block, sorted ascending inside the block.
// srt_ind: srt_ind[k] is the original position of the value in val_srt[k].
// Both are explicitly 32-byte aligned: mov_right / mov_left store into them
// through a vector type declared aligned(32), which a plain int array does
// not guarantee on its own.
alignas(32) int val_srt[N];
alignas(32) int srt_ind[N];

// Number of real (non-padding) elements.
int n;

// Adds x to every leaf in the half-open leaf range [l, r) of block t's
// segment tree (leaf offsets are 0-based, i.e. before the +S shift).
// The addition is recorded on the covering nodes only; on the way up each
// boundary parent is recomputed as max(children) + its own stored addition.
void add_seg(int l, int r, int x, int t)
{
	int *const mx = seg_mx[t];
	int *const add = seg_cnt[t];

	// Record the addition on one node.
	const auto apply = [mx, add, x](int node) {
		mx[node] += x;
		add[node] += x;
	};
	// Rebuild a node's maximum from its two children.
	const auto pull = [mx, add](int node) {
		const int left = mx[2*node];
		const int right = mx[2*node + 1];
		mx[node] = (left > right ? left : right) + add[node];
	};

	l += S;
	r += S;
	while (true) {
		if (l != r && (l & 1)) {
			apply(l);
			++l;
		}
		if (l != r && (r & 1)) {
			--r;
			apply(r);
		}
		l /= 2;
		r /= 2;
		if (l == 0)
			return;
		// Node l-1 is the parent of whatever was touched on the left side;
		// node r is the parent of whatever was touched on the right side.
		if (l > 1)
			pull(l - 1);
		// r == S can only be the out-of-range sentinel one past the last
		// inner node, whose children would lie outside the array.
		if (r < S)
			pull(r);
	}
}

void build_seg(int t)
{
	Loop (i,S,2*S) {
		seg_cnt[t][i] = cnt[srt_ind[S*t + i-S]];
		seg_mx[t][i] = seg_cnt[t][i];
	}
	LoopR (i,1,S) {
		seg_mx[t][i] = max(seg_mx[t][2*i], seg_mx[t][2*i+1]);
		seg_cnt[t][i] = 0;
	}
}

void flush_seg(int t)
{
	Loop (i,1,S) {
		seg_cnt[t][2*i] += seg_cnt[t][i];
		seg_cnt[t][2*i+1] += seg_cnt[t][i];
	}
	Loop (i,S,2*S)
		cnt[srt_ind[t*S + i-S]] = seg_cnt[t][i];
}

// Number of values strictly greater than x in the sorted window
// val_srt[l .. l+S).
int get_gt(int l, int x)
{
	const int *const first = val_srt + l;
	const int *const last = first + S;
	const int *const first_greater = upper_bound(first, last, x);
	return static_cast<int>(last - first_greater);
}

// In the block whose first position is l, adds `val` to the tree entries of
// all elements whose value lies in the half-open interval [xl, xr).
void up_range(int l, int xl, int xr, int val)
{
	const int *const first = val_srt + l;
	const int *const last = first + S;

	// Sorted positions [lo, hi) are exactly the values v with xl <= v < xr.
	const int lo = static_cast<int>(lower_bound(first, last, xl) - first);
	const int hi = static_cast<int>(lower_bound(first, last, xr) - first);
	if (lo >= hi)
		return;
	add_seg(lo, hi, val, l/S);
}

// Closes the gap at index i by pulling the following entries one slot
// towards lower indices: for every k in [i, j), val_srt[k] and srt_ind[k]
// receive the entries from k+1. Slot j keeps its old content; the caller
// (mov) overwrites it with the relocated element.
void mov_right(int i, int j)
{
	// 32-byte vectors of int. ymma promises a 32-byte aligned address,
	// ymmu only requires 4-byte alignment.
	// NOTE(review): the ymma accesses below assume val_srt and srt_ind start
	// on a 32-byte boundary - confirm against their declarations.
	typedef int ymma __attribute__((vector_size(32),aligned(32)));
	typedef int ymmu __attribute__((vector_size(32),aligned(4)));
	// Short ranges: plain element-by-element copy.
	if (j-i < 16) {
		Loop (k,i,j) {
			val_srt[k] = val_srt[k+1];
			srt_ind[k] = srt_ind[k+1];
		}
		return;
	}
	// Scalar prologue until the destination index is a multiple of 8.
	while (i%8) {
		val_srt[i] = val_srt[i+1];
		srt_ind[i] = srt_ind[i+1];
		++i;
	}
	// Main part, 8 ints per step: aligned store to [8k, 8k+8) from an
	// unaligned load of [8k+1, 8k+9). Ascending k means each source chunk
	// is read before a later store can overwrite it.
	Loop (k,i/8,j/8) {
		((ymma *)val_srt)[k] = ((ymmu *)(val_srt+1))[k];
		((ymma *)srt_ind)[k] = ((ymmu *)(srt_ind+1))[k];
	}
	// Scalar epilogue for the last j%8 entries.
	Loop (k,j-j%8,j) {
		val_srt[k] = val_srt[k+1];
		srt_ind[k] = srt_ind[k+1];
	}
}


// Opens a gap at index i by pushing entries one slot towards higher
// indices: for every k in [i, j), slot k+1 of val_srt and srt_ind receives
// the entries from k, processed from high k to low k so nothing is
// overwritten before it has been copied. Slot i keeps its old content; the
// caller (mov) overwrites it with the relocated element.
void mov_left(int i, int j)
{
	// 32-byte vectors of int. ymma promises a 32-byte aligned address,
	// ymmu only requires 4-byte alignment.
	// NOTE(review): the ymma accesses below assume val_srt and srt_ind start
	// on a 32-byte boundary - confirm against their declarations.
	typedef int ymma __attribute__((vector_size(32),aligned(32)));
	typedef int ymmu __attribute__((vector_size(32),aligned(4)));
	// Short ranges: plain element-by-element copy, highest index first.
	if (j-i < 16) {
		LoopR (k,i,j) {
			val_srt[k+1] = val_srt[k];
			srt_ind[k+1] = srt_ind[k];
		}
		return;
	}
	// Scalar part at the top end until j is a multiple of 8.
	while (j%8) {
		--j;
		val_srt[j+1] = val_srt[j];
		srt_ind[j+1] = srt_ind[j];
	}
	// Main part, 8 ints per step, descending k: aligned load of [8k, 8k+8)
	// stored unaligned to [8k+1, 8k+9). The first chunk index is i rounded
	// up to a multiple of 8.
	LoopR (k,(i+7)/8,j/8) {
		((ymmu *)(val_srt+1))[k] = ((ymma *)val_srt)[k];
		((ymmu *)(srt_ind+1))[k] = ((ymma *)srt_ind)[k];
	}
	// Scalar part for the entries below the first full chunk.
	LoopR (k,i,(i+7)/8*8) {
		val_srt[k+1] = val_srt[k];
		srt_ind[k+1] = srt_ind[k];
	}
}

// Re-positions element i inside its block's sorted window after its value
// changed to x, keeping val_srt ascending and srt_ind in step with it.
void mov(int i, int x)
{
	const int blk_begin = i - i%S;

	// Current sorted slot of element i (linear scan over srt_ind).
	int from = blk_begin;
	while (srt_ind[from] != i)
		++from;

	// First slot whose value is >= x; the old value is still in the window.
	int *const first = val_srt + blk_begin;
	int to = static_cast<int>(lower_bound(first, first + S, x) - val_srt);

	if (from < to) {
		// Removing the old entry shifts the target one slot down.
		--to;
		mov_right(from, to);
	} else {
		mov_left(to, from);
	}
	val_srt[to] = x;
	srt_ind[to] = i;
}

void up(int i, int x)
{
	int pre = val[i];
	int cnt_gt = 0;
	for (int l = 0; l < i - i%S; l += S)
		cnt_gt += get_gt(l, x);
	flush_seg(i/S);
	val[i] = x;
	for (int j = i - i%S; j < i; ++j)
		cnt_gt += val[j] > x;
	cnt[i] = cnt_gt;
	for (int j = i + 1; j < i - i%S + S; ++j)
		cnt[j] += (x > val[j]) - (pre > val[j]);
	mov(i, x);
	build_seg(i/S);
	int val = pre < x? 1: -1;
	int xl = pre < x? pre: x;
	int xr = pre < x? x: pre;
	for (int l = i - i%S + S; l < n; l += S)
		up_range(l, xl, xr, val);
}

// Largest count over all elements: the maximum of the root values of every
// block tree that covers at least one real position (never below 0).
int get()
{
	int best = 0;
	for (int t = 0; t*S < n; ++t)
		best = max(best, seg_mx[t][1]);
	return best;
}

namespace init {
int fen[N];
void fen_add(int i, int x)
{
	++i;
	while (i < N) {
		fen[i] += x;
		i += i & -i;
	}
}
int fen_get(int r)
{
	int ans = 0;
	while (r > 0) {
		ans += fen[r];
		r -= r & -r;
	}
	return ans;
}
 
void init(vector<int> A)
{
	static pii pii_srt[N];
	n = A.size();
	Loop (i,0,n) {
		val[i] = A[i];
		pii_srt[i] = {val[i], i};
	}
	Loop (i,n,N) {
		val[i] = 1e9+10;
		pii_srt[i] = {val[i], i};
	}
	for (int l = 0; l < n; l += S) {
		sort(pii_srt+l, pii_srt+l+S);
		Loop (i,l,l+S) {
			val_srt[i] = pii_srt[i].first;
			srt_ind[i] = pii_srt[i].second;
		}
	}
	vector<int> cmp(val, val+N);
	sort(cmp.begin(), cmp.end());
	cmp.resize(unique(cmp.begin(), cmp.end()) - cmp.begin());
	int mx = cmp.size() - 1;
	Loop (i,0,n) {
		int j =   lower_bound(cmp.begin(), cmp.end(), val[i])
		        - cmp.begin();
		cnt[i] = fen_get(mx-j);
		fen_add(mx-j, 1);
	}
	for (int l = 0; l < n; l += S)
		build_seg(l/S);
}
}

// Grader entry point: starting from array A, applies the updates
// A[X[k]] = V[k] in order and returns, after each one, the value reported
// by get() for the current array.
vector<int> countScans(vector<int> A, vector<int> X, vector<int> V)
{
	init::init(A);

	vector<int> ans;
	const size_t queries = X.size();
	for (size_t k = 0; k < queries; ++k) {
		up(X[k], V[k]);
		ans.push_back(get());
	}
	return ans;
}
# 결과 실행 시간 메모리 Grader output
1 Correct 20 ms 8276 KB Output is correct
2 Correct 25 ms 8276 KB Output is correct
3 Correct 41 ms 8276 KB Output is correct
4 Correct 40 ms 8276 KB Output is correct
5 Correct 39 ms 8276 KB Output is correct
6 Correct 39 ms 8276 KB Output is correct
7 Correct 41 ms 8280 KB Output is correct
8 Correct 42 ms 8268 KB Output is correct
9 Correct 43 ms 8280 KB Output is correct
10 Correct 41 ms 8276 KB Output is correct
11 Correct 39 ms 8272 KB Output is correct
12 Correct 40 ms 8276 KB Output is correct
13 Correct 38 ms 8276 KB Output is correct
14 Correct 38 ms 8276 KB Output is correct
15 Correct 38 ms 8276 KB Output is correct
16 Correct 40 ms 8276 KB Output is correct
17 Correct 39 ms 8272 KB Output is correct
18 Correct 38 ms 8276 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 20 ms 8276 KB Output is correct
2 Correct 25 ms 8276 KB Output is correct
3 Correct 41 ms 8276 KB Output is correct
4 Correct 40 ms 8276 KB Output is correct
5 Correct 39 ms 8276 KB Output is correct
6 Correct 39 ms 8276 KB Output is correct
7 Correct 41 ms 8280 KB Output is correct
8 Correct 42 ms 8268 KB Output is correct
9 Correct 43 ms 8280 KB Output is correct
10 Correct 41 ms 8276 KB Output is correct
11 Correct 39 ms 8272 KB Output is correct
12 Correct 40 ms 8276 KB Output is correct
13 Correct 38 ms 8276 KB Output is correct
14 Correct 38 ms 8276 KB Output is correct
15 Correct 38 ms 8276 KB Output is correct
16 Correct 40 ms 8276 KB Output is correct
17 Correct 39 ms 8272 KB Output is correct
18 Correct 38 ms 8276 KB Output is correct
19 Correct 104 ms 8660 KB Output is correct
20 Correct 118 ms 8660 KB Output is correct
21 Correct 121 ms 8660 KB Output is correct
22 Correct 125 ms 8660 KB Output is correct
23 Correct 122 ms 8660 KB Output is correct
24 Correct 120 ms 8616 KB Output is correct
25 Correct 121 ms 8660 KB Output is correct
26 Correct 126 ms 8660 KB Output is correct
27 Correct 121 ms 8616 KB Output is correct
28 Correct 114 ms 8660 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 53 ms 9364 KB Output is correct
2 Correct 422 ms 10088 KB Output is correct
3 Correct 908 ms 10956 KB Output is correct
4 Correct 997 ms 10920 KB Output is correct
5 Correct 900 ms 10924 KB Output is correct
6 Correct 895 ms 10956 KB Output is correct
7 Correct 842 ms 10956 KB Output is correct
8 Correct 815 ms 10964 KB Output is correct
9 Correct 814 ms 10956 KB Output is correct
10 Correct 638 ms 11008 KB Output is correct
11 Correct 666 ms 10956 KB Output is correct
12 Correct 656 ms 10996 KB Output is correct
13 Correct 635 ms 10956 KB Output is correct
14 Correct 613 ms 11084 KB Output is correct
15 Correct 624 ms 10956 KB Output is correct
16 Correct 605 ms 10904 KB Output is correct
17 Correct 607 ms 10964 KB Output is correct
18 Correct 607 ms 10964 KB Output is correct
# 결과 실행 시간 메모리 Grader output
1 Correct 20 ms 8276 KB Output is correct
2 Correct 25 ms 8276 KB Output is correct
3 Correct 41 ms 8276 KB Output is correct
4 Correct 40 ms 8276 KB Output is correct
5 Correct 39 ms 8276 KB Output is correct
6 Correct 39 ms 8276 KB Output is correct
7 Correct 41 ms 8280 KB Output is correct
8 Correct 42 ms 8268 KB Output is correct
9 Correct 43 ms 8280 KB Output is correct
10 Correct 41 ms 8276 KB Output is correct
11 Correct 39 ms 8272 KB Output is correct
12 Correct 40 ms 8276 KB Output is correct
13 Correct 38 ms 8276 KB Output is correct
14 Correct 38 ms 8276 KB Output is correct
15 Correct 38 ms 8276 KB Output is correct
16 Correct 40 ms 8276 KB Output is correct
17 Correct 39 ms 8272 KB Output is correct
18 Correct 38 ms 8276 KB Output is correct
19 Correct 104 ms 8660 KB Output is correct
20 Correct 118 ms 8660 KB Output is correct
21 Correct 121 ms 8660 KB Output is correct
22 Correct 125 ms 8660 KB Output is correct
23 Correct 122 ms 8660 KB Output is correct
24 Correct 120 ms 8616 KB Output is correct
25 Correct 121 ms 8660 KB Output is correct
26 Correct 126 ms 8660 KB Output is correct
27 Correct 121 ms 8616 KB Output is correct
28 Correct 114 ms 8660 KB Output is correct
29 Correct 53 ms 9364 KB Output is correct
30 Correct 422 ms 10088 KB Output is correct
31 Correct 908 ms 10956 KB Output is correct
32 Correct 997 ms 10920 KB Output is correct
33 Correct 900 ms 10924 KB Output is correct
34 Correct 895 ms 10956 KB Output is correct
35 Correct 842 ms 10956 KB Output is correct
36 Correct 815 ms 10964 KB Output is correct
37 Correct 814 ms 10956 KB Output is correct
38 Correct 638 ms 11008 KB Output is correct
39 Correct 666 ms 10956 KB Output is correct
40 Correct 656 ms 10996 KB Output is correct
41 Correct 635 ms 10956 KB Output is correct
42 Correct 613 ms 11084 KB Output is correct
43 Correct 624 ms 10956 KB Output is correct
44 Correct 605 ms 10904 KB Output is correct
45 Correct 607 ms 10964 KB Output is correct
46 Correct 607 ms 10964 KB Output is correct
47 Correct 4182 ms 17124 KB Output is correct
48 Execution timed out 9081 ms 34228 KB Time limit exceeded
49 Halted 0 ms 0 KB -