이 제출은 이전 버전의 oj.uz에서 채점하였습니다. 현재는 제출 당시와는 다른 서버에서 채점을 하기 때문에, 다시 제출하면 결과가 달라질 수도 있습니다.
#include "bubblesort2.h"
#include <bits/stdc++.h>
#define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x)
#define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x)
typedef long long ll;
typedef std::pair<int, int> pii;
typedef std::pair<ll , ll > pll;
using namespace std;
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2")
const int S = 2048 + 512;
const int N = 500'032 + S;
int seg_mx[N/S][S*2];
int seg_cnt[N/S][S*2];
int val[N], cnt[N], val_srt[N], srt_ind[N];
int n;
void add_seg(int l, int r, int x, int t)
{
#define MAX(x, y) ((x)>(y)?(x):(y))
#define UP(i) seg_mx[t][i] = MAX(seg_mx[t][2*(i)],\
seg_mx[t][2*(i)+1])\
+ seg_cnt[t][i];
l += S; r += S;
for (;;) {
if (l != r && (l&1)) {
seg_mx[t][l] += x;
seg_cnt[t][l] += x;
++l;
}
if (l != r && (r&1)) {
--r;
seg_mx[t][r] += x;
seg_cnt[t][r] += x;
}
l /= 2; r /= 2;
if (l == 0) break;
if (l > 1) UP(l-1);
if (r < S) UP(r);
}
}
void build_seg(int t)
{
Loop (i,S,2*S) {
seg_cnt[t][i] = cnt[srt_ind[S*t + i-S]];
seg_mx[t][i] = seg_cnt[t][i];
}
LoopR (i,1,S) {
seg_mx[t][i] = max(seg_mx[t][2*i], seg_mx[t][2*i+1]);
seg_cnt[t][i] = 0;
}
}
void flush_seg(int t)
{
Loop (i,1,S) {
seg_cnt[t][2*i] += seg_cnt[t][i];
seg_cnt[t][2*i+1] += seg_cnt[t][i];
}
Loop (i,S,2*S)
cnt[srt_ind[t*S + i-S]] = seg_cnt[t][i];
}
int get_gt(int l, int x)
{
return S - ( upper_bound(val_srt + l, val_srt + l + S, x)
- (val_srt + l));
}
void up_range(int l, int xl, int xr, int val)
{
xl = lower_bound(val_srt + l, val_srt + l + S, xl) - val_srt - l;
xr = lower_bound(val_srt + l, val_srt + l + S, xr) - val_srt - l;
if (xl < xr)
add_seg(xl, xr, val, l/S);
}
void mov_right(int i, int j)
{
typedef int ymma __attribute__((vector_size(32),aligned(32)));
typedef int ymmu __attribute__((vector_size(32),aligned(4)));
if (j-i < 16) {
Loop (k,i,j) {
val_srt[k] = val_srt[k+1];
srt_ind[k] = srt_ind[k+1];
}
return;
}
while (i%8) {
val_srt[i] = val_srt[i+1];
srt_ind[i] = srt_ind[i+1];
++i;
}
Loop (k,i/8,j/8) {
((ymma *)val_srt)[k] = ((ymmu *)(val_srt+1))[k];
((ymma *)srt_ind)[k] = ((ymmu *)(srt_ind+1))[k];
}
Loop (k,j-j%8,j) {
val_srt[k] = val_srt[k+1];
srt_ind[k] = srt_ind[k+1];
}
}
void mov_left(int i, int j)
{
typedef int ymma __attribute__((vector_size(32),aligned(32)));
typedef int ymmu __attribute__((vector_size(32),aligned(4)));
if (j-i < 16) {
LoopR (k,i,j) {
val_srt[k+1] = val_srt[k];
srt_ind[k+1] = srt_ind[k];
}
return;
}
while (j%8) {
--j;
val_srt[j+1] = val_srt[j];
srt_ind[j+1] = srt_ind[j];
}
LoopR (k,(i+7)/8,j/8) {
((ymmu *)(val_srt+1))[k] = ((ymma *)val_srt)[k];
((ymmu *)(srt_ind+1))[k] = ((ymma *)srt_ind)[k];
}
LoopR (k,i,(i+7)/8*8) {
val_srt[k+1] = val_srt[k];
srt_ind[k+1] = srt_ind[k];
}
}
void mov(int i, int x)
{
int j = i - i%S;
for (; srt_ind[j] != i; ++j);
int dst = lower_bound(val_srt + i-i%S, val_srt + i-i%S + S, x)
- val_srt;
if (j < dst)
mov_right(j, --dst);
else
mov_left(dst, j);
val_srt[dst] = x;
srt_ind[dst] = i;
}
void up(int i, int x)
{
int pre = val[i];
int cnt_gt = 0;
for (int l = 0; l < i - i%S; l += S)
cnt_gt += get_gt(l, x);
flush_seg(i/S);
val[i] = x;
for (int j = i - i%S; j < i; ++j)
cnt_gt += val[j] > x;
cnt[i] = cnt_gt;
for (int j = i + 1; j < i - i%S + S; ++j)
cnt[j] += (x > val[j]) - (pre > val[j]);
mov(i, x);
build_seg(i/S);
int val = pre < x? 1: -1;
int xl = pre < x? pre: x;
int xr = pre < x? x: pre;
for (int l = i - i%S + S; l < n; l += S)
up_range(l, xl, xr, val);
}
int get()
{
int mx = 0;
for (int l = 0; l < n; l += S)
mx = max(seg_mx[l/S][1], mx);
return mx;
}
namespace init {
int fen[N];
void fen_add(int i, int x)
{
++i;
while (i < N) {
fen[i] += x;
i += i & -i;
}
}
int fen_get(int r)
{
int ans = 0;
while (r > 0) {
ans += fen[r];
r -= r & -r;
}
return ans;
}
void init(vector<int> A)
{
static pii pii_srt[N];
n = A.size();
Loop (i,0,n) {
val[i] = A[i];
pii_srt[i] = {val[i], i};
}
Loop (i,n,N) {
val[i] = 1e9+10;
pii_srt[i] = {val[i], i};
}
for (int l = 0; l < n; l += S) {
sort(pii_srt+l, pii_srt+l+S);
Loop (i,l,l+S) {
val_srt[i] = pii_srt[i].first;
srt_ind[i] = pii_srt[i].second;
}
}
vector<int> cmp(val, val+N);
sort(cmp.begin(), cmp.end());
cmp.resize(unique(cmp.begin(), cmp.end()) - cmp.begin());
int mx = cmp.size() - 1;
Loop (i,0,n) {
int j = lower_bound(cmp.begin(), cmp.end(), val[i])
- cmp.begin();
cnt[i] = fen_get(mx-j);
fen_add(mx-j, 1);
}
for (int l = 0; l < n; l += S)
build_seg(l/S);
}
}
vector<int> countScans(vector<int> A, vector<int> X, vector<int> V)
{
init::init(A);
vector<int> ans;
Loop (i,0,X.size()) {
up(X[i], V[i]);
ans.push_back(get());
}
return ans;
}
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |
# | Verdict | Execution time | Memory | Grader output |
---|
Fetching results... |