제출 #650419

#제출 시각아이디문제언어결과실행 시간메모리
650419ymmBubble Sort 2 (JOI18_bubblesort2)C++17
60 / 100
9092 ms34364 KiB
#include "bubblesort2.h" #include <bits/stdc++.h> #define Loop(x,l,r) for (ll x = (l); x < (ll)(r); ++x) #define LoopR(x,l,r) for (ll x = (r)-1; x >= (ll)(l); --x) typedef long long ll; typedef std::pair<int, int> pii; typedef std::pair<ll , ll > pll; using namespace std; #pragma GCC optimize("O3,unroll-loops") #pragma GCC target("avx2") const int S = 2048 + 512; const int N = 500'032 + S; int seg_mx[N/S][S*2]; int seg_cnt[N/S][S*2]; int val[N], cnt[N], val_srt[N], srt_ind[N]; int n; void add_seg(int l, int r, int x, int t) { #define MAX(x, y) ((x)>(y)?(x):(y)) #define UP(i) seg_mx[t][i] = MAX(seg_mx[t][2*(i)],\ seg_mx[t][2*(i)+1])\ + seg_cnt[t][i]; l += S; r += S; for (;;) { if (l != r && (l&1)) { seg_mx[t][l] += x; seg_cnt[t][l] += x; ++l; } if (l != r && (r&1)) { --r; seg_mx[t][r] += x; seg_cnt[t][r] += x; } l /= 2; r /= 2; if (l == 0) break; if (l > 1) UP(l-1); if (r < S) UP(r); } } void build_seg(int t) { Loop (i,S,2*S) { seg_cnt[t][i] = cnt[srt_ind[S*t + i-S]]; seg_mx[t][i] = seg_cnt[t][i]; } LoopR (i,1,S) { seg_mx[t][i] = max(seg_mx[t][2*i], seg_mx[t][2*i+1]); seg_cnt[t][i] = 0; } } void flush_seg(int t) { Loop (i,1,S) { seg_cnt[t][2*i] += seg_cnt[t][i]; seg_cnt[t][2*i+1] += seg_cnt[t][i]; } Loop (i,S,2*S) cnt[srt_ind[t*S + i-S]] = seg_cnt[t][i]; } int get_gt(int l, int x) { return S - ( upper_bound(val_srt + l, val_srt + l + S, x) - (val_srt + l)); } void up_range(int l, int xl, int xr, int val) { xl = lower_bound(val_srt + l, val_srt + l + S, xl) - val_srt - l; xr = lower_bound(val_srt + l, val_srt + l + S, xr) - val_srt - l; if (xl < xr) add_seg(xl, xr, val, l/S); } void mov_right(int i, int j) { typedef int ymma __attribute__((vector_size(32),aligned(32))); typedef int ymmu __attribute__((vector_size(32),aligned(4))); if (j-i < 16) { Loop (k,i,j) { val_srt[k] = val_srt[k+1]; srt_ind[k] = srt_ind[k+1]; } return; } while (i%8) { val_srt[i] = val_srt[i+1]; srt_ind[i] = srt_ind[i+1]; ++i; } Loop (k,i/8,j/8) { ((ymma *)val_srt)[k] = ((ymmu *)(val_srt+1))[k]; ((ymma *)srt_ind)[k] = ((ymmu *)(srt_ind+1))[k]; } Loop (k,j-j%8,j) { val_srt[k] = val_srt[k+1]; srt_ind[k] = srt_ind[k+1]; } } void mov_left(int i, int j) { typedef int ymma __attribute__((vector_size(32),aligned(32))); typedef int ymmu __attribute__((vector_size(32),aligned(4))); if (j-i < 16) { LoopR (k,i,j) { val_srt[k+1] = val_srt[k]; srt_ind[k+1] = srt_ind[k]; } return; } while (j%8) { --j; val_srt[j+1] = val_srt[j]; srt_ind[j+1] = srt_ind[j]; } LoopR (k,(i+7)/8,j/8) { ((ymmu *)(val_srt+1))[k] = ((ymma *)val_srt)[k]; ((ymmu *)(srt_ind+1))[k] = ((ymma *)srt_ind)[k]; } LoopR (k,i,(i+7)/8*8) { val_srt[k+1] = val_srt[k]; srt_ind[k+1] = srt_ind[k]; } } void mov(int i, int x) { int j = i - i%S; for (; srt_ind[j] != i; ++j); int dst = lower_bound(val_srt + i-i%S, val_srt + i-i%S + S, x) - val_srt; if (j < dst) mov_right(j, --dst); else mov_left(dst, j); val_srt[dst] = x; srt_ind[dst] = i; } void up(int i, int x) { int pre = val[i]; int cnt_gt = 0; for (int l = 0; l < i - i%S; l += S) cnt_gt += get_gt(l, x); flush_seg(i/S); val[i] = x; for (int j = i - i%S; j < i; ++j) cnt_gt += val[j] > x; cnt[i] = cnt_gt; for (int j = i + 1; j < i - i%S + S; ++j) cnt[j] += (x > val[j]) - (pre > val[j]); mov(i, x); build_seg(i/S); int val = pre < x? 1: -1; int xl = pre < x? pre: x; int xr = pre < x? x: pre; for (int l = i - i%S + S; l < n; l += S) up_range(l, xl, xr, val); } int get() { int mx = 0; for (int l = 0; l < n; l += S) mx = max(seg_mx[l/S][1], mx); return mx; } namespace init { int fen[N]; void fen_add(int i, int x) { ++i; while (i < N) { fen[i] += x; i += i & -i; } } int fen_get(int r) { int ans = 0; while (r > 0) { ans += fen[r]; r -= r & -r; } return ans; } void init(vector<int> A) { static pii pii_srt[N]; n = A.size(); Loop (i,0,n) { val[i] = A[i]; pii_srt[i] = {val[i], i}; } Loop (i,n,N) { val[i] = 1e9+10; pii_srt[i] = {val[i], i}; } for (int l = 0; l < n; l += S) { sort(pii_srt+l, pii_srt+l+S); Loop (i,l,l+S) { val_srt[i] = pii_srt[i].first; srt_ind[i] = pii_srt[i].second; } } vector<int> cmp(val, val+N); sort(cmp.begin(), cmp.end()); cmp.resize(unique(cmp.begin(), cmp.end()) - cmp.begin()); int mx = cmp.size() - 1; Loop (i,0,n) { int j = lower_bound(cmp.begin(), cmp.end(), val[i]) - cmp.begin(); cnt[i] = fen_get(mx-j); fen_add(mx-j, 1); } for (int l = 0; l < n; l += S) build_seg(l/S); } } vector<int> countScans(vector<int> A, vector<int> X, vector<int> V) { init::init(A); vector<int> ans; Loop (i,0,X.size()) { up(X[i], V[i]); ans.push_back(get()); } return ans; }
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...