#include "rect.h"
#include <bits/stdc++.h>
#pragma GCC optimize("Ofast")
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")
#pragma GCC target("avx2")
#pragma GCC target("popcnt")
using namespace std;
 
// Short names for the scalar and pair types used throughout this file.
using ll = long long;
using ull = unsigned long long;
using lld = long double;
using ii = pair<int,int>;
using pll = pair<ll, ll>;
 
// Vector shorthands built from the aliases above.
using vi = vector<int>;
using vll = vector<ll>;
using vii = vector<ii>;
using vpll = vector<pll>;
using vlld = vector<lld>;
 
// Convenience macros.
// Every expression argument is parenthesised, and value-producing macros are
// wrapped as a whole, so they expand correctly next to other operators
// (e.g. `lsb(x) == 4`, `sz(v) - 1`) and with compound arguments.
#define all(x) (x).begin(),(x).end()      // iterator pair covering the whole container
#define lsb(x) ((x)&(-(x)))               // lowest set bit of x
#define gcd(a,b) __gcd(a,b)
#define sz(x) ((int)(x).size())           // container size as a signed int
#define pb push_back
#define fi first
#define se second
#define fls cout.flush()
 
// fore : i runs over [l, r) ascending.   fo  : i runs over [0, n) ascending.
// forex: i runs from r-1 down to l.      ffo : i runs from n-1 down to 0.
// The loop variable's type is deduced from the start expression.
#define fore(i, l, r) for (auto i = (l); i < (r); i++)
#define fo(i, n) fore (i, 0, n)
#define forex(i, r, l) for (auto i = (r)-1; i >= (l); i--)
#define ffo(i, n) forex (i, n, 0)
 
// Lowers `a` to `b` when `b` is strictly smaller; reports whether `a` changed.
bool cmin(ll &a, ll b) {
    const bool lowered = b < a;
    if (lowered) a = b;
    return lowered;
}
// Raises `a` to `b` when `b` is strictly larger; reports whether `a` changed.
bool cmax(ll &a, ll b) {
    const bool raised = b > a;
    if (raised) a = b;
    return raised;
}
 
// Large 64-bit sentinel; the floating literal 1e18 is converted to ll on initialisation.
const ll INF = 1e18;
// N  : extent of the statically sized global arrays declared in this file.
// LOG: number of power-of-two levels needed to span N entries (2^12 = 4096 > N).
const int N = 2505, LOG = 12;
/// Static range-maximum structure (sparse table) over `n` integers.
///
/// Usage: construct (optionally copying the base values from `a`), call
/// build() once, then call query(l, r) for the maximum over the inclusive
/// index range [l, r].
///
/// The number of levels is derived from `n`, so the table is correct for any
/// size instead of being capped by a fixed level count. Unfilled cells hold
/// the sentinel -1e9.
struct SparseTable{
    
    /// sp[i][b] = maximum of the 2^b values starting at index i (after build()).
    std::vector<std::vector<int>> sp;
    /// Number of base elements.
    int n;

    /// Value stored in cells that have not been filled.
    static constexpr int kEmpty = -1000000000;

    /// Creates an empty table for `n` elements; level 0 must be filled via `sp[i][0]`.
    SparseTable (int n): n(n) { sp = blank(n); }

    /// Creates a table whose level 0 is copied from the first `n` entries of `a`.
    SparseTable (int n, const std::vector<int> &a): n(n) {
        sp = blank(n);
        for (int i = 0; i < n; i++) sp[i][0] = a[i];
    }
    
    /// Fills every level b >= 1 from level b-1. Call once before query().
    void build ( ) {
        const int levels = sp.empty() ? 0 : static_cast<int>(sp[0].size());
        for (int b = 1; b < levels; b++) {
            const int half = 1 << (b - 1);
            // Only windows [i, i + 2^b) that fit inside [0, n) are defined.
            for (int i = 0; i + (1 << b) <= n; i++) {
                sp[i][b] = std::max(sp[i][b-1], sp[i + half][b-1]);
            }
        }
    }
    
    /// Maximum over the inclusive range [l, r]; kEmpty when the range is empty.
    /// Requires 0 <= l and r < n for non-empty ranges.
    int query (int l, int r) const {
        if (l > r) return kEmpty;
        // floor(log2(length)) via an integer bit scan (no floating point).
        const int lg = 31 - __builtin_clz(static_cast<unsigned>(r - l + 1));
        // Two overlapping power-of-two windows cover [l, r].
        return std::max(sp[l][lg], sp[r - (1 << lg) + 1][lg]);
    }

private:
    /// Allocates (n + 1) rows with just enough levels for windows up to length n.
    static std::vector<std::vector<int>> blank (int n) {
        int levels = 1;
        while ((1LL << levels) <= n) levels++;
        return std::vector<std::vector<int>>(n + 1, std::vector<int>(levels, kEmpty));
    }
};
/// Fenwick (binary indexed) tree of 64-bit sums.
///
/// External indices are shifted by one internally, so index 0 is valid and
/// query(-1) yields 0. Two usage conventions are offered (do not mix them on
/// one instance):
///   - update(i, v) + query(i) / query(l, r): point add, prefix / range sum;
///   - update(l, r, v) + query(i): range add, point read.
/// Out-of-range indices are tolerated: updates outside the tree are ignored
/// and prefix queries past the end return the total.
struct Fenwick {
	std::vector<long long> ft;   // 1-based internal storage
	long long n = 0;             // largest internal index that updates may touch

	/// Empty tree: every update is a no-op and every query returns 0.
	Fenwick () {  }
	/// Tree accepting external indices 0 .. n+1 (two slots of slack past `n`).
	/// Initialisers follow declaration order; `n` on the right is the parameter.
	Fenwick (long long n): ft (n+4, 0), n(n+2) {  }
 
	/// Adds `v` at external index `i`. Indices outside the tree are ignored.
	void update (long long i, long long v) {
		i++;
		// Internal index 0 would never advance (its lowest set bit is 0) and
		// negative indices would write out of bounds.
		if (i <= 0) return;
		for (; i <= n; i += i & -i)
			ft[i] += v;
	}
 
	/// Sum of everything stored at external indices 0 .. i (0 when i < 0).
	long long query (long long i) const {
		i++;
		if (i > n) i = n;   // nothing is stored past n, so the prefix is the total
		long long r = 0;
		for (; i > 0; i -= i & -i)
			r += ft[i];
		return r;
	}
 
	/// Range add: after this, query(i) is increased by `v` for l <= i <= r.
	void update (long long l, long long r, long long v) { update(l, +v); update(r+1, -v); }
	/// Sum over external indices l .. r; 0 for an empty range.
	long long query (long long l, long long r) const { if (l>r) return 0ll; return query(r) - query(l-1); }
};
// Work areas filled and consumed by count_rectangles, keyed by column interval [l][r]:
//   updates[l][r]: sweep events {key, delta, row} applied to the Fenwick tree;
//   qrys[l][r]   : queries {lowest row to count, highest row to count, unused (-1)}.
vector<array<int, 3>> updates[N][N], qrys[N][N];
// off[i]: offline range queries answered on row i, each {left col, right col, kind, slot in qans};
// kind 1 asks for the maximum of `up`, kind 0 for the minimum of `down`.
vector<array<int, 4>> off[N];
// qans   : answers of the offline queries, indexed by slot.
// idx    : running slot counter (zero-initialised as a global).
// L, R   : per-cell column interval bounds computed row by row.
// pr[l][r]: last row for which interval [l, r] was recorded (0 = never).
int qans[N*N*3], idx, L[N][N], R[N][N], pr[N][N];
// Counts the pairs (column interval [l, r], row interval [top, bottom]) of interior
// cells such that, as established by the checks below:
//   - in every row of the block, the cells just left of l and just right of r are
//     strictly greater than every cell of that row inside [l, r];
//   - in every column of the block, the cells just above and just below the block are
//     strictly greater than every cell of that column inside the block.
//
// Outline:
//   1. per column: nearest row above / below with a value >= the cell (up / down);
//   2. per interior row: maximal valid column intervals, plus offline range queries;
//   3. answer those queries with per-row sparse tables;
//   4. per column interval: sweep rows with a Fenwick tree and count bottom borders.
//
// NOTE(review): off, pr, qrys and the initial value of idx are global and are not
// reset here, so the function appears to assume it is called once per process.
// NOTE(review): a[0] is read unconditionally; an empty grid is not handled.
ll count_rectangles(vector<vector<int> > a) {
	int n = a.size(), m = a[0].size();
	ll ans = 0;
	// up[i][j]  : nearest row above i in column j whose value is >= a[i][j], or -1.
	// down[i][j]: nearest row below i with value >= a[i][j], or n -- stored NEGATED so
	//             that the max-only SparseTable can later deliver a range minimum.
	vector<vector<int>> down(n, vector<int>(m)), up(n, vector<int>(m));
	fo (j, m) {
		// Monotonic stack of row indices; the bottom element is the "none" sentinel.
		stack<ll> stk;
		stk.push(-1);
		fo (i, n) {
			while (stk.top() != -1 && a[stk.top()][j] < a[i][j]) stk.pop();
			up[i][j] = stk.top();
			stk.push(i);
		}
		while (stk.size()) stk.pop();
		stk.push(n);
		ffo (i, n) {
			while (stk.top() != n && a[stk.top()][j] < a[i][j]) stk.pop();
			down[i][j] = stk.top() * -1;
			stk.push(i);
		}
	}
	// Pass A: interior rows from n-2 down to 1. Record the column intervals and
	// register the range queries each one needs; idx hands out the qans slots.
	forex (i, n-1, 1) {
		stack<ll> stk;
		stk.push(-1);
		fo (j, m) {
			// Pops on <=, so what remains is strictly greater: L is one past it (0 if none).
			while (stk.top() != -1 && a[i][stk.top()] <= a[i][j]) stk.pop();
			L[i][j] = stk.top() + 1;
			stk.push(j);
		}
		while (stk.size()) stk.pop();
		stk.push(m);
		ffo (j, m) {
			// Pops on <, so what remains is >= a[i][j]: R is one before it (m-1 if none).
			while (stk.top() != m && a[i][stk.top()] < a[i][j]) stk.pop();
			R[i][j] = stk.top() - 1;
			stk.push(j);
		}
		fo (j, m) {
			// Skip when there is no left neighbour, no right neighbour, or the right
			// neighbour merely ties with a[i][j] (the bound must be strictly greater;
			// the tie rule also reports each interval once per row).
			if (L[i][j] < 1 || m-2 < R[i][j] || a[i][R[i][j]+1] == a[i][j]) continue;
			// Interval not recorded on row i+1: row i+1 can only act as the border just
			// below a run of rows, so ask for the max of `up` on it as well.
			if (pr[L[i][j]][R[i][j]] == 0 || pr[L[i][j]][R[i][j]] > i+1) {
				off[i+1].pb({L[i][j], R[i][j], 1, idx});
				idx++;
			}
			// Max of `up` on row i (row i as a candidate bottom border).
			off[i].pb({L[i][j], R[i][j], 1, idx});
			idx++;
			// Min of `down` on row i-1 (row i-1 as the top border).
			off[i-1].pb({L[i][j], R[i][j], 0, idx});
			idx++;
			pr[L[i][j]][R[i][j]] = i;
		}
	}
	// Answer all registered queries row by row with two sparse tables per row.
	fo (i, n) {
		SparseTable mn(m, down[i]), mx(m, up[i]);
		mn.build();
		mx.build();
		for (auto e: off[i]) {
			// kind 1: max of up; kind 0: max of the negated down, negated back = min of down.
			qans[e[3]] = (e[2] == 1 ? mx.query(e[0], e[1]) : mn.query(e[0], e[1])*-1);
		}
	}
	// Pass B: repeat the traversal of pass A so that qans slots are consumed in exactly
	// the order they were handed out. The "new run" test uses updates[..].back()[0] in
	// place of pr; the last event pushed for an interval always carries the row as key.
	idx = 0;
	forex (i, n-1, 1) {
		fo (j, m) {
			if (L[i][j] < 1 || m-2 < R[i][j] || a[i][R[i][j]+1] == a[i][j]) continue;
			// Each candidate bottom-border row gets two events {key, delta, row}:
			//   {row, +1, row}        -> counted once the sweep bound lb is <= row;
			//   {maxUp+1, -1, row}    -> cancelled once lb <= maxUp+1.
			if (updates[L[i][j]][R[i][j]].empty() || updates[L[i][j]][R[i][j]].back()[0] > i+1) {
				updates[L[i][j]][R[i][j]].pb(array<int, 3>{qans[idx]+1, -1, int(i+1)});
				updates[L[i][j]][R[i][j]].pb(array<int, 3>{int(i+1), +1, int(i+1)});
				idx++;
			}
			updates[L[i][j]][R[i][j]].pb(array<int, 3>{qans[idx]+1, -1, int(i)});
			updates[L[i][j]][R[i][j]].pb(array<int, 3>{int(i), +1, int(i)});
			idx++;
			// Query for top interior row i: bottom border in [i+1, min of down on row i-1].
			qrys[L[i][j]][R[i][j]].pb(array<int, 3>{int(i+1), qans[idx], -1});
			idx++;
		}
	}
	// Sweep every column interval that has queries. Queries were pushed with decreasing
	// row, so lb decreases and events are applied from the largest key downwards.
	Fenwick ft(n+1);
	fo (l, m) {
		fo (r, m) {
			if (qrys[l][r].empty()) continue;
			// cout << "Range " << l << ' ' << r << '\n';
			sort(all(updates[l][r]));
			// otpr: row of the previous query; ls: bottom row of the current run of
			// consecutive rows (a gap between queries starts a new run).
			int otpr = n+5, ls = n+5;
			for (auto [lb, rb, _]: qrys[l][r]) {
				while (updates[l][r].size() && updates[l][r].back()[0] >= lb) {
					// cout << "Make " << updates[l][r].back()[0] << ' ' << updates[l][r].back()[1] << ' ' << updates[l][r].back()[2] << '\n';
					ft.update(updates[l][r].back()[2], updates[l][r].back()[1]);
					updates[l][r].pop_back();
				}
				int i = lb-1;
				if (i+1 < otpr) ls = i;
				// cout << "Row " << i << ' ' << lb << ' ' << rb << ' ' << ls << '\n';
				// cout << ft.query(lb, min(rb, ls+1)) << '\n';
				// Count active rows from lb up to the column limit rb, and no further
				// than one row below the current run (ls+1).
				ans += ft.query(lb, min(rb, ls+1));
				otpr = i;
			}
			// Apply the leftover events: every row has one +1 and one -1, so the tree
			// is back to all zeros for the next column interval.
			while (updates[l][r].size()) {
				ft.update(updates[l][r].back()[2], updates[l][r].back()[1]);
				updates[l][r].pop_back();
			}
		}
	}
	return ans;
}
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... |