This submission was migrated from a previous version of oj.uz, which used a different machine for grading. It may produce a different result if resubmitted.
#include <bits/stdc++.h>
#pragma GCC optimize("O3")
#pragma GCC target("avx2")
#include "rect.h"
#define ll long long
#define ar array
#define all(x) x.begin(), x.end()
#define sz(x) (int)x.size()
using namespace std;
// Lowers `a` to `b` when `a` is strictly greater than `b`.
// Returns true when `a` was overwritten, false when it was left alone.
template<typename T> bool ckmin(T &a, const T &b) {
	if (a > b) {
		a = b;
		return true;
	}
	return false;
}
// Raises `a` to `b` when `a` is strictly less than `b`.
// Returns true when `a` was overwritten, false when it was left alone.
template<typename T> bool ckmax(T &a, const T &b) {
	if (a < b) {
		a = b;
		return true;
	}
	return false;
}
using namespace std;
// Global Mersenne Twister engine, seeded with the steady clock's tick count
// (time since its epoch) taken when this global is initialized.
mt19937 rng(chrono::steady_clock::now().time_since_epoch().count());
// rint(l, r): builds a uniform_int_distribution<int>(l, r) and samples it once
// with `rng`, i.e. yields one int from the closed range [l, r].
#define rint(l, r) uniform_int_distribution<int>(l, r)(rng)
// Fenwick (binary indexed) tree over `n` integer slots.
// Callers address slots 0-based; `bit` stores the tree 1-based in n+1 cells.
struct fenwick {
	int n;
	std::vector<int> bit;

	// Creates a tree of `a` slots, all zero.
	fenwick(int a) : n(a), bit(a+1) {}

	// Adds `x` to the slot at 0-based index `pos`.
	void update(int pos, int x) {
		int node = pos + 1;
		while (node <= n) {
			bit[node] += x;
			node += node & -node;
		}
	}

	// Returns the total of the first `pos` slots (0-based indices 0 .. pos-1).
	int sum(int pos) {
		int total = 0;
		for (; pos; pos -= pos & -pos) {
			total += bit[pos];
		}
		return total;
	}

	// Returns the total of the slots with 0-based indices l .. r, both inclusive.
	int query(int l, int r) {
		const int upto_r = sum(r+1);
		const int before_l = sum(l);
		return upto_r - before_l;
	}
};
/**
 * Counts the valid rectangles in the height grid `a`.
 *
 * A rectangle is a block of cells with rows r1..r2 and columns c1..c2 that
 * does not touch the grid border (1 <= r1 <= r2 <= n-2, 1 <= c1 <= c2 <= m-2).
 * It is valid when every cell (i, j) inside it is strictly lower than the four
 * cells just outside the block in its own row and column:
 * a[i][c1-1], a[i][c2+1], a[r1-1][j] and a[r2+1][j].
 *
 * Method, O(n*m*log(n*m)) time:
 *  - In a single line (row or column) a "span" (p, q), q - p >= 2, is valid
 *    when everything strictly between p and q is lower than both ends. A
 *    monotonic stack lists all valid spans of a line; there are at most two
 *    per position.
 *  - A block bounded by rows (u, d) and columns (l, r) is valid iff the
 *    horizontal span (l, r) is valid in every row u+1..d-1 and the vertical
 *    span (u, d) is valid in every column l+1..r-1.
 *  - For each span we keep its run length: over how many consecutive lines,
 *    ending at the current one, that same span has been valid.
 *  - For each bottom-right interior corner we count the (horizontal span,
 *    vertical span) pairs whose run lengths cover each other's extent, using a
 *    Fenwick tree for the two-sided dominance count.
 *
 * @param a rectangular grid of heights (every row is assumed to have the same
 *          length as the first).
 * @return the number of valid rectangles; 0 when the grid has fewer than three
 *         rows or fewer than three columns (including an empty grid).
 */
long long count_rectangles(std::vector<std::vector<int>> a) {
	const int n = static_cast<int>(a.size());
	const int m = (n > 0) ? static_cast<int>(a[0].size()) : 0;
	// A rectangle needs an interior cell plus a boundary cell on every side.
	if (n < 3 || m < 3) return 0;

	// (index of the far boundary line, run length of the span)
	using Span = std::pair<int, int>;

	// One monotonic-stack step along a line: position `pos` becomes the near
	// boundary and `out` receives every valid span (far, pos) with run length 1,
	// ordered by decreasing `far` (i.e. increasing extent).
	// The stack keeps strictly decreasing heights; an entry equal to the new
	// height is dropped because it can never bound a span reaching past `pos`.
	const auto close_spans = [](std::vector<int>& stack, int pos,
	                            const auto& height_at, std::vector<Span>& out) {
		out.clear();
		const int cur = height_at(pos);
		while (!stack.empty() && height_at(stack.back()) < cur) {
			if (pos - stack.back() > 1) out.emplace_back(stack.back(), 1);
			stack.pop_back();
		}
		if (!stack.empty()) {
			if (pos - stack.back() > 1) out.emplace_back(stack.back(), 1);
			if (height_at(stack.back()) == cur) stack.pop_back();
		}
		stack.push_back(pos);
	};

	// Spans also present on the neighbouring line continue that line's run.
	// Both lists are ordered by decreasing far boundary, so one merge pass suffices.
	const auto extend_runs = [](std::vector<Span>& cur, const std::vector<Span>& prev) {
		std::size_t p = 0;
		for (Span& s : cur) {
			while (p < prev.size() && prev[p].first > s.first) ++p;
			if (p < prev.size() && prev[p].first == s.first) s.second = prev[p].second + 1;
		}
	};

	// Fenwick tree keyed by horizontal run length (1..n).
	std::vector<int> tree(n + 1, 0);
	const auto tree_add = [&](int pos, int delta) {
		for (; pos <= n; pos += pos & -pos) tree[pos] += delta;
	};
	const auto tree_prefix = [&](int pos) {
		int total = 0;
		for (; pos > 0; pos -= pos & -pos) total += tree[pos];
		return total;
	};

	// row_prev[r]: horizontal spans (l, run) of the previous row whose right boundary is r.
	std::vector<std::vector<Span>> row_prev(m), row_cur(m);
	std::vector<std::vector<int>> col_stack(m);  // one monotonic stack per column
	std::vector<Span> col_prev, col_cur, by_run;
	std::vector<int> row_stack;
	long long ans = 0;

	for (int d = 0; d < n; ++d) {
		// Row d acts as the bottom boundary; the interior ends at row d-1.
		col_prev.clear();
		for (int j = 0; j < m; ++j) {
			// Vertical spans (u, d) of column j, with run lengths counted leftwards.
			close_spans(col_stack[j], d, [&](int r) { return a[r][j]; }, col_cur);
			extend_runs(col_cur, col_prev);

			// Column j is the last interior column, column j+1 the right boundary.
			if (j + 1 < m && !col_cur.empty() && !row_prev[j + 1].empty()) {
				const std::vector<Span>& hs = row_prev[j + 1];  // increasing width
				by_run = col_cur;
				std::sort(by_run.begin(), by_run.end(),
				          [](const Span& x, const Span& y) { return x.second < y.second; });
				std::size_t added = 0;
				for (const Span& v : by_run) {
					// Admit horizontal spans whose width (j - l) is covered by
					// this vertical span's leftward run.
					while (added < hs.size() && j - hs[added].first <= v.second) {
						tree_add(hs[added].second, 1);
						++added;
					}
					// Of those, keep the ones whose upward run covers the height.
					const int height = d - v.first - 1;
					ans += static_cast<long long>(added) - tree_prefix(height - 1);
				}
				// Restore the tree to all zeros for the next cell.
				for (std::size_t k = 0; k < added; ++k) tree_add(hs[k].second, -1);
			}
			std::swap(col_prev, col_cur);
		}

		// Horizontal spans of row d, with run lengths counted upwards.
		row_stack.clear();
		for (int r = 0; r < m; ++r) {
			close_spans(row_stack, r, [&](int c) { return a[d][c]; }, row_cur[r]);
			extend_runs(row_cur[r], row_prev[r]);
		}
		std::swap(row_prev, row_cur);
	}
	return ans;
}
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... |