This submission was migrated from a previous version of oj.uz, which used a different machine for grading. It may produce a different result if resubmitted.
#include "rect.h"
#include<bits/stdc++.h>
using namespace std;
// Token-level shorthands used by this file. They are plain textual macros, so
// they apply to every matching identifier that appears below this point.
// #define ll long long
// pb: shorthand for the member name push_back.
#define pb push_back
// ff: shorthand for the pair member `first`.
#define ff first
// all(x): expands to the iterator pair `x.begin(), x.end()`; note that the
// argument x is evaluated twice.
#define all(x) x.begin(), x.end()
// en: expands to an expression that writes a newline to cout.
#define en cout << '\n'
// ss: shorthand for the pair member `second`.
#define ss second
// WARNING: from here on the keyword `int` is textually replaced by `short`, so
// every later `int` declaration in this file is a short, not a full-width
// integer. Headers included above this line are not affected.
// NOTE(review): presumably done to shrink memory use - confirm before relying
// on any value that may not fit in a short.
#define int short 
// Fenwick (binary indexed) tree that counts inserted positions in [1, n] and
// can be emptied again in time proportional to the work done since the last
// reset. All state is owned by the object, so several trees may coexist.
struct Fenwick {
  std::int32_t n;                     // largest position that can be stored
  std::vector<std::int32_t> t;        // 1-based tree nodes; t[0] is unused
  std::vector<std::int32_t> touched;  // nodes incremented since the last rollback()

  // Creates an empty tree over positions 1.._n (negative sizes are treated as 0).
  explicit Fenwick(std::int32_t _n)
      : n(_n < 0 ? 0 : _n), t(static_cast<std::size_t>(n) + 1, 0) {}

  // Records one occurrence of position v. Positions outside [1, n] are ignored;
  // in particular v == 0 would otherwise never advance (0 & -0 == 0).
  void add(std::int32_t v) {
    if (v <= 0) {
      return;
    }
    for (; v <= n; v += v & -v) {
      ++t[static_cast<std::size_t>(v)];
      touched.push_back(v);
    }
  }

  // Returns how many recorded positions are <= v. Queries beyond n are clamped
  // to n, queries below 1 return 0.
  std::int32_t get(std::int32_t v) const {
    if (v > n) {
      v = n;
    }
    std::int32_t res = 0;
    for (; v > 0; v -= v & -v) {
      res += t[static_cast<std::size_t>(v)];
    }
    return res;
  }

  // Removes everything recorded since the previous rollback() by zeroing only
  // the nodes that add() actually modified.
  void rollback() {
    for (const std::int32_t node : touched) {
      t[static_cast<std::size_t>(node)] = 0;
    }
    touched.clear();
  }
};

// Counts the axis-aligned rectangles of interior cells (not touching the grid
// border) in which every cell is strictly lower than the four cells that bound
// its row and its column just outside the rectangle.
//
// a: the grid of heights; all rows are expected to have the same length as
//    a[0] (not validated). Grids with fewer than 3 rows or 3 columns, including
//    an empty grid, contain no such rectangle and yield 0.
// Returns the number of valid rectangles.
//
// Method: a rectangle with bottom-right interior cell (i, j) is valid iff
//   * its column span [left, j] is a valid horizontal span in each of its rows
//     (every inner value below both cells just outside the span), and
//   * its row span [top, i] is a valid vertical span in each of its columns.
// For every (i, j) the valid spans ending there are enumerated by walking the
// chain of "nearest strictly greater" cells, and for each span the number of
// consecutive rows (resp. columns) ending at i (resp. j) in which the same span
// is valid is maintained. Pairs (horizontal span, vertical span) with
//   rows-run >= height  and  columns-run >= width
// are then counted with a sweep over decreasing height plus a Fenwick tree over
// widths.
//
// All working storage is local to the call (O(n*m + m*m) integers), so the
// function may be called repeatedly with independent results.
long long count_rectangles(std::vector<std::vector<int32_t> > a) {
  // The size checks come first so a[0] is never touched on an empty grid.
  if (a.size() < 3 || a[0].size() < 3) {
    return 0;
  }
  const std::size_t width = a[0].size();
  const std::int32_t n = static_cast<std::int32_t>(a.size());
  const std::int32_t m = static_cast<std::int32_t>(width);

  // Row-major index into the flat tables below (all use `width` as stride).
  const auto flat = [width](std::int32_t r, std::int32_t c) {
    return static_cast<std::size_t>(r) * width + static_cast<std::size_t>(c);
  };

  std::vector<std::int32_t> stack;  // monotonic stack of indices, reused

  // up[flat(i, j)]: nearest row above i whose value in column j is strictly
  // greater than a[i][j], or -1 when there is none.
  std::vector<std::int32_t> up(a.size() * width, -1);
  for (std::int32_t j = 0; j < m; ++j) {
    stack.clear();
    for (std::int32_t i = 0; i < n; ++i) {
      while (!stack.empty() && a[stack.back()][j] <= a[i][j]) {
        stack.pop_back();
      }
      up[flat(i, j)] = stack.empty() ? -1 : stack.back();
      stack.push_back(i);
    }
  }

  // For the horizontal span [left, j]: col_run_row[flat(j, left)] is the last
  // row in which it was valid and col_run_len[...] the number of consecutive
  // rows ending there. -2 can never equal "previous row", so it means "never".
  std::vector<std::int32_t> col_run_len(width * width, 0);
  std::vector<std::int32_t> col_run_row(width * width, -2);

  // Same bookkeeping for the vertical span [top, i] of the current row i,
  // indexed by top and tracking consecutive columns; reset for every row.
  std::vector<std::int32_t> row_run_len(a.size(), 0);
  std::vector<std::int32_t> row_run_col(a.size(), -2);

  std::vector<std::int32_t> left_greater(width, -1);
  std::vector<std::pair<std::int32_t, std::int32_t> > spans;  // (rows-run, width)
  std::vector<std::int32_t> tops;                             // top rows, shortest span first
  Fenwick widths(m);
  long long ans = 0;

  for (std::int32_t i = 1; i + 1 < n; ++i) {
    const std::vector<int32_t>& row = a[i];

    // left_greater[j]: nearest column left of j with a strictly greater value.
    stack.clear();
    for (std::int32_t j = 0; j < m; ++j) {
      while (!stack.empty() && row[stack.back()] <= row[j]) {
        stack.pop_back();
      }
      left_greater[j] = stack.empty() ? -1 : stack.back();
      stack.push_back(j);
    }
    std::fill(row_run_col.begin(), row_run_col.end(), -2);

    for (std::int32_t j = 1; j + 1 < m; ++j) {
      // Horizontal spans (l, j + 1) of row i, i.e. interior columns l+1..j.
      // The right wall must beat a[i][j]; the left wall walks the chain of
      // strictly greater cells until one is at least as high as the right
      // wall (it could no longer be an interior cell of a wider span).
      spans.clear();
      if (row[j + 1] > row[j]) {
        for (std::int32_t l = left_greater[j]; l >= 0; l = left_greater[l]) {
          const std::size_t key = flat(j, l + 1);
          col_run_len[key] = (col_run_row[key] == i - 1 ? col_run_len[key] : 0) + 1;
          col_run_row[key] = i;
          spans.emplace_back(col_run_len[key], j - l);
          if (row[l] >= row[j + 1]) {
            break;
          }
        }
      }

      // Vertical spans (u, i + 1) of column j, i.e. interior rows u+1..i,
      // enumerated the same way. The run bookkeeping must be updated even
      // when no rectangle ends at (i, j), because later columns depend on it.
      tops.clear();
      if (a[i + 1][j] > row[j]) {
        for (std::int32_t u = up[flat(i, j)]; u >= 0; u = up[flat(u, j)]) {
          const std::size_t top = static_cast<std::size_t>(u) + 1;
          row_run_len[top] = (row_run_col[top] == j - 1 ? row_run_len[top] : 0) + 1;
          row_run_col[top] = j;
          tops.push_back(u + 1);
          if (a[u][j] >= a[i + 1][j]) {
            break;
          }
        }
      }

      if (spans.empty() || tops.empty()) {
        continue;
      }

      // Longest rows-run first, so a single pointer can admit spans while the
      // required height shrinks.
      std::sort(spans.rbegin(), spans.rend());

      // `tops` holds the shortest vertical span first; walk it backwards to
      // visit heights in decreasing order.
      std::size_t next = 0;
      for (auto it = tops.rbegin(); it != tops.rend(); ++it) {
        const std::int32_t top = *it;
        const std::int32_t height = i - top + 1;
        while (next < spans.size() && spans[next].first >= height) {
          widths.add(spans[next].second);
          ++next;
        }
        // Admitted spans are valid in all `height` rows; keep those narrow
        // enough that the vertical span is valid in each of their columns.
        ans += widths.get(row_run_len[static_cast<std::size_t>(top)]);
      }
      widths.rollback();
    }
  }
  return ans;
}
Compilation message (stderr)
rect.cpp: In function 'long long int count_rectangles(std::vector<std::vector<int> >)':
rect.cpp:176:59: warning: comparison of integer expressions of different signedness: 'std::vector<short int>::size_type' {aka 'long unsigned int'} and 'int' [-Wsign-compare]
  176 |     while(p > -1 && colranges[j][rangesL[i][j][p]].size() >= i - u + 1){
      |                     ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... |