This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
#include <bits/stdc++.h>
using namespace std;
typedef long long ll;
typedef pair<int,int> ii;
typedef pair<ll,ll> pll;
typedef vector<int> vi;
typedef vector<ii> vii;
typedef vector<ll> vll;
typedef vector<pll> vpll;
#define PB push_back
#define PF push_front
#define PPB pop_back
#define PPF pop_front
#define X first
#define Y second
#define MP make_pair
#define all(x) (x).begin(), (x).end()
const int mod = 1e9 + 7; //998244353;
const int inf = 1e9 + 7;
const ll INF = 1e18 + 7;
const int logo = 20;
const int MAXN = 1e6 + 7;
const int off = 1 << logo;
const int trsz = off << 1;
const int dx[] = {1, -1, 0, 0};
const int dy[] = {0, 0, -1, 1};
vector<vll> pf[17], vl[17];
vector<vi> mat;
int h, w;
vi vals;
unordered_map<ll, int> f;
bool ins(int r, int s){
	if(r < 1 or r > h or s < 1 or s > w) return 0;
	return 1;
}
ll calc(int id, int r1, int s1, int r2, int s2){
	if(r1 > r2 or s1 > s2) return 0;
	ll ret = pf[id][r2][s2] + pf[id][r1 - 1][s1 - 1] - pf[id][r1 - 1][s2] - pf[id][r2][s1 - 1];
	return ret;
}
bool check(int up, int down, int l, int r){
	if(up == down and l == r) return 1;
	ll val = 0;
	
	if(down == up){
		val = calc(12, up, l + 1, up, r - 1) + vl[8][up][l] + vl[4][down][r];
	}else if(r == l){
		val = calc(3, up + 1, l, down - 1, l) + vl[1][up][l] + vl[2][down][r];
	}else{
		val = vl[9][up][l] + vl[5][up][r] + vl[10][down][l] + vl[6][down][r];
		val += calc(15, up + 1, l + 1, down - 1, r - 1);
		val += calc(14, down, l + 1, down, r - 1);
		val += calc(13, up, l + 1, up, r - 1);
		val += calc(11, up + 1, l, down - 1, l);
		val += calc(7, up + 1, r, down - 1, r);
	}
	return (val == (ll)h * (ll)w + 1);
}
void solve(){
	cin >> h >> w;
	mat.resize(h + 1, vi(w + 1, 0));
	for(int i=1; i<=h; i++) for(int j=1; j<=w; j++) cin >> mat[i][j], vals.PB(mat[i][j]);
	sort(all(vals));
	for(auto &x : mat) for(auto &y : x) if(y) y = lower_bound(all(vals), y) - vals.begin() + 1;
	vi vec;
	for(int m=0; m<16; m++){
		pf[m].resize(h + 1, vll(w + 1, 0));
		vl[m].resize(h + 1, vll(w + 1, 0));
		for(int i=1; i<=h; i++){
			for(int j=1; j<=w; j++){
				vec.clear();
				vec.PB(0);
				for(int k=0; k<4; k++){
					if(!(m & (1 << k))) continue;
					int ni = i + dx[k], nj = j + dy[k];
					if(!ins(ni, nj)) continue;
					vec.PB({mat[ni][nj]});
				}
				
				int val = mat[i][j];
				sort(all(vec));
				if(vec.back() < mat[i][j]) val += (h * w + 1 - mat[i][j]);
				
				auto it = lower_bound(all(vec), mat[i][j]);
				it--;
				val -= *it;
				
				vl[m][i][j] = val;
				pf[m][i][j] += pf[m][i - 1][j] + pf[m][i][j - 1] - pf[m][i - 1][j - 1] + vl[m][i][j];
			}
		}
	}
	
	int ans = 0;
	for(int i=1; i<=h; i++){
		vi st;
		for(int j=1; j<=w; j++){
			if(st.size() and mat[i][j] > st.back()) st.clear();
			st.PB(mat[i][j]);
			ans += st.size();
		}
		
		st.clear();
		for(int j=1; j<=w; j++){
			if(st.size() and mat[i][j] < st.back()) st.clear();
			st.PB(mat[i][j]);
			ans += st.size();
		}
	}
	
	for(int j=1; j<=w; j++){
		vi st;
		for(int i=1; i<=h; i++){
			if(st.size() and mat[i][j] > st.back()) st.clear();
			st.PB(mat[i][j]);
			ans += st.size();
		}
		
		st.clear();
		for(int i=1; i<=h; i++){
			if(st.size() and mat[i][j] < st.back()) st.clear();
			st.PB(mat[i][j]);
			ans += st.size();
		}
	}
	ans -= 3 * h * w;
	
	
	if(h < w){
		for(int up=1; up<=h; up++){
			for(int down=up+1; down<=h; down++){
				ll cpf = 0;
				f.clear();
				for(int R=1; R<=w; R++){
					ll q = cpf;
					q += vl[5][up][R];
					q += vl[6][down][R];
					q += calc(7, up + 1, R, down - 1, R);
					ans += f[(h * w + 1) - q];
					
				
					cpf += vl[13][up][R];
					cpf += vl[14][down][R];
					cpf += calc(15, up + 1, R, down - 1, R);
					
					q = -cpf;
					q += vl[9][up][R];
					q += vl[10][down][R];
					q += calc(11, up + 1, R, down - 1, R);
					f[q]++;
				}
			}
		}
	}else{
		for(int l=1; l<=w; l++){
			for(int r=l+1; r<=w; r++){
				ll cpf = 0;
				f.clear();
				for(int down=1; down<=h; down++){
					ll q = cpf;
					q += vl[10][down][l];
					q += vl[6][down][r];
					q += calc(14, down, l + 1, down, r - 1);
					ans += f[(h * w + 1) - q];
					
				
					cpf += vl[11][down][l];
					cpf += vl[7][down][r];
					cpf += calc(15, down, l + 1, down, r - 1);
					
					q = -cpf;
					q += vl[9][down][l];
					q += vl[5][down][r];
					q += calc(13, down, l + 1, down, r - 1);
					f[q]++;
				}
			}
		}
	}
	
	cout << ans << "\n";
}
int main(){
	ios_base::sync_with_stdio(false);
	cin.tie(NULL);
	cout.tie(NULL);
	int tt = 1;
	//cin >> tt;
	while(tt--) solve();
	return 0;
}
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... |