#include <bits/stdc++.h>
    
using namespace std;
    
typedef long long ll;
#pragma optimize("Ofast")
#pragma target("avx2")
const int  N = (int)5e4 + 12, MOD = 998244353, dx[] = {0, 1, 0, -1}, dy[] = {1, 0, -1, 0}, inf = (int)1e9;
#define int ll
int n, m;
vector<vector<int>> a;
vector<vector<vector<int>>> dp;
void test() {
    cin >> n >> m;
    a.resize(n + 1);
    a[0].resize(m + 1);
    for(int i = 1; i <= n; i++) {
        a[i].resize(m + 1);
        for(int j = 1; j <= m; j++) {
            cin >> a[i][j];
        }
    }
    // if(n > m) {
    //     vector<vector<int>> b(m + 1, vector<int>(n + 1, 0));
    //     for(int i = 1; i <= n; i++) {
    //         for(int j = 1; j <= m; j++) {
    //             b[j][i] = a[i][j];
    //         }
    //     }
    //     a = b;
    //     swap(n, m);
    // }
    dp.resize(n + 1);
    for(int i = 0; i <= n; i++) {
        dp[i].resize(m + 1);
        for(int j = 0; j <= m; j++) {
            dp[i][j].resize(16);
        }
    }
    for(int f = 1; f < 16; f++) {
        for(int i = 1; i <= n; i++) {
            for(int j = 1; j <= m; j++) {
                bool ok = 0;
                int mx = 0;
                for(int d = 0; d < 4; d++) {
                    if((f >> d) & 1) {
                        int x = i + dx[d], y = j + dy[d];
                        if(x >= 1 && x <= n && y >= 1 && y <= m) {
                            if(a[x][y] > a[i][j]) {
                                ok = 1;
                            } else {
                                mx = max(mx, a[x][y]);
                            }
                        }
                    }
                }
                if(ok) {
                    dp[i][j][f] = a[i][j] - mx;
                } else {
                    dp[i][j][f] = inf - mx;
                }
            }
        }
    }
    for(int f = 1; f < 16; f++) {
        if(__builtin_popcount(f) <= 2) continue;
        for(int i = 1; i <= n; i++) {
            for(int j = 1; j <= m; j++) {
                if(f == 7 || f == 13) {
                    dp[i][j][f] = dp[i][j - 1][f] + dp[i][j][f];
                } 
                else if(f == 11 || f == 14) {
                    dp[i][j][f] = dp[i - 1][j][f] + dp[i][j][f];
                }
                else {
                    dp[i][j][f] = dp[i - 1][j][f] + dp[i][j - 1][f] - dp[i - 1][j - 1][f] + dp[i][j][f];
                }
            }
        }
    }
    int res = n * m;
    
    for(int x = 1; x <= n; x++) {
        for(int x1 = x + 1; x1 <= n; x1++) {
            for(int y = 1; y <= m; y++) {
                for(int y1 = y + 1; y1 <= m; y1++) {
                    int S = dp[x1 - 1][y1 - 1][15] - dp[x][y1 - 1][15] - dp[x1 - 1][y][15] + dp[x][y][15];
                    S += dp[x][y][3];
                    S += dp[x1][y1][12];
                    S += dp[x1][y][9];
                    S += dp[x][y1][6];
                    S += dp[x][y1 - 1][7] - dp[x][y][7];
                    S += dp[x1][y1 - 1][13] - dp[x1][y][13];
                    S += dp[x1 - 1][y][11] - dp[x][y][11];
                    S += dp[x1 - 1][y1][14] - dp[x][y1][14];
                    if(S == inf) {
                        res++;
                    }
                }
            }
        }
    }
    for(int i = 1; i <= n; i++) {
        int c = 1;
        for(int j = 2; j <= m; j++) {
            if((a[i][j] > a[i][j - 1]) == (a[i][j - 1] > a[i][j - 2])) {
                c++;
            } else c = 2;
            res += c - 1;
        }
    }
    for(int j = 1; j <= m; j++) {
        int c = 1;
        for(int i = 2; i <= n; i++) {
            if((a[i][j] > a[i - 1][j]) == (a[i - 1][j] > a[i - 2][j])) {
                c++;
            } else c = 2;
            res += c - 1;
        }
    }
    cout << res << '\n';
}
int32_t main() {
    ios_base::sync_with_stdio(false);
    cin.tie(0);
    int t = 1;
    // cin >> t;
    while(t--) {
        test();
    }
}   
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... |