This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
#include <bits/stdc++.h>
using namespace std;
#ifdef tabr
#include "library/debug.cpp"
#else
#define debug(...)
#endif
struct dsu {
    vector<int> p;
    vector<int> sz;
    vector<int> mn;
    vector<int> mx;
    int n;
    dsu(int _n) : n(_n) {
        p = vector<int>(n);
        iota(p.begin(), p.end(), 0);
        sz = vector<int>(n, 1);
        mx = p;
        mn = p;
    }
    inline int get(int x) {
        if (p[x] == x) {
            return x;
        } else {
            return p[x] = get(p[x]);
        }
    }
    inline bool unite(int x, int y) {
        x = get(x);
        y = get(y);
        if (x == y) {
            return false;
        }
        p[x] = y;
        sz[y] += sz[x];
        mn[y] = min(mn[y], mn[x]);
        mx[y] = max(mx[y], mx[x]);
        return true;
    }
    inline bool same(int x, int y) {
        return (get(x) == get(y));
    }
    inline int size(int x) {
        return sz[get(x)];
    }
    inline bool root(int x) {
        return (x == get(x));
    }
};
namespace smin {
struct sparse {
    using T = int;
    int n;
    int h;
    vector<vector<T>> table;
    T op(T x, T y) {
        return min(x, y);
    }
    sparse(const vector<T> &v) {
        n = (int) v.size();
        h = 32 - __builtin_clz(n);
        table.resize(h);
        table[0] = v;
        for (int j = 1; j < h; j++) {
            table[j].resize(n - (1 << j) + 1);
            for (int i = 0; i <= n - (1 << j); i++) {
                table[j][i] = op(table[j - 1][i], table[j - 1][i + (1 << (j - 1))]);
            }
        }
    }
    T get(int l, int r) {
        assert(l < r);
        int k = 31 - __builtin_clz(r - l);
        return op(table[k][l], table[k][r - (1 << k)]);
    }
};
}  // namespace smin
namespace smax {
struct sparse {
    using T = int;
    int n;
    int h;
    vector<vector<T>> table;
    T op(T x, T y) {
        return max(x, y);
    }
    sparse(const vector<T> &v) {
        n = (int) v.size();
        h = 32 - __builtin_clz(n);
        table.resize(h);
        table[0] = v;
        for (int j = 1; j < h; j++) {
            table[j].resize(n - (1 << j) + 1);
            for (int i = 0; i <= n - (1 << j); i++) {
                table[j][i] = op(table[j - 1][i], table[j - 1][i + (1 << (j - 1))]);
            }
        }
    }
    T get(int l, int r) {
        assert(l < r);
        int k = 31 - __builtin_clz(r - l);
        return op(table[k][l], table[k][r - (1 << k)]);
    }
};
}  // namespace smax
#include <ext/pb_ds/assoc_container.hpp>
using namespace __gnu_pbds;
template <typename T>
using pbds = tree<T, null_type, less<T>, rb_tree_tag, tree_order_statistics_node_update>;
long long count_rectangles(vector<vector<int>> a) {
    int h = (int) a.size();
    int w = (int) a[0].size();
    vector<vector<int>> first_ng(h, vector<int>(w, h));
    for (int j = 0; j < w; j++) {
        vector<int> s;
        for (int i = h - 1; i >= 0; i--) {
            while (!s.empty() && a[s.back()][j] < a[i][j]) {
                s.pop_back();
            }
            if (!s.empty()) {
                first_ng[i][j] = s.back();
            }
            s.emplace_back(i);
        }
    }
    vector<vector<int>> last_ng(h, vector<int>(w, -1));
    for (int j = 0; j < w; j++) {
        vector<int> s;
        for (int i = 0; i < h; i++) {
            while (!s.empty() && a[s.back()][j] < a[i][j]) {
                s.pop_back();
            }
            if (!s.empty()) {
                last_ng[i][j] = s.back();
            }
            s.emplace_back(i);
        }
    }
    long long res = 0;
    set<pair<int, int>> cnt;
    vector<vector<pbds<int>>> segs(w, vector<pbds<int>>(w));
    vector<vector<map<int, vector<int>>>> ngs(w, vector<map<int, vector<int>>>(w));
    for (int i = 1; i < h - 1; i++) {
        vector<vector<int>> bound(w);
        map<int, vector<int>> v;
        for (int j = 0; j < w; j++) {
            v[a[i][j]].emplace_back(j);
        }
        dsu uf(w);
        for (auto p : v) {
            for (int j : p.second) {
                if (j - 1 >= 0 && a[i][j] >= a[i][j - 1]) {
                    uf.unite(j - 1, j);
                }
                if (j + 1 < w && a[i][j] >= a[i][j + 1]) {
                    uf.unite(j + 1, j);
                }
            }
            for (int j : p.second) {
                if (uf.root(j) && uf.mn[j] > 0 && uf.mx[j] < w - 1) {
                    bound[uf.mx[j]].emplace_back(uf.mn[j]);
                }
            }
        }
        set<pair<int, int>> new_cnt;
        smin::sparse sp_min(first_ng[i - 1]);
        smax::sparse sp_max(last_ng[i + 1]);
        int up_last = 0;
        int down_last = 0;
        for (int j = 1; j < w - 1; j++) {
            if (a[i][j] >= a[i - 1][j]) {
                up_last = j;
            }
            if (a[i][j] >= a[i + 1][j]) {
                down_last = j;
            }
            for (int k : bound[j]) {
                new_cnt.emplace(k, j);
                if (up_last < k) {
                    segs[k][j].insert(i);
                    ngs[k][j][sp_min.get(k, j + 1)].emplace_back(i);
                }
                for (int x : ngs[k][j][i]) {
                    segs[k][j].erase(x);
                }
                ngs[k][j].erase(i);
                if (down_last < k) {
                    // debug(i, j, k, (int) (segs[k][j].size() - segs[k][j].order_of_key(sp_max.get(k, j + 1) + 1)));
                    res += (int) (segs[k][j].size() - segs[k][j].order_of_key(sp_max.get(k, j + 1) + 1));
                }
            }
        }
        for (auto p : cnt) {
            if (!new_cnt.count(p)) {
                segs[p.first][p.second].clear();
                ngs[p.first][p.second].clear();
            }
        }
        swap(cnt, new_cnt);
    }
    return res;
}
#ifdef tabr
mt19937 rng((unsigned int) chrono::steady_clock::now().time_since_epoch().count());
int rand_int(int a, int b) {  // [a, b]
    return uniform_int_distribution<int>(a, b)(rng);
}
int main() {
    ios::sync_with_stdio(false);
    cin.tie(0);
    debug(count_rectangles({{4, 8, 7, 5, 6},
                            {7, 4, 10, 3, 5},
                            {9, 7, 20, 14, 2},
                            {9, 14, 7, 3, 6},
                            {5, 7, 5, 2, 7},
                            {4, 5, 13, 5, 6}}));
    debug(count_rectangles({{5, 7, 5},
                            {9, 4, 7},
                            {9, 7, 20},
                            {7, 4, 10},
                            {4, 8, 7}}));
    debug(count_rectangles({{10, 10, 10},
                            {10, 9, 10},
                            {10, 0, 10},
                            {10, 1, 10},
                            {10, 5, 10},
                            {10, 10, 10}}));
    vector<vector<int>> a(6, vector<int>(3, 10));
    for (int i = 1; i < 5; i++) {
        for (int j = 1; j < 2; j++) {
            a[i][j] = rand_int(0, 9);
        }
    }
    for (auto b : a) debug(b);
    debug(count_rectangles(a));
    return 0;
}
#endif
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... |