#include "rect.h"
#include <bits/stdc++.h>
#pragma GCC optimize("Ofast")
#pragma GCC optimize("unroll-loops")
using namespace std;
using i64 = long long;
using d64 = long double;
using pi = pair<int, int>;
using pli = pair<i64, i64>;
using ti = tuple<int, int, int>;
using tli = tuple<i64, i64, i64>;
#define iterall(cont) cont.begin(), cont.end()
#define prec(n) setprecision(n) << fixed
// Scans `v` once with a stack of indices and collects index pairs (l, r),
// r - l >= 2, where every value strictly between l and r is smaller than
// both v[l] and v[r].
//
// A pair is recorded each time an index is popped because its value is
// smaller than the current one: the pair is (index now on top of the stack,
// current index). Pairs are returned in the order they are discovered.
vector<pi> findLegalTuple(const vector<int> &v) {
    vector<pi> pairs;
    vector<size_t> stack;  // indices whose values are strictly decreasing
    const size_t len = v.size();

    size_t cur = 0;
    while (cur < len) {
        const int value = v[cur];

        // Discard every index holding a smaller value than the current one.
        for (;;) {
            if (stack.empty() || v[stack.back()] >= value) break;
            stack.pop_back();
            if (stack.empty()) continue;  // nothing left to pair with
            const size_t left = stack.back();
            if (cur - left >= 2) pairs.emplace_back(left, cur);
        }

        // An equal value on top is superseded by the current index so the
        // stack stays strictly decreasing.
        if (!stack.empty() && v[stack.back()] == value) stack.pop_back();

        stack.push_back(cur);
        ++cur;
    }
    return pairs;
}
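// TODO (translated from Korean): "this is N^3, so change it a bit later" — possibly stale; verify against the code below.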
// For every index k of `v`, finds the nearest index on each side whose value
// is strictly greater than v[k].
//
// Returns a vector the same length as `v`; element k is
//   first  = nearest such index to the left of k, or -1 if none,
//   second = nearest such index to the right of k, or -1 if none.
vector<pi> getMaxPos(const vector<int> &v) {
    const int len = static_cast<int>(v.size());
    vector<pi> bounds(v.size(), pi(-1, -1));

    // Indices still waiting for a strictly greater value in sweep direction.
    vector<int> pending;

    // Left-to-right sweep resolves the right-hand neighbour.
    for (int idx = 0; idx < len; ++idx) {
        while (!pending.empty() && v[pending.back()] < v[idx]) {
            bounds[pending.back()].second = idx;
            pending.pop_back();
        }
        pending.push_back(idx);
    }

    // Right-to-left sweep resolves the left-hand neighbour.
    pending.clear();
    for (int idx = len - 1; idx >= 0; --idx) {
        while (!pending.empty() && v[pending.back()] < v[idx]) {
            bounds[pending.back()].first = idx;
            pending.pop_back();
        }
        pending.push_back(idx);
    }

    return bounds;
}
// Collapses the `.first` values of `v` into closed intervals [lo, hi]:
// a value exactly one greater than the end of the most recent interval
// extends that interval, any other value starts a new single-point interval.
// The `.second` member of each input pair is ignored.
vector<pi> processToIntervals(const vector<pi> &v) {
    vector<pi> runs;
    for (size_t k = 0; k < v.size(); ++k) {
        const int value = v[k].first;
        const bool extends_last = !runs.empty() && runs.back().second == value - 1;
        if (extends_last) {
            runs.back().second = value;
        } else {
            runs.push_back(pi(value, value));
        }
    }
    return runs;
}
// Reports whether both endpoints of `t` lie inside one interval of `v`.
//
// `v` holds closed intervals [first, second]; the search assumes they are
// sorted and non-overlapping (as produced by processToIntervals on ascending
// input). The interval containing t.first is located by binary search, then
// t.second is checked against that same interval.
bool intervalBinarySearch(const vector<pi> &v, pi t) {
    const size_t count = v.size();
    if (count == 0) return false;

    const int from = t.first;
    const int to = t.second;
    const auto holds = [&v](size_t idx, int x) {
        return v[idx].first <= x && x <= v[idx].second;
    };

    int hit = -1;  // index of the interval containing `from`, -1 if none
    size_t lo = 0;
    size_t hi = count - 1;
    while (hi - lo >= 2) {
        const size_t mid = (lo + hi) / 2;
        if (holds(mid, from)) {
            hit = mid;
            break;
        }
        if (v[mid].second < from) {
            lo = mid + 1;  // target lies to the right of mid
        } else {
            hi = mid - 1;  // target lies to the left of mid
        }
    }

    // At most two candidates remain; probe both ends of the window.
    if (holds(lo, from)) hit = lo;
    if (holds(hi, from)) hit = hi;

    if (hit == -1) return false;
    return holds(hit, to);
}
// Packs four values into a single 64-bit key:
//   a * 2^48 + b * 2^32 + c * 2^16 + d.
// Keys are distinct for distinct inputs as long as each value lies in
// [0, 65535].
//
// The arithmetic is done on unsigned operands because left-shifting a
// negative signed value is undefined behaviour before C++20; results for
// non-negative, non-overflowing inputs are unchanged.
i64 _hs(i64 a, i64 b, i64 c, i64 d) {
    using u64 = unsigned long long;
    const u64 packed = (static_cast<u64>(a) << 48) + (static_cast<u64>(b) << 32) +
                       (static_cast<u64>(c) << 16) + static_cast<u64>(d);
    return static_cast<i64>(packed);
}
// Counts the distinct (L, R, T, B) border quadruples accepted by the checks
// below for the grid `a` (presumably the IOI "Rectangles" task declared in
// rect.h — confirm against the header).
//
// For each cell (i, j):
//   L / R = nearest column in row i, left / right of j, with a strictly
//           greater value; T / B = the same along column j (above / below).
// The quadruple is accepted when
//   * the column pair (L, R) was reported by findLegalTuple for every row in
//     T+1 .. B-1, and
//   * the row pair (T, B) was reported by findLegalTuple for every column in
//     L+1 .. R-1.
// Accepted quadruples are hashed with _hs and de-duplicated; the number of
// unique keys is returned.
i64 count_rectangles(vector<vector<int>> a) {
    // Every accepted quadruple needs a strictly greater neighbour on all four
    // sides of some cell, which is impossible with fewer than 3 rows or
    // columns. This also protects the a[0] access on an empty grid.
    if (a.size() < 3 || a[0].size() < 3) return 0;

    const size_t N = a.size();
    const size_t M = a[0].size();

    // Transposed copy so that columns can be processed like rows.
    vector<vector<int>> b(M, vector<int>(N));
    for (size_t i = 0; i < N; i++)
        for (size_t j = 0; j < M; j++) b[j][i] = a[i][j];

    // Nearest strictly greater neighbours: aM[i][j] along row i,
    // bM[j][i] along column j.
    vector<vector<pi>> aM;
    vector<vector<pi>> bM;
    aM.reserve(N);
    bM.reserve(M);
    for (const auto &row : a) aM.push_back(getMaxPos(row));
    for (const auto &col : b) bM.push_back(getMaxPos(col));

    // aL[l][r]: rows in which columns (l, r) form a legal pair;
    // bL[t][d]: columns in which rows (t, d) form a legal pair.
    // Rows/columns are appended in ascending order, so each list can be
    // compressed into sorted runs of consecutive indices.
    vector<vector<vector<pi>>> aL(M, vector<vector<pi>>(M));
    vector<vector<vector<pi>>> bL(N, vector<vector<pi>>(N));
    for (size_t i = 0; i < N; i++)
        for (const auto &[l, r] : findLegalTuple(a[i]))
            aL[l][r].emplace_back(static_cast<int>(i), 0);
    for (size_t j = 0; j < M; j++)
        for (const auto &[t, d] : findLegalTuple(b[j]))
            bL[t][d].emplace_back(static_cast<int>(j), 0);
    for (auto &vec : aL)
        for (auto &el : vec) el = processToIntervals(el);
    for (auto &vec : bL)
        for (auto &el : vec) el = processToIntervals(el);

    // Main logic: test the candidate quadruple induced by every cell.
    vector<i64> us;
    for (size_t i = 0; i < N; i++) {
        for (size_t j = 0; j < M; j++) {
            const auto [L, R] = aM[i][j];
            const auto [T, B] = bM[j][i];
            // -1 marks a missing neighbour; skip before the values are used
            // as indices or fed into the hash.
            if (L == -1 || R == -1 || T == -1 || B == -1) continue;
            if (intervalBinarySearch(aL[L][R], {T + 1, B - 1}) &&
                intervalBinarySearch(bL[T][B], {L + 1, R - 1}))
                us.emplace_back(_hs(L, R, T, B));
        }
    }

    // Several cells can induce the same quadruple; count each one once.
    sort(us.begin(), us.end());
    us.erase(unique(us.begin(), us.end()), us.end());
    return static_cast<i64>(us.size());
}