| # | Time | Username | Problem | Language | Result | Execution time | Memory |
|---|---|---|---|---|---|---|---|
| 1326907 | defactopdiddy | Sandcastle 2 (JOI22_ho_t5) | C++20 | 0 ms | 0 KiB |
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx2,bmi,bmi2,lzcnt,popcnt")
#include <iostream>
#include <vector>
#include <algorithm>
using namespace std;
const int MAX_CELLS = 50005;
uint32_t Delta_packed[MAX_CELLS][9];
int16_t C[MAX_CELLS][9];
int Pref[MAX_CELLS];
int freq[350005];
int H, W;
vector<vector<int>> A;
// Verify boundaries up to 2 cells depth
inline bool in_R(int vr, int vc, int ur, int uc, int du, int dd, int dc1, int dc2) {
int dr = vr - ur;
int dc = vc - uc;
if (dr < 0 && du < 2 && dr < -du) return false;
if (dr > 0 && dd < 2 && dr > dd) return false;
if (dc < 0 && dc1 < 2 && dc < -dc1) return false;
if (dc > 0 && dc2 < 2 && dc > dc2) return false;
return true;
}
// Extract localized evaluations: 1 for valid local minima, 1 for local source mapping
int get_val(int r, int c, int du, int dd, int dc1, int dc2) {
int minima = 1;
int dr_n[] = {-1, 1, 0, 0};
int dc_n[] = {0, 0, -1, 1};
for (int i = 0; i < 4; ++i) {
int nr = r + dr_n[i], nc = c + dc_n[i];
if (nr >= 0 && nr < H && nc >= 0 && nc < W) {
if (A[nr][nc] < A[r][c] && in_R(nr, nc, r, c, du, dd, dc1, dc2)) {
minima = 0; break;
}
}
}
int indeg0 = 1;
for (int i = 0; i < 4; ++i) {
int nr = r + dr_n[i], nc = c + dc_n[i];
if (nr >= 0 && nr < H && nc >= 0 && nc < W) {
if (A[nr][nc] > A[r][c] && in_R(nr, nc, r, c, du, dd, dc1, dc2)) {
bool points = true;
for (int j = 0; j < 4; ++j) {
int wr = nr + dr_n[j], wc = nc + dc_n[j];
if (wr >= 0 && wr < H && wc >= 0 && wc < W) {
if (A[wr][wc] > A[r][c] && A[wr][wc] < A[nr][nc] && in_R(wr, wc, r, c, du, dd, dc1, dc2)) {
points = false; break;
}
}
}
if (points) { indeg0 = 0; break; }
}
}
}
return minima + indeg0;
}
int main() {
// I/O Operations Speedup
ios_base::sync_with_stdio(false);
cin.tie(NULL);
if (!(cin >> H >> W)) return 0;
A.assign(H, vector<int>(W));
for (int r = 0; r < H; ++r) {
for (int c = 0; c < W; ++c) cin >> A[r][c];
}
// Force constraints bound to mathematically limit execution row loops stringently
if (H > W) {
vector<vector<int>> transposed(W, vector<int>(H));
for (int r = 0; r < H; ++r) {
for (int c = 0; c < W; ++c) transposed[c][r] = A[r][c];
}
swap(H, W);
A = move(transposed);
}
// Offline block state evaluations mathematically mapped utilizing cache packing
for (int r = 0; r < H; ++r) {
for (int c = 0; c < W; ++c) {
int id = r * W + c;
for (int du = 0; du < 3; ++du) {
for (int dd = 0; dd < 3; ++dd) {
uint32_t packed = 0;
for (int dc = 0; dc < 9; ++dc) {
int dc1 = dc / 3, dc2 = dc % 3;
int val = get_val(r, c, du, dd, dc1, dc2);
int old_val = (dd > 0) ? get_val(r, c, du, dd - 1, dc1, dc2) : 0;
int delta = val - old_val;
uint32_t bits = (delta + 2) & 7;
packed |= (bits << (3 * dc));
}
Delta_packed[id][du * 3 + dd] = packed;
}
}
}
}
long long ans = 0;
const int OFFSET = 150000;
for (int r1 = 0; r1 < H; ++r1) {
// Soft memory wipe optimized for localized row tracking width iteration bounds
for (int c = 0; c < W; ++c) {
C[c][0] = C[c][1] = C[c][2] = C[c][3] = C[c][4] =
C[c][5] = C[c][6] = C[c][7] = C[c][8] = 0;
}
for (int r2 = r1; r2 < H; ++r2) {
for (int step = 0; step <= 2; ++step) {
int r = r2 - step;
if (r < r1) break;
int state = min(2, r - r1) * 3 + step;
int base_id = r * W;
// Bitwise unpack transitions dynamically avoiding massive if-conditions
for (int c = 0; c < W; ++c) {
uint32_t packed = Delta_packed[base_id + c][state];
C[c][0] += (packed & 7) - 2;
C[c][1] += ((packed >> 3) & 7) - 2;
C[c][2] += ((packed >> 6) & 7) - 2;
C[c][3] += ((packed >> 9) & 7) - 2;
C[c][4] += ((packed >> 12) & 7) - 2;
C[c][5] += ((packed >> 15) & 7) - 2;
C[c][6] += ((packed >> 18) & 7) - 2;
C[c][7] += ((packed >> 21) & 7) - 2;
C[c][8] += ((packed >> 24) & 7) - 2;
}
}
for (int c = 0; c < W; ++c) if (C[c][0] == 2) ans++;
for (int c = 0; c < W - 1; ++c) if (C[c][1] + C[c+1][3] == 2) ans++;
for (int c = 0; c < W - 2; ++c) if (C[c][2] + C[c+1][4] + C[c+2][6] == 2) ans++;
if (W >= 4) {
Pref[0] = C[0][8];
for (int c = 1; c < W; ++c) Pref[c] = Pref[c-1] + C[c][8];
for (int c2 = 3; c2 < W; ++c2) {
int c1 = c2 - 3;
int Y = C[c1][2] + C[c1+1][5] - Pref[c1+1];
freq[(2 - Y) + OFFSET]++;
int X = C[c2-1][7] + C[c2][6] + Pref[c2-2];
ans += freq[X + OFFSET];
}
for (int c2 = 3; c2 < W; ++c2) {
int c1 = c2 - 3;
int Y = C[c1][2] + C[c1+1][5] - Pref[c1+1];
freq[(2 - Y) + OFFSET]--;
}
}
}
}
cout << ans << "\n";
return 0;
}
