This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
#pragma GCC optimize ("O3")
#pragma GCC target ("sse4")
#include <bits/stdc++.h>
#include <ext/pb_ds/tree_policy.hpp>
#include <ext/pb_ds/assoc_container.hpp>
#include <ext/rope>
using namespace std;
using namespace __gnu_pbds;
using namespace __gnu_cxx;
 
typedef long long ll;
typedef long double ld;
typedef complex<ld> cd;
typedef pair<int, int> pi;
typedef pair<ll,ll> pl;
typedef pair<ld,ld> pd;
typedef vector<int> vi;
typedef vector<ld> vd;
typedef vector<ll> vl;
typedef vector<pi> vpi;
typedef vector<pl> vpl;
typedef vector<cd> vcd;
template <class T> using pqg = priority_queue<T,vector<T>,greater<T>>;
template <class T> using Tree = tree<T, null_type, less<T>, rb_tree_tag,tree_order_statistics_node_update>;
#define FOR(i, a, b) for (int i=a; i<(b); i++)
#define F0R(i, a) for (int i=0; i<(a); i++)
#define FORd(i,a,b) for (int i = (b)-1; i >= a; i--)
#define F0Rd(i,a) for (int i = (a)-1; i >= 0; i--)
#define sz(x) (int)(x).size()
#define mp make_pair
#define pb push_back
#define f first
#define s second
#define lb lower_bound
#define ub upper_bound
#define all(x) x.begin(), x.end()
const int MOD = 1000000007;
const ll INF = 1e18;
const int MX = 401;
template<class T> T poll(pqg<T>& x) {
    T y = x.top(); x.pop();
    return y;
}
pi operator+(const pi& l, const pi& r) { return {l.f+r.f,l.s+r.s}; }
// we want to generate a border for the villages 
int N,M, h[MX][MX], v[MX][MX],g[MX][MX];
ll dist[MX][MX];
pi pre[MX][MX];
set<array<int,3>> banEdge;
void input() {
    ios_base::sync_with_stdio(0); cin.tie(0);
    cin >> N >> M;
    F0R(i,N) F0R(j,M) cin >> g[i][j];
    F0R(i,N) F0R(j,M+1) cin >> v[i][j];
    F0R(i,N+1) F0R(j,M) cin >> h[i][j];
}
pi dir[4] = {{1,0},{0,1},{-1,0},{0,-1}};
bool valid(pi x) {
    return 0 <= x.f && x.f <= N && 0 <= x.s && x.s <= M;
}
int getWei(pi x, int d) {
    if (d == 0) return v[x.f][x.s];
    if (d == 1) return h[x.f][x.s];
    if (d == 2) return v[x.f-1][x.s];
    return h[x.f][x.s-1];
}
bool done[MX][MX];
void insBan(pi a, pi b) {
    if (a > b) swap(a,b);
    if (a.f+1 == b.f) banEdge.insert({0,a.f,a.s});
    else banEdge.insert({1,a.f,a.s});
}
void tri(int i, int j) {
    if (done[i][j]) return;
    done[i][j] = 1;
    pi x = pre[i][j];
    insBan(x,{i,j});
    tri(x.f,x.s);
}
void genShortest() {
    F0R(i,N+1) F0R(j,M+1) dist[i][j] = INF;
    dist[0][0] = 0; 
    pqg<pair<ll,pi>> p; p.push({0,{0,0}});
    while (sz(p)) {
        auto a = poll(p); if (a.f > dist[a.s.f][a.s.s]) continue;
        F0R(i,4) {
            pi A = a.s+dir[i];
            if (!valid(A)) continue;
            ll d = a.f+getWei(a.s,i);
            if (d < dist[A.f][A.s]) {
                pre[A.f][A.s] = a.s;
                p.push({dist[A.f][A.s] = d,A});
            }
        }
    }
    F0R(i,N) F0R(j,M) if (g[i][j]) {
        insBan({i,j},{i,j+1});
        insBan({i,j+1},{i+1,j+1});
        insBan({i+1,j},{i+1,j+1});
        insBan({i,j},{i+1,j});
        tri(i,j);
    }
    banEdge.insert({0,-1,0});
    banEdge.insert({1,0,-1});
}
ll ans[MX*MX*4];
vpi adj[MX*MX*4];
int hsh(int a, int b, int c) {
    return 4*((M+1)*a+b)+c;
}
void addEdge(int a, int b, int c) {
    adj[a].pb({b,c}), adj[b].pb({a,c});
}
void genAdj() {
    /*for (auto a: banEdge) {
        cout << a[0] << " " << a[1] << " " << a[2] << "\n";
    }*/
    F0R(i,N) F0R(j,M+1) {
        addEdge(hsh(i,j,2),hsh(i+1,j,1),v[i][j]);
        addEdge(hsh(i,j,3),hsh(i+1,j,0),v[i][j]);
    }
    F0R(i,N+1) F0R(j,M) {
        addEdge(hsh(i,j,0),hsh(i,j+1,1),h[i][j]);
        addEdge(hsh(i,j,3),hsh(i,j+1,2),h[i][j]);
    }
    F0R(i,N+1) F0R(j,M+1) {
        if (!banEdge.count({0,i,j})) addEdge(hsh(i,j,2),hsh(i,j,3),0);
        if (!banEdge.count({0,i-1,j})) addEdge(hsh(i,j,0),hsh(i,j,1),0);
        if (!banEdge.count({1,i,j})) addEdge(hsh(i,j,0),hsh(i,j,3),0);
        if (!banEdge.count({1,i,j-1})) addEdge(hsh(i,j,1),hsh(i,j,2),0);
    }
}
void solve() {
    genAdj();
    F0R(i,MX*MX*4) ans[i] = INF;
    pqg<pl> p; p.push({ans[2] = 0,2});
    while (sz(p)) {
        auto a = poll(p); if (a.f > ans[a.s]) continue;
        for (auto b: adj[a.s]) if (a.f+b.s < ans[b.f]) 
            p.push({ans[b.f] = a.f+b.s,b.f});
    }
}
int main() {
    input();
    genShortest();
    solve();
    cout << ans[0];
}
/* Look for:
* the exact constraints (multiple sets are too slow for n=10^6 :( ) 
* special cases (n=1?)
* overflow (ll vs int?)
* array bounds
* if you have no idea just guess the appropriate well-known algo instead of doing nothing :/
*/
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict  | Execution time | Memory | Grader output | 
|---|
| Fetching results... |