This submission is migrated from previous version of oj.uz, which used different machine for grading. This submission may have different result if resubmitted.
//{ <defines>
#include <bits/stdc++.h>
using namespace std;
//#pragma comment(linker, "/stack:200000000")
//#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native")
//#pragma GCC optimize("Ofast,no-stack-protector,unroll-loops,fast-math,-O3")
#define fr(i, n) for(int i = 0; i < n; ++i)
#define fo(n) fr(i, n)
#define re return
#define ef else if
#define ifn(x) if(!(x))
#define _  << ' ' <<
#define ft first
#define sd second
#define ve vector
#define pb push_back
#define eb emplace_back
#define sz(x) int((x).size())
#define bnd(x) x.begin(), x.end()
#define clr(x, y) memset((x), (y), sizeof (x))
typedef long long ll;
typedef long double ld;
typedef pair<int, int> pii;
typedef pair<ll, ll> pll;
typedef ve<int> vi;
inline ll time() {re chrono :: system_clock().now().time_since_epoch().count();}
mt19937 rnd(time());
mt19937_64 RND(time());
template<typename t> inline void umin(t &a, t b) {a = min(a, b);}
template<typename t> inline void umax(t &a, t b) {a = max(a, b);}
int md = 998244353;
inline int m_add(int&a, int b) {a += b; if(a < 0) a += md; if(a >= md) a -= md; re a;}
inline int m_sum(int a, int b) {a += b; if(a < 0) a += md; if(a >= md) a -= md; re a;}
inline int m_mul(int&a, int b) {re a = 1ll * a * b % md;}
inline int m_prod(int a, int b) {re 1ll * a * b % md;}
int m_bpow(ll A, ll b) {
    int a = A % md;
    ll ans = 1;
    for(ll p = 1ll << 63 - __builtin_clzll(b); p; p >>= 1) {
        (ans *= ans) %= md;
        if(p & b)
            (ans *= a) %= md;
    }
    re ans;
}
//const ld pi = arg(complex<ld>(-1, 0));
//const ld pi2 = pi + pi;
const int oo = 2e9;
const ll OO = 4e18;
//} </defines>
const int N = 2e3 + 5;
int k, n;
vi d, p;
struct e {
    int u, v, capacity, flow, cost;
    bool full() { return flow == capacity; }
};
vector<e> E;
void add_edge(int u, int v, int c) {
    E.push_back({u, v, 1, 0, +c});
    E.push_back({v, u, 0, 0, -c});
}
short mp[N][N], nm[N][N];
vector<bool> mk;
vector< list<e*> > g;
void fb() {
    p.assign(n, +oo);
    p[0] = 0;
    fo(n) for(e &i : E) ifn(i.full())
        umin(p[i.v], p[i.u] + i.cost);
}
void dk() {
    d.assign(n, +oo);
    d[0] = 0;
    struct cmp {
        bool operator() (const int &a, const int &b) const
            { return d[a] != d[b] ? d[a] < d[b] : a < b; }
    };
    set<int, cmp> q;
    q.insert(0);
    while(!q.empty()) {
        int u = *q.begin();
        q.erase(q.begin());
        for(e *i : g[u]) {
            if(i->full()) continue;
            int v = i->v;
            int w = d[u] + i->cost + p[u] - p[v];
            if(d[v] <= w) continue;
            if(q.count(v)) q.erase(v);
            d[v] = w;
            q.insert(v);
        }
    }
    fo(n) if(d[i] < oo) p[i] += d[i];
    for(e &i : E) ifn(i.full()) assert(p[i.u] + i.cost - p[i.v] >= 0);
}
int ff(int v = 0) {
    if(mk[v]) re 1;
    mk[v] = true;
    if(v == 1) re 0;
    for(e *i : g[v]) {
        if(i->full()) continue;
        int u = i->v;
        if(i->cost + p[v] != p[u]) continue;
        int f = ff(u);
        if(f == 1) continue;
        i->flow++;
        E[1 ^ i - &E[0]].flow--;
        return f + i->cost;
    }
    return 1;
}
int field(const pii &p) { return mp[p.ft][p.sd]; }
void solve() {
    int t;
    cin >> t >> k;
    ll sum = 0;
    E.clear();
    fo(t) fr(j, t) {
        cin >> mp[i][j];
        sum += mp[i][j];
    }
    const int MX = 2020;
    vi r(MX, 0);
    fo(t) fr(j, t) {
        if(i + 1 < t) r[mp[i][j] + mp[i + 1][j]]++;
        if(j + 1 < t) r[mp[i][j] + mp[i][j + 1]]++;
        nm[i][j] = 0;
    }
    int z = min(t * (t - 1) * 2, 7 * (k - 1) + 1);
    for(int i = MX - 1, j = z; i >= 0; --i) {
        umin(r[i], j);
        j -= r[i];
    }
    n = 2;
    fo(t) fr(j, t) {
        if(i + 1 < t && r[mp[i][j] + mp[i + 1][j]]-- > 0) {
            short *u = &nm[i + 0][j]; if(!*u) *u = n++;
            short *v = &nm[i + 1][j]; if(!*v) *v = n++;
            if(i + j & 1) swap(u, v);
            add_edge(*u, *v, 0);
        }
        if(j + 1 < t && r[mp[i][j] + mp[i][j + 1]]-- > 0) {
            short *u = &nm[i][j + 0]; if(!*u) *u = n++;
            short *v = &nm[i][j + 1]; if(!*v) *v = n++;
            if(i + j & 1) swap(u, v);
            add_edge(*u, *v, 0);
        }
        if(!nm[i][j]) continue;
        if(i + j & 1) {
            add_edge(nm[i][j], 1, -mp[i][j]);
        } else {
            add_edge(0, nm[i][j], -mp[i][j]);
        }
    }
    g.clear();
    g.resize(n);
    for(auto &i : E) g[i.u].pb(&i);
    int ans = 0;
    fb();
    fo(k) {
        dk();
        mk.assign(n, false);
        ans += ff();
    }
    cout << sum + ans << endl;
}
int main() {
#ifdef _LOCAL
    freopen("in.txt", "r", stdin);
    int tests; cin >> tests;
    for(int test = 1; test <= tests; ++test) {
		cerr << "case #" << test << endl;
        solve();
        cerr << endl;
    }
#else
//    freopen("input.txt", "r", stdin); freopen("output.txt", "w", stdout);
    ios_base :: sync_with_stdio(0); cin.tie(0); cout.tie(0);
    solve();
#endif
    return 0;
}
Compilation message (stderr)
domino.cpp: In function 'int m_bpow(ll, ll)':
domino.cpp:50:26: warning: suggest parentheses around '-' inside '<<' [-Wparentheses]
     for(ll p = 1ll << 63 - __builtin_clzll(b); p; p >>= 1) {
                       ~~~^~~~~~~~~~~~~~~~~~~~
domino.cpp: In function 'int ff(int)':
domino.cpp:131:17: warning: suggest parentheses around arithmetic in operand of '^' [-Wparentheses]
         E[1 ^ i - &E[0]].flow--;
domino.cpp: In function 'void solve()':
domino.cpp:165:18: warning: suggest parentheses around '+' in operand of '&' [-Wparentheses]
             if(i + j & 1) swap(u, v);
                ~~^~~
domino.cpp:171:18: warning: suggest parentheses around '+' in operand of '&' [-Wparentheses]
             if(i + j & 1) swap(u, v);
                ~~^~~
domino.cpp:175:14: warning: suggest parentheses around '+' in operand of '&' [-Wparentheses]
         if(i + j & 1) {
            ~~^~~| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... | 
| # | Verdict | Execution time | Memory | Grader output | 
|---|
| Fetching results... |