Submission #752241

#TimeUsernameProblemLanguageResultExecution timeMemory
752241tch1cherinOlympic Bus (JOI20_ho_t4)C++17
37 / 100
1080 ms12780 KiB
#pragma clang attribute push (__attribute__((target("avx2,bmi,bmi2,lzcnt,popcnt"))), apply_to=function)
#pragma GCC optimize ("Ofast")
#pragma GCC target ("avx2,bmi,bmi2,lzcnt,popcnt")

#include <x86intrin.h>
#include <unistd.h>

#include <vector>
#include <ext/pb_ds/assoc_container.hpp>
using namespace std;
using namespace __gnu_pbds;
using namespace __gnu_cxx;
#include <sys/mman.h>
#include <bits/stdc++.h>
#define dprintf(...) //fprintf(stderr, __VA_ARGS__)
#define ll long long

// https://judge.yosupo.jp/submission/100577
template<typename T>
struct align_alloc
{
        T* oldptr=0;
        T* ptr = 0;
        int ptr_sz = 0;
        typedef T value_type;
        T* allocate(int n) {
                if (n <= 1<<14) {
                        int sz=(n*sizeof(T) + 63)>>6<<6;
                        if (sz <= ptr_sz) return ptr;

                        if(oldptr)free(oldptr);
                        oldptr=ptr;
                        ptr_sz = sz;
                        return ptr = (T*) aligned_alloc(64, sz);
                }
                int sz = (n*sizeof(T) + (1<<21) - 1) >> 21 << 21;
                if (sz <= ptr_sz) return ptr;

                if(oldptr)free(oldptr);
                oldptr=ptr;
                ptr_sz = sz;
                ptr = (T*)aligned_alloc(1<<21, sz);
                madvise(ptr, sz, MADV_HUGEPAGE);
                return ptr;
        }
        void deallocate(T* p, int n) {
                if (p==oldptr and oldptr) {
                        free(oldptr);
                        oldptr=0;
                }
        } ~align_alloc() {
                if(ptr) free(ptr);
                ptr=0;
                ptr_sz=0;
                if(oldptr) free(oldptr);
                oldptr=0;
        }
        template<typename U> struct rebind { typedef align_alloc<U> other; };
};

typedef __m256i mvi;
typedef __m256 mvf;
typedef __m256d mvd;
typedef unsigned long long ull;

pair</*min*/ll, /*ind*/int> argmin(mvi v1, mvi v2)
{
        mvi mmask = _mm256_cmpgt_epi64(v1, v2);
        mvi v=(mvi)_mm256_blendv_pd((mvd)v1, (mvd)v2, (mvd)mmask);
        mvi x=v;
        mvi y = (mvi)_mm256_permute_pd((mvd)x, 0b01'01); // 0<=>1 2<=>3
        mvi mask = _mm256_cmpgt_epi64(x, y);
        x = (mvi)_mm256_blendv_pd((mvd)x, (mvd)y, (mvd)mask);
        y = _mm256_permute4x64_epi64(x, 0b01'00'11'10);
        mask = _mm256_cmpgt_epi64(x, y);
        x = (mvi)_mm256_blendv_pd((mvd)x, (mvd)y, (mvd)mask);

        ll mn = _mm256_extract_epi64(x, 0);
        mask = _mm256_cmpeq_epi64(x, v);
        int ind = _tzcnt_u32(_mm256_movemask_pd((mvd)mask));
        int whichmask = _mm256_movemask_pd((mvd)mmask);
        return make_pair(mn, (whichmask & (1<<ind)) == 0?ind:ind+4);
}


struct simd_priority_queue
{
        vector<ll, align_alloc<ll>> heap;
        vector<int, align_alloc<int>> keys;
        // gp_hash_table<int, int> key_to_pos;
        vector<int> key_to_pos;

        simd_priority_queue(int nkeys) {key_to_pos.resize(nkeys);}
        int sz = 0;

        int par(int ind) { return (ind>>3)-1; }
        int ch(int ind, int c) { return (ind+1) * 8 + c; }

        void siftup(int ind, int key, ll val) {
                while ((ind & ~0x7) != 0 and heap[par(ind)] > val) {
                        heap[ind] = heap[par(ind)];
                        keys[ind] = keys[par(ind)];
                        key_to_pos[keys[ind]] = ind;
                        ind=par(ind);
                }
                heap[ind]=val;
                keys[ind]=key;
                key_to_pos[key] = ind;
        }
        void siftdown(int ind, int key, ll val) {
                while(1) {
                        dprintf("siftdown ind = %d key = %d val = %lld\n", ind, key, val);
                        if (__builtin_expect(ch(ind, 7) >= sz, 0)) {
                                int smallest = ch(ind, 0);
                                if (smallest >= sz) break;

                                for (int c = smallest+1; c < sz; c++)
                                        smallest += (heap[smallest] > heap[c]) * (c - smallest);

                                if (heap[smallest] >= val) break;

                                heap[ind] = heap[smallest];
                                keys[ind] = keys[smallest];
                                key_to_pos[keys[ind]] = ind;
                                ind = smallest;
                                break;
                        }
                        auto [mn, mind] = argmin(_mm256_load_si256((mvi*)&heap[ch(ind,0)]), _mm256_load_si256((mvi*)&heap[ch(ind,4)])); // loads 8 vals

                        if (mn >= val) break;
                        int smallest = ch(ind, mind);
                        heap[ind] = heap[smallest];
                        keys[ind] = keys[smallest];
                        key_to_pos[keys[ind]] = ind;
                        ind = smallest;
                }
                heap[ind]=val;
                keys[ind]=key;
                key_to_pos[key] = ind;
        }

        pair<int,ll> top() {
                if(sz==0)return make_pair(-999,-999);
                if(sz<8) {
                        int smallest=0;
                        for (int c=1;c<sz;c++)
                                smallest+=(heap[smallest]>heap[c])*(c-smallest);
                        return make_pair(keys[smallest], heap[smallest]);
                }
                auto [mn, amn] = argmin(_mm256_load_si256((mvi*)&heap[0]), _mm256_load_si256((mvi*)&heap[4]));
                return make_pair(keys[amn], mn);
        }
        int topind() {
                if(sz==0)return -999;
                if(sz<8) {
                        int smallest=0;
                        for (int c=1;c<sz;c++)
                                smallest+=(heap[smallest]>heap[c])*(c-smallest);
                        return smallest;
                }
                return argmin(_mm256_load_si256((mvi*)&heap[0]), _mm256_load_si256((mvi*)&heap[4])).second;
        }

        void decreasekey(int key, int nkey, ll nval) {
                int pos = key_to_pos[key];
                // key_to_pos.erase(key);
                siftup(pos, nkey, nval);
        }
        void insert(int key, ll val) {
                sz++;
                heap.resize(sz);
                keys.resize(sz);
                siftup(sz-1, key, val);
        }
        void extract() {
                int ti = topind();
                // key_to_pos.erase(keys[ti]);
                sz--;
                if(sz!=0 and ti != sz) siftdown(ti, keys.back(), heap.back());
                heap.resize(sz);
                keys.resize(sz);
        }
        void replace(int key, ll val) {
                siftdown(topind(), key, val);
        }
};

struct edge {
  int from, to, weight, cost, id;

  edge() {}

  edge(int _from, int _to, int _weight, int _cost, int _id) : from(_from), to(_to), weight(_weight), cost(_cost), id(_id) {}
};

struct node {
  int distance, parent;

  node() {
    distance = INT_MAX;
    parent = -1;
  }

  node(int _distance, int _parent) : distance(_distance), parent(_parent) {}
};

struct result {
  int edge;
  vector<int> distances; 
};

vector<node> dijkstra(vector<vector<edge>> graph, int start) {
  int n = (int)graph.size();
  vector<node> answer(n);
  simd_priority_queue q(n);
  q.insert(start, 0);
  answer[start].distance = 0;
  while (q.sz) {
    auto [u, d] = q.top();
    q.extract();
    if (answer[u].distance > d) {
      continue;
    }
    for (auto [from, to, weight, cost, id] : graph[u]) {
      if (answer[from].distance + weight < answer[to].distance) {
        answer[to] = node(answer[from].distance + weight, id);
        q.insert(to, answer[to].distance);
      }
    }
  }
  return answer;
}

vector<vector<edge>> transpose(vector<vector<edge>> graph) {
  int n = (int)graph.size();
  vector<vector<edge>> new_graph(n);
  for (int u = 0; u < n; u++) {
    for (auto [from, to, weight, cost, id] : graph[u]) {
      new_graph[to].emplace_back(to, from, weight, cost, id);
    }
  }
  return new_graph;
}

vector<vector<int>> find_shortest_paths_without_each_edge(vector<vector<edge>> graph, int start) {
  int n = (int)graph.size();
  int m = 0;
  for (int u = 0; u < n; u++) {
    for (auto [from, to, weight, cost, id] : graph[u]) {
      m = max(m, 1 + id);
    }
  }
  vector<node> result = dijkstra(graph, start);
  vector<int> dist(n);
  for (int i = 0; i < n; i++) {
    dist[i] = result[i].distance;
  }
  vector<vector<int>> answer(m);
  for (auto [distance, parent] : result) {
    if (parent != -1) {
      vector<vector<edge>> new_graph = graph;
      for (int u = 0; u < n; u++) {
        for (int i = 0; i < (int)new_graph[u].size(); i++) {
          auto [from, to, weight, cost, id] = new_graph[u][i];
          if (id == parent) {
            new_graph[u].erase(new_graph[u].begin() + i);
            break;
          }  
        }
      }
      vector<node> new_result = dijkstra(new_graph, start);
      answer[parent] = vector<int>(n);
      for (int i = 0; i < n; i++) {
        answer[parent][i] = new_result[i].distance;  
      }
    }
  }
  answer.push_back(dist);
  return answer;
}

void solve() {
  int n, m;
  cin >> n >> m;
  vector<vector<edge>> graph(n);
  for (int i = 0; i < m; i++) {
    int from, to, weight, cost;
    cin >> from >> to >> weight >> cost;
    from--, to--;
    graph[from].emplace_back(from, to, weight, cost, i);
  }
  vector<vector<edge>> rev_graph = transpose(graph);
  vector<vector<int>> result_a = find_shortest_paths_without_each_edge(graph, 0);
  vector<vector<int>> result_b = find_shortest_paths_without_each_edge(graph, n - 1);
  vector<vector<int>> rev_result_a = find_shortest_paths_without_each_edge(rev_graph, 0);
  vector<vector<int>> rev_result_b = find_shortest_paths_without_each_edge(rev_graph, n - 1);
  auto get = [](vector<vector<int>>& x, int i, int j) {
    if (x[i].empty()) {
      return x.back()[j];
    } else {
      return x[i][j];
    }
  };
  int ans = INT_MAX;
  if (result_a.back()[n - 1] != INT_MAX && result_b.back()[0] != INT_MAX) {
    ans = result_a.back()[n - 1] + result_b.back()[0];
  }
  for (int u = 0; u < n; u++) {
    for (auto [from, to, weight, cost, id] : graph[u]) {
      int AB = INT_MAX, BA = INT_MAX;
      AB = min(AB, get(result_a, id, n - 1));
      if (get(result_a, id, to) != INT_MAX && get(rev_result_b, id, from) != INT_MAX) {
        AB = min(AB, get(result_a, id, to) + get(rev_result_b, id, from) + weight);
      }
      BA = min(BA, get(result_b, id, 0));
      if (get(result_b, id, to) != INT_MAX && get(rev_result_a, id, from) != INT_MAX) {
        BA = min(BA, get(result_b, id, to) + get(rev_result_a, id, from) + weight); 
      }
      if (AB == INT_MAX || BA == INT_MAX) {
        continue;
      }
      ans = min(ans, AB + BA + cost);
    }
  }
  cout << (ans == INT_MAX ? -1 : ans) << "\n";
}

int main() {
  ios::sync_with_stdio(false);
  cin.tie(nullptr);
  solve();
}

Compilation message (stderr)

ho_t4.cpp:1: warning: ignoring '#pragma clang attribute' [-Wunknown-pragmas]
    1 | #pragma clang attribute push (__attribute__((target("avx2,bmi,bmi2,lzcnt,popcnt"))), apply_to=function)
      |
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...