Submission #752241

#TimeUsernameProblemLanguageResultExecution timeMemory
752241tch1cherinOlympic Bus (JOI20_ho_t4)C++17
37 / 100
1080 ms12780 KiB
#pragma clang attribute push (__attribute__((target("avx2,bmi,bmi2,lzcnt,popcnt"))), apply_to=function) #pragma GCC optimize ("Ofast") #pragma GCC target ("avx2,bmi,bmi2,lzcnt,popcnt") #include <x86intrin.h> #include <unistd.h> #include <vector> #include <ext/pb_ds/assoc_container.hpp> using namespace std; using namespace __gnu_pbds; using namespace __gnu_cxx; #include <sys/mman.h> #include <bits/stdc++.h> #define dprintf(...) //fprintf(stderr, __VA_ARGS__) #define ll long long // https://judge.yosupo.jp/submission/100577 template<typename T> struct align_alloc { T* oldptr=0; T* ptr = 0; int ptr_sz = 0; typedef T value_type; T* allocate(int n) { if (n <= 1<<14) { int sz=(n*sizeof(T) + 63)>>6<<6; if (sz <= ptr_sz) return ptr; if(oldptr)free(oldptr); oldptr=ptr; ptr_sz = sz; return ptr = (T*) aligned_alloc(64, sz); } int sz = (n*sizeof(T) + (1<<21) - 1) >> 21 << 21; if (sz <= ptr_sz) return ptr; if(oldptr)free(oldptr); oldptr=ptr; ptr_sz = sz; ptr = (T*)aligned_alloc(1<<21, sz); madvise(ptr, sz, MADV_HUGEPAGE); return ptr; } void deallocate(T* p, int n) { if (p==oldptr and oldptr) { free(oldptr); oldptr=0; } } ~align_alloc() { if(ptr) free(ptr); ptr=0; ptr_sz=0; if(oldptr) free(oldptr); oldptr=0; } template<typename U> struct rebind { typedef align_alloc<U> other; }; }; typedef __m256i mvi; typedef __m256 mvf; typedef __m256d mvd; typedef unsigned long long ull; pair</*min*/ll, /*ind*/int> argmin(mvi v1, mvi v2) { mvi mmask = _mm256_cmpgt_epi64(v1, v2); mvi v=(mvi)_mm256_blendv_pd((mvd)v1, (mvd)v2, (mvd)mmask); mvi x=v; mvi y = (mvi)_mm256_permute_pd((mvd)x, 0b01'01); // 0<=>1 2<=>3 mvi mask = _mm256_cmpgt_epi64(x, y); x = (mvi)_mm256_blendv_pd((mvd)x, (mvd)y, (mvd)mask); y = _mm256_permute4x64_epi64(x, 0b01'00'11'10); mask = _mm256_cmpgt_epi64(x, y); x = (mvi)_mm256_blendv_pd((mvd)x, (mvd)y, (mvd)mask); ll mn = _mm256_extract_epi64(x, 0); mask = _mm256_cmpeq_epi64(x, v); int ind = _tzcnt_u32(_mm256_movemask_pd((mvd)mask)); int whichmask = _mm256_movemask_pd((mvd)mmask); return make_pair(mn, (whichmask & (1<<ind)) == 0?ind:ind+4); } struct simd_priority_queue { vector<ll, align_alloc<ll>> heap; vector<int, align_alloc<int>> keys; // gp_hash_table<int, int> key_to_pos; vector<int> key_to_pos; simd_priority_queue(int nkeys) {key_to_pos.resize(nkeys);} int sz = 0; int par(int ind) { return (ind>>3)-1; } int ch(int ind, int c) { return (ind+1) * 8 + c; } void siftup(int ind, int key, ll val) { while ((ind & ~0x7) != 0 and heap[par(ind)] > val) { heap[ind] = heap[par(ind)]; keys[ind] = keys[par(ind)]; key_to_pos[keys[ind]] = ind; ind=par(ind); } heap[ind]=val; keys[ind]=key; key_to_pos[key] = ind; } void siftdown(int ind, int key, ll val) { while(1) { dprintf("siftdown ind = %d key = %d val = %lld\n", ind, key, val); if (__builtin_expect(ch(ind, 7) >= sz, 0)) { int smallest = ch(ind, 0); if (smallest >= sz) break; for (int c = smallest+1; c < sz; c++) smallest += (heap[smallest] > heap[c]) * (c - smallest); if (heap[smallest] >= val) break; heap[ind] = heap[smallest]; keys[ind] = keys[smallest]; key_to_pos[keys[ind]] = ind; ind = smallest; break; } auto [mn, mind] = argmin(_mm256_load_si256((mvi*)&heap[ch(ind,0)]), _mm256_load_si256((mvi*)&heap[ch(ind,4)])); // loads 8 vals if (mn >= val) break; int smallest = ch(ind, mind); heap[ind] = heap[smallest]; keys[ind] = keys[smallest]; key_to_pos[keys[ind]] = ind; ind = smallest; } heap[ind]=val; keys[ind]=key; key_to_pos[key] = ind; } pair<int,ll> top() { if(sz==0)return make_pair(-999,-999); if(sz<8) { int smallest=0; for (int c=1;c<sz;c++) smallest+=(heap[smallest]>heap[c])*(c-smallest); return make_pair(keys[smallest], heap[smallest]); } auto [mn, amn] = argmin(_mm256_load_si256((mvi*)&heap[0]), _mm256_load_si256((mvi*)&heap[4])); return make_pair(keys[amn], mn); } int topind() { if(sz==0)return -999; if(sz<8) { int smallest=0; for (int c=1;c<sz;c++) smallest+=(heap[smallest]>heap[c])*(c-smallest); return smallest; } return argmin(_mm256_load_si256((mvi*)&heap[0]), _mm256_load_si256((mvi*)&heap[4])).second; } void decreasekey(int key, int nkey, ll nval) { int pos = key_to_pos[key]; // key_to_pos.erase(key); siftup(pos, nkey, nval); } void insert(int key, ll val) { sz++; heap.resize(sz); keys.resize(sz); siftup(sz-1, key, val); } void extract() { int ti = topind(); // key_to_pos.erase(keys[ti]); sz--; if(sz!=0 and ti != sz) siftdown(ti, keys.back(), heap.back()); heap.resize(sz); keys.resize(sz); } void replace(int key, ll val) { siftdown(topind(), key, val); } }; struct edge { int from, to, weight, cost, id; edge() {} edge(int _from, int _to, int _weight, int _cost, int _id) : from(_from), to(_to), weight(_weight), cost(_cost), id(_id) {} }; struct node { int distance, parent; node() { distance = INT_MAX; parent = -1; } node(int _distance, int _parent) : distance(_distance), parent(_parent) {} }; struct result { int edge; vector<int> distances; }; vector<node> dijkstra(vector<vector<edge>> graph, int start) { int n = (int)graph.size(); vector<node> answer(n); simd_priority_queue q(n); q.insert(start, 0); answer[start].distance = 0; while (q.sz) { auto [u, d] = q.top(); q.extract(); if (answer[u].distance > d) { continue; } for (auto [from, to, weight, cost, id] : graph[u]) { if (answer[from].distance + weight < answer[to].distance) { answer[to] = node(answer[from].distance + weight, id); q.insert(to, answer[to].distance); } } } return answer; } vector<vector<edge>> transpose(vector<vector<edge>> graph) { int n = (int)graph.size(); vector<vector<edge>> new_graph(n); for (int u = 0; u < n; u++) { for (auto [from, to, weight, cost, id] : graph[u]) { new_graph[to].emplace_back(to, from, weight, cost, id); } } return new_graph; } vector<vector<int>> find_shortest_paths_without_each_edge(vector<vector<edge>> graph, int start) { int n = (int)graph.size(); int m = 0; for (int u = 0; u < n; u++) { for (auto [from, to, weight, cost, id] : graph[u]) { m = max(m, 1 + id); } } vector<node> result = dijkstra(graph, start); vector<int> dist(n); for (int i = 0; i < n; i++) { dist[i] = result[i].distance; } vector<vector<int>> answer(m); for (auto [distance, parent] : result) { if (parent != -1) { vector<vector<edge>> new_graph = graph; for (int u = 0; u < n; u++) { for (int i = 0; i < (int)new_graph[u].size(); i++) { auto [from, to, weight, cost, id] = new_graph[u][i]; if (id == parent) { new_graph[u].erase(new_graph[u].begin() + i); break; } } } vector<node> new_result = dijkstra(new_graph, start); answer[parent] = vector<int>(n); for (int i = 0; i < n; i++) { answer[parent][i] = new_result[i].distance; } } } answer.push_back(dist); return answer; } void solve() { int n, m; cin >> n >> m; vector<vector<edge>> graph(n); for (int i = 0; i < m; i++) { int from, to, weight, cost; cin >> from >> to >> weight >> cost; from--, to--; graph[from].emplace_back(from, to, weight, cost, i); } vector<vector<edge>> rev_graph = transpose(graph); vector<vector<int>> result_a = find_shortest_paths_without_each_edge(graph, 0); vector<vector<int>> result_b = find_shortest_paths_without_each_edge(graph, n - 1); vector<vector<int>> rev_result_a = find_shortest_paths_without_each_edge(rev_graph, 0); vector<vector<int>> rev_result_b = find_shortest_paths_without_each_edge(rev_graph, n - 1); auto get = [](vector<vector<int>>& x, int i, int j) { if (x[i].empty()) { return x.back()[j]; } else { return x[i][j]; } }; int ans = INT_MAX; if (result_a.back()[n - 1] != INT_MAX && result_b.back()[0] != INT_MAX) { ans = result_a.back()[n - 1] + result_b.back()[0]; } for (int u = 0; u < n; u++) { for (auto [from, to, weight, cost, id] : graph[u]) { int AB = INT_MAX, BA = INT_MAX; AB = min(AB, get(result_a, id, n - 1)); if (get(result_a, id, to) != INT_MAX && get(rev_result_b, id, from) != INT_MAX) { AB = min(AB, get(result_a, id, to) + get(rev_result_b, id, from) + weight); } BA = min(BA, get(result_b, id, 0)); if (get(result_b, id, to) != INT_MAX && get(rev_result_a, id, from) != INT_MAX) { BA = min(BA, get(result_b, id, to) + get(rev_result_a, id, from) + weight); } if (AB == INT_MAX || BA == INT_MAX) { continue; } ans = min(ans, AB + BA + cost); } } cout << (ans == INT_MAX ? -1 : ans) << "\n"; } int main() { ios::sync_with_stdio(false); cin.tie(nullptr); solve(); }

Compilation message (stderr)

ho_t4.cpp:1: warning: ignoring '#pragma clang attribute' [-Wunknown-pragmas]
    1 | #pragma clang attribute push (__attribute__((target("avx2,bmi,bmi2,lzcnt,popcnt"))), apply_to=function)
      |
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...
#Verdict Execution timeMemoryGrader output
Fetching results...