#pragma clang attribute push (__attribute__((target("avx2,bmi,bmi2,lzcnt,popcnt"))), apply_to=function)
#pragma GCC optimize ("Ofast")
#pragma GCC target ("avx2,bmi,bmi2,lzcnt,popcnt")
#include <x86intrin.h>
#include <unistd.h>
#include <vector>
#include <ext/pb_ds/assoc_container.hpp>
using namespace std;
using namespace __gnu_pbds;
using namespace __gnu_cxx;
#include <sys/mman.h>
#include <bits/stdc++.h>
#define dprintf(...) //fprintf(stderr, __VA_ARGS__)
#define ll long long
// https://judge.yosupo.jp/submission/100577
template<typename T>
struct align_alloc
{
T* oldptr=0;
T* ptr = 0;
int ptr_sz = 0;
typedef T value_type;
T* allocate(int n) {
if (n <= 1<<14) {
int sz=(n*sizeof(T) + 63)>>6<<6;
if (sz <= ptr_sz) return ptr;
if(oldptr)free(oldptr);
oldptr=ptr;
ptr_sz = sz;
return ptr = (T*) aligned_alloc(64, sz);
}
int sz = (n*sizeof(T) + (1<<21) - 1) >> 21 << 21;
if (sz <= ptr_sz) return ptr;
if(oldptr)free(oldptr);
oldptr=ptr;
ptr_sz = sz;
ptr = (T*)aligned_alloc(1<<21, sz);
madvise(ptr, sz, MADV_HUGEPAGE);
return ptr;
}
void deallocate(T* p, int n) {
if (p==oldptr and oldptr) {
free(oldptr);
oldptr=0;
}
} ~align_alloc() {
if(ptr) free(ptr);
ptr=0;
ptr_sz=0;
if(oldptr) free(oldptr);
oldptr=0;
}
template<typename U> struct rebind { typedef align_alloc<U> other; };
};
typedef __m256i mvi;
typedef __m256 mvf;
typedef __m256d mvd;
typedef unsigned long long ull;
pair</*min*/ll, /*ind*/int> argmin(mvi v1, mvi v2)
{
mvi mmask = _mm256_cmpgt_epi64(v1, v2);
mvi v=(mvi)_mm256_blendv_pd((mvd)v1, (mvd)v2, (mvd)mmask);
mvi x=v;
mvi y = (mvi)_mm256_permute_pd((mvd)x, 0b01'01); // 0<=>1 2<=>3
mvi mask = _mm256_cmpgt_epi64(x, y);
x = (mvi)_mm256_blendv_pd((mvd)x, (mvd)y, (mvd)mask);
y = _mm256_permute4x64_epi64(x, 0b01'00'11'10);
mask = _mm256_cmpgt_epi64(x, y);
x = (mvi)_mm256_blendv_pd((mvd)x, (mvd)y, (mvd)mask);
ll mn = _mm256_extract_epi64(x, 0);
mask = _mm256_cmpeq_epi64(x, v);
int ind = _tzcnt_u32(_mm256_movemask_pd((mvd)mask));
int whichmask = _mm256_movemask_pd((mvd)mmask);
return make_pair(mn, (whichmask & (1<<ind)) == 0?ind:ind+4);
}
struct simd_priority_queue
{
vector<ll, align_alloc<ll>> heap;
vector<int, align_alloc<int>> keys;
// gp_hash_table<int, int> key_to_pos;
vector<int> key_to_pos;
simd_priority_queue(int nkeys) {key_to_pos.resize(nkeys);}
int sz = 0;
int par(int ind) { return (ind>>3)-1; }
int ch(int ind, int c) { return (ind+1) * 8 + c; }
void siftup(int ind, int key, ll val) {
while ((ind & ~0x7) != 0 and heap[par(ind)] > val) {
heap[ind] = heap[par(ind)];
keys[ind] = keys[par(ind)];
key_to_pos[keys[ind]] = ind;
ind=par(ind);
}
heap[ind]=val;
keys[ind]=key;
key_to_pos[key] = ind;
}
void siftdown(int ind, int key, ll val) {
while(1) {
dprintf("siftdown ind = %d key = %d val = %lld\n", ind, key, val);
if (__builtin_expect(ch(ind, 7) >= sz, 0)) {
int smallest = ch(ind, 0);
if (smallest >= sz) break;
for (int c = smallest+1; c < sz; c++)
smallest += (heap[smallest] > heap[c]) * (c - smallest);
if (heap[smallest] >= val) break;
heap[ind] = heap[smallest];
keys[ind] = keys[smallest];
key_to_pos[keys[ind]] = ind;
ind = smallest;
break;
}
auto [mn, mind] = argmin(_mm256_load_si256((mvi*)&heap[ch(ind,0)]), _mm256_load_si256((mvi*)&heap[ch(ind,4)])); // loads 8 vals
if (mn >= val) break;
int smallest = ch(ind, mind);
heap[ind] = heap[smallest];
keys[ind] = keys[smallest];
key_to_pos[keys[ind]] = ind;
ind = smallest;
}
heap[ind]=val;
keys[ind]=key;
key_to_pos[key] = ind;
}
pair<int,ll> top() {
if(sz==0)return make_pair(-999,-999);
if(sz<8) {
int smallest=0;
for (int c=1;c<sz;c++)
smallest+=(heap[smallest]>heap[c])*(c-smallest);
return make_pair(keys[smallest], heap[smallest]);
}
auto [mn, amn] = argmin(_mm256_load_si256((mvi*)&heap[0]), _mm256_load_si256((mvi*)&heap[4]));
return make_pair(keys[amn], mn);
}
int topind() {
if(sz==0)return -999;
if(sz<8) {
int smallest=0;
for (int c=1;c<sz;c++)
smallest+=(heap[smallest]>heap[c])*(c-smallest);
return smallest;
}
return argmin(_mm256_load_si256((mvi*)&heap[0]), _mm256_load_si256((mvi*)&heap[4])).second;
}
void decreasekey(int key, int nkey, ll nval) {
int pos = key_to_pos[key];
// key_to_pos.erase(key);
siftup(pos, nkey, nval);
}
void insert(int key, ll val) {
sz++;
heap.resize(sz);
keys.resize(sz);
siftup(sz-1, key, val);
}
void extract() {
int ti = topind();
// key_to_pos.erase(keys[ti]);
sz--;
if(sz!=0 and ti != sz) siftdown(ti, keys.back(), heap.back());
heap.resize(sz);
keys.resize(sz);
}
void replace(int key, ll val) {
siftdown(topind(), key, val);
}
};
struct edge {
int from, to, weight, cost, id;
edge() {}
edge(int _from, int _to, int _weight, int _cost, int _id) : from(_from), to(_to), weight(_weight), cost(_cost), id(_id) {}
};
struct node {
int distance, parent;
node() {
distance = INT_MAX;
parent = -1;
}
node(int _distance, int _parent) : distance(_distance), parent(_parent) {}
};
struct result {
int edge;
vector<int> distances;
};
vector<node> dijkstra(vector<vector<edge>> graph, int start) {
int n = (int)graph.size();
vector<node> answer(n);
simd_priority_queue q(n);
q.insert(start, 0);
answer[start].distance = 0;
while (q.sz) {
auto [u, d] = q.top();
q.extract();
if (answer[u].distance > d) {
continue;
}
for (auto [from, to, weight, cost, id] : graph[u]) {
if (answer[from].distance + weight < answer[to].distance) {
answer[to] = node(answer[from].distance + weight, id);
q.insert(to, answer[to].distance);
}
}
}
return answer;
}
vector<vector<edge>> transpose(vector<vector<edge>> graph) {
int n = (int)graph.size();
vector<vector<edge>> new_graph(n);
for (int u = 0; u < n; u++) {
for (auto [from, to, weight, cost, id] : graph[u]) {
new_graph[to].emplace_back(to, from, weight, cost, id);
}
}
return new_graph;
}
vector<vector<int>> find_shortest_paths_without_each_edge(vector<vector<edge>> graph, int start) {
int n = (int)graph.size();
int m = 0;
for (int u = 0; u < n; u++) {
for (auto [from, to, weight, cost, id] : graph[u]) {
m = max(m, 1 + id);
}
}
vector<node> result = dijkstra(graph, start);
vector<int> dist(n);
for (int i = 0; i < n; i++) {
dist[i] = result[i].distance;
}
vector<vector<int>> answer(m);
for (auto [distance, parent] : result) {
if (parent != -1) {
vector<vector<edge>> new_graph = graph;
for (int u = 0; u < n; u++) {
for (int i = 0; i < (int)new_graph[u].size(); i++) {
auto [from, to, weight, cost, id] = new_graph[u][i];
if (id == parent) {
new_graph[u].erase(new_graph[u].begin() + i);
break;
}
}
}
vector<node> new_result = dijkstra(new_graph, start);
answer[parent] = vector<int>(n);
for (int i = 0; i < n; i++) {
answer[parent][i] = new_result[i].distance;
}
}
}
answer.push_back(dist);
return answer;
}
void solve() {
int n, m;
cin >> n >> m;
vector<vector<edge>> graph(n);
for (int i = 0; i < m; i++) {
int from, to, weight, cost;
cin >> from >> to >> weight >> cost;
from--, to--;
graph[from].emplace_back(from, to, weight, cost, i);
}
vector<vector<edge>> rev_graph = transpose(graph);
vector<vector<int>> result_a = find_shortest_paths_without_each_edge(graph, 0);
vector<vector<int>> result_b = find_shortest_paths_without_each_edge(graph, n - 1);
vector<vector<int>> rev_result_a = find_shortest_paths_without_each_edge(rev_graph, 0);
vector<vector<int>> rev_result_b = find_shortest_paths_without_each_edge(rev_graph, n - 1);
auto get = [](vector<vector<int>>& x, int i, int j) {
if (x[i].empty()) {
return x.back()[j];
} else {
return x[i][j];
}
};
int ans = INT_MAX;
if (result_a.back()[n - 1] != INT_MAX && result_b.back()[0] != INT_MAX) {
ans = result_a.back()[n - 1] + result_b.back()[0];
}
for (int u = 0; u < n; u++) {
for (auto [from, to, weight, cost, id] : graph[u]) {
int AB = INT_MAX, BA = INT_MAX;
AB = min(AB, get(result_a, id, n - 1));
if (get(result_a, id, to) != INT_MAX && get(rev_result_b, id, from) != INT_MAX) {
AB = min(AB, get(result_a, id, to) + get(rev_result_b, id, from) + weight);
}
BA = min(BA, get(result_b, id, 0));
if (get(result_b, id, to) != INT_MAX && get(rev_result_a, id, from) != INT_MAX) {
BA = min(BA, get(result_b, id, to) + get(rev_result_a, id, from) + weight);
}
if (AB == INT_MAX || BA == INT_MAX) {
continue;
}
ans = min(ans, AB + BA + cost);
}
}
cout << (ans == INT_MAX ? -1 : ans) << "\n";
}
int main() {
ios::sync_with_stdio(false);
cin.tie(nullptr);
solve();
}
Compilation message
ho_t4.cpp:1: warning: ignoring '#pragma clang attribute' [-Wunknown-pragmas]
1 | #pragma clang attribute push (__attribute__((target("avx2,bmi,bmi2,lzcnt,popcnt"))), apply_to=function)
|
# |
Verdict |
Execution time |
Memory |
Grader output |
1 |
Correct |
37 ms |
1244 KB |
Output is correct |
2 |
Correct |
3 ms |
468 KB |
Output is correct |
3 |
Correct |
47 ms |
1284 KB |
Output is correct |
4 |
Correct |
51 ms |
1284 KB |
Output is correct |
5 |
Correct |
4 ms |
724 KB |
Output is correct |
6 |
Correct |
7 ms |
512 KB |
Output is correct |
7 |
Correct |
0 ms |
212 KB |
Output is correct |
8 |
Correct |
0 ms |
212 KB |
Output is correct |
9 |
Correct |
1 ms |
468 KB |
Output is correct |
10 |
Correct |
58 ms |
1308 KB |
Output is correct |
11 |
Correct |
59 ms |
1260 KB |
Output is correct |
12 |
Correct |
57 ms |
1236 KB |
Output is correct |
13 |
Correct |
17 ms |
852 KB |
Output is correct |
14 |
Correct |
33 ms |
1124 KB |
Output is correct |
15 |
Correct |
33 ms |
1100 KB |
Output is correct |
16 |
Correct |
34 ms |
1108 KB |
Output is correct |
# |
Verdict |
Execution time |
Memory |
Grader output |
1 |
Correct |
590 ms |
12628 KB |
Output is correct |
2 |
Correct |
534 ms |
12780 KB |
Output is correct |
3 |
Correct |
575 ms |
12588 KB |
Output is correct |
4 |
Correct |
55 ms |
1508 KB |
Output is correct |
5 |
Correct |
34 ms |
1160 KB |
Output is correct |
6 |
Correct |
13 ms |
724 KB |
Output is correct |
7 |
Correct |
1 ms |
468 KB |
Output is correct |
8 |
Correct |
0 ms |
212 KB |
Output is correct |
9 |
Correct |
186 ms |
12496 KB |
Output is correct |
10 |
Correct |
160 ms |
12520 KB |
Output is correct |
11 |
Correct |
439 ms |
12404 KB |
Output is correct |
12 |
Correct |
355 ms |
12356 KB |
Output is correct |
13 |
Correct |
415 ms |
12468 KB |
Output is correct |
14 |
Correct |
391 ms |
12720 KB |
Output is correct |
# |
Verdict |
Execution time |
Memory |
Grader output |
1 |
Correct |
47 ms |
1444 KB |
Output is correct |
2 |
Correct |
10 ms |
628 KB |
Output is correct |
3 |
Correct |
265 ms |
9956 KB |
Output is correct |
4 |
Correct |
6 ms |
596 KB |
Output is correct |
5 |
Correct |
309 ms |
12396 KB |
Output is correct |
6 |
Correct |
1 ms |
212 KB |
Output is correct |
7 |
Correct |
0 ms |
212 KB |
Output is correct |
8 |
Correct |
140 ms |
12476 KB |
Output is correct |
9 |
Correct |
133 ms |
12520 KB |
Output is correct |
10 |
Correct |
239 ms |
12280 KB |
Output is correct |
11 |
Correct |
241 ms |
12284 KB |
Output is correct |
12 |
Correct |
214 ms |
12624 KB |
Output is correct |
13 |
Correct |
1 ms |
212 KB |
Output is correct |
14 |
Correct |
1 ms |
212 KB |
Output is correct |
15 |
Correct |
1 ms |
212 KB |
Output is correct |
16 |
Correct |
1 ms |
212 KB |
Output is correct |
17 |
Correct |
0 ms |
212 KB |
Output is correct |
18 |
Correct |
0 ms |
212 KB |
Output is correct |
19 |
Correct |
219 ms |
12628 KB |
Output is correct |
20 |
Correct |
216 ms |
12476 KB |
Output is correct |
21 |
Correct |
240 ms |
12360 KB |
Output is correct |
22 |
Correct |
224 ms |
12308 KB |
Output is correct |
# |
Verdict |
Execution time |
Memory |
Grader output |
1 |
Correct |
37 ms |
1244 KB |
Output is correct |
2 |
Correct |
3 ms |
468 KB |
Output is correct |
3 |
Correct |
47 ms |
1284 KB |
Output is correct |
4 |
Correct |
51 ms |
1284 KB |
Output is correct |
5 |
Correct |
4 ms |
724 KB |
Output is correct |
6 |
Correct |
7 ms |
512 KB |
Output is correct |
7 |
Correct |
0 ms |
212 KB |
Output is correct |
8 |
Correct |
0 ms |
212 KB |
Output is correct |
9 |
Correct |
1 ms |
468 KB |
Output is correct |
10 |
Correct |
58 ms |
1308 KB |
Output is correct |
11 |
Correct |
59 ms |
1260 KB |
Output is correct |
12 |
Correct |
57 ms |
1236 KB |
Output is correct |
13 |
Correct |
17 ms |
852 KB |
Output is correct |
14 |
Correct |
33 ms |
1124 KB |
Output is correct |
15 |
Correct |
33 ms |
1100 KB |
Output is correct |
16 |
Correct |
34 ms |
1108 KB |
Output is correct |
17 |
Correct |
590 ms |
12628 KB |
Output is correct |
18 |
Correct |
534 ms |
12780 KB |
Output is correct |
19 |
Correct |
575 ms |
12588 KB |
Output is correct |
20 |
Correct |
55 ms |
1508 KB |
Output is correct |
21 |
Correct |
34 ms |
1160 KB |
Output is correct |
22 |
Correct |
13 ms |
724 KB |
Output is correct |
23 |
Correct |
1 ms |
468 KB |
Output is correct |
24 |
Correct |
0 ms |
212 KB |
Output is correct |
25 |
Correct |
186 ms |
12496 KB |
Output is correct |
26 |
Correct |
160 ms |
12520 KB |
Output is correct |
27 |
Correct |
439 ms |
12404 KB |
Output is correct |
28 |
Correct |
355 ms |
12356 KB |
Output is correct |
29 |
Correct |
415 ms |
12468 KB |
Output is correct |
30 |
Correct |
391 ms |
12720 KB |
Output is correct |
31 |
Correct |
47 ms |
1444 KB |
Output is correct |
32 |
Correct |
10 ms |
628 KB |
Output is correct |
33 |
Correct |
265 ms |
9956 KB |
Output is correct |
34 |
Correct |
6 ms |
596 KB |
Output is correct |
35 |
Correct |
309 ms |
12396 KB |
Output is correct |
36 |
Correct |
1 ms |
212 KB |
Output is correct |
37 |
Correct |
0 ms |
212 KB |
Output is correct |
38 |
Correct |
140 ms |
12476 KB |
Output is correct |
39 |
Correct |
133 ms |
12520 KB |
Output is correct |
40 |
Correct |
239 ms |
12280 KB |
Output is correct |
41 |
Correct |
241 ms |
12284 KB |
Output is correct |
42 |
Correct |
214 ms |
12624 KB |
Output is correct |
43 |
Correct |
1 ms |
212 KB |
Output is correct |
44 |
Correct |
1 ms |
212 KB |
Output is correct |
45 |
Correct |
1 ms |
212 KB |
Output is correct |
46 |
Correct |
1 ms |
212 KB |
Output is correct |
47 |
Correct |
0 ms |
212 KB |
Output is correct |
48 |
Correct |
0 ms |
212 KB |
Output is correct |
49 |
Correct |
219 ms |
12628 KB |
Output is correct |
50 |
Correct |
216 ms |
12476 KB |
Output is correct |
51 |
Correct |
240 ms |
12360 KB |
Output is correct |
52 |
Correct |
224 ms |
12308 KB |
Output is correct |
53 |
Correct |
679 ms |
12412 KB |
Output is correct |
54 |
Correct |
663 ms |
12440 KB |
Output is correct |
55 |
Correct |
608 ms |
12636 KB |
Output is correct |
56 |
Correct |
53 ms |
1304 KB |
Output is correct |
57 |
Correct |
53 ms |
1312 KB |
Output is correct |
58 |
Correct |
530 ms |
10088 KB |
Output is correct |
59 |
Correct |
529 ms |
10140 KB |
Output is correct |
60 |
Correct |
573 ms |
10236 KB |
Output is correct |
61 |
Correct |
479 ms |
10132 KB |
Output is correct |
62 |
Correct |
478 ms |
10188 KB |
Output is correct |
63 |
Correct |
539 ms |
10048 KB |
Output is correct |
64 |
Execution timed out |
1080 ms |
6300 KB |
Time limit exceeded |
65 |
Halted |
0 ms |
0 KB |
- |