/******************************
Author: jhnah917(Justice_Hui)
g++ -std=c++17 -DLOCAL
******************************/
#pragma GCC optimize ("O3")
#pragma GCC optimize ("unroll-loops")
#pragma GCC target ("avx,avx2,fma")
#include <bits/stdc++.h>
#include <immintrin.h>
using namespace std;
using uint = unsigned;
using ll = long long;
using ull = unsigned long long;
constexpr int SZ = 1 << 17;
constexpr int WORD = 256; // AVX
constexpr int INT_PER_WORD = WORD / sizeof(int) / 8;
int N, Q;
alignas(256) int _S[SZ], _T[SZ], _Sum[SZ];
__m256i S[SZ/INT_PER_WORD], T[SZ/INT_PER_WORD], Sum[SZ/INT_PER_WORD], Flags;
__m256i _mm256_cmpgeq_epi32(__m256i a, __m256i b){
__m256i ret = _mm256_cmpeq_epi32(a, b);
ret = _mm256_or_si256(ret, _mm256_cmpgt_epi32(a, b));
return ret;
}
__m256i _mm256_cmpgeq_epi32(__m256i a, unsigned b){
return _mm256_cmpgeq_epi32(a, _mm256_set1_epi32(b));
}
int main(){
ios_base::sync_with_stdio(false); cin.tie(nullptr);
cin >> N >> Q;
for(int i=0; i<N; i++) cin >> _S[i] >> _T[i];
int pv = 0;
for(int i=0; i<N; i+=INT_PER_WORD, pv++){
S[pv] = _mm256_load_si256((const __m256i*)(_S+i));
T[pv] = _mm256_load_si256((const __m256i*)(_T+i));
Sum[pv] = _mm256_add_epi32(S[pv], T[pv]);
}
for(int i=0; i<Q; i++){
int x, y, z, ans = 0; cin >> x >> y >> z;
__m256i cnt = _mm256_set1_epi32(0);
for(int j=0; j<pv; j++){
__m256i X = _mm256_cmpgeq_epi32(S[j], x);
__m256i Y = _mm256_cmpgeq_epi32(T[j], y);
__m256i Z = _mm256_cmpgeq_epi32(Sum[j], z);
Flags = _mm256_and_si256(_mm256_and_si256(X, _mm256_and_si256(Y, Z)), _mm256_set1_epi32(1));
cnt = _mm256_add_epi32(cnt, Flags);
}
int tmp[8]; _mm256_store_si256((__m256i*)tmp, cnt);
for(int j=0; j<8; j++) ans += tmp[j];
cout << ans << "\n";
}
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
364 KB |
Output is correct |
2 |
Correct |
0 ms |
364 KB |
Output is correct |
3 |
Correct |
0 ms |
364 KB |
Output is correct |
4 |
Correct |
1 ms |
364 KB |
Output is correct |
5 |
Correct |
1 ms |
364 KB |
Output is correct |
6 |
Correct |
1 ms |
364 KB |
Output is correct |
7 |
Correct |
7 ms |
492 KB |
Output is correct |
8 |
Correct |
6 ms |
492 KB |
Output is correct |
9 |
Correct |
6 ms |
492 KB |
Output is correct |
10 |
Correct |
6 ms |
492 KB |
Output is correct |
11 |
Correct |
6 ms |
492 KB |
Output is correct |
12 |
Correct |
6 ms |
492 KB |
Output is correct |
13 |
Correct |
6 ms |
492 KB |
Output is correct |
14 |
Correct |
6 ms |
492 KB |
Output is correct |
15 |
Correct |
6 ms |
492 KB |
Output is correct |
16 |
Correct |
6 ms |
492 KB |
Output is correct |
17 |
Correct |
6 ms |
492 KB |
Output is correct |
18 |
Correct |
6 ms |
492 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Execution timed out |
3083 ms |
2936 KB |
Time limit exceeded |
2 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Execution timed out |
3083 ms |
2936 KB |
Time limit exceeded |
2 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
364 KB |
Output is correct |
2 |
Correct |
0 ms |
364 KB |
Output is correct |
3 |
Correct |
0 ms |
364 KB |
Output is correct |
4 |
Correct |
1 ms |
364 KB |
Output is correct |
5 |
Correct |
1 ms |
364 KB |
Output is correct |
6 |
Correct |
1 ms |
364 KB |
Output is correct |
7 |
Correct |
7 ms |
492 KB |
Output is correct |
8 |
Correct |
6 ms |
492 KB |
Output is correct |
9 |
Correct |
6 ms |
492 KB |
Output is correct |
10 |
Correct |
6 ms |
492 KB |
Output is correct |
11 |
Correct |
6 ms |
492 KB |
Output is correct |
12 |
Correct |
6 ms |
492 KB |
Output is correct |
13 |
Correct |
6 ms |
492 KB |
Output is correct |
14 |
Correct |
6 ms |
492 KB |
Output is correct |
15 |
Correct |
6 ms |
492 KB |
Output is correct |
16 |
Correct |
6 ms |
492 KB |
Output is correct |
17 |
Correct |
6 ms |
492 KB |
Output is correct |
18 |
Correct |
6 ms |
492 KB |
Output is correct |
19 |
Execution timed out |
3083 ms |
2936 KB |
Time limit exceeded |
20 |
Halted |
0 ms |
0 KB |
- |