#include <vector>
#include <immintrin.h>
using namespace std;
#pragma GCC target "avx2"
#pragma GCC optimize "Ofast"
typedef __m256i m256;
typedef vector<int> vi;
#define SZ(v) ((int) (v).size())
const int MN = 2e5;
int n, q;
int arr[MN], cap[MN];
vi distribute_candies(vi cIn, vi lIn, vi rIn, vi vIn){
n = SZ(cIn);
q = SZ(lIn);
for(int i = 0; i < n; i++) cap[i] = cIn[i];
for(int i = 0; i < q; i++){
int l = lIn[i], r = rIn[i]+1, v = vIn[i];
int lb = (l+7)/8, rb = r/8;
m256 vVec = _mm256_set1_epi32(v);
m256 zeroVec = _mm256_setzero_si256();
if(r-l < 16){
for(int j = l; j < r; j++) arr[j] = max(0, min(cap[j], arr[j] + v));
continue;
}
for(int j = l; j < lb*8; j++) arr[j] = max(0, min(cap[j], arr[j] + v));
for(int j = rb*8; j < r; j++) arr[j] = max(0, min(cap[j], arr[j] + v));
if(v < 0){
for(int j = lb; j < rb; j++){
m256 aVec = _mm256_load_si256((m256*) arr + j);
m256 res = _mm256_max_epi32(zeroVec, _mm256_add_epi32(aVec, vVec));
_mm256_store_si256((m256*) arr + j, res);
}
}
else {
for(int j = lb; j < rb; j++){
m256 aVec = _mm256_load_si256((m256*) arr + j);
m256 cVec = _mm256_load_si256((m256*) cap + j);
m256 res = _mm256_min_epi32(cVec, _mm256_add_epi32(aVec, vVec));
_mm256_store_si256((m256*) arr + j, res);
}
}
}
vi res(n);
for(int i = 0; i < n; i++) res[i] = arr[i];
return res;
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
0 ms |
212 KB |
Output is correct |
2 |
Correct |
0 ms |
212 KB |
Output is correct |
3 |
Correct |
1 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
2349 ms |
8900 KB |
Output is correct |
2 |
Correct |
2295 ms |
8912 KB |
Output is correct |
3 |
Correct |
2285 ms |
8896 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
0 ms |
212 KB |
Output is correct |
2 |
Correct |
70 ms |
4960 KB |
Output is correct |
3 |
Correct |
65 ms |
5364 KB |
Output is correct |
4 |
Correct |
1979 ms |
8892 KB |
Output is correct |
5 |
Correct |
1951 ms |
8892 KB |
Output is correct |
6 |
Correct |
1935 ms |
8892 KB |
Output is correct |
7 |
Correct |
1966 ms |
8932 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
0 ms |
212 KB |
Output is correct |
2 |
Correct |
0 ms |
212 KB |
Output is correct |
3 |
Correct |
99 ms |
5036 KB |
Output is correct |
4 |
Correct |
104 ms |
4312 KB |
Output is correct |
5 |
Execution timed out |
5068 ms |
8036 KB |
Time limit exceeded |
6 |
Halted |
0 ms |
0 KB |
- |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
0 ms |
212 KB |
Output is correct |
2 |
Correct |
0 ms |
212 KB |
Output is correct |
3 |
Correct |
1 ms |
340 KB |
Output is correct |
4 |
Correct |
1 ms |
340 KB |
Output is correct |
5 |
Correct |
1 ms |
340 KB |
Output is correct |
6 |
Correct |
2349 ms |
8900 KB |
Output is correct |
7 |
Correct |
2295 ms |
8912 KB |
Output is correct |
8 |
Correct |
2285 ms |
8896 KB |
Output is correct |
9 |
Correct |
0 ms |
212 KB |
Output is correct |
10 |
Correct |
70 ms |
4960 KB |
Output is correct |
11 |
Correct |
65 ms |
5364 KB |
Output is correct |
12 |
Correct |
1979 ms |
8892 KB |
Output is correct |
13 |
Correct |
1951 ms |
8892 KB |
Output is correct |
14 |
Correct |
1935 ms |
8892 KB |
Output is correct |
15 |
Correct |
1966 ms |
8932 KB |
Output is correct |
16 |
Correct |
0 ms |
212 KB |
Output is correct |
17 |
Correct |
0 ms |
212 KB |
Output is correct |
18 |
Correct |
99 ms |
5036 KB |
Output is correct |
19 |
Correct |
104 ms |
4312 KB |
Output is correct |
20 |
Execution timed out |
5068 ms |
8036 KB |
Time limit exceeded |
21 |
Halted |
0 ms |
0 KB |
- |