#include <bits/stdc++.h>
// Ascending half-open loop: `var` (an ll) runs over first, first+1, ..., past-1.
#define Loop(var, first, past) for (ll var = (first); var < (past); ++var)
// Descending counterpart: `var` runs over past-1, past-2, ..., first.
#define LoopR(var, first, past) for (ll var = (past) - 1; var >= (first); --var)
// Short aliases used throughout the file.
using ll = long long;
using pii = std::pair<int, int>;
using pll = std::pair<ll, ll>;
using namespace std;
// Capacity of the two input arrays.
const int N = 100'010;
// cnt() reads both arrays with aligned vector loads (vmovdqa on 256-bit ymm
// registers), which raise a fault unless the address is a multiple of 32.
// A plain `int[]` is not guaranteed that alignment, so request it explicitly.
alignas(32) int a[N];
alignas(32) int b[N];
// n: number of (a[i], b[i]) pairs read by main(); q: number of queries.
int n, q;
// Counts the indices i in [0, n) for which
//     a[i] >= x  &&  b[i] >= y  &&  a[i] + b[i] >= z
// and returns that count (in %eax).
//
// The body is hand-placed x86-64 AT&T assembly. It reads like optimizing
// compiler output for a simple counting loop (presumably pasted in to get
// AVX2 code regardless of the build flags -- not confirmed from this file).
// Because the function is `naked`, the compiler generates no prologue or
// epilogue: the asm itself takes x/y/z from %edi/%esi/%edx, saves and
// restores the callee-saved registers it touches, and executes `ret`.
//
// Requirements that follow from the instructions used:
//  * The CPU must support AVX2 (256-bit vpaddd/vpminsd/vpcmpgtd,
//    vpbroadcastd, vextracti128).
//  * `a` and `b` are read with aligned loads (vmovdqa): their base addresses
//    must be 32-byte aligned. Nothing in this function checks that.
//  * The globals `n`, `a` and `b` are referenced by symbol name through
//    RIP-relative addressing, so they must keep exactly these link names.
//  * a[i] + b[i] is a 32-bit wrapping add (vpaddd / addl).
//
// Register roles for the whole routine:
//   r9d = x, r10d = y, r11d = z, rdi = n (sign-extended to 64 bits),
//   rcx = &a[0], rsi = &b[0], r8d = running count in the tail code.
__attribute__((naked))
int cnt(int x, int y, int z) {
asm(
// ---- Entry: stash the arguments, load n, choose a path --------------------
" movl %edi, %r9d\n"
" movslq n(%rip), %rdi\n"
// n <= 0: nothing to count, return 0.
" testq %rdi, %rdi\n"
" jle .myL9\n"
" leaq -1(%rdi), %rax\n"
" movl %esi, %r10d\n"
" movl %edx, %r11d\n"
// n - 1 <= 6, i.e. n <= 7: too short for an 8-lane block, use the tail code.
" cmpq $6, %rax\n"
" jbe .myL10\n"
// ---- 8-lane setup ---------------------------------------------------------
// ymm6 / ymm4 / ymm5 = x / y / z broadcast to all eight lanes.
// ymm3 = eight per-lane counters, zeroed (VEX xmm write clears the upper half).
// r8   = (n / 8) * 32 = byte length of the part processed 8 ints at a time.
// rax  = current byte offset into a and b (starts at 0).
// rdx  = (number of 8-lane blocks) mod 4. The main loop at .myL4 is unrolled
//        four times, so 0..3 single blocks are run first to leave a multiple
//        of four:
//          0 -> jump straight to .myL4
//          1 -> one block at .myL21
//          2 -> two blocks starting at .myL22
//          3 -> fall through and run three blocks
" movq %rdi, %r8\n"
" vmovd %edx, %xmm5\n"
" vmovd %esi, %xmm4\n"
" xorl %eax, %eax\n"
" shrq $3, %r8\n"
" vmovd %r9d, %xmm6\n"
" vpbroadcastd %xmm4, %ymm4\n"
" salq $5, %r8\n"
" vpbroadcastd %xmm6, %ymm6\n"
" vpbroadcastd %xmm5, %ymm5\n"
" leaq -32(%r8), %rdx\n"
" vpxor %xmm3, %xmm3, %xmm3\n"
" leaq a(%rip), %rcx\n"
" shrq $5, %rdx\n"
" leaq b(%rip), %rsi\n"
" addq $1, %rdx\n"
" andl $3, %edx\n"
" je .myL4\n"
" cmpq $1, %rdx\n"
" je .myL21\n"
" cmpq $2, %rdx\n"
" je .myL22\n"
// ---- One 8-lane block (the same pattern repeats in every block below) ------
//   ymm0 = a[..], ymm1 = b[..]
//   ymm2 = a + b
//   ymm1 = (min(b, y) == y)        -> b >= y
//   ymm0 = (x > a)
//   ymm0 = ~ymm0 & ymm1            -> a >= x && b >= y
//   ymm1 = (z > a + b)
//   ymm0 = ~ymm1 & ymm0            -> all-ones (-1) where all three tests pass
//   ymm3 -= ymm0                   -> subtracting -1 adds 1 to that lane
// First peeled block: offset 0, then rax = 32.
" vmovdqa (%rcx), %ymm0\n"
" vmovdqa (%rsi), %ymm1\n"
" movl $32, %eax\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
// Second peeled block at byte offset rax.
".myL22:\n"
" vmovdqa (%rcx,%rax), %ymm0\n"
" vmovdqa (%rsi,%rax), %ymm1\n"
" addq $32, %rax\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
// Third peeled block at byte offset rax.
".myL21:\n"
" vmovdqa (%rcx,%rax), %ymm0\n"
" vmovdqa (%rsi,%rax), %ymm1\n"
" addq $32, %rax\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
// If the peeled blocks were all of them, skip the unrolled loop.
" cmpq %r8, %rax\n"
" je .myL27\n"
// ---- Main loop: four blocks per iteration ---------------------------------
// Blocks are at byte offsets rax, rax+32, rax+64 and rax+96; the last one is
// addressed as 64(...,%rdx) with rdx = rax + 32. rax advances by 128 per
// iteration (rax = rdx + 96) and the loop ends when rax reaches r8.
".myL4:\n"
" vmovdqa (%rcx,%rax), %ymm0\n"
" vmovdqa (%rsi,%rax), %ymm1\n"
" leaq 32(%rax), %rdx\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vmovdqa 32(%rsi,%rax), %ymm1\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
" vmovdqa 32(%rcx,%rax), %ymm0\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vmovdqa 64(%rsi,%rax), %ymm1\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
" vmovdqa 64(%rcx,%rax), %ymm0\n"
" leaq 96(%rdx), %rax\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vmovdqa 64(%rsi,%rdx), %ymm1\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
" vmovdqa 64(%rcx,%rdx), %ymm0\n"
" vpaddd %ymm1, %ymm0, %ymm2\n"
" vpminsd %ymm1, %ymm4, %ymm1\n"
" vpcmpgtd %ymm0, %ymm6, %ymm0\n"
" vpcmpeqd %ymm1, %ymm4, %ymm1\n"
" vpandn %ymm1, %ymm0, %ymm0\n"
" vpcmpgtd %ymm2, %ymm5, %ymm1\n"
" vpandn %ymm0, %ymm1, %ymm0\n"
" vpsubd %ymm0, %ymm3, %ymm3\n"
" cmpq %r8, %rax\n"
" jne .myL4\n"
// ---- Reduce the eight lane counters to one scalar -------------------------
// Adds the high 128 bits of ymm3 to the low 128 bits, then folds 4 -> 2 -> 1
// lanes; the total lands in r8d. Meanwhile rax = rdx = n & ~7, the number of
// elements already processed. If that equals n there is no tail (.myL33).
".myL27:\n"
" vmovdqa %xmm3, %xmm0\n"
" vextracti128 $0x1, %ymm3, %xmm3\n"
" movq %rdi, %rax\n"
" vpaddd %xmm3, %xmm0, %xmm0\n"
" andq $-8, %rax\n"
" vpsrldq $8, %xmm0, %xmm1\n"
" movq %rax, %rdx\n"
" vpaddd %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddd %xmm1, %xmm0, %xmm0\n"
" vmovd %xmm0, %r8d\n"
" cmpq %rdi, %rax\n"
" je .myL33\n"
" vzeroupper\n"
// ---- Tail: fewer than 8 elements left -------------------------------------
// Reached with rax = rdx = index of the first unprocessed element, r8d = count
// so far, rcx = &a[0], rsi = &b[0]. rbp, r13, r12 and rbx are pushed here and
// popped in the same reverse order at .myL1; rbx/r12/r13 serve as scratch.
// The .cfi_* lines are unwind annotations only.
".myL3:\n"
" pushq %rbp\n"
" .cfi_def_cfa_offset 16\n"
" .cfi_offset 6, -16\n"
" movq %rsp, %rbp\n"
" .cfi_def_cfa_register 6\n"
" pushq %r13\n"
" pushq %r12\n"
" pushq %rbx\n"
" .cfi_offset 13, -24\n"
" .cfi_offset 12, -32\n"
" .cfi_offset 3, -40\n"
// rbx = remaining = n - rax. remaining - 1 <= 2 (i.e. at most 3 left): go
// straight to the scalar code.
" movq %rdi, %rbx\n"
" subq %rax, %rbx\n"
" leaq -1(%rbx), %r12\n"
" cmpq $2, %r12\n"
" jbe .myL7\n"
// Four elements at once with 128-bit registers; same three tests as the
// 8-lane block. x, y and z are re-broadcast with vmovd + vpshufd $0.
// After the last vpandn, xmm1 holds the -1/0 pass mask per lane.
" vmovdqa (%rcx,%rax,4), %xmm2\n"
" vmovdqa (%rsi,%rax,4), %xmm1\n"
" vmovd %r10d, %xmm7\n"
" vpshufd $0, %xmm7, %xmm0\n"
" vmovd %r9d, %xmm7\n"
" vpaddd %xmm1, %xmm2, %xmm3\n"
" vpminsd %xmm1, %xmm0, %xmm1\n"
" vpcmpeqd %xmm1, %xmm0, %xmm1\n"
" vpshufd $0, %xmm7, %xmm0\n"
" vmovd %r11d, %xmm7\n"
" vpcmpgtd %xmm2, %xmm0, %xmm0\n"
" vpandn %xmm1, %xmm0, %xmm0\n"
" vpshufd $0, %xmm7, %xmm1\n"
" vpcmpgtd %xmm3, %xmm1, %xmm1\n"
" vpandn %xmm0, %xmm1, %xmm1\n"
// Horizontal sum of the four 0/1 results: xmm0 = 0 - mask gives the counts,
// shifting right by 8 bytes moves lanes 2,3 into lanes 0,1, subtracting the
// mask again adds lanes 0,1 back, and the final 4-byte shift + add folds
// lane 1 into lane 0. The total is added to r8d.
" vpxor %xmm0, %xmm0, %xmm0\n"
" vpsubd %xmm1, %xmm0, %xmm0\n"
" vpsrldq $8, %xmm0, %xmm0\n"
" vpsubd %xmm1, %xmm0, %xmm0\n"
" vpsrldq $4, %xmm0, %xmm1\n"
" vpaddd %xmm1, %xmm0, %xmm0\n"
" vmovd %xmm0, %eax\n"
" addl %eax, %r8d\n"
// rax = remaining & ~3 (the elements just handled); advance rdx past them and
// finish if nothing is left.
" movq %rbx, %rax\n"
" andq $-4, %rax\n"
" addq %rax, %rdx\n"
" cmpq %rax, %rbx\n"
" je .myL1\n"
// ---- Scalar tail: up to three elements, fully unrolled --------------------
// Each element builds three 0/-1 masks with setcc + neg (a >= x, b >= y,
// a + b >= z), ANDs them together and subtracts the result from r8d, so a
// passing element adds 1.
// Element at index rdx:
".myL7:\n"
" movl (%rcx,%rdx,4), %ebx\n"
" movl (%rsi,%rdx,4), %r13d\n"
" xorl %eax, %eax\n"
" cmpl %r9d, %ebx\n"
" setge %al\n"
" xorl %r12d, %r12d\n"
" negl %eax\n"
" cmpl %r10d, %r13d\n"
" setge %r12b\n"
" addl %r13d, %ebx\n"
" negl %r12d\n"
" andl %r12d, %eax\n"
" cmpl %r11d, %ebx\n"
" setge %bl\n"
" movzbl %bl, %ebx\n"
" negl %ebx\n"
" andl %ebx, %eax\n"
" subl %eax, %r8d\n"
// Element at index rdx + 1, unless that is already >= n.
" leaq 1(%rdx), %rax\n"
" cmpq %rdi, %rax\n"
" jge .myL1\n"
" movl (%rsi,%rax,4), %ebx\n"
" movl (%rcx,%rax,4), %r12d\n"
" xorl %eax, %eax\n"
" cmpl %ebx, %r10d\n"
" setle %al\n"
" xorl %r13d, %r13d\n"
" negl %eax\n"
" cmpl %r12d, %r9d\n"
" setle %r13b\n"
" addl %r12d, %ebx\n"
" negl %r13d\n"
" andl %r13d, %eax\n"
" cmpl %ebx, %r11d\n"
" setle %bl\n"
" addq $2, %rdx\n"
" movzbl %bl, %ebx\n"
" negl %ebx\n"
" andl %ebx, %eax\n"
" subl %eax, %r8d\n"
// Element at the (already advanced) index rdx, unless n <= rdx. rcx, rsi and
// rdx are overwritten here because no further element follows.
" cmpq %rdx, %rdi\n"
" jle .myL1\n"
" movl (%rcx,%rdx,4), %ecx\n"
" movl (%rsi,%rdx,4), %esi\n"
" xorl %eax, %eax\n"
" cmpl %ecx, %r9d\n"
" setle %al\n"
" xorl %edx, %edx\n"
" negl %eax\n"
" cmpl %esi, %r10d\n"
" setle %dl\n"
" addl %esi, %ecx\n"
" negl %edx\n"
" andl %edx, %eax\n"
" xorl %edx, %edx\n"
" cmpl %ecx, %r11d\n"
" setle %dl\n"
" negl %edx\n"
" andl %edx, %eax\n"
" subl %eax, %r8d\n"
// ---- Exit for the tail path: restore saved registers, return r8d ----------
".myL1:\n"
" popq %rbx\n"
" movl %r8d, %eax\n"
" popq %r12\n"
" popq %r13\n"
" popq %rbp\n"
" .cfi_def_cfa 7, 8\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
// ---- n <= 0: return 0. Nothing was pushed on this path. -------------------
".myL9:\n"
" .cfi_restore 3\n"
" .cfi_restore 6\n"
" .cfi_restore 12\n"
" .cfi_restore 13\n"
" xorl %r8d, %r8d\n"
// Shared return for the paths that pushed nothing: result is r8d.
".myL30:\n"
" movl %r8d, %eax\n"
" ret\n"
" .p2align 4,,10\n"
" .p2align 3\n"
// ---- n is a multiple of 8: no tail, clear upper ymm state and return ------
".myL33:\n"
" vzeroupper\n"
" jmp .myL30\n"
// ---- n <= 7: set up what the vector path would have left behind -----------
// (rax = rdx = 0, r8d = 0, rcx = &a[0], rsi = &b[0]) and enter the tail code.
".myL10:\n"
" xorl %eax, %eax\n"
" xorl %edx, %edx\n"
" leaq a(%rip), %rcx\n"
" xorl %r8d, %r8d\n"
" leaq b(%rip), %rsi\n"
" jmp .myL3\n"
);
}
int main()
{
cin.tie(0) -> sync_with_stdio(false);
cin >> n >> q;
Loop (i,0,n)
cin >> a[i] >> b[i];
Loop (i,0,q) {
int x, y, z;
cin >> x >> y >> z;
cout << cnt(x, y, z) << '\n';
}
}
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
212 KB |
Output is correct |
2 |
Correct |
0 ms |
212 KB |
Output is correct |
3 |
Correct |
0 ms |
212 KB |
Output is correct |
4 |
Correct |
0 ms |
212 KB |
Output is correct |
5 |
Correct |
0 ms |
212 KB |
Output is correct |
6 |
Correct |
0 ms |
212 KB |
Output is correct |
7 |
Correct |
4 ms |
340 KB |
Output is correct |
8 |
Correct |
4 ms |
340 KB |
Output is correct |
9 |
Correct |
4 ms |
340 KB |
Output is correct |
10 |
Correct |
4 ms |
340 KB |
Output is correct |
11 |
Correct |
4 ms |
340 KB |
Output is correct |
12 |
Correct |
3 ms |
340 KB |
Output is correct |
13 |
Correct |
4 ms |
340 KB |
Output is correct |
14 |
Correct |
4 ms |
340 KB |
Output is correct |
15 |
Correct |
4 ms |
340 KB |
Output is correct |
16 |
Correct |
3 ms |
340 KB |
Output is correct |
17 |
Correct |
5 ms |
340 KB |
Output is correct |
18 |
Correct |
3 ms |
340 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
2179 ms |
1628 KB |
Output is correct |
2 |
Correct |
2243 ms |
1576 KB |
Output is correct |
3 |
Correct |
2202 ms |
1800 KB |
Output is correct |
4 |
Correct |
2263 ms |
1656 KB |
Output is correct |
5 |
Correct |
2300 ms |
1660 KB |
Output is correct |
6 |
Correct |
2247 ms |
1648 KB |
Output is correct |
7 |
Correct |
2286 ms |
1688 KB |
Output is correct |
8 |
Correct |
2189 ms |
1720 KB |
Output is correct |
9 |
Correct |
2163 ms |
1736 KB |
Output is correct |
10 |
Correct |
2221 ms |
1440 KB |
Output is correct |
11 |
Correct |
2225 ms |
1704 KB |
Output is correct |
12 |
Correct |
2153 ms |
1428 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
2179 ms |
1628 KB |
Output is correct |
2 |
Correct |
2243 ms |
1576 KB |
Output is correct |
3 |
Correct |
2202 ms |
1800 KB |
Output is correct |
4 |
Correct |
2263 ms |
1656 KB |
Output is correct |
5 |
Correct |
2300 ms |
1660 KB |
Output is correct |
6 |
Correct |
2247 ms |
1648 KB |
Output is correct |
7 |
Correct |
2286 ms |
1688 KB |
Output is correct |
8 |
Correct |
2189 ms |
1720 KB |
Output is correct |
9 |
Correct |
2163 ms |
1736 KB |
Output is correct |
10 |
Correct |
2221 ms |
1440 KB |
Output is correct |
11 |
Correct |
2225 ms |
1704 KB |
Output is correct |
12 |
Correct |
2153 ms |
1428 KB |
Output is correct |
13 |
Correct |
2270 ms |
1608 KB |
Output is correct |
14 |
Correct |
2192 ms |
1612 KB |
Output is correct |
15 |
Correct |
2234 ms |
1616 KB |
Output is correct |
16 |
Correct |
2242 ms |
1544 KB |
Output is correct |
17 |
Correct |
2131 ms |
1596 KB |
Output is correct |
18 |
Correct |
2141 ms |
1628 KB |
Output is correct |
19 |
Correct |
2170 ms |
1612 KB |
Output is correct |
20 |
Correct |
2114 ms |
1868 KB |
Output is correct |
21 |
Correct |
2104 ms |
1856 KB |
Output is correct |
22 |
Correct |
2093 ms |
1520 KB |
Output is correct |
23 |
Correct |
2118 ms |
1544 KB |
Output is correct |
24 |
Correct |
2089 ms |
1528 KB |
Output is correct |
# |
결과 |
실행 시간 |
메모리 |
Grader output |
1 |
Correct |
1 ms |
212 KB |
Output is correct |
2 |
Correct |
0 ms |
212 KB |
Output is correct |
3 |
Correct |
0 ms |
212 KB |
Output is correct |
4 |
Correct |
0 ms |
212 KB |
Output is correct |
5 |
Correct |
0 ms |
212 KB |
Output is correct |
6 |
Correct |
0 ms |
212 KB |
Output is correct |
7 |
Correct |
4 ms |
340 KB |
Output is correct |
8 |
Correct |
4 ms |
340 KB |
Output is correct |
9 |
Correct |
4 ms |
340 KB |
Output is correct |
10 |
Correct |
4 ms |
340 KB |
Output is correct |
11 |
Correct |
4 ms |
340 KB |
Output is correct |
12 |
Correct |
3 ms |
340 KB |
Output is correct |
13 |
Correct |
4 ms |
340 KB |
Output is correct |
14 |
Correct |
4 ms |
340 KB |
Output is correct |
15 |
Correct |
4 ms |
340 KB |
Output is correct |
16 |
Correct |
3 ms |
340 KB |
Output is correct |
17 |
Correct |
5 ms |
340 KB |
Output is correct |
18 |
Correct |
3 ms |
340 KB |
Output is correct |
19 |
Correct |
2179 ms |
1628 KB |
Output is correct |
20 |
Correct |
2243 ms |
1576 KB |
Output is correct |
21 |
Correct |
2202 ms |
1800 KB |
Output is correct |
22 |
Correct |
2263 ms |
1656 KB |
Output is correct |
23 |
Correct |
2300 ms |
1660 KB |
Output is correct |
24 |
Correct |
2247 ms |
1648 KB |
Output is correct |
25 |
Correct |
2286 ms |
1688 KB |
Output is correct |
26 |
Correct |
2189 ms |
1720 KB |
Output is correct |
27 |
Correct |
2163 ms |
1736 KB |
Output is correct |
28 |
Correct |
2221 ms |
1440 KB |
Output is correct |
29 |
Correct |
2225 ms |
1704 KB |
Output is correct |
30 |
Correct |
2153 ms |
1428 KB |
Output is correct |
31 |
Correct |
2270 ms |
1608 KB |
Output is correct |
32 |
Correct |
2192 ms |
1612 KB |
Output is correct |
33 |
Correct |
2234 ms |
1616 KB |
Output is correct |
34 |
Correct |
2242 ms |
1544 KB |
Output is correct |
35 |
Correct |
2131 ms |
1596 KB |
Output is correct |
36 |
Correct |
2141 ms |
1628 KB |
Output is correct |
37 |
Correct |
2170 ms |
1612 KB |
Output is correct |
38 |
Correct |
2114 ms |
1868 KB |
Output is correct |
39 |
Correct |
2104 ms |
1856 KB |
Output is correct |
40 |
Correct |
2093 ms |
1520 KB |
Output is correct |
41 |
Correct |
2118 ms |
1544 KB |
Output is correct |
42 |
Correct |
2089 ms |
1528 KB |
Output is correct |
43 |
Correct |
2107 ms |
1580 KB |
Output is correct |
44 |
Correct |
2139 ms |
1576 KB |
Output is correct |
45 |
Correct |
2130 ms |
1720 KB |
Output is correct |
46 |
Correct |
2144 ms |
1684 KB |
Output is correct |
47 |
Correct |
2142 ms |
1548 KB |
Output is correct |
48 |
Correct |
2230 ms |
1428 KB |
Output is correct |
49 |
Correct |
2127 ms |
1924 KB |
Output is correct |
50 |
Correct |
2110 ms |
1728 KB |
Output is correct |
51 |
Correct |
2186 ms |
1624 KB |
Output is correct |
52 |
Correct |
2174 ms |
1568 KB |
Output is correct |
53 |
Correct |
2157 ms |
1720 KB |
Output is correct |