-
Notifications
You must be signed in to change notification settings - Fork 3
/
isblockzero_amd64.s
71 lines (62 loc) · 925 Bytes
/
isblockzero_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
#include "textflag.h"
// IsBlockZero reports whether every byte of a buffer is zero.
//
// Frame layout used below: buffer pointer at 0(FP), length at 8(FP), one-byte
// result at 24(FP). That matches a Go stub of the form
//	func IsBlockZero(buf []byte) bool
// NOTE(review): the Go declaration lives outside this file - confirm it.
//
// Result: 1 if all bytes are zero (an empty buffer also yields 1), else 0.
//
// Register roles:
//	SI  cursor into the buffer (reused as the data word on the tiny path)
//	BX  bytes not yet consumed
//	AX  scratch: 8-byte word under test / SSE byte-compare mask
//	CX  shift count on the tiny path (-8*len, taken mod 64 by the shifts)
//	X0-X3 block data, X4 all-zero comparand
//
// Strategy by length:
//	len >= 64  64-byte SSE2 blocks, then 8-byte words, then an overlapping
//	           8-byte tail load
//	8..63      8-byte words plus the overlapping tail load
//	1..7       one 8-byte load that also covers bytes outside the buffer,
//	           which are then shifted out
//	0          trivially zero
// Only SSE2 instructions are used; there is no AVX2 variant.
TEXT ·IsBlockZero(SB),NOSPLIT,$0-25
	MOVQ	buf+0(FP), SI
	MOVQ	buf_len+8(FP), BX

	CMPQ	BX, $8
	JB	tiny
	CMPQ	BX, $64
	JB	words

	PXOR	X4, X4			// X4 = 16 zero bytes to compare against

blocks:
	// Consume 64 bytes per iteration while at least 64 remain.
	CMPQ	BX, $64
	JB	words
	MOVOU	0(SI), X0
	MOVOU	16(SI), X1
	MOVOU	32(SI), X2
	MOVOU	48(SI), X3
	ADDQ	$64, SI
	SUBQ	$64, BX
	// OR the four lanes together: X0 is all-zero iff the whole block was.
	POR	X1, X0
	POR	X3, X2
	POR	X2, X0
	PCMPEQB	X4, X0			// each byte becomes 0xff where it equalled zero
	PMOVMSKB	X0, AX		// one mask bit per byte
	CMPL	AX, $0xffff		// all 16 bits set <=> block is zero
	JEQ	blocks

nonzero:
	MOVB	$0, ret+24(FP)
	RET

words:
	// Consume 8 bytes per iteration while more than 8 remain.
	CMPQ	BX, $8
	JBE	tail
	MOVQ	(SI), AX
	ADDQ	$8, SI
	SUBQ	$8, BX
	TESTQ	AX, AX
	JEQ	words
	JMP	nonzero

tail:
	// 0..8 bytes remain and at least 8 bytes of the buffer precede SI+BX,
	// so load the final 8 bytes of the buffer. The load may overlap bytes
	// that were already checked, which is harmless.
	MOVQ	-8(SI)(BX*1), AX
	TESTQ	AX, AX
	SETEQ	ret+24(FP)
	RET

tiny:
	// Fewer than 8 bytes in total.
	TESTQ	BX, BX
	JEQ	done			// empty: ZF is set, so the result is 1

	LEAQ	0(BX*8), CX
	NEGQ	CX			// CX = -8*len; shifts use it mod 64 => 64-8*len (8..56)

	// If the low address byte is above 0xf8, an 8-byte load starting at SI
	// would run past the next 256-byte boundary (possibly onto another
	// page), so in that case load the 8 bytes that END at the buffer's end.
	CMPB	SI, $0xf8
	JA	near_boundary

	MOVQ	(SI), SI		// wanted bytes sit in the low len bytes
	SHLQ	CX, SI			// drop the bytes past the end; ZF = (result == 0)
	JMP	done

near_boundary:
	MOVQ	-8(SI)(BX*1), SI	// wanted bytes sit in the high len bytes
	SHRQ	CX, SI			// drop the bytes before the start; ZF = (result == 0)

done:
	// ZF comes from the shift above (its count is never 0 here) or from the
	// empty-length test, so no separate compare is needed.
	SETEQ	ret+24(FP)
	RET