-
Notifications
You must be signed in to change notification settings - Fork 10
/
xorbytes_amd64.s
71 lines (63 loc) · 1.5 KB
/
xorbytes_amd64.s
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
// func bytesN(dst, a, b *byte, n int)
TEXT ·bytesN(SB), 4, $0
MOVQ d+0(FP), BX
MOVQ a+8(FP), SI
MOVQ b+16(FP), CX
MOVQ n+24(FP), DX
TESTQ $15, DX // AND 15 & len, if not zero jump to not_aligned.
JNZ not_aligned
aligned:
MOVQ $0, AX // position in slices
loop16b:
MOVOU (SI)(AX*1), X0 // XOR 16byte forwards.
MOVOU (CX)(AX*1), X1
PXOR X1, X0
MOVOU X0, (BX)(AX*1)
ADDQ $16, AX
CMPQ DX, AX
JNE loop16b
RET
loop_1b:
SUBQ $1, DX // XOR 1byte backwards.
MOVB (SI)(DX*1), DI
MOVB (CX)(DX*1), AX
XORB AX, DI
MOVB DI, (BX)(DX*1)
TESTQ $7, DX // AND 7 & len, if not zero jump to loop_1b.
JNZ loop_1b
CMPQ DX, $0 // if len is 0, ret.
JE ret
TESTQ $15, DX // AND 15 & len, if zero jump to aligned.
JZ aligned
not_aligned:
TESTQ $7, DX // AND $7 & len, if not zero jump to loop_1b.
JNE loop_1b
SUBQ $8, DX // XOR 8bytes backwards.
MOVQ (SI)(DX*1), DI
MOVQ (CX)(DX*1), AX
XORQ AX, DI
MOVQ DI, (BX)(DX*1)
CMPQ DX, $16 // if len is greater or equal 16 here, it must be aligned.
JGE aligned
ret:
RET
// func bytes8(dst, a, b *byte)
TEXT ·bytes8(SB), 4, $0
MOVQ d+0(FP), BX
MOVQ a+8(FP), SI
MOVQ b+16(FP), CX
MOVQ (SI), DI
MOVQ (CX), AX
XORQ AX, DI
MOVQ DI, (BX)
RET
// func bytes16(dst, a, b *byte)
TEXT ·bytes16(SB), 4, $0
MOVQ d+0(FP), BX
MOVQ a+8(FP), SI
MOVQ b+16(FP), CX
MOVOU (SI), X0
MOVOU (CX), X1
PXOR X1, X0
MOVOU X0, (BX)
RET