diff --git a/bits_amd64.go b/bits_amd64.go
index 5bd6ac7..7256673 100644
--- a/bits_amd64.go
+++ b/bits_amd64.go
@@ -36,7 +36,7 @@ func metaMatchH2(m *metadata, h h2) bitset {
 }
 
 func metaMatchEmpty(m *metadata) bitset {
-	b := simd.MatchMetadata((*[groupSize]uint8)(m), empty)
+	b := simd.MatchEmpty((*[groupSize]uint8)(m))
 	return bitset(b)
 }
 
diff --git a/simd/match.s b/simd/match.s
index 5aa2e13..705c320 100644
--- a/simd/match.s
+++ b/simd/match.s
@@ -17,3 +17,14 @@ TEXT ·MatchMetadata(SB), NOSPLIT, $0-18
 	PMOVMSKB X0, AX
 	MOVW AX, ret+16(FP)
 	RET
+
+// func MatchEmpty(metadata *[16]uint8) uint16
+// Requires: SSE2
+TEXT ·MatchEmpty(SB), NOSPLIT, $0-10
+	MOVQ metadata+0(FP), AX
+	PXOR X0, X0
+	MOVOU (AX), X1
+	PCMPEQB X1, X0
+	PMOVMSKB X0, AX
+	MOVW AX, ret+8(FP)
+	RET
diff --git a/simd/match_amd64.go b/simd/match_amd64.go
index ea93a14..ab8d0b6 100644
--- a/simd/match_amd64.go
+++ b/simd/match_amd64.go
@@ -7,3 +7,7 @@ package simd
 // MatchMetadata performs a 16-way probe of |metadata| using SSE instructions
 // nb: |metadata| must be an aligned pointer
 func MatchMetadata(metadata *[16]uint8, hash uint8) uint16
+
+// MatchEmpty performs a 16-way probe of zero byte using SSE instructions
+// nb: |metadata| must be an aligned pointer
+func MatchEmpty(metadata *[16]uint8) uint16
diff --git a/simd/src/asm.go b/simd/src/asm.go
index 13ecf45..58410c2 100644
--- a/simd/src/asm.go
+++ b/simd/src/asm.go
@@ -23,22 +23,41 @@ import (
 func main() {
 	ConstraintExpr("amd64")
 
-	TEXT("MatchMetadata", NOSPLIT, "func(metadata *[16]uint8, hash uint8) uint16")
-	Doc("MatchMetadata performs a 16-way probe of |metadata| using SSE instructions",
-		"nb: |metadata| must be an aligned pointer")
-	m := Mem{Base: Load(Param("metadata"), GP64())}
-	h := Load(Param("hash"), GP32())
-	mask := GP32()
-
-	x0, x1, x2 := XMM(), XMM(), XMM()
-	MOVD(h, x0)
-	PXOR(x1, x1)
-	PSHUFB(x1, x0)
-	MOVOU(m, x2)
-	PCMPEQB(x2, x0)
-	PMOVMSKB(x0, mask)
-
-	Store(mask.As16(), ReturnIndex(0))
-	RET()
+	{
+		TEXT("MatchMetadata", NOSPLIT, "func(metadata *[16]uint8, hash uint8) uint16")
+		Doc("MatchMetadata performs a 16-way probe of |metadata| using SSE instructions",
+			"nb: |metadata| must be an aligned pointer")
+		m := Mem{Base: Load(Param("metadata"), GP64())}
+		h := Load(Param("hash"), GP32())
+		mask := GP32()
+
+		x0, x1, x2 := XMM(), XMM(), XMM()
+		MOVD(h, x0)
+		PXOR(x1, x1)
+		PSHUFB(x1, x0)
+		MOVOU(m, x2)
+		PCMPEQB(x2, x0)
+		PMOVMSKB(x0, mask)
+
+		Store(mask.As16(), ReturnIndex(0))
+		RET()
+	}
+
+	{
+		TEXT("MatchEmpty", NOSPLIT, "func(metadata *[16]uint8) uint16")
+		Doc("MatchEmpty performs a 16-way probe of zero byte using SSE instructions",
+			"nb: |metadata| must be an aligned pointer")
+		m := Mem{Base: Load(Param("metadata"), GP64())}
+		mask := GP32()
+
+		x0, x1 := XMM(), XMM()
+		PXOR(x0, x0)
+		MOVOU(m, x1)
+		PCMPEQB(x1, x0)
+		PMOVMSKB(x0, mask)
+
+		Store(mask.As16(), ReturnIndex(0))
+		RET()
+	}
 	Generate()
 }