From 6946b993a3210f700286ce05258549eb0c4b4dd0 Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Fri, 23 Dec 2022 09:17:04 +0100 Subject: [PATCH] zstd: Shave some instructions off the amd64 asm These are all in return sequences, so no performance gain expected, but the generated code is smaller. --- zstd/_generate/gen.go | 17 ++++++++--------- zstd/seqdec_amd64.s | 28 ++++------------------------ 2 files changed, 12 insertions(+), 33 deletions(-) diff --git a/zstd/_generate/gen.go b/zstd/_generate/gen.go index 2f10f64a3e..87da45efdb 100644 --- a/zstd/_generate/gen.go +++ b/zstd/_generate/gen.go @@ -502,10 +502,10 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute o.returnWithCode(errorNotEnoughLiterals) } - Comment("Return with not enough output space error") - { + if !o.useSeqs { + Comment("Return with not enough output space error") Label("error_not_enough_space") - if !o.useSeqs { + { ctx := Dereference(Param("ctx")) tmp := GP64() MOVQ(llP, tmp) @@ -513,9 +513,9 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute MOVQ(mlP, tmp) Store(tmp, ctx.Field("ml")) Store(ec.outPosition, ctx.Field("outPosition")) - } - o.returnWithCode(errorNotEnoughSpace) + o.returnWithCode(errorNotEnoughSpace) + } } } @@ -1062,10 +1062,9 @@ func (e executeSimple) generateProcedure(name string) { Store(seqIndex, ctx.Field("seqIndex")) Store(outPosition, ctx.Field("outPosition")) - // compute litPosition - tmp := GP64() - Load(ctx.Field("literals").Base(), tmp) - SUBQ(tmp, literals) // litPosition := current - initial literals pointer + // litPosition := current - initial literals pointer + litField, _ := ctx.Field("literals").Base().Resolve() + SUBQ(litField.Addr, literals) Store(literals, ctx.Field("litPosition")) } Label("loop_finished") diff --git a/zstd/seqdec_amd64.s b/zstd/seqdec_amd64.s index 52e5703c26..b94993a072 100644 --- a/zstd/seqdec_amd64.s +++ b/zstd/seqdec_amd64.s @@ -320,10 +320,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: CMOV TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32 @@ -617,10 +613,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_bmi2(SB), $8-32 @@ -897,10 +889,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int // Requires: BMI, BMI2, CMOV TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32 @@ -1152,10 +1140,6 @@ error_not_enough_literals: MOVQ $0x00000004, ret+24(FP) RET - // Return with not enough output space error - MOVQ $0x00000005, ret+24(FP) - RET - // func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool // Requires: SSE TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9 @@ -1389,8 +1373,7 @@ loop_finished: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET @@ -1402,8 +1385,7 @@ error_match_off_too_big: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET @@ -1747,8 +1729,7 @@ loop_finished: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET @@ -1760,8 +1741,7 @@ error_match_off_too_big: MOVQ ctx+0(FP), AX MOVQ DX, 24(AX) MOVQ DI, 104(AX) - MOVQ 80(AX), CX - SUBQ CX, SI + SUBQ 80(AX), SI MOVQ SI, 112(AX) RET