Skip to content

Commit

Permalink
zstd: Shave some instructions off the amd64 asm
Browse files (browse the repository at this point in the history)
All of the affected instructions are in return sequences, so no performance
gain is expected, but the generated code is smaller.
  • Loading branch information
greatroar committed Dec 23, 2022
1 parent 272fbc7 commit bb66c72
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 34 deletions.
19 changes: 9 additions & 10 deletions zstd/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,20 +502,20 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
o.returnWithCode(errorNotEnoughLiterals)
}

Comment("Return with not enough output space error")
{
Label("error_not_enough_space")
if !o.useSeqs {
if !o.useSeqs {
Comment("Return with not enough output space error")
{
Label("error_not_enough_space")
ctx := Dereference(Param("ctx"))
tmp := GP64()
MOVQ(llP, tmp)
Store(tmp, ctx.Field("ll"))
MOVQ(mlP, tmp)
Store(tmp, ctx.Field("ml"))
Store(ec.outPosition, ctx.Field("outPosition"))
}

o.returnWithCode(errorNotEnoughSpace)
o.returnWithCode(errorNotEnoughSpace)
}
}
}

Expand Down Expand Up @@ -1062,10 +1062,9 @@ func (e executeSimple) generateProcedure(name string) {
Store(seqIndex, ctx.Field("seqIndex"))
Store(outPosition, ctx.Field("outPosition"))

// compute litPosition
tmp := GP64()
Load(ctx.Field("literals").Base(), tmp)
SUBQ(tmp, literals) // litPosition := current - initial literals pointer
// litPosition := current - initial literals pointer
litField, _ := ctx.Field("literals").Base().Resolve()
SUBQ(litField.Addr, literals)
Store(literals, ctx.Field("litPosition"))
}
Label("loop_finished")
Expand Down
28 changes: 4 additions & 24 deletions zstd/seqdec_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
Expand Down Expand Up @@ -617,10 +613,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
Expand Down Expand Up @@ -897,10 +889,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
Expand Down Expand Up @@ -1152,10 +1140,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Requires: SSE
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
Expand Down Expand Up @@ -1389,8 +1373,7 @@ loop_finished:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand All @@ -1402,8 +1385,7 @@ error_match_off_too_big:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand Down Expand Up @@ -1747,8 +1729,7 @@ loop_finished:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand All @@ -1760,8 +1741,7 @@ error_match_off_too_big:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand Down

0 comments on commit bb66c72

Please sign in to comment.