Skip to content

Commit

Permalink
zstd: Shave some instructions off the amd64 asm (#720)
Browse files Browse the repository at this point in the history
These changes are all in return sequences, so no performance gain is expected, but
the generated code is smaller.
  • Loading branch information
greatroar committed Dec 29, 2022
1 parent 272fbc7 commit 5206968
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 33 deletions.
17 changes: 8 additions & 9 deletions zstd/_generate/gen.go
Original file line number Diff line number Diff line change
Expand Up @@ -502,20 +502,20 @@ func (o options) generateBody(name string, executeSingleTriple func(ctx *execute
o.returnWithCode(errorNotEnoughLiterals)
}

Comment("Return with not enough output space error")
{
if !o.useSeqs {
Comment("Return with not enough output space error")
Label("error_not_enough_space")
if !o.useSeqs {
{
ctx := Dereference(Param("ctx"))
tmp := GP64()
MOVQ(llP, tmp)
Store(tmp, ctx.Field("ll"))
MOVQ(mlP, tmp)
Store(tmp, ctx.Field("ml"))
Store(ec.outPosition, ctx.Field("outPosition"))
}

o.returnWithCode(errorNotEnoughSpace)
o.returnWithCode(errorNotEnoughSpace)
}
}
}

Expand Down Expand Up @@ -1062,10 +1062,9 @@ func (e executeSimple) generateProcedure(name string) {
Store(seqIndex, ctx.Field("seqIndex"))
Store(outPosition, ctx.Field("outPosition"))

// compute litPosition
tmp := GP64()
Load(ctx.Field("literals").Base(), tmp)
SUBQ(tmp, literals) // litPosition := current - initial literals pointer
// litPosition := current - initial literals pointer
litField, _ := ctx.Field("literals").Base().Resolve()
SUBQ(litField.Addr, literals)
Store(literals, ctx.Field("litPosition"))
}
Label("loop_finished")
Expand Down
28 changes: 4 additions & 24 deletions zstd/seqdec_amd64.s
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: CMOV
TEXT ·sequenceDecs_decode_56_amd64(SB), $8-32
Expand Down Expand Up @@ -617,10 +613,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_bmi2(SB), $8-32
Expand Down Expand Up @@ -897,10 +889,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
// Requires: BMI, BMI2, CMOV
TEXT ·sequenceDecs_decode_56_bmi2(SB), $8-32
Expand Down Expand Up @@ -1152,10 +1140,6 @@ error_not_enough_literals:
MOVQ $0x00000004, ret+24(FP)
RET

// Return with not enough output space error
MOVQ $0x00000005, ret+24(FP)
RET

// func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
// Requires: SSE
TEXT ·sequenceDecs_executeSimple_amd64(SB), $8-9
Expand Down Expand Up @@ -1389,8 +1373,7 @@ loop_finished:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand All @@ -1402,8 +1385,7 @@ error_match_off_too_big:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand Down Expand Up @@ -1747,8 +1729,7 @@ loop_finished:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand All @@ -1760,8 +1741,7 @@ error_match_off_too_big:
MOVQ ctx+0(FP), AX
MOVQ DX, 24(AX)
MOVQ DI, 104(AX)
MOVQ 80(AX), CX
SUBQ CX, SI
SUBQ 80(AX), SI
MOVQ SI, 112(AX)
RET

Expand Down

0 comments on commit 5206968

Please sign in to comment.