From 8c54b4233d2eb52a57db691b12eadf00c5fe6d06 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Tue, 8 Dec 2020 04:20:01 -0800 Subject: [PATCH] s2: Simplify small blocks with known max size. (#300) Remove code that handles cases where we are beyond possible block limits.. --- s2/_generate/gen.go | 288 +++++---- s2/encodeblock_amd64.go | 8 + s2/encodeblock_amd64.s | 1321 +-------------------------------------- 3 files changed, 206 insertions(+), 1411 deletions(-) diff --git a/s2/_generate/gen.go b/s2/_generate/gen.go index e2421502f7..f1c345a027 100644 --- a/s2/_generate/gen.go +++ b/s2/_generate/gen.go @@ -16,6 +16,16 @@ import ( // insert extra checks here and there. const debug = false +const ( + limit14B = math.MaxUint32 + // Use 12 bit table when no more than... + limit12B = 16<<10 - 1 + // Use 10 bit table when no more than... + limit10B = 4<<10 - 1 + // Use 8 bit table when no more than... + limit8B = 512 - 1 +) + func main() { Constraint(buildtags.Not("appengine").ToConstraint()) Constraint(buildtags.Not("noasm").ToConstraint()) @@ -24,19 +34,20 @@ func main() { o := options{ snappy: false, } - o.genEncodeBlockAsm("encodeBlockAsm", 14, 6, 6) - o.genEncodeBlockAsm("encodeBlockAsm12B", 12, 5, 5) - o.genEncodeBlockAsm("encodeBlockAsm10B", 10, 5, 4) - o.genEncodeBlockAsm("encodeBlockAsm8B", 8, 4, 4) + o.genEncodeBlockAsm("encodeBlockAsm", 14, 6, 6, limit14B) + o.genEncodeBlockAsm("encodeBlockAsm12B", 12, 5, 5, limit12B) + o.genEncodeBlockAsm("encodeBlockAsm10B", 10, 5, 4, limit10B) + o.genEncodeBlockAsm("encodeBlockAsm8B", 8, 4, 4, limit8B) // Snappy compatible o.snappy = true - o.genEncodeBlockAsm("encodeSnappyBlockAsm", 14, 6, 6) - o.genEncodeBlockAsm("encodeSnappyBlockAsm12B", 12, 5, 5) - o.genEncodeBlockAsm("encodeSnappyBlockAsm10B", 10, 5, 4) - o.genEncodeBlockAsm("encodeSnappyBlockAsm8B", 8, 4, 4) + o.genEncodeBlockAsm("encodeSnappyBlockAsm", 14, 6, 6, limit14B) + o.genEncodeBlockAsm("encodeSnappyBlockAsm12B", 12, 5, 5, limit12B) + o.genEncodeBlockAsm("encodeSnappyBlockAsm10B", 10, 5, 4, limit10B) + o.genEncodeBlockAsm("encodeSnappyBlockAsm8B", 8, 4, 4, limit8B) o.snappy = false + o.maxLen = math.MaxUint32 o.genEmitLiteral() o.genEmitRepeat() o.genEmitCopy() @@ -82,15 +93,18 @@ func assert(fn func(ok LabelRef)) { type options struct { snappy bool vmbi2 bool + maxLen int } -func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes int) { +func (o options) genEncodeBlockAsm(name string, tableBits, skipLog, hashBytes, maxLen int) { TEXT(name, 0, "func(dst, src []byte) int") Doc(name+" encodes a non-empty src to a guaranteed-large-enough dst.", + fmt.Sprintf("Maximum input %d bytes.", maxLen), "It assumes that the varint-encoded length of the decompressed bytes has already been written.", "") Pragma("noescape") const literalMaxOverhead = 4 + o.maxLen = maxLen var tableSize = 4 * (1 << tableBits) // Memzero needs at least 128 bytes. @@ -864,32 +878,43 @@ func (o options) emitLiteral(name string, litLen, retval, dstBase, litBase reg.G JLT(LabelRef("one_byte_" + name)) CMPL(n.As32(), U32(1<<8)) JLT(LabelRef("two_bytes_" + name)) - CMPL(n.As32(), U32(1<<16)) - JLT(LabelRef("three_bytes_" + name)) - CMPL(n.As32(), U32(1<<24)) - JLT(LabelRef("four_bytes_" + name)) - - Label("five_bytes_" + name) - MOVB(U8(252), Mem{Base: dstBase}) - MOVL(n.As32(), Mem{Base: dstBase, Disp: 1}) - if retval != nil { - ADDQ(U8(5), retval) + if o.maxLen >= 1<<16 { + CMPL(n.As32(), U32(1<<16)) + JLT(LabelRef("three_bytes_" + name)) + } else { + JMP(LabelRef("three_bytes_" + name)) } - ADDQ(U8(5), dstBase) - JMP(LabelRef("memmove_long_" + name)) - - Label("four_bytes_" + name) - MOVL(n, n16) - SHRL(U8(16), n16.As32()) - MOVB(U8(248), Mem{Base: dstBase}) - MOVW(n.As16(), Mem{Base: dstBase, Disp: 1}) - MOVB(n16.As8(), Mem{Base: dstBase, Disp: 3}) - if retval != nil { - ADDQ(U8(4), retval) + if o.maxLen >= 1<<16 { + if o.maxLen >= 1<<24 { + CMPL(n.As32(), U32(1<<24)) + JLT(LabelRef("four_bytes_" + name)) + } else { + JMP(LabelRef("four_bytes_" + name)) + } + } + if o.maxLen >= 1<<24 { + Label("five_bytes_" + name) + MOVB(U8(252), Mem{Base: dstBase}) + MOVL(n.As32(), Mem{Base: dstBase, Disp: 1}) + if retval != nil { + ADDQ(U8(5), retval) + } + ADDQ(U8(5), dstBase) + JMP(LabelRef("memmove_long_" + name)) + } + if o.maxLen >= 1<<16 { + Label("four_bytes_" + name) + MOVL(n, n16) + SHRL(U8(16), n16.As32()) + MOVB(U8(248), Mem{Base: dstBase}) + MOVW(n.As16(), Mem{Base: dstBase, Disp: 1}) + MOVB(n16.As8(), Mem{Base: dstBase, Disp: 3}) + if retval != nil { + ADDQ(U8(4), retval) + } + ADDQ(U8(4), dstBase) + JMP(LabelRef("memmove_long_" + name)) } - ADDQ(U8(4), dstBase) - JMP(LabelRef("memmove_long_" + name)) - Label("three_bytes_" + name) MOVB(U8(0xf4), Mem{Base: dstBase}) MOVW(n.As16(), Mem{Base: dstBase, Disp: 1}) @@ -1009,44 +1034,57 @@ func (o options) emitRepeat(name string, length, offset, retval, dstBase reg.GPV // length < 8 && offset < 2048 CMPL(tmp.As32(), U8(12)) JGE(LabelRef("cant_repeat_two_offset_" + name)) - CMPL(offset.As32(), U32(2048)) - JLT(LabelRef("repeat_two_offset_" + name)) + if o.maxLen >= 2048 { + CMPL(offset.As32(), U32(2048)) + JLT(LabelRef("repeat_two_offset_" + name)) + } const maxRepeat = ((1 << 24) - 1) + 65536 Label("cant_repeat_two_offset_" + name) CMPL(length.As32(), U32((1<<8)+4)) JLT(LabelRef("repeat_three_" + name)) // if length < (1<<8)+4 - CMPL(length.As32(), U32((1<<16)+(1<<8))) - JLT(LabelRef("repeat_four_" + name)) // if length < (1 << 16) + (1 << 8) - CMPL(length.As32(), U32(maxRepeat)) - JLT(LabelRef("repeat_five_" + name)) // If less than 24 bits to represent. - - // We have have more than 24 bits - // Emit so we have at least 4 bytes left. - LEAL(Mem{Base: length, Disp: -(maxRepeat - 4)}, length.As32()) // length -= (maxRepeat - 4) - MOVW(U16(7<<2|tagCopy1), Mem{Base: dstBase}) // dst[0] = 7<<2 | tagCopy1, dst[1] = 0 - MOVW(U16(65531), Mem{Base: dstBase, Disp: 2}) // 0xfffb - MOVB(U8(255), Mem{Base: dstBase, Disp: 4}) - ADDQ(U8(5), dstBase) - if retval != nil { - ADDQ(U8(5), retval) + if o.maxLen >= (1<<16)+(1<<8) { + CMPL(length.As32(), U32((1<<16)+(1<<8))) + JLT(LabelRef("repeat_four_" + name)) // if length < (1 << 16) + (1 << 8) + } else { + // Not needed, we should skip to it when generating. + // JMP(LabelRef("repeat_four_" + name)) // if length < (1 << 16) + (1 << 8) + } + if o.maxLen >= maxRepeat { + CMPL(length.As32(), U32(maxRepeat)) + JLT(LabelRef("repeat_five_" + name)) // If less than 24 bits to represent. + + // We have have more than 24 bits + // Emit so we have at least 4 bytes left. + LEAL(Mem{Base: length, Disp: -(maxRepeat - 4)}, length.As32()) // length -= (maxRepeat - 4) + MOVW(U16(7<<2|tagCopy1), Mem{Base: dstBase}) // dst[0] = 7<<2 | tagCopy1, dst[1] = 0 + MOVW(U16(65531), Mem{Base: dstBase, Disp: 2}) // 0xfffb + MOVB(U8(255), Mem{Base: dstBase, Disp: 4}) + ADDQ(U8(5), dstBase) + if retval != nil { + ADDQ(U8(5), retval) + } + JMP(LabelRef("emit_repeat_again_" + name)) + } else { + // Not needed. + // JMP(LabelRef("repeat_five_" + name)) // If less than 24 bits to represent. } - JMP(LabelRef("emit_repeat_again_" + name)) // Must be able to be within 5 bytes. - Label("repeat_five_" + name) - LEAL(Mem{Base: length, Disp: -65536}, length.As32()) // length -= 65536 - MOVL(length.As32(), offset.As32()) - MOVW(U16(7<<2|tagCopy1), Mem{Base: dstBase}) // dst[0] = 7<<2 | tagCopy1, dst[1] = 0 - MOVW(length.As16(), Mem{Base: dstBase, Disp: 2}) // dst[2] = uint8(length), dst[3] = uint8(length >> 8) - SARL(U8(16), offset.As32()) // offset = length >> 16 - MOVB(offset.As8(), Mem{Base: dstBase, Disp: 4}) // dst[4] = length >> 16 - if retval != nil { - ADDQ(U8(5), retval) // i += 5 + if o.maxLen >= (1<<16)+(1<<8) { + Label("repeat_five_" + name) + LEAL(Mem{Base: length, Disp: -65536}, length.As32()) // length -= 65536 + MOVL(length.As32(), offset.As32()) + MOVW(U16(7<<2|tagCopy1), Mem{Base: dstBase}) // dst[0] = 7<<2 | tagCopy1, dst[1] = 0 + MOVW(length.As16(), Mem{Base: dstBase, Disp: 2}) // dst[2] = uint8(length), dst[3] = uint8(length >> 8) + SARL(U8(16), offset.As32()) // offset = length >> 16 + MOVB(offset.As8(), Mem{Base: dstBase, Disp: 4}) // dst[4] = length >> 16 + if retval != nil { + ADDQ(U8(5), retval) // i += 5 + } + ADDQ(U8(5), dstBase) // dst += 5 + JMP(end) } - ADDQ(U8(5), dstBase) // dst += 5 - JMP(end) - Label("repeat_four_" + name) LEAL(Mem{Base: length, Disp: -256}, length.As32()) // length -= 256 MOVW(U16(6<<2|tagCopy1), Mem{Base: dstBase}) // dst[0] = 6<<2 | tagCopy1, dst[1] = 0 @@ -1174,69 +1212,70 @@ const ( // Will jump to end label when finished. // Uses 2 GP registers. func (o options) emitCopy(name string, length, offset, retval, dstBase reg.GPVirtual, end LabelRef) { - //if offset >= 65536 { - CMPL(offset.As32(), U32(65536)) - JL(LabelRef("two_byte_offset_" + name)) - - // offset is >= 65536 - // if length <= 64 goto four_bytes_remain_ - Label("four_bytes_loop_back_" + name) - CMPL(length.As32(), U8(64)) - JLE(LabelRef("four_bytes_remain_" + name)) - - // Emit a length 64 copy, encoded as 5 bytes. - // dst[0] = 63<<2 | tagCopy4 - MOVB(U8(63<<2|tagCopy4), Mem{Base: dstBase}) - // dst[4] = uint8(offset >> 24) - // dst[3] = uint8(offset >> 16) - // dst[2] = uint8(offset >> 8) - // dst[1] = uint8(offset) - MOVL(offset.As32(), Mem{Base: dstBase, Disp: 1}) - // length -= 64 - LEAL(Mem{Base: length, Disp: -64}, length.As32()) - if retval != nil { - ADDQ(U8(5), retval) // i+=5 - } - ADDQ(U8(5), dstBase) // dst+=5 + if o.maxLen >= 65536 { + //if offset >= 65536 { + CMPL(offset.As32(), U32(65536)) + JL(LabelRef("two_byte_offset_" + name)) + + // offset is >= 65536 + // if length <= 64 goto four_bytes_remain_ + Label("four_bytes_loop_back_" + name) + CMPL(length.As32(), U8(64)) + JLE(LabelRef("four_bytes_remain_" + name)) + + // Emit a length 64 copy, encoded as 5 bytes. + // dst[0] = 63<<2 | tagCopy4 + MOVB(U8(63<<2|tagCopy4), Mem{Base: dstBase}) + // dst[4] = uint8(offset >> 24) + // dst[3] = uint8(offset >> 16) + // dst[2] = uint8(offset >> 8) + // dst[1] = uint8(offset) + MOVL(offset.As32(), Mem{Base: dstBase, Disp: 1}) + // length -= 64 + LEAL(Mem{Base: length, Disp: -64}, length.As32()) + if retval != nil { + ADDQ(U8(5), retval) // i+=5 + } + ADDQ(U8(5), dstBase) // dst+=5 - // if length >= 4 { - CMPL(length.As32(), U8(4)) - JL(LabelRef("four_bytes_remain_" + name)) + // if length >= 4 { + CMPL(length.As32(), U8(4)) + JL(LabelRef("four_bytes_remain_" + name)) - // Emit remaining as repeats - // return 5 + emitRepeat(dst[5:], offset, length) - // Inline call to emitRepeat. Will jump to end - if !o.snappy { - o.emitRepeat(name+"_emit_copy", length, offset, retval, dstBase, end) - } - JMP(LabelRef("four_bytes_loop_back_" + name)) + // Emit remaining as repeats + // return 5 + emitRepeat(dst[5:], offset, length) + // Inline call to emitRepeat. Will jump to end + if !o.snappy { + o.emitRepeat(name+"_emit_copy", length, offset, retval, dstBase, end) + } + JMP(LabelRef("four_bytes_loop_back_" + name)) - Label("four_bytes_remain_" + name) - // if length == 0 { - // return i - // } - TESTL(length.As32(), length.As32()) - JZ(end) + Label("four_bytes_remain_" + name) + // if length == 0 { + // return i + // } + TESTL(length.As32(), length.As32()) + JZ(end) - // Emit a copy, offset encoded as 4 bytes. - // dst[i+0] = uint8(length-1)<<2 | tagCopy4 - // dst[i+1] = uint8(offset) - // dst[i+2] = uint8(offset >> 8) - // dst[i+3] = uint8(offset >> 16) - // dst[i+4] = uint8(offset >> 24) - tmp := GP64() - MOVB(U8(tagCopy4), tmp.As8()) - // Use displacement to subtract 1 from upshifted length. - LEAL(Mem{Base: tmp, Disp: -(1 << 2), Index: length, Scale: 4}, length.As32()) - MOVB(length.As8(), Mem{Base: dstBase}) - MOVL(offset.As32(), Mem{Base: dstBase, Disp: 1}) - // return i + 5 - if retval != nil { - ADDQ(U8(5), retval) + // Emit a copy, offset encoded as 4 bytes. + // dst[i+0] = uint8(length-1)<<2 | tagCopy4 + // dst[i+1] = uint8(offset) + // dst[i+2] = uint8(offset >> 8) + // dst[i+3] = uint8(offset >> 16) + // dst[i+4] = uint8(offset >> 24) + tmp := GP64() + MOVB(U8(tagCopy4), tmp.As8()) + // Use displacement to subtract 1 from upshifted length. + LEAL(Mem{Base: tmp, Disp: -(1 << 2), Index: length, Scale: 4}, length.As32()) + MOVB(length.As8(), Mem{Base: dstBase}) + MOVL(offset.As32(), Mem{Base: dstBase, Disp: 1}) + // return i + 5 + if retval != nil { + ADDQ(U8(5), retval) + } + ADDQ(U8(5), dstBase) + JMP(end) } - ADDQ(U8(5), dstBase) - JMP(end) - Label("two_byte_offset_" + name) // Offset no more than 2 bytes. @@ -1270,13 +1309,14 @@ func (o options) emitCopy(name string, length, offset, retval, dstBase reg.GPVir //if length >= 12 || offset >= 2048 { CMPL(length.As32(), U8(12)) JGE(LabelRef("emit_copy_three_" + name)) - CMPL(offset.As32(), U32(2048)) - JGE(LabelRef("emit_copy_three_" + name)) - + if o.maxLen >= 2048 { + CMPL(offset.As32(), U32(2048)) + JGE(LabelRef("emit_copy_three_" + name)) + } // Emit the remaining copy, encoded as 2 bytes. // dst[1] = uint8(offset) // dst[0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 - tmp = GP64() + tmp := GP64() MOVB(U8(tagCopy1), tmp.As8()) // Use scale and displacement to shift and subtract values from length. LEAL(Mem{Base: tmp, Index: length, Scale: 4, Disp: -(4 << 2)}, length.As32()) diff --git a/s2/encodeblock_amd64.go b/s2/encodeblock_amd64.go index 9ae686c07c..99e7d68bee 100644 --- a/s2/encodeblock_amd64.go +++ b/s2/encodeblock_amd64.go @@ -7,48 +7,56 @@ package s2 // encodeBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeBlockAsm(dst []byte, src []byte) int // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeBlockAsm12B(dst []byte, src []byte) int // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeBlockAsm10B(dst []byte, src []byte) int // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeBlockAsm8B(dst []byte, src []byte) int // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 4294967295 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeSnappyBlockAsm(dst []byte, src []byte) int // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 16383 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeSnappyBlockAsm12B(dst []byte, src []byte) int // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 4095 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape func encodeSnappyBlockAsm10B(dst []byte, src []byte) int // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst. +// Maximum input 511 bytes. // It assumes that the varint-encoded length of the decompressed bytes has already been written. // //go:noescape diff --git a/s2/encodeblock_amd64.s b/s2/encodeblock_amd64.s index 8b3b5e354b..239c1c7de1 100644 --- a/s2/encodeblock_amd64.s +++ b/s2/encodeblock_amd64.s @@ -1329,25 +1329,6 @@ repeat_extend_back_end_encodeBlockAsm12B: JLT one_byte_repeat_emit_encodeBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm12B - CMPL BP, $0x00010000 - JLT three_bytes_repeat_emit_encodeBlockAsm12B - CMPL BP, $0x01000000 - JLT four_bytes_repeat_emit_encodeBlockAsm12B - MOVB $0xfc, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_repeat_emit_encodeBlockAsm12B - -four_bytes_repeat_emit_encodeBlockAsm12B: - MOVL BP, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BP, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_repeat_emit_encodeBlockAsm12B - -three_bytes_repeat_emit_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX @@ -1529,42 +1510,18 @@ repeat_extend_forward_end_encodeBlockAsm12B: MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm12B - -emit_repeat_again_match_repeat_encodeBlockAsm12B: - MOVL BP, DI - LEAL -4(BP), BP - CMPL DI, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm12B - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm12B + MOVL BP, DI + LEAL -4(BP), BP + CMPL DI, $0x08 + JLE repeat_two_match_repeat_encodeBlockAsm12B + CMPL DI, $0x0c + JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm12B + CMPL SI, $0x00000800 + JLT repeat_two_offset_match_repeat_encodeBlockAsm12B cant_repeat_two_offset_match_repeat_encodeBlockAsm12B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm12B - CMPL BP, $0x00010100 - JLT repeat_four_match_repeat_encodeBlockAsm12B - CMPL BP, $0x0100ffff - JLT repeat_five_match_repeat_encodeBlockAsm12B - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_repeat_encodeBlockAsm12B - -repeat_five_match_repeat_encodeBlockAsm12B: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm12B - -repeat_four_match_repeat_encodeBlockAsm12B: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) @@ -1597,96 +1554,6 @@ repeat_two_offset_match_repeat_encodeBlockAsm12B: JMP repeat_end_emit_encodeBlockAsm12B repeat_as_copy_encodeBlockAsm12B: - CMPL SI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeBlockAsm12B - -four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B: - CMPL BP, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm12B - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BP), BP - ADDQ $0x05, AX - CMPL BP, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeBlockAsm12B - -emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy: - MOVL BP, DI - LEAL -4(BP), BP - CMPL DI, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy - CMPL DI, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy - CMPL SI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy - -cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy: - CMPL BP, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy - CMPL BP, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy - CMPL BP, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy - -repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm12B - -repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy: - LEAL -256(BP), BP - MOVW $0x0019, (AX) - MOVW BP, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm12B - -repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy: - LEAL -4(BP), BP - MOVW $0x0015, (AX) - MOVB BP, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm12B - -repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy: - SHLL $0x02, BP - ORL $0x01, BP - MOVW BP, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm12B - -repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BP*4), BP - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BP - MOVB BP, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm12B - JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm12B - -four_bytes_remain_repeat_as_copy_encodeBlockAsm12B: - TESTL BP, BP - JZ repeat_end_emit_encodeBlockAsm12B - MOVB $0x03, BL - LEAL -4(BX)(BP*4), BP - MOVB BP, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm12B - two_byte_offset_repeat_as_copy_encodeBlockAsm12B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B @@ -1694,8 +1561,6 @@ two_byte_offset_repeat_as_copy_encodeBlockAsm12B: MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX - -emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 @@ -1708,28 +1573,6 @@ emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL BP, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - CMPL BP, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm12B_emit_copy_short - -repeat_five_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm12B - -repeat_four_repeat_as_copy_encodeBlockAsm12B_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) @@ -1852,25 +1695,6 @@ match_dst_size_check_encodeBlockAsm12B: JLT one_byte_match_emit_encodeBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm12B - CMPL DI, $0x00010000 - JLT three_bytes_match_emit_encodeBlockAsm12B - CMPL DI, $0x01000000 - JLT four_bytes_match_emit_encodeBlockAsm12B - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeBlockAsm12B - -four_bytes_match_emit_encodeBlockAsm12B: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeBlockAsm12B - -three_bytes_match_emit_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX @@ -2053,95 +1877,6 @@ match_nolit_end_encodeBlockAsm12B: MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) - CMPL BP, $0x00010000 - JL two_byte_offset_match_nolit_encodeBlockAsm12B - -four_bytes_loop_back_match_nolit_encodeBlockAsm12B: - CMPL R9, $0x40 - JLE four_bytes_remain_match_nolit_encodeBlockAsm12B - MOVB $0xff, (AX) - MOVL BP, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 - JL four_bytes_remain_match_nolit_encodeBlockAsm12B - -emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm12B_emit_copy - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy - CMPL BP, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy - -cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: - CMPL R9, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy - CMPL R9, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy - CMPL R9, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy - LEAL -16842747(R9), R9 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy - -repeat_five_match_nolit_encodeBlockAsm12B_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BP - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BP - MOVB BP, 4(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - -repeat_four_match_nolit_encodeBlockAsm12B_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - -repeat_three_match_nolit_encodeBlockAsm12B_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - -repeat_two_match_nolit_encodeBlockAsm12B_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - -repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BP, 1(AX) - SARL $0x08, BP - SHLL $0x05, BP - ORL BP, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - JMP four_bytes_loop_back_match_nolit_encodeBlockAsm12B - -four_bytes_remain_match_nolit_encodeBlockAsm12B: - TESTL R9, R9 - JZ match_nolit_emitcopy_end_encodeBlockAsm12B - MOVB $0x03, BL - LEAL -4(BX)(R9*4), R9 - MOVB R9, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B two_byte_offset_match_nolit_encodeBlockAsm12B: CMPL R9, $0x40 @@ -2150,8 +1885,6 @@ two_byte_offset_match_nolit_encodeBlockAsm12B: MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX - -emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 @@ -2164,28 +1897,6 @@ emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short: cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL R9, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short - CMPL R9, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short - LEAL -16842747(R9), R9 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm12B_emit_copy_short - -repeat_five_match_nolit_encodeBlockAsm12B_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BP - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BP - MOVB BP, 4(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm12B - -repeat_four_match_nolit_encodeBlockAsm12B_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) @@ -2293,25 +2004,6 @@ emit_remainder_ok_encodeBlockAsm12B: JLT one_byte_emit_remainder_encodeBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm12B - CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBlockAsm12B - CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeBlockAsm12B - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_emit_remainder_encodeBlockAsm12B - -four_bytes_emit_remainder_encodeBlockAsm12B: - MOVL DX, BX - SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_emit_remainder_encodeBlockAsm12B - -three_bytes_emit_remainder_encodeBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -2553,25 +2245,6 @@ repeat_extend_back_end_encodeBlockAsm10B: JLT one_byte_repeat_emit_encodeBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm10B - CMPL BP, $0x00010000 - JLT three_bytes_repeat_emit_encodeBlockAsm10B - CMPL BP, $0x01000000 - JLT four_bytes_repeat_emit_encodeBlockAsm10B - MOVB $0xfc, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_repeat_emit_encodeBlockAsm10B - -four_bytes_repeat_emit_encodeBlockAsm10B: - MOVL BP, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BP, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_repeat_emit_encodeBlockAsm10B - -three_bytes_repeat_emit_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX @@ -2753,42 +2426,18 @@ repeat_extend_forward_end_encodeBlockAsm10B: MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm10B - -emit_repeat_again_match_repeat_encodeBlockAsm10B: - MOVL BP, DI - LEAL -4(BP), BP - CMPL DI, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm10B - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm10B + MOVL BP, DI + LEAL -4(BP), BP + CMPL DI, $0x08 + JLE repeat_two_match_repeat_encodeBlockAsm10B + CMPL DI, $0x0c + JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm10B + CMPL SI, $0x00000800 + JLT repeat_two_offset_match_repeat_encodeBlockAsm10B cant_repeat_two_offset_match_repeat_encodeBlockAsm10B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm10B - CMPL BP, $0x00010100 - JLT repeat_four_match_repeat_encodeBlockAsm10B - CMPL BP, $0x0100ffff - JLT repeat_five_match_repeat_encodeBlockAsm10B - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_repeat_encodeBlockAsm10B - -repeat_five_match_repeat_encodeBlockAsm10B: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm10B - -repeat_four_match_repeat_encodeBlockAsm10B: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) @@ -2821,96 +2470,6 @@ repeat_two_offset_match_repeat_encodeBlockAsm10B: JMP repeat_end_emit_encodeBlockAsm10B repeat_as_copy_encodeBlockAsm10B: - CMPL SI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeBlockAsm10B - -four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B: - CMPL BP, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm10B - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BP), BP - ADDQ $0x05, AX - CMPL BP, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeBlockAsm10B - -emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy: - MOVL BP, DI - LEAL -4(BP), BP - CMPL DI, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy - CMPL DI, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy - CMPL SI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy - -cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy: - CMPL BP, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy - CMPL BP, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy - CMPL BP, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy - -repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm10B - -repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy: - LEAL -256(BP), BP - MOVW $0x0019, (AX) - MOVW BP, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm10B - -repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy: - LEAL -4(BP), BP - MOVW $0x0015, (AX) - MOVB BP, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm10B - -repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy: - SHLL $0x02, BP - ORL $0x01, BP - MOVW BP, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm10B - -repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BP*4), BP - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BP - MOVB BP, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm10B - JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm10B - -four_bytes_remain_repeat_as_copy_encodeBlockAsm10B: - TESTL BP, BP - JZ repeat_end_emit_encodeBlockAsm10B - MOVB $0x03, BL - LEAL -4(BX)(BP*4), BP - MOVB BP, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm10B - two_byte_offset_repeat_as_copy_encodeBlockAsm10B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B @@ -2918,8 +2477,6 @@ two_byte_offset_repeat_as_copy_encodeBlockAsm10B: MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX - -emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: MOVL BP, DI LEAL -4(BP), BP CMPL DI, $0x08 @@ -2932,28 +2489,6 @@ emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL BP, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - CMPL BP, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm10B_emit_copy_short - -repeat_five_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm10B - -repeat_four_repeat_as_copy_encodeBlockAsm10B_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) @@ -3076,25 +2611,6 @@ match_dst_size_check_encodeBlockAsm10B: JLT one_byte_match_emit_encodeBlockAsm10B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm10B - CMPL DI, $0x00010000 - JLT three_bytes_match_emit_encodeBlockAsm10B - CMPL DI, $0x01000000 - JLT four_bytes_match_emit_encodeBlockAsm10B - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeBlockAsm10B - -four_bytes_match_emit_encodeBlockAsm10B: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeBlockAsm10B - -three_bytes_match_emit_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX @@ -3277,95 +2793,6 @@ match_nolit_end_encodeBlockAsm10B: MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) - CMPL BP, $0x00010000 - JL two_byte_offset_match_nolit_encodeBlockAsm10B - -four_bytes_loop_back_match_nolit_encodeBlockAsm10B: - CMPL R9, $0x40 - JLE four_bytes_remain_match_nolit_encodeBlockAsm10B - MOVB $0xff, (AX) - MOVL BP, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 - JL four_bytes_remain_match_nolit_encodeBlockAsm10B - -emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm10B_emit_copy - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy - CMPL BP, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy - -cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy: - CMPL R9, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy - CMPL R9, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm10B_emit_copy - CMPL R9, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm10B_emit_copy - LEAL -16842747(R9), R9 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy - -repeat_five_match_nolit_encodeBlockAsm10B_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BP - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BP - MOVB BP, 4(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B - -repeat_four_match_nolit_encodeBlockAsm10B_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B - -repeat_three_match_nolit_encodeBlockAsm10B_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B - -repeat_two_match_nolit_encodeBlockAsm10B_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B - -repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BP, 1(AX) - SARL $0x08, BP - SHLL $0x05, BP - ORL BP, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B - JMP four_bytes_loop_back_match_nolit_encodeBlockAsm10B - -four_bytes_remain_match_nolit_encodeBlockAsm10B: - TESTL R9, R9 - JZ match_nolit_emitcopy_end_encodeBlockAsm10B - MOVB $0x03, BL - LEAL -4(BX)(R9*4), R9 - MOVB R9, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B two_byte_offset_match_nolit_encodeBlockAsm10B: CMPL R9, $0x40 @@ -3374,8 +2801,6 @@ two_byte_offset_match_nolit_encodeBlockAsm10B: MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX - -emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short: MOVL R9, SI LEAL -4(R9), R9 CMPL SI, $0x08 @@ -3388,28 +2813,6 @@ emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short: cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short: CMPL R9, $0x00000104 JLT repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL R9, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short - CMPL R9, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short - LEAL -16842747(R9), R9 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm10B_emit_copy_short - -repeat_five_match_nolit_encodeBlockAsm10B_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BP - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BP - MOVB BP, 4(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm10B - -repeat_four_match_nolit_encodeBlockAsm10B_emit_copy_short: LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) @@ -3517,25 +2920,6 @@ emit_remainder_ok_encodeBlockAsm10B: JLT one_byte_emit_remainder_encodeBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm10B - CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBlockAsm10B - CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeBlockAsm10B - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_emit_remainder_encodeBlockAsm10B - -four_bytes_emit_remainder_encodeBlockAsm10B: - MOVL DX, BX - SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_emit_remainder_encodeBlockAsm10B - -three_bytes_emit_remainder_encodeBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -3777,25 +3161,6 @@ repeat_extend_back_end_encodeBlockAsm8B: JLT one_byte_repeat_emit_encodeBlockAsm8B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeBlockAsm8B - CMPL BP, $0x00010000 - JLT three_bytes_repeat_emit_encodeBlockAsm8B - CMPL BP, $0x01000000 - JLT four_bytes_repeat_emit_encodeBlockAsm8B - MOVB $0xfc, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_repeat_emit_encodeBlockAsm8B - -four_bytes_repeat_emit_encodeBlockAsm8B: - MOVL BP, R10 - SHRL $0x10, R10 - MOVB $0xf8, (AX) - MOVW BP, 1(AX) - MOVB R10, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_repeat_emit_encodeBlockAsm8B - -three_bytes_repeat_emit_encodeBlockAsm8B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX @@ -3977,42 +3342,16 @@ repeat_extend_forward_end_encodeBlockAsm8B: MOVL 16(SP), SI TESTL DI, DI JZ repeat_as_copy_encodeBlockAsm8B - -emit_repeat_again_match_repeat_encodeBlockAsm8B: - MOVL BP, DI - LEAL -4(BP), BP - CMPL DI, $0x08 - JLE repeat_two_match_repeat_encodeBlockAsm8B - CMPL DI, $0x0c - JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B - CMPL SI, $0x00000800 - JLT repeat_two_offset_match_repeat_encodeBlockAsm8B + MOVL BP, SI + LEAL -4(BP), BP + CMPL SI, $0x08 + JLE repeat_two_match_repeat_encodeBlockAsm8B + CMPL SI, $0x0c + JGE cant_repeat_two_offset_match_repeat_encodeBlockAsm8B cant_repeat_two_offset_match_repeat_encodeBlockAsm8B: CMPL BP, $0x00000104 JLT repeat_three_match_repeat_encodeBlockAsm8B - CMPL BP, $0x00010100 - JLT repeat_four_match_repeat_encodeBlockAsm8B - CMPL BP, $0x0100ffff - JLT repeat_five_match_repeat_encodeBlockAsm8B - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_repeat_encodeBlockAsm8B - -repeat_five_match_repeat_encodeBlockAsm8B: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm8B - -repeat_four_match_repeat_encodeBlockAsm8B: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) @@ -4032,8 +3371,6 @@ repeat_two_match_repeat_encodeBlockAsm8B: MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B - -repeat_two_offset_match_repeat_encodeBlockAsm8B: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) @@ -4045,96 +3382,6 @@ repeat_two_offset_match_repeat_encodeBlockAsm8B: JMP repeat_end_emit_encodeBlockAsm8B repeat_as_copy_encodeBlockAsm8B: - CMPL SI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeBlockAsm8B - -four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B: - CMPL BP, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeBlockAsm8B - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BP), BP - ADDQ $0x05, AX - CMPL BP, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeBlockAsm8B - -emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy: - MOVL BP, DI - LEAL -4(BP), BP - CMPL DI, $0x08 - JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy - CMPL DI, $0x0c - JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy - CMPL SI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy - -cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy: - CMPL BP, $0x00000104 - JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy - CMPL BP, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy - CMPL BP, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy - -repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm8B - -repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy: - LEAL -256(BP), BP - MOVW $0x0019, (AX) - MOVW BP, 2(AX) - ADDQ $0x04, AX - JMP repeat_end_emit_encodeBlockAsm8B - -repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy: - LEAL -4(BP), BP - MOVW $0x0015, (AX) - MOVB BP, 2(AX) - ADDQ $0x03, AX - JMP repeat_end_emit_encodeBlockAsm8B - -repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy: - SHLL $0x02, BP - ORL $0x01, BP - MOVW BP, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm8B - -repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy: - XORQ DI, DI - LEAL 1(DI)(BP*4), BP - MOVB SI, 1(AX) - SARL $0x08, SI - SHLL $0x05, SI - ORL SI, BP - MOVB BP, (AX) - ADDQ $0x02, AX - JMP repeat_end_emit_encodeBlockAsm8B - JMP four_bytes_loop_back_repeat_as_copy_encodeBlockAsm8B - -four_bytes_remain_repeat_as_copy_encodeBlockAsm8B: - TESTL BP, BP - JZ repeat_end_emit_encodeBlockAsm8B - MOVB $0x03, BL - LEAL -4(BX)(BP*4), BP - MOVB BP, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm8B - two_byte_offset_repeat_as_copy_encodeBlockAsm8B: CMPL BP, $0x40 JLE two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B @@ -4142,42 +3389,16 @@ two_byte_offset_repeat_as_copy_encodeBlockAsm8B: MOVW SI, 1(AX) LEAL -60(BP), BP ADDQ $0x03, AX - -emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - MOVL BP, DI + MOVL BP, SI LEAL -4(BP), BP - CMPL DI, $0x08 + CMPL SI, $0x08 JLE repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL DI, $0x0c + CMPL SI, $0x0c JGE cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x00000800 - JLT repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: CMPL BP, $0x00000104 JLT repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL BP, $0x00010100 - JLT repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - CMPL BP, $0x0100ffff - JLT repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - LEAL -16842747(BP), BP - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_repeat_as_copy_encodeBlockAsm8B_emit_copy_short - -repeat_five_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: - LEAL -65536(BP), BP - MOVL BP, SI - MOVW $0x001d, (AX) - MOVW BP, 2(AX) - SARL $0x10, SI - MOVB SI, 4(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeBlockAsm8B - -repeat_four_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: LEAL -256(BP), BP MOVW $0x0019, (AX) MOVW BP, 2(AX) @@ -4197,8 +3418,6 @@ repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: MOVW BP, (AX) ADDQ $0x02, AX JMP repeat_end_emit_encodeBlockAsm8B - -repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: XORQ DI, DI LEAL 1(DI)(BP*4), BP MOVB SI, 1(AX) @@ -4213,8 +3432,6 @@ repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short: two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B - CMPL SI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) @@ -4300,25 +3517,6 @@ match_dst_size_check_encodeBlockAsm8B: JLT one_byte_match_emit_encodeBlockAsm8B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeBlockAsm8B - CMPL DI, $0x00010000 - JLT three_bytes_match_emit_encodeBlockAsm8B - CMPL DI, $0x01000000 - JLT four_bytes_match_emit_encodeBlockAsm8B - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeBlockAsm8B - -four_bytes_match_emit_encodeBlockAsm8B: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeBlockAsm8B - -three_bytes_match_emit_encodeBlockAsm8B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX @@ -4501,95 +3699,6 @@ match_nolit_end_encodeBlockAsm8B: MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) - CMPL BP, $0x00010000 - JL two_byte_offset_match_nolit_encodeBlockAsm8B - -four_bytes_loop_back_match_nolit_encodeBlockAsm8B: - CMPL R9, $0x40 - JLE four_bytes_remain_match_nolit_encodeBlockAsm8B - MOVB $0xff, (AX) - MOVL BP, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 - JL four_bytes_remain_match_nolit_encodeBlockAsm8B - -emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy - CMPL BP, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy - -cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy: - CMPL R9, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy - CMPL R9, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm8B_emit_copy - CMPL R9, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm8B_emit_copy - LEAL -16842747(R9), R9 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy - -repeat_five_match_nolit_encodeBlockAsm8B_emit_copy: - LEAL -65536(R9), R9 - MOVL R9, BP - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BP - MOVB BP, 4(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B - -repeat_four_match_nolit_encodeBlockAsm8B_emit_copy: - LEAL -256(R9), R9 - MOVW $0x0019, (AX) - MOVW R9, 2(AX) - ADDQ $0x04, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B - -repeat_three_match_nolit_encodeBlockAsm8B_emit_copy: - LEAL -4(R9), R9 - MOVW $0x0015, (AX) - MOVB R9, 2(AX) - ADDQ $0x03, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B - -repeat_two_match_nolit_encodeBlockAsm8B_emit_copy: - SHLL $0x02, R9 - ORL $0x01, R9 - MOVW R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B - -repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy: - XORQ SI, SI - LEAL 1(SI)(R9*4), R9 - MOVB BP, 1(AX) - SARL $0x08, BP - SHLL $0x05, BP - ORL BP, R9 - MOVB R9, (AX) - ADDQ $0x02, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B - JMP four_bytes_loop_back_match_nolit_encodeBlockAsm8B - -four_bytes_remain_match_nolit_encodeBlockAsm8B: - TESTL R9, R9 - JZ match_nolit_emitcopy_end_encodeBlockAsm8B - MOVB $0x03, BL - LEAL -4(BX)(R9*4), R9 - MOVB R9, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B two_byte_offset_match_nolit_encodeBlockAsm8B: CMPL R9, $0x40 @@ -4598,42 +3707,16 @@ two_byte_offset_match_nolit_encodeBlockAsm8B: MOVW BP, 1(AX) LEAL -60(R9), R9 ADDQ $0x03, AX + MOVL R9, BP + LEAL -4(R9), R9 + CMPL BP, $0x08 + JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short + CMPL BP, $0x0c + JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short -emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short: - MOVL R9, SI - LEAL -4(R9), R9 - CMPL SI, $0x08 - JLE repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL SI, $0x0c - JGE cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL BP, $0x00000800 - JLT repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short - -cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: - CMPL R9, $0x00000104 - JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL R9, $0x00010100 - JLT repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short - CMPL R9, $0x0100ffff - JLT repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short - LEAL -16842747(R9), R9 - MOVW $0x001d, (AX) - MOVW $0xfffb, 2(AX) - MOVB $0xff, 4(AX) - ADDQ $0x05, AX - JMP emit_repeat_again_match_nolit_encodeBlockAsm8B_emit_copy_short - -repeat_five_match_nolit_encodeBlockAsm8B_emit_copy_short: - LEAL -65536(R9), R9 - MOVL R9, BP - MOVW $0x001d, (AX) - MOVW R9, 2(AX) - SARL $0x10, BP - MOVB BP, 4(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeBlockAsm8B - -repeat_four_match_nolit_encodeBlockAsm8B_emit_copy_short: +cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: + CMPL R9, $0x00000104 + JLT repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short LEAL -256(R9), R9 MOVW $0x0019, (AX) MOVW R9, 2(AX) @@ -4653,8 +3736,6 @@ repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short: MOVW R9, (AX) ADDQ $0x02, AX JMP match_nolit_emitcopy_end_encodeBlockAsm8B - -repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: XORQ SI, SI LEAL 1(SI)(R9*4), R9 MOVB BP, 1(AX) @@ -4669,8 +3750,6 @@ repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short: two_byte_offset_short_match_nolit_encodeBlockAsm8B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeBlockAsm8B - CMPL BP, $0x00000800 - JGE emit_copy_three_match_nolit_encodeBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) @@ -4741,25 +3820,6 @@ emit_remainder_ok_encodeBlockAsm8B: JLT one_byte_emit_remainder_encodeBlockAsm8B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeBlockAsm8B - CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeBlockAsm8B - CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeBlockAsm8B - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_emit_remainder_encodeBlockAsm8B - -four_bytes_emit_remainder_encodeBlockAsm8B: - MOVL DX, BX - SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_emit_remainder_encodeBlockAsm8B - -three_bytes_emit_remainder_encodeBlockAsm8B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -5891,25 +4951,6 @@ repeat_extend_back_end_encodeSnappyBlockAsm12B: JLT one_byte_repeat_emit_encodeSnappyBlockAsm12B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm12B - CMPL BP, $0x00010000 - JLT three_bytes_repeat_emit_encodeSnappyBlockAsm12B - CMPL BP, $0x01000000 - JLT four_bytes_repeat_emit_encodeSnappyBlockAsm12B - MOVB $0xfc, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B - -four_bytes_repeat_emit_encodeSnappyBlockAsm12B: - MOVL BP, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BP, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_repeat_emit_encodeSnappyBlockAsm12B - -three_bytes_repeat_emit_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX @@ -6089,29 +5130,6 @@ repeat_extend_forward_end_encodeSnappyBlockAsm12B: MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI - CMPL SI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B - -four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B: - CMPL BP, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BP), BP - ADDQ $0x05, AX - CMPL BP, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B - JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm12B - -four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm12B: - TESTL BP, BP - JZ repeat_end_emit_encodeSnappyBlockAsm12B - MOVB $0x03, BL - LEAL -4(BX)(BP*4), BP - MOVB BP, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeSnappyBlockAsm12B two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B: CMPL BP, $0x40 @@ -6212,25 +5230,6 @@ match_dst_size_check_encodeSnappyBlockAsm12B: JLT one_byte_match_emit_encodeSnappyBlockAsm12B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x00010000 - JLT three_bytes_match_emit_encodeSnappyBlockAsm12B - CMPL DI, $0x01000000 - JLT four_bytes_match_emit_encodeSnappyBlockAsm12B - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm12B - -four_bytes_match_emit_encodeSnappyBlockAsm12B: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm12B - -three_bytes_match_emit_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX @@ -6413,29 +5412,6 @@ match_nolit_end_encodeSnappyBlockAsm12B: MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) - CMPL BP, $0x00010000 - JL two_byte_offset_match_nolit_encodeSnappyBlockAsm12B - -four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B: - CMPL R9, $0x40 - JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B - MOVB $0xff, (AX) - MOVL BP, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 - JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B - JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm12B - -four_bytes_remain_match_nolit_encodeSnappyBlockAsm12B: - TESTL R9, R9 - JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm12B - MOVB $0x03, BL - LEAL -4(BX)(R9*4), R9 - MOVB R9, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm12B two_byte_offset_match_nolit_encodeSnappyBlockAsm12B: CMPL R9, $0x40 @@ -6521,25 +5497,6 @@ emit_remainder_ok_encodeSnappyBlockAsm12B: JLT one_byte_emit_remainder_encodeSnappyBlockAsm12B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm12B - CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeSnappyBlockAsm12B - CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeSnappyBlockAsm12B - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B - -four_bytes_emit_remainder_encodeSnappyBlockAsm12B: - MOVL DX, BX - SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_emit_remainder_encodeSnappyBlockAsm12B - -three_bytes_emit_remainder_encodeSnappyBlockAsm12B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -6781,25 +5738,6 @@ repeat_extend_back_end_encodeSnappyBlockAsm10B: JLT one_byte_repeat_emit_encodeSnappyBlockAsm10B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm10B - CMPL BP, $0x00010000 - JLT three_bytes_repeat_emit_encodeSnappyBlockAsm10B - CMPL BP, $0x01000000 - JLT four_bytes_repeat_emit_encodeSnappyBlockAsm10B - MOVB $0xfc, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B - -four_bytes_repeat_emit_encodeSnappyBlockAsm10B: - MOVL BP, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BP, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_repeat_emit_encodeSnappyBlockAsm10B - -three_bytes_repeat_emit_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX @@ -6979,29 +5917,6 @@ repeat_extend_forward_end_encodeSnappyBlockAsm10B: MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI - CMPL SI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B - -four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B: - CMPL BP, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BP), BP - ADDQ $0x05, AX - CMPL BP, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B - JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm10B - -four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm10B: - TESTL BP, BP - JZ repeat_end_emit_encodeSnappyBlockAsm10B - MOVB $0x03, BL - LEAL -4(BX)(BP*4), BP - MOVB BP, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeSnappyBlockAsm10B two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B: CMPL BP, $0x40 @@ -7102,25 +6017,6 @@ match_dst_size_check_encodeSnappyBlockAsm10B: JLT one_byte_match_emit_encodeSnappyBlockAsm10B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x00010000 - JLT three_bytes_match_emit_encodeSnappyBlockAsm10B - CMPL DI, $0x01000000 - JLT four_bytes_match_emit_encodeSnappyBlockAsm10B - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm10B - -four_bytes_match_emit_encodeSnappyBlockAsm10B: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm10B - -three_bytes_match_emit_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX @@ -7303,29 +6199,6 @@ match_nolit_end_encodeSnappyBlockAsm10B: MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) - CMPL BP, $0x00010000 - JL two_byte_offset_match_nolit_encodeSnappyBlockAsm10B - -four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B: - CMPL R9, $0x40 - JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B - MOVB $0xff, (AX) - MOVL BP, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 - JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B - JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm10B - -four_bytes_remain_match_nolit_encodeSnappyBlockAsm10B: - TESTL R9, R9 - JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm10B - MOVB $0x03, BL - LEAL -4(BX)(R9*4), R9 - MOVB R9, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm10B two_byte_offset_match_nolit_encodeSnappyBlockAsm10B: CMPL R9, $0x40 @@ -7411,25 +6284,6 @@ emit_remainder_ok_encodeSnappyBlockAsm10B: JLT one_byte_emit_remainder_encodeSnappyBlockAsm10B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm10B - CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeSnappyBlockAsm10B - CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeSnappyBlockAsm10B - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B - -four_bytes_emit_remainder_encodeSnappyBlockAsm10B: - MOVL DX, BX - SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_emit_remainder_encodeSnappyBlockAsm10B - -three_bytes_emit_remainder_encodeSnappyBlockAsm10B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX @@ -7671,25 +6525,6 @@ repeat_extend_back_end_encodeSnappyBlockAsm8B: JLT one_byte_repeat_emit_encodeSnappyBlockAsm8B CMPL BP, $0x00000100 JLT two_bytes_repeat_emit_encodeSnappyBlockAsm8B - CMPL BP, $0x00010000 - JLT three_bytes_repeat_emit_encodeSnappyBlockAsm8B - CMPL BP, $0x01000000 - JLT four_bytes_repeat_emit_encodeSnappyBlockAsm8B - MOVB $0xfc, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B - -four_bytes_repeat_emit_encodeSnappyBlockAsm8B: - MOVL BP, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW BP, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_repeat_emit_encodeSnappyBlockAsm8B - -three_bytes_repeat_emit_encodeSnappyBlockAsm8B: MOVB $0xf4, (AX) MOVW BP, 1(AX) ADDQ $0x03, AX @@ -7869,29 +6704,6 @@ repeat_extend_forward_end_encodeSnappyBlockAsm8B: MOVL CX, BP SUBL SI, BP MOVL 16(SP), SI - CMPL SI, $0x00010000 - JL two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B - -four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B: - CMPL BP, $0x40 - JLE four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B - MOVB $0xff, (AX) - MOVL SI, 1(AX) - LEAL -64(BP), BP - ADDQ $0x05, AX - CMPL BP, $0x04 - JL four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B - JMP four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm8B - -four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm8B: - TESTL BP, BP - JZ repeat_end_emit_encodeSnappyBlockAsm8B - MOVB $0x03, BL - LEAL -4(BX)(BP*4), BP - MOVB BP, (AX) - MOVL SI, 1(AX) - ADDQ $0x05, AX - JMP repeat_end_emit_encodeSnappyBlockAsm8B two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: CMPL BP, $0x40 @@ -7905,8 +6717,6 @@ two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B: two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B: CMPL BP, $0x0c JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B - CMPL SI, $0x00000800 - JGE emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(BP*4), BP MOVB SI, 1(AX) @@ -7992,25 +6802,6 @@ match_dst_size_check_encodeSnappyBlockAsm8B: JLT one_byte_match_emit_encodeSnappyBlockAsm8B CMPL DI, $0x00000100 JLT two_bytes_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x00010000 - JLT three_bytes_match_emit_encodeSnappyBlockAsm8B - CMPL DI, $0x01000000 - JLT four_bytes_match_emit_encodeSnappyBlockAsm8B - MOVB $0xfc, (AX) - MOVL DI, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm8B - -four_bytes_match_emit_encodeSnappyBlockAsm8B: - MOVL DI, R9 - SHRL $0x10, R9 - MOVB $0xf8, (AX) - MOVW DI, 1(AX) - MOVB R9, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_match_emit_encodeSnappyBlockAsm8B - -three_bytes_match_emit_encodeSnappyBlockAsm8B: MOVB $0xf4, (AX) MOVW DI, 1(AX) ADDQ $0x03, AX @@ -8193,29 +6984,6 @@ match_nolit_end_encodeSnappyBlockAsm8B: MOVL 16(SP), BP ADDL $0x04, R9 MOVL CX, 12(SP) - CMPL BP, $0x00010000 - JL two_byte_offset_match_nolit_encodeSnappyBlockAsm8B - -four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B: - CMPL R9, $0x40 - JLE four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B - MOVB $0xff, (AX) - MOVL BP, 1(AX) - LEAL -64(R9), R9 - ADDQ $0x05, AX - CMPL R9, $0x04 - JL four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B - JMP four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm8B - -four_bytes_remain_match_nolit_encodeSnappyBlockAsm8B: - TESTL R9, R9 - JZ match_nolit_emitcopy_end_encodeSnappyBlockAsm8B - MOVB $0x03, BL - LEAL -4(BX)(R9*4), R9 - MOVB R9, (AX) - MOVL BP, 1(AX) - ADDQ $0x05, AX - JMP match_nolit_emitcopy_end_encodeSnappyBlockAsm8B two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: CMPL R9, $0x40 @@ -8229,8 +6997,6 @@ two_byte_offset_match_nolit_encodeSnappyBlockAsm8B: two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B: CMPL R9, $0x0c JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B - CMPL BP, $0x00000800 - JGE emit_copy_three_match_nolit_encodeSnappyBlockAsm8B MOVB $0x01, BL LEAL -16(BX)(R9*4), R9 MOVB BP, 1(AX) @@ -8301,25 +7067,6 @@ emit_remainder_ok_encodeSnappyBlockAsm8B: JLT one_byte_emit_remainder_encodeSnappyBlockAsm8B CMPL DX, $0x00000100 JLT two_bytes_emit_remainder_encodeSnappyBlockAsm8B - CMPL DX, $0x00010000 - JLT three_bytes_emit_remainder_encodeSnappyBlockAsm8B - CMPL DX, $0x01000000 - JLT four_bytes_emit_remainder_encodeSnappyBlockAsm8B - MOVB $0xfc, (AX) - MOVL DX, 1(AX) - ADDQ $0x05, AX - JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B - -four_bytes_emit_remainder_encodeSnappyBlockAsm8B: - MOVL DX, BX - SHRL $0x10, BX - MOVB $0xf8, (AX) - MOVW DX, 1(AX) - MOVB BL, 3(AX) - ADDQ $0x04, AX - JMP memmove_long_emit_remainder_encodeSnappyBlockAsm8B - -three_bytes_emit_remainder_encodeSnappyBlockAsm8B: MOVB $0xf4, (AX) MOVW DX, 1(AX) ADDQ $0x03, AX