From 463e7df6ad9504b38960d0a7296d537792ed3e87 Mon Sep 17 00:00:00 2001 From: Klaus Post Date: Thu, 8 Sep 2022 14:30:14 +0200 Subject: [PATCH] s2: Improve "best" compression (#658) Allow skipping bytes in the beginning. Since we already have pretty good matches we aren't too likely to find better. Improvement is in the 0.0x% to 0.10% range. --- s2/encode_best.go | 27 ++++++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/s2/encode_best.go b/s2/encode_best.go index 61eb73b2dd..1b7ea394fa 100644 --- a/s2/encode_best.go +++ b/s2/encode_best.go @@ -177,14 +177,21 @@ func encodeBlockBest(dst, src []byte) (d int) { best = bestOf(best, matchAt(getPrev(nextLong), s, uint32(cv), false)) } // Search for a match at best match end, see if that is better. - if sAt := best.s + best.length; sAt < sLimit { - sBack := best.s - backL := best.length + // Allow some bytes at the beginning to mismatch. + // Sweet spot is around 1-2 bytes, but depends on input. + // The skipped bytes are tested in Extend backwards, + // and still picked up as part of the match if they match. + const skipBeginning = 2 + const skipEnd = 1 + if sAt := best.s + best.length - skipEnd; sAt < sLimit { + + sBack := best.s + skipBeginning - skipEnd + backL := best.length - skipBeginning // Load initial values cv = load64(src, sBack) - // Search for mismatch + + // Grab candidates... 
next := lTable[hash8(load64(src, sAt), lTableBits)] - //next := sTable[hash4(load64(src, sAt), sTableBits)] if checkAt := getCur(next) - backL; checkAt > 0 { best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) @@ -192,6 +199,16 @@ func encodeBlockBest(dst, src []byte) (d int) { if checkAt := getPrev(next) - backL; checkAt > 0 { best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) } + // Disabled: Extremely small gain + if false { + next = sTable[hash4(load64(src, sAt), sTableBits)] + if checkAt := getCur(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + if checkAt := getPrev(next) - backL; checkAt > 0 { + best = bestOf(best, matchAt(checkAt, sBack, uint32(cv), false)) + } + } } } }