From 8b3b1751198e83cedca96aa5203b697cd85a0f01 Mon Sep 17 00:00:00 2001
From: Klaus Post
Date: Tue, 22 Jun 2021 10:15:17 +0200
Subject: [PATCH] zstd: Simplify hashing functions

Inlining now works properly with constant folding.
---
 zstd/enc_best.go   | 39 +++++++++++++++-------------
 zstd/enc_better.go | 64 ++++++++++++++++++++++++----------------------
 zstd/enc_dfast.go  | 61 ++++++++++++++++++++++---------------------
 zstd/enc_fast.go   | 37 ++++++++++++++-------------
 zstd/hash.go       | 60 +++++++++----------------------------
 5 files changed, 117 insertions(+), 144 deletions(-)

diff --git a/zstd/enc_best.go b/zstd/enc_best.go
index b7d4b90047..35ab09fc1f 100644
--- a/zstd/enc_best.go
+++ b/zstd/enc_best.go
@@ -12,6 +12,7 @@ import (
 const (
 	bestLongTableBits = 20                     // Bits used in the long match table
 	bestLongTableSize = 1 << bestLongTableBits // Size of the table
+	bestLongLen       = 8                      // Bytes used for table hash
 
 	// Note: Increasing the short table bits or making the hash shorter
 	// can actually lead to compression degradation since it will 'steal' more from the
@@ -19,6 +20,8 @@ const (
 	// This greatly depends on the type of input.
 	bestShortTableBits = 16                      // Bits used in the short match table
 	bestShortTableSize = 1 << bestShortTableBits // Size of the table
+	bestShortLen       = 4                       // Bytes used for table hash
+
 )
 
 // bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
@@ -174,8 +177,8 @@ encodeLoop:
 		}
 
 		const goodEnough = 100
 
-		nextHashL := hash8(cv, bestLongTableBits)
-		nextHashS := hash4x64(cv, bestShortTableBits)
+		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
+		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
@@ -209,11 +212,11 @@ encodeLoop:
 		}
 		s++
-		candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
+		candidateS = e.table[hashLen(cv>>8, bestShortTableBits, bestShortLen)]
 		cv = load6432(src, s)
 		cv2 := load6432(src, s+1)
-		candidateL = e.longTable[hash8(cv, bestLongTableBits)]
-		candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
+		candidateL = e.longTable[hashLen(cv, bestLongTableBits, bestLongLen)]
+		candidateL2 := e.longTable[hashLen(cv2, bestLongTableBits, bestLongLen)]
 
 		best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
 		best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
@@ -224,7 +227,7 @@ encodeLoop:
 			// See if we can find a better match by checking where the current best ends.
 			// Use that offset to see if we can find a better full match.
 			if sAt := best.s + best.length; sAt < sLimit {
-				nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
+				nextHashL := hashLen(load6432(src, sAt), bestLongTableBits, bestLongLen)
 				candidateEnd := e.longTable[nextHashL]
 				if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
 					bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
@@ -284,8 +287,8 @@ encodeLoop:
 			off := index0 + e.cur
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
-				h0 := hash8(cv0, bestLongTableBits)
-				h1 := hash4x64(cv0, bestShortTableBits)
+				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
+				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 				e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
 				off++
@@ -352,8 +355,8 @@ encodeLoop:
 			// every entry
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
-				h0 := hash8(cv0, bestLongTableBits)
-				h1 := hash4x64(cv0, bestShortTableBits)
+				h0 := hashLen(cv0, bestLongTableBits, bestLongLen)
+				h1 := hashLen(cv0, bestShortTableBits, bestShortLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 				e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
@@ -374,8 +377,8 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHashS := hash4x64(cv, bestShortTableBits)
-		nextHashL := hash8(cv, bestLongTableBits)
+		nextHashS := hashLen(cv, bestShortTableBits, bestShortLen)
+		nextHashL := hashLen(cv, bestLongTableBits, bestLongLen)
 
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
@@ -441,10 +444,10 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 			const hashLog = bestShortTableBits
 
 			cv := load6432(d.content, i-e.maxMatchOff)
-			nextHash := hash4x64(cv, hashLog)      // 0 -> 4
-			nextHash1 := hash4x64(cv>>8, hashLog)  // 1 -> 5
-			nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
-			nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
+			nextHash := hashLen(cv, hashLog, bestShortLen)      // 0 -> 4
+			nextHash1 := hashLen(cv>>8, hashLog, bestShortLen)  // 1 -> 5
+			nextHash2 := hashLen(cv>>16, hashLog, bestShortLen) // 2 -> 6
+			nextHash3 := hashLen(cv>>24, hashLog, bestShortLen) // 3 -> 7
 			e.dictTable[nextHash] = prevEntry{
 				prev:   e.dictTable[nextHash].offset,
 				offset: i,
@@ -472,7 +475,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 	}
 	if len(d.content) >= 8 {
 		cv := load6432(d.content, 0)
-		h := hash8(cv, bestLongTableBits)
+		h := hashLen(cv, bestLongTableBits, bestLongLen)
 		e.dictLongTable[h] = prevEntry{
 			offset: e.maxMatchOff,
 			prev:   e.dictLongTable[h].offset,
@@ -482,7 +485,7 @@ func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
 		off := 8 // First to read
 		for i := e.maxMatchOff + 1; i < end; i++ {
 			cv = cv>>8 | (uint64(d.content[off]) << 56)
-			h := hash8(cv, bestLongTableBits)
+			h := hashLen(cv, bestLongTableBits, bestLongLen)
 			e.dictLongTable[h] = prevEntry{
 				offset: i,
 				prev:   e.dictLongTable[h].offset,
diff --git a/zstd/enc_better.go b/zstd/enc_better.go
index eab7b5083e..602c05ee0c 100644
--- a/zstd/enc_better.go
+++ b/zstd/enc_better.go
@@ -9,6 +9,7 @@ import "fmt"
 const (
 	betterLongTableBits = 19                       // Bits used in the long match table
 	betterLongTableSize = 1 << betterLongTableBits // Size of the table
+	betterLongLen       = 8                        // Bytes used for table hash
 
 	// Note: Increasing the short table bits or making the hash shorter
 	// can actually lead to compression degradation since it will 'steal' more from the
@@ -16,6 +17,7 @@ const (
 	// This greatly depends on the type of input.
 	betterShortTableBits = 13                        // Bits used in the short match table
 	betterShortTableSize = 1 << betterShortTableBits // Size of the table
+	betterShortLen       = 5                         // Bytes used for table hash
 
 	betterLongTableShardCnt  = 1 << (betterLongTableBits - dictShardBits)    // Number of shards in the table
 	betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
@@ -154,8 +156,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHashS := hash5(cv, betterShortTableBits)
-		nextHashL := hash8(cv, betterLongTableBits)
+		nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+		nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
@@ -214,10 +216,10 @@ encodeLoop:
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
 				cv1 := cv0 >> 8
-				h0 := hash8(cv0, betterLongTableBits)
+				h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
-				e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+				e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 				index0 += 2
 			}
 			cv = load6432(src, s)
@@ -275,10 +277,10 @@ encodeLoop:
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
 				cv1 := cv0 >> 8
-				h0 := hash8(cv0, betterLongTableBits)
+				h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
-				e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+				e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 				index0 += 2
 			}
 			cv = load6432(src, s)
@@ -353,7 +355,7 @@ encodeLoop:
 			// See if we can find a long match at s+1
 			const checkAt = 1
 			cv := load6432(src, s+checkAt)
-			nextHashL = hash8(cv, betterLongTableBits)
+			nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
 			candidateL = e.longTable[nextHashL]
 			coffsetL = candidateL.offset - e.cur
@@ -413,8 +415,8 @@ encodeLoop:
 			}
 
 			// Try to find a better match by searching for a long match at the end of the current best match
-			if true && s+matched < sLimit {
-				nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+			if s+matched < sLimit {
+				nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
 				cv := load3232(src, s)
 				candidateL := e.longTable[nextHashL]
 				coffsetL := candidateL.offset - e.cur - matched
@@ -495,10 +497,10 @@ encodeLoop:
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
 				cv1 := cv0 >> 8
-				h0 := hash8(cv0, betterLongTableBits)
+				h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
-				e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)}
+				e.table[hashLen(cv1, betterShortTableBits, betterShortLen)] = tableEntry{offset: off + 1, val: uint32(cv1)}
 				index0 += 2
 			}
@@ -516,8 +518,8 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHashS := hash5(cv, betterShortTableBits)
-		nextHashL := hash8(cv, betterLongTableBits)
+		nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+		nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
 
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
@@ -672,8 +674,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHashS := hash5(cv, betterShortTableBits)
-		nextHashL := hash8(cv, betterLongTableBits)
+		nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+		nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
@@ -734,11 +736,11 @@ encodeLoop:
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
 				cv1 := cv0 >> 8
-				h0 := hash8(cv0, betterLongTableBits)
+				h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 				e.markLongShardDirty(h0)
-				h1 := hash5(cv1, betterShortTableBits)
+				h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 				e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 				e.markShortShardDirty(h1)
 				index0 += 2
@@ -798,11 +800,11 @@ encodeLoop:
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
 				cv1 := cv0 >> 8
-				h0 := hash8(cv0, betterLongTableBits)
+				h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 				e.markLongShardDirty(h0)
-				h1 := hash5(cv1, betterShortTableBits)
+				h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 				e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 				e.markShortShardDirty(h1)
 				index0 += 2
@@ -879,7 +881,7 @@ encodeLoop:
 			// See if we can find a long match at s+1
 			const checkAt = 1
 			cv := load6432(src, s+checkAt)
-			nextHashL = hash8(cv, betterLongTableBits)
+			nextHashL = hashLen(cv, betterLongTableBits, betterLongLen)
 			candidateL = e.longTable[nextHashL]
 			coffsetL = candidateL.offset - e.cur
@@ -940,7 +942,7 @@ encodeLoop:
 			}
 			// Try to find a better match by searching for a long match at the end of the current best match
 			if s+matched < sLimit {
-				nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
+				nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
 				cv := load3232(src, s)
 				candidateL := e.longTable[nextHashL]
 				coffsetL := candidateL.offset - e.cur - matched
@@ -1021,11 +1023,11 @@ encodeLoop:
 			for index0 < s-1 {
 				cv0 := load6432(src, index0)
 				cv1 := cv0 >> 8
-				h0 := hash8(cv0, betterLongTableBits)
+				h0 := hashLen(cv0, betterLongTableBits, betterLongLen)
 				off := index0 + e.cur
 				e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
 				e.markLongShardDirty(h0)
-				h1 := hash5(cv1, betterShortTableBits)
+				h1 := hashLen(cv1, betterShortTableBits, betterShortLen)
 				e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
 				e.markShortShardDirty(h1)
 				index0 += 2
@@ -1045,8 +1047,8 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHashS := hash5(cv, betterShortTableBits)
-		nextHashL := hash8(cv, betterLongTableBits)
+		nextHashS := hashLen(cv, betterShortTableBits, betterShortLen)
+		nextHashL := hashLen(cv, betterLongTableBits, betterLongLen)
 
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
@@ -1113,10 +1115,10 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
 			const hashLog = betterShortTableBits
 
 			cv := load6432(d.content, i-e.maxMatchOff)
-			nextHash := hash5(cv, hashLog)      // 0 -> 4
-			nextHash1 := hash5(cv>>8, hashLog)  // 1 -> 5
-			nextHash2 := hash5(cv>>16, hashLog) // 2 -> 6
-			nextHash3 := hash5(cv>>24, hashLog) // 3 -> 7
+			nextHash := hashLen(cv, hashLog, betterShortLen)      // 0 -> 4
+			nextHash1 := hashLen(cv>>8, hashLog, betterShortLen)  // 1 -> 5
+			nextHash2 := hashLen(cv>>16, hashLog, betterShortLen) // 2 -> 6
+			nextHash3 := hashLen(cv>>24, hashLog, betterShortLen) // 3 -> 7
 			e.dictTable[nextHash] = tableEntry{
 				val:    uint32(cv),
 				offset: i,
@@ -1145,7 +1147,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
 	}
 	if len(d.content) >= 8 {
 		cv := load6432(d.content, 0)
-		h := hash8(cv, betterLongTableBits)
+		h := hashLen(cv, betterLongTableBits, betterLongLen)
 		e.dictLongTable[h] = prevEntry{
 			offset: e.maxMatchOff,
 			prev:   e.dictLongTable[h].offset,
@@ -1155,7 +1157,7 @@ func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
 		off := 8 // First to read
 		for i := e.maxMatchOff + 1; i < end; i++ {
 			cv = cv>>8 | (uint64(d.content[off]) << 56)
-			h := hash8(cv, betterLongTableBits)
+			h := hashLen(cv, betterLongTableBits, betterLongLen)
 			e.dictLongTable[h] = prevEntry{
 				offset: i,
 				prev:   e.dictLongTable[h].offset,
diff --git a/zstd/enc_dfast.go b/zstd/enc_dfast.go
index 96b21b90e8..d6b3104240 100644
--- a/zstd/enc_dfast.go
+++ b/zstd/enc_dfast.go
@@ -10,6 +10,7 @@ const (
 	dFastLongTableBits = 17                      // Bits used in the long match table
 	dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
 	dFastLongTableMask = dFastLongTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+	dFastLongLen       = 8                       // Bytes used for table hash
 
 	dLongTableShardCnt  = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
 	dLongTableShardSize = dFastLongTableSize / tableShardCnt        // Size of an individual shard
@@ -17,6 +18,8 @@ const (
 	dFastShortTableBits = tableBits                // Bits used in the short match table
 	dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
 	dFastShortTableMask = dFastShortTableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
+	dFastShortLen       = 5                        // Bytes used for table hash
+
 )
 
 type doubleFastEncoder struct {
@@ -124,8 +127,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHashS := hash5(cv, dFastShortTableBits)
-		nextHashL := hash8(cv, dFastLongTableBits)
+		nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+		nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
@@ -208,7 +211,7 @@ encodeLoop:
 			// See if we can find a long match at s+1
 			const checkAt = 1
 			cv := load6432(src, s+checkAt)
-			nextHashL = hash8(cv, dFastLongTableBits)
+			nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
 			candidateL = e.longTable[nextHashL]
 			coffsetL = s - (candidateL.offset - e.cur) + checkAt
@@ -304,16 +307,16 @@ encodeLoop:
 			cv1 := load6432(src, index1)
 			te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 			te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-			e.longTable[hash8(cv0, dFastLongTableBits)] = te0
-			e.longTable[hash8(cv1, dFastLongTableBits)] = te1
+			e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
+			e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
 			cv0 >>= 8
 			cv1 >>= 8
 			te0.offset++
 			te1.offset++
 			te0.val = uint32(cv0)
 			te1.val = uint32(cv1)
-			e.table[hash5(cv0, dFastShortTableBits)] = te0
-			e.table[hash5(cv1, dFastShortTableBits)] = te1
+			e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
+			e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
 
 			cv = load6432(src, s)
@@ -330,8 +333,8 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHashS := hash5(cv, dFastShortTableBits)
-		nextHashL := hash8(cv, dFastLongTableBits)
+		nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+		nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
@@ -436,8 +439,8 @@ encodeLoop:
 	var t int32
 	for {
-		nextHashS := hash5(cv, dFastShortTableBits)
-		nextHashL := hash8(cv, dFastLongTableBits)
+		nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+		nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
@@ -521,7 +524,7 @@ encodeLoop:
 			// See if we can find a long match at s+1
 			const checkAt = 1
 			cv := load6432(src, s+checkAt)
-			nextHashL = hash8(cv, dFastLongTableBits)
+			nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
 			candidateL = e.longTable[nextHashL]
 			coffsetL = s - (candidateL.offset - e.cur) + checkAt
@@ -614,16 +617,16 @@ encodeLoop:
 			cv1 := load6432(src, index1)
 			te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 			te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-			e.longTable[hash8(cv0, dFastLongTableBits)] = te0
-			e.longTable[hash8(cv1, dFastLongTableBits)] = te1
+			e.longTable[hashLen(cv0, dFastLongTableBits, dFastLongLen)] = te0
+			e.longTable[hashLen(cv1, dFastLongTableBits, dFastLongLen)] = te1
 			cv0 >>= 8
 			cv1 >>= 8
 			te0.offset++
 			te1.offset++
 			te0.val = uint32(cv0)
 			te1.val = uint32(cv1)
-			e.table[hash5(cv0, dFastShortTableBits)] = te0
-			e.table[hash5(cv1, dFastShortTableBits)] = te1
+			e.table[hashLen(cv0, dFastShortTableBits, dFastShortLen)] = te0
+			e.table[hashLen(cv1, dFastShortTableBits, dFastShortLen)] = te1
 
 			cv = load6432(src, s)
@@ -640,8 +643,8 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHashS := hash5(cv1>>8, dFastShortTableBits)
-		nextHashL := hash8(cv, dFastLongTableBits)
+		nextHashS := hashLen(cv1>>8, dFastShortTableBits, dFastShortLen)
+		nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
@@ -782,8 +785,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHashS := hash5(cv, dFastShortTableBits)
-		nextHashL := hash8(cv, dFastLongTableBits)
+		nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+		nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 		candidateL := e.longTable[nextHashL]
 		candidateS := e.table[nextHashS]
@@ -868,7 +871,7 @@ encodeLoop:
 			// See if we can find a long match at s+1
 			const checkAt = 1
 			cv := load6432(src, s+checkAt)
-			nextHashL = hash8(cv, dFastLongTableBits)
+			nextHashL = hashLen(cv, dFastLongTableBits, dFastLongLen)
 			candidateL = e.longTable[nextHashL]
 			coffsetL = s - (candidateL.offset - e.cur) + checkAt
@@ -965,8 +968,8 @@ encodeLoop:
 			cv1 := load6432(src, index1)
 			te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
 			te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
-			longHash1 := hash8(cv0, dFastLongTableBits)
-			longHash2 := hash8(cv0, dFastLongTableBits)
+			longHash1 := hashLen(cv0, dFastLongTableBits, dFastLongLen)
+			longHash2 := hashLen(cv1, dFastLongTableBits, dFastLongLen)
 			e.longTable[longHash1] = te0
 			e.longTable[longHash2] = te1
 			e.markLongShardDirty(longHash1)
@@ -977,8 +980,8 @@ encodeLoop:
 			e.markLongShardDirty(longHash2)
 			cv0 >>= 8
 			cv1 >>= 8
 			te0.offset++
 			te1.offset++
 			te0.val = uint32(cv0)
 			te1.val = uint32(cv1)
-			hashVal1 := hash5(cv0, dFastShortTableBits)
-			hashVal2 := hash5(cv1, dFastShortTableBits)
+			hashVal1 := hashLen(cv0, dFastShortTableBits, dFastShortLen)
+			hashVal2 := hashLen(cv1, dFastShortTableBits, dFastShortLen)
 			e.table[hashVal1] = te0
 			e.markShardDirty(hashVal1)
 			e.table[hashVal2] = te1
@@ -999,8 +1002,8 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHashS := hash5(cv, dFastShortTableBits)
-		nextHashL := hash8(cv, dFastLongTableBits)
+		nextHashS := hashLen(cv, dFastShortTableBits, dFastShortLen)
+		nextHashL := hashLen(cv, dFastLongTableBits, dFastLongLen)
 
 		// We have at least 4 byte match.
 		// No need to check backwards. We come straight from a match
@@ -1071,14 +1074,14 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
 	}
 	if len(d.content) >= 8 {
 		cv := load6432(d.content, 0)
-		e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+		e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
 			val:    uint32(cv),
 			offset: e.maxMatchOff,
 		}
 		end := int32(len(d.content)) - 8 + e.maxMatchOff
 		for i := e.maxMatchOff + 1; i < end; i++ {
 			cv = cv>>8 | (uint64(d.content[i-e.maxMatchOff+7]) << 56)
-			e.dictLongTable[hash8(cv, dFastLongTableBits)] = tableEntry{
+			e.dictLongTable[hashLen(cv, dFastLongTableBits, dFastLongLen)] = tableEntry{
 				val:    uint32(cv),
 				offset: i,
 			}
diff --git a/zstd/enc_fast.go b/zstd/enc_fast.go
index 2246d286dc..f2502629bc 100644
--- a/zstd/enc_fast.go
+++ b/zstd/enc_fast.go
@@ -11,12 +11,13 @@ import (
 )
 
 const (
-	tableBits      = 15                               // Bits used in the table
-	tableSize      = 1 << tableBits                   // Size of the table
-	tableShardCnt  = 1 << (tableBits - dictShardBits) // Number of shards in the table
-	tableShardSize = tableSize / tableShardCnt        // Size of an individual shard
-	tableMask      = tableSize - 1                    // Mask for table indices. Redundant, but can eliminate bounds checks.
-	maxMatchLength = 131074
+	tableBits        = 15                               // Bits used in the table
+	tableSize        = 1 << tableBits                   // Size of the table
+	tableShardCnt    = 1 << (tableBits - dictShardBits) // Number of shards in the table
+	tableShardSize   = tableSize / tableShardCnt        // Size of an individual shard
+	tableFastHashLen = 6
+	tableMask        = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
+	maxMatchLength   = 131074
 )
 
 type tableEntry struct {
@@ -122,8 +123,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHash := hash6(cv, hashLog)
-		nextHash2 := hash6(cv>>8, hashLog)
+		nextHash := hashLen(cv, hashLog, tableFastHashLen)
+		nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
 		candidate := e.table[nextHash]
 		candidate2 := e.table[nextHash2]
 		repIndex := s - offset1 + 2
@@ -301,7 +302,7 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHash := hash6(cv, hashLog)
+		nextHash := hashLen(cv, hashLog, tableFastHashLen)
 		e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 		seq.matchLen = uint32(l) - zstdMinMatch
 		seq.litLen = 0
@@ -405,8 +406,8 @@ encodeLoop:
 	// By not using them for the first 3 matches
 
 	for {
-		nextHash := hash6(cv, hashLog)
-		nextHash2 := hash6(cv>>8, hashLog)
+		nextHash := hashLen(cv, hashLog, tableFastHashLen)
+		nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
 		candidate := e.table[nextHash]
 		candidate2 := e.table[nextHash2]
 		repIndex := s - offset1 + 2
@@ -589,7 +590,7 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHash := hash6(cv, hashLog)
+		nextHash := hashLen(cv, hashLog, tableFastHashLen)
 		e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 		seq.matchLen = uint32(l) - zstdMinMatch
 		seq.litLen = 0
@@ -715,8 +716,8 @@ encodeLoop:
 			panic("offset0 was 0")
 		}
 
-		nextHash := hash6(cv, hashLog)
-		nextHash2 := hash6(cv>>8, hashLog)
+		nextHash := hashLen(cv, hashLog, tableFastHashLen)
+		nextHash2 := hashLen(cv>>8, hashLog, tableFastHashLen)
 		candidate := e.table[nextHash]
 		candidate2 := e.table[nextHash2]
 		repIndex := s - offset1 + 2
@@ -896,7 +897,7 @@ encodeLoop:
 		}
 
 		// Store this, since we have it.
-		nextHash := hash6(cv, hashLog)
+		nextHash := hashLen(cv, hashLog, tableFastHashLen)
 		e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
 		e.markShardDirty(nextHash)
 		seq.matchLen = uint32(l) - zstdMinMatch
@@ -957,9 +958,9 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
 			const hashLog = tableBits
 
 			cv := load6432(d.content, i-e.maxMatchOff)
-			nextHash := hash6(cv, hashLog)      // 0 -> 5
-			nextHash1 := hash6(cv>>8, hashLog)  // 1 -> 6
-			nextHash2 := hash6(cv>>16, hashLog) // 2 -> 7
+			nextHash := hashLen(cv, hashLog, tableFastHashLen)      // 0 -> 5
+			nextHash1 := hashLen(cv>>8, hashLog, tableFastHashLen)  // 1 -> 6
+			nextHash2 := hashLen(cv>>16, hashLog, tableFastHashLen) // 2 -> 7
 			e.dictTable[nextHash] = tableEntry{
 				val:    uint32(cv),
 				offset: i,
diff --git a/zstd/hash.go b/zstd/hash.go
index 4a752067fc..cf33f29a1b 100644
--- a/zstd/hash.go
+++ b/zstd/hash.go
@@ -13,24 +13,24 @@ const (
 	prime8bytes = 0xcf1bbcdcb7a56463
 )
 
-// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes.
-// l must be >=4 and <=8. Any other value will return hash for 4 bytes.
-// h should always be <32.
-// Preferably h and l should be a constant.
-// FIXME: This does NOT get resolved, if 'mls' is constant,
-// so this cannot be used.
-func hashLen(u uint64, hashLog, mls uint8) uint32 {
+// hashLen returns a hash of the lowest mls bytes of u with length output bits.
+// mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
+// length should always be < 32.
+// Preferably length and mls should be a constant for inlining.
+func hashLen(u uint64, length, mls uint8) uint32 {
 	switch mls {
+	case 3:
+		return (uint32(u<<8) * prime3bytes) >> (32 - length)
 	case 5:
-		return hash5(u, hashLog)
+		return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
 	case 6:
-		return hash6(u, hashLog)
+		return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
 	case 7:
-		return hash7(u, hashLog)
+		return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
 	case 8:
-		return hash8(u, hashLog)
+		return uint32((u * prime8bytes) >> (64 - length))
 	default:
-		return hash4x64(u, hashLog)
+		return (uint32(u) * prime4bytes) >> (32 - length)
 	}
 }
 
@@ -39,39 +39,3 @@
 func hash3(u uint32, h uint8) uint32 {
 	return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31)
 }
-
-// hash4 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4(u uint32, h uint8) uint32 {
-	return (u * prime4bytes) >> ((32 - h) & 31)
-}
-
-// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <32.
-func hash4x64(u uint64, h uint8) uint32 {
-	return (uint32(u) * prime4bytes) >> ((32 - h) & 31)
-}
-
-// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash5(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63))
-}
-
-// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash6(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63))
-}
-
-// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash7(u uint64, h uint8) uint32 {
-	return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63))
-}
-
-// hash8 returns the hash of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash8(u uint64, h uint8) uint32 {
-	return uint32((u * prime8bytes) >> ((64 - h) & 63))
-}
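
Note on why the consolidation is now safe: hashLen only collapses to a single
multiply-and-shift when both `length` and `mls` are compile-time constants at
the call site, which is why every caller above passes the new *Len constants.
The standalone sketch below (not part of the commit; the prime constants
other than prime8bytes are assumed to match zstd/hash.go) checks the
consolidated function against the old hash8 helper for the 8-byte case.

package main

import "fmt"

const (
	// Multiplicative-hash primes. prime8bytes is quoted in the patch above;
	// the others are assumed to match the values in zstd/hash.go.
	prime3bytes = 506832829
	prime4bytes = 2654435761
	prime5bytes = 889523592379
	prime6bytes = 227718039650203
	prime7bytes = 58295818150454627
	prime8bytes = 0xcf1bbcdcb7a56463
)

// hashLen mirrors the consolidated function from the patch: it hashes the
// lowest mls bytes of u into `length` output bits. With constant mls and
// length, the switch and the shift amounts fold away at compile time.
func hashLen(u uint64, length, mls uint8) uint32 {
	switch mls {
	case 3:
		return (uint32(u<<8) * prime3bytes) >> (32 - length)
	case 5:
		return uint32(((u << (64 - 40)) * prime5bytes) >> (64 - length))
	case 6:
		return uint32(((u << (64 - 48)) * prime6bytes) >> (64 - length))
	case 7:
		return uint32(((u << (64 - 56)) * prime7bytes) >> (64 - length))
	case 8:
		return uint32((u * prime8bytes) >> (64 - length))
	default:
		return (uint32(u) * prime4bytes) >> (32 - length)
	}
}

// hash8 reproduces the removed dedicated helper so the two can be compared.
func hash8(u uint64, h uint8) uint32 {
	return uint32((u * prime8bytes) >> ((64 - h) & 63))
}

func main() {
	const bits = 20 // e.g. bestLongTableBits
	u := uint64(0x0807060504030201)
	// The consolidated and dedicated forms agree bit for bit.
	fmt.Println(hashLen(u, bits, 8) == hash8(u, bits)) // true
}

Building with `go build -gcflags=-m` should report hashLen as inlinable;
with a non-constant mls the switch survives, which is the situation the old
FIXME comment described.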