forked from rclone/rclone
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allows compressing short arbitrary strings, returning the result as a string in base64 URL encoding. A generator for the tables is included and a few samples have been added; add more to init.go. Tested with fuzzing for crash resistance and symmetry, see fuzz.go. Requires klauspost/compress#275 - until it is released, a replace directive has been added to go.mod.
- Loading branch information
Showing
8 changed files
with
369 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,4 @@ rclone.iml | |
*.test | ||
*.log | ||
*.iml | ||
fuzz-build.zip |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
package filename | ||
|
||
import ( | ||
"bytes" | ||
"encoding/base64" | ||
"encoding/binary" | ||
"errors" | ||
"sync" | ||
|
||
"github.com/klauspost/compress/huff0" | ||
) | ||
|
||
// Sentinel errors reported by Decode.
var (
	// ErrCorrupted signals that an encoded file name could not be decoded.
	ErrCorrupted = errors.New("file name corrupt")

	// ErrUnsupported signals an encoding that this version does not know:
	// the name may come from a future version, or it may be corrupt.
	ErrUnsupported = errors.New("file name possibly generated by future version of rclone")
)
|
||
// Custom decoder for tableCustom types. Stateful, so must have lock. | ||
var customDec huff0.Scratch | ||
var customDecMu sync.Mutex | ||
|
||
// Decode an encoded string. | ||
func Decode(s string) (string, error) { | ||
if len(s) < 1 { | ||
return "", ErrCorrupted | ||
} | ||
table := decodeMap[s[0]] | ||
if table == 0 { | ||
return "", ErrCorrupted | ||
} | ||
table-- | ||
s = s[1:] | ||
|
||
data := make([]byte, base64.URLEncoding.DecodedLen(len(s))) | ||
n, err := base64.URLEncoding.Decode(data, ([]byte)(s)) | ||
if err != nil || n < 0 { | ||
return "", ErrCorrupted | ||
} | ||
data = data[:n] | ||
|
||
switch table { | ||
case tableUncompressed: | ||
return string(data), nil | ||
case tableReserved: | ||
return "", ErrUnsupported | ||
case tableRLE: | ||
if len(data) < 2 { | ||
return "", ErrCorrupted | ||
} | ||
n, used := binary.Uvarint(data[:len(data)-1]) | ||
if used <= 0 || n > maxLength { | ||
return "", ErrCorrupted | ||
} | ||
return string(bytes.Repeat(data[len(data)-1:], int(n))), nil | ||
case tableCustom: | ||
customDecMu.Lock() | ||
defer customDecMu.Unlock() | ||
_, data, err := huff0.ReadTable(data, &customDec) | ||
if err != nil { | ||
return "", ErrCorrupted | ||
} | ||
customDec.MaxDecodedSize = maxLength | ||
decoded, err := customDec.Decompress1X(data) | ||
if err != nil { | ||
return "", ErrCorrupted | ||
} | ||
return string(decoded), nil | ||
default: | ||
if table >= byte(len(decTables)) { | ||
return "", ErrCorrupted | ||
} | ||
dec := decTables[table] | ||
if dec == nil { | ||
return "", ErrUnsupported | ||
} | ||
var dst [maxLength]byte | ||
name, err := dec.Decompress1X(dst[:0], data) | ||
if err != nil { | ||
return "", ErrCorrupted | ||
} | ||
return string(name), nil | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package filename | ||
|
||
import ( | ||
"encoding/base64" | ||
"encoding/binary" | ||
|
||
"github.com/klauspost/compress/huff0" | ||
) | ||
|
||
// Encode will encode the string and return a base64 (url) compatible version of it. | ||
// Calling Decode with the returned string should always succeed. | ||
// It is not a requirement that the input string is valid utf-8. | ||
func Encode(s string) string { | ||
initCoders() | ||
bestSize := len(s) | ||
bestTable := tableUncompressed | ||
org := []byte(s) | ||
bestOut := []byte(s) | ||
|
||
// Try all tables and choose the best | ||
for i, enc := range encTables[:] { | ||
if len(org) <= 1 || len(org) > maxLength { | ||
// Use the uncompressed | ||
break | ||
} | ||
if enc == nil { | ||
continue | ||
} | ||
// Try to encode using table. | ||
err := func() error { | ||
encTableLocks[i].Lock() | ||
defer encTableLocks[i].Unlock() | ||
out, _, err := huff0.Compress1X(org, enc) | ||
if err != nil { | ||
return err | ||
} | ||
if len(out) < bestSize { | ||
bestOut = bestOut[:len(out)] | ||
bestTable = i | ||
bestSize = len(out) | ||
copy(bestOut, out) | ||
} | ||
return nil | ||
}() | ||
// If input is a single byte repeated store as RLE or save uncompressed. | ||
if err == huff0.ErrUseRLE { | ||
if len(org) > 2 { | ||
// Encode as one byte repeated since it will be smaller than uncompressed. | ||
n := binary.PutUvarint(bestOut, uint64(len(org))) | ||
bestOut = bestOut[:n+1] | ||
bestOut[n] = org[0] | ||
bestSize = n + 1 | ||
bestTable = tableRLE | ||
} | ||
break | ||
} | ||
} | ||
|
||
return string(encodeURL[bestTable]) + base64.URLEncoding.EncodeToString(bestOut) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
//+build gofuzz | ||
|
||
package filename | ||
|
||
import ( | ||
"bytes" | ||
"fmt" | ||
) | ||
|
||
// Run like: | ||
// go-fuzz-build -o=fuzz-build.zip -func=Fuzz . && go-fuzz -minimize=5s -bin=fuzz-build.zip -workdir=testdata/corpus -procs=24 | ||
|
||
// Fuzz test the provided input. | ||
func Fuzz(data []byte) int { | ||
// First try to decode as is. | ||
// We don't care about the result, it just shouldn't crash. | ||
Decode(string(data)) | ||
|
||
// Now encode | ||
enc := Encode(string(data)) | ||
|
||
// And decoded must match | ||
decoded, err := Decode(enc) | ||
if err != nil { | ||
panic(fmt.Sprintf("error decoding %q, input %q: %v", enc, string(data), err)) | ||
} | ||
if !bytes.Equal(data, []byte(decoded)) { | ||
panic(fmt.Sprintf("decode mismatch, encoded: %q, org: %q, got: %q", enc, string(data), decoded)) | ||
} | ||
|
||
// Everything is good. | ||
return 1 | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
//+build ignore | ||
|
||
package main | ||
|
||
import ( | ||
"encoding/base64" | ||
"fmt" | ||
"math" | ||
|
||
"github.com/klauspost/compress" | ||
"github.com/klauspost/compress/huff0" | ||
) | ||
|
||
// Replace/add histogram data and execute go run gentable.go | ||
|
||
func main() { | ||
// Allow non-represented characters. | ||
const omitUnused = false | ||
|
||
histogram := [256]uint64{ | ||
// ncw home directory | ||
//0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 19442, 760, 0, 349, 570, 1520, 199, 76, 685, 654, 0, 40377, 1605, 395132, 935270, 0, 1156377, 887730, 811737, 712241, 693240, 689139, 675964, 656417, 666577, 657413, 532, 24, 0, 145, 0, 3, 946, 44932, 37362, 46126, 36752, 76346, 19338, 47457, 14288, 38163, 4350, 7867, 36541, 65011, 30255, 26792, 22097, 1803, 39191, 61965, 76585, 11887, 12896, 5931, 1935, 1731, 1385, 1279, 9, 1278, 1, 420185, 0, 1146359, 746359, 968896, 868703, 1393640, 745019, 354147, 159462, 483979, 169092, 75937, 385858, 322166, 466635, 571268, 447132, 13792, 446484, 736844, 732675, 170232, 112983, 63184, 142357, 173945, 21521, 250, 0, 250, 4140, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 39, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 4, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 15, 0, 0, 0, 10, 0, 5, 0, 0, 0, 0, 0, 0, 283, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
//Images: | ||
//0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 765, 0, 0, 0, 0, 0, 8, 7, 3, 3, 0, 29, 53, 247265, 83587, 0, 265952, 233552, 229781, 71156, 78374, 65141, 46152, 43767, 55603, 39411, 0, 0, 0, 0, 0, 88, 84, 141, 70, 222, 191, 51, 52, 101, 60, 53, 23, 17, 49, 93, 53, 17, 92, 0, 158, 109, 41, 19, 43, 28, 10, 5, 1, 0, 0, 0, 0, 879, 0, 3415, 6770, 39823, 3566, 2491, 964, 42115, 825, 5178, 40755, 483, 1290, 3294, 1720, 6309, 42983, 10, 37739, 3454, 7028, 5077, 854, 227, 1259, 767, 218, 0, 0, 0, 163, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
// Google Drive: | ||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459, 0, 0, 7, 0, 0, 0, 7, 1, 1, 0, 2, 1, 506, 706, 0, 3903, 3552, 3694, 3338, 3262, 3257, 3222, 3249, 3325, 3261, 5, 0, 0, 1, 0, 0, 0, 48, 31, 61, 53, 46, 17, 17, 34, 32, 9, 22, 17, 31, 27, 19, 52, 5, 46, 84, 38, 14, 5, 19, 2, 2, 0, 8, 0, 8, 0, 180, 0, 5847, 3282, 3729, 3695, 3842, 3356, 316, 139, 487, 117, 95, 476, 289, 428, 609, 467, 5, 446, 592, 955, 130, 112, 57, 390, 168, 14, 0, 2, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, | ||
} | ||
|
||
// Override with equally distributed characters | ||
if false { | ||
histogram = [256]uint64{} | ||
var chars string | ||
// base c64 | ||
chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" | ||
// hex | ||
//chars = "0123456789abcdef" | ||
for _, v := range []byte(chars) { | ||
histogram[v] = 1 | ||
} | ||
} | ||
|
||
// Sum up distributions | ||
var total uint64 | ||
for _, v := range histogram[:] { | ||
total += v | ||
} | ||
|
||
// Scale the distribution to approx this size. | ||
const scale = 100 << 10 | ||
var tmp []byte | ||
for i, v := range histogram[:] { | ||
if v == 0 && omitUnused { | ||
continue | ||
} | ||
nf := float64(v) / float64(total) * scale | ||
if nf < 1 { | ||
nf = 1 | ||
} | ||
t2 := make([]byte, int(math.Ceil(nf))) | ||
for j := range t2 { | ||
t2[j] = byte(i) | ||
} | ||
tmp = append(tmp, t2...) | ||
} | ||
|
||
var s huff0.Scratch | ||
s.Reuse = huff0.ReusePolicyNone | ||
_, _, err := huff0.Compress1X(tmp, &s) | ||
if err != nil { | ||
panic(err) | ||
} | ||
fmt.Println("table:", base64.URLEncoding.EncodeToString(s.OutTable)) | ||
|
||
// Encode without ones: | ||
s.Reuse = huff0.ReusePolicyPrefer | ||
tmp = tmp[:0] | ||
for i, v := range histogram[:] { | ||
nf := float64(v) / float64(total) * scale | ||
t2 := make([]byte, int(math.Ceil(nf))) | ||
for j := range t2 { | ||
t2[j] = byte(i) | ||
} | ||
tmp = append(tmp, t2...) | ||
} | ||
_, _, err = huff0.Compress1X(tmp, &s) | ||
fmt.Println("sample", len(tmp), "byte, compressed size:", len(s.OutData)) | ||
fmt.Println("Shannon limit:", compress.ShannonEntropyBits(tmp)/8, "bytes") | ||
if err != nil { | ||
panic(err) | ||
} | ||
|
||
fmt.Printf("avg size: 1 -> %.02f", float64(len(s.OutData))/float64(len(tmp))) | ||
} |
Oops, something went wrong.