diff --git a/.gitignore b/.gitignore index 48f061587f1..4896ebb4d63 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ /cmd/tempo-cli/tempo-cli /example/docker-compose/example-data/tempo .DS_Store +/tempodb/encoding/benchmark_block \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 2af41ef8479..807722c9ac7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * [CHANGE] Fixed ingester latency spikes on read [#461](https://github.com/grafana/tempo/pull/461) * [CHANGE] Ingester cut blocks based on size instead of trace count. Replace ingester `traces_per_block` setting with `max_block_bytes`. This is a **breaking change**. [#474](https://github.com/grafana/tempo/issues/474) * [CHANGE] Refactor cache section in tempodb. This is a **breaking change** b/c the cache config section has changed. [#485](https://github.com/grafana/tempo/pull/485) +* [FEATURE] Added block compression. This is a **breaking change** b/c some configuration fields moved. [#504](https://github.com/grafana/tempo/pull/504) * [ENHANCEMENT] Serve config at the "/config" endpoint. [#446](https://github.com/grafana/tempo/pull/446) * [ENHANCEMENT] Switch blocklist polling and retention to different concurrency mechanism, add configuration options. [#475](https://github.com/grafana/tempo/issues/475) * [ENHANCEMENT] Add S3 options region and forcepathstyle [#431](https://github.com/grafana/tempo/issues/431) diff --git a/cmd/tempo-cli/cmd-list-block.go b/cmd/tempo-cli/cmd-list-block.go index 3f356cd3b3c..5761692bbcc 100644 --- a/cmd/tempo-cli/cmd-list-block.go +++ b/cmd/tempo-cli/cmd-list-block.go @@ -64,12 +64,12 @@ func dumpBlock(r tempodb_backend.Reader, c tempodb_backend.Compactor, tenantID s Version: unifiedMeta.version, TenantID: tenantID, BlockID: id, - }) + }, r) if err != nil { return err } - iter, err := block.Iterator(10*1024*1024, r) + iter, err := block.Iterator(10 * 1024 * 1024) if err != nil { return err } diff --git a/example/docker-compose/etc/tempo-azure.yaml b/example/docker-compose/etc/tempo-azure.yaml index 56eecb02468..c33a0785f07 100644 --- a/example/docker-compose/etc/tempo-azure.yaml +++ b/example/docker-compose/etc/tempo-azure.yaml @@ -33,10 +33,12 @@ compactor: storage: trace: backend: azure # backend configuration to use - wal: - path: /tmp/tempo/wal # where to store the the wal locally + block: bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives index_downsample: 10 # number of traces per index record + encoding: lz4-64k # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd + wal: + path: /tmp/tempo/wal # where to store the the wal locally azure: container-name: tempo # how to store data in azure endpoint-suffix: azurite:10000 diff --git a/example/docker-compose/etc/tempo-gcs-fake.yaml b/example/docker-compose/etc/tempo-gcs-fake.yaml index 609ca486477..4ec6f139468 100644 --- a/example/docker-compose/etc/tempo-gcs-fake.yaml +++ b/example/docker-compose/etc/tempo-gcs-fake.yaml @@ -34,10 +34,12 @@ compactor: storage: trace: backend: gcs # backend configuration to use - wal: - path: /tmp/tempo/wal # where to store the the wal locally + block: bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives index_downsample: 10 # number of traces per index record + encoding: lz4-64k # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd + wal: + path: /tmp/tempo/wal # where to store the the wal locally gcs: bucket_name: tempo endpoint: https://gcs:4443/storage/v1/ diff --git a/example/docker-compose/etc/tempo-local.yaml b/example/docker-compose/etc/tempo-local.yaml index 1623f9a16f6..6d95cebaa54 100644 --- a/example/docker-compose/etc/tempo-local.yaml +++ b/example/docker-compose/etc/tempo-local.yaml @@ -33,10 +33,12 @@ compactor: storage: trace: backend: local # backend configuration to use - wal: - path: /tmp/tempo/wal # where to store the the wal locally + block: bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives index_downsample: 10 # number of traces per index record + encoding: lz4-64k # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd + wal: + path: /tmp/tempo/wal # where to store the the wal locally local: path: /tmp/tempo/blocks pool: diff --git a/example/docker-compose/etc/tempo-s3-minio.yaml b/example/docker-compose/etc/tempo-s3-minio.yaml index 4823f471c92..7a33b5ea668 100644 --- a/example/docker-compose/etc/tempo-s3-minio.yaml +++ b/example/docker-compose/etc/tempo-s3-minio.yaml @@ -34,10 +34,12 @@ compactor: storage: trace: backend: s3 # backend configuration to use - wal: - path: /tmp/tempo/wal # where to store the the wal locally + block: bloom_filter_false_positive: .05 # bloom filter false positive rate. lower values create larger filters but fewer false positives index_downsample: 10 # number of traces per index record + encoding: lz4-64k # block encoding/compression. options: none, gzip, lz4-64k, lz4-256k, lz4-1M, lz4, snappy, zstd + wal: + path: /tmp/tempo/wal # where to store the the wal locally s3: bucket: tempo # how to store data in s3 endpoint: minio:9000 diff --git a/go.mod b/go.mod index d82f9537005..e325f0c0da3 100644 --- a/go.mod +++ b/go.mod @@ -13,6 +13,7 @@ require ( github.com/gogo/status v1.0.3 github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b github.com/golang/protobuf v1.4.3 + github.com/golang/snappy v0.0.2 github.com/google/uuid v1.1.1 github.com/gorilla/mux v1.7.4 github.com/grafana/loki v1.3.0 @@ -20,10 +21,12 @@ require ( github.com/hashicorp/go-hclog v0.14.0 github.com/jaegertracing/jaeger v1.18.2-0.20200707061226-97d2319ff2be github.com/jsternberg/zap-logfmt v1.0.0 + github.com/klauspost/compress v1.10.5 github.com/minio/minio-go/v7 v7.0.5 github.com/olekukonko/tablewriter v0.0.2 github.com/open-telemetry/opentelemetry-proto v0.4.0 github.com/opentracing/opentracing-go v1.2.0 + github.com/pierrec/lz4/v4 v4.1.3 github.com/pkg/errors v0.9.1 github.com/prometheus/client_golang v1.8.0 github.com/prometheus/client_model v0.2.0 diff --git a/go.sum b/go.sum index 6ca1b94321d..79568c41d29 100644 --- a/go.sum +++ b/go.sum @@ -1233,6 +1233,7 @@ github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0 github.com/klauspost/compress v1.9.4/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A= github.com/klauspost/compress v1.10.4/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/klauspost/compress v1.10.5 h1:7q6vHIqubShURwQz8cQK6yIe/xC3IF0Vm7TGfqjewrc= github.com/klauspost/compress v1.10.5/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= github.com/klauspost/cpuid v1.2.3/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek= @@ -1576,7 +1577,10 @@ github.com/pierrec/lz4 v0.0.0-20190327172049-315a67e90e41/go.mod h1:3/3N9NVKO0je github.com/pierrec/lz4 v1.0.2-0.20190131084431-473cd7ce01a1/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= github.com/pierrec/lz4 v2.3.1-0.20191115212037-9085dacd1e1e+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4 v2.4.1+incompatible h1:mFe7ttWaflA46Mhqh+jUfjp2qTbPYxLB2/OyBppH9dg= github.com/pierrec/lz4 v2.4.1+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= +github.com/pierrec/lz4/v4 v4.1.3 h1:/dvQpkb0o1pVlSgKNQqfkavlnXaIK+hJ0LXsKRUN9D4= +github.com/pierrec/lz4/v4 v4.1.3/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1-0.20171018195549-f15c970de5b7/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= diff --git a/modules/ingester/ingester.go b/modules/ingester/ingester.go index 16bd76d3758..d4e12f00d53 100644 --- a/modules/ingester/ingester.go +++ b/modules/ingester/ingester.go @@ -227,7 +227,7 @@ func (i *Ingester) getOrCreateInstance(instanceID string) (*instance, error) { inst, ok = i.instances[instanceID] if !ok { var err error - inst, err = newInstance(instanceID, i.limiter, i.store.WAL()) + inst, err = newInstance(instanceID, i.limiter, i.store) if err != nil { return nil, err } diff --git a/modules/ingester/ingester_test.go b/modules/ingester/ingester_test.go index cf6d847a744..f9481a4accc 100644 --- a/modules/ingester/ingester_test.go +++ b/modules/ingester/ingester_test.go @@ -14,6 +14,7 @@ import ( "github.com/go-kit/kit/log" "github.com/golang/protobuf/proto" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" "github.com/weaveworks/common/user" "github.com/grafana/tempo/modules/overrides" @@ -21,7 +22,9 @@ import ( "github.com/grafana/tempo/pkg/tempopb" "github.com/grafana/tempo/pkg/util/test" "github.com/grafana/tempo/tempodb" + "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/wal" ) @@ -179,7 +182,7 @@ func TestFlush(t *testing.T) { func defaultIngester(t *testing.T, tmpDir string) (*Ingester, []*tempopb.Trace, [][]byte) { ingesterConfig := defaultIngesterTestConfig() limits, err := overrides.NewOverrides(defaultLimitsTestConfig()) - assert.NoError(t, err, "unexpected error creating overrides") + require.NoError(t, err, "unexpected error creating overrides") s, err := storage.NewStore(storage.Config{ Trace: tempodb.Config{ @@ -187,20 +190,23 @@ func defaultIngester(t *testing.T, tmpDir string) (*Ingester, []*tempopb.Trace, Local: &local.Config{ Path: tmpDir, }, - WAL: &wal.Config{ - Filepath: tmpDir, + Block: &encoding.BlockConfig{ IndexDownsample: 2, BloomFP: .01, + Encoding: backend.EncLZ4_1M, + }, + WAL: &wal.Config{ + Filepath: tmpDir, }, }, }, log.NewNopLogger()) - assert.NoError(t, err, "unexpected error store") + require.NoError(t, err, "unexpected error store") ingester, err := New(ingesterConfig, s, limits) - assert.NoError(t, err, "unexpected error creating ingester") + require.NoError(t, err, "unexpected error creating ingester") err = ingester.starting(context.Background()) - assert.NoError(t, err, "unexpected error starting ingester") + require.NoError(t, err, "unexpected error starting ingester") // make some fake traceIDs/requests traces := make([]*tempopb.Trace, 0) @@ -209,7 +215,7 @@ func defaultIngester(t *testing.T, tmpDir string) (*Ingester, []*tempopb.Trace, for i := 0; i < 10; i++ { id := make([]byte, 16) _, err = rand.Read(id) - assert.NoError(t, err) + require.NoError(t, err) traces = append(traces, test.MakeTrace(10, id)) traceIDs = append(traceIDs, id) @@ -222,7 +228,7 @@ func defaultIngester(t *testing.T, tmpDir string) (*Ingester, []*tempopb.Trace, &tempopb.PushRequest{ Batch: batch, }) - assert.NoError(t, err, "unexpected error pushing") + require.NoError(t, err, "unexpected error pushing") } } diff --git a/modules/ingester/instance.go b/modules/ingester/instance.go index 44c90a7b308..85339ae0942 100644 --- a/modules/ingester/instance.go +++ b/modules/ingester/instance.go @@ -20,6 +20,7 @@ import ( "github.com/grafana/tempo/pkg/tempopb" "github.com/grafana/tempo/pkg/util" + "github.com/grafana/tempo/tempodb" "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/encoding/common" "github.com/grafana/tempo/tempodb/wal" @@ -63,12 +64,12 @@ type instance struct { tracesCreatedTotal prometheus.Counter bytesWrittenTotal prometheus.Counter limiter *Limiter - wal *wal.WAL + writer tempodb.Writer hash hash.Hash32 } -func newInstance(instanceID string, limiter *Limiter, wal *wal.WAL) (*instance, error) { +func newInstance(instanceID string, limiter *Limiter, writer tempodb.Writer) (*instance, error) { i := &instance{ traces: map[uint32]*trace{}, @@ -76,7 +77,7 @@ func newInstance(instanceID string, limiter *Limiter, wal *wal.WAL) (*instance, tracesCreatedTotal: metricTracesCreatedTotal.WithLabelValues(instanceID), bytesWrittenTotal: metricBytesWrittenTotal.WithLabelValues(instanceID), limiter: limiter, - wal: wal, + writer: writer, hash: fnv.New32(), } @@ -153,7 +154,7 @@ func (i *instance) CutBlockIfReady(maxBlockLifetime time.Duration, maxBlockBytes // todo : this should be a queue of blocks to complete with workers go func() { - completeBlock, err := i.completingBlock.Complete(i.wal, i) + completeBlock, err := i.writer.CompleteBlock(i.completingBlock, i) i.blocksMtx.Lock() defer i.blocksMtx.Unlock() @@ -307,7 +308,7 @@ func (i *instance) tokenForTraceID(id []byte) uint32 { // resetHeadBlock() should be called under lock func (i *instance) resetHeadBlock() error { var err error - i.headBlock, err = i.wal.NewBlock(uuid.New(), i.instanceID) + i.headBlock, err = i.writer.WAL().NewBlock(uuid.New(), i.instanceID) i.lastBlockCut = time.Now() return err } diff --git a/modules/ingester/instance_test.go b/modules/ingester/instance_test.go index cdf3412d11c..ca511fd3770 100644 --- a/modules/ingester/instance_test.go +++ b/modules/ingester/instance_test.go @@ -9,9 +9,15 @@ import ( "testing" "time" + "github.com/go-kit/kit/log" "github.com/grafana/tempo/modules/overrides" + "github.com/grafana/tempo/modules/storage" "github.com/grafana/tempo/pkg/tempopb" "github.com/grafana/tempo/pkg/util/test" + "github.com/grafana/tempo/tempodb" + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/backend/local" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/wal" "github.com/stretchr/testify/assert" @@ -36,11 +42,9 @@ func TestInstance(t *testing.T) { defer os.RemoveAll(tempDir) ingester, _, _ := defaultIngester(t, tempDir) - wal := ingester.store.WAL() - request := test.MakeRequest(10, []byte{}) - i, err := newInstance("fake", limiter, wal) + i, err := newInstance("fake", limiter, ingester.store) assert.NoError(t, err, "unexpected error creating new instance") err = i.Push(context.Background(), request) assert.NoError(t, err) @@ -89,12 +93,10 @@ func TestInstanceFind(t *testing.T) { defer os.RemoveAll(tempDir) ingester, _, _ := defaultIngester(t, tempDir) - wal := ingester.store.WAL() - request := test.MakeRequest(10, []byte{}) traceID := test.MustTraceID(request) - i, err := newInstance("fake", limiter, wal) + i, err := newInstance("fake", limiter, ingester.store) assert.NoError(t, err, "unexpected error creating new instance") err = i.Push(context.Background(), request) assert.NoError(t, err) @@ -128,9 +130,8 @@ func TestInstanceDoesNotRace(t *testing.T) { defer os.RemoveAll(tempDir) ingester, _, _ := defaultIngester(t, tempDir) - wal := ingester.store.WAL() - i, err := newInstance("fake", limiter, wal) + i, err := newInstance("fake", limiter, ingester.store) assert.NoError(t, err, "unexpected error creating new instance") end := make(chan struct{}) @@ -197,9 +198,8 @@ func TestInstanceLimits(t *testing.T) { defer os.RemoveAll(tempDir) ingester, _, _ := defaultIngester(t, tempDir) - wal := ingester.store.WAL() - i, err := newInstance("fake", limiter, wal) + i, err := newInstance("fake", limiter, ingester.store) assert.NoError(t, err, "unexpected error creating new instance") type push struct { @@ -427,19 +427,30 @@ func TestInstanceCutBlockIfReady(t *testing.T) { } } -func defaultInstance(t assert.TestingT, tempDir string) *instance { +func defaultInstance(t assert.TestingT, tmpDir string) *instance { limits, err := overrides.NewOverrides(overrides.Limits{}) assert.NoError(t, err, "unexpected error creating limits") limiter := NewLimiter(limits, &ringCountMock{count: 1}, 1) - wal, err := wal.New(&wal.Config{ - Filepath: tempDir, - IndexDownsample: 2, - BloomFP: .01, - }) - assert.NoError(t, err, "unexpected error creating wal") + s, err := storage.NewStore(storage.Config{ + Trace: tempodb.Config{ + Backend: "local", + Local: &local.Config{ + Path: tmpDir, + }, + Block: &encoding.BlockConfig{ + IndexDownsample: 2, + BloomFP: .01, + Encoding: backend.EncLZ4_1M, + }, + WAL: &wal.Config{ + Filepath: tmpDir, + }, + }, + }, log.NewNopLogger()) + assert.NoError(t, err, "unexpected error creating store") - instance, err := newInstance("fake", limiter, wal) + instance, err := newInstance("fake", limiter, s) assert.NoError(t, err, "unexpected error creating new instance") return instance diff --git a/modules/querier/querier.go b/modules/querier/querier.go index 8d1ef103c0b..90c89790ead 100644 --- a/modules/querier/querier.go +++ b/modules/querier/querier.go @@ -29,18 +29,6 @@ import ( ) var ( - metricQueryReads = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "tempo", - Name: "query_reads", - Help: "count of reads", - Buckets: prometheus.ExponentialBuckets(1, 2, 10), - }, []string{"layer"}) - metricQueryBytesRead = promauto.NewHistogramVec(prometheus.HistogramOpts{ - Namespace: "tempo", - Name: "query_bytes_read", - Help: "bytes read", - Buckets: prometheus.ExponentialBuckets(1024*1024, 2, 8), - }, []string{"layer"}) metricIngesterClients = promauto.NewGauge(prometheus.GaugeOpts{ Namespace: "tempo", Name: "querier_ingester_clients", @@ -195,19 +183,12 @@ func (q *Querier) FindTraceByID(ctx context.Context, req *tempopb.TraceByIDReque } span.LogFields(ot_log.String("msg", "searching store")) - partialTraces, metrics, err := q.store.Find(opentracing.ContextWithSpan(ctx, span), userID, req.TraceID, req.BlockStart, req.BlockEnd) + partialTraces, err := q.store.Find(opentracing.ContextWithSpan(ctx, span), userID, req.TraceID, req.BlockStart, req.BlockEnd) if err != nil { return nil, errors.Wrap(err, "error querying store in Querier.FindTraceByID") } span.LogFields(ot_log.String("msg", "done searching store")) - metricQueryReads.WithLabelValues("bloom").Observe(float64(metrics.BloomFilterReads.Load())) - metricQueryBytesRead.WithLabelValues("bloom").Observe(float64(metrics.BloomFilterBytesRead.Load())) - metricQueryReads.WithLabelValues("index").Observe(float64(metrics.IndexReads.Load())) - metricQueryBytesRead.WithLabelValues("index").Observe(float64(metrics.IndexBytesRead.Load())) - metricQueryReads.WithLabelValues("block").Observe(float64(metrics.BlockReads.Load())) - metricQueryBytesRead.WithLabelValues("block").Observe(float64(metrics.BlockBytesRead.Load())) - // combine partialTraces with completeTrace for _, partialTrace := range partialTraces { storeTrace := &tempopb.Trace{} diff --git a/modules/querier/querier_test.go b/modules/querier/querier_test.go index 6b67582cab9..a5d23c4c6d3 100644 --- a/modules/querier/querier_test.go +++ b/modules/querier/querier_test.go @@ -20,7 +20,9 @@ import ( "github.com/grafana/tempo/pkg/util" "github.com/grafana/tempo/pkg/util/test" "github.com/grafana/tempo/tempodb" + "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/pool" "github.com/grafana/tempo/tempodb/wal" ) @@ -51,11 +53,14 @@ func TestReturnAllHits(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ + Encoding: backend.EncNone, IndexDownsample: 10, BloomFP: .05, }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), + }, BlocklistPoll: 50 * time.Millisecond, }, log.NewNopLogger()) assert.NoError(t, err, "unexpected error creating tempodb") @@ -85,7 +90,7 @@ func TestReturnAllHits(t *testing.T) { err = head.Write(testTraceID, bReq) assert.NoError(t, err, "unexpected error writing req") - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) @@ -96,7 +101,7 @@ func TestReturnAllHits(t *testing.T) { time.Sleep(100 * time.Millisecond) // find should return both now - foundBytes, _, err := r.Find(context.Background(), util.FakeTenantID, testTraceID, tempodb.BlockIDMin, tempodb.BlockIDMax) + foundBytes, err := r.Find(context.Background(), util.FakeTenantID, testTraceID, tempodb.BlockIDMin, tempodb.BlockIDMax) assert.NoError(t, err) require.Len(t, foundBytes, 2) diff --git a/modules/storage/config.go b/modules/storage/config.go index 9c14de678bc..a2e19decbee 100644 --- a/modules/storage/config.go +++ b/modules/storage/config.go @@ -6,10 +6,12 @@ import ( "github.com/grafana/tempo/pkg/util" "github.com/grafana/tempo/tempodb" + "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/azure" "github.com/grafana/tempo/tempodb/backend/gcs" "github.com/grafana/tempo/tempodb/backend/local" "github.com/grafana/tempo/tempodb/backend/s3" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/pool" "github.com/grafana/tempo/tempodb/wal" ) @@ -31,8 +33,11 @@ func (cfg *Config) RegisterFlagsAndApplyDefaults(prefix string, f *flag.FlagSet) cfg.Trace.WAL = &wal.Config{} f.StringVar(&cfg.Trace.WAL.Filepath, util.PrefixConfig(prefix, "trace.wal.path"), "/var/tempo/wal", "Path at which store WAL blocks.") - f.Float64Var(&cfg.Trace.WAL.BloomFP, util.PrefixConfig(prefix, "trace.wal.bloom-filter-false-positive"), .05, "Bloom False Positive.") - f.IntVar(&cfg.Trace.WAL.IndexDownsample, util.PrefixConfig(prefix, "trace.wal.index-downsample"), 100, "Number of traces per index record.") + + cfg.Trace.Block = &encoding.BlockConfig{} + f.Float64Var(&cfg.Trace.Block.BloomFP, util.PrefixConfig(prefix, "trace.block.bloom-filter-false-positive"), .05, "Bloom False Positive.") + f.IntVar(&cfg.Trace.Block.IndexDownsample, util.PrefixConfig(prefix, "trace.block.index-downsample"), 100, "Number of traces per index record.") + cfg.Trace.Block.Encoding = backend.EncLZ4_256k cfg.Trace.Azure = &azure.Config{} f.StringVar(&cfg.Trace.Azure.StorageAccountName.Value, util.PrefixConfig(prefix, "trace.azure.storage-account-name"), "", "Azure storage account name.") diff --git a/operations/tempo-mixin/out/tempo-operational.json b/operations/tempo-mixin/out/tempo-operational.json index 99abc3d1835..a7757317ce8 100644 --- a/operations/tempo-mixin/out/tempo-operational.json +++ b/operations/tempo-mixin/out/tempo-operational.json @@ -27,7 +27,8 @@ "editable": true, "gnetId": null, "graphTooltip": 1, - "iteration": 1609831418010, + "id": 115, + "iteration": 1612469301683, "links": [ ], @@ -92,7 +93,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -201,7 +202,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -310,7 +311,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -323,7 +324,7 @@ "targets": [ { "expr": "go_goroutines{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"$namespace/$component\"}", - "legendFormat": "{{job}}", + "legendFormat": "{{pod}}", "refId": "A" } ], @@ -418,7 +419,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -528,7 +529,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -540,7 +541,7 @@ "steppedLine": false, "targets": [ { - "expr": "container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\"}", + "expr": "container_memory_working_set_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"$component.*\", container!=\"POD\"}", "interval": "", "legendFormat": "{{pod}}", "refId": "A" @@ -637,7 +638,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -754,7 +755,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -862,7 +863,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -987,7 +988,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1096,7 +1097,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1214,7 +1215,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": true, "renderer": "flot", @@ -1334,7 +1335,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1444,7 +1445,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": true, "renderer": "flot", @@ -1565,7 +1566,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1680,7 +1681,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1806,7 +1807,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1916,7 +1917,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2037,7 +2038,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2158,7 +2159,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2238,127 +2239,6 @@ { "aliasColors": { - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 20, - "y": 13 - }, - "hiddenSeries": false, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0-9155pre", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(tempo_query_bytes_read_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", - "interval": "", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(tempo_query_bytes_read_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", - "interval": "", - "legendFormat": ".9", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(tempo_query_bytes_read_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", - "interval": "", - "legendFormat": ".5", - "refId": "C" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "Query Bytes Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - }, "bars": false, "dashLength": 10, @@ -2400,7 +2280,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2477,226 +2357,6 @@ "alignLevel": null } }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 16, - "y": 18 - }, - "hiddenSeries": false, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0-9155pre", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.5, sum(rate(tempo_query_reads_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (layer, le))", - "hide": false, - "interval": "", - "legendFormat": "{{layer}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "p50 Reads", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": { - - }, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": { - - } - }, - "overrides": [ - - ] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 19, - "y": 18 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0-9155pre", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [ - - ], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(tempo_query_reads_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (layer, le))", - "hide": false, - "interval": "", - "legendFormat": "{{layer}}", - "refId": "A" - } - ], - "thresholds": [ - - ], - "timeFrom": null, - "timeRegions": [ - - ], - "timeShift": null, - "title": "p99 Reads", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [ - - ] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "collapsed": false, "datasource": null, @@ -2757,7 +2417,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2866,7 +2526,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2975,7 +2635,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3084,7 +2744,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3193,7 +2853,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3302,7 +2962,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": true, "renderer": "flot", @@ -3423,7 +3083,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3532,7 +3192,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3647,7 +3307,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3768,7 +3428,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3908,7 +3568,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -4020,7 +3680,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -4858,7 +4518,7 @@ }, { "color": "red", - "value": 0.05 + "value": 0.050000000000000003 } ] }, @@ -4961,7 +4621,7 @@ } }, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "targets": [ { "expr": "sum(rate(tempo_vulture_trace_error_total{cluster=\"$cluster\", namespace=\"$namespace\", error=\"notfound\"}[1h])) by (secondsago) / sum(rate(tempo_vulture_trace_total{cluster=\"$cluster\", namespace=\"$namespace\"}[1h])) by (secondsago)", @@ -4997,7 +4657,7 @@ }, { "color": "red", - "value": 0.05 + "value": 0.050000000000000003 } ] }, @@ -5100,7 +4760,7 @@ } }, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "targets": [ { "expr": "sum(rate(tempo_vulture_trace_error_total{cluster=\"$cluster\", namespace=\"$namespace\", error=\"missingspans\"}[1h])) by (secondsago) / sum(rate(tempo_vulture_trace_total{cluster=\"$cluster\", namespace=\"$namespace\"}[1h])) by (secondsago)", @@ -5174,7 +4834,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -5286,7 +4946,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -5395,7 +5055,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -5504,7 +5164,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -5767,4 +5427,4 @@ "title": "Tempo Operational", "uid": "AOFXrRUZz", "version": 1 -} \ No newline at end of file +} diff --git a/operations/tempo-mixin/out/tempo-reads.json b/operations/tempo-mixin/out/tempo-reads.json index 593558103d2..b815b54d6bb 100644 --- a/operations/tempo-mixin/out/tempo-reads.json +++ b/operations/tempo-mixin/out/tempo-reads.json @@ -1895,4 +1895,4 @@ "title": "Tempo / Reads", "uid": "", "version": 0 -} \ No newline at end of file +} diff --git a/operations/tempo-mixin/out/tempo-resources.json b/operations/tempo-mixin/out/tempo-resources.json index c8ca9b8d469..406acc9b3b9 100644 --- a/operations/tempo-mixin/out/tempo-resources.json +++ b/operations/tempo-mixin/out/tempo-resources.json @@ -1933,4 +1933,4 @@ "title": "Tempo / Resources", "uid": "", "version": 0 -} \ No newline at end of file +} diff --git a/operations/tempo-mixin/out/tempo-writes.json b/operations/tempo-mixin/out/tempo-writes.json index 4a243dab73c..f155150b3a6 100644 --- a/operations/tempo-mixin/out/tempo-writes.json +++ b/operations/tempo-mixin/out/tempo-writes.json @@ -1507,4 +1507,4 @@ "title": "Tempo / Writes", "uid": "", "version": 0 -} \ No newline at end of file +} diff --git a/operations/tempo-mixin/tempo-operational.json b/operations/tempo-mixin/tempo-operational.json index b6abacc7fac..17db9360100 100644 --- a/operations/tempo-mixin/tempo-operational.json +++ b/operations/tempo-mixin/tempo-operational.json @@ -25,7 +25,8 @@ "editable": true, "gnetId": null, "graphTooltip": 1, - "iteration": 1609831418010, + "id": 115, + "iteration": 1612469301683, "links": [], "panels": [ { @@ -80,7 +81,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -175,7 +176,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -270,7 +271,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -364,7 +365,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -460,7 +461,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -555,7 +556,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -658,7 +659,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -752,7 +753,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -861,7 +862,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -956,7 +957,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1060,7 +1061,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": true, "renderer": "flot", @@ -1166,7 +1167,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1262,7 +1263,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": true, "renderer": "flot", @@ -1369,7 +1370,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1470,7 +1471,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1580,7 +1581,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1676,7 +1677,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1783,7 +1784,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1890,7 +1891,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -1959,113 +1960,6 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 20, - "y": 13 - }, - "hiddenSeries": false, - "id": 13, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0-9155pre", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(tempo_query_bytes_read_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", - "interval": "", - "legendFormat": ".99", - "refId": "A" - }, - { - "expr": "histogram_quantile(.9, sum(rate(tempo_query_bytes_read_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", - "interval": "", - "legendFormat": ".9", - "refId": "B" - }, - { - "expr": "histogram_quantile(.5, sum(rate(tempo_query_bytes_read_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (le))", - "interval": "", - "legendFormat": ".5", - "refId": "C" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "Query Bytes Read", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "decbytes", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "aliasColors": {}, "bars": false, @@ -2104,7 +1998,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2173,198 +2067,6 @@ "alignLevel": null } }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 16, - "y": 18 - }, - "hiddenSeries": false, - "id": 15, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0-9155pre", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.5, sum(rate(tempo_query_reads_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (layer, le))", - "hide": false, - "interval": "", - "legendFormat": "{{layer}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "p50 Reads", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, - { - "aliasColors": {}, - "bars": false, - "dashLength": 10, - "dashes": false, - "datasource": "$ds", - "fieldConfig": { - "defaults": { - "custom": {} - }, - "overrides": [] - }, - "fill": 1, - "fillGradient": 0, - "gridPos": { - "h": 5, - "w": 3, - "x": 19, - "y": 18 - }, - "hiddenSeries": false, - "id": 14, - "legend": { - "avg": false, - "current": false, - "max": false, - "min": false, - "show": false, - "total": false, - "values": false - }, - "lines": true, - "linewidth": 1, - "nullPointMode": "null", - "options": { - "alertThreshold": true - }, - "percentage": false, - "pluginVersion": "7.4.0-9155pre", - "pointradius": 2, - "points": false, - "renderer": "flot", - "seriesOverrides": [], - "spaceLength": 10, - "stack": false, - "steppedLine": false, - "targets": [ - { - "expr": "histogram_quantile(.99, sum(rate(tempo_query_reads_bucket{cluster=\"$cluster\", namespace=\"$namespace\"}[$__rate_interval])) by (layer, le))", - "hide": false, - "interval": "", - "legendFormat": "{{layer}}", - "refId": "A" - } - ], - "thresholds": [], - "timeFrom": null, - "timeRegions": [], - "timeShift": null, - "title": "p99 Reads", - "tooltip": { - "shared": true, - "sort": 0, - "value_type": "individual" - }, - "type": "graph", - "xaxis": { - "buckets": null, - "mode": "time", - "name": null, - "show": true, - "values": [] - }, - "yaxes": [ - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - }, - { - "format": "short", - "label": null, - "logBase": 1, - "max": null, - "min": null, - "show": true - } - ], - "yaxis": { - "align": false, - "alignLevel": null - } - }, { "collapsed": false, "datasource": null, @@ -2417,7 +2119,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2512,7 +2214,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2607,7 +2309,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2702,7 +2404,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2797,7 +2499,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -2892,7 +2594,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": true, "renderer": "flot", @@ -2999,7 +2701,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3094,7 +2796,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3195,7 +2897,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3302,7 +3004,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3424,7 +3126,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -3520,7 +3222,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -4349,7 +4051,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "targets": [ { "expr": "sum(rate(tempo_vulture_trace_error_total{cluster=\"$cluster\", namespace=\"$namespace\", error=\"notfound\"}[1h])) by (secondsago) / sum(rate(tempo_vulture_trace_total{cluster=\"$cluster\", namespace=\"$namespace\"}[1h])) by (secondsago)", @@ -4482,7 +4184,7 @@ "showUnfilled": true, "text": {} }, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "targets": [ { "expr": "sum(rate(tempo_vulture_trace_error_total{cluster=\"$cluster\", namespace=\"$namespace\", error=\"missingspans\"}[1h])) by (secondsago) / sum(rate(tempo_vulture_trace_total{cluster=\"$cluster\", namespace=\"$namespace\"}[1h])) by (secondsago)", @@ -4548,7 +4250,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -4646,7 +4348,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -4741,7 +4443,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", @@ -4836,7 +4538,7 @@ "alertThreshold": true }, "percentage": false, - "pluginVersion": "7.4.0-9155pre", + "pluginVersion": "7.5.0-11433pre", "pointradius": 2, "points": false, "renderer": "flot", diff --git a/tempodb/backend/block_meta.go b/tempodb/backend/block_meta.go index c60048fe4e1..b5bfd329d22 100644 --- a/tempodb/backend/block_meta.go +++ b/tempodb/backend/block_meta.go @@ -23,18 +23,20 @@ type BlockMeta struct { EndTime time.Time `json:"endTime"` TotalObjects int `json:"totalObjects"` CompactionLevel uint8 `json:"compactionLevel"` + Encoding Encoding `json:"encoding"` } -func NewBlockMeta(tenantID string, blockID uuid.UUID) *BlockMeta { +func NewBlockMeta(tenantID string, blockID uuid.UUID, version string, encoding Encoding) *BlockMeta { now := time.Now() b := &BlockMeta{ - Version: "v0", // jpe - think about this + Version: version, BlockID: blockID, MinID: []byte{}, MaxID: []byte{}, TenantID: tenantID, StartTime: now, EndTime: now, + Encoding: encoding, } return b diff --git a/tempodb/backend/block_meta_test.go b/tempodb/backend/block_meta_test.go index 78168a33c9f..54d41417f4f 100644 --- a/tempodb/backend/block_meta_test.go +++ b/tempodb/backend/block_meta_test.go @@ -14,11 +14,16 @@ const ( ) func TestBlockMeta(t *testing.T) { + testVersion := "blerg" + testEncoding := EncLZ4_256k + id := uuid.New() - b := NewBlockMeta(testTenantID, id) + b := NewBlockMeta(testTenantID, id, testVersion, testEncoding) assert.Equal(t, id, b.BlockID) assert.Equal(t, testTenantID, b.TenantID) + assert.Equal(t, testVersion, b.Version) + assert.Equal(t, testEncoding, b.Encoding) randID1 := make([]byte, 10) randID2 := make([]byte, 10) @@ -32,6 +37,5 @@ func TestBlockMeta(t *testing.T) { b.ObjectAdded(randID2) assert.True(t, b.EndTime.After(b.StartTime)) assert.Equal(t, 1, bytes.Compare(b.MaxID, b.MinID)) - assert.Equal(t, 2, b.TotalObjects) } diff --git a/tempodb/backend/encoding.go b/tempodb/backend/encoding.go new file mode 100644 index 00000000000..e482674ed90 --- /dev/null +++ b/tempodb/backend/encoding.go @@ -0,0 +1,124 @@ +package backend + +import ( + "bytes" + "encoding/json" + "fmt" + "strings" +) + +// Encoding is the identifier for a chunk encoding. +type Encoding byte + +// The different available encodings. +// Make sure to preserve the order, as these numeric values are written to the chunks! +const ( + EncNone Encoding = iota + EncGZIP + EncLZ4_64k + EncLZ4_256k + EncLZ4_1M + EncLZ4_4M + EncSnappy + EncZstd +) + +// SupportedEncoding is a slice of all supported encodings +var SupportedEncoding = []Encoding{ + EncNone, + EncGZIP, + EncLZ4_64k, + EncLZ4_256k, + EncLZ4_1M, + EncLZ4_4M, + EncSnappy, + EncZstd, +} + +func (e Encoding) String() string { + switch e { + case EncNone: + return "none" + case EncGZIP: + return "gzip" + case EncLZ4_64k: + return "lz4-64k" + case EncLZ4_256k: + return "lz4-256k" + case EncLZ4_1M: + return "lz4-1M" + case EncLZ4_4M: + return "lz4" + case EncSnappy: + return "snappy" + case EncZstd: + return "zstd" + default: + return "unsupported" + } +} + +// UnmarshalYAML implements the Unmarshaler interface of the yaml pkg. +func (e *Encoding) UnmarshalYAML(unmarshal func(interface{}) error) error { + var encString string + err := unmarshal(&encString) + if err != nil { + return err + } + + *e, err = ParseEncoding(encString) + if err != nil { + return err + } + + return nil +} + +// MarshalYAML implements the Marshaler interface of the yaml pkg +func (e Encoding) MarshalYAML() (interface{}, error) { + return e.String(), nil +} + +// UnmarshalJSON implements the Unmarshaler interface of the json pkg. +func (e *Encoding) UnmarshalJSON(b []byte) error { + var encString string + err := json.Unmarshal(b, &encString) + if err != nil { + return err + } + + *e, err = ParseEncoding(encString) + if err != nil { + return err + } + + return nil +} + +// MarshalJSON implements the marshaler interface of the json pkg. +func (e Encoding) MarshalJSON() ([]byte, error) { + buffer := bytes.NewBufferString("\"" + e.String() + "\"") + return buffer.Bytes(), nil +} + +// ParseEncoding parses an chunk encoding (compression algorithm) by its name. +func ParseEncoding(enc string) (Encoding, error) { + for _, e := range SupportedEncoding { + if strings.EqualFold(e.String(), enc) { + return e, nil + } + } + return 0, fmt.Errorf("invalid encoding: %s, supported: %s", enc, SupportedEncodingString()) +} + +// SupportedEncodingString returns the list of supported Encoding. +func SupportedEncodingString() string { + var sb strings.Builder + for i := range SupportedEncoding { + sb.WriteString(SupportedEncoding[i].String()) + if i != len(SupportedEncoding)-1 { + sb.WriteString(", ") + } + } + return sb.String() +} diff --git a/tempodb/backend/encoding_test.go b/tempodb/backend/encoding_test.go new file mode 100644 index 00000000000..5a01d940c5b --- /dev/null +++ b/tempodb/backend/encoding_test.go @@ -0,0 +1,45 @@ +package backend + +import ( + "encoding/json" + "testing" + + "github.com/stretchr/testify/assert" + "gopkg.in/yaml.v3" +) + +type marshalTest struct { + Test Encoding +} + +func TestUnmarshalMarshalYaml(t *testing.T) { + for _, enc := range SupportedEncoding { + expected := marshalTest{ + Test: enc, + } + actual := marshalTest{} + + buff, err := yaml.Marshal(expected) + assert.NoError(t, err) + err = yaml.Unmarshal(buff, &actual) + assert.NoError(t, err) + + assert.Equal(t, expected, actual) + } +} + +func TestUnmarshalMarshalJson(t *testing.T) { + for _, enc := range SupportedEncoding { + expected := marshalTest{ + Test: enc, + } + actual := marshalTest{} + + buff, err := json.Marshal(expected) + assert.NoError(t, err) + err = json.Unmarshal(buff, &actual) + assert.NoError(t, err) + + assert.Equal(t, expected, actual) + } +} diff --git a/tempodb/backend/gcs/gcs.go b/tempodb/backend/gcs/gcs.go index 90297a2738d..32871d53426 100644 --- a/tempodb/backend/gcs/gcs.go +++ b/tempodb/backend/gcs/gcs.go @@ -132,6 +132,10 @@ func (rw *readerWriter) Append(ctx context.Context, name string, blockID uuid.UU // CloseAppend implements backend.Writer func (rw *readerWriter) CloseAppend(_ context.Context, tracker backend.AppendTracker) error { + if tracker == nil { + return nil + } + w := tracker.(*storage.Writer) return w.Close() } diff --git a/tempodb/backend/local/local.go b/tempodb/backend/local/local.go index f0022f9fea3..1c71bc2d254 100644 --- a/tempodb/backend/local/local.go +++ b/tempodb/backend/local/local.go @@ -112,6 +112,10 @@ func (rw *readerWriter) Append(ctx context.Context, name string, blockID uuid.UU // CloseAppend implements backend.Writer func (rw *readerWriter) CloseAppend(ctx context.Context, tracker backend.AppendTracker) error { + if tracker == nil { + return nil + } + var dst *os.File = tracker.(*os.File) return dst.Close() } diff --git a/tempodb/backend/readerat.go b/tempodb/backend/readerat.go new file mode 100644 index 00000000000..527ff40b8b8 --- /dev/null +++ b/tempodb/backend/readerat.go @@ -0,0 +1,26 @@ +package backend + +import "context" + +// ReaderAt is a shim that allows a backend.Reader to be used as an io.ReaderAt +type ReaderAt struct { + meta *BlockMeta + name string + r Reader +} + +// NewReaderAt creates a ReaderAt for the given BlockMeta +func NewReaderAt(meta *BlockMeta, name string, r Reader) *ReaderAt { + return &ReaderAt{ + meta: meta, + name: name, + r: r, + } +} + +// ReadAt implements ReaderAt +func (b *ReaderAt) ReadAt(p []byte, off int64) (int, error) { + // todo: how to preserve context? len(p) is cheating + err := b.r.ReadRange(context.Background(), b.name, b.meta.BlockID, b.meta.TenantID, uint64(off), p) + return len(p), err +} diff --git a/tempodb/backend/s3/s3.go b/tempodb/backend/s3/s3.go index 4e3b7cb378b..875067ae86e 100644 --- a/tempodb/backend/s3/s3.go +++ b/tempodb/backend/s3/s3.go @@ -226,6 +226,10 @@ func (rw *readerWriter) Append(ctx context.Context, name string, blockID uuid.UU // CloseAppend implements backend.Writer func (rw *readerWriter) CloseAppend(ctx context.Context, tracker backend.AppendTracker) error { + if tracker == nil { + return nil + } + a := tracker.(appendTracker) completeParts := make([]minio.CompletePart, 0) for _, p := range a.parts { diff --git a/tempodb/compactor.go b/tempodb/compactor.go index 115b103313a..8b5d45df033 100644 --- a/tempodb/compactor.go +++ b/tempodb/compactor.go @@ -134,12 +134,12 @@ func (rw *readerWriter) compact(blockMetas []*backend.BlockMeta, tenantID string level.Info(rw.logger).Log("msg", "compacting block", "block", fmt.Sprintf("%+v", blockMeta)) totalRecords += blockMeta.TotalObjects - block, err := encoding.NewBackendBlock(blockMeta) + block, err := encoding.NewBackendBlock(blockMeta, rw.r) if err != nil { return err } - iter, err := block.Iterator(rw.compactorCfg.ChunkSizeBytes, rw.r) + iter, err := block.Iterator(rw.compactorCfg.ChunkSizeBytes) if err != nil { return err } @@ -188,7 +188,7 @@ func (rw *readerWriter) compact(blockMetas []*backend.BlockMeta, tenantID string // make a new block if necessary if currentBlock == nil { - currentBlock, err = encoding.NewCompactorBlock(uuid.New(), tenantID, rw.cfg.WAL.BloomFP, rw.cfg.WAL.IndexDownsample, blockMetas, recordsPerBlock) + currentBlock, err = encoding.NewCompactorBlock(rw.cfg.Block, uuid.New(), tenantID, blockMetas, recordsPerBlock) if err != nil { return errors.Wrap(err, "error making new compacted block") } @@ -242,12 +242,12 @@ func (rw *readerWriter) compact(blockMetas []*backend.BlockMeta, tenantID string func appendBlock(rw *readerWriter, tracker backend.AppendTracker, block *encoding.CompactorBlock) (backend.AppendTracker, error) { compactionLevelLabel := strconv.Itoa(int(block.BlockMeta().CompactionLevel - 1)) metricCompactionObjectsWritten.WithLabelValues(compactionLevelLabel).Add(float64(block.CurrentBufferedObjects())) - metricCompactionBytesWritten.WithLabelValues(compactionLevelLabel).Add(float64(block.CurrentBufferLength())) - tracker, err := block.FlushBuffer(context.TODO(), tracker, rw.w) + tracker, bytesFlushed, err := block.FlushBuffer(context.TODO(), tracker, rw.w) if err != nil { return nil, err } + metricCompactionBytesWritten.WithLabelValues(compactionLevelLabel).Add(float64(bytesFlushed)) return tracker, nil } @@ -255,15 +255,12 @@ func appendBlock(rw *readerWriter, tracker backend.AppendTracker, block *encodin func finishBlock(rw *readerWriter, tracker backend.AppendTracker, block *encoding.CompactorBlock) error { level.Info(rw.logger).Log("msg", "writing compacted block", "block", fmt.Sprintf("%+v", block.BlockMeta())) - tracker, err := appendBlock(rw, tracker, block) - if err != nil { - return err - } - - err = block.Complete(context.TODO(), tracker, rw.w) + bytesFlushed, err := block.Complete(context.TODO(), tracker, rw.w) if err != nil { return err } + compactionLevelLabel := strconv.Itoa(int(block.BlockMeta().CompactionLevel - 1)) + metricCompactionBytesWritten.WithLabelValues(compactionLevelLabel).Add(float64(bytesFlushed)) return nil } diff --git a/tempodb/compactor_bookmark_test.go b/tempodb/compactor_bookmark_test.go index 0b19e5b046f..9287b87f106 100644 --- a/tempodb/compactor_bookmark_test.go +++ b/tempodb/compactor_bookmark_test.go @@ -14,6 +14,7 @@ import ( "github.com/golang/protobuf/proto" "github.com/google/uuid" "github.com/grafana/tempo/pkg/util/test" + "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/wal" @@ -30,10 +31,13 @@ func TestCurrentClear(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncGZIP, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -64,16 +68,16 @@ func TestCurrentClear(t *testing.T) { assert.NoError(t, err, "unexpected error writing req") } - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) assert.NoError(t, err) rw := r.(*readerWriter) - block, err := encoding.NewBackendBlock(complete.BlockMeta()) + block, err := encoding.NewBackendBlock(complete.BlockMeta(), rw.r) assert.NoError(t, err) - iter, err := block.Iterator(10, rw.r) + iter, err := block.Iterator(10) assert.NoError(t, err) bm := newBookmark(iter) diff --git a/tempodb/compactor_test.go b/tempodb/compactor_test.go index e2f2cbf0e63..de3c90a22f9 100644 --- a/tempodb/compactor_test.go +++ b/tempodb/compactor_test.go @@ -18,6 +18,7 @@ import ( "github.com/grafana/tempo/pkg/util/test" "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/pool" "github.com/grafana/tempo/tempodb/wal" ) @@ -51,10 +52,13 @@ func TestCompaction(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 11, BloomFP: .01, + Encoding: backend.EncLZ4_4M, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -99,7 +103,7 @@ func TestCompaction(t *testing.T) { allReqs = append(allReqs, reqs...) allIds = append(allIds, ids...) - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) @@ -149,7 +153,7 @@ func TestCompaction(t *testing.T) { // now see if we can find our ids for i, id := range allIds { - b, _, err := rw.Find(context.Background(), testTenantID, id, BlockIDMin, BlockIDMax) + b, err := rw.Find(context.Background(), testTenantID, id, BlockIDMin, BlockIDMax) assert.NoError(t, err) out := &tempopb.PushRequest{} @@ -174,10 +178,13 @@ func TestSameIDCompaction(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), - IndexDownsample: rand.Int()%20 + 1, + Block: &encoding.BlockConfig{ + IndexDownsample: 11, BloomFP: .01, + Encoding: backend.EncSnappy, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -206,7 +213,7 @@ func TestSameIDCompaction(t *testing.T) { err = head.Write(id, rec) assert.NoError(t, err, "unexpected error writing req") - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) @@ -258,10 +265,13 @@ func TestCompactionUpdatesBlocklist(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), - IndexDownsample: rand.Int()%20 + 1, + Block: &encoding.BlockConfig{ + IndexDownsample: 11, BloomFP: .01, + Encoding: backend.EncNone, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -298,7 +308,7 @@ func TestCompactionUpdatesBlocklist(t *testing.T) { // Make sure all expected traces are found. for i := 0; i < blockCount; i++ { for j := 0; j < recordCount; j++ { - trace, _, err := rw.Find(context.TODO(), testTenantID, makeTraceID(i, j), BlockIDMin, BlockIDMax) + trace, err := rw.Find(context.TODO(), testTenantID, makeTraceID(i, j), BlockIDMin, BlockIDMax) assert.NotNil(t, trace) assert.Greater(t, len(trace), 0) assert.NoError(t, err) @@ -320,10 +330,13 @@ func TestCompactionMetrics(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), - IndexDownsample: rand.Int()%20 + 1, + Block: &encoding.BlockConfig{ + IndexDownsample: 11, BloomFP: .01, + Encoding: backend.EncNone, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -391,10 +404,13 @@ func TestCompactionIteratesThroughTenants(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), - IndexDownsample: rand.Int()%20 + 1, + Block: &encoding.BlockConfig{ + IndexDownsample: 11, BloomFP: .01, + Encoding: backend.EncLZ4_64k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -444,7 +460,7 @@ func cutTestBlocks(t *testing.T, w Writer, tenantID string, blockCount int, reco assert.NoError(t, err, "unexpected error writing rec") } - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) diff --git a/tempodb/config.go b/tempodb/config.go index b4dc3c438b0..7d731554f00 100644 --- a/tempodb/config.go +++ b/tempodb/config.go @@ -1,6 +1,8 @@ package tempodb import ( + "errors" + "fmt" "time" "github.com/grafana/tempo/tempodb/backend/azure" @@ -9,6 +11,7 @@ import ( "github.com/grafana/tempo/tempodb/backend/gcs" "github.com/grafana/tempo/tempodb/backend/local" "github.com/grafana/tempo/tempodb/backend/s3" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/pool" "github.com/grafana/tempo/tempodb/wal" ) @@ -16,23 +19,29 @@ import ( const DefaultBlocklistPollConcurrency = uint(50) const DefaultRetentionConcurrency = uint(10) +// Config holds the entirety of tempodb configuration type Config struct { + Pool *pool.Config `yaml:"pool,omitempty"` + WAL *wal.Config `yaml:"wal"` + Block *encoding.BlockConfig `yaml:"block"` + + BlocklistPoll time.Duration `yaml:"blocklist_poll"` + BlocklistPollConcurrency uint `yaml:"blocklist_poll_concurrency"` + + // backends Backend string `yaml:"backend"` Local *local.Config `yaml:"local"` GCS *gcs.Config `yaml:"gcs"` S3 *s3.Config `yaml:"s3"` Azure *azure.Config `yaml:"azure"` - Pool *pool.Config `yaml:"pool,omitempty"` - WAL *wal.Config `yaml:"wal"` + // caches Cache string `yaml:"cache"` Memcached *memcached.Config `yaml:"memcached"` Redis *redis.Config `yaml:"redis"` - - BlocklistPoll time.Duration `yaml:"blocklist_poll"` - BlocklistPollConcurrency uint `yaml:"blocklist_poll_concurrency"` } +// CompactorConfig contains compaction configuration options type CompactorConfig struct { ChunkSizeBytes uint32 `yaml:"chunk_size_bytes"` // todo: do we need this? FlushSizeBytes uint32 `yaml:"flush_size_bytes"` @@ -42,3 +51,20 @@ type CompactorConfig struct { CompactedBlockRetention time.Duration `yaml:"compacted_block_retention"` RetentionConcurrency uint `yaml:"retention_concurrency"` } + +func validateConfig(cfg *Config) error { + if cfg.WAL == nil { + return errors.New("wal config should be non-nil") + } + + if cfg.Block == nil { + return errors.New("block config should be non-nil") + } + + err := encoding.ValidateConfig(cfg.Block) + if err != nil { + return fmt.Errorf("block config validation failed: %w", err) + } + + return nil +} diff --git a/tempodb/encoding/backend_block.go b/tempodb/encoding/backend_block.go new file mode 100644 index 00000000000..1939465b644 --- /dev/null +++ b/tempodb/encoding/backend_block.go @@ -0,0 +1,123 @@ +package encoding + +import ( + "bytes" + "context" + "fmt" + + "github.com/opentracing/opentracing-go" + willf_bloom "github.com/willf/bloom" + + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/encoding/common" +) + +// BackendBlock represents a block already in the backend. +type BackendBlock struct { + encoding versionedEncoding + + meta *backend.BlockMeta + reader backend.Reader +} + +// NewBackendBlock returns a BackendBlock for the given backend.BlockMeta +// It is version aware. +func NewBackendBlock(meta *backend.BlockMeta, r backend.Reader) (*BackendBlock, error) { + var encoding versionedEncoding + + switch meta.Version { + case "v0": + encoding = v0Encoding{} + case "v1": + encoding = v1Encoding{} + default: + return nil, fmt.Errorf("%s is not a valid block version", meta.Version) + } + + return &BackendBlock{ + encoding: encoding, + meta: meta, + reader: r, + }, nil +} + +// Find searches a block for the ID and returns an object if found. +func (b *BackendBlock) Find(ctx context.Context, id common.ID) ([]byte, error) { + var err error + span, ctx := opentracing.StartSpanFromContext(ctx, "BackendBlock.Find") + defer func() { + if err != nil { + span.SetTag("error", true) + } + span.Finish() + }() + + span.SetTag("block", b.meta.BlockID.String()) + + shardKey := common.ShardKeyForTraceID(id) + blockID := b.meta.BlockID + tenantID := b.meta.TenantID + + bloomBytes, err := b.reader.Read(ctx, b.encoding.nameBloom(shardKey), blockID, tenantID) + if err != nil { + return nil, fmt.Errorf("error retrieving bloom (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + filter := &willf_bloom.BloomFilter{} + _, err = filter.ReadFrom(bytes.NewReader(bloomBytes)) + if err != nil { + return nil, fmt.Errorf("error parsing bloom (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + if !filter.Test(id) { + return nil, nil + } + + indexBytes, err := b.reader.Read(ctx, b.encoding.nameIndex(), blockID, tenantID) + if err != nil { + return nil, fmt.Errorf("error reading index (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + indexReader, err := b.encoding.newIndexReader(indexBytes) + if err != nil { + return nil, fmt.Errorf("error building index reader (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + ra := backend.NewReaderAt(b.meta, b.encoding.nameObjects(), b.reader) + pageReader, err := b.encoding.newPageReader(ra, b.meta.Encoding) + if err != nil { + return nil, fmt.Errorf("error building page reader (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + // passing nil for objectCombiner here. this is fine b/c a backend block should never have dupes + finder := b.encoding.newPagedFinder(indexReader, pageReader, nil) + objectBytes, err := finder.Find(id) + + if err != nil { + return nil, fmt.Errorf("error using pageFinder (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + return objectBytes, nil +} + +// Iterator returns an Iterator that iterates over the objects in the block from the backend +func (b *BackendBlock) Iterator(chunkSizeBytes uint32) (common.Iterator, error) { + // read index + indexBytes, err := b.reader.Read(context.TODO(), b.encoding.nameIndex(), b.meta.BlockID, b.meta.TenantID) + if err != nil { + return nil, fmt.Errorf("failed to read index (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + ra := backend.NewReaderAt(b.meta, b.encoding.nameObjects(), b.reader) + pageReader, err := b.encoding.newPageReader(ra, b.meta.Encoding) + if err != nil { + return nil, fmt.Errorf("failed to create pageReader (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + iterator, err := b.encoding.newPagedIterator(chunkSizeBytes, indexBytes, pageReader) + if err != nil { + return nil, fmt.Errorf("failed to create iterator (%s, %s): %w", b.meta.TenantID, b.meta.BlockID, err) + } + + return iterator, nil +} diff --git a/tempodb/encoding/v0/backend_block_test.go b/tempodb/encoding/backend_block_test.go similarity index 79% rename from tempodb/encoding/v0/backend_block_test.go rename to tempodb/encoding/backend_block_test.go index 95807fe917f..534058fd7a6 100644 --- a/tempodb/encoding/v0/backend_block_test.go +++ b/tempodb/encoding/backend_block_test.go @@ -1,143 +1,25 @@ -package v0 +package encoding import ( - "bytes" "context" - "errors" - "strings" "testing" "github.com/google/uuid" "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" - "github.com/grafana/tempo/tempodb/backend/util" - "github.com/grafana/tempo/tempodb/encoding/common" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/willf/bloom" ) -func TestBackendBlock(t *testing.T) { - id := []byte{0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01} - object := []byte{0x01, 0x02} - - bloom := bloom.New(1, 1) - bloom.Add(id) - - bloomBuffer := bytes.Buffer{} - _, err := bloom.WriteTo(&bloomBuffer) - require.NoError(t, err) - - objectBuffer := bytes.Buffer{} - _, err = marshalObjectToWriter(id, object, &objectBuffer) - require.NoError(t, err) - - record := newRecord() - record.ID = id - record.Length = uint32(objectBuffer.Len()) - indexBytes, err := marshalRecords([]*common.Record{record}) - require.NoError(t, err) - - tests := []struct { - name string - id []byte - readerError error - readerBloom []byte - readerIndex []byte - readerRange []byte - expected []byte - expectedBloomReads int32 - expectedBloomBytes int32 - expectedIndexReads int32 - expectedIndexBytes int32 - expectedBlockReads int32 - expectedBlockBytes int32 - }{ - { - name: "error", - id: id, - readerError: errors.New("wups"), - }, - { - name: "bloom passes", - id: id, - readerBloom: bloomBuffer.Bytes(), - expectedBloomReads: 1, - expectedBloomBytes: int32(bloomBuffer.Len()), - expectedIndexReads: 1, - }, - { - name: "index passes", - id: id, - readerBloom: bloomBuffer.Bytes(), - readerIndex: indexBytes, - expectedBloomReads: 1, - expectedBloomBytes: int32(bloomBuffer.Len()), - expectedIndexReads: 1, - expectedIndexBytes: int32(len(indexBytes)), - expectedBlockReads: 1, - expectedBlockBytes: int32(objectBuffer.Len()), - }, - { - name: "obj found", - id: id, - readerBloom: bloomBuffer.Bytes(), - readerIndex: indexBytes, - readerRange: objectBuffer.Bytes(), - expected: object, - expectedBloomReads: 1, - expectedBloomBytes: int32(bloomBuffer.Len()), - expectedIndexReads: 1, - expectedIndexBytes: int32(len(indexBytes)), - expectedBlockReads: 1, - expectedBlockBytes: int32(objectBuffer.Len()), - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - fn := func(name string, blockID uuid.UUID, tenantID string) ([]byte, error) { - if tt.readerError != nil { - return nil, tt.readerError - } - - if strings.Contains(name, nameBloomPrefix) { - return tt.readerBloom, nil - } - - return tt.readerIndex, nil - } - - mockR := &util.MockReader{ - ReadFn: fn, - Range: tt.readerRange, - } - - findMetrics := common.NewFindMetrics() - block := NewBackendBlock(&backend.BlockMeta{}) - actual, err := block.Find(context.Background(), mockR, tt.id, &findMetrics) - - assert.True(t, errors.Is(err, tt.readerError)) - assert.Equal(t, tt.expected, actual) - assert.Equal(t, tt.expectedBloomReads, findMetrics.BloomFilterReads.Load()) - assert.Equal(t, tt.expectedBloomBytes, findMetrics.BloomFilterBytesRead.Load()) - assert.Equal(t, tt.expectedIndexReads, findMetrics.IndexReads.Load()) - assert.Equal(t, tt.expectedIndexBytes, findMetrics.IndexBytesRead.Load()) - assert.Equal(t, tt.expectedBlockReads, findMetrics.BlockReads.Load()) - assert.Equal(t, tt.expectedBlockBytes, findMetrics.BlockBytesRead.Load()) - }) - } - -} - func TestV0Block(t *testing.T) { r, _, _, err := local.New(&local.Config{ - Path: "./v0_test", + Path: "./v0test", }) require.NoError(t, err, "error creating backend") - meta := backend.NewBlockMeta("fake", uuid.MustParse("00f5a116-639e-4880-bbe7-be9b0c828033")) - backendBlock := NewBackendBlock(meta) + meta := backend.NewBlockMeta("fake", uuid.MustParse("00f5a116-639e-4880-bbe7-be9b0c828033"), "v0", backend.EncNone) + backendBlock, err := NewBackendBlock(meta, r) + require.NoError(t, err, "error creating backendblock") // known ids and objs written ids := [][]byte{ @@ -167,16 +49,15 @@ func TestV0Block(t *testing.T) { } // test Find - m := common.NewFindMetrics() for i, id := range ids { - foundBytes, err := backendBlock.Find(context.Background(), r, id, &m) + foundBytes, err := backendBlock.Find(context.Background(), id) assert.NoError(t, err) assert.Equal(t, reqs[i], foundBytes) } // test Iterator - iterator, err := backendBlock.Iterator(10, r) + iterator, err := backendBlock.Iterator(10) require.NoError(t, err, "error getting iterator") i := 0 for { diff --git a/tempodb/encoding/common/index_reader.go b/tempodb/encoding/common/index_reader.go new file mode 100644 index 00000000000..eba268d095a --- /dev/null +++ b/tempodb/encoding/common/index_reader.go @@ -0,0 +1,31 @@ +package common + +import ( + "bytes" + "sort" +) + +// Records is a slice of *Record +type Records []*Record + +// At implements IndexReader +func (r Records) At(i int) *Record { + if i < 0 || i >= len(r) { + return nil + } + + return r[i] +} + +// Find implements IndexReader +func (r Records) Find(id ID) (*Record, int) { + i := sort.Search(len(r), func(idx int) bool { + return bytes.Compare(r[idx].ID, id) >= 0 + }) + + if i < 0 || i >= len(r) { + return nil, -1 + } + + return r[i], i +} diff --git a/tempodb/encoding/common/types.go b/tempodb/encoding/common/types.go index 6ad019bd731..18af2775852 100644 --- a/tempodb/encoding/common/types.go +++ b/tempodb/encoding/common/types.go @@ -1,7 +1,5 @@ package common -import "go.uber.org/atomic" - // This file contains types that need to be referenced by both the ./encoding and ./encoding/vX packages. // It primarily exists here to break dependency loops. @@ -28,7 +26,7 @@ type Finder interface { // Appender is capable of tracking objects and ids that are added to it type Appender interface { Append(ID, []byte) error - Complete() + Complete() error Records() []*Record Length() int DataLength() uint64 @@ -39,24 +37,21 @@ type ObjectCombiner interface { Combine(objA []byte, objB []byte) []byte } -// FindMetrics is a threadsafe struct for tracking metrics related to a parallelized query -type FindMetrics struct { - BloomFilterReads *atomic.Int32 - BloomFilterBytesRead *atomic.Int32 - IndexReads *atomic.Int32 - IndexBytesRead *atomic.Int32 - BlockReads *atomic.Int32 - BlockBytesRead *atomic.Int32 +// PageReader returns a slice of pages in the encoding/v0 format referenced by +// the slice of *Records passed in. The length of the returned slice is guaranteed +// to be equal to the length of the provided records unless error is non nil. +// PageReader is the primary abstraction point for supporting multiple data +// formats. +type PageReader interface { + Read([]*Record) ([][]byte, error) } -// NewFindMetrics constructs a FindMetrics -func NewFindMetrics() FindMetrics { - return FindMetrics{ - BloomFilterReads: atomic.NewInt32(0), - BloomFilterBytesRead: atomic.NewInt32(0), - IndexReads: atomic.NewInt32(0), - IndexBytesRead: atomic.NewInt32(0), - BlockReads: atomic.NewInt32(0), - BlockBytesRead: atomic.NewInt32(0), - } +// IndexReader is used to abstract away the details of an index. Currently +// only used in the paged finder, it could eventually provide a way to +// support multiple index formats. +// IndexReader is the primary abstraction point for supporting multiple index +// formats. +type IndexReader interface { + At(i int) *Record + Find(id ID) (*Record, int) } diff --git a/tempodb/encoding/compactor_block.go b/tempodb/encoding/compactor_block.go index 9117044eb1e..611523fd282 100644 --- a/tempodb/encoding/compactor_block.go +++ b/tempodb/encoding/compactor_block.go @@ -11,6 +11,8 @@ import ( ) type CompactorBlock struct { + encoding versionedEncoding + compactedMeta *backend.BlockMeta inMetas []*backend.BlockMeta @@ -21,7 +23,8 @@ type CompactorBlock struct { appender common.Appender } -func NewCompactorBlock(id uuid.UUID, tenantID string, bloomFP float64, indexDownsample int, metas []*backend.BlockMeta, estimatedObjects int) (*CompactorBlock, error) { +// NewCompactorBlock creates a ... new compactor block! +func NewCompactorBlock(cfg *BlockConfig, id uuid.UUID, tenantID string, metas []*backend.BlockMeta, estimatedObjects int) (*CompactorBlock, error) { if len(metas) == 0 { return nil, fmt.Errorf("empty block meta list") } @@ -31,13 +34,18 @@ func NewCompactorBlock(id uuid.UUID, tenantID string, bloomFP float64, indexDown } c := &CompactorBlock{ - compactedMeta: backend.NewBlockMeta(tenantID, id), - bloom: common.NewWithEstimates(uint(estimatedObjects), bloomFP), + encoding: latestEncoding(), + compactedMeta: backend.NewBlockMeta(tenantID, id, currentVersion, cfg.Encoding), + bloom: common.NewWithEstimates(uint(estimatedObjects), cfg.BloomFP), inMetas: metas, } + var err error c.appendBuffer = &bytes.Buffer{} - c.appender = newBufferedAppender(c.appendBuffer, indexDownsample, estimatedObjects) + c.appender, err = c.encoding.newBufferedAppender(c.appendBuffer, cfg.Encoding, cfg.IndexDownsample, estimatedObjects) + if err != nil { + return nil, fmt.Errorf("failed to created appender: %w", err) + } return c, nil } @@ -66,32 +74,46 @@ func (c *CompactorBlock) Length() int { } // FlushBuffer flushes any existing objects to the backend -func (c *CompactorBlock) FlushBuffer(ctx context.Context, tracker backend.AppendTracker, w backend.Writer) (backend.AppendTracker, error) { +func (c *CompactorBlock) FlushBuffer(ctx context.Context, tracker backend.AppendTracker, w backend.Writer) (backend.AppendTracker, int, error) { + if c.appender.Length() == 0 { + return tracker, 0, nil + } + meta := c.BlockMeta() - tracker, err := appendBlockData(ctx, w, meta, tracker, c.appendBuffer.Bytes()) + tracker, err := c.encoding.appendBlockData(ctx, w, meta, tracker, c.appendBuffer.Bytes()) if err != nil { - return nil, err + return nil, 0, err } + bytesFlushed := c.appendBuffer.Len() c.appendBuffer.Reset() c.bufferedObjects = 0 - return tracker, nil + return tracker, bytesFlushed, nil } // Complete finishes writes the compactor metadata and closes all buffers and appenders -func (c *CompactorBlock) Complete(ctx context.Context, tracker backend.AppendTracker, w backend.Writer) error { - c.appender.Complete() +func (c *CompactorBlock) Complete(ctx context.Context, tracker backend.AppendTracker, w backend.Writer) (int, error) { + err := c.appender.Complete() + if err != nil { + return 0, err + } + + // one final flush + tracker, bytesFlushed, err := c.FlushBuffer(ctx, tracker, w) + if err != nil { + return 0, err + } records := c.appender.Records() meta := c.BlockMeta() - err := writeBlockMeta(ctx, w, meta, records, c.bloom) + err = c.encoding.writeBlockMeta(ctx, w, meta, records, c.bloom) if err != nil { - return err + return 0, err } - return w.CloseAppend(ctx, tracker) + return bytesFlushed, w.CloseAppend(ctx, tracker) } func (c *CompactorBlock) BlockMeta() *backend.BlockMeta { diff --git a/tempodb/encoding/compactor_block_test.go b/tempodb/encoding/compactor_block_test.go index f9ad37a0e58..7e61574cb3a 100644 --- a/tempodb/encoding/compactor_block_test.go +++ b/tempodb/encoding/compactor_block_test.go @@ -19,13 +19,12 @@ const ( ) func TestCompactorBlockError(t *testing.T) { - _, err := NewCompactorBlock(uuid.New(), "", 0, 0, nil, 0) + _, err := NewCompactorBlock(nil, uuid.New(), "", nil, 0) assert.Error(t, err) } func TestCompactorBlockAddObject(t *testing.T) { indexDownsample := 3 - bloomFP := .01 metas := []*backend.BlockMeta{ { @@ -39,7 +38,11 @@ func TestCompactorBlockAddObject(t *testing.T) { } numObjects := (rand.Int() % 20) + 1 - cb, err := NewCompactorBlock(uuid.New(), testTenantID, bloomFP, indexDownsample, metas, numObjects) + cb, err := NewCompactorBlock(&BlockConfig{ + BloomFP: .01, + IndexDownsample: indexDownsample, + Encoding: backend.EncGZIP, + }, uuid.New(), testTenantID, metas, numObjects) assert.NoError(t, err) var minID common.ID @@ -67,8 +70,8 @@ func TestCompactorBlockAddObject(t *testing.T) { maxID = id } } - cb.appender.Complete() - + err = cb.appender.Complete() + assert.NoError(t, err) assert.Equal(t, numObjects, cb.Length()) // test meta diff --git a/tempodb/encoding/complete_block.go b/tempodb/encoding/complete_block.go index 0949f74cba6..9fb8df6c91f 100644 --- a/tempodb/encoding/complete_block.go +++ b/tempodb/encoding/complete_block.go @@ -18,6 +18,8 @@ import ( // A CompleteBlock also knows the filepath of the append wal file it was cut from. It is responsible for // cleaning this block up once it has been flushed to the backend. type CompleteBlock struct { + encoding versionedEncoding + meta *backend.BlockMeta bloom *common.ShardedBloomFilter records []*common.Record @@ -31,10 +33,11 @@ type CompleteBlock struct { } // NewCompleteBlock creates a new block and takes _ALL_ the parameters necessary to build the ordered, deduped file on disk -func NewCompleteBlock(originatingMeta *backend.BlockMeta, iterator common.Iterator, bloomFP float64, estimatedObjects int, indexDownsample int, filepath string, walFilename string) (*CompleteBlock, error) { +func NewCompleteBlock(cfg *BlockConfig, originatingMeta *backend.BlockMeta, iterator common.Iterator, estimatedObjects int, filepath string, walFilename string) (*CompleteBlock, error) { c := &CompleteBlock{ - meta: backend.NewBlockMeta(originatingMeta.TenantID, uuid.New()), - bloom: common.NewWithEstimates(uint(estimatedObjects), bloomFP), + encoding: latestEncoding(), + meta: backend.NewBlockMeta(originatingMeta.TenantID, uuid.New(), currentVersion, cfg.Encoding), + bloom: common.NewWithEstimates(uint(estimatedObjects), cfg.BloomFP), records: make([]*common.Record, 0), filepath: filepath, walFilename: walFilename, @@ -50,7 +53,10 @@ func NewCompleteBlock(originatingMeta *backend.BlockMeta, iterator common.Iterat return nil, err } - appender := newBufferedAppender(appendFile, indexDownsample, estimatedObjects) + appender, err := c.encoding.newBufferedAppender(appendFile, cfg.Encoding, cfg.IndexDownsample, estimatedObjects) + if err != nil { + return nil, err + } for { bytesID, bytesObject, err := iterator.Next() if bytesID == nil { @@ -73,7 +79,10 @@ func NewCompleteBlock(originatingMeta *backend.BlockMeta, iterator common.Iterat return nil, err } } - appender.Complete() + err = appender.Complete() + if err != nil { + return nil, err + } appendFile.Close() c.records = appender.Records() c.meta.StartTime = originatingMeta.StartTime @@ -101,12 +110,12 @@ func (c *CompleteBlock) Write(ctx context.Context, w backend.Writer) error { return err } - err = writeBlockData(ctx, w, c.meta, src, fileStat.Size()) + err = c.encoding.writeBlockData(ctx, w, c.meta, src, fileStat.Size()) if err != nil { return err } - err = writeBlockMeta(ctx, w, c.meta, c.records, c.bloom) + err = c.encoding.writeBlockMeta(ctx, w, c.meta, c.records, c.bloom) if err != nil { return err } @@ -133,8 +142,12 @@ func (c *CompleteBlock) Find(id common.ID, combiner common.ObjectCombiner) ([]by return nil, err } - finder := newDedupingFinder(c.records, file, combiner) + pageReader, err := c.encoding.newPageReader(file, c.meta.Encoding) + if err != nil { + return nil, err + } + finder := c.encoding.newPagedFinder(common.Records(c.records), pageReader, combiner) return finder.Find(id) } diff --git a/tempodb/encoding/complete_block_test.go b/tempodb/encoding/complete_block_test.go index 7bd33cc1740..0a54e0cd56e 100644 --- a/tempodb/encoding/complete_block_test.go +++ b/tempodb/encoding/complete_block_test.go @@ -4,6 +4,8 @@ import ( "bufio" "bytes" "context" + "fmt" + "io" "io/ioutil" "math/rand" "os" @@ -19,6 +21,7 @@ import ( "github.com/grafana/tempo/tempodb/backend/local" "github.com/grafana/tempo/tempodb/encoding/common" v0 "github.com/grafana/tempo/tempodb/encoding/v0" + v1 "github.com/grafana/tempo/tempodb/encoding/v1" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" ) @@ -39,7 +42,11 @@ func TestCompleteBlock(t *testing.T) { defer os.RemoveAll(tempDir) require.NoError(t, err, "unexpected error creating temp dir") - block, ids, reqs := completeBlock(t, tempDir) + block, ids, reqs := completeBlock(t, &BlockConfig{ + IndexDownsample: 13, + BloomFP: .01, + Encoding: backend.EncGZIP, + }, tempDir) // test Find for i, id := range ids { @@ -61,12 +68,24 @@ func TestCompleteBlock(t *testing.T) { } } -func TestCompleteBlockToBackendBlock(t *testing.T) { +func TestCompleteBlockAll(t *testing.T) { + for _, enc := range backend.SupportedEncoding { + testCompleteBlockToBackendBlock(t, + &BlockConfig{ + IndexDownsample: 13, + BloomFP: .01, + Encoding: enc, + }, + ) + } +} + +func testCompleteBlockToBackendBlock(t *testing.T, cfg *BlockConfig) { tempDir, err := ioutil.TempDir("/tmp", "") defer os.RemoveAll(tempDir) require.NoError(t, err, "unexpected error creating temp dir") - block, ids, reqs := completeBlock(t, tempDir) + block, ids, reqs := completeBlock(t, cfg, tempDir) backendTmpDir, err := ioutil.TempDir("/tmp", "") defer os.RemoveAll(backendTmpDir) @@ -88,13 +107,12 @@ func TestCompleteBlockToBackendBlock(t *testing.T) { meta, err := r.BlockMeta(context.Background(), uuids[0], testTenantID) require.NoError(t, err, "error getting meta") - backendBlock, err := NewBackendBlock(meta) + backendBlock, err := NewBackendBlock(meta, r) require.NoError(t, err, "error creating block") - m := common.NewFindMetrics() // test Find for i, id := range ids { - foundBytes, err := backendBlock.Find(context.Background(), r, id, &m) + foundBytes, err := backendBlock.Find(context.Background(), id) assert.NoError(t, err) assert.Equal(t, reqs[i], foundBytes) @@ -107,7 +125,7 @@ func TestCompleteBlockToBackendBlock(t *testing.T) { } sort.Slice(ids, func(i int, j int) bool { return bytes.Compare(ids[i], ids[j]) == -1 }) - iterator, err := backendBlock.Iterator(10, r) + iterator, err := backendBlock.Iterator(10 * 1024 * 1024) require.NoError(t, err, "error getting iterator") i := 0 for { @@ -124,10 +142,9 @@ func TestCompleteBlockToBackendBlock(t *testing.T) { assert.Equal(t, len(ids), i) } -func completeBlock(t *testing.T, tempDir string) (*CompleteBlock, [][]byte, [][]byte) { +func completeBlock(t *testing.T, cfg *BlockConfig, tempDir string) (*CompleteBlock, [][]byte, [][]byte) { rand.Seed(time.Now().Unix()) - indexDownsample := 13 buffer := &bytes.Buffer{} writer := bufio.NewWriter(buffer) appender := v0.NewAppender(writer) @@ -155,20 +172,21 @@ func completeBlock(t *testing.T, tempDir string) (*CompleteBlock, [][]byte, [][] minID = id } } - appender.Complete() - err := writer.Flush() + err := appender.Complete() + require.NoError(t, err) + err = writer.Flush() require.NoError(t, err, "unexpected error flushing writer") - originatingMeta := backend.NewBlockMeta(testTenantID, uuid.New()) + originatingMeta := backend.NewBlockMeta(testTenantID, uuid.New(), "should_be_ignored", backend.EncGZIP) originatingMeta.StartTime = time.Now().Add(-5 * time.Minute) originatingMeta.EndTime = time.Now().Add(5 * time.Minute) iterator := v0.NewRecordIterator(appender.Records(), bytes.NewReader(buffer.Bytes())) - block, err := NewCompleteBlock(originatingMeta, iterator, .01, numMsgs, indexDownsample, tempDir, "") + block, err := NewCompleteBlock(cfg, originatingMeta, iterator, numMsgs, tempDir, "") require.NoError(t, err, "unexpected error completing block") // test downsample config - require.Equal(t, numMsgs/indexDownsample+1, len(block.records)) + require.Equal(t, numMsgs/cfg.IndexDownsample+1, len(block.records)) require.True(t, block.FlushedTime().IsZero()) require.True(t, bytes.Equal(block.meta.MinID, minID)) @@ -179,3 +197,100 @@ func completeBlock(t *testing.T, tempDir string) (*CompleteBlock, [][]byte, [][] return block, ids, reqs } + +const benchDownsample = 200 + +func BenchmarkWriteGzip(b *testing.B) { + benchmarkCompressBlock(b, backend.EncGZIP, benchDownsample, false) +} +func BenchmarkWriteSnappy(b *testing.B) { + benchmarkCompressBlock(b, backend.EncSnappy, benchDownsample, false) +} +func BenchmarkWriteLZ4256(b *testing.B) { + benchmarkCompressBlock(b, backend.EncLZ4_256k, benchDownsample, false) +} +func BenchmarkWriteLZ41M(b *testing.B) { + benchmarkCompressBlock(b, backend.EncLZ4_1M, benchDownsample, false) +} +func BenchmarkWriteNone(b *testing.B) { + benchmarkCompressBlock(b, backend.EncNone, benchDownsample, false) +} + +func BenchmarkWriteZstd(b *testing.B) { + benchmarkCompressBlock(b, backend.EncZstd, benchDownsample, false) +} + +func BenchmarkReadGzip(b *testing.B) { + benchmarkCompressBlock(b, backend.EncGZIP, benchDownsample, true) +} +func BenchmarkReadSnappy(b *testing.B) { + benchmarkCompressBlock(b, backend.EncSnappy, benchDownsample, true) +} +func BenchmarkReadLZ4256(b *testing.B) { + benchmarkCompressBlock(b, backend.EncLZ4_256k, benchDownsample, true) +} +func BenchmarkReadLZ41M(b *testing.B) { + benchmarkCompressBlock(b, backend.EncLZ4_1M, benchDownsample, true) +} +func BenchmarkReadNone(b *testing.B) { + benchmarkCompressBlock(b, backend.EncNone, benchDownsample, true) +} + +func BenchmarkReadZstd(b *testing.B) { + benchmarkCompressBlock(b, backend.EncZstd, benchDownsample, true) +} + +// Download a block from your backend and place in ./benchmark_block/fake/ +//nolint:unparam +func benchmarkCompressBlock(b *testing.B, encoding backend.Encoding, indexDownsample int, benchRead bool) { + tempDir, err := ioutil.TempDir("/tmp", "") + defer os.RemoveAll(tempDir) + require.NoError(b, err, "unexpected error creating temp dir") + + r, _, _, err := local.New(&local.Config{ + Path: "./benchmark_block", + }) + require.NoError(b, err, "error creating backend") + + backendBlock, err := NewBackendBlock(backend.NewBlockMeta("fake", uuid.MustParse("9f15417a-1242-40e4-9de3-a057d3b176c1"), "v0", backend.EncNone), r) + require.NoError(b, err, "error creating backend block") + + iterator, err := backendBlock.Iterator(10 * 1024 * 1024) + require.NoError(b, err, "error creating iterator") + + if !benchRead { + b.ResetTimer() + } + + originatingMeta := backend.NewBlockMeta(testTenantID, uuid.New(), "should_be_ignored", backend.EncGZIP) + cb, err := NewCompleteBlock(&BlockConfig{ + IndexDownsample: indexDownsample, + BloomFP: .05, + Encoding: encoding, + }, originatingMeta, iterator, 10000, tempDir, "") + require.NoError(b, err, "error creating block") + + lastRecord := cb.records[len(cb.records)-1] + fmt.Println("size: ", lastRecord.Start+uint64(lastRecord.Length)) + + if !benchRead { + return + } + + b.ResetTimer() + file, err := os.Open(cb.fullFilename()) + require.NoError(b, err) + pr, err := v1.NewPageReader(file, encoding) + require.NoError(b, err) + iterator = v1.NewPagedIterator(10*1024*1024, common.Records(cb.records), pr) + + for { + id, _, err := iterator.Next() + if err != io.EOF { + require.NoError(b, err) + } + if id == nil { + break + } + } +} diff --git a/tempodb/encoding/config.go b/tempodb/encoding/config.go new file mode 100644 index 00000000000..53606edee0f --- /dev/null +++ b/tempodb/encoding/config.go @@ -0,0 +1,27 @@ +package encoding + +import ( + "fmt" + + "github.com/grafana/tempo/tempodb/backend" +) + +// BlockConfig holds configuration options for newly created blocks +type BlockConfig struct { + IndexDownsample int `yaml:"index_downsample"` + BloomFP float64 `yaml:"bloom_filter_false_positive"` + Encoding backend.Encoding `yaml:"encoding"` +} + +// ValidateConfig returns true if the config is valid +func ValidateConfig(b *BlockConfig) error { + if b.IndexDownsample == 0 { + return fmt.Errorf("Non-zero index downsample required") + } + + if b.BloomFP <= 0.0 { + return fmt.Errorf("invalid bloom filter fp rate %v", b.BloomFP) + } + + return nil +} diff --git a/tempodb/encoding/v0/appender.go b/tempodb/encoding/v0/appender.go index f9587f73421..91362f50a43 100644 --- a/tempodb/encoding/v0/appender.go +++ b/tempodb/encoding/v0/appender.go @@ -25,7 +25,7 @@ func NewAppender(writer io.Writer) common.Appender { // Append appends the id/object to the writer. Note that the caller is giving up ownership of the two byte arrays backing the slices. // Copies should be made and passed in if this is a problem func (a *appender) Append(id common.ID, b []byte) error { - length, err := marshalObjectToWriter(id, b, a.writer) + length, err := MarshalObjectToWriter(id, b, a.writer) if err != nil { return err } @@ -57,6 +57,6 @@ func (a *appender) DataLength() uint64 { return a.currentOffset } -func (a *appender) Complete() { - +func (a *appender) Complete() error { + return nil } diff --git a/tempodb/encoding/v0/appender_buffered.go b/tempodb/encoding/v0/appender_buffered.go index f4227f9915e..f22f53e6385 100644 --- a/tempodb/encoding/v0/appender_buffered.go +++ b/tempodb/encoding/v0/appender_buffered.go @@ -29,7 +29,7 @@ func NewBufferedAppender(writer io.Writer, indexDownsample int, totalObjectsEsti // Append appends the id/object to the writer. Note that the caller is giving up ownership of the two byte arrays backing the slices. // Copies should be made and passed in if this is a problem func (a *bufferedAppender) Append(id common.ID, b []byte) error { - length, err := marshalObjectToWriter(id, b, a.writer) + length, err := MarshalObjectToWriter(id, b, a.writer) if err != nil { return err } @@ -65,10 +65,12 @@ func (a *bufferedAppender) DataLength() uint64 { return a.currentOffset } -func (a *bufferedAppender) Complete() { +func (a *bufferedAppender) Complete() error { if a.currentRecord == nil { - return + return nil } a.records = append(a.records, a.currentRecord) a.currentRecord = nil + + return nil } diff --git a/tempodb/encoding/v0/backend_block.go b/tempodb/encoding/v0/backend_block.go deleted file mode 100644 index 6b6f6248c4b..00000000000 --- a/tempodb/encoding/v0/backend_block.go +++ /dev/null @@ -1,105 +0,0 @@ -package v0 - -import ( - "bytes" - "context" - "fmt" - - "github.com/opentracing/opentracing-go" - willf_bloom "github.com/willf/bloom" - - "github.com/grafana/tempo/tempodb/backend" - "github.com/grafana/tempo/tempodb/encoding/common" -) - -type BackendBlock struct { - meta *backend.BlockMeta -} - -// NewBackendBlock returns a block used for finding traces in the backend -func NewBackendBlock(meta *backend.BlockMeta) *BackendBlock { - return &BackendBlock{ - meta: meta, - } -} - -// Find searches a block for the ID and returns an object if found. -func (b *BackendBlock) Find(ctx context.Context, r backend.Reader, id common.ID, metrics *common.FindMetrics) ([]byte, error) { - var err error - span, ctx := opentracing.StartSpanFromContext(ctx, "BackendBlock.Find") - defer func() { - if err != nil { - span.SetTag("error", true) - } - span.Finish() - }() - - span.SetTag("block", b.meta.BlockID.String()) - - shardKey := common.ShardKeyForTraceID(id) - blockID := b.meta.BlockID - tenantID := b.meta.TenantID - - bloomBytes, err := r.Read(ctx, bloomName(shardKey), blockID, tenantID) - if err != nil { - return nil, fmt.Errorf("error retrieving bloom %w", err) - } - - filter := &willf_bloom.BloomFilter{} - _, err = filter.ReadFrom(bytes.NewReader(bloomBytes)) - if err != nil { - return nil, fmt.Errorf("error parsing bloom %w", err) - } - - metrics.BloomFilterReads.Inc() - metrics.BloomFilterBytesRead.Add(int32(len(bloomBytes))) - if !filter.Test(id) { - return nil, nil - } - - indexBytes, err := r.Read(ctx, nameIndex, blockID, tenantID) - metrics.IndexReads.Inc() - metrics.IndexBytesRead.Add(int32(len(indexBytes))) - if err != nil { - return nil, fmt.Errorf("error reading index %w", err) - } - - record, err := findRecord(id, indexBytes) // todo: replace with backend.Finder - if err != nil { - return nil, fmt.Errorf("error finding record %w", err) - } - - if record == nil { - return nil, nil - } - - objectBytes := make([]byte, record.Length) - err = r.ReadRange(ctx, nameObjects, blockID, tenantID, record.Start, objectBytes) - metrics.BlockReads.Inc() - metrics.BlockBytesRead.Add(int32(len(objectBytes))) - if err != nil { - return nil, fmt.Errorf("error reading object %w", err) - } - - iter := NewIterator(bytes.NewReader(objectBytes)) - var foundObject []byte - for { - iterID, iterObject, err := iter.Next() - if iterID == nil { - break - } - if err != nil { - return nil, err - } - if bytes.Equal(iterID, id) { - foundObject = iterObject - break - } - } - return foundObject, nil -} - -// Iterator searches a block for the ID and returns an object if found. -func (b *BackendBlock) Iterator(chunkSizeBytes uint32, r backend.Reader) (common.Iterator, error) { - return NewBackendIterator(b.meta.TenantID, b.meta.BlockID, chunkSizeBytes, r) -} diff --git a/tempodb/encoding/v0/block.go b/tempodb/encoding/v0/block.go index 3fb05e164f8..77ddc1469f2 100644 --- a/tempodb/encoding/v0/block.go +++ b/tempodb/encoding/v0/block.go @@ -11,12 +11,15 @@ import ( ) const ( - nameObjects = "data" - nameIndex = "index" + // NameObjects names the backend data object + NameObjects = "data" + // NameIndex names the backend index object + NameIndex = "index" nameBloomPrefix = "bloom-" ) -func bloomName(shard int) string { +// BloomName returns the backend bloom name for the given shard +func BloomName(shard int) string { return nameBloomPrefix + strconv.Itoa(shard) } @@ -33,14 +36,14 @@ func WriteBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMe } // index - err = w.Write(ctx, nameIndex, meta.BlockID, meta.TenantID, index) + err = w.Write(ctx, NameIndex, meta.BlockID, meta.TenantID, index) if err != nil { return fmt.Errorf("unexpected error writing index %w", err) } // bloom for i, bloom := range blooms { - err := w.Write(ctx, bloomName(i), meta.BlockID, meta.TenantID, bloom) + err := w.Write(ctx, BloomName(i), meta.BlockID, meta.TenantID, bloom) if err != nil { return fmt.Errorf("unexpected error writing bloom-%d %w", i, err) } @@ -57,10 +60,10 @@ func WriteBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMe // WriteBlockData writes the data object from an io.Reader to the backend.Writer func WriteBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, r io.Reader, size int64) error { - return w.WriteReader(ctx, nameObjects, meta.BlockID, meta.TenantID, r, size) + return w.WriteReader(ctx, NameObjects, meta.BlockID, meta.TenantID, r, size) } // AppendBlockData appends the bytes passed to the block data func AppendBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, tracker backend.AppendTracker, buffer []byte) (backend.AppendTracker, error) { - return w.Append(ctx, nameObjects, meta.BlockID, meta.TenantID, tracker, buffer) + return w.Append(ctx, NameObjects, meta.BlockID, meta.TenantID, tracker, buffer) } diff --git a/tempodb/encoding/v0/finder_deduping.go b/tempodb/encoding/v0/finder_deduping.go deleted file mode 100644 index ca63572fa76..00000000000 --- a/tempodb/encoding/v0/finder_deduping.go +++ /dev/null @@ -1,90 +0,0 @@ -package v0 - -import ( - "bytes" - "io" - "sort" - - "github.com/grafana/tempo/tempodb/encoding/common" -) - -type dedupingFinder struct { - ra io.ReaderAt - sortedRecords []*common.Record - combiner common.ObjectCombiner -} - -// NewDedupingFinder returns a dedupingFinder. This finder is used for searching -// a set of records and returning an object. If a set of consecutive records has -// matching ids they will be combined using the ObjectCombiner. -func NewDedupingFinder(sortedRecords []*common.Record, ra io.ReaderAt, combiner common.ObjectCombiner) common.Finder { - return &dedupingFinder{ - ra: ra, - sortedRecords: sortedRecords, - combiner: combiner, - } -} - -func (f *dedupingFinder) Find(id common.ID) ([]byte, error) { - i := sort.Search(len(f.sortedRecords), func(idx int) bool { - return bytes.Compare(f.sortedRecords[idx].ID, id) >= 0 - }) - - if i < 0 || i >= len(f.sortedRecords) { - return nil, nil - } - - var bytesFound []byte - - for { - record := f.sortedRecords[i] - - bytesOne, err := f.findOne(id, record) - if err != nil { - return nil, err - } - - bytesFound = f.combiner.Combine(bytesFound, bytesOne) - - // we need to check the next record to see if it also matches our id - i++ - if i >= len(f.sortedRecords) { - break - } - - if !bytes.Equal(f.sortedRecords[i].ID, id) { - break - } - } - - return bytesFound, nil -} - -func (f *dedupingFinder) findOne(id common.ID, record *common.Record) ([]byte, error) { - buff := make([]byte, record.Length) - _, err := f.ra.ReadAt(buff, int64(record.Start)) - if err != nil { - return nil, err - } - - iter := NewIterator(bytes.NewReader(buff)) - iter, err = NewDedupingIterator(iter, f.combiner) - if err != nil { - return nil, err - } - - for { - foundID, b, err := iter.Next() - if foundID == nil { - break - } - if err != nil { - return nil, err - } - if bytes.Equal(foundID, id) { - return b, nil - } - } - - return nil, nil -} diff --git a/tempodb/encoding/v0/finder_paged.go b/tempodb/encoding/v0/finder_paged.go new file mode 100644 index 00000000000..d276d48b9ce --- /dev/null +++ b/tempodb/encoding/v0/finder_paged.go @@ -0,0 +1,93 @@ +package v0 + +import ( + "bytes" + "errors" + + "github.com/grafana/tempo/tempodb/encoding/common" +) + +type pagedFinder struct { + r common.PageReader + index common.IndexReader + combiner common.ObjectCombiner +} + +// NewPagedFinder returns a paged. This finder is used for searching +// a set of records and returning an object. If a set of consecutive records has +// matching ids they will be combined using the ObjectCombiner. +func NewPagedFinder(index common.IndexReader, r common.PageReader, combiner common.ObjectCombiner) common.Finder { + return &pagedFinder{ + r: r, + index: index, + combiner: combiner, + } +} + +func (f *pagedFinder) Find(id common.ID) ([]byte, error) { + var bytesFound []byte + record, i := f.index.Find(id) + + if record == nil { + return nil, nil + } + + for { + bytesOne, err := f.findOne(id, record) + if err != nil { + return nil, err + } + + if f.combiner == nil { + bytesFound = bytesOne + break + } + + bytesFound = f.combiner.Combine(bytesFound, bytesOne) + + // we need to check the next record to see if it also matches our id + i++ + record = f.index.At(i) + if record == nil { + break + } + if !bytes.Equal(record.ID, id) { + break + } + } + + return bytesFound, nil +} + +func (f *pagedFinder) findOne(id common.ID, record *common.Record) ([]byte, error) { + pages, err := f.r.Read([]*common.Record{record}) + if err != nil { + return nil, err + } + if len(pages) == 0 { + return nil, errors.New("unexpected 0 length pages in findOne") + } + + iter := NewIterator(bytes.NewReader(pages[0])) + if f.combiner != nil { + iter, err = NewDedupingIterator(iter, f.combiner) + } + if err != nil { + return nil, err + } + + for { + foundID, b, err := iter.Next() + if foundID == nil { + break + } + if err != nil { + return nil, err + } + if bytes.Equal(foundID, id) { + return b, nil + } + } + + return nil, nil +} diff --git a/tempodb/encoding/v0/index_reader.go b/tempodb/encoding/v0/index_reader.go new file mode 100644 index 00000000000..7b3d8cc28ee --- /dev/null +++ b/tempodb/encoding/v0/index_reader.go @@ -0,0 +1,56 @@ +package v0 + +import ( + "bytes" + "fmt" + "sort" + + "github.com/grafana/tempo/tempodb/encoding/common" +) + +type readerBytes struct { + index []byte +} + +// NewIndexReader returns an index reader for a byte slice of marshalled +// ordered records. +func NewIndexReader(index []byte) (common.IndexReader, error) { + mod := len(index) % recordLength + if mod != 0 { + return nil, fmt.Errorf("records are an unexpected number of bytes %d", len(index)) + } + + return &readerBytes{ + index: index, + }, nil +} + +func (r *readerBytes) At(i int) *common.Record { + if i < 0 || i >= len(r.index)/recordLength { + return nil + } + + buff := r.index[i*recordLength : (i+1)*recordLength] + return unmarshalRecord(buff) +} + +func (r *readerBytes) Find(id common.ID) (*common.Record, int) { + numRecords := recordCount(r.index) + var record *common.Record + + i := sort.Search(numRecords, func(i int) bool { + buff := r.index[i*recordLength : (i+1)*recordLength] + record = unmarshalRecord(buff) + + return bytes.Compare(record.ID, id) >= 0 + }) + + if i >= 0 && i < numRecords { + buff := r.index[i*recordLength : (i+1)*recordLength] + record = unmarshalRecord(buff) + + return record, i + } + + return nil, -1 +} diff --git a/tempodb/encoding/v0/index_reader_test.go b/tempodb/encoding/v0/index_reader_test.go new file mode 100644 index 00000000000..414f61a5e9a --- /dev/null +++ b/tempodb/encoding/v0/index_reader_test.go @@ -0,0 +1,90 @@ +package v0 + +import ( + "testing" + + "github.com/grafana/tempo/tempodb/encoding/common" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestIndexReader(t *testing.T) { + record1 := &common.Record{ + ID: []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Start: 0, + Length: 1, + } + record2 := &common.Record{ + ID: []byte{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Start: 1, + Length: 2, + } + record3 := &common.Record{ + ID: []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + Start: 2, + Length: 3, + } + + recordBytes, err := marshalRecords([]*common.Record{record1, record2, record3}) + require.NoError(t, err) + + tests := []struct { + recordBytes []byte + expectedError bool + at int + expectedAt *common.Record + find common.ID + expectedFind *common.Record + expectedFindIndex int + }{ + { + recordBytes: []byte{}, + expectedFindIndex: -1, + }, + { + recordBytes: []byte{0x01}, + expectedError: true, + }, + { + recordBytes: []byte{}, + at: 12, + expectedFindIndex: -1, + }, + { + recordBytes: recordBytes, + at: 0, + expectedAt: record1, + find: []byte{0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + expectedFind: record1, + }, + { + recordBytes: recordBytes, + at: 1, + expectedAt: record2, + find: []byte{0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + expectedFind: record2, + expectedFindIndex: 1, + }, + { + recordBytes: recordBytes, + at: 2, + expectedAt: record3, + find: []byte{0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, + expectedFind: record3, + expectedFindIndex: 2, + }, + } + + for _, tc := range tests { + reader, err := NewIndexReader(tc.recordBytes) + if tc.expectedError { + assert.Error(t, err) + continue + } + + assert.Equal(t, tc.expectedAt, reader.At(tc.at)) + actualFind, actualIndex := reader.Find(tc.find) + assert.Equal(t, tc.expectedFind, actualFind) + assert.Equal(t, tc.expectedFindIndex, actualIndex) + } +} diff --git a/tempodb/encoding/v0/iterator_backend.go b/tempodb/encoding/v0/iterator_backend.go deleted file mode 100644 index b871d4b6570..00000000000 --- a/tempodb/encoding/v0/iterator_backend.go +++ /dev/null @@ -1,98 +0,0 @@ -package v0 - -import ( - "context" - "io" - "math" - - "github.com/google/uuid" - "github.com/grafana/tempo/tempodb/backend" - "github.com/grafana/tempo/tempodb/encoding/common" - "github.com/pkg/errors" -) - -type backendIterator struct { - tenantID string - blockID uuid.UUID - r backend.Reader - - indexBuffer []byte - objectsBuffer []byte - activeObjectsBuffer []byte -} - -// NewBackendIterator returns a backendIterator. This iterator is used to iterate -// through objects stored in object storage. -func NewBackendIterator(tenantID string, blockID uuid.UUID, chunkSizeBytes uint32, reader backend.Reader) (common.Iterator, error) { - index, err := reader.Read(context.TODO(), nameIndex, blockID, tenantID) - if err != nil { - return nil, err - } - - return &backendIterator{ - tenantID: tenantID, - blockID: blockID, - r: reader, - indexBuffer: index, - objectsBuffer: make([]byte, chunkSizeBytes), - }, err -} - -// For performance reasons the ID and object slices returned from this method are owned by -// the iterator. If you have need to keep these values for longer than a single iteration -// you need to make a copy of them. -func (i *backendIterator) Next() (common.ID, []byte, error) { - var err error - var id common.ID - var object []byte - - i.activeObjectsBuffer, id, object, err = unmarshalAndAdvanceBuffer(i.activeObjectsBuffer) - if err != nil && err != io.EOF { - return nil, nil, errors.Wrap(err, "error iterating through object in backend") - } else if err != io.EOF { - return id, object, nil - } - - // objects reader was empty, check the index - // if no index left, EOF - if len(i.indexBuffer) == 0 { - return nil, nil, io.EOF - } - - // pull next n bytes into objects - var start uint64 - var length uint32 - - start = math.MaxUint64 - for len(i.indexBuffer) > 0 { - record := unmarshalRecord(i.indexBuffer[:recordLength]) - - // see if we can fit this record in. we have to get at least one record in - if length+record.Length > uint32(len(i.objectsBuffer)) && start != math.MaxUint64 { - break - } - // advance index buffer - i.indexBuffer = i.indexBuffer[recordLength:] - - if start == math.MaxUint64 { - start = record.Start - } - length += record.Length - } - if length > uint32(len(i.objectsBuffer)) { - i.objectsBuffer = make([]byte, length) - } - i.activeObjectsBuffer = i.objectsBuffer[:length] - err = i.r.ReadRange(context.TODO(), nameObjects, i.blockID, i.tenantID, start, i.activeObjectsBuffer) - if err != nil { - return nil, nil, errors.Wrap(err, "error iterating through object in backend") - } - - // attempt to get next object from objects - i.activeObjectsBuffer, id, object, err = unmarshalAndAdvanceBuffer(i.activeObjectsBuffer) - if err != nil { - return nil, nil, errors.Wrap(err, "error iterating through object in backend") - } - - return id, object, nil -} diff --git a/tempodb/encoding/v0/iterator_paged.go b/tempodb/encoding/v0/iterator_paged.go new file mode 100644 index 00000000000..0d3c4ecfe4e --- /dev/null +++ b/tempodb/encoding/v0/iterator_paged.go @@ -0,0 +1,95 @@ +package v0 + +import ( + "io" + + "github.com/grafana/tempo/tempodb/encoding/common" + "github.com/pkg/errors" +) + +type pagedIterator struct { + pageReader common.PageReader + indexReader common.IndexReader + currentIndex int + + chunkSizeBytes uint32 + pages [][]byte + activePage []byte +} + +// NewPagedIterator returns a backendIterator. This iterator is used to iterate +// through objects stored in object storage. +func NewPagedIterator(chunkSizeBytes uint32, indexReader common.IndexReader, pageReader common.PageReader) common.Iterator { + return &pagedIterator{ + pageReader: pageReader, + indexReader: indexReader, + chunkSizeBytes: chunkSizeBytes, + } +} + +// For performance reasons the ID and object slices returned from this method are owned by +// the iterator. If you have need to keep these values for longer than a single iteration +// you need to make a copy of them. +func (i *pagedIterator) Next() (common.ID, []byte, error) { + var err error + var id common.ID + var object []byte + + // if the current page is empty advance to the next one + if len(i.activePage) == 0 && len(i.pages) > 0 { + i.activePage = i.pages[0] + i.pages = i.pages[1:] // advance pages + } + + i.activePage, id, object, err = unmarshalAndAdvanceBuffer(i.activePage) + if err != nil && err != io.EOF { + return nil, nil, errors.Wrap(err, "error iterating through object in backend") + } else if err != io.EOF { + return id, object, nil + } + + // objects reader was empty, check the index + // if no index left, EOF + currentRecord := i.indexReader.At(i.currentIndex) + if currentRecord == nil { + return nil, nil, io.EOF + } + + // pull next n bytes into objects + var length uint32 + records := make([]*common.Record, 0, 5) // 5? why not? + for currentRecord != nil { + //record := unmarshalRecord(i.indexBuffer[:recordLength]) + // see if we can fit this record in. we have to get at least one record in + if length+currentRecord.Length > i.chunkSizeBytes && len(records) != 0 { + break + } + + // add currentRecord to the batch + records = append(records, currentRecord) + length += currentRecord.Length + + // get next + i.currentIndex++ + currentRecord = i.indexReader.At(i.currentIndex) + } + + i.pages, err = i.pageReader.Read(records) + if err != nil { + return nil, nil, errors.Wrap(err, "error iterating through object in backend") + } + if len(i.pages) == 0 { + return nil, nil, errors.Wrap(err, "unexpected 0 length pages in pagedIterator") + } + + i.activePage = i.pages[0] + i.pages = i.pages[1:] // advance pages + + // attempt to get next object from objects + i.activePage, id, object, err = unmarshalAndAdvanceBuffer(i.activePage) + if err != nil { + return nil, nil, errors.Wrap(err, "error iterating through object in backend") + } + + return id, object, nil +} diff --git a/tempodb/encoding/v0/object.go b/tempodb/encoding/v0/object.go index 54dcb48c3e2..9832a9cb681 100644 --- a/tempodb/encoding/v0/object.go +++ b/tempodb/encoding/v0/object.go @@ -17,7 +17,7 @@ const ( | total length | id length | id | object bytes | */ -func marshalObjectToWriter(id common.ID, b []byte, w io.Writer) (int, error) { +func MarshalObjectToWriter(id common.ID, b []byte, w io.Writer) (int, error) { idLength := len(id) totalLength := len(b) + idLength + uint32Size*2 diff --git a/tempodb/encoding/v0/object_test.go b/tempodb/encoding/v0/object_test.go index c8f1caa86be..9d3b34d7efd 100644 --- a/tempodb/encoding/v0/object_test.go +++ b/tempodb/encoding/v0/object_test.go @@ -19,7 +19,7 @@ func TestMarshalUnmarshal(t *testing.T) { bReq, err := proto.Marshal(req) assert.NoError(t, err) - _, err = marshalObjectToWriter(id, bReq, buffer) + _, err = MarshalObjectToWriter(id, bReq, buffer) assert.NoError(t, err) outID, outObject, err := unmarshalObjectFromReader(buffer) @@ -46,7 +46,7 @@ func TestMarshalUnmarshalFromBuffer(t *testing.T) { bReq, err := proto.Marshal(req) assert.NoError(t, err) - _, err = marshalObjectToWriter(id, bReq, buffer) + _, err = MarshalObjectToWriter(id, bReq, buffer) assert.NoError(t, err) } diff --git a/tempodb/encoding/v0/page_reader.go b/tempodb/encoding/v0/page_reader.go new file mode 100644 index 00000000000..1eb37e938c7 --- /dev/null +++ b/tempodb/encoding/v0/page_reader.go @@ -0,0 +1,64 @@ +package v0 + +import ( + "fmt" + "io" + + "github.com/grafana/tempo/tempodb/encoding/common" +) + +type pageReader struct { + r io.ReaderAt +} + +// NewPageReader returns a new v0 pageReader. A v0 pageReader +// is basically a no-op. It retrieves the requested byte +// ranges and returns them as is. +// A pages "format" is a contiguous collection of objects +// | -- object -- | -- object -- | ... +func NewPageReader(r io.ReaderAt) common.PageReader { + return &pageReader{ + r: r, + } +} + +// Read returns the pages requested in the passed records. It +// assumes that if there are multiple records they are ordered +// and contiguous +func (r *pageReader) Read(records []*common.Record) ([][]byte, error) { + if len(records) == 0 { + return nil, nil + } + + start := records[0].Start + length := uint32(0) + for _, record := range records { + length += record.Length + } + + contiguousPages := make([]byte, length) + _, err := r.r.ReadAt(contiguousPages, int64(start)) + if err != nil { + return nil, err + } + + slicePages := make([][]byte, 0, len(records)) + cursor := uint32(0) + previousEnd := uint64(0) + for _, record := range records { + end := cursor + record.Length + if end > uint32(len(contiguousPages)) { + return nil, fmt.Errorf("record out of bounds while reading pages: %d, %d, %d, %d", cursor, record.Length, end, len(contiguousPages)) + } + + if previousEnd != record.Start && previousEnd != 0 { + return nil, fmt.Errorf("non-contiguous pages requested from pageReader: %d, %+v", previousEnd, record) + } + + slicePages = append(slicePages, contiguousPages[cursor:end]) + cursor += record.Length + previousEnd = record.Start + uint64(record.Length) + } + + return slicePages, nil +} diff --git a/tempodb/encoding/v0/page_reader_test.go b/tempodb/encoding/v0/page_reader_test.go new file mode 100644 index 00000000000..9f904834d2f --- /dev/null +++ b/tempodb/encoding/v0/page_reader_test.go @@ -0,0 +1,135 @@ +package v0 + +import ( + "bytes" + "testing" + + "github.com/grafana/tempo/tempodb/encoding/common" + "github.com/stretchr/testify/assert" +) + +func TestPageReader(t *testing.T) { + + tests := []struct { + readerBytes []byte + records []*common.Record + expectedBytes [][]byte + expectedError bool + }{ + {}, + { + records: []*common.Record{ + { + Start: 0, + Length: 1, + }, + }, + expectedError: true, + }, + { + readerBytes: []byte{0x01, 0x02}, + records: []*common.Record{ + { + Start: 0, + Length: 1, + }, + }, + expectedBytes: [][]byte{ + {0x01}, + }, + }, + { + readerBytes: []byte{0x01, 0x02}, + records: []*common.Record{ + { + Start: 1, + Length: 1, + }, + }, + expectedBytes: [][]byte{ + {0x02}, + }, + }, + { + readerBytes: []byte{0x01, 0x02}, + records: []*common.Record{ + { + Start: 0, + Length: 1, + }, + { + Start: 1, + Length: 1, + }, + }, + expectedBytes: [][]byte{ + {0x01}, + {0x02}, + }, + }, + { + readerBytes: []byte{0x01, 0x02}, + records: []*common.Record{ + { + Start: 0, + Length: 5, + }, + }, + expectedError: true, + }, + { + readerBytes: []byte{0x01, 0x02}, + records: []*common.Record{ + { + Start: 5, + Length: 5, + }, + }, + expectedError: true, + }, + { + readerBytes: []byte{0x01, 0x02, 0x03}, + records: []*common.Record{ + { + Start: 1, + Length: 1, + }, + { + Start: 2, + Length: 1, + }, + }, + expectedBytes: [][]byte{ + {0x02}, + {0x03}, + }, + }, + { + readerBytes: []byte{0x01, 0x02, 0x03}, + records: []*common.Record{ + { + Start: 0, + Length: 1, + }, + { + Start: 2, + Length: 1, + }, + }, + expectedError: true, + }, + } + + for _, tc := range tests { + reader := NewPageReader(bytes.NewReader(tc.readerBytes)) + actual, err := reader.Read(tc.records) + + if tc.expectedError { + assert.Error(t, err) + continue + } + + assert.NoError(t, err) + assert.Equal(t, tc.expectedBytes, actual) + } +} diff --git a/tempodb/encoding/v0/record.go b/tempodb/encoding/v0/record.go index 621820b06d1..d7a5d85c152 100644 --- a/tempodb/encoding/v0/record.go +++ b/tempodb/encoding/v0/record.go @@ -73,33 +73,6 @@ func unmarshalRecords(recordBytes []byte) ([]*common.Record, error) { return records, nil } -// binary search the bytes. records are not compressed and ordered -func findRecord(id common.ID, recordBytes []byte) (*common.Record, error) { - mod := len(recordBytes) % recordLength - if mod != 0 { - return nil, fmt.Errorf("records are an unexpected number of bytes %d", mod) - } - - numRecords := recordCount(recordBytes) - var record *common.Record - - i := sort.Search(numRecords, func(i int) bool { - buff := recordBytes[i*recordLength : (i+1)*recordLength] - record = unmarshalRecord(buff) - - return bytes.Compare(record.ID, id) >= 0 - }) - - if i >= 0 && i < numRecords { - buff := recordBytes[i*recordLength : (i+1)*recordLength] - record = unmarshalRecord(buff) - - return record, nil - } - - return nil, nil -} - func recordCount(b []byte) int { return len(b) / recordLength } diff --git a/tempodb/encoding/v0/record_test.go b/tempodb/encoding/v0/record_test.go index dbf4d5a6da8..be8264fea56 100644 --- a/tempodb/encoding/v0/record_test.go +++ b/tempodb/encoding/v0/record_test.go @@ -43,7 +43,7 @@ func TestMarshalUnmarshalRecords(t *testing.T) { assert.Equal(t, expected, actual) } -func TestFindRecord(t *testing.T) { +/*func TestFindRecord(t *testing.T) { // jpe - move to index_reader_test.go numRecords := 10 expected := make([]*common.Record, 0, numRecords) @@ -66,7 +66,7 @@ func TestFindRecord(t *testing.T) { assert.NoError(t, err, "unexpected error finding records") assert.Equal(t, r, found) } -} +}*/ func TestSortRecord(t *testing.T) { numRecords := 10 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-0 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-0 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-0 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-0 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-1 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-1 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-1 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-1 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-2 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-2 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-2 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-2 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-3 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-3 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-3 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-3 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-4 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-4 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-4 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-4 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-5 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-5 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-5 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-5 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-6 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-6 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-6 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-6 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-7 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-7 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-7 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-7 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-8 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-8 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-8 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-8 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-9 b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-9 similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-9 rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/bloom-9 diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/data b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/data similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/data rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/data diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/index b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/index similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/index rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/index diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/meta.json b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/meta.json similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/meta.json rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/meta.json diff --git a/tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/readme.md b/tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/readme.md similarity index 100% rename from tempodb/encoding/v0/v0_test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/readme.md rename to tempodb/encoding/v0test/fake/00f5a116-639e-4880-bbe7-be9b0c828033/readme.md diff --git a/tempodb/encoding/v1/appender_buffered.go b/tempodb/encoding/v1/appender_buffered.go new file mode 100644 index 00000000000..d5d74c15ee9 --- /dev/null +++ b/tempodb/encoding/v1/appender_buffered.go @@ -0,0 +1,137 @@ +package v1 + +import ( + "bytes" + "io" + + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/encoding/common" + v0 "github.com/grafana/tempo/tempodb/encoding/v0" +) + +// meteredWriter is a struct that is used to count the number of bytes +// written to a block after compression. Unfortunately the compression io.Reader +// returns bytes before compression so this is necessary to know the actual number of +// byte written. +type meteredWriter struct { + wrappedWriter io.Writer + bytesWritten int +} + +func (m *meteredWriter) Write(p []byte) (n int, err error) { + m.bytesWritten += len(p) + return m.wrappedWriter.Write(p) +} + +// buffer up in memory and then write a big ol' compressed block o shit at once +// used by CompleteBlock/CompactorBlock +// may need additional code? i.e. a signal that it's "about to flush" triggering a compression +type bufferedAppender struct { + v0Buffer *bytes.Buffer + outputWriter *meteredWriter + pool WriterPool + records []*common.Record + + totalObjects int + currentOffset uint64 + currentRecord *common.Record + indexDownsample int +} + +// NewBufferedAppender returns an bufferedAppender. This appender builds a writes to +// the provided writer and also builds a downsampled records slice. +func NewBufferedAppender(writer io.Writer, encoding backend.Encoding, indexDownsample int, totalObjectsEstimate int) (common.Appender, error) { + pool, err := getWriterPool(encoding) + if err != nil { + return nil, err + } + + return &bufferedAppender{ + v0Buffer: &bytes.Buffer{}, + indexDownsample: indexDownsample, + records: make([]*common.Record, 0, totalObjectsEstimate/indexDownsample+1), + + outputWriter: &meteredWriter{ + wrappedWriter: writer, + }, + pool: pool, + }, nil +} + +// Append appends the id/object to the writer. Note that the caller is giving up ownership of the two byte arrays backing the slices. +// Copies should be made and passed in if this is a problem +func (a *bufferedAppender) Append(id common.ID, b []byte) error { + _, err := v0.MarshalObjectToWriter(id, b, a.v0Buffer) + if err != nil { + return err + } + + if a.currentRecord == nil { + a.currentRecord = &common.Record{ + Start: a.currentOffset, + } + } + a.totalObjects++ + a.currentRecord.ID = id + + if a.totalObjects%a.indexDownsample == 0 { + err := a.flush() + if err != nil { + return err + } + } + + return nil +} + +func (a *bufferedAppender) Records() []*common.Record { + return a.records +} + +func (a *bufferedAppender) Length() int { + return a.totalObjects +} + +func (a *bufferedAppender) DataLength() uint64 { + return a.currentOffset +} + +// compress everything left and flush? +func (a *bufferedAppender) Complete() error { + err := a.flush() + if err != nil { + return err + } + + return nil +} + +func (a *bufferedAppender) flush() error { + if a.currentRecord == nil { + return nil + } + + compressedWriter := a.pool.GetWriter(a.outputWriter) + + // write compressed data + buffer := a.v0Buffer.Bytes() + _, err := compressedWriter.Write(buffer) + if err != nil { + return err + } + + // now clear our v0 buffer so we can start the new block page + compressedWriter.Close() + a.v0Buffer.Reset() + a.pool.PutWriter(compressedWriter) + + a.currentOffset += uint64(a.outputWriter.bytesWritten) + a.currentRecord.Length += uint32(a.outputWriter.bytesWritten) + a.outputWriter.bytesWritten = 0 + + // update index + a.records = append(a.records, a.currentRecord) + a.currentRecord = nil + + return nil +} diff --git a/tempodb/encoding/v1/block.go b/tempodb/encoding/v1/block.go new file mode 100644 index 00000000000..943449ee392 --- /dev/null +++ b/tempodb/encoding/v1/block.go @@ -0,0 +1,43 @@ +package v1 + +import ( + "context" + "io" + + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/encoding/common" + v0 "github.com/grafana/tempo/tempodb/encoding/v0" +) + +// These methods control the layout of the block in the backend. Nothing has changed between v0 and v1 here +// so we will just passthrough + +// NameObjects returns v0 name +func NameObjects() string { + return v0.NameObjects +} + +// NameIndex returns v0 name +func NameIndex() string { + return v0.NameIndex +} + +// BloomName returns v0 name +func BloomName(shard int) string { + return v0.BloomName(shard) +} + +// WriteBlockMeta writes the bloom filter, meta and index to the passed in backend.Writer +func WriteBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, records []*common.Record, b *common.ShardedBloomFilter) error { + return v0.WriteBlockMeta(ctx, w, meta, records, b) +} + +// WriteBlockData writes the data object from an io.Reader to the backend.Writer +func WriteBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, r io.Reader, size int64) error { + return v0.WriteBlockData(ctx, w, meta, r, size) +} + +// AppendBlockData appends the bytes passed to the block data +func AppendBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, tracker backend.AppendTracker, buffer []byte) (backend.AppendTracker, error) { + return v0.AppendBlockData(ctx, w, meta, tracker, buffer) +} diff --git a/tempodb/encoding/v1/finder_paged.go b/tempodb/encoding/v1/finder_paged.go new file mode 100644 index 00000000000..da2043ebe4b --- /dev/null +++ b/tempodb/encoding/v1/finder_paged.go @@ -0,0 +1,12 @@ +package v1 + +import ( + "github.com/grafana/tempo/tempodb/encoding/common" + v0 "github.com/grafana/tempo/tempodb/encoding/v0" +) + +// NewPagedFinder returns a v0.pagedFinder. There are no changes +// to logic from the v0 finder and all compression changes are handled in the pageReader +func NewPagedFinder(index common.IndexReader, r common.PageReader, combiner common.ObjectCombiner) common.Finder { + return v0.NewPagedFinder(index, r, combiner) +} diff --git a/tempodb/encoding/v1/index_reader.go b/tempodb/encoding/v1/index_reader.go new file mode 100644 index 00000000000..d037a3c63ed --- /dev/null +++ b/tempodb/encoding/v1/index_reader.go @@ -0,0 +1,13 @@ +package v1 + +import ( + "github.com/grafana/tempo/tempodb/encoding/common" + v0 "github.com/grafana/tempo/tempodb/encoding/v0" +) + +// NewIndexReader returns an index reader for a byte slice of marshalled +// ordered records. +// The index has not changed between v0 and v1. +func NewIndexReader(index []byte) (common.IndexReader, error) { + return v0.NewIndexReader(index) +} diff --git a/tempodb/encoding/v1/iterator_paged.go b/tempodb/encoding/v1/iterator_paged.go new file mode 100644 index 00000000000..67fa675e7e1 --- /dev/null +++ b/tempodb/encoding/v1/iterator_paged.go @@ -0,0 +1,12 @@ +package v1 + +import ( + "github.com/grafana/tempo/tempodb/encoding/common" + v0 "github.com/grafana/tempo/tempodb/encoding/v0" +) + +// NewPagedIterator returns a v0.backendIterator. There are no changes +// to logic from the v0 iterator and all compression changes are handled in the pageReader +func NewPagedIterator(chunkSizeBytes uint32, indexReader common.IndexReader, pageReader common.PageReader) common.Iterator { + return v0.NewPagedIterator(chunkSizeBytes, indexReader, pageReader) +} diff --git a/tempodb/encoding/v1/page_reader.go b/tempodb/encoding/v1/page_reader.go new file mode 100644 index 00000000000..cadb8345c77 --- /dev/null +++ b/tempodb/encoding/v1/page_reader.go @@ -0,0 +1,56 @@ +package v1 + +import ( + "bytes" + "io" + "io/ioutil" + + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/encoding/common" + v0 "github.com/grafana/tempo/tempodb/encoding/v0" +) + +type pageReader struct { + v0PageReader common.PageReader + + pool ReaderPool +} + +// NewPageReader constructs a v1 PageReader that handles compression +func NewPageReader(r io.ReaderAt, encoding backend.Encoding) (common.PageReader, error) { + pool, err := getReaderPool(encoding) + if err != nil { + return nil, err + } + + return &pageReader{ + v0PageReader: v0.NewPageReader(r), + pool: pool, + }, nil +} + +// Read returns the pages requested in the passed records. It +// assumes that if there are multiple records they are ordered +// and contiguous +func (r *pageReader) Read(records []*common.Record) ([][]byte, error) { + compressedPages, err := r.v0PageReader.Read(records) + if err != nil { + return nil, err + } + + // now decompress + decompressedPages := make([][]byte, 0, len(compressedPages)) + for _, page := range compressedPages { + reader := r.pool.GetReader(bytes.NewReader(page)) + + page, err := ioutil.ReadAll(reader) + r.pool.PutReader(reader) + if err != nil { + return nil, err + } + + decompressedPages = append(decompressedPages, page) + } + + return decompressedPages, nil +} diff --git a/tempodb/encoding/v1/page_reader_test.go b/tempodb/encoding/v1/page_reader_test.go new file mode 100644 index 00000000000..c29fe9b8616 --- /dev/null +++ b/tempodb/encoding/v1/page_reader_test.go @@ -0,0 +1,57 @@ +package v1 + +import ( + "bytes" + "testing" + + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/encoding/common" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestPageReader(t *testing.T) { + + tests := []struct { + readerBytes []byte + }{ + { + readerBytes: []byte{0x01, 0x02}, + }, + } + + for _, tc := range tests { + for _, enc := range backend.SupportedEncoding { + t.Run(enc.String(), func(t *testing.T) { + wPool, err := getWriterPool(enc) + require.NoError(t, err) + + buff := bytes.NewBuffer([]byte{}) + mw := &meteredWriter{ + wrappedWriter: buff, + } + writer := wPool.GetWriter(mw) + + _, err = writer.Write(tc.readerBytes) + require.NoError(t, err) + err = writer.Close() + require.NoError(t, err) + + encryptedBytes := buff.Bytes() + reader, err := NewPageReader(bytes.NewReader(encryptedBytes), enc) + require.NoError(t, err) + + actual, err := reader.Read([]*common.Record{ + { + Start: 0, + Length: uint32(mw.bytesWritten), + }, + }) + + assert.NoError(t, err) + assert.Len(t, actual, 1) + assert.Equal(t, tc.readerBytes, actual[0]) + }) + } + } +} diff --git a/tempodb/encoding/v1/pool.go b/tempodb/encoding/v1/pool.go new file mode 100644 index 00000000000..8446dd542b4 --- /dev/null +++ b/tempodb/encoding/v1/pool.go @@ -0,0 +1,333 @@ +package v1 + +import ( + "fmt" + "io" + "sync" + + "github.com/golang/snappy" + "github.com/grafana/tempo/tempodb/backend" + "github.com/klauspost/compress/gzip" + "github.com/klauspost/compress/zstd" + "github.com/pierrec/lz4/v4" + "github.com/prometheus/prometheus/pkg/pool" +) + +const maxEncoding = backend.EncZstd + +// WriterPool is a pool of io.Writer +// This is used by every chunk to avoid unnecessary allocations. +type WriterPool interface { + GetWriter(io.Writer) io.WriteCloser + PutWriter(io.WriteCloser) + Encoding() backend.Encoding +} + +// ReaderPool similar to WriterPool but for reading chunks. +type ReaderPool interface { + GetReader(io.Reader) io.Reader + PutReader(io.Reader) + Encoding() backend.Encoding +} + +var ( + // Gzip is the gnu zip compression pool + Gzip = GzipPool{level: gzip.DefaultCompression} + // Lz4_64k is the l4z compression pool, with 64k buffer size + Lz4_64k = LZ4Pool{bufferSize: 1 << 16} + // Lz4_256k uses 256k buffer + Lz4_256k = LZ4Pool{bufferSize: 1 << 18} + // Lz4_1M uses 1M buffer + Lz4_1M = LZ4Pool{bufferSize: 1 << 20} + // Lz4_4M uses 4M buffer + Lz4_4M = LZ4Pool{bufferSize: 1 << 22} + // Snappy is the snappy compression pool + Snappy SnappyPool + // Noop is the no compression pool + Noop NoopPool + // Zstd Pool + Zstd = ZstdPool{} + + // BytesBufferPool is a bytes buffer used for lines decompressed. + // Buckets [0.5KB,1KB,2KB,4KB,8KB] + BytesBufferPool = pool.New(1<<9, 1<<13, 2, func(size int) interface{} { return make([]byte, 0, size) }) +) + +func getWriterPool(enc backend.Encoding) (WriterPool, error) { + r, err := getReaderPool(enc) + if err != nil { + return nil, err + } + + return r.(WriterPool), nil +} + +func getReaderPool(enc backend.Encoding) (ReaderPool, error) { + switch enc { + case backend.EncNone: + return &Noop, nil + case backend.EncGZIP: + return &Gzip, nil + case backend.EncLZ4_64k: + return &Lz4_64k, nil + case backend.EncLZ4_256k: + return &Lz4_256k, nil + case backend.EncLZ4_1M: + return &Lz4_1M, nil + case backend.EncLZ4_4M: + return &Lz4_4M, nil + case backend.EncSnappy: + return &Snappy, nil + case backend.EncZstd: + return &Zstd, nil + default: + return nil, fmt.Errorf("Unknown pool encoding %d", enc) + } +} + +// GzipPool is a gun zip compression pool +type GzipPool struct { + readers sync.Pool + writers sync.Pool + level int +} + +// Encoding implements WriterPool and ReaderPool +func (pool *GzipPool) Encoding() backend.Encoding { + return backend.EncGZIP +} + +// GetReader gets or creates a new CompressionReader and reset it to read from src +func (pool *GzipPool) GetReader(src io.Reader) io.Reader { + if r := pool.readers.Get(); r != nil { + reader := r.(*gzip.Reader) + err := reader.Reset(src) + if err != nil { + panic(err) + } + return reader + } + reader, err := gzip.NewReader(src) + if err != nil { + panic(err) + } + return reader +} + +// PutReader places back in the pool a CompressionReader +func (pool *GzipPool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and reset it to write to dst +func (pool *GzipPool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*gzip.Writer) + writer.Reset(dst) + return writer + } + + level := pool.level + if level == 0 { + level = gzip.DefaultCompression + } + w, err := gzip.NewWriterLevel(dst, level) + if err != nil { + panic(err) // never happens, error is only returned on wrong compression level. + } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *GzipPool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +// LZ4Pool is an pool...of lz4s... +type LZ4Pool struct { + readers sync.Pool + writers sync.Pool + bufferSize uint32 // available values: 1<<16 (64k), 1<<18 (256k), 1<<20 (1M), 1<<22 (4M). Defaults to 4MB, if not set. +} + +// Encoding implements WriterPool and ReaderPool +func (pool *LZ4Pool) Encoding() backend.Encoding { + switch pool.bufferSize { + case 1 << 16: + return backend.EncLZ4_64k + case 1 << 18: + return backend.EncLZ4_256k + case 1 << 20: + return backend.EncLZ4_1M + case 1 << 22: + return backend.EncLZ4_4M + } + + return backend.EncNone +} + +// GetReader gets or creates a new CompressionReader and reset it to read from src +func (pool *LZ4Pool) GetReader(src io.Reader) io.Reader { + var r *lz4.Reader + if pooled := pool.readers.Get(); pooled != nil { + r = pooled.(*lz4.Reader) + r.Reset(src) + } else { + r = lz4.NewReader(src) + } + return r +} + +// PutReader places back in the pool a CompressionReader +func (pool *LZ4Pool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and reset it to write to dst +func (pool *LZ4Pool) GetWriter(dst io.Writer) io.WriteCloser { + var w *lz4.Writer + if fromPool := pool.writers.Get(); fromPool != nil { + w = fromPool.(*lz4.Writer) + w.Reset(dst) + } else { + w = lz4.NewWriter(dst) + } + err := w.Apply( + lz4.ChecksumOption(false), + lz4.BlockSizeOption(lz4.BlockSize(pool.bufferSize)), + lz4.CompressionLevelOption(lz4.Fast), + ) + if err != nil { + panic(err) + } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *LZ4Pool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +// SnappyPool is a really cool looking pool. Dang that pool is _snappy_. +type SnappyPool struct { + readers sync.Pool + writers sync.Pool +} + +// Encoding implements WriterPool and ReaderPool +func (pool *SnappyPool) Encoding() backend.Encoding { + return backend.EncSnappy +} + +// GetReader gets or creates a new CompressionReader and reset it to read from src +func (pool *SnappyPool) GetReader(src io.Reader) io.Reader { + if r := pool.readers.Get(); r != nil { + reader := r.(*snappy.Reader) + reader.Reset(src) + return reader + } + return snappy.NewReader(src) +} + +// PutReader places back in the pool a CompressionReader +func (pool *SnappyPool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and reset it to write to dst +func (pool *SnappyPool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*snappy.Writer) + writer.Reset(dst) + return writer + } + return snappy.NewBufferedWriter(dst) +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *SnappyPool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +// NoopPool is for people who think compression is for the weak +type NoopPool struct{} + +// Encoding implements WriterPool and ReaderPool +func (pool *NoopPool) Encoding() backend.Encoding { + return backend.EncNone +} + +// GetReader gets or creates a new CompressionReader and reset it to read from src +func (pool *NoopPool) GetReader(src io.Reader) io.Reader { + return src +} + +// PutReader places back in the pool a CompressionReader +func (pool *NoopPool) PutReader(reader io.Reader) {} + +type noopCloser struct { + io.Writer +} + +func (noopCloser) Close() error { return nil } + +// GetWriter gets or creates a new CompressionWriter and reset it to write to dst +func (pool *NoopPool) GetWriter(dst io.Writer) io.WriteCloser { + return noopCloser{dst} +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *NoopPool) PutWriter(writer io.WriteCloser) {} + +// ZstdPool is a zstd compression pool +type ZstdPool struct { + readers sync.Pool + writers sync.Pool +} + +// Encoding implements WriterPool and ReaderPool +func (pool *ZstdPool) Encoding() backend.Encoding { + return backend.EncZstd +} + +// GetReader gets or creates a new CompressionReader and reset it to read from src +func (pool *ZstdPool) GetReader(src io.Reader) io.Reader { + if r := pool.readers.Get(); r != nil { + reader := r.(*zstd.Decoder) + err := reader.Reset(src) + if err != nil { + panic(err) + } + return reader + } + reader, err := zstd.NewReader(src) + if err != nil { + panic(err) + } + return reader +} + +// PutReader places back in the pool a CompressionReader +func (pool *ZstdPool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and reset it to write to dst +func (pool *ZstdPool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*zstd.Encoder) + writer.Reset(dst) + return writer + } + + w, err := zstd.NewWriter(dst) + if err != nil { + panic(err) // never happens, error is only returned on wrong compression level. + } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *ZstdPool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} diff --git a/tempodb/encoding/v1/pool_test.go b/tempodb/encoding/v1/pool_test.go new file mode 100644 index 00000000000..190ec3ab5b8 --- /dev/null +++ b/tempodb/encoding/v1/pool_test.go @@ -0,0 +1,33 @@ +package v1 + +import ( + "fmt" + "testing" + + "github.com/grafana/tempo/tempodb/backend" + "github.com/stretchr/testify/assert" +) + +func TestGetPool(t *testing.T) { + for _, enc := range backend.SupportedEncoding { + t.Run(fmt.Sprintf("testing %s", enc), func(t *testing.T) { + rPool, err := getReaderPool(enc) + assert.NotNil(t, rPool) + assert.NoError(t, err) + assert.Equal(t, enc, rPool.Encoding()) + + wPool, err := getWriterPool(enc) + assert.NotNil(t, wPool) + assert.NoError(t, err) + assert.Equal(t, enc, wPool.Encoding()) + }) + } + + rPool, err := getReaderPool(maxEncoding + 1) + assert.Nil(t, rPool) + assert.Error(t, err) + + wPool, err := getWriterPool(maxEncoding + 1) + assert.Nil(t, wPool) + assert.Error(t, err) +} diff --git a/tempodb/encoding/versioned.go b/tempodb/encoding/versioned.go index ee0e636cb01..826b86111c9 100644 --- a/tempodb/encoding/versioned.go +++ b/tempodb/encoding/versioned.go @@ -2,56 +2,121 @@ package encoding import ( "context" - "fmt" "io" "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/encoding/common" v0 "github.com/grafana/tempo/tempodb/encoding/v0" + v1 "github.com/grafana/tempo/tempodb/encoding/v1" ) -// Current contains a string indicating the most recent block version -const Current = "v0" +const currentVersion = "v1" -// BackendBlock defines an object that can be used to interact with a block in object storage -type BackendBlock interface { - // Find searches for a given ID and returns the object if exists - Find(ctx context.Context, r backend.Reader, id common.ID, metrics *common.FindMetrics) ([]byte, error) - // Iterator returns an iterator that can be used to examine every object in the block - Iterator(chunkSizeBytes uint32, r backend.Reader) (common.Iterator, error) -} +// versionedEncoding has a whole bunch of versioned functionality. This is +// currently quite sloppy and could easily be tightened up to just a few methods +// but it is what it is for now! +type versionedEncoding interface { + newBufferedAppender(writer io.Writer, encoding backend.Encoding, indexDownsample int, totalObjectsEstimate int) (common.Appender, error) + newPagedFinder(indexReader common.IndexReader, pageReader common.PageReader, combiner common.ObjectCombiner) common.Finder + newPagedIterator(chunkSizeBytes uint32, indexBytes []byte, pageReader common.PageReader) (common.Iterator, error) -// newBackendBlock returns a BackendBlock for the given backend.BlockMeta -// It is version aware. -func NewBackendBlock(meta *backend.BlockMeta) (BackendBlock, error) { - if meta.Version == "v0" { - return v0.NewBackendBlock(meta), nil - } + writeBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, records []*common.Record, b *common.ShardedBloomFilter) error + writeBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, r io.Reader, size int64) error + appendBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, tracker backend.AppendTracker, buffer []byte) (backend.AppendTracker, error) - return nil, fmt.Errorf("%s is not a valid block version", meta.Version) + newPageReader(ra io.ReaderAt, encoding backend.Encoding) (common.PageReader, error) + newIndexReader(indexBytes []byte) (common.IndexReader, error) + + nameIndex() string + nameObjects() string + nameBloom(shard int) string } -// newBufferedAppender returns the most recent Appender -func newBufferedAppender(writer io.Writer, indexDownsample int, totalObjectsEstimate int) common.Appender { - return v0.NewBufferedAppender(writer, indexDownsample, totalObjectsEstimate) +// latestEncoding is used by Compactor and Complete block +func latestEncoding() versionedEncoding { + return v1Encoding{} } -// newDedupingFinder returns the most recent Finder -func newDedupingFinder(sortedRecords []*common.Record, ra io.ReaderAt, combiner common.ObjectCombiner) common.Finder { - return v0.NewDedupingFinder(sortedRecords, ra, combiner) +// v0Encoding +type v0Encoding struct{} + +func (v v0Encoding) newBufferedAppender(writer io.Writer, _ backend.Encoding, indexDownsample int, totalObjectsEstimate int) (common.Appender, error) { + return v0.NewBufferedAppender(writer, indexDownsample, totalObjectsEstimate), nil } +func (v v0Encoding) newPagedIterator(chunkSizeBytes uint32, indexBytes []byte, pageReader common.PageReader) (common.Iterator, error) { + reader, err := v0.NewIndexReader(indexBytes) + if err != nil { + return nil, err + } -// writeBlockMeta calls the most recent WriteBlockMeta -func writeBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, records []*common.Record, b *common.ShardedBloomFilter) error { + return v0.NewPagedIterator(chunkSizeBytes, reader, pageReader), nil +} +func (v v0Encoding) newPagedFinder(indexReader common.IndexReader, pageReader common.PageReader, combiner common.ObjectCombiner) common.Finder { + return v0.NewPagedFinder(indexReader, pageReader, combiner) +} +func (v v0Encoding) newIndexReader(indexBytes []byte) (common.IndexReader, error) { + return v0.NewIndexReader(indexBytes) +} +func (v v0Encoding) newPageReader(ra io.ReaderAt, encoding backend.Encoding) (common.PageReader, error) { + return v0.NewPageReader(ra), nil +} +func (v v0Encoding) writeBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, records []*common.Record, b *common.ShardedBloomFilter) error { return v0.WriteBlockMeta(ctx, w, meta, records, b) } - -// writeBlockData calls the most recent WriteBlockData -func writeBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, r io.Reader, size int64) error { +func (v v0Encoding) writeBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, r io.Reader, size int64) error { return v0.WriteBlockData(ctx, w, meta, r, size) } - -// appendBlockData calls the most recent AppendBlockData -func appendBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, tracker backend.AppendTracker, buffer []byte) (backend.AppendTracker, error) { +func (v v0Encoding) appendBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, tracker backend.AppendTracker, buffer []byte) (backend.AppendTracker, error) { return v0.AppendBlockData(ctx, w, meta, tracker, buffer) } +func (v v0Encoding) nameObjects() string { + return v0.NameObjects +} +func (v v0Encoding) nameIndex() string { + return v0.NameIndex +} +func (v v0Encoding) nameBloom(shard int) string { + return v0.BloomName(shard) +} + +// v1Encoding +type v1Encoding struct{} + +func (v v1Encoding) newBufferedAppender(writer io.Writer, encoding backend.Encoding, indexDownsample int, totalObjectsEstimate int) (common.Appender, error) { + return v1.NewBufferedAppender(writer, encoding, indexDownsample, totalObjectsEstimate) +} +func (v v1Encoding) newPagedIterator(chunkSizeBytes uint32, indexBytes []byte, pageReader common.PageReader) (common.Iterator, error) { + reader, err := v1.NewIndexReader(indexBytes) + if err != nil { + return nil, err + } + + return v1.NewPagedIterator(chunkSizeBytes, reader, pageReader), nil +} +func (v v1Encoding) newPagedFinder(indexReader common.IndexReader, pageReader common.PageReader, combiner common.ObjectCombiner) common.Finder { + return v1.NewPagedFinder(indexReader, pageReader, combiner) +} +func (v v1Encoding) newPageReader(ra io.ReaderAt, encoding backend.Encoding) (common.PageReader, error) { + return v1.NewPageReader(ra, encoding) +} +func (v v1Encoding) newIndexReader(indexBytes []byte) (common.IndexReader, error) { + return v1.NewIndexReader(indexBytes) +} +func (v v1Encoding) writeBlockMeta(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, records []*common.Record, b *common.ShardedBloomFilter) error { + return v1.WriteBlockMeta(ctx, w, meta, records, b) +} +func (v v1Encoding) writeBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, r io.Reader, size int64) error { + return v1.WriteBlockData(ctx, w, meta, r, size) +} +func (v v1Encoding) appendBlockData(ctx context.Context, w backend.Writer, meta *backend.BlockMeta, tracker backend.AppendTracker, buffer []byte) (backend.AppendTracker, error) { + return v1.AppendBlockData(ctx, w, meta, tracker, buffer) +} +func (v v1Encoding) nameObjects() string { + return v1.NameObjects() +} +func (v v1Encoding) nameIndex() string { + return v1.NameIndex() +} +func (v v1Encoding) nameBloom(shard int) string { + return v1.BloomName(shard) +} diff --git a/tempodb/retention_test.go b/tempodb/retention_test.go index e84b1aa528c..ea449ea9cc5 100644 --- a/tempodb/retention_test.go +++ b/tempodb/retention_test.go @@ -12,7 +12,9 @@ import ( "github.com/google/uuid" "github.com/stretchr/testify/assert" + "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/wal" ) @@ -26,10 +28,13 @@ func TestRetention(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -50,7 +55,7 @@ func TestRetention(t *testing.T) { head, err := wal.NewBlock(blockID, testTenantID) assert.NoError(t, err) - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) blockID = complete.BlockMeta().BlockID diff --git a/tempodb/tempodb.go b/tempodb/tempodb.go index 4b7283f619b..f6c2e0ab439 100644 --- a/tempodb/tempodb.go +++ b/tempodb/tempodb.go @@ -81,11 +81,12 @@ var ( type Writer interface { WriteBlock(ctx context.Context, block WriteableBlock) error + CompleteBlock(block *wal.AppendBlock, combiner common.ObjectCombiner) (*encoding.CompleteBlock, error) WAL() *wal.WAL } type Reader interface { - Find(ctx context.Context, tenantID string, id common.ID, blockStart string, blockEnd string) ([][]byte, common.FindMetrics, error) + Find(ctx context.Context, tenantID string, id common.ID, blockStart string, blockEnd string) ([][]byte, error) Shutdown() } @@ -121,12 +122,17 @@ type readerWriter struct { compactorTenantOffset uint } +// New creates a new tempodb func New(cfg *Config, logger log.Logger) (Reader, Writer, Compactor, error) { - var err error var r backend.Reader var w backend.Writer var c backend.Compactor + err := validateConfig(cfg) + if err != nil { + return nil, nil, nil, fmt.Errorf("invalid config while creating tempodb: %w", err) + } + switch cfg.Backend { case "local": r, w, c, err = local.New(cfg.Local) @@ -189,13 +195,15 @@ func (rw *readerWriter) WriteBlock(ctx context.Context, c WriteableBlock) error return c.Write(ctx, rw.w) } +func (rw *readerWriter) CompleteBlock(block *wal.AppendBlock, combiner common.ObjectCombiner) (*encoding.CompleteBlock, error) { + return block.Complete(rw.cfg.Block, rw.wal, combiner) +} + func (rw *readerWriter) WAL() *wal.WAL { return rw.wal } -func (rw *readerWriter) Find(ctx context.Context, tenantID string, id common.ID, blockStart string, blockEnd string) ([][]byte, common.FindMetrics, error) { - metrics := common.NewFindMetrics() - +func (rw *readerWriter) Find(ctx context.Context, tenantID string, id common.ID, blockStart string, blockEnd string) ([][]byte, error) { // tracing instrumentation logger := util.WithContext(ctx, util.Logger) span, ctx := opentracing.StartSpanFromContext(ctx, "store.Find") @@ -203,19 +211,19 @@ func (rw *readerWriter) Find(ctx context.Context, tenantID string, id common.ID, blockStartUUID, err := uuid.Parse(blockStart) if err != nil { - return nil, metrics, err + return nil, err } blockStartBytes, err := blockStartUUID.MarshalBinary() if err != nil { - return nil, metrics, err + return nil, err } blockEndUUID, err := uuid.Parse(blockEnd) if err != nil { - return nil, metrics, err + return nil, err } blockEndBytes, err := blockEndUUID.MarshalBinary() if err != nil { - return nil, metrics, err + return nil, err } rw.blockListsMtx.Lock() @@ -237,17 +245,17 @@ func (rw *readerWriter) Find(ctx context.Context, tenantID string, id common.ID, // deliberately placed outside the blocklist mtx unlock if !found { - return nil, metrics, nil + return nil, nil } partialTraces, err := rw.pool.RunJobs(ctx, copiedBlocklist, func(ctx context.Context, payload interface{}) ([]byte, error) { meta := payload.(*backend.BlockMeta) - block, err := encoding.NewBackendBlock(meta) + block, err := encoding.NewBackendBlock(meta, rw.r) if err != nil { return nil, err } - foundObject, err := block.Find(ctx, rw.r, id, &metrics) + foundObject, err := block.Find(ctx, id) if err != nil { return nil, err } @@ -262,7 +270,7 @@ func (rw *readerWriter) Find(ctx context.Context, tenantID string, id common.ID, return foundObject, nil }) - return partialTraces, metrics, err + return partialTraces, err } func (rw *readerWriter) Shutdown() { diff --git a/tempodb/tempodb_test.go b/tempodb/tempodb_test.go index 615f37ecc65..d318f6891d2 100644 --- a/tempodb/tempodb_test.go +++ b/tempodb/tempodb_test.go @@ -17,6 +17,7 @@ import ( "github.com/grafana/tempo/pkg/util/test" "github.com/grafana/tempo/tempodb/backend" "github.com/grafana/tempo/tempodb/backend/local" + "github.com/grafana/tempo/tempodb/encoding" "github.com/grafana/tempo/tempodb/wal" "github.com/stretchr/testify/assert" ) @@ -36,10 +37,13 @@ func TestDB(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncGZIP, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -76,7 +80,7 @@ func TestDB(t *testing.T) { assert.NoError(t, err, "unexpected error writing req") } - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) @@ -87,7 +91,7 @@ func TestDB(t *testing.T) { // read for i, id := range ids { - bFound, _, err := r.Find(context.Background(), testTenantID, id, BlockIDMin, BlockIDMax) + bFound, err := r.Find(context.Background(), testTenantID, id, BlockIDMin, BlockIDMax) assert.NoError(t, err) out := &tempopb.PushRequest{} @@ -112,10 +116,13 @@ func TestBlockSharding(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -139,7 +146,7 @@ func TestBlockSharding(t *testing.T) { assert.NoError(t, err, "unexpected error writing req") // write block to backend - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) @@ -157,7 +164,7 @@ func TestBlockSharding(t *testing.T) { // check if it respects the blockstart/blockend params - case1: hit blockStart := uuid.MustParse(BlockIDMin).String() blockEnd := uuid.MustParse(BlockIDMax).String() - bFound, _, err := r.Find(context.Background(), testTenantID, id, blockStart, blockEnd) + bFound, err := r.Find(context.Background(), testTenantID, id, blockStart, blockEnd) assert.NoError(t, err) assert.Greater(t, len(bFound), 0) @@ -169,7 +176,7 @@ func TestBlockSharding(t *testing.T) { // check if it respects the blockstart/blockend params - case2: miss blockStart = uuid.MustParse(BlockIDMin).String() blockEnd = uuid.MustParse(BlockIDMin).String() - bFound, _, err = r.Find(context.Background(), testTenantID, id, blockStart, blockEnd) + bFound, err = r.Find(context.Background(), testTenantID, id, blockStart, blockEnd) assert.NoError(t, err) assert.Len(t, bFound, 0) } @@ -184,16 +191,19 @@ func TestNilOnUnknownTenantID(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) assert.NoError(t, err) - buff, _, err := r.Find(context.Background(), "unknown", []byte{0x01}, BlockIDMin, BlockIDMax) + buff, err := r.Find(context.Background(), "unknown", []byte{0x01}, BlockIDMin, BlockIDMax) assert.Nil(t, buff) assert.Nil(t, err) } @@ -208,10 +218,13 @@ func TestBlockCleanup(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -232,7 +245,7 @@ func TestBlockCleanup(t *testing.T) { head, err := wal.NewBlock(blockID, testTenantID) assert.NoError(t, err) - complete, err := head.Complete(wal, &mockSharder{}) + complete, err := w.CompleteBlock(head, &mockSharder{}) assert.NoError(t, err) err = w.WriteBlock(context.Background(), complete) @@ -287,10 +300,13 @@ func TestCleanMissingTenants(t *testing.T) { Local: &local.Config{ Path: path.Join("/tmp", "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join("/tmp", "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join("/tmp", "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -344,10 +360,13 @@ func TestUpdateBlocklist(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) @@ -528,10 +547,13 @@ func TestUpdateBlocklistCompacted(t *testing.T) { Local: &local.Config{ Path: path.Join(tempDir, "traces"), }, - WAL: &wal.Config{ - Filepath: path.Join(tempDir, "wal"), + Block: &encoding.BlockConfig{ IndexDownsample: 17, BloomFP: .01, + Encoding: backend.EncLZ4_256k, + }, + WAL: &wal.Config{ + Filepath: path.Join(tempDir, "wal"), }, BlocklistPoll: 0, }, log.NewNopLogger()) diff --git a/tempodb/wal/append_block.go b/tempodb/wal/append_block.go index 95080d88a62..b577a7bea5f 100644 --- a/tempodb/wal/append_block.go +++ b/tempodb/wal/append_block.go @@ -10,6 +10,12 @@ import ( v0 "github.com/grafana/tempo/tempodb/encoding/v0" ) +// these values should never be used. these dummy values can help detect +// if they leak elsewhere. append blocks are not versioned and do not +// support different encodings +const appendBlockVersion = "append" +const appendBlockEncoding = backend.EncNone + // AppendBlock is a block that is actively used to append new objects to. It stores all data in the appendFile // in the order it was received and an in memory sorted index. type AppendBlock struct { @@ -22,7 +28,7 @@ type AppendBlock struct { func newAppendBlock(id uuid.UUID, tenantID string, filepath string) (*AppendBlock, error) { h := &AppendBlock{ block: block{ - meta: backend.NewBlockMeta(tenantID, id), + meta: backend.NewBlockMeta(tenantID, id, appendBlockVersion, appendBlockEncoding), filepath: filepath, }, } @@ -61,7 +67,7 @@ func (h *AppendBlock) DataLength() uint64 { // includes an on disk file containing all objects in order. // Note that calling this method leaves the original file on disk. This file is still considered to be part of the WAL // until Write() is successfully called on the CompleteBlock. -func (h *AppendBlock) Complete(w *WAL, combiner common.ObjectCombiner) (*encoding.CompleteBlock, error) { +func (h *AppendBlock) Complete(cfg *encoding.BlockConfig, w *WAL, combiner common.ObjectCombiner) (*encoding.CompleteBlock, error) { if h.appendFile != nil { err := h.appendFile.Close() if err != nil { @@ -81,7 +87,7 @@ func (h *AppendBlock) Complete(w *WAL, combiner common.ObjectCombiner) (*encodin return nil, err } - orderedBlock, err := encoding.NewCompleteBlock(h.meta, iterator, w.c.BloomFP, len(records), w.c.IndexDownsample, w.c.CompletedFilepath, h.fullFilename()) + orderedBlock, err := encoding.NewCompleteBlock(cfg, h.meta, iterator, len(records), w.c.CompletedFilepath, h.fullFilename()) if err != nil { return nil, err } @@ -96,7 +102,7 @@ func (h *AppendBlock) Find(id common.ID, combiner common.ObjectCombiner) ([]byte return nil, err } - finder := v0.NewDedupingFinder(records, file, combiner) + finder := v0.NewPagedFinder(common.Records(records), v0.NewPageReader(file), combiner) return finder.Find(id) } diff --git a/tempodb/wal/wal.go b/tempodb/wal/wal.go index bdd2f1fc25f..6f4978e5633 100644 --- a/tempodb/wal/wal.go +++ b/tempodb/wal/wal.go @@ -22,8 +22,6 @@ type WAL struct { type Config struct { Filepath string `yaml:"path"` CompletedFilepath string - IndexDownsample int `yaml:"index_downsample"` - BloomFP float64 `yaml:"bloom_filter_false_positive"` } func New(c *Config) (*WAL, error) { @@ -31,14 +29,6 @@ func New(c *Config) (*WAL, error) { return nil, fmt.Errorf("please provide a path for the WAL") } - if c.IndexDownsample == 0 { - return nil, fmt.Errorf("Non-zero index downsample required") - } - - if c.BloomFP <= 0.0 { - return nil, fmt.Errorf("invalid bloom filter fp rate %v", c.BloomFP) - } - // make folder err := os.MkdirAll(c.Filepath, os.ModePerm) if err != nil { @@ -84,7 +74,7 @@ func (w *WAL) AllBlocks() ([]*ReplayBlock, error) { blocks = append(blocks, &ReplayBlock{ block: block{ - meta: backend.NewBlockMeta(tenantID, blockID), + meta: backend.NewBlockMeta(tenantID, blockID, appendBlockVersion, appendBlockEncoding), filepath: w.c.Filepath, }, }) diff --git a/tempodb/wal/wal_test.go b/tempodb/wal/wal_test.go index 5db54838b11..e7cd11a301a 100644 --- a/tempodb/wal/wal_test.go +++ b/tempodb/wal/wal_test.go @@ -13,6 +13,8 @@ import ( "github.com/grafana/tempo/pkg/tempopb" "github.com/grafana/tempo/pkg/util/test" + "github.com/grafana/tempo/tempodb/backend" + "github.com/grafana/tempo/tempodb/encoding" v0 "github.com/grafana/tempo/tempodb/encoding/v0" ) @@ -37,9 +39,7 @@ func TestCreateBlock(t *testing.T) { assert.NoError(t, err, "unexpected error creating temp dir") wal, err := New(&Config{ - Filepath: tempDir, - IndexDownsample: 2, - BloomFP: 0.1, + Filepath: tempDir, }) assert.NoError(t, err, "unexpected error creating temp wal") @@ -61,9 +61,7 @@ func TestReadWrite(t *testing.T) { assert.NoError(t, err, "unexpected error creating temp dir") wal, err := New(&Config{ - Filepath: tempDir, - IndexDownsample: 2, - BloomFP: 0.1, + Filepath: tempDir, }) assert.NoError(t, err, "unexpected error creating temp wal") @@ -93,9 +91,7 @@ func TestAppend(t *testing.T) { assert.NoError(t, err, "unexpected error creating temp dir") wal, err := New(&Config{ - Filepath: tempDir, - IndexDownsample: 2, - BloomFP: 0.1, + Filepath: tempDir, }) assert.NoError(t, err, "unexpected error creating temp wal") @@ -143,11 +139,8 @@ func TestAppendBlockComplete(t *testing.T) { defer os.RemoveAll(tempDir) assert.NoError(t, err, "unexpected error creating temp dir") - indexDownsample := 13 wal, err := New(&Config{ - Filepath: tempDir, - IndexDownsample: indexDownsample, - BloomFP: .01, + Filepath: tempDir, }) assert.NoError(t, err, "unexpected error creating temp wal") @@ -171,7 +164,11 @@ func TestAppendBlockComplete(t *testing.T) { assert.NoError(t, err, "unexpected error writing req") } - complete, err := block.Complete(wal, &mockCombiner{}) + complete, err := block.Complete(&encoding.BlockConfig{ + IndexDownsample: 13, + BloomFP: .01, + Encoding: backend.EncGZIP, + }, wal, &mockCombiner{}) assert.NoError(t, err, "unexpected error completing block") for i, id := range ids { @@ -198,9 +195,7 @@ func TestWorkDir(t *testing.T) { assert.NoError(t, err, "unexpected error creating testfile") _, err = New(&Config{ - Filepath: tempDir, - IndexDownsample: 2, - BloomFP: 0.1, + Filepath: tempDir, }) assert.NoError(t, err, "unexpected error creating temp wal") @@ -218,9 +213,7 @@ func BenchmarkWriteRead(b *testing.B) { defer os.RemoveAll(tempDir) wal, _ := New(&Config{ - Filepath: tempDir, - IndexDownsample: 2, - BloomFP: 0.1, + Filepath: tempDir, }) blockID := uuid.New() diff --git a/vendor/github.com/klauspost/compress/LICENSE b/vendor/github.com/klauspost/compress/LICENSE new file mode 100644 index 00000000000..1eb75ef68e4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2012 The Go Authors. All rights reserved. +Copyright (c) 2019 Klaus Post. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go new file mode 100644 index 00000000000..25dbe3e15f4 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/deflate.go @@ -0,0 +1,821 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Copyright (c) 2015 Klaus Post +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + "io" + "math" +) + +const ( + NoCompression = 0 + BestSpeed = 1 + BestCompression = 9 + DefaultCompression = -1 + + // HuffmanOnly disables Lempel-Ziv match searching and only performs Huffman + // entropy encoding. This mode is useful in compressing data that has + // already been compressed with an LZ style algorithm (e.g. Snappy or LZ4) + // that lacks an entropy encoder. Compression gains are achieved when + // certain bytes in the input stream occur more frequently than others. + // + // Note that HuffmanOnly produces a compressed output that is + // RFC 1951 compliant. That is, any valid DEFLATE decompressor will + // continue to be able to decompress this output. + HuffmanOnly = -2 + ConstantCompression = HuffmanOnly // compatibility alias. + + logWindowSize = 15 + windowSize = 1 << logWindowSize + windowMask = windowSize - 1 + logMaxOffsetSize = 15 // Standard DEFLATE + minMatchLength = 4 // The smallest match that the compressor looks for + maxMatchLength = 258 // The longest match for the compressor + minOffsetSize = 1 // The shortest offset that makes any sense + + // The maximum number of tokens we put into a single flat block, just too + // stop things from getting too large. + maxFlateBlockTokens = 1 << 14 + maxStoreBlockSize = 65535 + hashBits = 17 // After 17 performance degrades + hashSize = 1 << hashBits + hashMask = (1 << hashBits) - 1 + hashShift = (hashBits + minMatchLength - 1) / minMatchLength + maxHashOffset = 1 << 24 + + skipNever = math.MaxInt32 + + debugDeflate = false +) + +type compressionLevel struct { + good, lazy, nice, chain, fastSkipHashing, level int +} + +// Compression levels have been rebalanced from zlib deflate defaults +// to give a bigger spread in speed and compression. +// See https://blog.klauspost.com/rebalancing-deflate-compression-levels/ +var levels = []compressionLevel{ + {}, // 0 + // Level 1-6 uses specialized algorithm - values not used + {0, 0, 0, 0, 0, 1}, + {0, 0, 0, 0, 0, 2}, + {0, 0, 0, 0, 0, 3}, + {0, 0, 0, 0, 0, 4}, + {0, 0, 0, 0, 0, 5}, + {0, 0, 0, 0, 0, 6}, + // Levels 7-9 use increasingly more lazy matching + // and increasingly stringent conditions for "good enough". + {8, 8, 24, 16, skipNever, 7}, + {10, 16, 24, 64, skipNever, 8}, + {32, 258, 258, 4096, skipNever, 9}, +} + +// advancedState contains state for the advanced levels, with bigger hash tables, etc. +type advancedState struct { + // deflate state + length int + offset int + maxInsertIndex int + + // Input hash chains + // hashHead[hashValue] contains the largest inputIndex with the specified hash value + // If hashHead[hashValue] is within the current window, then + // hashPrev[hashHead[hashValue] & windowMask] contains the previous index + // with the same hash value. + chainHead int + hashHead [hashSize]uint32 + hashPrev [windowSize]uint32 + hashOffset int + + // input window: unprocessed data is window[index:windowEnd] + index int + hashMatch [maxMatchLength + minMatchLength]uint32 + + hash uint32 + ii uint16 // position of last match, intended to overflow to reset. +} + +type compressor struct { + compressionLevel + + w *huffmanBitWriter + + // compression algorithm + fill func(*compressor, []byte) int // copy data to window + step func(*compressor) // process window + + window []byte + windowEnd int + blockStart int // window index where current tokens start + err error + + // queued output tokens + tokens tokens + fast fastEnc + state *advancedState + + sync bool // requesting flush + byteAvailable bool // if true, still need to process window[index-1]. +} + +func (d *compressor) fillDeflate(b []byte) int { + s := d.state + if s.index >= 2*windowSize-(minMatchLength+maxMatchLength) { + // shift the window by windowSize + copy(d.window[:], d.window[windowSize:2*windowSize]) + s.index -= windowSize + d.windowEnd -= windowSize + if d.blockStart >= windowSize { + d.blockStart -= windowSize + } else { + d.blockStart = math.MaxInt32 + } + s.hashOffset += windowSize + if s.hashOffset > maxHashOffset { + delta := s.hashOffset - 1 + s.hashOffset -= delta + s.chainHead -= delta + // Iterate over slices instead of arrays to avoid copying + // the entire table onto the stack (Issue #18625). + for i, v := range s.hashPrev[:] { + if int(v) > delta { + s.hashPrev[i] = uint32(int(v) - delta) + } else { + s.hashPrev[i] = 0 + } + } + for i, v := range s.hashHead[:] { + if int(v) > delta { + s.hashHead[i] = uint32(int(v) - delta) + } else { + s.hashHead[i] = 0 + } + } + } + } + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +func (d *compressor) writeBlock(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + var window []byte + if d.blockStart <= index { + window = d.window[d.blockStart:index] + } + d.blockStart = index + d.w.writeBlock(tok, eof, window) + return d.w.err + } + return nil +} + +// writeBlockSkip writes the current block and uses the number of tokens +// to determine if the block should be stored on no matches, or +// only huffman encoded. +func (d *compressor) writeBlockSkip(tok *tokens, index int, eof bool) error { + if index > 0 || eof { + if d.blockStart <= index { + window := d.window[d.blockStart:index] + // If we removed less than a 64th of all literals + // we huffman compress the block. + if int(tok.n) > len(window)-int(tok.n>>6) { + d.w.writeBlockHuff(eof, window, d.sync) + } else { + // Write a dynamic huffman block. + d.w.writeBlockDynamic(tok, eof, window, d.sync) + } + } else { + d.w.writeBlock(tok, eof, nil) + } + d.blockStart = index + return d.w.err + } + return nil +} + +// fillWindow will fill the current window with the supplied +// dictionary and calculate all hashes. +// This is much faster than doing a full encode. +// Should only be used after a start/reset. +func (d *compressor) fillWindow(b []byte) { + // Do not fill window if we are in store-only or huffman mode. + if d.level <= 0 { + return + } + if d.fast != nil { + // encode the last data, but discard the result + if len(b) > maxMatchOffset { + b = b[len(b)-maxMatchOffset:] + } + d.fast.Encode(&d.tokens, b) + d.tokens.Reset() + return + } + s := d.state + // If we are given too much, cut it. + if len(b) > windowSize { + b = b[len(b)-windowSize:] + } + // Add all to window. + n := copy(d.window[d.windowEnd:], b) + + // Calculate 256 hashes at the time (more L1 cache hits) + loops := (n + 256 - minMatchLength) / 256 + for j := 0; j < loops; j++ { + startindex := j * 256 + end := startindex + 256 + minMatchLength - 1 + if end > n { + end = n + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + + if dstSize <= 0 { + continue + } + + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + s.hash = newH + } + // Update window information. + d.windowEnd += n + s.index = n +} + +// Try to find a match starting at index whose length is greater than prevSize. +// We only look at chainCount possibilities before giving up. +// pos = s.index, prevHead = s.chainHead-s.hashOffset, prevLength=minMatchLength-1, lookahead +func (d *compressor) findMatch(pos int, prevHead int, prevLength int, lookahead int) (length, offset int, ok bool) { + minMatchLook := maxMatchLength + if lookahead < minMatchLook { + minMatchLook = lookahead + } + + win := d.window[0 : pos+minMatchLook] + + // We quit when we get a match that's at least nice long + nice := len(win) - pos + if d.nice < nice { + nice = d.nice + } + + // If we've got a match that's good enough, only look in 1/4 the chain. + tries := d.chain + length = prevLength + if length >= d.good { + tries >>= 2 + } + + wEnd := win[pos+length] + wPos := win[pos:] + minIndex := pos - windowSize + + for i := prevHead; tries > 0; tries-- { + if wEnd == win[i+length] { + n := matchLen(win[i:i+minMatchLook], wPos) + + if n > length && (n > minMatchLength || pos-i <= 4096) { + length = n + offset = pos - i + ok = true + if n >= nice { + // The match is good enough that we don't try to find a better one. + break + } + wEnd = win[pos+n] + } + } + if i == minIndex { + // hashPrev[i & windowMask] has already been overwritten, so stop now. + break + } + i = int(d.state.hashPrev[i&windowMask]) - d.state.hashOffset + if i < minIndex || i < 0 { + break + } + } + return +} + +func (d *compressor) writeStoredBlock(buf []byte) error { + if d.w.writeStoredHeader(len(buf), false); d.w.err != nil { + return d.w.err + } + d.w.writeBytes(buf) + return d.w.err +} + +// hash4 returns a hash representation of the first 4 bytes +// of the supplied slice. +// The caller must ensure that len(b) >= 4. +func hash4(b []byte) uint32 { + b = b[:4] + return hash4u(uint32(b[3])|uint32(b[2])<<8|uint32(b[1])<<16|uint32(b[0])<<24, hashBits) +} + +// bulkHash4 will compute hashes using the same +// algorithm as hash4 +func bulkHash4(b []byte, dst []uint32) { + if len(b) < 4 { + return + } + hb := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24 + dst[0] = hash4u(hb, hashBits) + end := len(b) - 4 + 1 + for i := 1; i < end; i++ { + hb = (hb << 8) | uint32(b[i+3]) + dst[i] = hash4u(hb, hashBits) + } +} + +func (d *compressor) initDeflate() { + d.window = make([]byte, 2*windowSize) + d.byteAvailable = false + d.err = nil + if d.state == nil { + return + } + s := d.state + s.index = 0 + s.hashOffset = 1 + s.length = minMatchLength - 1 + s.offset = 0 + s.hash = 0 + s.chainHead = -1 +} + +// deflateLazy is the same as deflate, but with d.fastSkipHashing == skipNever, +// meaning it always has lazy matching on. +func (d *compressor) deflateLazy() { + s := d.state + // Sanity enables additional runtime tests. + // It's intended to be used during development + // to supplement the currently ad-hoc unit tests. + const sanity = debugDeflate + + if d.windowEnd-s.index < minMatchLength+maxMatchLength && !d.sync { + return + } + + s.maxInsertIndex = d.windowEnd - (minMatchLength - 1) + if s.index < s.maxInsertIndex { + s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + } + + for { + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + lookahead := d.windowEnd - s.index + if lookahead < minMatchLength+maxMatchLength { + if !d.sync { + return + } + if sanity && s.index > d.windowEnd { + panic("index > windowEnd") + } + if lookahead == 0 { + // Flush current output block if any. + if d.byteAvailable { + // There is still one pending token that needs to be flushed + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + } + if d.tokens.n > 0 { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + return + } + } + if s.index < s.maxInsertIndex { + // Update the hash + s.hash = hash4(d.window[s.index : s.index+minMatchLength]) + ch := s.hashHead[s.hash&hashMask] + s.chainHead = int(ch) + s.hashPrev[s.index&windowMask] = ch + s.hashHead[s.hash&hashMask] = uint32(s.index + s.hashOffset) + } + prevLength := s.length + prevOffset := s.offset + s.length = minMatchLength - 1 + s.offset = 0 + minIndex := s.index - windowSize + if minIndex < 0 { + minIndex = 0 + } + + if s.chainHead-s.hashOffset >= minIndex && lookahead > prevLength && prevLength < d.lazy { + if newLength, newOffset, ok := d.findMatch(s.index, s.chainHead-s.hashOffset, minMatchLength-1, lookahead); ok { + s.length = newLength + s.offset = newOffset + } + } + if prevLength >= minMatchLength && s.length <= prevLength { + // There was a match at the previous step, and the current match is + // not better. Output the previous match. + d.tokens.AddMatch(uint32(prevLength-3), uint32(prevOffset-minOffsetSize)) + + // Insert in the hash table all strings up to the end of the match. + // index and index-1 are already inserted. If there is not enough + // lookahead, the last two strings are not inserted into the hash + // table. + var newIndex int + newIndex = s.index + prevLength - 1 + // Calculate missing hashes + end := newIndex + if end > s.maxInsertIndex { + end = s.maxInsertIndex + } + end += minMatchLength - 1 + startindex := s.index + 1 + if startindex > s.maxInsertIndex { + startindex = s.maxInsertIndex + } + tocheck := d.window[startindex:end] + dstSize := len(tocheck) - minMatchLength + 1 + if dstSize > 0 { + dst := s.hashMatch[:dstSize] + bulkHash4(tocheck, dst) + var newH uint32 + for i, val := range dst { + di := i + startindex + newH = val & hashMask + // Get previous value with the same hash. + // Our chain should point to the previous value. + s.hashPrev[di&windowMask] = s.hashHead[newH] + // Set the head of the hash chain to us. + s.hashHead[newH] = uint32(di + s.hashOffset) + } + s.hash = newH + } + + s.index = newIndex + d.byteAvailable = false + s.length = minMatchLength - 1 + if d.tokens.n == maxFlateBlockTokens { + // The block includes the current character + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } else { + // Reset, if we got a match this run. + if s.length >= minMatchLength { + s.ii = 0 + } + // We have a byte waiting. Emit it. + if d.byteAvailable { + s.ii++ + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + + // If we have a long run of no matches, skip additional bytes + // Resets when s.ii overflows after 64KB. + if s.ii > 31 { + n := int(s.ii >> 5) + for j := 0; j < n; j++ { + if s.index >= d.windowEnd-1 { + break + } + + d.tokens.AddLiteral(d.window[s.index-1]) + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + s.index++ + } + // Flush last byte + d.tokens.AddLiteral(d.window[s.index-1]) + d.byteAvailable = false + // s.length = minMatchLength - 1 // not needed, since s.ii is reset above, so it should never be > minMatchLength + if d.tokens.n == maxFlateBlockTokens { + if d.err = d.writeBlock(&d.tokens, s.index, false); d.err != nil { + return + } + d.tokens.Reset() + } + } + } else { + s.index++ + d.byteAvailable = true + } + } + } +} + +func (d *compressor) store() { + if d.windowEnd > 0 && (d.windowEnd == maxStoreBlockSize || d.sync) { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + d.windowEnd = 0 + } +} + +// fillWindow will fill the buffer with data for huffman-only compression. +// The number of bytes copied is returned. +func (d *compressor) fillBlock(b []byte) int { + n := copy(d.window[d.windowEnd:], b) + d.windowEnd += n + return n +} + +// storeHuff will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeHuff() { + if d.windowEnd < len(d.window) && !d.sync || d.windowEnd == 0 { + return + } + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + d.windowEnd = 0 +} + +// storeFast will compress and store the currently added data, +// if enough has been accumulated or we at the end of the stream. +// Any error that occurred will be in d.err +func (d *compressor) storeFast() { + // We only compress if we have maxStoreBlockSize. + if d.windowEnd < len(d.window) { + if !d.sync { + return + } + // Handle extremely small sizes. + if d.windowEnd < 128 { + if d.windowEnd == 0 { + return + } + if d.windowEnd <= 32 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + } else { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], true) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 + d.fast.Reset() + return + } + } + + d.fast.Encode(&d.tokens, d.window[:d.windowEnd]) + // If we made zero matches, store the block as is. + if d.tokens.n == 0 { + d.err = d.writeStoredBlock(d.window[:d.windowEnd]) + // If we removed less than 1/16th, huffman compress the block. + } else if int(d.tokens.n) > d.windowEnd-(d.windowEnd>>4) { + d.w.writeBlockHuff(false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } else { + d.w.writeBlockDynamic(&d.tokens, false, d.window[:d.windowEnd], d.sync) + d.err = d.w.err + } + d.tokens.Reset() + d.windowEnd = 0 +} + +// write will add input byte to the stream. +// Unless an error occurs all bytes will be consumed. +func (d *compressor) write(b []byte) (n int, err error) { + if d.err != nil { + return 0, d.err + } + n = len(b) + for len(b) > 0 { + d.step(d) + b = b[d.fill(d, b):] + if d.err != nil { + return 0, d.err + } + } + return n, d.err +} + +func (d *compressor) syncFlush() error { + d.sync = true + if d.err != nil { + return d.err + } + d.step(d) + if d.err == nil { + d.w.writeStoredHeader(0, false) + d.w.flush() + d.err = d.w.err + } + d.sync = false + return d.err +} + +func (d *compressor) init(w io.Writer, level int) (err error) { + d.w = newHuffmanBitWriter(w) + + switch { + case level == NoCompression: + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).store + case level == ConstantCompression: + d.w.logNewTablePenalty = 4 + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeHuff + case level == DefaultCompression: + level = 5 + fallthrough + case level >= 1 && level <= 6: + d.w.logNewTablePenalty = 6 + d.fast = newFastEnc(level) + d.window = make([]byte, maxStoreBlockSize) + d.fill = (*compressor).fillBlock + d.step = (*compressor).storeFast + case 7 <= level && level <= 9: + d.w.logNewTablePenalty = 10 + d.state = &advancedState{} + d.compressionLevel = levels[level] + d.initDeflate() + d.fill = (*compressor).fillDeflate + d.step = (*compressor).deflateLazy + default: + return fmt.Errorf("flate: invalid compression level %d: want value in range [-2, 9]", level) + } + d.level = level + return nil +} + +// reset the state of the compressor. +func (d *compressor) reset(w io.Writer) { + d.w.reset(w) + d.sync = false + d.err = nil + // We only need to reset a few things for Snappy. + if d.fast != nil { + d.fast.Reset() + d.windowEnd = 0 + d.tokens.Reset() + return + } + switch d.compressionLevel.chain { + case 0: + // level was NoCompression or ConstantCompresssion. + d.windowEnd = 0 + default: + s := d.state + s.chainHead = -1 + for i := range s.hashHead { + s.hashHead[i] = 0 + } + for i := range s.hashPrev { + s.hashPrev[i] = 0 + } + s.hashOffset = 1 + s.index, d.windowEnd = 0, 0 + d.blockStart, d.byteAvailable = 0, false + d.tokens.Reset() + s.length = minMatchLength - 1 + s.offset = 0 + s.hash = 0 + s.ii = 0 + s.maxInsertIndex = 0 + } +} + +func (d *compressor) close() error { + if d.err != nil { + return d.err + } + d.sync = true + d.step(d) + if d.err != nil { + return d.err + } + if d.w.writeStoredHeader(0, true); d.w.err != nil { + return d.w.err + } + d.w.flush() + d.w.reset(nil) + return d.w.err +} + +// NewWriter returns a new Writer compressing data at the given level. +// Following zlib, levels range from 1 (BestSpeed) to 9 (BestCompression); +// higher levels typically run slower but compress more. +// Level 0 (NoCompression) does not attempt any compression; it only adds the +// necessary DEFLATE framing. +// Level -1 (DefaultCompression) uses the default compression level. +// Level -2 (ConstantCompression) will use Huffman compression only, giving +// a very fast compression for all types of input, but sacrificing considerable +// compression efficiency. +// +// If level is in the range [-2, 9] then the error returned will be nil. +// Otherwise the error returned will be non-nil. +func NewWriter(w io.Writer, level int) (*Writer, error) { + var dw Writer + if err := dw.d.init(w, level); err != nil { + return nil, err + } + return &dw, nil +} + +// NewWriterDict is like NewWriter but initializes the new +// Writer with a preset dictionary. The returned Writer behaves +// as if the dictionary had been written to it without producing +// any compressed output. The compressed data written to w +// can only be decompressed by a Reader initialized with the +// same dictionary. +func NewWriterDict(w io.Writer, level int, dict []byte) (*Writer, error) { + zw, err := NewWriter(w, level) + if err != nil { + return nil, err + } + zw.d.fillWindow(dict) + zw.dict = append(zw.dict, dict...) // duplicate dictionary for Reset method. + return zw, err +} + +// A Writer takes data written to it and writes the compressed +// form of that data to an underlying writer (see NewWriter). +type Writer struct { + d compressor + dict []byte +} + +// Write writes data to w, which will eventually write the +// compressed form of data to its underlying writer. +func (w *Writer) Write(data []byte) (n int, err error) { + return w.d.write(data) +} + +// Flush flushes any pending data to the underlying writer. +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. +// Flush does not return until the data has been written. +// Calling Flush when there is no pending data still causes the Writer +// to emit a sync marker of at least 4 bytes. +// If the underlying writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (w *Writer) Flush() error { + // For more about flushing: + // http://www.bolet.org/~pornin/deflate-flush.html + return w.d.syncFlush() +} + +// Close flushes and closes the writer. +func (w *Writer) Close() error { + return w.d.close() +} + +// Reset discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level and dictionary. +func (w *Writer) Reset(dst io.Writer) { + if len(w.dict) > 0 { + // w was created with NewWriterDict + w.d.reset(dst) + if dst != nil { + w.d.fillWindow(w.dict) + } + } else { + // w was created with NewWriter + w.d.reset(dst) + } +} + +// ResetDict discards the writer's state and makes it equivalent to +// the result of NewWriter or NewWriterDict called with dst +// and w's level, but sets a specific dictionary. +func (w *Writer) ResetDict(dst io.Writer, dict []byte) { + w.dict = dict + w.d.reset(dst) + w.d.fillWindow(w.dict) +} diff --git a/vendor/github.com/klauspost/compress/flate/dict_decoder.go b/vendor/github.com/klauspost/compress/flate/dict_decoder.go new file mode 100644 index 00000000000..71c75a065ea --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/dict_decoder.go @@ -0,0 +1,184 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// dictDecoder implements the LZ77 sliding dictionary as used in decompression. +// LZ77 decompresses data through sequences of two forms of commands: +// +// * Literal insertions: Runs of one or more symbols are inserted into the data +// stream as is. This is accomplished through the writeByte method for a +// single symbol, or combinations of writeSlice/writeMark for multiple symbols. +// Any valid stream must start with a literal insertion if no preset dictionary +// is used. +// +// * Backward copies: Runs of one or more symbols are copied from previously +// emitted data. Backward copies come as the tuple (dist, length) where dist +// determines how far back in the stream to copy from and length determines how +// many bytes to copy. Note that it is valid for the length to be greater than +// the distance. Since LZ77 uses forward copies, that situation is used to +// perform a form of run-length encoding on repeated runs of symbols. +// The writeCopy and tryWriteCopy are used to implement this command. +// +// For performance reasons, this implementation performs little to no sanity +// checks about the arguments. As such, the invariants documented for each +// method call must be respected. +type dictDecoder struct { + hist []byte // Sliding window history + + // Invariant: 0 <= rdPos <= wrPos <= len(hist) + wrPos int // Current output position in buffer + rdPos int // Have emitted hist[:rdPos] already + full bool // Has a full window length been written yet? +} + +// init initializes dictDecoder to have a sliding window dictionary of the given +// size. If a preset dict is provided, it will initialize the dictionary with +// the contents of dict. +func (dd *dictDecoder) init(size int, dict []byte) { + *dd = dictDecoder{hist: dd.hist} + + if cap(dd.hist) < size { + dd.hist = make([]byte, size) + } + dd.hist = dd.hist[:size] + + if len(dict) > len(dd.hist) { + dict = dict[len(dict)-len(dd.hist):] + } + dd.wrPos = copy(dd.hist, dict) + if dd.wrPos == len(dd.hist) { + dd.wrPos = 0 + dd.full = true + } + dd.rdPos = dd.wrPos +} + +// histSize reports the total amount of historical data in the dictionary. +func (dd *dictDecoder) histSize() int { + if dd.full { + return len(dd.hist) + } + return dd.wrPos +} + +// availRead reports the number of bytes that can be flushed by readFlush. +func (dd *dictDecoder) availRead() int { + return dd.wrPos - dd.rdPos +} + +// availWrite reports the available amount of output buffer space. +func (dd *dictDecoder) availWrite() int { + return len(dd.hist) - dd.wrPos +} + +// writeSlice returns a slice of the available buffer to write data to. +// +// This invariant will be kept: len(s) <= availWrite() +func (dd *dictDecoder) writeSlice() []byte { + return dd.hist[dd.wrPos:] +} + +// writeMark advances the writer pointer by cnt. +// +// This invariant must be kept: 0 <= cnt <= availWrite() +func (dd *dictDecoder) writeMark(cnt int) { + dd.wrPos += cnt +} + +// writeByte writes a single byte to the dictionary. +// +// This invariant must be kept: 0 < availWrite() +func (dd *dictDecoder) writeByte(c byte) { + dd.hist[dd.wrPos] = c + dd.wrPos++ +} + +// writeCopy copies a string at a given (dist, length) to the output. +// This returns the number of bytes copied and may be less than the requested +// length if the available space in the output buffer is too small. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) writeCopy(dist, length int) int { + dstBase := dd.wrPos + dstPos := dstBase + srcPos := dstPos - dist + endPos := dstPos + length + if endPos > len(dd.hist) { + endPos = len(dd.hist) + } + + // Copy non-overlapping section after destination position. + // + // This section is non-overlapping in that the copy length for this section + // is always less than or equal to the backwards distance. This can occur + // if a distance refers to data that wraps-around in the buffer. + // Thus, a backwards copy is performed here; that is, the exact bytes in + // the source prior to the copy is placed in the destination. + if srcPos < 0 { + srcPos += len(dd.hist) + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:]) + srcPos = 0 + } + + // Copy possibly overlapping section before destination position. + // + // This section can overlap if the copy length for this section is larger + // than the backwards distance. This is allowed by LZ77 so that repeated + // strings can be succinctly represented using (dist, length) pairs. + // Thus, a forwards copy is performed here; that is, the bytes copied is + // possibly dependent on the resulting bytes in the destination as the copy + // progresses along. This is functionally equivalent to the following: + // + // for i := 0; i < endPos-dstPos; i++ { + // dd.hist[dstPos+i] = dd.hist[srcPos+i] + // } + // dstPos = endPos + // + for dstPos < endPos { + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// tryWriteCopy tries to copy a string at a given (distance, length) to the +// output. This specialized version is optimized for short distances. +// +// This method is designed to be inlined for performance reasons. +// +// This invariant must be kept: 0 < dist <= histSize() +func (dd *dictDecoder) tryWriteCopy(dist, length int) int { + dstPos := dd.wrPos + endPos := dstPos + length + if dstPos < dist || endPos > len(dd.hist) { + return 0 + } + dstBase := dstPos + srcPos := dstPos - dist + + // Copy possibly overlapping section before destination position. +loop: + dstPos += copy(dd.hist[dstPos:endPos], dd.hist[srcPos:dstPos]) + if dstPos < endPos { + goto loop // Avoid for-loop so that this function can be inlined + } + + dd.wrPos = dstPos + return dstPos - dstBase +} + +// readFlush returns a slice of the historical buffer that is ready to be +// emitted to the user. The data returned by readFlush must be fully consumed +// before calling any other dictDecoder methods. +func (dd *dictDecoder) readFlush() []byte { + toRead := dd.hist[dd.rdPos:dd.wrPos] + dd.rdPos = dd.wrPos + if dd.wrPos == len(dd.hist) { + dd.wrPos, dd.rdPos = 0, 0 + dd.full = true + } + return toRead +} diff --git a/vendor/github.com/klauspost/compress/flate/fast_encoder.go b/vendor/github.com/klauspost/compress/flate/fast_encoder.go new file mode 100644 index 00000000000..6d4c1e98bc5 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/fast_encoder.go @@ -0,0 +1,254 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Modified for deflate by Klaus Post (c) 2015. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "fmt" + "math/bits" +) + +type fastEnc interface { + Encode(dst *tokens, src []byte) + Reset() +} + +func newFastEnc(level int) fastEnc { + switch level { + case 1: + return &fastEncL1{fastGen: fastGen{cur: maxStoreBlockSize}} + case 2: + return &fastEncL2{fastGen: fastGen{cur: maxStoreBlockSize}} + case 3: + return &fastEncL3{fastGen: fastGen{cur: maxStoreBlockSize}} + case 4: + return &fastEncL4{fastGen: fastGen{cur: maxStoreBlockSize}} + case 5: + return &fastEncL5{fastGen: fastGen{cur: maxStoreBlockSize}} + case 6: + return &fastEncL6{fastGen: fastGen{cur: maxStoreBlockSize}} + default: + panic("invalid level specified") + } +} + +const ( + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32. + baseMatchOffset = 1 // The smallest match offset + baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5 + maxMatchOffset = 1 << 15 // The largest match offset + + bTableBits = 17 // Bits used in the big tables + bTableSize = 1 << bTableBits // Size of the table + allocHistory = maxStoreBlockSize * 10 // Size to preallocate for history. + bufferReset = (1 << 31) - allocHistory - maxStoreBlockSize - 1 // Reset the buffer offset when reaching this. +) + +const ( + prime3bytes = 506832829 + prime4bytes = 2654435761 + prime5bytes = 889523592379 + prime6bytes = 227718039650203 + prime7bytes = 58295818150454627 + prime8bytes = 0xcf1bbcdcb7a56463 +) + +func load32(b []byte, i int) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func load3232(b []byte, i int32) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6432(b []byte, i int32) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func hash(u uint32) uint32 { + return (u * 0x1e35a7bd) >> tableShift +} + +type tableEntry struct { + offset int32 +} + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastGen struct { + hist []byte + cur int32 +} + +func (e *fastGen) addBlock(src []byte) int32 { + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + e.hist = make([]byte, 0, allocHistory) + } else { + if cap(e.hist) < maxMatchOffset*2 { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - maxMatchOffset + copy(e.hist[0:maxMatchOffset], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:maxMatchOffset] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4u(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> ((32 - h) & 31) +} + +type tableEntryPrev struct { + Cur tableEntry + Prev tableEntry +} + +// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4x64(u uint64, h uint8) uint32 { + return (uint32(u) * prime4bytes) >> ((32 - h) & 31) +} + +// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash7(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) +} + +// hash8 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash8(u uint64, h uint8) uint32 { + return uint32((u * prime8bytes) >> ((64 - h) & 63)) +} + +// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash6(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) +} + +// matchlen will return the match length between offsets and t in src. +// The maximum length returned is maxMatchLength - 4. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlen(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + s1 := int(s) + maxMatchLength - 4 + if s1 > len(src) { + s1 = len(src) + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:s1], src[t:])) +} + +// matchlenLong will return the match length between offsets and t in src. +// It is assumed that s > t, that t >=0 and s < len(src). +func (e *fastGen) matchlenLong(s, t int32, src []byte) int32 { + if debugDecode { + if t >= s { + panic(fmt.Sprint("t >=s:", t, s)) + } + if int(s) >= len(src) { + panic(fmt.Sprint("s >= len(src):", s, len(src))) + } + if t < 0 { + panic(fmt.Sprint("t < 0:", t)) + } + if s-t > maxMatchOffset { + panic(fmt.Sprint(s, "-", t, "(", s-t, ") > maxMatchLength (", maxMatchOffset, ")")) + } + } + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + +// Reset the encoding table. +func (e *fastGen) Reset() { + if cap(e.hist) < allocHistory { + e.hist = make([]byte, 0, allocHistory) + } + // We offset current position so everything will be out of reach. + // If we are above the buffer reset it will be cleared anyway since len(hist) == 0. + if e.cur <= bufferReset { + e.cur += maxMatchOffset + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} + +// matchLen returns the maximum length. +// 'a' must be the shortest of the two. +func matchLen(a, b []byte) int { + b = b[:len(a)] + var checked int + if len(a) > 4 { + // Try 4 bytes first + if diff := load32(a, 0) ^ load32(b, 0); diff != 0 { + return bits.TrailingZeros32(diff) >> 3 + } + // Switch to 8 byte matching. + checked = 4 + a = a[4:] + b = b[4:] + for len(a) >= 8 { + b = b[:len(a)] + if diff := load64(a, 0) ^ load64(b, 0); diff != 0 { + return checked + (bits.TrailingZeros64(diff) >> 3) + } + checked += 8 + a = a[8:] + b = b[8:] + } + } + b = b[:len(a)] + for i := range a { + if a[i] != b[i] { + return int(i) + checked + } + } + return len(a) + checked +} diff --git a/vendor/github.com/klauspost/compress/flate/gen_inflate.go b/vendor/github.com/klauspost/compress/flate/gen_inflate.go new file mode 100644 index 00000000000..c74a95fe7f6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/gen_inflate.go @@ -0,0 +1,274 @@ +// +build generate + +//go:generate go run $GOFILE && gofmt -w inflate_gen.go + +package main + +import ( + "os" + "strings" +) + +func main() { + f, err := os.Create("inflate_gen.go") + if err != nil { + panic(err) + } + defer f.Close() + types := []string{"*bytes.Buffer", "*bytes.Reader", "*bufio.Reader", "*strings.Reader"} + names := []string{"BytesBuffer", "BytesReader", "BufioReader", "StringsReader"} + imports := []string{"bytes", "bufio", "io", "strings", "math/bits"} + f.WriteString(`// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( +`) + + for _, imp := range imports { + f.WriteString("\t\"" + imp + "\"\n") + } + f.WriteString(")\n\n") + + template := ` + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) $FUNCNAME$() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.($TYPE$) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).$FUNCNAME$ + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).$FUNCNAME$ // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +` + for i, t := range types { + s := strings.Replace(template, "$FUNCNAME$", "huffman"+names[i], -1) + s = strings.Replace(s, "$TYPE$", t, -1) + f.WriteString(s) + } + f.WriteString("func (f *decompressor) huffmanBlockDecoder() func() {\n") + f.WriteString("\tswitch f.r.(type) {\n") + for i, t := range types { + f.WriteString("\t\tcase " + t + ":\n") + f.WriteString("\t\t\treturn f.huffman" + names[i] + "\n") + } + f.WriteString("\t\tdefault:\n") + f.WriteString("\t\t\treturn f.huffmanBlockGeneric") + f.WriteString("\t}\n}\n") +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go new file mode 100644 index 00000000000..53fe1d06e25 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_bit_writer.go @@ -0,0 +1,911 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "io" +) + +const ( + // The largest offset code. + offsetCodeCount = 30 + + // The special code used to mark the end of a block. + endBlockMarker = 256 + + // The first length code. + lengthCodesStart = 257 + + // The number of codegen codes. + codegenCodeCount = 19 + badCode = 255 + + // bufferFlushSize indicates the buffer size + // after which bytes are flushed to the writer. + // Should preferably be a multiple of 6, since + // we accumulate 6 bytes between writes to the buffer. + bufferFlushSize = 240 + + // bufferSize is the actual output byte buffer size. + // It must have additional headroom for a flush + // which can contain up to 8 bytes. + bufferSize = bufferFlushSize + 8 +) + +// The number of extra bits needed by length code X - LENGTH_CODES_START. +var lengthExtraBits = [32]int8{ + /* 257 */ 0, 0, 0, + /* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, + /* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, + /* 280 */ 4, 5, 5, 5, 5, 0, +} + +// The length indicated by length code X - LENGTH_CODES_START. +var lengthBase = [32]uint8{ + 0, 1, 2, 3, 4, 5, 6, 7, 8, 10, + 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, + 64, 80, 96, 112, 128, 160, 192, 224, 255, +} + +// offset code word extra bits. +var offsetExtraBits = [64]int8{ + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, + 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, + /* extended window */ + 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, +} + +var offsetBase = [64]uint32{ + /* normal deflate */ + 0x000000, 0x000001, 0x000002, 0x000003, 0x000004, + 0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, + 0x000020, 0x000030, 0x000040, 0x000060, 0x000080, + 0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, + 0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, + 0x001800, 0x002000, 0x003000, 0x004000, 0x006000, + + /* extended window */ + 0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, + 0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, + 0x100000, 0x180000, 0x200000, 0x300000, +} + +// The odd order in which the codegen code sizes are written. +var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +type huffmanBitWriter struct { + // writer is the underlying writer. + // Do not use it directly; use the write method, which ensures + // that Write errors are sticky. + writer io.Writer + + // Data waiting to be written is bytes[0:nbytes] + // and then the low nbits of bits. + bits uint64 + nbits uint16 + nbytes uint8 + literalEncoding *huffmanEncoder + offsetEncoding *huffmanEncoder + codegenEncoding *huffmanEncoder + err error + lastHeader int + // Set between 0 (reused block can be up to 2x the size) + logNewTablePenalty uint + lastHuffMan bool + bytes [256]byte + literalFreq [lengthCodesStart + 32]uint16 + offsetFreq [32]uint16 + codegenFreq [codegenCodeCount]uint16 + + // codegen must have an extra space for the final symbol. + codegen [literalCount + offsetCodeCount + 1]uint8 +} + +// Huffman reuse. +// +// The huffmanBitWriter supports reusing huffman tables and thereby combining block sections. +// +// This is controlled by several variables: +// +// If lastHeader is non-zero the Huffman table can be reused. +// This also indicates that a Huffman table has been generated that can output all +// possible symbols. +// It also indicates that an EOB has not yet been emitted, so if a new tabel is generated +// an EOB with the previous table must be written. +// +// If lastHuffMan is set, a table for outputting literals has been generated and offsets are invalid. +// +// An incoming block estimates the output size of a new table using a 'fresh' by calculating the +// optimal size and adding a penalty in 'logNewTablePenalty'. +// A Huffman table is not optimal, which is why we add a penalty, and generating a new table +// is slower both for compression and decompression. + +func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { + return &huffmanBitWriter{ + writer: w, + literalEncoding: newHuffmanEncoder(literalCount), + codegenEncoding: newHuffmanEncoder(codegenCodeCount), + offsetEncoding: newHuffmanEncoder(offsetCodeCount), + } +} + +func (w *huffmanBitWriter) reset(writer io.Writer) { + w.writer = writer + w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil + w.lastHeader = 0 + w.lastHuffMan = false +} + +func (w *huffmanBitWriter) canReuse(t *tokens) (offsets, lits bool) { + offsets, lits = true, true + a := t.offHist[:offsetCodeCount] + b := w.offsetFreq[:len(a)] + for i := range a { + if b[i] == 0 && a[i] != 0 { + offsets = false + break + } + } + + a = t.extraHist[:literalCount-256] + b = w.literalFreq[256:literalCount] + b = b[:len(a)] + for i := range a { + if b[i] == 0 && a[i] != 0 { + lits = false + break + } + } + if lits { + a = t.litHist[:] + b = w.literalFreq[:len(a)] + for i := range a { + if b[i] == 0 && a[i] != 0 { + lits = false + break + } + } + } + return +} + +func (w *huffmanBitWriter) flush() { + if w.err != nil { + w.nbits = 0 + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + n := w.nbytes + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + if w.nbits > 8 { // Avoid underflow + w.nbits -= 8 + } else { + w.nbits = 0 + } + n++ + } + w.bits = 0 + w.write(w.bytes[:n]) + w.nbytes = 0 +} + +func (w *huffmanBitWriter) write(b []byte) { + if w.err != nil { + return + } + _, w.err = w.writer.Write(b) +} + +func (w *huffmanBitWriter) writeBits(b int32, nb uint16) { + w.bits |= uint64(b) << (w.nbits & 63) + w.nbits += nb + if w.nbits >= 48 { + w.writeOutBits() + } +} + +func (w *huffmanBitWriter) writeBytes(bytes []byte) { + if w.err != nil { + return + } + n := w.nbytes + if w.nbits&7 != 0 { + w.err = InternalError("writeBytes with unfinished bits") + return + } + for w.nbits != 0 { + w.bytes[n] = byte(w.bits) + w.bits >>= 8 + w.nbits -= 8 + n++ + } + if n != 0 { + w.write(w.bytes[:n]) + } + w.nbytes = 0 + w.write(bytes) +} + +// RFC 1951 3.2.7 specifies a special run-length encoding for specifying +// the literal and offset lengths arrays (which are concatenated into a single +// array). This method generates that run-length encoding. +// +// The result is written into the codegen array, and the frequencies +// of each code is written into the codegenFreq array. +// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional +// information. Code badCode is an end marker +// +// numLiterals The number of literals in literalEncoding +// numOffsets The number of offsets in offsetEncoding +// litenc, offenc The literal and offset encoder to use +func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { + for i := range w.codegenFreq { + w.codegenFreq[i] = 0 + } + // Note that we are using codegen both as a temporary variable for holding + // a copy of the frequencies, and as the place where we put the result. + // This is fine because the output is always shorter than the input used + // so far. + codegen := w.codegen[:] // cache + // Copy the concatenated code sizes to codegen. Put a marker at the end. + cgnl := codegen[:numLiterals] + for i := range cgnl { + cgnl[i] = uint8(litEnc.codes[i].len) + } + + cgnl = codegen[numLiterals : numLiterals+numOffsets] + for i := range cgnl { + cgnl[i] = uint8(offEnc.codes[i].len) + } + codegen[numLiterals+numOffsets] = badCode + + size := codegen[0] + count := 1 + outIndex := 0 + for inIndex := 1; size != badCode; inIndex++ { + // INVARIANT: We have seen "count" copies of size that have not yet + // had output generated for them. + nextSize := codegen[inIndex] + if nextSize == size { + count++ + continue + } + // We need to generate codegen indicating "count" of size. + if size != 0 { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + count-- + for count >= 3 { + n := 6 + if n > count { + n = count + } + codegen[outIndex] = 16 + outIndex++ + codegen[outIndex] = uint8(n - 3) + outIndex++ + w.codegenFreq[16]++ + count -= n + } + } else { + for count >= 11 { + n := 138 + if n > count { + n = count + } + codegen[outIndex] = 18 + outIndex++ + codegen[outIndex] = uint8(n - 11) + outIndex++ + w.codegenFreq[18]++ + count -= n + } + if count >= 3 { + // count >= 3 && count <= 10 + codegen[outIndex] = 17 + outIndex++ + codegen[outIndex] = uint8(count - 3) + outIndex++ + w.codegenFreq[17]++ + count = 0 + } + } + count-- + for ; count >= 0; count-- { + codegen[outIndex] = size + outIndex++ + w.codegenFreq[size]++ + } + // Set up invariant for next time through the loop. + size = nextSize + count = 1 + } + // Marker indicating the end of the codegen. + codegen[outIndex] = badCode +} + +func (w *huffmanBitWriter) codegens() int { + numCodegens := len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return numCodegens +} + +func (w *huffmanBitWriter) headerSize() (size, numCodegens int) { + numCodegens = len(w.codegenFreq) + for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { + numCodegens-- + } + return 3 + 5 + 5 + 4 + (3 * numCodegens) + + w.codegenEncoding.bitLength(w.codegenFreq[:]) + + int(w.codegenFreq[16])*2 + + int(w.codegenFreq[17])*3 + + int(w.codegenFreq[18])*7, numCodegens +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicReuseSize(litEnc, offEnc *huffmanEncoder) (size int) { + size = litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + return size +} + +// dynamicSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { + header, numCodegens := w.headerSize() + size = header + + litEnc.bitLength(w.literalFreq[:]) + + offEnc.bitLength(w.offsetFreq[:]) + + extraBits + return size, numCodegens +} + +// extraBitSize will return the number of bits that will be written +// as "extra" bits on matches. +func (w *huffmanBitWriter) extraBitSize() int { + total := 0 + for i, n := range w.literalFreq[257:literalCount] { + total += int(n) * int(lengthExtraBits[i&31]) + } + for i, n := range w.offsetFreq[:offsetCodeCount] { + total += int(n) * int(offsetExtraBits[i&31]) + } + return total +} + +// fixedSize returns the size of dynamically encoded data in bits. +func (w *huffmanBitWriter) fixedSize(extraBits int) int { + return 3 + + fixedLiteralEncoding.bitLength(w.literalFreq[:]) + + fixedOffsetEncoding.bitLength(w.offsetFreq[:]) + + extraBits +} + +// storedSize calculates the stored size, including header. +// The function returns the size in bits and whether the block +// fits inside a single block. +func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { + if in == nil { + return 0, false + } + if len(in) <= maxStoreBlockSize { + return (len(in) + 5) * 8, true + } + return 0, false +} + +func (w *huffmanBitWriter) writeCode(c hcode) { + // The function does not get inlined if we "& 63" the shift. + w.bits |= uint64(c.code) << w.nbits + w.nbits += c.len + if w.nbits >= 48 { + w.writeOutBits() + } +} + +// writeOutBits will write bits to the buffer. +func (w *huffmanBitWriter) writeOutBits() { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + w.bytes[n+2] = byte(bits >> 16) + w.bytes[n+3] = byte(bits >> 24) + w.bytes[n+4] = byte(bits >> 32) + w.bytes[n+5] = byte(bits >> 40) + n += 6 + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + w.nbytes = n +} + +// Write the header of a dynamic Huffman block to the output stream. +// +// numLiterals The number of literals specified in codegen +// numOffsets The number of offsets specified in codegen +// numCodegens The number of codegens used in codegen +func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { + if w.err != nil { + return + } + var firstBits int32 = 4 + if isEof { + firstBits = 5 + } + w.writeBits(firstBits, 3) + w.writeBits(int32(numLiterals-257), 5) + w.writeBits(int32(numOffsets-1), 5) + w.writeBits(int32(numCodegens-4), 4) + + for i := 0; i < numCodegens; i++ { + value := uint(w.codegenEncoding.codes[codegenOrder[i]].len) + w.writeBits(int32(value), 3) + } + + i := 0 + for { + var codeWord = uint32(w.codegen[i]) + i++ + if codeWord == badCode { + break + } + w.writeCode(w.codegenEncoding.codes[codeWord]) + + switch codeWord { + case 16: + w.writeBits(int32(w.codegen[i]), 2) + i++ + case 17: + w.writeBits(int32(w.codegen[i]), 3) + i++ + case 18: + w.writeBits(int32(w.codegen[i]), 7) + i++ + } + } +} + +// writeStoredHeader will write a stored header. +// If the stored block is only used for EOF, +// it is replaced with a fixed huffman block. +func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // To write EOF, use a fixed encoding block. 10 bits instead of 5 bytes. + if length == 0 && isEof { + w.writeFixedHeader(isEof) + // EOB: 7 bits, value: 0 + w.writeBits(0, 7) + w.flush() + return + } + + var flag int32 + if isEof { + flag = 1 + } + w.writeBits(flag, 3) + w.flush() + w.writeBits(int32(length), 16) + w.writeBits(int32(^uint16(length)), 16) +} + +func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { + if w.err != nil { + return + } + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + + // Indicate that we are a fixed Huffman block + var value int32 = 2 + if isEof { + value = 3 + } + w.writeBits(value, 3) +} + +// writeBlock will write a block of tokens with the smallest encoding. +// The original input can be supplied, and if the huffman encoded data +// is larger than the original bytes, the data will be written as a +// stored block. +// If the input is nil, the tokens will always be Huffman encoded. +func (w *huffmanBitWriter) writeBlock(tokens *tokens, eof bool, input []byte) { + if w.err != nil { + return + } + + tokens.AddEOB() + if w.lastHeader > 0 { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + numLiterals, numOffsets := w.indexTokens(tokens, false) + w.generate(tokens) + var extraBits int + storedSize, storable := w.storedSize(input) + if storable { + extraBits = w.extraBitSize() + } + + // Figure out smallest code. + // Fixed Huffman baseline. + var literalEncoding = fixedLiteralEncoding + var offsetEncoding = fixedOffsetEncoding + var size = w.fixedSize(extraBits) + + // Dynamic Huffman? + var numCodegens int + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) + + if dynamicSize < size { + size = dynamicSize + literalEncoding = w.literalEncoding + offsetEncoding = w.offsetEncoding + } + + // Stored bytes? + if storable && storedSize < size { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + // Huffman. + if literalEncoding == fixedLiteralEncoding { + w.writeFixedHeader(eof) + } else { + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + } + + // Write the tokens. + w.writeTokens(tokens.Slice(), literalEncoding.codes, offsetEncoding.codes) +} + +// writeBlockDynamic encodes a block using a dynamic Huffman table. +// This should be used if the symbols used have a disproportionate +// histogram distribution. +// If input is supplied and the compression savings are below 1/16th of the +// input size the block is stored. +func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + sync = sync || eof + if sync { + tokens.AddEOB() + } + + // We cannot reuse pure huffman table, and must mark as EOF. + if (w.lastHuffMan || eof) && w.lastHeader > 0 { + // We will not try to reuse. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } + if !sync { + tokens.Fill() + } + numLiterals, numOffsets := w.indexTokens(tokens, !sync) + + var size int + // Check if we should reuse. + if w.lastHeader > 0 { + // Estimate size for using a new table. + // Use the previous header size as the best estimate. + newSize := w.lastHeader + tokens.EstimatedBits() + newSize += newSize >> w.logNewTablePenalty + + // The estimated size is calculated as an optimal table. + // We add a penalty to make it more realistic and re-use a bit more. + reuseSize := w.dynamicReuseSize(w.literalEncoding, w.offsetEncoding) + w.extraBitSize() + + // Check if a new table is better. + if newSize < reuseSize { + // Write the EOB we owe. + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + size = newSize + w.lastHeader = 0 + } else { + size = reuseSize + } + // Check if we get a reasonable size decrease. + if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + w.lastHeader = 0 + return + } + } + + // We want a new block/table + if w.lastHeader == 0 { + w.generate(tokens) + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + var numCodegens int + size, numCodegens = w.dynamicSize(w.literalEncoding, w.offsetEncoding, w.extraBitSize()) + // Store bytes, if we don't get a reasonable improvement. + if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + w.lastHeader = 0 + return + } + + // Write Huffman table. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHeader, _ = w.headerSize() + w.lastHuffMan = false + } + + if sync { + w.lastHeader = 0 + } + // Write the tokens. + w.writeTokens(tokens.Slice(), w.literalEncoding.codes, w.offsetEncoding.codes) +} + +// indexTokens indexes a slice of tokens, and updates +// literalFreq and offsetFreq, and generates literalEncoding +// and offsetEncoding. +// The number of literal and offset tokens is returned. +func (w *huffmanBitWriter) indexTokens(t *tokens, filled bool) (numLiterals, numOffsets int) { + copy(w.literalFreq[:], t.litHist[:]) + copy(w.literalFreq[256:], t.extraHist[:]) + copy(w.offsetFreq[:], t.offHist[:offsetCodeCount]) + + if t.n == 0 { + return + } + if filled { + return maxNumLit, maxNumDist + } + // get the number of literals + numLiterals = len(w.literalFreq) + for w.literalFreq[numLiterals-1] == 0 { + numLiterals-- + } + // get the number of offsets + numOffsets = len(w.offsetFreq) + for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { + numOffsets-- + } + if numOffsets == 0 { + // We haven't found a single match. If we want to go with the dynamic encoding, + // we should count at least one offset to be sure that the offset huffman tree could be encoded. + w.offsetFreq[0] = 1 + numOffsets = 1 + } + return +} + +func (w *huffmanBitWriter) generate(t *tokens) { + w.literalEncoding.generate(w.literalFreq[:literalCount], 15) + w.offsetEncoding.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeTokens writes a slice of tokens to the output. +// codes for literal and offset encoding must be supplied. +func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { + if w.err != nil { + return + } + if len(tokens) == 0 { + return + } + + // Only last token should be endBlockMarker. + var deferEOB bool + if tokens[len(tokens)-1] == endBlockMarker { + tokens = tokens[:len(tokens)-1] + deferEOB = true + } + + // Create slices up to the next power of two to avoid bounds checks. + lits := leCodes[:256] + offs := oeCodes[:32] + lengths := leCodes[lengthCodesStart:] + lengths = lengths[:32] + for _, t := range tokens { + if t < matchType { + w.writeCode(lits[t.literal()]) + continue + } + + // Write the length + length := t.length() + lengthCode := lengthCode(length) + if false { + w.writeCode(lengths[lengthCode&31]) + } else { + // inlined + c := lengths[lengthCode&31] + w.bits |= uint64(c.code) << (w.nbits & 63) + w.nbits += c.len + if w.nbits >= 48 { + w.writeOutBits() + } + } + + extraLengthBits := uint16(lengthExtraBits[lengthCode&31]) + if extraLengthBits > 0 { + extraLength := int32(length - lengthBase[lengthCode&31]) + w.writeBits(extraLength, extraLengthBits) + } + // Write the offset + offset := t.offset() + offsetCode := offsetCode(offset) + if false { + w.writeCode(offs[offsetCode&31]) + } else { + // inlined + c := offs[offsetCode&31] + w.bits |= uint64(c.code) << (w.nbits & 63) + w.nbits += c.len + if w.nbits >= 48 { + w.writeOutBits() + } + } + extraOffsetBits := uint16(offsetExtraBits[offsetCode&63]) + if extraOffsetBits > 0 { + extraOffset := int32(offset - offsetBase[offsetCode&63]) + w.writeBits(extraOffset, extraOffsetBits) + } + } + if deferEOB { + w.writeCode(leCodes[endBlockMarker]) + } +} + +// huffOffset is a static offset encoder used for huffman only encoding. +// It can be reused since we will not be encoding offset values. +var huffOffset *huffmanEncoder + +func init() { + w := newHuffmanBitWriter(nil) + w.offsetFreq[0] = 1 + huffOffset = newHuffmanEncoder(offsetCodeCount) + huffOffset.generate(w.offsetFreq[:offsetCodeCount], 15) +} + +// writeBlockHuff encodes a block of bytes as either +// Huffman encoded literals or uncompressed bytes if the +// results only gains very little from compression. +func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte, sync bool) { + if w.err != nil { + return + } + + // Clear histogram + for i := range w.literalFreq[:] { + w.literalFreq[i] = 0 + } + if !w.lastHuffMan { + for i := range w.offsetFreq[:] { + w.offsetFreq[i] = 0 + } + } + + // Add everything as literals + // We have to estimate the header size. + // Assume header is around 70 bytes: + // https://stackoverflow.com/a/25454430 + const guessHeaderSizeBits = 70 * 8 + estBits, estExtra := histogramSize(input, w.literalFreq[:], !eof && !sync) + estBits += w.lastHeader + 15 + if w.lastHeader == 0 { + estBits += guessHeaderSizeBits + } + estBits += estBits >> w.logNewTablePenalty + + // Store bytes, if we don't get a reasonable improvement. + ssize, storable := w.storedSize(input) + if storable && ssize < estBits { + w.writeStoredHeader(len(input), eof) + w.writeBytes(input) + return + } + + if w.lastHeader > 0 { + reuseSize := w.literalEncoding.bitLength(w.literalFreq[:256]) + estBits += estExtra + + if estBits < reuseSize { + // We owe an EOB + w.writeCode(w.literalEncoding.codes[endBlockMarker]) + w.lastHeader = 0 + } + } + + const numLiterals = endBlockMarker + 1 + const numOffsets = 1 + if w.lastHeader == 0 { + w.literalFreq[endBlockMarker] = 1 + w.literalEncoding.generate(w.literalFreq[:numLiterals], 15) + + // Generate codegen and codegenFrequencies, which indicates how to encode + // the literalEncoding and the offsetEncoding. + w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) + w.codegenEncoding.generate(w.codegenFreq[:], 7) + numCodegens := w.codegens() + + // Huffman. + w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) + w.lastHuffMan = true + w.lastHeader, _ = w.headerSize() + } + + encoding := w.literalEncoding.codes[:257] + for _, t := range input { + // Bitwriting inlined, ~30% speedup + c := encoding[t] + w.bits |= uint64(c.code) << ((w.nbits) & 63) + w.nbits += c.len + if w.nbits >= 48 { + bits := w.bits + w.bits >>= 48 + w.nbits -= 48 + n := w.nbytes + w.bytes[n] = byte(bits) + w.bytes[n+1] = byte(bits >> 8) + w.bytes[n+2] = byte(bits >> 16) + w.bytes[n+3] = byte(bits >> 24) + w.bytes[n+4] = byte(bits >> 32) + w.bytes[n+5] = byte(bits >> 40) + n += 6 + if n >= bufferFlushSize { + if w.err != nil { + n = 0 + return + } + w.write(w.bytes[:n]) + n = 0 + } + w.nbytes = n + } + } + if eof || sync { + w.writeCode(encoding[endBlockMarker]) + w.lastHeader = 0 + w.lastHuffMan = false + } +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_code.go b/vendor/github.com/klauspost/compress/flate/huffman_code.go new file mode 100644 index 00000000000..4c39a301871 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_code.go @@ -0,0 +1,363 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "math" + "math/bits" +) + +const ( + maxBitsLimit = 16 + // number of valid literals + literalCount = 286 +) + +// hcode is a huffman code with a bit code and bit length. +type hcode struct { + code, len uint16 +} + +type huffmanEncoder struct { + codes []hcode + freqcache []literalNode + bitCount [17]int32 +} + +type literalNode struct { + literal uint16 + freq uint16 +} + +// A levelInfo describes the state of the constructed tree for a given depth. +type levelInfo struct { + // Our level. for better printing + level int32 + + // The frequency of the last node at this level + lastFreq int32 + + // The frequency of the next character to add to this level + nextCharFreq int32 + + // The frequency of the next pair (from level below) to add to this level. + // Only valid if the "needed" value of the next lower level is 0. + nextPairFreq int32 + + // The number of chains remaining to generate for this level before moving + // up to the next level + needed int32 +} + +// set sets the code and length of an hcode. +func (h *hcode) set(code uint16, length uint16) { + h.len = length + h.code = code +} + +func reverseBits(number uint16, bitLength byte) uint16 { + return bits.Reverse16(number << ((16 - bitLength) & 15)) +} + +func maxNode() literalNode { return literalNode{math.MaxUint16, math.MaxUint16} } + +func newHuffmanEncoder(size int) *huffmanEncoder { + // Make capacity to next power of two. + c := uint(bits.Len32(uint32(size - 1))) + return &huffmanEncoder{codes: make([]hcode, size, 1<= 3 +// The cases of 0, 1, and 2 literals are handled by special case code. +// +// list An array of the literals with non-zero frequencies +// and their associated frequencies. The array is in order of increasing +// frequency, and has as its last element a special element with frequency +// MaxInt32 +// maxBits The maximum number of bits that should be used to encode any literal. +// Must be less than 16. +// return An integer array in which array[i] indicates the number of literals +// that should be encoded in i bits. +func (h *huffmanEncoder) bitCounts(list []literalNode, maxBits int32) []int32 { + if maxBits >= maxBitsLimit { + panic("flate: maxBits too large") + } + n := int32(len(list)) + list = list[0 : n+1] + list[n] = maxNode() + + // The tree can't have greater depth than n - 1, no matter what. This + // saves a little bit of work in some small cases + if maxBits > n-1 { + maxBits = n - 1 + } + + // Create information about each of the levels. + // A bogus "Level 0" whose sole purpose is so that + // level1.prev.needed==0. This makes level1.nextPairFreq + // be a legitimate value that never gets chosen. + var levels [maxBitsLimit]levelInfo + // leafCounts[i] counts the number of literals at the left + // of ancestors of the rightmost node at level i. + // leafCounts[i][j] is the number of literals at the left + // of the level j ancestor. + var leafCounts [maxBitsLimit][maxBitsLimit]int32 + + for level := int32(1); level <= maxBits; level++ { + // For every level, the first two items are the first two characters. + // We initialize the levels as if we had already figured this out. + levels[level] = levelInfo{ + level: level, + lastFreq: int32(list[1].freq), + nextCharFreq: int32(list[2].freq), + nextPairFreq: int32(list[0].freq) + int32(list[1].freq), + } + leafCounts[level][level] = 2 + if level == 1 { + levels[level].nextPairFreq = math.MaxInt32 + } + } + + // We need a total of 2*n - 2 items at top level and have already generated 2. + levels[maxBits].needed = 2*n - 4 + + level := maxBits + for { + l := &levels[level] + if l.nextPairFreq == math.MaxInt32 && l.nextCharFreq == math.MaxInt32 { + // We've run out of both leafs and pairs. + // End all calculations for this level. + // To make sure we never come back to this level or any lower level, + // set nextPairFreq impossibly large. + l.needed = 0 + levels[level+1].nextPairFreq = math.MaxInt32 + level++ + continue + } + + prevFreq := l.lastFreq + if l.nextCharFreq < l.nextPairFreq { + // The next item on this row is a leaf node. + n := leafCounts[level][level] + 1 + l.lastFreq = l.nextCharFreq + // Lower leafCounts are the same of the previous node. + leafCounts[level][level] = n + e := list[n] + if e.literal < math.MaxUint16 { + l.nextCharFreq = int32(e.freq) + } else { + l.nextCharFreq = math.MaxInt32 + } + } else { + // The next item on this row is a pair from the previous row. + // nextPairFreq isn't valid until we generate two + // more values in the level below + l.lastFreq = l.nextPairFreq + // Take leaf counts from the lower level, except counts[level] remains the same. + copy(leafCounts[level][:level], leafCounts[level-1][:level]) + levels[l.level-1].needed = 2 + } + + if l.needed--; l.needed == 0 { + // We've done everything we need to do for this level. + // Continue calculating one level up. Fill in nextPairFreq + // of that level with the sum of the two nodes we've just calculated on + // this level. + if l.level == maxBits { + // All done! + break + } + levels[l.level+1].nextPairFreq = prevFreq + l.lastFreq + level++ + } else { + // If we stole from below, move down temporarily to replenish it. + for levels[level-1].needed > 0 { + level-- + } + } + } + + // Somethings is wrong if at the end, the top level is null or hasn't used + // all of the leaves. + if leafCounts[maxBits][maxBits] != n { + panic("leafCounts[maxBits][maxBits] != n") + } + + bitCount := h.bitCount[:maxBits+1] + bits := 1 + counts := &leafCounts[maxBits] + for level := maxBits; level > 0; level-- { + // chain.leafCount gives the number of literals requiring at least "bits" + // bits to encode. + bitCount[bits] = counts[level] - counts[level-1] + bits++ + } + return bitCount +} + +// Look at the leaves and assign them a bit count and an encoding as specified +// in RFC 1951 3.2.2 +func (h *huffmanEncoder) assignEncodingAndSize(bitCount []int32, list []literalNode) { + code := uint16(0) + for n, bits := range bitCount { + code <<= 1 + if n == 0 || bits == 0 { + continue + } + // The literals list[len(list)-bits] .. list[len(list)-bits] + // are encoded using "bits" bits, and get the values + // code, code + 1, .... The code values are + // assigned in literal order (not frequency order). + chunk := list[len(list)-int(bits):] + + sortByLiteral(chunk) + for _, node := range chunk { + h.codes[node.literal] = hcode{code: reverseBits(code, uint8(n)), len: uint16(n)} + code++ + } + list = list[0 : len(list)-int(bits)] + } +} + +// Update this Huffman Code object to be the minimum code for the specified frequency count. +// +// freq An array of frequencies, in which frequency[i] gives the frequency of literal i. +// maxBits The maximum number of bits to use for any literal. +func (h *huffmanEncoder) generate(freq []uint16, maxBits int32) { + if h.freqcache == nil { + // Allocate a reusable buffer with the longest possible frequency table. + // Possible lengths are codegenCodeCount, offsetCodeCount and literalCount. + // The largest of these is literalCount, so we allocate for that case. + h.freqcache = make([]literalNode, literalCount+1) + } + list := h.freqcache[:len(freq)+1] + // Number of non-zero literals + count := 0 + // Set list to be the set of all non-zero literals and their frequencies + for i, f := range freq { + if f != 0 { + list[count] = literalNode{uint16(i), f} + count++ + } else { + list[count] = literalNode{} + h.codes[i].len = 0 + } + } + list[len(freq)] = literalNode{} + + list = list[:count] + if count <= 2 { + // Handle the small cases here, because they are awkward for the general case code. With + // two or fewer literals, everything has bit length 1. + for i, node := range list { + // "list" is in order of increasing literal value. + h.codes[node.literal].set(uint16(i), 1) + } + return + } + sortByFreq(list) + + // Get the number of literals for each bit count + bitCount := h.bitCounts(list, maxBits) + // And do the assignment + h.assignEncodingAndSize(bitCount, list) +} + +func atLeastOne(v float32) float32 { + if v < 1 { + return 1 + } + return v +} + +// histogramSize accumulates a histogram of b in h. +// An estimated size in bits is returned. +// Unassigned values are assigned '1' in the histogram. +// len(h) must be >= 256, and h's elements must be all zeroes. +func histogramSize(b []byte, h []uint16, fill bool) (int, int) { + h = h[:256] + for _, t := range b { + h[t]++ + } + invTotal := 1.0 / float32(len(b)) + shannon := float32(0.0) + var extra float32 + if fill { + oneBits := atLeastOne(-mFastLog2(invTotal)) + for i, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } else { + h[i] = 1 + extra += oneBits + } + } + } else { + for _, v := range h[:] { + if v > 0 { + n := float32(v) + shannon += atLeastOne(-mFastLog2(n*invTotal)) * n + } + } + } + + return int(shannon + 0.99), int(extra + 0.99) +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go new file mode 100644 index 00000000000..20778029900 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByFreq.go @@ -0,0 +1,178 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByFreq(data []literalNode) { + n := len(data) + quickSortByFreq(data, 0, n, maxDepth(n)) +} + +func quickSortByFreq(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivotByFreq(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSortByFreq(data, a, mlo, maxDepth) + a = mhi // i.e., quickSortByFreq(data, mhi, b) + } else { + quickSortByFreq(data, mhi, b, maxDepth) + b = mlo // i.e., quickSortByFreq(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].freq == data[i-6].freq && data[i].literal < data[i-6].literal || data[i].freq < data[i-6].freq { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSortByFreq(data, a, b) + } +} + +// siftDownByFreq implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDownByFreq(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && (data[first+child].freq == data[first+child+1].freq && data[first+child].literal < data[first+child+1].literal || data[first+child].freq < data[first+child+1].freq) { + child++ + } + if data[first+root].freq == data[first+child].freq && data[first+root].literal > data[first+child].literal || data[first+root].freq > data[first+child].freq { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivotByFreq(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThreeSortByFreq(data, lo, lo+s, lo+2*s) + medianOfThreeSortByFreq(data, m, m-s, m+s) + medianOfThreeSortByFreq(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThreeSortByFreq(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { + } + b := a + for { + for ; b < c && (data[pivot].freq == data[b].freq && data[pivot].literal > data[b].literal || data[pivot].freq > data[b].freq); b++ { // data[b] <= pivot + } + for ; b < c && (data[pivot].freq == data[c-1].freq && data[pivot].literal < data[c-1].literal || data[pivot].freq < data[c-1].freq); c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].freq == data[hi-1].freq && data[pivot].literal > data[hi-1].literal || data[pivot].freq > data[hi-1].freq { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].freq == data[pivot].freq && data[m].literal > data[pivot].literal || data[m].freq > data[pivot].freq { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && (data[b-1].freq == data[pivot].freq && data[b-1].literal > data[pivot].literal || data[b-1].freq > data[pivot].freq); b-- { // data[b] == pivot + } + for ; a < b && (data[a].freq == data[pivot].freq && data[a].literal < data[pivot].literal || data[a].freq < data[pivot].freq); a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSortByFreq(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && (data[j].freq == data[j-1].freq && data[j].literal < data[j-1].literal || data[j].freq < data[j-1].freq); j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// quickSortByFreq, loosely following Bentley and McIlroy, +// ``Engineering a Sort Function,'' SP&E November 1993. + +// medianOfThreeSortByFreq moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThreeSortByFreq(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].freq == data[m1].freq && data[m2].literal < data[m1].literal || data[m2].freq < data[m1].freq { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].freq == data[m0].freq && data[m1].literal < data[m0].literal || data[m1].freq < data[m0].freq { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go new file mode 100644 index 00000000000..93f1aea109e --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/huffman_sortByLiteral.go @@ -0,0 +1,201 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +// Sort sorts data. +// It makes one call to data.Len to determine n, and O(n*log(n)) calls to +// data.Less and data.Swap. The sort is not guaranteed to be stable. +func sortByLiteral(data []literalNode) { + n := len(data) + quickSort(data, 0, n, maxDepth(n)) +} + +func quickSort(data []literalNode, a, b, maxDepth int) { + for b-a > 12 { // Use ShellSort for slices <= 12 elements + if maxDepth == 0 { + heapSort(data, a, b) + return + } + maxDepth-- + mlo, mhi := doPivot(data, a, b) + // Avoiding recursion on the larger subproblem guarantees + // a stack depth of at most lg(b-a). + if mlo-a < b-mhi { + quickSort(data, a, mlo, maxDepth) + a = mhi // i.e., quickSort(data, mhi, b) + } else { + quickSort(data, mhi, b, maxDepth) + b = mlo // i.e., quickSort(data, a, mlo) + } + } + if b-a > 1 { + // Do ShellSort pass with gap 6 + // It could be written in this simplified form cause b-a <= 12 + for i := a + 6; i < b; i++ { + if data[i].literal < data[i-6].literal { + data[i], data[i-6] = data[i-6], data[i] + } + } + insertionSort(data, a, b) + } +} +func heapSort(data []literalNode, a, b int) { + first := a + lo := 0 + hi := b - a + + // Build heap with greatest element at top. + for i := (hi - 1) / 2; i >= 0; i-- { + siftDown(data, i, hi, first) + } + + // Pop elements, largest first, into end of data. + for i := hi - 1; i >= 0; i-- { + data[first], data[first+i] = data[first+i], data[first] + siftDown(data, lo, i, first) + } +} + +// siftDown implements the heap property on data[lo, hi). +// first is an offset into the array where the root of the heap lies. +func siftDown(data []literalNode, lo, hi, first int) { + root := lo + for { + child := 2*root + 1 + if child >= hi { + break + } + if child+1 < hi && data[first+child].literal < data[first+child+1].literal { + child++ + } + if data[first+root].literal > data[first+child].literal { + return + } + data[first+root], data[first+child] = data[first+child], data[first+root] + root = child + } +} +func doPivot(data []literalNode, lo, hi int) (midlo, midhi int) { + m := int(uint(lo+hi) >> 1) // Written like this to avoid integer overflow. + if hi-lo > 40 { + // Tukey's ``Ninther,'' median of three medians of three. + s := (hi - lo) / 8 + medianOfThree(data, lo, lo+s, lo+2*s) + medianOfThree(data, m, m-s, m+s) + medianOfThree(data, hi-1, hi-1-s, hi-1-2*s) + } + medianOfThree(data, lo, m, hi-1) + + // Invariants are: + // data[lo] = pivot (set up by ChoosePivot) + // data[lo < i < a] < pivot + // data[a <= i < b] <= pivot + // data[b <= i < c] unexamined + // data[c <= i < hi-1] > pivot + // data[hi-1] >= pivot + pivot := lo + a, c := lo+1, hi-1 + + for ; a < c && data[a].literal < data[pivot].literal; a++ { + } + b := a + for { + for ; b < c && data[pivot].literal > data[b].literal; b++ { // data[b] <= pivot + } + for ; b < c && data[pivot].literal < data[c-1].literal; c-- { // data[c-1] > pivot + } + if b >= c { + break + } + // data[b] > pivot; data[c-1] <= pivot + data[b], data[c-1] = data[c-1], data[b] + b++ + c-- + } + // If hi-c<3 then there are duplicates (by property of median of nine). + // Let's be a bit more conservative, and set border to 5. + protect := hi-c < 5 + if !protect && hi-c < (hi-lo)/4 { + // Lets test some points for equality to pivot + dups := 0 + if data[pivot].literal > data[hi-1].literal { // data[hi-1] = pivot + data[c], data[hi-1] = data[hi-1], data[c] + c++ + dups++ + } + if data[b-1].literal > data[pivot].literal { // data[b-1] = pivot + b-- + dups++ + } + // m-lo = (hi-lo)/2 > 6 + // b-lo > (hi-lo)*3/4-1 > 8 + // ==> m < b ==> data[m] <= pivot + if data[m].literal > data[pivot].literal { // data[m] = pivot + data[m], data[b-1] = data[b-1], data[m] + b-- + dups++ + } + // if at least 2 points are equal to pivot, assume skewed distribution + protect = dups > 1 + } + if protect { + // Protect against a lot of duplicates + // Add invariant: + // data[a <= i < b] unexamined + // data[b <= i < c] = pivot + for { + for ; a < b && data[b-1].literal > data[pivot].literal; b-- { // data[b] == pivot + } + for ; a < b && data[a].literal < data[pivot].literal; a++ { // data[a] < pivot + } + if a >= b { + break + } + // data[a] == pivot; data[b-1] < pivot + data[a], data[b-1] = data[b-1], data[a] + a++ + b-- + } + } + // Swap pivot into middle + data[pivot], data[b-1] = data[b-1], data[pivot] + return b - 1, c +} + +// Insertion sort +func insertionSort(data []literalNode, a, b int) { + for i := a + 1; i < b; i++ { + for j := i; j > a && data[j].literal < data[j-1].literal; j-- { + data[j], data[j-1] = data[j-1], data[j] + } + } +} + +// maxDepth returns a threshold at which quicksort should switch +// to heapsort. It returns 2*ceil(lg(n+1)). +func maxDepth(n int) int { + var depth int + for i := n; i > 0; i >>= 1 { + depth++ + } + return depth * 2 +} + +// medianOfThree moves the median of the three values data[m0], data[m1], data[m2] into data[m1]. +func medianOfThree(data []literalNode, m1, m0, m2 int) { + // sort 3 elements + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + // data[m0] <= data[m1] + if data[m2].literal < data[m1].literal { + data[m2], data[m1] = data[m1], data[m2] + // data[m0] <= data[m2] && data[m1] < data[m2] + if data[m1].literal < data[m0].literal { + data[m1], data[m0] = data[m0], data[m1] + } + } + // now data[m0] <= data[m1] <= data[m2] +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go new file mode 100644 index 00000000000..3e4259f157b --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate.go @@ -0,0 +1,1001 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package flate implements the DEFLATE compressed data format, described in +// RFC 1951. The gzip and zlib packages implement access to DEFLATE-based file +// formats. +package flate + +import ( + "bufio" + "fmt" + "io" + "math/bits" + "strconv" + "sync" +) + +const ( + maxCodeLen = 16 // max length of Huffman code + maxCodeLenMask = 15 // mask for max length of Huffman code + // The next three numbers come from the RFC section 3.2.7, with the + // additional proviso in section 3.2.5 which implies that distance codes + // 30 and 31 should never occur in compressed data. + maxNumLit = 286 + maxNumDist = 30 + numCodes = 19 // number of codes in Huffman meta-code + + debugDecode = false +) + +// Initialize the fixedHuffmanDecoder only once upon first use. +var fixedOnce sync.Once +var fixedHuffmanDecoder huffmanDecoder + +// A CorruptInputError reports the presence of corrupt input at a given offset. +type CorruptInputError int64 + +func (e CorruptInputError) Error() string { + return "flate: corrupt input before offset " + strconv.FormatInt(int64(e), 10) +} + +// An InternalError reports an error in the flate code itself. +type InternalError string + +func (e InternalError) Error() string { return "flate: internal error: " + string(e) } + +// A ReadError reports an error encountered while reading input. +// +// Deprecated: No longer returned. +type ReadError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Read +} + +func (e *ReadError) Error() string { + return "flate: read error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// A WriteError reports an error encountered while writing output. +// +// Deprecated: No longer returned. +type WriteError struct { + Offset int64 // byte offset where error occurred + Err error // error returned by underlying Write +} + +func (e *WriteError) Error() string { + return "flate: write error at offset " + strconv.FormatInt(e.Offset, 10) + ": " + e.Err.Error() +} + +// Resetter resets a ReadCloser returned by NewReader or NewReaderDict to +// to switch to a new underlying Reader. This permits reusing a ReadCloser +// instead of allocating a new one. +type Resetter interface { + // Reset discards any buffered data and resets the Resetter as if it was + // newly initialized with the given reader. + Reset(r io.Reader, dict []byte) error +} + +// The data structure for decoding Huffman tables is based on that of +// zlib. There is a lookup table of a fixed bit width (huffmanChunkBits), +// For codes smaller than the table width, there are multiple entries +// (each combination of trailing bits has the same value). For codes +// larger than the table width, the table contains a link to an overflow +// table. The width of each entry in the link table is the maximum code +// size minus the chunk width. +// +// Note that you can do a lookup in the table even without all bits +// filled. Since the extra bits are zero, and the DEFLATE Huffman codes +// have the property that shorter codes come before longer ones, the +// bit length estimate in the result is a lower bound on the actual +// number of bits. +// +// See the following: +// http://www.gzip.org/algorithm.txt + +// chunk & 15 is number of bits +// chunk >> 4 is value, including table link + +const ( + huffmanChunkBits = 9 + huffmanNumChunks = 1 << huffmanChunkBits + huffmanCountMask = 15 + huffmanValueShift = 4 +) + +type huffmanDecoder struct { + maxRead int // the maximum number of bits we can read and not overread + chunks *[huffmanNumChunks]uint16 // chunks as described above + links [][]uint16 // overflow links + linkMask uint32 // mask the width of the link table +} + +// Initialize Huffman decoding tables from array of code lengths. +// Following this function, h is guaranteed to be initialized into a complete +// tree (i.e., neither over-subscribed nor under-subscribed). The exception is a +// degenerate case where the tree has only a single symbol with length 1. Empty +// trees are permitted. +func (h *huffmanDecoder) init(lengths []int) bool { + // Sanity enables additional runtime tests during Huffman + // table construction. It's intended to be used during + // development to supplement the currently ad-hoc unit tests. + const sanity = false + + if h.chunks == nil { + h.chunks = &[huffmanNumChunks]uint16{} + } + if h.maxRead != 0 { + *h = huffmanDecoder{chunks: h.chunks, links: h.links} + } + + // Count number of codes of each length, + // compute maxRead and max length. + var count [maxCodeLen]int + var min, max int + for _, n := range lengths { + if n == 0 { + continue + } + if min == 0 || n < min { + min = n + } + if n > max { + max = n + } + count[n&maxCodeLenMask]++ + } + + // Empty tree. The decompressor.huffSym function will fail later if the tree + // is used. Technically, an empty tree is only valid for the HDIST tree and + // not the HCLEN and HLIT tree. However, a stream with an empty HCLEN tree + // is guaranteed to fail since it will attempt to use the tree to decode the + // codes for the HLIT and HDIST trees. Similarly, an empty HLIT tree is + // guaranteed to fail later since the compressed data section must be + // composed of at least one symbol (the end-of-block marker). + if max == 0 { + return true + } + + code := 0 + var nextcode [maxCodeLen]int + for i := min; i <= max; i++ { + code <<= 1 + nextcode[i&maxCodeLenMask] = code + code += count[i&maxCodeLenMask] + } + + // Check that the coding is complete (i.e., that we've + // assigned all 2-to-the-max possible bit sequences). + // Exception: To be compatible with zlib, we also need to + // accept degenerate single-code codings. See also + // TestDegenerateHuffmanCoding. + if code != 1< huffmanChunkBits { + numLinks := 1 << (uint(max) - huffmanChunkBits) + h.linkMask = uint32(numLinks - 1) + + // create link tables + link := nextcode[huffmanChunkBits+1] >> 1 + if cap(h.links) < huffmanNumChunks-link { + h.links = make([][]uint16, huffmanNumChunks-link) + } else { + h.links = h.links[:huffmanNumChunks-link] + } + for j := uint(link); j < huffmanNumChunks; j++ { + reverse := int(bits.Reverse16(uint16(j))) + reverse >>= uint(16 - huffmanChunkBits) + off := j - uint(link) + if sanity && h.chunks[reverse] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[reverse] = uint16(off<>= uint(16 - n) + if n <= huffmanChunkBits { + for off := reverse; off < len(h.chunks); off += 1 << uint(n) { + // We should never need to overwrite + // an existing chunk. Also, 0 is + // never a valid chunk, because the + // lower 4 "count" bits should be + // between 1 and 15. + if sanity && h.chunks[off] != 0 { + panic("impossible: overwriting existing chunk") + } + h.chunks[off] = chunk + } + } else { + j := reverse & (huffmanNumChunks - 1) + if sanity && h.chunks[j]&huffmanCountMask != huffmanChunkBits+1 { + // Longer codes should have been + // associated with a link table above. + panic("impossible: not an indirect chunk") + } + value := h.chunks[j] >> huffmanValueShift + linktab := h.links[value] + reverse >>= huffmanChunkBits + for off := reverse; off < len(linktab); off += 1 << uint(n-huffmanChunkBits) { + if sanity && linktab[off] != 0 { + panic("impossible: overwriting existing chunk") + } + linktab[off] = chunk + } + } + } + + if sanity { + // Above we've sanity checked that we never overwrote + // an existing entry. Here we additionally check that + // we filled the tables completely. + for i, chunk := range h.chunks { + if chunk == 0 { + // As an exception, in the degenerate + // single-code case, we allow odd + // chunks to be missing. + if code == 1 && i%2 == 1 { + continue + } + panic("impossible: missing chunk") + } + } + for _, linktab := range h.links { + for _, chunk := range linktab { + if chunk == 0 { + panic("impossible: missing chunk") + } + } + } + } + + return true +} + +// The actual read interface needed by NewReader. +// If the passed in io.Reader does not also have ReadByte, +// the NewReader will introduce its own buffering. +type Reader interface { + io.Reader + io.ByteReader +} + +// Decompress state. +type decompressor struct { + // Input source. + r Reader + roffset int64 + + // Huffman decoders for literal/length, distance. + h1, h2 huffmanDecoder + + // Length arrays used to define Huffman codes. + bits *[maxNumLit + maxNumDist]int + codebits *[numCodes]int + + // Output history, buffer. + dict dictDecoder + + // Next step in the decompression, + // and decompression state. + step func(*decompressor) + stepState int + err error + toRead []byte + hl, hd *huffmanDecoder + copyLen int + copyDist int + + // Temporary buffer (avoids repeated allocation). + buf [4]byte + + // Input bits, in top of b. + b uint32 + + nb uint + final bool +} + +func (f *decompressor) nextBlock() { + for f.nb < 1+2 { + if f.err = f.moreBits(); f.err != nil { + return + } + } + f.final = f.b&1 == 1 + f.b >>= 1 + typ := f.b & 3 + f.b >>= 2 + f.nb -= 1 + 2 + switch typ { + case 0: + f.dataBlock() + case 1: + // compressed, fixed Huffman tables + f.hl = &fixedHuffmanDecoder + f.hd = nil + f.huffmanBlockDecoder()() + case 2: + // compressed, dynamic Huffman tables + if f.err = f.readHuffman(); f.err != nil { + break + } + f.hl = &f.h1 + f.hd = &f.h2 + f.huffmanBlockDecoder()() + default: + // 3 is reserved. + if debugDecode { + fmt.Println("reserved data block encountered") + } + f.err = CorruptInputError(f.roffset) + } +} + +func (f *decompressor) Read(b []byte) (int, error) { + for { + if len(f.toRead) > 0 { + n := copy(b, f.toRead) + f.toRead = f.toRead[n:] + if len(f.toRead) == 0 { + return n, f.err + } + return n, nil + } + if f.err != nil { + return 0, f.err + } + f.step(f) + if f.err != nil && len(f.toRead) == 0 { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + } + } +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (f *decompressor) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + flushed := false + for { + if len(f.toRead) > 0 { + n, err := w.Write(f.toRead) + total += int64(n) + if err != nil { + f.err = err + return total, err + } + if n != len(f.toRead) { + return total, io.ErrShortWrite + } + f.toRead = f.toRead[:0] + } + if f.err != nil && flushed { + if f.err == io.EOF { + return total, nil + } + return total, f.err + } + if f.err == nil { + f.step(f) + } + if len(f.toRead) == 0 && f.err != nil && !flushed { + f.toRead = f.dict.readFlush() // Flush what's left in case of error + flushed = true + } + } +} + +func (f *decompressor) Close() error { + if f.err == io.EOF { + return nil + } + return f.err +} + +// RFC 1951 section 3.2.7. +// Compression with dynamic Huffman codes + +var codeOrder = [...]int{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} + +func (f *decompressor) readHuffman() error { + // HLIT[5], HDIST[5], HCLEN[4]. + for f.nb < 5+5+4 { + if err := f.moreBits(); err != nil { + return err + } + } + nlit := int(f.b&0x1F) + 257 + if nlit > maxNumLit { + if debugDecode { + fmt.Println("nlit > maxNumLit", nlit) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + ndist := int(f.b&0x1F) + 1 + if ndist > maxNumDist { + if debugDecode { + fmt.Println("ndist > maxNumDist", ndist) + } + return CorruptInputError(f.roffset) + } + f.b >>= 5 + nclen := int(f.b&0xF) + 4 + // numCodes is 19, so nclen is always valid. + f.b >>= 4 + f.nb -= 5 + 5 + 4 + + // (HCLEN+4)*3 bits: code lengths in the magic codeOrder order. + for i := 0; i < nclen; i++ { + for f.nb < 3 { + if err := f.moreBits(); err != nil { + return err + } + } + f.codebits[codeOrder[i]] = int(f.b & 0x7) + f.b >>= 3 + f.nb -= 3 + } + for i := nclen; i < len(codeOrder); i++ { + f.codebits[codeOrder[i]] = 0 + } + if !f.h1.init(f.codebits[0:]) { + if debugDecode { + fmt.Println("init codebits failed") + } + return CorruptInputError(f.roffset) + } + + // HLIT + 257 code lengths, HDIST + 1 code lengths, + // using the code length Huffman code. + for i, n := 0, nlit+ndist; i < n; { + x, err := f.huffSym(&f.h1) + if err != nil { + return err + } + if x < 16 { + // Actual length. + f.bits[i] = x + i++ + continue + } + // Repeat previous length or zero. + var rep int + var nb uint + var b int + switch x { + default: + return InternalError("unexpected length code") + case 16: + rep = 3 + nb = 2 + if i == 0 { + if debugDecode { + fmt.Println("i==0") + } + return CorruptInputError(f.roffset) + } + b = f.bits[i-1] + case 17: + rep = 3 + nb = 3 + b = 0 + case 18: + rep = 11 + nb = 7 + b = 0 + } + for f.nb < nb { + if err := f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits:", err) + } + return err + } + } + rep += int(f.b & uint32(1<>= nb + f.nb -= nb + if i+rep > n { + if debugDecode { + fmt.Println("i+rep > n", i, rep, n) + } + return CorruptInputError(f.roffset) + } + for j := 0; j < rep; j++ { + f.bits[i] = b + i++ + } + } + + if !f.h1.init(f.bits[0:nlit]) || !f.h2.init(f.bits[nlit:nlit+ndist]) { + if debugDecode { + fmt.Println("init2 failed") + } + return CorruptInputError(f.roffset) + } + + // As an optimization, we can initialize the maxRead bits to read at a time + // for the HLIT tree to the length of the EOB marker since we know that + // every block must terminate with one. This preserves the property that + // we never read any extra bytes after the end of the DEFLATE stream. + if f.h1.maxRead < f.bits[endBlockMarker] { + f.h1.maxRead = f.bits[endBlockMarker] + } + if !f.final { + // If not the final block, the smallest block possible is + // a predefined table, BTYPE=01, with a single EOB marker. + // This will take up 3 + 7 bits. + f.h1.maxRead += 10 + } + + return nil +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBlockGeneric() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlockGeneric + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = f.moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBlockGeneric // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Copy a single uncompressed data block from input to output. +func (f *decompressor) dataBlock() { + // Uncompressed. + // Discard current half-byte. + left := (f.nb) & 7 + f.nb -= left + f.b >>= left + + offBytes := f.nb >> 3 + // Unfilled values will be overwritten. + f.buf[0] = uint8(f.b) + f.buf[1] = uint8(f.b >> 8) + f.buf[2] = uint8(f.b >> 16) + f.buf[3] = uint8(f.b >> 24) + + f.roffset += int64(offBytes) + f.nb, f.b = 0, 0 + + // Length then ones-complement of length. + nr, err := io.ReadFull(f.r, f.buf[offBytes:4]) + f.roffset += int64(nr) + if err != nil { + f.err = noEOF(err) + return + } + n := uint16(f.buf[0]) | uint16(f.buf[1])<<8 + nn := uint16(f.buf[2]) | uint16(f.buf[3])<<8 + if nn != ^n { + if debugDecode { + ncomp := ^n + fmt.Println("uint16(nn) != uint16(^n)", nn, ncomp) + } + f.err = CorruptInputError(f.roffset) + return + } + + if n == 0 { + f.toRead = f.dict.readFlush() + f.finishBlock() + return + } + + f.copyLen = int(n) + f.copyData() +} + +// copyData copies f.copyLen bytes from the underlying reader into f.hist. +// It pauses for reads when f.hist is full. +func (f *decompressor) copyData() { + buf := f.dict.writeSlice() + if len(buf) > f.copyLen { + buf = buf[:f.copyLen] + } + + cnt, err := io.ReadFull(f.r, buf) + f.roffset += int64(cnt) + f.copyLen -= cnt + f.dict.writeMark(cnt) + if err != nil { + f.err = noEOF(err) + return + } + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).copyData + return + } + f.finishBlock() +} + +func (f *decompressor) finishBlock() { + if f.final { + if f.dict.availRead() > 0 { + f.toRead = f.dict.readFlush() + } + f.err = io.EOF + } + f.step = (*decompressor).nextBlock +} + +// noEOF returns err, unless err == io.EOF, in which case it returns io.ErrUnexpectedEOF. +func noEOF(e error) error { + if e == io.EOF { + return io.ErrUnexpectedEOF + } + return e +} + +func (f *decompressor) moreBits() error { + c, err := f.r.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil +} + +// Read the next Huffman-encoded symbol from f according to h. +func (f *decompressor) huffSym(h *huffmanDecoder) (int, error) { + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(h.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := f.r.ReadByte() + if err != nil { + f.b = b + f.nb = nb + return 0, noEOF(err) + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := h.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = h.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&h.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return 0, f.err + } + f.b = b >> (n & 31) + f.nb = nb - n + return int(chunk >> huffmanValueShift), nil + } + } +} + +func makeReader(r io.Reader) Reader { + if rr, ok := r.(Reader); ok { + return rr + } + return bufio.NewReader(r) +} + +func fixedHuffmanDecoderInit() { + fixedOnce.Do(func() { + // These come from the RFC section 3.2.6. + var bits [288]int + for i := 0; i < 144; i++ { + bits[i] = 8 + } + for i := 144; i < 256; i++ { + bits[i] = 9 + } + for i := 256; i < 280; i++ { + bits[i] = 7 + } + for i := 280; i < 288; i++ { + bits[i] = 8 + } + fixedHuffmanDecoder.init(bits[:]) + }) +} + +func (f *decompressor) Reset(r io.Reader, dict []byte) error { + *f = decompressor{ + r: makeReader(r), + bits: f.bits, + codebits: f.codebits, + h1: f.h1, + h2: f.h2, + dict: f.dict, + step: (*decompressor).nextBlock, + } + f.dict.init(maxMatchOffset, dict) + return nil +} + +// NewReader returns a new ReadCloser that can be used +// to read the uncompressed version of r. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// It is the caller's responsibility to call Close on the ReadCloser +// when finished reading. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReader(r io.Reader) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, nil) + return &f +} + +// NewReaderDict is like NewReader but initializes the reader +// with a preset dictionary. The returned Reader behaves as if +// the uncompressed data stream started with the given dictionary, +// which has already been read. NewReaderDict is typically used +// to read data compressed by NewWriterDict. +// +// The ReadCloser returned by NewReader also implements Resetter. +func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser { + fixedHuffmanDecoderInit() + + var f decompressor + f.r = makeReader(r) + f.bits = new([maxNumLit + maxNumDist]int) + f.codebits = new([numCodes]int) + f.step = (*decompressor).nextBlock + f.dict.init(maxMatchOffset, dict) + return &f +} diff --git a/vendor/github.com/klauspost/compress/flate/inflate_gen.go b/vendor/github.com/klauspost/compress/flate/inflate_gen.go new file mode 100644 index 00000000000..397dc1b1a13 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/inflate_gen.go @@ -0,0 +1,922 @@ +// Code generated by go generate gen_inflate.go. DO NOT EDIT. + +package flate + +import ( + "bufio" + "bytes" + "fmt" + "math/bits" + "strings" +) + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesBuffer() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Buffer) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesBuffer // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBytesReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bytes.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBytesReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanBufioReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*bufio.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBufioReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanBufioReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +// Decode a single Huffman block from f. +// hl and hd are the Huffman states for the lit/length values +// and the distance values, respectively. If hd == nil, using the +// fixed distance encoding associated with fixed Huffman blocks. +func (f *decompressor) huffmanStringsReader() { + const ( + stateInit = iota // Zero value must be stateInit + stateDict + ) + fr := f.r.(*strings.Reader) + moreBits := func() error { + c, err := fr.ReadByte() + if err != nil { + return noEOF(err) + } + f.roffset++ + f.b |= uint32(c) << f.nb + f.nb += 8 + return nil + } + + switch f.stepState { + case stateInit: + goto readLiteral + case stateDict: + goto copyHistory + } + +readLiteral: + // Read literal and/or (length, distance) according to RFC section 3.2.3. + { + var v int + { + // Inlined v, err := f.huffSym(f.hl) + // Since a huffmanDecoder can be empty or be composed of a degenerate tree + // with single element, huffSym must error on these two edge cases. In both + // cases, the chunks slice will be 0 for the invalid sequence, leading it + // satisfy the n == 0 check below. + n := uint(f.hl.maxRead) + // Optimization. Compiler isn't smart enough to keep f.b,f.nb in registers, + // but is smart enough to keep local variables in registers, so use nb and b, + // inline call to moreBits and reassign b,nb back to f on return. + nb, b := f.nb, f.b + for { + for nb < n { + c, err := fr.ReadByte() + if err != nil { + f.b = b + f.nb = nb + f.err = noEOF(err) + return + } + f.roffset++ + b |= uint32(c) << (nb & 31) + nb += 8 + } + chunk := f.hl.chunks[b&(huffmanNumChunks-1)] + n = uint(chunk & huffmanCountMask) + if n > huffmanChunkBits { + chunk = f.hl.links[chunk>>huffmanValueShift][(b>>huffmanChunkBits)&f.hl.linkMask] + n = uint(chunk & huffmanCountMask) + } + if n <= nb { + if n == 0 { + f.b = b + f.nb = nb + if debugDecode { + fmt.Println("huffsym: n==0") + } + f.err = CorruptInputError(f.roffset) + return + } + f.b = b >> (n & 31) + f.nb = nb - n + v = int(chunk >> huffmanValueShift) + break + } + } + } + + var n uint // number of bits extra + var length int + var err error + switch { + case v < 256: + f.dict.writeByte(byte(v)) + if f.dict.availWrite() == 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader + f.stepState = stateInit + return + } + goto readLiteral + case v == 256: + f.finishBlock() + return + // otherwise, reference to older data + case v < 265: + length = v - (257 - 3) + n = 0 + case v < 269: + length = v*2 - (265*2 - 11) + n = 1 + case v < 273: + length = v*4 - (269*4 - 19) + n = 2 + case v < 277: + length = v*8 - (273*8 - 35) + n = 3 + case v < 281: + length = v*16 - (277*16 - 67) + n = 4 + case v < 285: + length = v*32 - (281*32 - 131) + n = 5 + case v < maxNumLit: + length = 258 + n = 0 + default: + if debugDecode { + fmt.Println(v, ">= maxNumLit") + } + f.err = CorruptInputError(f.roffset) + return + } + if n > 0 { + for f.nb < n { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits n>0:", err) + } + f.err = err + return + } + } + length += int(f.b & uint32(1<>= n + f.nb -= n + } + + var dist int + if f.hd == nil { + for f.nb < 5 { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb<5:", err) + } + f.err = err + return + } + } + dist = int(bits.Reverse8(uint8(f.b & 0x1F << 3))) + f.b >>= 5 + f.nb -= 5 + } else { + if dist, err = f.huffSym(f.hd); err != nil { + if debugDecode { + fmt.Println("huffsym:", err) + } + f.err = err + return + } + } + + switch { + case dist < 4: + dist++ + case dist < maxNumDist: + nb := uint(dist-2) >> 1 + // have 1 bit in bottom of dist, need nb more. + extra := (dist & 1) << nb + for f.nb < nb { + if err = moreBits(); err != nil { + if debugDecode { + fmt.Println("morebits f.nb>= nb + f.nb -= nb + dist = 1<<(nb+1) + 1 + extra + default: + if debugDecode { + fmt.Println("dist too big:", dist, maxNumDist) + } + f.err = CorruptInputError(f.roffset) + return + } + + // No check on length; encoding can be prescient. + if dist > f.dict.histSize() { + if debugDecode { + fmt.Println("dist > f.dict.histSize():", dist, f.dict.histSize()) + } + f.err = CorruptInputError(f.roffset) + return + } + + f.copyLen, f.copyDist = length, dist + goto copyHistory + } + +copyHistory: + // Perform a backwards copy according to RFC section 3.2.3. + { + cnt := f.dict.tryWriteCopy(f.copyDist, f.copyLen) + if cnt == 0 { + cnt = f.dict.writeCopy(f.copyDist, f.copyLen) + } + f.copyLen -= cnt + + if f.dict.availWrite() == 0 || f.copyLen > 0 { + f.toRead = f.dict.readFlush() + f.step = (*decompressor).huffmanStringsReader // We need to continue this work + f.stepState = stateDict + return + } + goto readLiteral + } +} + +func (f *decompressor) huffmanBlockDecoder() func() { + switch f.r.(type) { + case *bytes.Buffer: + return f.huffmanBytesBuffer + case *bytes.Reader: + return f.huffmanBytesReader + case *bufio.Reader: + return f.huffmanBufioReader + case *strings.Reader: + return f.huffmanStringsReader + default: + return f.huffmanBlockGeneric + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level1.go b/vendor/github.com/klauspost/compress/flate/level1.go new file mode 100644 index 00000000000..1e5eea3968a --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level1.go @@ -0,0 +1,179 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL1 struct { + fastGen + table [tableSize]tableEntry +} + +// EncodeL1 uses a similar algorithm to level 1 +func (e *fastEncL1) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash(cv) + candidate = e.table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hash(uint32(now)) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + // Save the match found + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash(cv)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash(uint32(x)) + e.table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hash(uint32(x)) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x) != load3232(src, candidate.offset-e.cur) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level2.go b/vendor/github.com/klauspost/compress/flate/level2.go new file mode 100644 index 00000000000..5b986a1944e --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level2.go @@ -0,0 +1,205 @@ +package flate + +import "fmt" + +// fastGen maintains the table for matches, +// and the previous byte block for level 2. +// This is the generic implementation. +type fastEncL2 struct { + fastGen + table [bTableSize]tableEntry +} + +// EncodeL2 uses a similar algorithm to level 1, but is capable +// of matching across blocks giving better compression at a small slowdown. +func (e *fastEncL2) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + // When should we start skipping if we haven't found matches in a long while. + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hash4u(cv, bTableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidate = e.table[nextHash] + now := load6432(src, nextS) + e.table[nextHash] = tableEntry{offset: s + e.cur} + nextHash = hash4u(uint32(now), bTableBits) + + offset := s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + e.table[nextHash] = tableEntry{offset: nextS + e.cur} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = e.table[nextHash] + now >>= 8 + e.table[nextHash] = tableEntry{offset: s + e.cur} + + offset = s - (candidate.offset - e.cur) + if offset < maxMatchOffset && cv == load3232(src, candidate.offset-e.cur) { + break + } + cv = uint32(now) + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+l+4) < len(src) { + cv := load3232(src, s) + e.table[hash4u(cv, bTableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every second hash in-between, but offset by 1. + for i := s - l + 2; i < s-5; i += 7 { + x := load6432(src, int32(i)) + nextHash := hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 2} + // Skip one + x >>= 16 + nextHash = hash4u(uint32(x), bTableBits) + e.table[nextHash] = tableEntry{offset: e.cur + i + 4} + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 to s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6432(src, s-2) + o := e.cur + s - 2 + prevHash := hash4u(uint32(x), bTableBits) + prevHash2 := hash4u(uint32(x>>8), bTableBits) + e.table[prevHash] = tableEntry{offset: o} + e.table[prevHash2] = tableEntry{offset: o + 1} + currHash := hash4u(uint32(x>>16), bTableBits) + candidate = e.table[currHash] + e.table[currHash] = tableEntry{offset: o + 2} + + offset := s - (candidate.offset - e.cur) + if offset > maxMatchOffset || uint32(x>>16) != load3232(src, candidate.offset-e.cur) { + cv = uint32(x >> 24) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level3.go b/vendor/github.com/klauspost/compress/flate/level3.go new file mode 100644 index 00000000000..c22b4244a5c --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level3.go @@ -0,0 +1,229 @@ +package flate + +import "fmt" + +// fastEncL3 +type fastEncL3 struct { + fastGen + table [tableSize]tableEntryPrev +} + +// Encode uses a similar algorithm to level 2, will check up to two candidates. +func (e *fastEncL3) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 8 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + } + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + e.table[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // Skip if too small. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3232(src, s) + for { + const skipLog = 6 + nextS := s + var candidate tableEntry + for { + nextHash := hash(cv) + s = nextS + nextS = s + 1 + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + candidates := e.table[nextHash] + now := load3232(src, nextS) + + // Safe offset distance until s + 4... + minOffset := e.cur + s - (maxMatchOffset - 4) + e.table[nextHash] = tableEntryPrev{Prev: candidates.Cur, Cur: tableEntry{offset: s + e.cur}} + + // Check both candidates + candidate = candidates.Cur + if candidate.offset < minOffset { + cv = now + // Previous will also be invalid, we have nothing. + continue + } + + if cv == load3232(src, candidate.offset-e.cur) { + if candidates.Prev.offset < minOffset || cv != load3232(src, candidates.Prev.offset-e.cur) { + break + } + // Both match and are valid, pick longest. + offset := s - (candidate.offset - e.cur) + o2 := s - (candidates.Prev.offset - e.cur) + l1, l2 := matchLen(src[s+4:], src[s-offset+4:]), matchLen(src[s+4:], src[s-o2+4:]) + if l2 > l1 { + candidate = candidates.Prev + } + break + } else { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + break + } + } + cv = now + } + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + // + t := candidate.offset - e.cur + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + t += l + // Index first pair after match end. + if int(t+4) < len(src) && t > 0 { + cv := load3232(src, t) + nextHash := hash(cv) + e.table[nextHash] = tableEntryPrev{ + Prev: e.table[nextHash].Cur, + Cur: tableEntry{offset: e.cur + t}, + } + } + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-3 to s. + x := load6432(src, s-3) + prevHash := hash(uint32(x)) + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 3}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 2}, + } + x >>= 8 + prevHash = hash(uint32(x)) + + e.table[prevHash] = tableEntryPrev{ + Prev: e.table[prevHash].Cur, + Cur: tableEntry{offset: e.cur + s - 1}, + } + x >>= 8 + currHash := hash(uint32(x)) + candidates := e.table[currHash] + cv = uint32(x) + e.table[currHash] = tableEntryPrev{ + Prev: candidates.Cur, + Cur: tableEntry{offset: s + e.cur}, + } + + // Check both candidates + candidate = candidates.Cur + minOffset := e.cur + s - (maxMatchOffset - 4) + + if candidate.offset > minOffset && cv != load3232(src, candidate.offset-e.cur) { + // We only check if value mismatches. + // Offset will always be invalid in other cases. + candidate = candidates.Prev + if candidate.offset > minOffset && cv == load3232(src, candidate.offset-e.cur) { + offset := s - (candidate.offset - e.cur) + if offset <= maxMatchOffset { + continue + } + } + } + cv = uint32(x >> 8) + s++ + break + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level4.go b/vendor/github.com/klauspost/compress/flate/level4.go new file mode 100644 index 00000000000..e62f0c02b1e --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level4.go @@ -0,0 +1,212 @@ +package flate + +import "fmt" + +type fastEncL4 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntry +} + +func (e *fastEncL4) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntry{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.bTable[i].offset = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + e.bTable[nextHashL] = entry + + t = lCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.offset-e.cur) { + // We got a long match. Use that. + break + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + lCandidate = e.bTable[hash7(next, tableBits)] + + // If the next long is a candidate, check if we should use that instead... + lOff := nextS - (lCandidate.offset - e.cur) + if lOff < maxMatchOffset && load3232(src, lCandidate.offset-e.cur) == uint32(next) { + l1, l2 := matchLen(src[s+4:], src[t+4:]), matchLen(src[nextS+4:], src[nextS-lOff+4:]) + if l2 > l1 { + s = nextS + t = lCandidate.offset - e.cur + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + l := e.matchlenLong(s+4, t+4, src) + 4 + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + if debugDeflate { + if t >= s { + panic("s-t") + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index first pair after match end. + if int(s+8) < len(src) { + cv := load6432(src, s) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: s + e.cur} + e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur} + } + goto emitRemainder + } + + // Store every 3rd hash in-between + if true { + i := nextS + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + + i += 3 + for ; i < s-1; i += 3 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + e.bTable[hash7(cv, tableBits)] = t + e.bTable[hash7(cv>>8, tableBits)] = t2 + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + e.bTable[prevHashL] = tableEntry{offset: o} + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level5.go b/vendor/github.com/klauspost/compress/flate/level5.go new file mode 100644 index 00000000000..d513f1ffd37 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level5.go @@ -0,0 +1,279 @@ +package flate + +import "fmt" + +type fastEncL5 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL5) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + for { + const skipLog = 6 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + lCandidate = e.bTable[nextHashL] + // Store the next match + + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // If the next long is a candidate, use that... + t2 := lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + if debugDeflate { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + goto emitRemainder + } + + // Store every 3rd hash in-between. + if true { + const hashEvery = 3 + i := s - l + 1 + if i < s-1 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + e.table[hash4x64(cv, tableBits)] = t + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // Do an long at i+1 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + eLong = &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + + // We only have enough bits for a short entry at i+2 + cv >>= 8 + t = tableEntry{offset: t.offset + 1} + e.table[hash4x64(cv, tableBits)] = t + + // Skip one - otherwise we risk hitting 's' + i += 4 + for ; i < s-1; i += hashEvery { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = t, eLong.Cur + e.table[hash4u(uint32(cv>>8), tableBits)] = t2 + } + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + x := load6432(src, s-1) + o := e.cur + s - 1 + prevHashS := hash4x64(x, tableBits) + prevHashL := hash7(x, tableBits) + e.table[prevHashS] = tableEntry{offset: o} + eLong := &e.bTable[prevHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur + cv = x >> 8 + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/level6.go b/vendor/github.com/klauspost/compress/flate/level6.go new file mode 100644 index 00000000000..a52c80ea456 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/level6.go @@ -0,0 +1,282 @@ +package flate + +import "fmt" + +type fastEncL6 struct { + fastGen + table [tableSize]tableEntry + bTable [tableSize]tableEntryPrev +} + +func (e *fastEncL6) Encode(dst *tokens, src []byte) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debugDeflate && e.cur < 0 { + panic(fmt.Sprint("e.cur < 0: ", e.cur)) + } + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.bTable[:] { + e.bTable[i] = tableEntryPrev{} + } + e.cur = maxMatchOffset + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - maxMatchOffset + for i := range e.table[:] { + v := e.table[i].offset + if v <= minOff { + v = 0 + } else { + v = v - e.cur + maxMatchOffset + } + e.table[i].offset = v + } + for i := range e.bTable[:] { + v := e.bTable[i] + if v.Cur.offset <= minOff { + v.Cur.offset = 0 + v.Prev.offset = 0 + } else { + v.Cur.offset = v.Cur.offset - e.cur + maxMatchOffset + if v.Prev.offset <= minOff { + v.Prev.offset = 0 + } else { + v.Prev.offset = v.Prev.offset - e.cur + maxMatchOffset + } + } + e.bTable[i] = v + } + e.cur = maxMatchOffset + } + + s := e.addBlock(src) + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = uint16(len(src)) + return + } + + // Override src + src = e.hist + nextEmit := s + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int32(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load6432(src, s) + // Repeat MUST be > 1 and within range + repeat := int32(1) + for { + const skipLog = 7 + const doEvery = 1 + + nextS := s + var l int32 + var t int32 + for { + nextHashS := hash4x64(cv, tableBits) + nextHashL := hash7(cv, tableBits) + s = nextS + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit { + goto emitRemainder + } + // Fetch a short+long candidate + sCandidate := e.table[nextHashS] + lCandidate := e.bTable[nextHashL] + next := load6432(src, nextS) + entry := tableEntry{offset: s + e.cur} + e.table[nextHashS] = entry + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = entry, eLong.Cur + + // Calculate hashes of 'next' + nextHashS = hash4x64(next, tableBits) + nextHashL = hash7(next, tableBits) + + t = lCandidate.Cur.offset - e.cur + if s-t < maxMatchOffset { + if uint32(cv) == load3232(src, lCandidate.Cur.offset-e.cur) { + // Long candidate matches at least 4 bytes. + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check the previous long candidate as well. + t2 := lCandidate.Prev.offset - e.cur + if s-t2 < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + l = e.matchlen(s+4, t+4, src) + 4 + ml1 := e.matchlen(s+4, t2+4, src) + 4 + if ml1 > l { + t = t2 + l = ml1 + break + } + } + break + } + // Current value did not match, but check if previous long value does. + t = lCandidate.Prev.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, lCandidate.Prev.offset-e.cur) { + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + break + } + } + + t = sCandidate.offset - e.cur + if s-t < maxMatchOffset && uint32(cv) == load3232(src, sCandidate.offset-e.cur) { + // Found a 4 match... + l = e.matchlen(s+4, t+4, src) + 4 + + // Look up next long candidate (at nextS) + lCandidate = e.bTable[nextHashL] + + // Store the next match + e.table[nextHashS] = tableEntry{offset: nextS + e.cur} + eLong := &e.bTable[nextHashL] + eLong.Cur, eLong.Prev = tableEntry{offset: nextS + e.cur}, eLong.Cur + + // Check repeat at s + repOff + const repOff = 1 + t2 := s - repeat + repOff + if load3232(src, t2) == uint32(cv>>(8*repOff)) { + ml := e.matchlen(s+4+repOff, t2+4, src) + 4 + if ml > l { + t = t2 + l = ml + s += repOff + // Not worth checking more. + break + } + } + + // If the next long is a candidate, use that... + t2 = lCandidate.Cur.offset - e.cur + if nextS-t2 < maxMatchOffset { + if load3232(src, lCandidate.Cur.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + // This is ok, but check previous as well. + } + } + // If the previous long is a candidate, use that... + t2 = lCandidate.Prev.offset - e.cur + if nextS-t2 < maxMatchOffset && load3232(src, lCandidate.Prev.offset-e.cur) == uint32(next) { + ml := e.matchlen(nextS+4, t2+4, src) + 4 + if ml > l { + t = t2 + s = nextS + l = ml + break + } + } + } + break + } + cv = next + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + + // Extend the 4-byte match as long as possible. + if l == 0 { + l = e.matchlenLong(s+4, t+4, src) + 4 + } else if l == maxMatchLength { + l += e.matchlenLong(s+l, t+l, src) + } + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + if false { + if t >= s { + panic(fmt.Sprintln("s-t", s, t)) + } + if (s - t) > maxMatchOffset { + panic(fmt.Sprintln("mmo", s-t)) + } + if l < baseMatchLength { + panic("bml") + } + } + + dst.AddMatchLong(l, uint32(s-t-baseMatchOffset)) + repeat = s - t + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + + if s >= sLimit { + // Index after match end. + for i := nextS + 1; i < int32(len(src))-8; i += 2 { + cv := load6432(src, i) + e.table[hash4x64(cv, tableBits)] = tableEntry{offset: i + e.cur} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur + } + goto emitRemainder + } + + // Store every long hash in-between and every second short. + if true { + for i := nextS + 1; i < s-1; i += 2 { + cv := load6432(src, i) + t := tableEntry{offset: i + e.cur} + t2 := tableEntry{offset: t.offset + 1} + eLong := &e.bTable[hash7(cv, tableBits)] + eLong2 := &e.bTable[hash7(cv>>8, tableBits)] + e.table[hash4x64(cv, tableBits)] = t + eLong.Cur, eLong.Prev = t, eLong.Cur + eLong2.Cur, eLong2.Prev = t2, eLong2.Cur + } + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. + cv = load6432(src, s) + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/stateless.go b/vendor/github.com/klauspost/compress/flate/stateless.go new file mode 100644 index 00000000000..53e89912463 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/stateless.go @@ -0,0 +1,297 @@ +package flate + +import ( + "io" + "math" + "sync" +) + +const ( + maxStatelessBlock = math.MaxInt16 + // dictionary will be taken from maxStatelessBlock, so limit it. + maxStatelessDict = 8 << 10 + + slTableBits = 13 + slTableSize = 1 << slTableBits + slTableShift = 32 - slTableBits +) + +type statelessWriter struct { + dst io.Writer + closed bool +} + +func (s *statelessWriter) Close() error { + if s.closed { + return nil + } + s.closed = true + // Emit EOF block + return StatelessDeflate(s.dst, nil, true, nil) +} + +func (s *statelessWriter) Write(p []byte) (n int, err error) { + err = StatelessDeflate(s.dst, p, false, nil) + if err != nil { + return 0, err + } + return len(p), nil +} + +func (s *statelessWriter) Reset(w io.Writer) { + s.dst = w + s.closed = false +} + +// NewStatelessWriter will do compression but without maintaining any state +// between Write calls. +// There will be no memory kept between Write calls, +// but compression and speed will be suboptimal. +// Because of this, the size of actual Write calls will affect output size. +func NewStatelessWriter(dst io.Writer) io.WriteCloser { + return &statelessWriter{dst: dst} +} + +// bitWriterPool contains bit writers that can be reused. +var bitWriterPool = sync.Pool{ + New: func() interface{} { + return newHuffmanBitWriter(nil) + }, +} + +// StatelessDeflate allows to compress directly to a Writer without retaining state. +// When returning everything will be flushed. +// Up to 8KB of an optional dictionary can be given which is presumed to presumed to precede the block. +// Longer dictionaries will be truncated and will still produce valid output. +// Sending nil dictionary is perfectly fine. +func StatelessDeflate(out io.Writer, in []byte, eof bool, dict []byte) error { + var dst tokens + bw := bitWriterPool.Get().(*huffmanBitWriter) + bw.reset(out) + defer func() { + // don't keep a reference to our output + bw.reset(nil) + bitWriterPool.Put(bw) + }() + if eof && len(in) == 0 { + // Just write an EOF block. + // Could be faster... + bw.writeStoredHeader(0, true) + bw.flush() + return bw.err + } + + // Truncate dict + if len(dict) > maxStatelessDict { + dict = dict[len(dict)-maxStatelessDict:] + } + + for len(in) > 0 { + todo := in + if len(todo) > maxStatelessBlock-len(dict) { + todo = todo[:maxStatelessBlock-len(dict)] + } + in = in[len(todo):] + uncompressed := todo + if len(dict) > 0 { + // combine dict and source + bufLen := len(todo) + len(dict) + combined := make([]byte, bufLen) + copy(combined, dict) + copy(combined[len(dict):], todo) + todo = combined + } + // Compress + statelessEnc(&dst, todo, int16(len(dict))) + isEof := eof && len(in) == 0 + + if dst.n == 0 { + bw.writeStoredHeader(len(uncompressed), isEof) + if bw.err != nil { + return bw.err + } + bw.writeBytes(uncompressed) + } else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 { + // If we removed less than 1/16th, huffman compress the block. + bw.writeBlockHuff(isEof, uncompressed, len(in) == 0) + } else { + bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0) + } + if len(in) > 0 { + // Retain a dict if we have more + dict = todo[len(todo)-maxStatelessDict:] + dst.Reset() + } + if bw.err != nil { + return bw.err + } + } + if !eof { + // Align, only a stored block can do that. + bw.writeStoredHeader(0, false) + } + bw.flush() + return bw.err +} + +func hashSL(u uint32) uint32 { + return (u * 0x1e35a7bd) >> slTableShift +} + +func load3216(b []byte, i int16) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6416(b []byte, i int16) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func statelessEnc(dst *tokens, src []byte, startAt int16) { + const ( + inputMargin = 12 - 1 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + type tableEntry struct { + offset int16 + } + + var table [slTableSize]tableEntry + + // This check isn't in the Snappy implementation, but there, the caller + // instead of the callee handles this case. + if len(src)-int(startAt) < minNonLiteralBlockSize { + // We do not fill the token table. + // This will be picked up by caller. + dst.n = 0 + return + } + // Index until startAt + if startAt > 0 { + cv := load3232(src, 0) + for i := int16(0); i < startAt; i++ { + table[hashSL(cv)] = tableEntry{offset: i} + cv = (cv >> 8) | (uint32(src[i+4]) << 24) + } + } + + s := startAt + 1 + nextEmit := startAt + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := int16(len(src) - inputMargin) + + // nextEmit is where in src the next emitLiteral should start from. + cv := load3216(src, s) + + for { + const skipLog = 5 + const doEvery = 2 + + nextS := s + var candidate tableEntry + for { + nextHash := hashSL(cv) + candidate = table[nextHash] + nextS = s + doEvery + (s-nextEmit)>>skipLog + if nextS > sLimit || nextS <= 0 { + goto emitRemainder + } + + now := load6416(src, nextS) + table[nextHash] = tableEntry{offset: s} + nextHash = hashSL(uint32(now)) + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + + // Do one right away... + cv = uint32(now) + s = nextS + nextS++ + candidate = table[nextHash] + now >>= 8 + table[nextHash] = tableEntry{offset: s} + + if cv == load3216(src, candidate.offset) { + table[nextHash] = tableEntry{offset: nextS} + break + } + cv = uint32(now) + s = nextS + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + + // Extend the 4-byte match as long as possible. + t := candidate.offset + l := int16(matchLen(src[s+4:], src[t+4:]) + 4) + + // Extend backwards + for t > 0 && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + if nextEmit < s { + emitLiteral(dst, src[nextEmit:s]) + } + + // Save the match found + dst.AddMatchLong(int32(l), uint32(s-t-baseMatchOffset)) + s += l + nextEmit = s + if nextS >= s { + s = nextS + 1 + } + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-2 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load6416(src, s-2) + o := s - 2 + prevHash := hashSL(uint32(x)) + table[prevHash] = tableEntry{offset: o} + x >>= 16 + currHash := hashSL(uint32(x)) + candidate = table[currHash] + table[currHash] = tableEntry{offset: o + 2} + + if uint32(x) != load3216(src, candidate.offset) { + cv = uint32(x >> 8) + s++ + break + } + } + } + +emitRemainder: + if int(nextEmit) < len(src) { + // If nothing was added, don't encode literals. + if dst.n == 0 { + return + } + emitLiteral(dst, src[nextEmit:]) + } +} diff --git a/vendor/github.com/klauspost/compress/flate/token.go b/vendor/github.com/klauspost/compress/flate/token.go new file mode 100644 index 00000000000..f9abf606d67 --- /dev/null +++ b/vendor/github.com/klauspost/compress/flate/token.go @@ -0,0 +1,375 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package flate + +import ( + "bytes" + "encoding/binary" + "fmt" + "io" + "math" +) + +const ( + // 2 bits: type 0 = literal 1=EOF 2=Match 3=Unused + // 8 bits: xlength = length - MIN_MATCH_LENGTH + // 22 bits xoffset = offset - MIN_OFFSET_SIZE, or literal + lengthShift = 22 + offsetMask = 1<maxnumlit + offHist [32]uint16 // offset codes + litHist [256]uint16 // codes 0->255 + n uint16 // Must be able to contain maxStoreBlockSize + tokens [maxStoreBlockSize + 1]token +} + +func (t *tokens) Reset() { + if t.n == 0 { + return + } + t.n = 0 + t.nLits = 0 + for i := range t.litHist[:] { + t.litHist[i] = 0 + } + for i := range t.extraHist[:] { + t.extraHist[i] = 0 + } + for i := range t.offHist[:] { + t.offHist[i] = 0 + } +} + +func (t *tokens) Fill() { + if t.n == 0 { + return + } + for i, v := range t.litHist[:] { + if v == 0 { + t.litHist[i] = 1 + t.nLits++ + } + } + for i, v := range t.extraHist[:literalCount-256] { + if v == 0 { + t.nLits++ + t.extraHist[i] = 1 + } + } + for i, v := range t.offHist[:offsetCodeCount] { + if v == 0 { + t.offHist[i] = 1 + } + } +} + +func indexTokens(in []token) tokens { + var t tokens + t.indexTokens(in) + return t +} + +func (t *tokens) indexTokens(in []token) { + t.Reset() + for _, tok := range in { + if tok < matchType { + t.AddLiteral(tok.literal()) + continue + } + t.AddMatch(uint32(tok.length()), tok.offset()) + } +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +func emitLiteral(dst *tokens, lit []byte) { + ol := int(dst.n) + for i, v := range lit { + dst.tokens[(i+ol)&maxStoreBlockSize] = token(v) + dst.litHist[v]++ + } + dst.n += uint16(len(lit)) + dst.nLits += len(lit) +} + +func (t *tokens) AddLiteral(lit byte) { + t.tokens[t.n] = token(lit) + t.litHist[lit]++ + t.n++ + t.nLits++ +} + +// from https://stackoverflow.com/a/28730362 +func mFastLog2(val float32) float32 { + ux := int32(math.Float32bits(val)) + log2 := (float32)(((ux >> 23) & 255) - 128) + ux &= -0x7f800001 + ux += 127 << 23 + uval := math.Float32frombits(uint32(ux)) + log2 += ((-0.34484843)*uval+2.02466578)*uval - 0.67487759 + return log2 +} + +// EstimatedBits will return an minimum size estimated by an *optimal* +// compression of the block. +// The size of the block +func (t *tokens) EstimatedBits() int { + shannon := float32(0) + bits := int(0) + nMatches := 0 + if t.nLits > 0 { + invTotal := 1.0 / float32(t.nLits) + for _, v := range t.litHist[:] { + if v > 0 { + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + } + } + // Just add 15 for EOB + shannon += 15 + for i, v := range t.extraHist[1 : literalCount-256] { + if v > 0 { + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(lengthExtraBits[i&31]) * int(v) + nMatches += int(v) + } + } + } + if nMatches > 0 { + invTotal := 1.0 / float32(nMatches) + for i, v := range t.offHist[:offsetCodeCount] { + if v > 0 { + n := float32(v) + shannon += -mFastLog2(n*invTotal) * n + bits += int(offsetExtraBits[i&31]) * int(v) + } + } + } + return int(shannon) + bits +} + +// AddMatch adds a match to the tokens. +// This function is very sensitive to inlining and right on the border. +func (t *tokens) AddMatch(xlength uint32, xoffset uint32) { + if debugDeflate { + if xlength >= maxMatchLength+baseMatchLength { + panic(fmt.Errorf("invalid length: %v", xlength)) + } + if xoffset >= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + t.nLits++ + lengthCode := lengthCodes1[uint8(xlength)] & 31 + t.tokens[t.n] = token(matchType | xlength<= maxMatchOffset+baseMatchOffset { + panic(fmt.Errorf("invalid offset: %v", xoffset)) + } + } + oc := offsetCode(xoffset) & 31 + for xlength > 0 { + xl := xlength + if xl > 258 { + // We need to have at least baseMatchLength left over for next loop. + xl = 258 - baseMatchLength + } + xlength -= xl + xl -= 3 + t.nLits++ + lengthCode := lengthCodes1[uint8(xl)] & 31 + t.tokens[t.n] = token(matchType | uint32(xl)<> lengthShift) } + +// The code is never more than 8 bits, but is returned as uint32 for convenience. +func lengthCode(len uint8) uint32 { return uint32(lengthCodes[len]) } + +// Returns the offset code corresponding to a specific offset +func offsetCode(off uint32) uint32 { + if false { + if off < uint32(len(offsetCodes)) { + return offsetCodes[off&255] + } else if off>>7 < uint32(len(offsetCodes)) { + return offsetCodes[(off>>7)&255] + 14 + } else { + return offsetCodes[(off>>14)&255] + 28 + } + } + if off < uint32(len(offsetCodes)) { + return offsetCodes[uint8(off)] + } + return offsetCodes14[uint8(off>>7)] +} diff --git a/vendor/github.com/klauspost/compress/fse/README.md b/vendor/github.com/klauspost/compress/fse/README.md new file mode 100644 index 00000000000..ea7324da671 --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/README.md @@ -0,0 +1,79 @@ +# Finite State Entropy + +This package provides Finite State Entropy encoding and decoding. + +Finite State Entropy (also referenced as [tANS](https://en.wikipedia.org/wiki/Asymmetric_numeral_systems#tANS)) +encoding provides a fast near-optimal symbol encoding/decoding +for byte blocks as implemented in [zstandard](https://github.com/facebook/zstd). + +This can be used for compressing input with a lot of similar input values to the smallest number of bytes. +This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, +but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. + +* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/fse) + +## News + + * Feb 2018: First implementation released. Consider this beta software for now. + +# Usage + +This package provides a low level interface that allows to compress single independent blocks. + +Each block is separate, and there is no built in integrity checks. +This means that the caller should keep track of block sizes and also do checksums if needed. + +Compressing a block is done via the [`Compress`](https://godoc.org/github.com/klauspost/compress/fse#Compress) function. +You must provide input and will receive the output and maybe an error. + +These error values can be returned: + +| Error | Description | +|---------------------|-----------------------------------------------------------------------------| +| `` | Everything ok, output is returned | +| `ErrIncompressible` | Returned when input is judged to be too hard to compress | +| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | +| `(error)` | An internal error occurred. | + +As can be seen above there are errors that will be returned even under normal operation so it is important to handle these. + +To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/fse#Scratch) object +that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same +object can be used for both. + +Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this +you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. + +Decompressing is done by calling the [`Decompress`](https://godoc.org/github.com/klauspost/compress/fse#Decompress) function. +You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back +your input was likely corrupted. + +It is important to note that a successful decoding does *not* mean your output matches your original input. +There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. + +For more detailed usage, see examples in the [godoc documentation](https://godoc.org/github.com/klauspost/compress/fse#pkg-examples). + +# Performance + +A lot of factors are affecting speed. Block sizes and compressibility of the material are primary factors. +All compression functions are currently only running on the calling goroutine so only one core will be used per block. + +The compressor is significantly faster if symbols are kept as small as possible. The highest byte value of the input +is used to reduce some of the processing, so if all your input is above byte value 64 for instance, it may be +beneficial to transpose all your input values down by 64. + +With moderate block sizes around 64k speed are typically 200MB/s per core for compression and +around 300MB/s decompression speed. + +The same hardware typically does Huffman (deflate) encoding at 125MB/s and decompression at 100MB/s. + +# Plans + +At one point, more internals will be exposed to facilitate more "expert" usage of the components. + +A streaming interface is also likely to be implemented. Likely compatible with [FSE stream format](https://github.com/Cyan4973/FiniteStateEntropy/blob/dev/programs/fileio.c#L261). + +# Contributing + +Contributions are always welcome. Be aware that adding public functions will require good justification and breaking +changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file diff --git a/vendor/github.com/klauspost/compress/fse/bitreader.go b/vendor/github.com/klauspost/compress/fse/bitreader.go new file mode 100644 index 00000000000..b9db204f59d --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/bitreader.go @@ -0,0 +1,107 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +import ( + "errors" + "io" +) + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReader struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReader) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + b.fill() + b.fill() + b.bitsRead += 8 - uint8(highBits(uint32(v))) + return nil +} + +// getBits will return n bits. n can be 0. +func (b *bitReader) getBits(n uint8) uint16 { + if n == 0 || b.bitsRead >= 64 { + return 0 + } + return b.getBitsFast(n) +} + +// getBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReader) getBitsFast(n uint8) uint16 { + const regMask = 64 - 1 + v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + b.bitsRead += n + return v +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReader) fillFast() { + if b.bitsRead < 32 { + return + } + // Do single re-slice to avoid bounds checks. + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReader) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value = (b.value << 8) | uint64(b.in[b.off-1]) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReader) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReader) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/fse/bitwriter.go b/vendor/github.com/klauspost/compress/fse/bitwriter.go new file mode 100644 index 00000000000..43e463611b1 --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/bitwriter.go @@ -0,0 +1,168 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +import "fmt" + +// bitWriter will write bits. +// First bit will be LSB of the first byte of output. +type bitWriter struct { + bitContainer uint64 + nBits uint8 + out []byte +} + +// bitMask16 is bitmasks. Has extra to avoid bounds check. +var bitMask16 = [32]uint16{ + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF} /* up to 16 bits */ + +// addBits16NC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16NC(value uint16, bits uint8) { + b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) + b.nBits += bits +} + +// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// addBits16ZeroNC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +// This is fastest if bits can be zero. +func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) { + if bits == 0 { + return + } + value <<= (16 - bits) & 15 + value >>= (16 - bits) & 15 + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// flush will flush all pending full bytes. +// There will be at least 56 bits available for writing when this has been called. +// Using flush32 is faster, but leaves less space for writing. +func (b *bitWriter) flush() { + v := b.nBits >> 3 + switch v { + case 0: + case 1: + b.out = append(b.out, + byte(b.bitContainer), + ) + case 2: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + ) + case 3: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + ) + case 4: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + ) + case 5: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + ) + case 6: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + ) + case 7: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + ) + case 8: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + byte(b.bitContainer>>56), + ) + default: + panic(fmt.Errorf("bits (%d) > 64", b.nBits)) + } + b.bitContainer >>= v << 3 + b.nBits &= 7 +} + +// flush32 will flush out, so there are at least 32 bits available for writing. +func (b *bitWriter) flush32() { + if b.nBits < 32 { + return + } + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24)) + b.nBits -= 32 + b.bitContainer >>= 32 +} + +// flushAlign will flush remaining full bytes and align to next byte boundary. +func (b *bitWriter) flushAlign() { + nbBytes := (b.nBits + 7) >> 3 + for i := uint8(0); i < nbBytes; i++ { + b.out = append(b.out, byte(b.bitContainer>>(i*8))) + } + b.nBits = 0 + b.bitContainer = 0 +} + +// close will write the alignment bit and write the final byte(s) +// to the output. +func (b *bitWriter) close() error { + // End mark + b.addBits16Clean(1, 1) + // flush until next byte. + b.flushAlign() + return nil +} + +// reset and continue writing by appending to out. +func (b *bitWriter) reset(out []byte) { + b.bitContainer = 0 + b.nBits = 0 + b.out = out +} diff --git a/vendor/github.com/klauspost/compress/fse/bytereader.go b/vendor/github.com/klauspost/compress/fse/bytereader.go new file mode 100644 index 00000000000..f228a46cdf6 --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/bytereader.go @@ -0,0 +1,56 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +// byteReader provides a byte reader that reads +// little endian values from a byte stream. +// The input stream is manually advanced. +// The reader performs no bounds checks. +type byteReader struct { + b []byte + off int +} + +// init will initialize the reader and set the input. +func (b *byteReader) init(in []byte) { + b.b = in + b.off = 0 +} + +// advance the stream b n bytes. +func (b *byteReader) advance(n uint) { + b.off += int(n) +} + +// Int32 returns a little endian int32 starting at current offset. +func (b byteReader) Int32() int32 { + b2 := b.b[b.off : b.off+4 : b.off+4] + v3 := int32(b2[3]) + v2 := int32(b2[2]) + v1 := int32(b2[1]) + v0 := int32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// Uint32 returns a little endian uint32 starting at current offset. +func (b byteReader) Uint32() uint32 { + b2 := b.b[b.off : b.off+4 : b.off+4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// unread returns the unread portion of the input. +func (b byteReader) unread() []byte { + return b.b[b.off:] +} + +// remain will return the number of bytes remaining. +func (b byteReader) remain() int { + return len(b.b) - b.off +} diff --git a/vendor/github.com/klauspost/compress/fse/compress.go b/vendor/github.com/klauspost/compress/fse/compress.go new file mode 100644 index 00000000000..b69237c9b8f --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/compress.go @@ -0,0 +1,684 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package fse + +import ( + "errors" + "fmt" +) + +// Compress the input bytes. Input must be < 2GB. +// Provide a Scratch buffer to avoid memory allocations. +// Note that the output is also kept in the scratch buffer. +// If input is too hard to compress, ErrIncompressible is returned. +// If input is a single byte value repeated ErrUseRLE is returned. +func Compress(in []byte, s *Scratch) ([]byte, error) { + if len(in) <= 1 { + return nil, ErrIncompressible + } + if len(in) > (2<<30)-1 { + return nil, errors.New("input too big, must be < 2GB") + } + s, err := s.prepare(in) + if err != nil { + return nil, err + } + + // Create histogram, if none was provided. + maxCount := s.maxCount + if maxCount == 0 { + maxCount = s.countSimple(in) + } + // Reset for next run. + s.clearCount = true + s.maxCount = 0 + if maxCount == len(in) { + // One symbol, use RLE + return nil, ErrUseRLE + } + if maxCount == 1 || maxCount < (len(in)>>7) { + // Each symbol present maximum once or too well distributed. + return nil, ErrIncompressible + } + s.optimalTableLog() + err = s.normalizeCount() + if err != nil { + return nil, err + } + err = s.writeCount() + if err != nil { + return nil, err + } + + if false { + err = s.validateNorm() + if err != nil { + return nil, err + } + } + + err = s.buildCTable() + if err != nil { + return nil, err + } + err = s.compress(in) + if err != nil { + return nil, err + } + s.Out = s.bw.out + // Check if we compressed. + if len(s.Out) >= len(in) { + return nil, ErrIncompressible + } + return s.Out, nil +} + +// cState contains the compression state of a stream. +type cState struct { + bw *bitWriter + stateTable []uint16 + state uint16 +} + +// init will initialize the compression state to the first symbol of the stream. +func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTransform) { + c.bw = bw + c.stateTable = ct.stateTable + + nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 + im := int32((nbBitsOut << 16) - first.deltaNbBits) + lu := (im >> nbBitsOut) + first.deltaFindState + c.state = c.stateTable[lu] + return +} + +// encode the output symbol provided and write it to the bitstream. +func (c *cState) encode(symbolTT symbolTransform) { + nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 + dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState + c.bw.addBits16NC(c.state, uint8(nbBitsOut)) + c.state = c.stateTable[dstState] +} + +// encode the output symbol provided and write it to the bitstream. +func (c *cState) encodeZero(symbolTT symbolTransform) { + nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 + dstState := int32(c.state>>(nbBitsOut&15)) + symbolTT.deltaFindState + c.bw.addBits16ZeroNC(c.state, uint8(nbBitsOut)) + c.state = c.stateTable[dstState] +} + +// flush will write the tablelog to the output and flush the remaining full bytes. +func (c *cState) flush(tableLog uint8) { + c.bw.flush32() + c.bw.addBits16NC(c.state, tableLog) + c.bw.flush() +} + +// compress is the main compression loop that will encode the input from the last byte to the first. +func (s *Scratch) compress(src []byte) error { + if len(src) <= 2 { + return errors.New("compress: src too small") + } + tt := s.ct.symbolTT[:256] + s.bw.reset(s.Out) + + // Our two states each encodes every second byte. + // Last byte encoded (first byte decoded) will always be encoded by c1. + var c1, c2 cState + + // Encode so remaining size is divisible by 4. + ip := len(src) + if ip&1 == 1 { + c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) + c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) + c1.encodeZero(tt[src[ip-3]]) + ip -= 3 + } else { + c2.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-1]]) + c1.init(&s.bw, &s.ct, s.actualTableLog, tt[src[ip-2]]) + ip -= 2 + } + if ip&2 != 0 { + c2.encodeZero(tt[src[ip-1]]) + c1.encodeZero(tt[src[ip-2]]) + ip -= 2 + } + + // Main compression loop. + switch { + case !s.zeroBits && s.actualTableLog <= 8: + // We can encode 4 symbols without requiring a flush. + // We do not need to check if any output is 0 bits. + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encode(tt[v0]) + c1.encode(tt[v1]) + c2.encode(tt[v2]) + c1.encode(tt[v3]) + ip -= 4 + } + case !s.zeroBits: + // We do not need to check if any output is 0 bits. + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encode(tt[v0]) + c1.encode(tt[v1]) + s.bw.flush32() + c2.encode(tt[v2]) + c1.encode(tt[v3]) + ip -= 4 + } + case s.actualTableLog <= 8: + // We can encode 4 symbols without requiring a flush + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encodeZero(tt[v0]) + c1.encodeZero(tt[v1]) + c2.encodeZero(tt[v2]) + c1.encodeZero(tt[v3]) + ip -= 4 + } + default: + for ip >= 4 { + s.bw.flush32() + v3, v2, v1, v0 := src[ip-4], src[ip-3], src[ip-2], src[ip-1] + c2.encodeZero(tt[v0]) + c1.encodeZero(tt[v1]) + s.bw.flush32() + c2.encodeZero(tt[v2]) + c1.encodeZero(tt[v3]) + ip -= 4 + } + } + + // Flush final state. + // Used to initialize state when decoding. + c2.flush(s.actualTableLog) + c1.flush(s.actualTableLog) + + return s.bw.close() +} + +// writeCount will write the normalized histogram count to header. +// This is read back by readNCount. +func (s *Scratch) writeCount() error { + var ( + tableLog = s.actualTableLog + tableSize = 1 << tableLog + previous0 bool + charnum uint16 + + maxHeaderSize = ((int(s.symbolLen) * int(tableLog)) >> 3) + 3 + + // Write Table Size + bitStream = uint32(tableLog - minTablelog) + bitCount = uint(4) + remaining = int16(tableSize + 1) /* +1 for extra accuracy */ + threshold = int16(tableSize) + nbBits = uint(tableLog + 1) + ) + if cap(s.Out) < maxHeaderSize { + s.Out = make([]byte, 0, s.br.remain()+maxHeaderSize) + } + outP := uint(0) + out := s.Out[:maxHeaderSize] + + // stops at 1 + for remaining > 1 { + if previous0 { + start := charnum + for s.norm[charnum] == 0 { + charnum++ + } + for charnum >= start+24 { + start += 24 + bitStream += uint32(0xFFFF) << bitCount + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + } + for charnum >= start+3 { + start += 3 + bitStream += 3 << bitCount + bitCount += 2 + } + bitStream += uint32(charnum-start) << bitCount + bitCount += 2 + if bitCount > 16 { + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + bitCount -= 16 + } + } + + count := s.norm[charnum] + charnum++ + max := (2*threshold - 1) - remaining + if count < 0 { + remaining += count + } else { + remaining -= count + } + count++ // +1 for extra accuracy + if count >= threshold { + count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ + } + bitStream += uint32(count) << bitCount + bitCount += nbBits + if count < max { + bitCount-- + } + + previous0 = count == 1 + if remaining < 1 { + return errors.New("internal error: remaining<1") + } + for remaining < threshold { + nbBits-- + threshold >>= 1 + } + + if bitCount > 16 { + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + bitCount -= 16 + } + } + + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += (bitCount + 7) / 8 + + if uint16(charnum) > s.symbolLen { + return errors.New("internal error: charnum > s.symbolLen") + } + s.Out = out[:outP] + return nil +} + +// symbolTransform contains the state transform for a symbol. +type symbolTransform struct { + deltaFindState int32 + deltaNbBits uint32 +} + +// String prints values as a human readable string. +func (s symbolTransform) String() string { + return fmt.Sprintf("dnbits: %08x, fs:%d", s.deltaNbBits, s.deltaFindState) +} + +// cTable contains tables used for compression. +type cTable struct { + tableSymbol []byte + stateTable []uint16 + symbolTT []symbolTransform +} + +// allocCtable will allocate tables needed for compression. +// If existing tables a re big enough, they are simply re-used. +func (s *Scratch) allocCtable() { + tableSize := 1 << s.actualTableLog + // get tableSymbol that is big enough. + if cap(s.ct.tableSymbol) < int(tableSize) { + s.ct.tableSymbol = make([]byte, tableSize) + } + s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] + + ctSize := tableSize + if cap(s.ct.stateTable) < ctSize { + s.ct.stateTable = make([]uint16, ctSize) + } + s.ct.stateTable = s.ct.stateTable[:ctSize] + + if cap(s.ct.symbolTT) < 256 { + s.ct.symbolTT = make([]symbolTransform, 256) + } + s.ct.symbolTT = s.ct.symbolTT[:256] +} + +// buildCTable will populate the compression table so it is ready to be used. +func (s *Scratch) buildCTable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + var cumul [maxSymbolValue + 2]int16 + + s.allocCtable() + tableSymbol := s.ct.tableSymbol[:tableSize] + // symbol start positions + { + cumul[0] = 0 + for ui, v := range s.norm[:s.symbolLen-1] { + u := byte(ui) // one less than reference + if v == -1 { + // Low proba symbol + cumul[u+1] = cumul[u] + 1 + tableSymbol[highThreshold] = u + highThreshold-- + } else { + cumul[u+1] = cumul[u] + v + } + } + // Encode last symbol separately to avoid overflowing u + u := int(s.symbolLen - 1) + v := s.norm[s.symbolLen-1] + if v == -1 { + // Low proba symbol + cumul[u+1] = cumul[u] + 1 + tableSymbol[highThreshold] = byte(u) + highThreshold-- + } else { + cumul[u+1] = cumul[u] + v + } + if uint32(cumul[s.symbolLen]) != tableSize { + return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) + } + cumul[s.symbolLen] = int16(tableSize) + 1 + } + // Spread symbols + s.zeroBits = false + { + step := tableStep(tableSize) + tableMask := tableSize - 1 + var position uint32 + // if any symbol > largeLimit, we may have 0 bits output. + largeLimit := int16(1 << (s.actualTableLog - 1)) + for ui, v := range s.norm[:s.symbolLen] { + symbol := byte(ui) + if v > largeLimit { + s.zeroBits = true + } + for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { + tableSymbol[position] = symbol + position = (position + step) & tableMask + for position > highThreshold { + position = (position + step) & tableMask + } /* Low proba area */ + } + } + + // Check if we have gone through all positions + if position != 0 { + return errors.New("position!=0") + } + } + + // Build table + table := s.ct.stateTable + { + tsi := int(tableSize) + for u, v := range tableSymbol { + // TableU16 : sorted by symbol order; gives next state value + table[cumul[v]] = uint16(tsi + u) + cumul[v]++ + } + } + + // Build Symbol Transformation Table + { + total := int16(0) + symbolTT := s.ct.symbolTT[:s.symbolLen] + tableLog := s.actualTableLog + tl := (uint32(tableLog) << 16) - (1 << tableLog) + for i, v := range s.norm[:s.symbolLen] { + switch v { + case 0: + case -1, 1: + symbolTT[i].deltaNbBits = tl + symbolTT[i].deltaFindState = int32(total - 1) + total++ + default: + maxBitsOut := uint32(tableLog) - highBits(uint32(v-1)) + minStatePlus := uint32(v) << maxBitsOut + symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus + symbolTT[i].deltaFindState = int32(total - v) + total += v + } + } + if total != int16(tableSize) { + return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) + } + } + return nil +} + +// countSimple will create a simple histogram in s.count. +// Returns the biggest count. +// Does not update s.clearCount. +func (s *Scratch) countSimple(in []byte) (max int) { + for _, v := range in { + s.count[v]++ + } + m := uint32(0) + for i, v := range s.count[:] { + if v > m { + m = v + } + if v > 0 { + s.symbolLen = uint16(i) + 1 + } + } + return int(m) +} + +// minTableLog provides the minimum logSize to safely represent a distribution. +func (s *Scratch) minTableLog() uint8 { + minBitsSrc := highBits(uint32(s.br.remain()-1)) + 1 + minBitsSymbols := highBits(uint32(s.symbolLen-1)) + 2 + if minBitsSrc < minBitsSymbols { + return uint8(minBitsSrc) + } + return uint8(minBitsSymbols) +} + +// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog +func (s *Scratch) optimalTableLog() { + tableLog := s.TableLog + minBits := s.minTableLog() + maxBitsSrc := uint8(highBits(uint32(s.br.remain()-1))) - 2 + if maxBitsSrc < tableLog { + // Accuracy can be reduced + tableLog = maxBitsSrc + } + if minBits > tableLog { + tableLog = minBits + } + // Need a minimum to safely represent all symbol values + if tableLog < minTablelog { + tableLog = minTablelog + } + if tableLog > maxTableLog { + tableLog = maxTableLog + } + s.actualTableLog = tableLog +} + +var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} + +// normalizeCount will normalize the count of the symbols so +// the total is equal to the table size. +func (s *Scratch) normalizeCount() error { + var ( + tableLog = s.actualTableLog + scale = 62 - uint64(tableLog) + step = (1 << 62) / uint64(s.br.remain()) + vStep = uint64(1) << (scale - 20) + stillToDistribute = int16(1 << tableLog) + largest int + largestP int16 + lowThreshold = (uint32)(s.br.remain() >> tableLog) + ) + + for i, cnt := range s.count[:s.symbolLen] { + // already handled + // if (count[s] == s.length) return 0; /* rle special case */ + + if cnt == 0 { + s.norm[i] = 0 + continue + } + if cnt <= lowThreshold { + s.norm[i] = -1 + stillToDistribute-- + } else { + proba := (int16)((uint64(cnt) * step) >> scale) + if proba < 8 { + restToBeat := vStep * uint64(rtbTable[proba]) + v := uint64(cnt)*step - (uint64(proba) << scale) + if v > restToBeat { + proba++ + } + } + if proba > largestP { + largestP = proba + largest = i + } + s.norm[i] = proba + stillToDistribute -= proba + } + } + + if -stillToDistribute >= (s.norm[largest] >> 1) { + // corner case, need another normalization method + return s.normalizeCount2() + } + s.norm[largest] += stillToDistribute + return nil +} + +// Secondary normalization method. +// To be used when primary method fails. +func (s *Scratch) normalizeCount2() error { + const notYetAssigned = -2 + var ( + distributed uint32 + total = uint32(s.br.remain()) + tableLog = s.actualTableLog + lowThreshold = uint32(total >> tableLog) + lowOne = uint32((total * 3) >> (tableLog + 1)) + ) + for i, cnt := range s.count[:s.symbolLen] { + if cnt == 0 { + s.norm[i] = 0 + continue + } + if cnt <= lowThreshold { + s.norm[i] = -1 + distributed++ + total -= cnt + continue + } + if cnt <= lowOne { + s.norm[i] = 1 + distributed++ + total -= cnt + continue + } + s.norm[i] = notYetAssigned + } + toDistribute := (1 << tableLog) - distributed + + if (total / toDistribute) > lowOne { + // risk of rounding to zero + lowOne = uint32((total * 3) / (toDistribute * 2)) + for i, cnt := range s.count[:s.symbolLen] { + if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { + s.norm[i] = 1 + distributed++ + total -= cnt + continue + } + } + toDistribute = (1 << tableLog) - distributed + } + if distributed == uint32(s.symbolLen)+1 { + // all values are pretty poor; + // probably incompressible data (should have already been detected); + // find max, then give all remaining points to max + var maxV int + var maxC uint32 + for i, cnt := range s.count[:s.symbolLen] { + if cnt > maxC { + maxV = i + maxC = cnt + } + } + s.norm[maxV] += int16(toDistribute) + return nil + } + + if total == 0 { + // all of the symbols were low enough for the lowOne or lowThreshold + for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { + if s.norm[i] > 0 { + toDistribute-- + s.norm[i]++ + } + } + return nil + } + + var ( + vStepLog = 62 - uint64(tableLog) + mid = uint64((1 << (vStepLog - 1)) - 1) + rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining + tmpTotal = mid + ) + for i, cnt := range s.count[:s.symbolLen] { + if s.norm[i] == notYetAssigned { + var ( + end = tmpTotal + uint64(cnt)*rStep + sStart = uint32(tmpTotal >> vStepLog) + sEnd = uint32(end >> vStepLog) + weight = sEnd - sStart + ) + if weight < 1 { + return errors.New("weight < 1") + } + s.norm[i] = int16(weight) + tmpTotal = end + } + } + return nil +} + +// validateNorm validates the normalized histogram table. +func (s *Scratch) validateNorm() (err error) { + var total int + for _, v := range s.norm[:s.symbolLen] { + if v >= 0 { + total += int(v) + } else { + total -= int(v) + } + } + defer func() { + if err == nil { + return + } + fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) + for i, v := range s.norm[:s.symbolLen] { + fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) + } + }() + if total != (1 << s.actualTableLog) { + return fmt.Errorf("warning: Total == %d != %d", total, 1< tablelogAbsoluteMax { + return errors.New("tableLog too large") + } + bitStream >>= 4 + bitCount := uint(4) + + s.actualTableLog = uint8(nbBits) + remaining := int32((1 << nbBits) + 1) + threshold := int32(1 << nbBits) + gotTotal := int32(0) + nbBits++ + + for remaining > 1 { + if previous0 { + n0 := charnum + for (bitStream & 0xFFFF) == 0xFFFF { + n0 += 24 + if b.off < iend-5 { + b.advance(2) + bitStream = b.Uint32() >> bitCount + } else { + bitStream >>= 16 + bitCount += 16 + } + } + for (bitStream & 3) == 3 { + n0 += 3 + bitStream >>= 2 + bitCount += 2 + } + n0 += uint16(bitStream & 3) + bitCount += 2 + if n0 > maxSymbolValue { + return errors.New("maxSymbolValue too small") + } + for charnum < n0 { + s.norm[charnum&0xff] = 0 + charnum++ + } + + if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 { + b.advance(bitCount >> 3) + bitCount &= 7 + bitStream = b.Uint32() >> bitCount + } else { + bitStream >>= 2 + } + } + + max := (2*(threshold) - 1) - (remaining) + var count int32 + + if (int32(bitStream) & (threshold - 1)) < max { + count = int32(bitStream) & (threshold - 1) + bitCount += nbBits - 1 + } else { + count = int32(bitStream) & (2*threshold - 1) + if count >= threshold { + count -= max + } + bitCount += nbBits + } + + count-- // extra accuracy + if count < 0 { + // -1 means +1 + remaining += count + gotTotal -= count + } else { + remaining -= count + gotTotal += count + } + s.norm[charnum&0xff] = int16(count) + charnum++ + previous0 = count == 0 + for remaining < threshold { + nbBits-- + threshold >>= 1 + } + if b.off <= iend-7 || b.off+int(bitCount>>3) <= iend-4 { + b.advance(bitCount >> 3) + bitCount &= 7 + } else { + bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) + b.off = len(b.b) - 4 + } + bitStream = b.Uint32() >> (bitCount & 31) + } + s.symbolLen = charnum + + if s.symbolLen <= 1 { + return fmt.Errorf("symbolLen (%d) too small", s.symbolLen) + } + if s.symbolLen > maxSymbolValue+1 { + return fmt.Errorf("symbolLen (%d) too big", s.symbolLen) + } + if remaining != 1 { + return fmt.Errorf("corruption detected (remaining %d != 1)", remaining) + } + if bitCount > 32 { + return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount) + } + if gotTotal != 1<> 3) + return nil +} + +// decSymbol contains information about a state entry, +// Including the state offset base, the output symbol and +// the number of bits to read for the low part of the destination state. +type decSymbol struct { + newState uint16 + symbol uint8 + nbBits uint8 +} + +// allocDtable will allocate decoding tables if they are not big enough. +func (s *Scratch) allocDtable() { + tableSize := 1 << s.actualTableLog + if cap(s.decTable) < int(tableSize) { + s.decTable = make([]decSymbol, tableSize) + } + s.decTable = s.decTable[:tableSize] + + if cap(s.ct.tableSymbol) < 256 { + s.ct.tableSymbol = make([]byte, 256) + } + s.ct.tableSymbol = s.ct.tableSymbol[:256] + + if cap(s.ct.stateTable) < 256 { + s.ct.stateTable = make([]uint16, 256) + } + s.ct.stateTable = s.ct.stateTable[:256] +} + +// buildDtable will build the decoding table. +func (s *Scratch) buildDtable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + s.allocDtable() + symbolNext := s.ct.stateTable[:256] + + // Init, lay down lowprob symbols + s.zeroBits = false + { + largeLimit := int16(1 << (s.actualTableLog - 1)) + for i, v := range s.norm[:s.symbolLen] { + if v == -1 { + s.decTable[highThreshold].symbol = uint8(i) + highThreshold-- + symbolNext[i] = 1 + } else { + if v >= largeLimit { + s.zeroBits = true + } + symbolNext[i] = uint16(v) + } + } + } + // Spread symbols + { + tableMask := tableSize - 1 + step := tableStep(tableSize) + position := uint32(0) + for ss, v := range s.norm[:s.symbolLen] { + for i := 0; i < int(v); i++ { + s.decTable[position].symbol = uint8(ss) + position = (position + step) & tableMask + for position > highThreshold { + // lowprob area + position = (position + step) & tableMask + } + } + } + if position != 0 { + // position must reach all cells once, otherwise normalizedCounter is incorrect + return errors.New("corrupted input (position != 0)") + } + } + + // Build Decoding table + { + tableSize := uint16(1 << s.actualTableLog) + for u, v := range s.decTable { + symbol := v.symbol + nextState := symbolNext[symbol] + symbolNext[symbol] = nextState + 1 + nBits := s.actualTableLog - byte(highBits(uint32(nextState))) + s.decTable[u].nbBits = nBits + newState := (nextState << nBits) - tableSize + if newState >= tableSize { + return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) + } + if newState == uint16(u) && nBits == 0 { + // Seems weird that this is possible with nbits > 0. + return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) + } + s.decTable[u].newState = newState + } + } + return nil +} + +// decompress will decompress the bitstream. +// If the buffer is over-read an error is returned. +func (s *Scratch) decompress() error { + br := &s.bits + br.init(s.br.unread()) + + var s1, s2 decoder + // Initialize and decode first state and symbol. + s1.init(br, s.decTable, s.actualTableLog) + s2.init(br, s.decTable, s.actualTableLog) + + // Use temp table to avoid bound checks/append penalty. + var tmp = s.ct.tableSymbol[:256] + var off uint8 + + // Main part + if !s.zeroBits { + for br.off >= 8 { + br.fillFast() + tmp[off+0] = s1.nextFast() + tmp[off+1] = s2.nextFast() + br.fillFast() + tmp[off+2] = s1.nextFast() + tmp[off+3] = s2.nextFast() + off += 4 + // When off is 0, we have overflowed and should write. + if off == 0 { + s.Out = append(s.Out, tmp...) + if len(s.Out) >= s.DecompressLimit { + return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) + } + } + } + } else { + for br.off >= 8 { + br.fillFast() + tmp[off+0] = s1.next() + tmp[off+1] = s2.next() + br.fillFast() + tmp[off+2] = s1.next() + tmp[off+3] = s2.next() + off += 4 + if off == 0 { + s.Out = append(s.Out, tmp...) + // When off is 0, we have overflowed and should write. + if len(s.Out) >= s.DecompressLimit { + return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) + } + } + } + } + s.Out = append(s.Out, tmp[:off]...) + + // Final bits, a bit more expensive check + for { + if s1.finished() { + s.Out = append(s.Out, s1.final(), s2.final()) + break + } + br.fill() + s.Out = append(s.Out, s1.next()) + if s2.finished() { + s.Out = append(s.Out, s2.final(), s1.final()) + break + } + s.Out = append(s.Out, s2.next()) + if len(s.Out) >= s.DecompressLimit { + return fmt.Errorf("output size (%d) > DecompressLimit (%d)", len(s.Out), s.DecompressLimit) + } + } + return br.close() +} + +// decoder keeps track of the current state and updates it from the bitstream. +type decoder struct { + state uint16 + br *bitReader + dt []decSymbol +} + +// init will initialize the decoder and read the first state from the stream. +func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) { + d.dt = dt + d.br = in + d.state = uint16(in.getBits(tableLog)) +} + +// next returns the next symbol and sets the next state. +// At least tablelog bits must be available in the bit reader. +func (d *decoder) next() uint8 { + n := &d.dt[d.state] + lowBits := d.br.getBits(n.nbBits) + d.state = n.newState + lowBits + return n.symbol +} + +// finished returns true if all bits have been read from the bitstream +// and the next state would require reading bits from the input. +func (d *decoder) finished() bool { + return d.br.finished() && d.dt[d.state].nbBits > 0 +} + +// final returns the current state symbol without decoding the next. +func (d *decoder) final() uint8 { + return d.dt[d.state].symbol +} + +// nextFast returns the next symbol and sets the next state. +// This can only be used if no symbols are 0 bits. +// At least tablelog bits must be available in the bit reader. +func (d *decoder) nextFast() uint8 { + n := d.dt[d.state] + lowBits := d.br.getBitsFast(n.nbBits) + d.state = n.newState + lowBits + return n.symbol +} diff --git a/vendor/github.com/klauspost/compress/fse/fse.go b/vendor/github.com/klauspost/compress/fse/fse.go new file mode 100644 index 00000000000..535cbadfdea --- /dev/null +++ b/vendor/github.com/klauspost/compress/fse/fse.go @@ -0,0 +1,144 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +// Package fse provides Finite State Entropy encoding and decoding. +// +// Finite State Entropy encoding provides a fast near-optimal symbol encoding/decoding +// for byte blocks as implemented in zstd. +// +// See https://github.com/klauspost/compress/tree/master/fse for more information. +package fse + +import ( + "errors" + "fmt" + "math/bits" +) + +const ( + /*!MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ + maxMemoryUsage = 14 + defaultMemoryUsage = 13 + + maxTableLog = maxMemoryUsage - 2 + maxTablesize = 1 << maxTableLog + defaultTablelog = defaultMemoryUsage - 2 + minTablelog = 5 + maxSymbolValue = 255 +) + +var ( + // ErrIncompressible is returned when input is judged to be too hard to compress. + ErrIncompressible = errors.New("input is not compressible") + + // ErrUseRLE is returned from the compressor when the input is a single byte value repeated. + ErrUseRLE = errors.New("input is single value repeated") +) + +// Scratch provides temporary storage for compression and decompression. +type Scratch struct { + // Private + count [maxSymbolValue + 1]uint32 + norm [maxSymbolValue + 1]int16 + br byteReader + bits bitReader + bw bitWriter + ct cTable // Compression tables. + decTable []decSymbol // Decompression table. + maxCount int // count of the most probable symbol + + // Per block parameters. + // These can be used to override compression parameters of the block. + // Do not touch, unless you know what you are doing. + + // Out is output buffer. + // If the scratch is re-used before the caller is done processing the output, + // set this field to nil. + // Otherwise the output buffer will be re-used for next Compression/Decompression step + // and allocation will be avoided. + Out []byte + + // DecompressLimit limits the maximum decoded size acceptable. + // If > 0 decompression will stop when approximately this many bytes + // has been decoded. + // If 0, maximum size will be 2GB. + DecompressLimit int + + symbolLen uint16 // Length of active part of the symbol table. + actualTableLog uint8 // Selected tablelog. + zeroBits bool // no bits has prob > 50%. + clearCount bool // clear count + + // MaxSymbolValue will override the maximum symbol value of the next block. + MaxSymbolValue uint8 + + // TableLog will attempt to override the tablelog for the next block. + TableLog uint8 +} + +// Histogram allows to populate the histogram and skip that step in the compression, +// It otherwise allows to inspect the histogram when compression is done. +// To indicate that you have populated the histogram call HistogramFinished +// with the value of the highest populated symbol, as well as the number of entries +// in the most populated entry. These are accepted at face value. +// The returned slice will always be length 256. +func (s *Scratch) Histogram() []uint32 { + return s.count[:] +} + +// HistogramFinished can be called to indicate that the histogram has been populated. +// maxSymbol is the index of the highest set symbol of the next data segment. +// maxCount is the number of entries in the most populated entry. +// These are accepted at face value. +func (s *Scratch) HistogramFinished(maxSymbol uint8, maxCount int) { + s.maxCount = maxCount + s.symbolLen = uint16(maxSymbol) + 1 + s.clearCount = maxCount != 0 +} + +// prepare will prepare and allocate scratch tables used for both compression and decompression. +func (s *Scratch) prepare(in []byte) (*Scratch, error) { + if s == nil { + s = &Scratch{} + } + if s.MaxSymbolValue == 0 { + s.MaxSymbolValue = 255 + } + if s.TableLog == 0 { + s.TableLog = defaultTablelog + } + if s.TableLog > maxTableLog { + return nil, fmt.Errorf("tableLog (%d) > maxTableLog (%d)", s.TableLog, maxTableLog) + } + if cap(s.Out) == 0 { + s.Out = make([]byte, 0, len(in)) + } + if s.clearCount && s.maxCount == 0 { + for i := range s.count { + s.count[i] = 0 + } + s.clearCount = false + } + s.br.init(in) + if s.DecompressLimit == 0 { + // Max size 2GB. + s.DecompressLimit = (2 << 30) - 1 + } + + return s, nil +} + +// tableStep returns the next table index. +func tableStep(tableSize uint32) uint32 { + return (tableSize >> 1) + (tableSize >> 3) + 3 +} + +func highBits(val uint32) (n uint32) { + return uint32(bits.Len32(val) - 1) +} diff --git a/vendor/github.com/klauspost/compress/gzip/gunzip.go b/vendor/github.com/klauspost/compress/gzip/gunzip.go new file mode 100644 index 00000000000..568b5d4fb8b --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gunzip.go @@ -0,0 +1,344 @@ +// Copyright 2009 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package gzip implements reading and writing of gzip format compressed files, +// as specified in RFC 1952. +package gzip + +import ( + "bufio" + "encoding/binary" + "errors" + "hash/crc32" + "io" + "time" + + "github.com/klauspost/compress/flate" +) + +const ( + gzipID1 = 0x1f + gzipID2 = 0x8b + gzipDeflate = 8 + flagText = 1 << 0 + flagHdrCrc = 1 << 1 + flagExtra = 1 << 2 + flagName = 1 << 3 + flagComment = 1 << 4 +) + +var ( + // ErrChecksum is returned when reading GZIP data that has an invalid checksum. + ErrChecksum = errors.New("gzip: invalid checksum") + // ErrHeader is returned when reading GZIP data that has an invalid header. + ErrHeader = errors.New("gzip: invalid header") +) + +var le = binary.LittleEndian + +// noEOF converts io.EOF to io.ErrUnexpectedEOF. +func noEOF(err error) error { + if err == io.EOF { + return io.ErrUnexpectedEOF + } + return err +} + +// The gzip file stores a header giving metadata about the compressed file. +// That header is exposed as the fields of the Writer and Reader structs. +// +// Strings must be UTF-8 encoded and may only contain Unicode code points +// U+0001 through U+00FF, due to limitations of the GZIP file format. +type Header struct { + Comment string // comment + Extra []byte // "extra data" + ModTime time.Time // modification time + Name string // file name + OS byte // operating system type +} + +// A Reader is an io.Reader that can be read to retrieve +// uncompressed data from a gzip-format compressed file. +// +// In general, a gzip file can be a concatenation of gzip files, +// each with its own header. Reads from the Reader +// return the concatenation of the uncompressed data of each. +// Only the first header is recorded in the Reader fields. +// +// Gzip files store a length and checksum of the uncompressed data. +// The Reader will return a ErrChecksum when Read +// reaches the end of the uncompressed data if it does not +// have the expected length or checksum. Clients should treat data +// returned by Read as tentative until they receive the io.EOF +// marking the end of the data. +type Reader struct { + Header // valid after NewReader or Reader.Reset + r flate.Reader + decompressor io.ReadCloser + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + buf [512]byte + err error + multistream bool +} + +// NewReader creates a new Reader reading the given reader. +// If r does not also implement io.ByteReader, +// the decompressor may read more data than necessary from r. +// +// It is the caller's responsibility to call Close on the Reader when done. +// +// The Reader.Header fields will be valid in the Reader returned. +func NewReader(r io.Reader) (*Reader, error) { + z := new(Reader) + if err := z.Reset(r); err != nil { + return nil, err + } + return z, nil +} + +// Reset discards the Reader z's state and makes it equivalent to the +// result of its original state from NewReader, but reading from r instead. +// This permits reusing a Reader rather than allocating a new one. +func (z *Reader) Reset(r io.Reader) error { + *z = Reader{ + decompressor: z.decompressor, + multistream: true, + } + if rr, ok := r.(flate.Reader); ok { + z.r = rr + } else { + z.r = bufio.NewReader(r) + } + z.Header, z.err = z.readHeader() + return z.err +} + +// Multistream controls whether the reader supports multistream files. +// +// If enabled (the default), the Reader expects the input to be a sequence +// of individually gzipped data streams, each with its own header and +// trailer, ending at EOF. The effect is that the concatenation of a sequence +// of gzipped files is treated as equivalent to the gzip of the concatenation +// of the sequence. This is standard behavior for gzip readers. +// +// Calling Multistream(false) disables this behavior; disabling the behavior +// can be useful when reading file formats that distinguish individual gzip +// data streams or mix gzip data streams with other data streams. +// In this mode, when the Reader reaches the end of the data stream, +// Read returns io.EOF. If the underlying reader implements io.ByteReader, +// it will be left positioned just after the gzip stream. +// To start the next stream, call z.Reset(r) followed by z.Multistream(false). +// If there is no next stream, z.Reset(r) will return io.EOF. +func (z *Reader) Multistream(ok bool) { + z.multistream = ok +} + +// readString reads a NUL-terminated string from z.r. +// It treats the bytes read as being encoded as ISO 8859-1 (Latin-1) and +// will output a string encoded using UTF-8. +// This method always updates z.digest with the data read. +func (z *Reader) readString() (string, error) { + var err error + needConv := false + for i := 0; ; i++ { + if i >= len(z.buf) { + return "", ErrHeader + } + z.buf[i], err = z.r.ReadByte() + if err != nil { + return "", err + } + if z.buf[i] > 0x7f { + needConv = true + } + if z.buf[i] == 0 { + // Digest covers the NUL terminator. + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:i+1]) + + // Strings are ISO 8859-1, Latin-1 (RFC 1952, section 2.3.1). + if needConv { + s := make([]rune, 0, i) + for _, v := range z.buf[:i] { + s = append(s, rune(v)) + } + return string(s), nil + } + return string(z.buf[:i]), nil + } + } +} + +// readHeader reads the GZIP header according to section 2.3.1. +// This method does not set z.err. +func (z *Reader) readHeader() (hdr Header, err error) { + if _, err = io.ReadFull(z.r, z.buf[:10]); err != nil { + // RFC 1952, section 2.2, says the following: + // A gzip file consists of a series of "members" (compressed data sets). + // + // Other than this, the specification does not clarify whether a + // "series" is defined as "one or more" or "zero or more". To err on the + // side of caution, Go interprets this to mean "zero or more". + // Thus, it is okay to return io.EOF here. + return hdr, err + } + if z.buf[0] != gzipID1 || z.buf[1] != gzipID2 || z.buf[2] != gzipDeflate { + return hdr, ErrHeader + } + flg := z.buf[3] + hdr.ModTime = time.Unix(int64(le.Uint32(z.buf[4:8])), 0) + // z.buf[8] is XFL and is currently ignored. + hdr.OS = z.buf[9] + z.digest = crc32.ChecksumIEEE(z.buf[:10]) + + if flg&flagExtra != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, z.buf[:2]) + data := make([]byte, le.Uint16(z.buf[:2])) + if _, err = io.ReadFull(z.r, data); err != nil { + return hdr, noEOF(err) + } + z.digest = crc32.Update(z.digest, crc32.IEEETable, data) + hdr.Extra = data + } + + var s string + if flg&flagName != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Name = s + } + + if flg&flagComment != 0 { + if s, err = z.readString(); err != nil { + return hdr, err + } + hdr.Comment = s + } + + if flg&flagHdrCrc != 0 { + if _, err = io.ReadFull(z.r, z.buf[:2]); err != nil { + return hdr, noEOF(err) + } + digest := le.Uint16(z.buf[:2]) + if digest != uint16(z.digest) { + return hdr, ErrHeader + } + } + + z.digest = 0 + if z.decompressor == nil { + z.decompressor = flate.NewReader(z.r) + } else { + z.decompressor.(flate.Resetter).Reset(z.r, nil) + } + return hdr, nil +} + +// Read implements io.Reader, reading uncompressed bytes from its underlying Reader. +func (z *Reader) Read(p []byte) (n int, err error) { + if z.err != nil { + return 0, z.err + } + + n, z.err = z.decompressor.Read(p) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p[:n]) + z.size += uint32(n) + if z.err != io.EOF { + // In the normal case we return here. + return n, z.err + } + + // Finished file; check checksum and size. + if _, err := io.ReadFull(z.r, z.buf[:8]); err != nil { + z.err = noEOF(err) + return n, z.err + } + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return n, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return n, io.EOF + } + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + return n, z.err + } + + // Read from next file, if necessary. + if n > 0 { + return n, nil + } + return z.Read(p) +} + +// Support the io.WriteTo interface for io.Copy and friends. +func (z *Reader) WriteTo(w io.Writer) (int64, error) { + total := int64(0) + crcWriter := crc32.NewIEEE() + for { + if z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + + // We write both to output and digest. + mw := io.MultiWriter(w, crcWriter) + n, err := z.decompressor.(io.WriterTo).WriteTo(mw) + total += n + z.size += uint32(n) + if err != nil { + z.err = err + return total, z.err + } + + // Finished file; check checksum + size. + if _, err := io.ReadFull(z.r, z.buf[0:8]); err != nil { + if err == io.EOF { + err = io.ErrUnexpectedEOF + } + z.err = err + return total, err + } + z.digest = crcWriter.Sum32() + digest := le.Uint32(z.buf[:4]) + size := le.Uint32(z.buf[4:8]) + if digest != z.digest || size != z.size { + z.err = ErrChecksum + return total, z.err + } + z.digest, z.size = 0, 0 + + // File is ok; check if there is another. + if !z.multistream { + return total, nil + } + crcWriter.Reset() + z.err = nil // Remove io.EOF + + if _, z.err = z.readHeader(); z.err != nil { + if z.err == io.EOF { + return total, nil + } + return total, z.err + } + } +} + +// Close closes the Reader. It does not close the underlying io.Reader. +// In order for the GZIP checksum to be verified, the reader must be +// fully consumed until the io.EOF. +func (z *Reader) Close() error { return z.decompressor.Close() } diff --git a/vendor/github.com/klauspost/compress/gzip/gzip.go b/vendor/github.com/klauspost/compress/gzip/gzip.go new file mode 100644 index 00000000000..26203851bdf --- /dev/null +++ b/vendor/github.com/klauspost/compress/gzip/gzip.go @@ -0,0 +1,269 @@ +// Copyright 2010 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package gzip + +import ( + "errors" + "fmt" + "hash/crc32" + "io" + + "github.com/klauspost/compress/flate" +) + +// These constants are copied from the flate package, so that code that imports +// "compress/gzip" does not also have to import "compress/flate". +const ( + NoCompression = flate.NoCompression + BestSpeed = flate.BestSpeed + BestCompression = flate.BestCompression + DefaultCompression = flate.DefaultCompression + ConstantCompression = flate.ConstantCompression + HuffmanOnly = flate.HuffmanOnly + + // StatelessCompression will do compression but without maintaining any state + // between Write calls. + // There will be no memory kept between Write calls, + // but compression and speed will be suboptimal. + // Because of this, the size of actual Write calls will affect output size. + StatelessCompression = -3 +) + +// A Writer is an io.WriteCloser. +// Writes to a Writer are compressed and written to w. +type Writer struct { + Header // written at first call to Write, Flush, or Close + w io.Writer + level int + err error + compressor *flate.Writer + digest uint32 // CRC-32, IEEE polynomial (section 8) + size uint32 // Uncompressed size (section 2.3.1) + wroteHeader bool + closed bool + buf [10]byte +} + +// NewWriter returns a new Writer. +// Writes to the returned writer are compressed and written to w. +// +// It is the caller's responsibility to call Close on the WriteCloser when done. +// Writes may be buffered and not flushed until Close. +// +// Callers that wish to set the fields in Writer.Header must do so before +// the first call to Write, Flush, or Close. +func NewWriter(w io.Writer) *Writer { + z, _ := NewWriterLevel(w, DefaultCompression) + return z +} + +// NewWriterLevel is like NewWriter but specifies the compression level instead +// of assuming DefaultCompression. +// +// The compression level can be DefaultCompression, NoCompression, or any +// integer value between BestSpeed and BestCompression inclusive. The error +// returned will be nil if the level is valid. +func NewWriterLevel(w io.Writer, level int) (*Writer, error) { + if level < StatelessCompression || level > BestCompression { + return nil, fmt.Errorf("gzip: invalid compression level: %d", level) + } + z := new(Writer) + z.init(w, level) + return z, nil +} + +func (z *Writer) init(w io.Writer, level int) { + compressor := z.compressor + if level != StatelessCompression { + if compressor != nil { + compressor.Reset(w) + } + } + + *z = Writer{ + Header: Header{ + OS: 255, // unknown + }, + w: w, + level: level, + compressor: compressor, + } +} + +// Reset discards the Writer z's state and makes it equivalent to the +// result of its original state from NewWriter or NewWriterLevel, but +// writing to w instead. This permits reusing a Writer rather than +// allocating a new one. +func (z *Writer) Reset(w io.Writer) { + z.init(w, z.level) +} + +// writeBytes writes a length-prefixed byte slice to z.w. +func (z *Writer) writeBytes(b []byte) error { + if len(b) > 0xffff { + return errors.New("gzip.Write: Extra data is too large") + } + le.PutUint16(z.buf[:2], uint16(len(b))) + _, err := z.w.Write(z.buf[:2]) + if err != nil { + return err + } + _, err = z.w.Write(b) + return err +} + +// writeString writes a UTF-8 string s in GZIP's format to z.w. +// GZIP (RFC 1952) specifies that strings are NUL-terminated ISO 8859-1 (Latin-1). +func (z *Writer) writeString(s string) (err error) { + // GZIP stores Latin-1 strings; error if non-Latin-1; convert if non-ASCII. + needconv := false + for _, v := range s { + if v == 0 || v > 0xff { + return errors.New("gzip.Write: non-Latin-1 header string") + } + if v > 0x7f { + needconv = true + } + } + if needconv { + b := make([]byte, 0, len(s)) + for _, v := range s { + b = append(b, byte(v)) + } + _, err = z.w.Write(b) + } else { + _, err = io.WriteString(z.w, s) + } + if err != nil { + return err + } + // GZIP strings are NUL-terminated. + z.buf[0] = 0 + _, err = z.w.Write(z.buf[:1]) + return err +} + +// Write writes a compressed form of p to the underlying io.Writer. The +// compressed bytes are not necessarily flushed until the Writer is closed. +func (z *Writer) Write(p []byte) (int, error) { + if z.err != nil { + return 0, z.err + } + var n int + // Write the GZIP header lazily. + if !z.wroteHeader { + z.wroteHeader = true + z.buf[0] = gzipID1 + z.buf[1] = gzipID2 + z.buf[2] = gzipDeflate + z.buf[3] = 0 + if z.Extra != nil { + z.buf[3] |= 0x04 + } + if z.Name != "" { + z.buf[3] |= 0x08 + } + if z.Comment != "" { + z.buf[3] |= 0x10 + } + le.PutUint32(z.buf[4:8], uint32(z.ModTime.Unix())) + if z.level == BestCompression { + z.buf[8] = 2 + } else if z.level == BestSpeed { + z.buf[8] = 4 + } else { + z.buf[8] = 0 + } + z.buf[9] = z.OS + n, z.err = z.w.Write(z.buf[:10]) + if z.err != nil { + return n, z.err + } + if z.Extra != nil { + z.err = z.writeBytes(z.Extra) + if z.err != nil { + return n, z.err + } + } + if z.Name != "" { + z.err = z.writeString(z.Name) + if z.err != nil { + return n, z.err + } + } + if z.Comment != "" { + z.err = z.writeString(z.Comment) + if z.err != nil { + return n, z.err + } + } + + if z.compressor == nil && z.level != StatelessCompression { + z.compressor, _ = flate.NewWriter(z.w, z.level) + } + } + z.size += uint32(len(p)) + z.digest = crc32.Update(z.digest, crc32.IEEETable, p) + if z.level == StatelessCompression { + return len(p), flate.StatelessDeflate(z.w, p, false, nil) + } + n, z.err = z.compressor.Write(p) + return n, z.err +} + +// Flush flushes any pending compressed data to the underlying writer. +// +// It is useful mainly in compressed network protocols, to ensure that +// a remote reader has enough data to reconstruct a packet. Flush does +// not return until the data has been written. If the underlying +// writer returns an error, Flush returns that error. +// +// In the terminology of the zlib library, Flush is equivalent to Z_SYNC_FLUSH. +func (z *Writer) Flush() error { + if z.err != nil { + return z.err + } + if z.closed || z.level == StatelessCompression { + return nil + } + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + z.err = z.compressor.Flush() + return z.err +} + +// Close closes the Writer, flushing any unwritten data to the underlying +// io.Writer, but does not close the underlying io.Writer. +func (z *Writer) Close() error { + if z.err != nil { + return z.err + } + if z.closed { + return nil + } + z.closed = true + if !z.wroteHeader { + z.Write(nil) + if z.err != nil { + return z.err + } + } + if z.level == StatelessCompression { + z.err = flate.StatelessDeflate(z.w, nil, true, nil) + } else { + z.err = z.compressor.Close() + } + if z.err != nil { + return z.err + } + le.PutUint32(z.buf[:4], z.digest) + le.PutUint32(z.buf[4:8], z.size) + _, z.err = z.w.Write(z.buf[:8]) + return z.err +} diff --git a/vendor/github.com/klauspost/compress/huff0/.gitignore b/vendor/github.com/klauspost/compress/huff0/.gitignore new file mode 100644 index 00000000000..b3d262958f8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/.gitignore @@ -0,0 +1 @@ +/huff0-fuzz.zip diff --git a/vendor/github.com/klauspost/compress/huff0/README.md b/vendor/github.com/klauspost/compress/huff0/README.md new file mode 100644 index 00000000000..0a8448ce9f9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/README.md @@ -0,0 +1,87 @@ +# Huff0 entropy compression + +This package provides Huff0 encoding and decoding as used in zstd. + +[Huff0](https://github.com/Cyan4973/FiniteStateEntropy#new-generation-entropy-coders), +a Huffman codec designed for modern CPU, featuring OoO (Out of Order) operations on multiple ALU +(Arithmetic Logic Unit), achieving extremely fast compression and decompression speeds. + +This can be used for compressing input with a lot of similar input values to the smallest number of bytes. +This does not perform any multi-byte [dictionary coding](https://en.wikipedia.org/wiki/Dictionary_coder) as LZ coders, +but it can be used as a secondary step to compressors (like Snappy) that does not do entropy encoding. + +* [Godoc documentation](https://godoc.org/github.com/klauspost/compress/huff0) + +THIS PACKAGE IS NOT CONSIDERED STABLE AND API OR ENCODING MAY CHANGE IN THE FUTURE. + +## News + + * Mar 2018: First implementation released. Consider this beta software for now. + +# Usage + +This package provides a low level interface that allows to compress single independent blocks. + +Each block is separate, and there is no built in integrity checks. +This means that the caller should keep track of block sizes and also do checksums if needed. + +Compressing a block is done via the [`Compress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress1X) and +[`Compress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Compress4X) functions. +You must provide input and will receive the output and maybe an error. + +These error values can be returned: + +| Error | Description | +|---------------------|-----------------------------------------------------------------------------| +| `` | Everything ok, output is returned | +| `ErrIncompressible` | Returned when input is judged to be too hard to compress | +| `ErrUseRLE` | Returned from the compressor when the input is a single byte value repeated | +| `ErrTooBig` | Returned if the input block exceeds the maximum allowed size (128 Kib) | +| `(error)` | An internal error occurred. | + + +As can be seen above some of there are errors that will be returned even under normal operation so it is important to handle these. + +To reduce allocations you can provide a [`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object +that can be re-used for successive calls. Both compression and decompression accepts a `Scratch` object, and the same +object can be used for both. + +Be aware, that when re-using a `Scratch` object that the *output* buffer is also re-used, so if you are still using this +you must set the `Out` field in the scratch to nil. The same buffer is used for compression and decompression output. + +The `Scratch` object will retain state that allows to re-use previous tables for encoding and decoding. + +## Tables and re-use + +Huff0 allows for reusing tables from the previous block to save space if that is expected to give better/faster results. + +The Scratch object allows you to set a [`ReusePolicy`](https://godoc.org/github.com/klauspost/compress/huff0#ReusePolicy) +that controls this behaviour. See the documentation for details. This can be altered between each block. + +Do however note that this information is *not* stored in the output block and it is up to the users of the package to +record whether [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable) should be called, +based on the boolean reported back from the CompressXX call. + +If you want to store the table separate from the data, you can access them as `OutData` and `OutTable` on the +[`Scratch`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch) object. + +## Decompressing + +The first part of decoding is to initialize the decoding table through [`ReadTable`](https://godoc.org/github.com/klauspost/compress/huff0#ReadTable). +This will initialize the decoding tables. +You can supply the complete block to `ReadTable` and it will return the data part of the block +which can be given to the decompressor. + +Decompressing is done by calling the [`Decompress1X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress1X) +or [`Decompress4X`](https://godoc.org/github.com/klauspost/compress/huff0#Scratch.Decompress4X) function. + +You must provide the output from the compression stage, at exactly the size you got back. If you receive an error back +your input was likely corrupted. + +It is important to note that a successful decoding does *not* mean your output matches your original input. +There are no integrity checks, so relying on errors from the decompressor does not assure your data is valid. + +# Contributing + +Contributions are always welcome. Be aware that adding public functions will require good justification and breaking +changes will likely not be accepted. If in doubt open an issue before writing the PR. \ No newline at end of file diff --git a/vendor/github.com/klauspost/compress/huff0/bitreader.go b/vendor/github.com/klauspost/compress/huff0/bitreader.go new file mode 100644 index 00000000000..7d0903c7010 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/bitreader.go @@ -0,0 +1,115 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package huff0 + +import ( + "errors" + "io" +) + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReader struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReader) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + b.fill() + b.fill() + b.bitsRead += 8 - uint8(highBit32(uint32(v))) + return nil +} + +// getBits will return n bits. n can be 0. +func (b *bitReader) getBits(n uint8) uint16 { + if n == 0 || b.bitsRead >= 64 { + return 0 + } + return b.getBitsFast(n) +} + +// getBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReader) getBitsFast(n uint8) uint16 { + const regMask = 64 - 1 + v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + b.bitsRead += n + return v +} + +// peekBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReader) peekBitsFast(n uint8) uint16 { + const regMask = 64 - 1 + v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + return v +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReader) fillFast() { + if b.bitsRead < 32 { + return + } + // Do single re-slice to avoid bounds checks. + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReader) fill() { + if b.bitsRead < 32 { + return + } + if b.off > 4 { + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value = (b.value << 8) | uint64(b.in[b.off-1]) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReader) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReader) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} diff --git a/vendor/github.com/klauspost/compress/huff0/bitwriter.go b/vendor/github.com/klauspost/compress/huff0/bitwriter.go new file mode 100644 index 00000000000..bda4021efd3 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/bitwriter.go @@ -0,0 +1,197 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package huff0 + +import "fmt" + +// bitWriter will write bits. +// First bit will be LSB of the first byte of output. +type bitWriter struct { + bitContainer uint64 + nBits uint8 + out []byte +} + +// bitMask16 is bitmasks. Has extra to avoid bounds check. +var bitMask16 = [32]uint16{ + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF} /* up to 16 bits */ + +// addBits16NC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16NC(value uint16, bits uint8) { + b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) + b.nBits += bits +} + +// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// encSymbol will add up to 16 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) encSymbol(ct cTable, symbol byte) { + enc := ct[symbol] + b.bitContainer |= uint64(enc.val) << (b.nBits & 63) + b.nBits += enc.nBits +} + +// encTwoSymbols will add up to 32 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) encTwoSymbols(ct cTable, av, bv byte) { + encA := ct[av] + encB := ct[bv] + sh := b.nBits & 63 + combined := uint64(encA.val) | (uint64(encB.val) << (encA.nBits & 63)) + b.bitContainer |= combined << sh + b.nBits += encA.nBits + encB.nBits +} + +// addBits16ZeroNC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +// This is fastest if bits can be zero. +func (b *bitWriter) addBits16ZeroNC(value uint16, bits uint8) { + if bits == 0 { + return + } + value <<= (16 - bits) & 15 + value >>= (16 - bits) & 15 + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// flush will flush all pending full bytes. +// There will be at least 56 bits available for writing when this has been called. +// Using flush32 is faster, but leaves less space for writing. +func (b *bitWriter) flush() { + v := b.nBits >> 3 + switch v { + case 0: + return + case 1: + b.out = append(b.out, + byte(b.bitContainer), + ) + b.bitContainer >>= 1 << 3 + case 2: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + ) + b.bitContainer >>= 2 << 3 + case 3: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + ) + b.bitContainer >>= 3 << 3 + case 4: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + ) + b.bitContainer >>= 4 << 3 + case 5: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + ) + b.bitContainer >>= 5 << 3 + case 6: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + ) + b.bitContainer >>= 6 << 3 + case 7: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + ) + b.bitContainer >>= 7 << 3 + case 8: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + byte(b.bitContainer>>56), + ) + b.bitContainer = 0 + b.nBits = 0 + return + default: + panic(fmt.Errorf("bits (%d) > 64", b.nBits)) + } + b.nBits &= 7 +} + +// flush32 will flush out, so there are at least 32 bits available for writing. +func (b *bitWriter) flush32() { + if b.nBits < 32 { + return + } + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24)) + b.nBits -= 32 + b.bitContainer >>= 32 +} + +// flushAlign will flush remaining full bytes and align to next byte boundary. +func (b *bitWriter) flushAlign() { + nbBytes := (b.nBits + 7) >> 3 + for i := uint8(0); i < nbBytes; i++ { + b.out = append(b.out, byte(b.bitContainer>>(i*8))) + } + b.nBits = 0 + b.bitContainer = 0 +} + +// close will write the alignment bit and write the final byte(s) +// to the output. +func (b *bitWriter) close() error { + // End mark + b.addBits16Clean(1, 1) + // flush until next byte. + b.flushAlign() + return nil +} + +// reset and continue writing by appending to out. +func (b *bitWriter) reset(out []byte) { + b.bitContainer = 0 + b.nBits = 0 + b.out = out +} diff --git a/vendor/github.com/klauspost/compress/huff0/bytereader.go b/vendor/github.com/klauspost/compress/huff0/bytereader.go new file mode 100644 index 00000000000..50bcdf6ea99 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/bytereader.go @@ -0,0 +1,54 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package huff0 + +// byteReader provides a byte reader that reads +// little endian values from a byte stream. +// The input stream is manually advanced. +// The reader performs no bounds checks. +type byteReader struct { + b []byte + off int +} + +// init will initialize the reader and set the input. +func (b *byteReader) init(in []byte) { + b.b = in + b.off = 0 +} + +// advance the stream b n bytes. +func (b *byteReader) advance(n uint) { + b.off += int(n) +} + +// Int32 returns a little endian int32 starting at current offset. +func (b byteReader) Int32() int32 { + v3 := int32(b.b[b.off+3]) + v2 := int32(b.b[b.off+2]) + v1 := int32(b.b[b.off+1]) + v0 := int32(b.b[b.off]) + return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 +} + +// Uint32 returns a little endian uint32 starting at current offset. +func (b byteReader) Uint32() uint32 { + v3 := uint32(b.b[b.off+3]) + v2 := uint32(b.b[b.off+2]) + v1 := uint32(b.b[b.off+1]) + v0 := uint32(b.b[b.off]) + return (v3 << 24) | (v2 << 16) | (v1 << 8) | v0 +} + +// unread returns the unread portion of the input. +func (b byteReader) unread() []byte { + return b.b[b.off:] +} + +// remain will return the number of bytes remaining. +func (b byteReader) remain() int { + return len(b.b) - b.off +} diff --git a/vendor/github.com/klauspost/compress/huff0/compress.go b/vendor/github.com/klauspost/compress/huff0/compress.go new file mode 100644 index 00000000000..0843cb014ff --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/compress.go @@ -0,0 +1,651 @@ +package huff0 + +import ( + "fmt" + "runtime" + "sync" +) + +// Compress1X will compress the input. +// The output can be decoded using Decompress1X. +// Supply a Scratch object. The scratch object contains state about re-use, +// So when sharing across independent encodes, be sure to set the re-use policy. +func Compress1X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { + s, err = s.prepare(in) + if err != nil { + return nil, false, err + } + return compress(in, s, s.compress1X) +} + +// Compress4X will compress the input. The input is split into 4 independent blocks +// and compressed similar to Compress1X. +// The output can be decoded using Decompress4X. +// Supply a Scratch object. The scratch object contains state about re-use, +// So when sharing across independent encodes, be sure to set the re-use policy. +func Compress4X(in []byte, s *Scratch) (out []byte, reUsed bool, err error) { + s, err = s.prepare(in) + if err != nil { + return nil, false, err + } + if false { + // TODO: compress4Xp only slightly faster. + const parallelThreshold = 8 << 10 + if len(in) < parallelThreshold || runtime.GOMAXPROCS(0) == 1 { + return compress(in, s, s.compress4X) + } + return compress(in, s, s.compress4Xp) + } + return compress(in, s, s.compress4X) +} + +func compress(in []byte, s *Scratch, compressor func(src []byte) ([]byte, error)) (out []byte, reUsed bool, err error) { + // Nuke previous table if we cannot reuse anyway. + if s.Reuse == ReusePolicyNone { + s.prevTable = s.prevTable[:0] + } + + // Create histogram, if none was provided. + maxCount := s.maxCount + var canReuse = false + if maxCount == 0 { + maxCount, canReuse = s.countSimple(in) + } else { + canReuse = s.canUseTable(s.prevTable) + } + + // We want the output size to be less than this: + wantSize := len(in) + if s.WantLogLess > 0 { + wantSize -= wantSize >> s.WantLogLess + } + + // Reset for next run. + s.clearCount = true + s.maxCount = 0 + if maxCount >= len(in) { + if maxCount > len(in) { + return nil, false, fmt.Errorf("maxCount (%d) > length (%d)", maxCount, len(in)) + } + if len(in) == 1 { + return nil, false, ErrIncompressible + } + // One symbol, use RLE + return nil, false, ErrUseRLE + } + if maxCount == 1 || maxCount < (len(in)>>7) { + // Each symbol present maximum once or too well distributed. + return nil, false, ErrIncompressible + } + + if s.Reuse == ReusePolicyPrefer && canReuse { + keepTable := s.cTable + keepTL := s.actualTableLog + s.cTable = s.prevTable + s.actualTableLog = s.prevTableLog + s.Out, err = compressor(in) + s.cTable = keepTable + s.actualTableLog = keepTL + if err == nil && len(s.Out) < wantSize { + s.OutData = s.Out + return s.Out, true, nil + } + // Do not attempt to re-use later. + s.prevTable = s.prevTable[:0] + } + + // Calculate new table. + err = s.buildCTable() + if err != nil { + return nil, false, err + } + + if false && !s.canUseTable(s.cTable) { + panic("invalid table generated") + } + + if s.Reuse == ReusePolicyAllow && canReuse { + hSize := len(s.Out) + oldSize := s.prevTable.estimateSize(s.count[:s.symbolLen]) + newSize := s.cTable.estimateSize(s.count[:s.symbolLen]) + if oldSize <= hSize+newSize || hSize+12 >= wantSize { + // Retain cTable even if we re-use. + keepTable := s.cTable + keepTL := s.actualTableLog + + s.cTable = s.prevTable + s.actualTableLog = s.prevTableLog + s.Out, err = compressor(in) + + // Restore ctable. + s.cTable = keepTable + s.actualTableLog = keepTL + if err != nil { + return nil, false, err + } + if len(s.Out) >= wantSize { + return nil, false, ErrIncompressible + } + s.OutData = s.Out + return s.Out, true, nil + } + } + + // Use new table + err = s.cTable.write(s) + if err != nil { + s.OutTable = nil + return nil, false, err + } + s.OutTable = s.Out + + // Compress using new table + s.Out, err = compressor(in) + if err != nil { + s.OutTable = nil + return nil, false, err + } + if len(s.Out) >= wantSize { + s.OutTable = nil + return nil, false, ErrIncompressible + } + // Move current table into previous. + s.prevTable, s.prevTableLog, s.cTable = s.cTable, s.actualTableLog, s.prevTable[:0] + s.OutData = s.Out[len(s.OutTable):] + return s.Out, false, nil +} + +func (s *Scratch) compress1X(src []byte) ([]byte, error) { + return s.compress1xDo(s.Out, src) +} + +func (s *Scratch) compress1xDo(dst, src []byte) ([]byte, error) { + var bw = bitWriter{out: dst} + + // N is length divisible by 4. + n := len(src) + n -= n & 3 + cTable := s.cTable[:256] + + // Encode last bytes. + for i := len(src) & 3; i > 0; i-- { + bw.encSymbol(cTable, src[n+i-1]) + } + n -= 4 + if s.actualTableLog <= 8 { + for ; n >= 0; n -= 4 { + tmp := src[n : n+4] + // tmp should be len 4 + bw.flush32() + bw.encTwoSymbols(cTable, tmp[3], tmp[2]) + bw.encTwoSymbols(cTable, tmp[1], tmp[0]) + } + } else { + for ; n >= 0; n -= 4 { + tmp := src[n : n+4] + // tmp should be len 4 + bw.flush32() + bw.encTwoSymbols(cTable, tmp[3], tmp[2]) + bw.flush32() + bw.encTwoSymbols(cTable, tmp[1], tmp[0]) + } + } + err := bw.close() + return bw.out, err +} + +var sixZeros [6]byte + +func (s *Scratch) compress4X(src []byte) ([]byte, error) { + if len(src) < 12 { + return nil, ErrIncompressible + } + segmentSize := (len(src) + 3) / 4 + + // Add placeholder for output length + offsetIdx := len(s.Out) + s.Out = append(s.Out, sixZeros[:]...) + + for i := 0; i < 4; i++ { + toDo := src + if len(toDo) > segmentSize { + toDo = toDo[:segmentSize] + } + src = src[len(toDo):] + + var err error + idx := len(s.Out) + s.Out, err = s.compress1xDo(s.Out, toDo) + if err != nil { + return nil, err + } + // Write compressed length as little endian before block. + if i < 3 { + // Last length is not written. + length := len(s.Out) - idx + s.Out[i*2+offsetIdx] = byte(length) + s.Out[i*2+offsetIdx+1] = byte(length >> 8) + } + } + + return s.Out, nil +} + +// compress4Xp will compress 4 streams using separate goroutines. +func (s *Scratch) compress4Xp(src []byte) ([]byte, error) { + if len(src) < 12 { + return nil, ErrIncompressible + } + // Add placeholder for output length + s.Out = s.Out[:6] + + segmentSize := (len(src) + 3) / 4 + var wg sync.WaitGroup + var errs [4]error + wg.Add(4) + for i := 0; i < 4; i++ { + toDo := src + if len(toDo) > segmentSize { + toDo = toDo[:segmentSize] + } + src = src[len(toDo):] + + // Separate goroutine for each block. + go func(i int) { + s.tmpOut[i], errs[i] = s.compress1xDo(s.tmpOut[i][:0], toDo) + wg.Done() + }(i) + } + wg.Wait() + for i := 0; i < 4; i++ { + if errs[i] != nil { + return nil, errs[i] + } + o := s.tmpOut[i] + // Write compressed length as little endian before block. + if i < 3 { + // Last length is not written. + s.Out[i*2] = byte(len(o)) + s.Out[i*2+1] = byte(len(o) >> 8) + } + + // Write output. + s.Out = append(s.Out, o...) + } + return s.Out, nil +} + +// countSimple will create a simple histogram in s.count. +// Returns the biggest count. +// Does not update s.clearCount. +func (s *Scratch) countSimple(in []byte) (max int, reuse bool) { + reuse = true + for _, v := range in { + s.count[v]++ + } + m := uint32(0) + if len(s.prevTable) > 0 { + for i, v := range s.count[:] { + if v > m { + m = v + } + if v > 0 { + s.symbolLen = uint16(i) + 1 + if i >= len(s.prevTable) { + reuse = false + } else { + if s.prevTable[i].nBits == 0 { + reuse = false + } + } + } + } + return int(m), reuse + } + for i, v := range s.count[:] { + if v > m { + m = v + } + if v > 0 { + s.symbolLen = uint16(i) + 1 + } + } + return int(m), false +} + +func (s *Scratch) canUseTable(c cTable) bool { + if len(c) < int(s.symbolLen) { + return false + } + for i, v := range s.count[:s.symbolLen] { + if v != 0 && c[i].nBits == 0 { + return false + } + } + return true +} + +func (s *Scratch) validateTable(c cTable) bool { + if len(c) < int(s.symbolLen) { + return false + } + for i, v := range s.count[:s.symbolLen] { + if v != 0 { + if c[i].nBits == 0 { + return false + } + if c[i].nBits > s.actualTableLog { + return false + } + } + } + return true +} + +// minTableLog provides the minimum logSize to safely represent a distribution. +func (s *Scratch) minTableLog() uint8 { + minBitsSrc := highBit32(uint32(s.br.remain())) + 1 + minBitsSymbols := highBit32(uint32(s.symbolLen-1)) + 2 + if minBitsSrc < minBitsSymbols { + return uint8(minBitsSrc) + } + return uint8(minBitsSymbols) +} + +// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog +func (s *Scratch) optimalTableLog() { + tableLog := s.TableLog + minBits := s.minTableLog() + maxBitsSrc := uint8(highBit32(uint32(s.br.remain()-1))) - 1 + if maxBitsSrc < tableLog { + // Accuracy can be reduced + tableLog = maxBitsSrc + } + if minBits > tableLog { + tableLog = minBits + } + // Need a minimum to safely represent all symbol values + if tableLog < minTablelog { + tableLog = minTablelog + } + if tableLog > tableLogMax { + tableLog = tableLogMax + } + s.actualTableLog = tableLog +} + +type cTableEntry struct { + val uint16 + nBits uint8 + // We have 8 bits extra +} + +const huffNodesMask = huffNodesLen - 1 + +func (s *Scratch) buildCTable() error { + s.optimalTableLog() + s.huffSort() + if cap(s.cTable) < maxSymbolValue+1 { + s.cTable = make([]cTableEntry, s.symbolLen, maxSymbolValue+1) + } else { + s.cTable = s.cTable[:s.symbolLen] + for i := range s.cTable { + s.cTable[i] = cTableEntry{} + } + } + + var startNode = int16(s.symbolLen) + nonNullRank := s.symbolLen - 1 + + nodeNb := int16(startNode) + huffNode := s.nodes[1 : huffNodesLen+1] + + // This overlays the slice above, but allows "-1" index lookups. + // Different from reference implementation. + huffNode0 := s.nodes[0 : huffNodesLen+1] + + for huffNode[nonNullRank].count == 0 { + nonNullRank-- + } + + lowS := int16(nonNullRank) + nodeRoot := nodeNb + lowS - 1 + lowN := nodeNb + huffNode[nodeNb].count = huffNode[lowS].count + huffNode[lowS-1].count + huffNode[lowS].parent, huffNode[lowS-1].parent = uint16(nodeNb), uint16(nodeNb) + nodeNb++ + lowS -= 2 + for n := nodeNb; n <= nodeRoot; n++ { + huffNode[n].count = 1 << 30 + } + // fake entry, strong barrier + huffNode0[0].count = 1 << 31 + + // create parents + for nodeNb <= nodeRoot { + var n1, n2 int16 + if huffNode0[lowS+1].count < huffNode0[lowN+1].count { + n1 = lowS + lowS-- + } else { + n1 = lowN + lowN++ + } + if huffNode0[lowS+1].count < huffNode0[lowN+1].count { + n2 = lowS + lowS-- + } else { + n2 = lowN + lowN++ + } + + huffNode[nodeNb].count = huffNode0[n1+1].count + huffNode0[n2+1].count + huffNode0[n1+1].parent, huffNode0[n2+1].parent = uint16(nodeNb), uint16(nodeNb) + nodeNb++ + } + + // distribute weights (unlimited tree height) + huffNode[nodeRoot].nbBits = 0 + for n := nodeRoot - 1; n >= startNode; n-- { + huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 + } + for n := uint16(0); n <= nonNullRank; n++ { + huffNode[n].nbBits = huffNode[huffNode[n].parent].nbBits + 1 + } + s.actualTableLog = s.setMaxHeight(int(nonNullRank)) + maxNbBits := s.actualTableLog + + // fill result into tree (val, nbBits) + if maxNbBits > tableLogMax { + return fmt.Errorf("internal error: maxNbBits (%d) > tableLogMax (%d)", maxNbBits, tableLogMax) + } + var nbPerRank [tableLogMax + 1]uint16 + var valPerRank [16]uint16 + for _, v := range huffNode[:nonNullRank+1] { + nbPerRank[v.nbBits]++ + } + // determine stating value per rank + { + min := uint16(0) + for n := maxNbBits; n > 0; n-- { + // get starting value within each rank + valPerRank[n] = min + min += nbPerRank[n] + min >>= 1 + } + } + + // push nbBits per symbol, symbol order + for _, v := range huffNode[:nonNullRank+1] { + s.cTable[v.symbol].nBits = v.nbBits + } + + // assign value within rank, symbol order + t := s.cTable[:s.symbolLen] + for n, val := range t { + nbits := val.nBits & 15 + v := valPerRank[nbits] + t[n].val = v + valPerRank[nbits] = v + 1 + } + + return nil +} + +// huffSort will sort symbols, decreasing order. +func (s *Scratch) huffSort() { + type rankPos struct { + base uint32 + current uint32 + } + + // Clear nodes + nodes := s.nodes[:huffNodesLen+1] + s.nodes = nodes + nodes = nodes[1 : huffNodesLen+1] + + // Sort into buckets based on length of symbol count. + var rank [32]rankPos + for _, v := range s.count[:s.symbolLen] { + r := highBit32(v+1) & 31 + rank[r].base++ + } + // maxBitLength is log2(BlockSizeMax) + 1 + const maxBitLength = 18 + 1 + for n := maxBitLength; n > 0; n-- { + rank[n-1].base += rank[n].base + } + for n := range rank[:maxBitLength] { + rank[n].current = rank[n].base + } + for n, c := range s.count[:s.symbolLen] { + r := (highBit32(c+1) + 1) & 31 + pos := rank[r].current + rank[r].current++ + prev := nodes[(pos-1)&huffNodesMask] + for pos > rank[r].base && c > prev.count { + nodes[pos&huffNodesMask] = prev + pos-- + prev = nodes[(pos-1)&huffNodesMask] + } + nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)} + } + return +} + +func (s *Scratch) setMaxHeight(lastNonNull int) uint8 { + maxNbBits := s.actualTableLog + huffNode := s.nodes[1 : huffNodesLen+1] + //huffNode = huffNode[: huffNodesLen] + + largestBits := huffNode[lastNonNull].nbBits + + // early exit : no elt > maxNbBits + if largestBits <= maxNbBits { + return largestBits + } + totalCost := int(0) + baseCost := int(1) << (largestBits - maxNbBits) + n := uint32(lastNonNull) + + for huffNode[n].nbBits > maxNbBits { + totalCost += baseCost - (1 << (largestBits - huffNode[n].nbBits)) + huffNode[n].nbBits = maxNbBits + n-- + } + // n stops at huffNode[n].nbBits <= maxNbBits + + for huffNode[n].nbBits == maxNbBits { + n-- + } + // n end at index of smallest symbol using < maxNbBits + + // renorm totalCost + totalCost >>= largestBits - maxNbBits /* note : totalCost is necessarily a multiple of baseCost */ + + // repay normalized cost + { + const noSymbol = 0xF0F0F0F0 + var rankLast [tableLogMax + 2]uint32 + + for i := range rankLast[:] { + rankLast[i] = noSymbol + } + + // Get pos of last (smallest) symbol per rank + { + currentNbBits := uint8(maxNbBits) + for pos := int(n); pos >= 0; pos-- { + if huffNode[pos].nbBits >= currentNbBits { + continue + } + currentNbBits = huffNode[pos].nbBits // < maxNbBits + rankLast[maxNbBits-currentNbBits] = uint32(pos) + } + } + + for totalCost > 0 { + nBitsToDecrease := uint8(highBit32(uint32(totalCost))) + 1 + + for ; nBitsToDecrease > 1; nBitsToDecrease-- { + highPos := rankLast[nBitsToDecrease] + lowPos := rankLast[nBitsToDecrease-1] + if highPos == noSymbol { + continue + } + if lowPos == noSymbol { + break + } + highTotal := huffNode[highPos].count + lowTotal := 2 * huffNode[lowPos].count + if highTotal <= lowTotal { + break + } + } + // only triggered when no more rank 1 symbol left => find closest one (note : there is necessarily at least one !) + // HUF_MAX_TABLELOG test just to please gcc 5+; but it should not be necessary + // FIXME: try to remove + for (nBitsToDecrease <= tableLogMax) && (rankLast[nBitsToDecrease] == noSymbol) { + nBitsToDecrease++ + } + totalCost -= 1 << (nBitsToDecrease - 1) + if rankLast[nBitsToDecrease-1] == noSymbol { + // this rank is no longer empty + rankLast[nBitsToDecrease-1] = rankLast[nBitsToDecrease] + } + huffNode[rankLast[nBitsToDecrease]].nbBits++ + if rankLast[nBitsToDecrease] == 0 { + /* special case, reached largest symbol */ + rankLast[nBitsToDecrease] = noSymbol + } else { + rankLast[nBitsToDecrease]-- + if huffNode[rankLast[nBitsToDecrease]].nbBits != maxNbBits-nBitsToDecrease { + rankLast[nBitsToDecrease] = noSymbol /* this rank is now empty */ + } + } + } + + for totalCost < 0 { /* Sometimes, cost correction overshoot */ + if rankLast[1] == noSymbol { /* special case : no rank 1 symbol (using maxNbBits-1); let's create one from largest rank 0 (using maxNbBits) */ + for huffNode[n].nbBits == maxNbBits { + n-- + } + huffNode[n+1].nbBits-- + rankLast[1] = n + 1 + totalCost++ + continue + } + huffNode[rankLast[1]+1].nbBits-- + rankLast[1]++ + totalCost++ + } + } + return maxNbBits +} + +type nodeElt struct { + count uint32 + parent uint16 + symbol byte + nbBits uint8 +} diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go new file mode 100644 index 00000000000..97ae66a4ac7 --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/decompress.go @@ -0,0 +1,472 @@ +package huff0 + +import ( + "errors" + "fmt" + "io" + + "github.com/klauspost/compress/fse" +) + +type dTable struct { + single []dEntrySingle + double []dEntryDouble +} + +// single-symbols decoding +type dEntrySingle struct { + entry uint16 +} + +// double-symbols decoding +type dEntryDouble struct { + seq uint16 + nBits uint8 + len uint8 +} + +// ReadTable will read a table from the input. +// The size of the input may be larger than the table definition. +// Any content remaining after the table definition will be returned. +// If no Scratch is provided a new one is allocated. +// The returned Scratch can be used for decoding input using this table. +func ReadTable(in []byte, s *Scratch) (s2 *Scratch, remain []byte, err error) { + s, err = s.prepare(in) + if err != nil { + return s, nil, err + } + if len(in) <= 1 { + return s, nil, errors.New("input too small for table") + } + iSize := in[0] + in = in[1:] + if iSize >= 128 { + // Uncompressed + oSize := iSize - 127 + iSize = (oSize + 1) / 2 + if int(iSize) > len(in) { + return s, nil, errors.New("input too small for table") + } + for n := uint8(0); n < oSize; n += 2 { + v := in[n/2] + s.huffWeight[n] = v >> 4 + s.huffWeight[n+1] = v & 15 + } + s.symbolLen = uint16(oSize) + in = in[iSize:] + } else { + if len(in) <= int(iSize) { + return s, nil, errors.New("input too small for table") + } + // FSE compressed weights + s.fse.DecompressLimit = 255 + hw := s.huffWeight[:] + s.fse.Out = hw + b, err := fse.Decompress(in[:iSize], s.fse) + s.fse.Out = nil + if err != nil { + return s, nil, err + } + if len(b) > 255 { + return s, nil, errors.New("corrupt input: output table too large") + } + s.symbolLen = uint16(len(b)) + in = in[iSize:] + } + + // collect weight stats + var rankStats [16]uint32 + weightTotal := uint32(0) + for _, v := range s.huffWeight[:s.symbolLen] { + if v > tableLogMax { + return s, nil, errors.New("corrupt input: weight too large") + } + v2 := v & 15 + rankStats[v2]++ + weightTotal += (1 << v2) >> 1 + } + if weightTotal == 0 { + return s, nil, errors.New("corrupt input: weights zero") + } + + // get last non-null symbol weight (implied, total must be 2^n) + { + tableLog := highBit32(weightTotal) + 1 + if tableLog > tableLogMax { + return s, nil, errors.New("corrupt input: tableLog too big") + } + s.actualTableLog = uint8(tableLog) + // determine last weight + { + total := uint32(1) << tableLog + rest := total - weightTotal + verif := uint32(1) << highBit32(rest) + lastWeight := highBit32(rest) + 1 + if verif != rest { + // last value must be a clean power of 2 + return s, nil, errors.New("corrupt input: last value not power of two") + } + s.huffWeight[s.symbolLen] = uint8(lastWeight) + s.symbolLen++ + rankStats[lastWeight]++ + } + } + + if (rankStats[1] < 2) || (rankStats[1]&1 != 0) { + // by construction : at least 2 elts of rank 1, must be even + return s, nil, errors.New("corrupt input: min elt size, even check failed ") + } + + // TODO: Choose between single/double symbol decoding + + // Calculate starting value for each rank + { + var nextRankStart uint32 + for n := uint8(1); n < s.actualTableLog+1; n++ { + current := nextRankStart + nextRankStart += rankStats[n] << (n - 1) + rankStats[n] = current + } + } + + // fill DTable (always full size) + tSize := 1 << tableLogMax + if len(s.dt.single) != tSize { + s.dt.single = make([]dEntrySingle, tSize) + } + for n, w := range s.huffWeight[:s.symbolLen] { + if w == 0 { + continue + } + length := (uint32(1) << w) >> 1 + d := dEntrySingle{ + entry: uint16(s.actualTableLog+1-w) | (uint16(n) << 8), + } + single := s.dt.single[rankStats[w] : rankStats[w]+length] + for i := range single { + single[i] = d + } + rankStats[w] += length + } + return s, in, nil +} + +// Decompress1X will decompress a 1X encoded stream. +// The length of the supplied input must match the end of a block exactly. +// Before this is called, the table must be initialized with ReadTable unless +// the encoder re-used the table. +func (s *Scratch) Decompress1X(in []byte) (out []byte, err error) { + if len(s.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + var br bitReader + err = br.init(in) + if err != nil { + return nil, err + } + s.Out = s.Out[:0] + + decode := func() byte { + val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ + v := s.dt.single[val] + br.bitsRead += uint8(v.entry) + return uint8(v.entry >> 8) + } + hasDec := func(v dEntrySingle) byte { + br.bitsRead += uint8(v.entry) + return uint8(v.entry >> 8) + } + + // Avoid bounds check by always having full sized table. + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + dt := s.dt.single[:tlSize] + + // Use temp table to avoid bound checks/append penalty. + var tmp = s.huffWeight[:256] + var off uint8 + + for br.off >= 8 { + br.fillFast() + tmp[off+0] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + tmp[off+1] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + br.fillFast() + tmp[off+2] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + tmp[off+3] = hasDec(dt[br.peekBitsFast(s.actualTableLog)&tlMask]) + off += 4 + if off == 0 { + if len(s.Out)+256 > s.MaxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + s.Out = append(s.Out, tmp...) + } + } + + if len(s.Out)+int(off) > s.MaxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + s.Out = append(s.Out, tmp[:off]...) + + for !br.finished() { + br.fill() + if len(s.Out) >= s.MaxDecodedSize { + br.close() + return nil, ErrMaxDecodedSizeExceeded + } + s.Out = append(s.Out, decode()) + } + return s.Out, br.close() +} + +// Decompress4X will decompress a 4X encoded stream. +// Before this is called, the table must be initialized with ReadTable unless +// the encoder re-used the table. +// The length of the supplied input must match the end of a block exactly. +// The destination size of the uncompressed data must be known and provided. +func (s *Scratch) Decompress4X(in []byte, dstSize int) (out []byte, err error) { + if len(s.dt.single) == 0 { + return nil, errors.New("no table loaded") + } + if len(in) < 6+(4*1) { + return nil, errors.New("input too small") + } + if dstSize > s.MaxDecodedSize { + return nil, ErrMaxDecodedSizeExceeded + } + // TODO: We do not detect when we overrun a buffer, except if the last one does. + + var br [4]bitReader + start := 6 + for i := 0; i < 3; i++ { + length := int(in[i*2]) | (int(in[i*2+1]) << 8) + if start+length >= len(in) { + return nil, errors.New("truncated input (or invalid offset)") + } + err = br[i].init(in[start : start+length]) + if err != nil { + return nil, err + } + start += length + } + err = br[3].init(in[start:]) + if err != nil { + return nil, err + } + + // Prepare output + if cap(s.Out) < dstSize { + s.Out = make([]byte, 0, dstSize) + } + s.Out = s.Out[:dstSize] + // destination, offset to match first output + dstOut := s.Out + dstEvery := (dstSize + 3) / 4 + + const tlSize = 1 << tableLogMax + const tlMask = tlSize - 1 + single := s.dt.single[:tlSize] + + decode := func(br *bitReader) byte { + val := br.peekBitsFast(s.actualTableLog) /* note : actualTableLog >= 1 */ + v := single[val&tlMask] + br.bitsRead += uint8(v.entry) + return uint8(v.entry >> 8) + } + + // Use temp table to avoid bound checks/append penalty. + var tmp = s.huffWeight[:256] + var off uint8 + var decoded int + + // Decode 2 values from each decoder/loop. + const bufoff = 256 / 4 +bigloop: + for { + for i := range br { + br := &br[i] + if br.off < 4 { + break bigloop + } + br.fillFast() + } + + { + const stream = 0 + val := br[stream].peekBitsFast(s.actualTableLog) + v := single[val&tlMask] + br[stream].bitsRead += uint8(v.entry) + + val2 := br[stream].peekBitsFast(s.actualTableLog) + v2 := single[val2&tlMask] + tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) + tmp[off+bufoff*stream] = uint8(v.entry >> 8) + br[stream].bitsRead += uint8(v2.entry) + } + + { + const stream = 1 + val := br[stream].peekBitsFast(s.actualTableLog) + v := single[val&tlMask] + br[stream].bitsRead += uint8(v.entry) + + val2 := br[stream].peekBitsFast(s.actualTableLog) + v2 := single[val2&tlMask] + tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) + tmp[off+bufoff*stream] = uint8(v.entry >> 8) + br[stream].bitsRead += uint8(v2.entry) + } + + { + const stream = 2 + val := br[stream].peekBitsFast(s.actualTableLog) + v := single[val&tlMask] + br[stream].bitsRead += uint8(v.entry) + + val2 := br[stream].peekBitsFast(s.actualTableLog) + v2 := single[val2&tlMask] + tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) + tmp[off+bufoff*stream] = uint8(v.entry >> 8) + br[stream].bitsRead += uint8(v2.entry) + } + + { + const stream = 3 + val := br[stream].peekBitsFast(s.actualTableLog) + v := single[val&tlMask] + br[stream].bitsRead += uint8(v.entry) + + val2 := br[stream].peekBitsFast(s.actualTableLog) + v2 := single[val2&tlMask] + tmp[off+bufoff*stream+1] = uint8(v2.entry >> 8) + tmp[off+bufoff*stream] = uint8(v.entry >> 8) + br[stream].bitsRead += uint8(v2.entry) + } + + off += 2 + + if off == bufoff { + if bufoff > dstEvery { + return nil, errors.New("corruption detected: stream overrun 1") + } + copy(dstOut, tmp[:bufoff]) + copy(dstOut[dstEvery:], tmp[bufoff:bufoff*2]) + copy(dstOut[dstEvery*2:], tmp[bufoff*2:bufoff*3]) + copy(dstOut[dstEvery*3:], tmp[bufoff*3:bufoff*4]) + off = 0 + dstOut = dstOut[bufoff:] + decoded += 256 + // There must at least be 3 buffers left. + if len(dstOut) < dstEvery*3 { + return nil, errors.New("corruption detected: stream overrun 2") + } + } + } + if off > 0 { + ioff := int(off) + if len(dstOut) < dstEvery*3+ioff { + return nil, errors.New("corruption detected: stream overrun 3") + } + copy(dstOut, tmp[:off]) + copy(dstOut[dstEvery:dstEvery+ioff], tmp[bufoff:bufoff*2]) + copy(dstOut[dstEvery*2:dstEvery*2+ioff], tmp[bufoff*2:bufoff*3]) + copy(dstOut[dstEvery*3:dstEvery*3+ioff], tmp[bufoff*3:bufoff*4]) + decoded += int(off) * 4 + dstOut = dstOut[off:] + } + + // Decode remaining. + for i := range br { + offset := dstEvery * i + br := &br[i] + for !br.finished() { + br.fill() + if offset >= len(dstOut) { + return nil, errors.New("corruption detected: stream overrun 4") + } + dstOut[offset] = decode(br) + offset++ + } + decoded += offset - dstEvery*i + err = br.close() + if err != nil { + return nil, err + } + } + if dstSize != decoded { + return nil, errors.New("corruption detected: short output block") + } + return s.Out, nil +} + +// matches will compare a decoding table to a coding table. +// Errors are written to the writer. +// Nothing will be written if table is ok. +func (s *Scratch) matches(ct cTable, w io.Writer) { + if s == nil || len(s.dt.single) == 0 { + return + } + dt := s.dt.single[:1<>8) == byte(sym) { + fmt.Fprintf(w, "symbol %x has decoder, but no encoder\n", sym) + errs++ + break + } + } + if errs == 0 { + broken-- + } + continue + } + // Unused bits in input + ub := tablelog - enc.nBits + top := enc.val << ub + // decoder looks at top bits. + dec := dt[top] + if uint8(dec.entry) != enc.nBits { + fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", sym, enc.nBits, uint8(dec.entry)) + errs++ + } + if uint8(dec.entry>>8) != uint8(sym) { + fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", sym, sym, uint8(dec.entry>>8)) + errs++ + } + if errs > 0 { + fmt.Fprintf(w, "%d errros in base, stopping\n", errs) + continue + } + // Ensure that all combinations are covered. + for i := uint16(0); i < (1 << ub); i++ { + vval := top | i + dec := dt[vval] + if uint8(dec.entry) != enc.nBits { + fmt.Fprintf(w, "symbol 0x%x bit size mismatch (enc: %d, dec:%d).\n", vval, enc.nBits, uint8(dec.entry)) + errs++ + } + if uint8(dec.entry>>8) != uint8(sym) { + fmt.Fprintf(w, "symbol 0x%x decoder output mismatch (enc: %d, dec:%d).\n", vval, sym, uint8(dec.entry>>8)) + errs++ + } + if errs > 20 { + fmt.Fprintf(w, "%d errros, stopping\n", errs) + break + } + } + if errs == 0 { + ok++ + broken-- + } + } + if broken > 0 { + fmt.Fprintf(w, "%d broken, %d ok\n", broken, ok) + } +} diff --git a/vendor/github.com/klauspost/compress/huff0/huff0.go b/vendor/github.com/klauspost/compress/huff0/huff0.go new file mode 100644 index 00000000000..177d6c4ea0e --- /dev/null +++ b/vendor/github.com/klauspost/compress/huff0/huff0.go @@ -0,0 +1,260 @@ +// Package huff0 provides fast huffman encoding as used in zstd. +// +// See README.md at https://github.com/klauspost/compress/tree/master/huff0 for details. +package huff0 + +import ( + "errors" + "fmt" + "math" + "math/bits" + + "github.com/klauspost/compress/fse" +) + +const ( + maxSymbolValue = 255 + + // zstandard limits tablelog to 11, see: + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#huffman-tree-description + tableLogMax = 11 + tableLogDefault = 11 + minTablelog = 5 + huffNodesLen = 512 + + // BlockSizeMax is maximum input size for a single block uncompressed. + BlockSizeMax = 1<<18 - 1 +) + +var ( + // ErrIncompressible is returned when input is judged to be too hard to compress. + ErrIncompressible = errors.New("input is not compressible") + + // ErrUseRLE is returned from the compressor when the input is a single byte value repeated. + ErrUseRLE = errors.New("input is single value repeated") + + // ErrTooBig is return if input is too large for a single block. + ErrTooBig = errors.New("input too big") + + // ErrMaxDecodedSizeExceeded is return if input is too large for a single block. + ErrMaxDecodedSizeExceeded = errors.New("maximum output size exceeded") +) + +type ReusePolicy uint8 + +const ( + // ReusePolicyAllow will allow reuse if it produces smaller output. + ReusePolicyAllow ReusePolicy = iota + + // ReusePolicyPrefer will re-use aggressively if possible. + // This will not check if a new table will produce smaller output, + // except if the current table is impossible to use or + // compressed output is bigger than input. + ReusePolicyPrefer + + // ReusePolicyNone will disable re-use of tables. + // This is slightly faster than ReusePolicyAllow but may produce larger output. + ReusePolicyNone +) + +type Scratch struct { + count [maxSymbolValue + 1]uint32 + + // Per block parameters. + // These can be used to override compression parameters of the block. + // Do not touch, unless you know what you are doing. + + // Out is output buffer. + // If the scratch is re-used before the caller is done processing the output, + // set this field to nil. + // Otherwise the output buffer will be re-used for next Compression/Decompression step + // and allocation will be avoided. + Out []byte + + // OutTable will contain the table data only, if a new table has been generated. + // Slice of the returned data. + OutTable []byte + + // OutData will contain the compressed data. + // Slice of the returned data. + OutData []byte + + // MaxDecodedSize will set the maximum allowed output size. + // This value will automatically be set to BlockSizeMax if not set. + // Decoders will return ErrMaxDecodedSizeExceeded is this limit is exceeded. + MaxDecodedSize int + + br byteReader + + // MaxSymbolValue will override the maximum symbol value of the next block. + MaxSymbolValue uint8 + + // TableLog will attempt to override the tablelog for the next block. + // Must be <= 11 and >= 5. + TableLog uint8 + + // Reuse will specify the reuse policy + Reuse ReusePolicy + + // WantLogLess allows to specify a log 2 reduction that should at least be achieved, + // otherwise the block will be returned as incompressible. + // The reduction should then at least be (input size >> WantLogLess) + // If WantLogLess == 0 any improvement will do. + WantLogLess uint8 + + symbolLen uint16 // Length of active part of the symbol table. + maxCount int // count of the most probable symbol + clearCount bool // clear count + actualTableLog uint8 // Selected tablelog. + prevTableLog uint8 // Tablelog for previous table + prevTable cTable // Table used for previous compression. + cTable cTable // compression table + dt dTable // decompression table + nodes []nodeElt + tmpOut [4][]byte + fse *fse.Scratch + huffWeight [maxSymbolValue + 1]byte +} + +func (s *Scratch) prepare(in []byte) (*Scratch, error) { + if len(in) > BlockSizeMax { + return nil, ErrTooBig + } + if s == nil { + s = &Scratch{} + } + if s.MaxSymbolValue == 0 { + s.MaxSymbolValue = maxSymbolValue + } + if s.TableLog == 0 { + s.TableLog = tableLogDefault + } + if s.TableLog > tableLogMax || s.TableLog < minTablelog { + return nil, fmt.Errorf(" invalid tableLog %d (%d -> %d)", s.TableLog, minTablelog, tableLogMax) + } + if s.MaxDecodedSize <= 0 || s.MaxDecodedSize > BlockSizeMax { + s.MaxDecodedSize = BlockSizeMax + } + if s.clearCount && s.maxCount == 0 { + for i := range s.count { + s.count[i] = 0 + } + s.clearCount = false + } + if cap(s.Out) == 0 { + s.Out = make([]byte, 0, len(in)) + } + s.Out = s.Out[:0] + + s.OutTable = nil + s.OutData = nil + if cap(s.nodes) < huffNodesLen+1 { + s.nodes = make([]nodeElt, 0, huffNodesLen+1) + } + s.nodes = s.nodes[:0] + if s.fse == nil { + s.fse = &fse.Scratch{} + } + s.br.init(in) + + return s, nil +} + +type cTable []cTableEntry + +func (c cTable) write(s *Scratch) error { + var ( + // precomputed conversion table + bitsToWeight [tableLogMax + 1]byte + huffLog = s.actualTableLog + // last weight is not saved. + maxSymbolValue = uint8(s.symbolLen - 1) + huffWeight = s.huffWeight[:256] + ) + const ( + maxFSETableLog = 6 + ) + // convert to weight + bitsToWeight[0] = 0 + for n := uint8(1); n < huffLog+1; n++ { + bitsToWeight[n] = huffLog + 1 - n + } + + // Acquire histogram for FSE. + hist := s.fse.Histogram() + hist = hist[:256] + for i := range hist[:16] { + hist[i] = 0 + } + for n := uint8(0); n < maxSymbolValue; n++ { + v := bitsToWeight[c[n].nBits] & 15 + huffWeight[n] = v + hist[v]++ + } + + // FSE compress if feasible. + if maxSymbolValue >= 2 { + huffMaxCnt := uint32(0) + huffMax := uint8(0) + for i, v := range hist[:16] { + if v == 0 { + continue + } + huffMax = byte(i) + if v > huffMaxCnt { + huffMaxCnt = v + } + } + s.fse.HistogramFinished(huffMax, int(huffMaxCnt)) + s.fse.TableLog = maxFSETableLog + b, err := fse.Compress(huffWeight[:maxSymbolValue], s.fse) + if err == nil && len(b) < int(s.symbolLen>>1) { + s.Out = append(s.Out, uint8(len(b))) + s.Out = append(s.Out, b...) + return nil + } + // Unable to compress (RLE/uncompressible) + } + // write raw values as 4-bits (max : 15) + if maxSymbolValue > (256 - 128) { + // should not happen : likely means source cannot be compressed + return ErrIncompressible + } + op := s.Out + // special case, pack weights 4 bits/weight. + op = append(op, 128|(maxSymbolValue-1)) + // be sure it doesn't cause msan issue in final combination + huffWeight[maxSymbolValue] = 0 + for n := uint16(0); n < uint16(maxSymbolValue); n += 2 { + op = append(op, (huffWeight[n]<<4)|huffWeight[n+1]) + } + s.Out = op + return nil +} + +// estimateSize returns the estimated size in bytes of the input represented in the +// histogram supplied. +func (c cTable) estimateSize(hist []uint32) int { + nbBits := uint32(7) + for i, v := range c[:len(hist)] { + nbBits += uint32(v.nBits) * hist[i] + } + return int(nbBits >> 3) +} + +// minSize returns the minimum possible size considering the shannon limit. +func (s *Scratch) minSize(total int) int { + nbBits := float64(7) + fTotal := float64(total) + for _, v := range s.count[:s.symbolLen] { + n := float64(v) + if n > 0 { + nbBits += math.Log2(fTotal/n) * n + } + } + return int(nbBits) >> 3 +} + +func highBit32(val uint32) (n uint32) { + return uint32(bits.Len32(val) - 1) +} diff --git a/vendor/github.com/klauspost/compress/snappy/.gitignore b/vendor/github.com/klauspost/compress/snappy/.gitignore new file mode 100644 index 00000000000..042091d9b3b --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/.gitignore @@ -0,0 +1,16 @@ +cmd/snappytool/snappytool +testdata/bench + +# These explicitly listed benchmark data files are for an obsolete version of +# snappy_test.go. +testdata/alice29.txt +testdata/asyoulik.txt +testdata/fireworks.jpeg +testdata/geo.protodata +testdata/html +testdata/html_x_4 +testdata/kppkn.gtb +testdata/lcet10.txt +testdata/paper-100k.pdf +testdata/plrabn12.txt +testdata/urls.10K diff --git a/vendor/github.com/klauspost/compress/snappy/AUTHORS b/vendor/github.com/klauspost/compress/snappy/AUTHORS new file mode 100644 index 00000000000..bcfa19520af --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/AUTHORS @@ -0,0 +1,15 @@ +# This is the official list of Snappy-Go authors for copyright purposes. +# This file is distinct from the CONTRIBUTORS files. +# See the latter for an explanation. + +# Names should be added to this file as +# Name or Organization +# The email address is not required for organizations. + +# Please keep the list sorted. + +Damian Gryski +Google Inc. +Jan Mercl <0xjnml@gmail.com> +Rodolfo Carvalho +Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS new file mode 100644 index 00000000000..931ae31606f --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/CONTRIBUTORS @@ -0,0 +1,37 @@ +# This is the official list of people who can contribute +# (and typically have contributed) code to the Snappy-Go repository. +# The AUTHORS file lists the copyright holders; this file +# lists people. For example, Google employees are listed here +# but not in AUTHORS, because Google holds the copyright. +# +# The submission process automatically checks to make sure +# that people submitting code are listed in this file (by email address). +# +# Names should be added to this file only after verifying that +# the individual or the individual's organization has agreed to +# the appropriate Contributor License Agreement, found here: +# +# http://code.google.com/legal/individual-cla-v1.0.html +# http://code.google.com/legal/corporate-cla-v1.0.html +# +# The agreement for individuals can be filled out on the web. +# +# When adding J Random Contributor's name to this file, +# either J's name or J's organization's name should be +# added to the AUTHORS file, depending on whether the +# individual or corporate CLA was used. + +# Names should be added to this file like so: +# Name + +# Please keep the list sorted. + +Damian Gryski +Jan Mercl <0xjnml@gmail.com> +Kai Backman +Marc-Antoine Ruel +Nigel Tao +Rob Pike +Rodolfo Carvalho +Russ Cox +Sebastien Binet diff --git a/vendor/github.com/klauspost/compress/snappy/LICENSE b/vendor/github.com/klauspost/compress/snappy/LICENSE new file mode 100644 index 00000000000..6050c10f4c8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/LICENSE @@ -0,0 +1,27 @@ +Copyright (c) 2011 The Snappy-Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/vendor/github.com/klauspost/compress/snappy/README b/vendor/github.com/klauspost/compress/snappy/README new file mode 100644 index 00000000000..cea12879a0e --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/README @@ -0,0 +1,107 @@ +The Snappy compression format in the Go programming language. + +To download and install from source: +$ go get github.com/golang/snappy + +Unless otherwise noted, the Snappy-Go source files are distributed +under the BSD-style license found in the LICENSE file. + + + +Benchmarks. + +The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten +or so files, the same set used by the C++ Snappy code (github.com/google/snappy +and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @ +3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29: + +"go test -test.bench=." + +_UFlat0-8 2.19GB/s ± 0% html +_UFlat1-8 1.41GB/s ± 0% urls +_UFlat2-8 23.5GB/s ± 2% jpg +_UFlat3-8 1.91GB/s ± 0% jpg_200 +_UFlat4-8 14.0GB/s ± 1% pdf +_UFlat5-8 1.97GB/s ± 0% html4 +_UFlat6-8 814MB/s ± 0% txt1 +_UFlat7-8 785MB/s ± 0% txt2 +_UFlat8-8 857MB/s ± 0% txt3 +_UFlat9-8 719MB/s ± 1% txt4 +_UFlat10-8 2.84GB/s ± 0% pb +_UFlat11-8 1.05GB/s ± 0% gaviota + +_ZFlat0-8 1.04GB/s ± 0% html +_ZFlat1-8 534MB/s ± 0% urls +_ZFlat2-8 15.7GB/s ± 1% jpg +_ZFlat3-8 740MB/s ± 3% jpg_200 +_ZFlat4-8 9.20GB/s ± 1% pdf +_ZFlat5-8 991MB/s ± 0% html4 +_ZFlat6-8 379MB/s ± 0% txt1 +_ZFlat7-8 352MB/s ± 0% txt2 +_ZFlat8-8 396MB/s ± 1% txt3 +_ZFlat9-8 327MB/s ± 1% txt4 +_ZFlat10-8 1.33GB/s ± 1% pb +_ZFlat11-8 605MB/s ± 1% gaviota + + + +"go test -test.bench=. -tags=noasm" + +_UFlat0-8 621MB/s ± 2% html +_UFlat1-8 494MB/s ± 1% urls +_UFlat2-8 23.2GB/s ± 1% jpg +_UFlat3-8 1.12GB/s ± 1% jpg_200 +_UFlat4-8 4.35GB/s ± 1% pdf +_UFlat5-8 609MB/s ± 0% html4 +_UFlat6-8 296MB/s ± 0% txt1 +_UFlat7-8 288MB/s ± 0% txt2 +_UFlat8-8 309MB/s ± 1% txt3 +_UFlat9-8 280MB/s ± 1% txt4 +_UFlat10-8 753MB/s ± 0% pb +_UFlat11-8 400MB/s ± 0% gaviota + +_ZFlat0-8 409MB/s ± 1% html +_ZFlat1-8 250MB/s ± 1% urls +_ZFlat2-8 12.3GB/s ± 1% jpg +_ZFlat3-8 132MB/s ± 0% jpg_200 +_ZFlat4-8 2.92GB/s ± 0% pdf +_ZFlat5-8 405MB/s ± 1% html4 +_ZFlat6-8 179MB/s ± 1% txt1 +_ZFlat7-8 170MB/s ± 1% txt2 +_ZFlat8-8 189MB/s ± 1% txt3 +_ZFlat9-8 164MB/s ± 1% txt4 +_ZFlat10-8 479MB/s ± 1% pb +_ZFlat11-8 270MB/s ± 1% gaviota + + + +For comparison (Go's encoded output is byte-for-byte identical to C++'s), here +are the numbers from C++ Snappy's + +make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log + +BM_UFlat/0 2.4GB/s html +BM_UFlat/1 1.4GB/s urls +BM_UFlat/2 21.8GB/s jpg +BM_UFlat/3 1.5GB/s jpg_200 +BM_UFlat/4 13.3GB/s pdf +BM_UFlat/5 2.1GB/s html4 +BM_UFlat/6 1.0GB/s txt1 +BM_UFlat/7 959.4MB/s txt2 +BM_UFlat/8 1.0GB/s txt3 +BM_UFlat/9 864.5MB/s txt4 +BM_UFlat/10 2.9GB/s pb +BM_UFlat/11 1.2GB/s gaviota + +BM_ZFlat/0 944.3MB/s html (22.31 %) +BM_ZFlat/1 501.6MB/s urls (47.78 %) +BM_ZFlat/2 14.3GB/s jpg (99.95 %) +BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %) +BM_ZFlat/4 8.3GB/s pdf (83.30 %) +BM_ZFlat/5 903.5MB/s html4 (22.52 %) +BM_ZFlat/6 336.0MB/s txt1 (57.88 %) +BM_ZFlat/7 312.3MB/s txt2 (61.91 %) +BM_ZFlat/8 353.1MB/s txt3 (54.99 %) +BM_ZFlat/9 289.9MB/s txt4 (66.26 %) +BM_ZFlat/10 1.2GB/s pb (19.68 %) +BM_ZFlat/11 527.4MB/s gaviota (37.72 %) diff --git a/vendor/github.com/klauspost/compress/snappy/decode.go b/vendor/github.com/klauspost/compress/snappy/decode.go new file mode 100644 index 00000000000..72efb0353dd --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/decode.go @@ -0,0 +1,237 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +var ( + // ErrCorrupt reports that the input is invalid. + ErrCorrupt = errors.New("snappy: corrupt input") + // ErrTooLarge reports that the uncompressed length is too large. + ErrTooLarge = errors.New("snappy: decoded block is too large") + // ErrUnsupported reports that the input isn't supported. + ErrUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// DecodedLen returns the length of the decoded block. +func DecodedLen(src []byte) (int, error) { + v, _, err := decodedLen(src) + return v, err +} + +// decodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func decodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrTooLarge + } + return int(v), n, nil +} + +const ( + decodeErrCodeCorrupt = 1 + decodeErrCodeUnsupportedLiteralLength = 2 +) + +// Decode returns the decoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire decoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Decode(dst, src []byte) ([]byte, error) { + dLen, s, err := decodedLen(src) + if err != nil { + return nil, err + } + if dLen <= len(dst) { + dst = dst[:dLen] + } else { + dst = make([]byte, dLen) + } + switch decode(dst, src[s:]) { + case 0: + return dst, nil + case decodeErrCodeUnsupportedLiteralLength: + return nil, errUnsupportedLiteralLength + } + return nil, ErrCorrupt +} + +// NewReader returns a new Reader that decompresses from r, using the framing +// format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +func NewReader(r io.Reader) *Reader { + return &Reader{ + r: r, + decoded: make([]byte, maxBlockSize), + buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize), + } +} + +// Reader is an io.Reader that can read Snappy-compressed bytes. +type Reader struct { + r io.Reader + err error + decoded []byte + buf []byte + // decoded[i:j] contains decoded bytes that have not yet been passed on. + i, j int + readHeader bool +} + +// Reset discards any buffered data, resets all state, and switches the Snappy +// reader to read from r. This permits reusing a Reader rather than allocating +// a new one. +func (r *Reader) Reset(reader io.Reader) { + r.r = reader + r.err = nil + r.i = 0 + r.j = 0 + r.readHeader = false +} + +func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrCorrupt + } + return false + } + return true +} + +// Read satisfies the io.Reader interface. +func (r *Reader) Read(p []byte) (int, error) { + if r.err != nil { + return 0, r.err + } + for { + if r.i < r.j { + n := copy(p, r.decoded[r.i:r.j]) + r.i += n + return n, nil + } + if !r.readFull(r.buf[:4], true) { + return 0, r.err + } + chunkType := r.buf[0] + if !r.readHeader { + if chunkType != chunkTypeStreamIdentifier { + r.err = ErrCorrupt + return 0, r.err + } + r.readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + r.err = ErrUnsupported + return 0, r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[checksumSize:] + + n, err := DecodedLen(buf) + if err != nil { + r.err = err + return 0, r.err + } + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if _, err := Decode(r.decoded, buf); err != nil { + r.err = err + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeUncompressedData: + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < checksumSize { + r.err = ErrCorrupt + return 0, r.err + } + buf := r.buf[:checksumSize] + if !r.readFull(buf, false) { + return 0, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - checksumSize + if n > len(r.decoded) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.decoded[:n], false) { + return 0, r.err + } + if crc(r.decoded[:n]) != checksum { + r.err = ErrCorrupt + return 0, r.err + } + r.i, r.j = 0, n + continue + + case chunkTypeStreamIdentifier: + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(magicBody) { + r.err = ErrCorrupt + return 0, r.err + } + if !r.readFull(r.buf[:len(magicBody)], false) { + return 0, r.err + } + for i := 0; i < len(magicBody); i++ { + if r.buf[i] != magicBody[i] { + r.err = ErrCorrupt + return 0, r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + r.err = ErrUnsupported + return 0, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return 0, r.err + } + } +} diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.go b/vendor/github.com/klauspost/compress/snappy/decode_amd64.go new file mode 100644 index 00000000000..fcd192b849e --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/decode_amd64.go @@ -0,0 +1,14 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// decode has the same semantics as in decode_other.go. +// +//go:noescape +func decode(dst, src []byte) int diff --git a/vendor/github.com/klauspost/compress/snappy/decode_amd64.s b/vendor/github.com/klauspost/compress/snappy/decode_amd64.s new file mode 100644 index 00000000000..1c66e37234d --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/decode_amd64.s @@ -0,0 +1,482 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The asm code generally follows the pure Go code in decode_other.go, except +// where marked with a "!!!". + +// func decode(dst, src []byte) int +// +// All local variables fit into registers. The non-zero stack size is only to +// spill registers and push args when issuing a CALL. The register allocation: +// - AX scratch +// - BX scratch +// - CX length or x +// - DX offset +// - SI &src[s] +// - DI &dst[d] +// + R8 dst_base +// + R9 dst_len +// + R10 dst_base + dst_len +// + R11 src_base +// + R12 src_len +// + R13 src_base + src_len +// - R14 used by doCopy +// - R15 used by doCopy +// +// The registers R8-R13 (marked with a "+") are set at the start of the +// function, and after a CALL returns, and are not otherwise modified. +// +// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI. +// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI. +TEXT ·decode(SB), NOSPLIT, $48-56 + // Initialize SI, DI and R8-R13. + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, DI + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, SI + MOVQ R11, R13 + ADDQ R12, R13 + +loop: + // for s < len(src) + CMPQ SI, R13 + JEQ end + + // CX = uint32(src[s]) + // + // switch src[s] & 0x03 + MOVBLZX (SI), CX + MOVL CX, BX + ANDL $3, BX + CMPL BX, $1 + JAE tagCopy + + // ---------------------------------------- + // The code below handles literal tags. + + // case tagLiteral: + // x := uint32(src[s] >> 2) + // switch + SHRL $2, CX + CMPL CX, $60 + JAE tagLit60Plus + + // case x < 60: + // s++ + INCQ SI + +doLit: + // This is the end of the inner "switch", when we have a literal tag. + // + // We assume that CX == x and x fits in a uint32, where x is the variable + // used in the pure Go decode_other.go code. + + // length = int(x) + 1 + // + // Unlike the pure Go code, we don't need to check if length <= 0 because + // CX can hold 64 bits, so the increment cannot overflow. + INCQ CX + + // Prepare to check if copying length bytes will run past the end of dst or + // src. + // + // AX = len(dst) - d + // BX = len(src) - s + MOVQ R10, AX + SUBQ DI, AX + MOVQ R13, BX + SUBQ SI, BX + + // !!! Try a faster technique for short (16 or fewer bytes) copies. + // + // if length > 16 || len(dst)-d < 16 || len(src)-s < 16 { + // goto callMemmove // Fall back on calling runtime·memmove. + // } + // + // The C++ snappy code calls this TryFastAppend. It also checks len(src)-s + // against 21 instead of 16, because it cannot assume that all of its input + // is contiguous in memory and so it needs to leave enough source bytes to + // read the next tag without refilling buffers, but Go's Decode assumes + // contiguousness (the src argument is a []byte). + CMPQ CX, $16 + JGT callMemmove + CMPQ AX, $16 + JLT callMemmove + CMPQ BX, $16 + JLT callMemmove + + // !!! Implement the copy from src to dst as a 16-byte load and store. + // (Decode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only length bytes, but that's + // OK. If the input is a valid Snappy encoding then subsequent iterations + // will fix up the overrun. Otherwise, Decode returns a nil []byte (and a + // non-nil error), so the overrun will be ignored. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(SI), X0 + MOVOU X0, 0(DI) + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +callMemmove: + // if length > len(dst)-d || length > len(src)-s { etc } + CMPQ CX, AX + JGT errCorrupt + CMPQ CX, BX + JGT errCorrupt + + // copy(dst[d:], src[s:s+length]) + // + // This means calling runtime·memmove(&dst[d], &src[s], length), so we push + // DI, SI and CX as arguments. Coincidentally, we also need to spill those + // three registers to the stack, to save local variables across the CALL. + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) + CALL runtime·memmove(SB) + + // Restore local variables: unspill registers from the stack and + // re-calculate R8-R13. + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVQ dst_base+0(FP), R8 + MOVQ dst_len+8(FP), R9 + MOVQ R8, R10 + ADDQ R9, R10 + MOVQ src_base+24(FP), R11 + MOVQ src_len+32(FP), R12 + MOVQ R11, R13 + ADDQ R12, R13 + + // d += length + // s += length + ADDQ CX, DI + ADDQ CX, SI + JMP loop + +tagLit60Plus: + // !!! This fragment does the + // + // s += x - 58; if uint(s) > uint(len(src)) { etc } + // + // checks. In the asm version, we code it once instead of once per switch case. + ADDQ CX, SI + SUBQ $58, SI + CMPQ SI, R13 + JA errCorrupt + + // case x == 60: + CMPL CX, $61 + JEQ tagLit61 + JA tagLit62Plus + + // x = uint32(src[s-1]) + MOVBLZX -1(SI), CX + JMP doLit + +tagLit61: + // case x == 61: + // x = uint32(src[s-2]) | uint32(src[s-1])<<8 + MOVWLZX -2(SI), CX + JMP doLit + +tagLit62Plus: + CMPL CX, $62 + JA tagLit63 + + // case x == 62: + // x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + MOVWLZX -3(SI), CX + MOVBLZX -1(SI), BX + SHLL $16, BX + ORL BX, CX + JMP doLit + +tagLit63: + // case x == 63: + // x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + MOVL -4(SI), CX + JMP doLit + +// The code above handles literal tags. +// ---------------------------------------- +// The code below handles copy tags. + +tagCopy4: + // case tagCopy4: + // s += 5 + ADDQ $5, SI + + // if uint(s) > uint(len(src)) { etc } + CMPQ SI, R13 + JA errCorrupt + + // length = 1 + int(src[s-5])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + MOVLQZX -4(SI), DX + JMP doCopy + +tagCopy2: + // case tagCopy2: + // s += 3 + ADDQ $3, SI + + // if uint(s) > uint(len(src)) { etc } + CMPQ SI, R13 + JA errCorrupt + + // length = 1 + int(src[s-3])>>2 + SHRQ $2, CX + INCQ CX + + // offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + MOVWQZX -2(SI), DX + JMP doCopy + +tagCopy: + // We have a copy tag. We assume that: + // - BX == src[s] & 0x03 + // - CX == src[s] + CMPQ BX, $2 + JEQ tagCopy2 + JA tagCopy4 + + // case tagCopy1: + // s += 2 + ADDQ $2, SI + + // if uint(s) > uint(len(src)) { etc } + CMPQ SI, R13 + JA errCorrupt + + // offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + MOVQ CX, DX + ANDQ $0xe0, DX + SHLQ $3, DX + MOVBQZX -1(SI), BX + ORQ BX, DX + + // length = 4 + int(src[s-2])>>2&0x7 + SHRQ $2, CX + ANDQ $7, CX + ADDQ $4, CX + +doCopy: + // This is the end of the outer "switch", when we have a copy tag. + // + // We assume that: + // - CX == length && CX > 0 + // - DX == offset + + // if offset <= 0 { etc } + CMPQ DX, $0 + JLE errCorrupt + + // if d < offset { etc } + MOVQ DI, BX + SUBQ R8, BX + CMPQ BX, DX + JLT errCorrupt + + // if length > len(dst)-d { etc } + MOVQ R10, BX + SUBQ DI, BX + CMPQ CX, BX + JGT errCorrupt + + // forwardCopy(dst[d:d+length], dst[d-offset:]); d += length + // + // Set: + // - R14 = len(dst)-d + // - R15 = &dst[d-offset] + MOVQ R10, R14 + SUBQ DI, R14 + MOVQ DI, R15 + SUBQ DX, R15 + + // !!! Try a faster technique for short (16 or fewer bytes) forward copies. + // + // First, try using two 8-byte load/stores, similar to the doLit technique + // above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is + // still OK if offset >= 8. Note that this has to be two 8-byte load/stores + // and not one 16-byte load/store, and the first store has to be before the + // second load, due to the overlap if offset is in the range [8, 16). + // + // if length > 16 || offset < 8 || len(dst)-d < 16 { + // goto slowForwardCopy + // } + // copy 16 bytes + // d += length + CMPQ CX, $16 + JGT slowForwardCopy + CMPQ DX, $8 + JLT slowForwardCopy + CMPQ R14, $16 + JLT slowForwardCopy + MOVQ 0(R15), AX + MOVQ AX, 0(DI) + MOVQ 8(R15), BX + MOVQ BX, 8(DI) + ADDQ CX, DI + JMP loop + +slowForwardCopy: + // !!! If the forward copy is longer than 16 bytes, or if offset < 8, we + // can still try 8-byte load stores, provided we can overrun up to 10 extra + // bytes. As above, the overrun will be fixed up by subsequent iterations + // of the outermost loop. + // + // The C++ snappy code calls this technique IncrementalCopyFastPath. Its + // commentary says: + // + // ---- + // + // The main part of this loop is a simple copy of eight bytes at a time + // until we've copied (at least) the requested amount of bytes. However, + // if d and d-offset are less than eight bytes apart (indicating a + // repeating pattern of length < 8), we first need to expand the pattern in + // order to get the correct results. For instance, if the buffer looks like + // this, with the eight-byte and patterns marked as + // intervals: + // + // abxxxxxxxxxxxx + // [------] d-offset + // [------] d + // + // a single eight-byte copy from to will repeat the pattern + // once, after which we can move two bytes without moving : + // + // ababxxxxxxxxxx + // [------] d-offset + // [------] d + // + // and repeat the exercise until the two no longer overlap. + // + // This allows us to do very well in the special case of one single byte + // repeated many times, without taking a big hit for more general cases. + // + // The worst case of extra writing past the end of the match occurs when + // offset == 1 and length == 1; the last copy will read from byte positions + // [0..7] and write to [4..11], whereas it was only supposed to write to + // position 1. Thus, ten excess bytes. + // + // ---- + // + // That "10 byte overrun" worst case is confirmed by Go's + // TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy + // and finishSlowForwardCopy algorithm. + // + // if length > len(dst)-d-10 { + // goto verySlowForwardCopy + // } + SUBQ $10, R14 + CMPQ CX, R14 + JGT verySlowForwardCopy + +makeOffsetAtLeast8: + // !!! As above, expand the pattern so that offset >= 8 and we can use + // 8-byte load/stores. + // + // for offset < 8 { + // copy 8 bytes from dst[d-offset:] to dst[d:] + // length -= offset + // d += offset + // offset += offset + // // The two previous lines together means that d-offset, and therefore + // // R15, is unchanged. + // } + CMPQ DX, $8 + JGE fixUpSlowForwardCopy + MOVQ (R15), BX + MOVQ BX, (DI) + SUBQ DX, CX + ADDQ DX, DI + ADDQ DX, DX + JMP makeOffsetAtLeast8 + +fixUpSlowForwardCopy: + // !!! Add length (which might be negative now) to d (implied by DI being + // &dst[d]) so that d ends up at the right place when we jump back to the + // top of the loop. Before we do that, though, we save DI to AX so that, if + // length is positive, copying the remaining length bytes will write to the + // right place. + MOVQ DI, AX + ADDQ CX, DI + +finishSlowForwardCopy: + // !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative + // length means that we overrun, but as above, that will be fixed up by + // subsequent iterations of the outermost loop. + CMPQ CX, $0 + JLE loop + MOVQ (R15), BX + MOVQ BX, (AX) + ADDQ $8, R15 + ADDQ $8, AX + SUBQ $8, CX + JMP finishSlowForwardCopy + +verySlowForwardCopy: + // verySlowForwardCopy is a simple implementation of forward copy. In C + // parlance, this is a do/while loop instead of a while loop, since we know + // that length > 0. In Go syntax: + // + // for { + // dst[d] = dst[d - offset] + // d++ + // length-- + // if length == 0 { + // break + // } + // } + MOVB (R15), BX + MOVB BX, (DI) + INCQ R15 + INCQ DI + DECQ CX + JNZ verySlowForwardCopy + JMP loop + +// The code above handles copy tags. +// ---------------------------------------- + +end: + // This is the end of the "for s < len(src)". + // + // if d != len(dst) { etc } + CMPQ DI, R10 + JNE errCorrupt + + // return 0 + MOVQ $0, ret+48(FP) + RET + +errCorrupt: + // return decodeErrCodeCorrupt + MOVQ $1, ret+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/decode_other.go b/vendor/github.com/klauspost/compress/snappy/decode_other.go new file mode 100644 index 00000000000..94a96c5d7b8 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/decode_other.go @@ -0,0 +1,115 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +// decode writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read, and that len(dst) +// equals that length. +// +// It returns 0 on success or a decodeErrCodeXxx error code on failure. +func decode(dst, src []byte) int { + var d, s, offset, length int + for s < len(src) { + switch src[s] & 0x03 { + case tagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + length = int(x) + 1 + if length <= 0 { + return decodeErrCodeUnsupportedLiteralLength + } + if length > len(dst)-d || length > len(src)-s { + return decodeErrCodeCorrupt + } + copy(dst[d:], src[s:s+length]) + d += length + s += length + continue + + case tagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1])) + + case tagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8) + + case tagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + return decodeErrCodeCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24) + } + + if offset <= 0 || d < offset || length > len(dst)-d { + return decodeErrCodeCorrupt + } + // Copy from an earlier sub-slice of dst to a later sub-slice. + // If no overlap, use the built-in copy: + if offset > length { + copy(dst[d:d+length], dst[d-offset:]) + d += length + continue + } + + // Unlike the built-in copy function, this byte-by-byte copy always runs + // forwards, even if the slices overlap. Conceptually, this is: + // + // d += forwardCopy(dst[d:d+length], dst[d-offset:]) + // + // We align the slices into a and b and show the compiler they are the same size. + // This allows the loop to run without bounds checks. + a := dst[d : d+length] + b := dst[d-offset:] + b = b[:len(a)] + for i := range a { + a[i] = b[i] + } + d += length + } + if d != len(dst) { + return decodeErrCodeCorrupt + } + return 0 +} diff --git a/vendor/github.com/klauspost/compress/snappy/encode.go b/vendor/github.com/klauspost/compress/snappy/encode.go new file mode 100644 index 00000000000..8d393e904bb --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/encode.go @@ -0,0 +1,285 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package snappy + +import ( + "encoding/binary" + "errors" + "io" +) + +// Encode returns the encoded form of src. The returned slice may be a sub- +// slice of dst if dst was large enough to hold the entire encoded block. +// Otherwise, a newly allocated slice will be returned. +// +// The dst and src must not overlap. It is valid to pass a nil dst. +func Encode(dst, src []byte) []byte { + if n := MaxEncodedLen(len(src)); n < 0 { + panic(ErrTooLarge) + } else if len(dst) < n { + dst = make([]byte, n) + } + + // The block starts with the varint-encoded length of the decompressed bytes. + d := binary.PutUvarint(dst, uint64(len(src))) + + for len(src) > 0 { + p := src + src = nil + if len(p) > maxBlockSize { + p, src = p[:maxBlockSize], p[maxBlockSize:] + } + if len(p) < minNonLiteralBlockSize { + d += emitLiteral(dst[d:], p) + } else { + d += encodeBlock(dst[d:], p) + } + } + return dst[:d] +} + +// inputMargin is the minimum number of extra input bytes to keep, inside +// encodeBlock's inner loop. On some architectures, this margin lets us +// implement a fast path for emitLiteral, where the copy of short (<= 16 byte) +// literals can be implemented as a single load to and store from a 16-byte +// register. That literal's actual length can be as short as 1 byte, so this +// can copy up to 15 bytes too much, but that's OK as subsequent iterations of +// the encoding loop will fix up the copy overrun, and this inputMargin ensures +// that we don't overrun the dst and src buffers. +const inputMargin = 16 - 1 + +// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that +// could be encoded with a copy tag. This is the minimum with respect to the +// algorithm used by encodeBlock, not a minimum enforced by the file format. +// +// The encoded output must start with at least a 1 byte literal, as there are +// no previous bytes to copy. A minimal (1 byte) copy after that, generated +// from an emitCopy call in encodeBlock's main loop, would require at least +// another inputMargin bytes, for the reason above: we want any emitLiteral +// calls inside encodeBlock's main loop to use the fast path if possible, which +// requires being able to overrun by inputMargin bytes. Thus, +// minNonLiteralBlockSize equals 1 + 1 + inputMargin. +// +// The C++ code doesn't use this exact threshold, but it could, as discussed at +// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion +// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an +// optimization. It should not affect the encoded form. This is tested by +// TestSameEncodingAsCppShortCopies. +const minNonLiteralBlockSize = 1 + 1 + inputMargin + +// MaxEncodedLen returns the maximum length of a snappy block, given its +// uncompressed length. +// +// It will return a negative value if srcLen is too large to encode. +func MaxEncodedLen(srcLen int) int { + n := uint64(srcLen) + if n > 0xffffffff { + return -1 + } + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // That is, 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + n = 32 + n + n/6 + if n > 0xffffffff { + return -1 + } + return int(n) +} + +var errClosed = errors.New("snappy: Writer is closed") + +// NewWriter returns a new Writer that compresses to w. +// +// The Writer returned does not buffer writes. There is no need to Flush or +// Close such a Writer. +// +// Deprecated: the Writer returned is not suitable for many small writes, only +// for few large writes. Use NewBufferedWriter instead, which is efficient +// regardless of the frequency and shape of the writes, and remember to Close +// that Writer when done. +func NewWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + obuf: make([]byte, obufLen), + } +} + +// NewBufferedWriter returns a new Writer that compresses to w, using the +// framing format described at +// https://github.com/google/snappy/blob/master/framing_format.txt +// +// The Writer returned buffers writes. Users must call Close to guarantee all +// data has been forwarded to the underlying io.Writer. They may also call +// Flush zero or more times before calling Close. +func NewBufferedWriter(w io.Writer) *Writer { + return &Writer{ + w: w, + ibuf: make([]byte, 0, maxBlockSize), + obuf: make([]byte, obufLen), + } +} + +// Writer is an io.Writer that can write Snappy-compressed bytes. +type Writer struct { + w io.Writer + err error + + // ibuf is a buffer for the incoming (uncompressed) bytes. + // + // Its use is optional. For backwards compatibility, Writers created by the + // NewWriter function have ibuf == nil, do not buffer incoming bytes, and + // therefore do not need to be Flush'ed or Close'd. + ibuf []byte + + // obuf is a buffer for the outgoing (compressed) bytes. + obuf []byte + + // wroteStreamHeader is whether we have written the stream header. + wroteStreamHeader bool +} + +// Reset discards the writer's state and switches the Snappy writer to write to +// w. This permits reusing a Writer rather than allocating a new one. +func (w *Writer) Reset(writer io.Writer) { + w.w = writer + w.err = nil + if w.ibuf != nil { + w.ibuf = w.ibuf[:0] + } + w.wroteStreamHeader = false +} + +// Write satisfies the io.Writer interface. +func (w *Writer) Write(p []byte) (nRet int, errRet error) { + if w.ibuf == nil { + // Do not buffer incoming bytes. This does not perform or compress well + // if the caller of Writer.Write writes many small slices. This + // behavior is therefore deprecated, but still supported for backwards + // compatibility with code that doesn't explicitly Flush or Close. + return w.write(p) + } + + // The remainder of this method is based on bufio.Writer.Write from the + // standard library. + + for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil { + var n int + if len(w.ibuf) == 0 { + // Large write, empty buffer. + // Write directly from p to avoid copy. + n, _ = w.write(p) + } else { + n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + w.Flush() + } + nRet += n + p = p[n:] + } + if w.err != nil { + return nRet, w.err + } + n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p) + w.ibuf = w.ibuf[:len(w.ibuf)+n] + nRet += n + return nRet, nil +} + +func (w *Writer) write(p []byte) (nRet int, errRet error) { + if w.err != nil { + return 0, w.err + } + for len(p) > 0 { + obufStart := len(magicChunk) + if !w.wroteStreamHeader { + w.wroteStreamHeader = true + copy(w.obuf, magicChunk) + obufStart = 0 + } + + var uncompressed []byte + if len(p) > maxBlockSize { + uncompressed, p = p[:maxBlockSize], p[maxBlockSize:] + } else { + uncompressed, p = p, nil + } + checksum := crc(uncompressed) + + // Compress the buffer, discarding the result if the improvement + // isn't at least 12.5%. + compressed := Encode(w.obuf[obufHeaderLen:], uncompressed) + chunkType := uint8(chunkTypeCompressedData) + chunkLen := 4 + len(compressed) + obufEnd := obufHeaderLen + len(compressed) + if len(compressed) >= len(uncompressed)-len(uncompressed)/8 { + chunkType = chunkTypeUncompressedData + chunkLen = 4 + len(uncompressed) + obufEnd = obufHeaderLen + } + + // Fill in the per-chunk header that comes before the body. + w.obuf[len(magicChunk)+0] = chunkType + w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0) + w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8) + w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16) + w.obuf[len(magicChunk)+4] = uint8(checksum >> 0) + w.obuf[len(magicChunk)+5] = uint8(checksum >> 8) + w.obuf[len(magicChunk)+6] = uint8(checksum >> 16) + w.obuf[len(magicChunk)+7] = uint8(checksum >> 24) + + if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil { + w.err = err + return nRet, err + } + if chunkType == chunkTypeUncompressedData { + if _, err := w.w.Write(uncompressed); err != nil { + w.err = err + return nRet, err + } + } + nRet += len(uncompressed) + } + return nRet, nil +} + +// Flush flushes the Writer to its underlying io.Writer. +func (w *Writer) Flush() error { + if w.err != nil { + return w.err + } + if len(w.ibuf) == 0 { + return nil + } + w.write(w.ibuf) + w.ibuf = w.ibuf[:0] + return w.err +} + +// Close calls Flush and then closes the Writer. +func (w *Writer) Close() error { + w.Flush() + ret := w.err + if w.err == nil { + w.err = errClosed + } + return ret +} diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.go b/vendor/github.com/klauspost/compress/snappy/encode_amd64.go new file mode 100644 index 00000000000..150d91bc8be --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/encode_amd64.go @@ -0,0 +1,29 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +package snappy + +// emitLiteral has the same semantics as in encode_other.go. +// +//go:noescape +func emitLiteral(dst, lit []byte) int + +// emitCopy has the same semantics as in encode_other.go. +// +//go:noescape +func emitCopy(dst []byte, offset, length int) int + +// extendMatch has the same semantics as in encode_other.go. +// +//go:noescape +func extendMatch(src []byte, i, j int) int + +// encodeBlock has the same semantics as in encode_other.go. +// +//go:noescape +func encodeBlock(dst, src []byte) (d int) diff --git a/vendor/github.com/klauspost/compress/snappy/encode_amd64.s b/vendor/github.com/klauspost/compress/snappy/encode_amd64.s new file mode 100644 index 00000000000..adfd979fe27 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/encode_amd64.s @@ -0,0 +1,730 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a +// Go toolchain regression. See https://github.com/golang/go/issues/15426 and +// https://github.com/golang/snappy/issues/29 +// +// As a workaround, the package was built with a known good assembler, and +// those instructions were disassembled by "objdump -d" to yield the +// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 +// style comments, in AT&T asm syntax. Note that rsp here is a physical +// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm). +// The instructions were then encoded as "BYTE $0x.." sequences, which assemble +// fine on Go 1.6. + +// The asm code generally follows the pure Go code in encode_other.go, except +// where marked with a "!!!". + +// ---------------------------------------------------------------------------- + +// func emitLiteral(dst, lit []byte) int +// +// All local variables fit into registers. The register allocation: +// - AX len(lit) +// - BX n +// - DX return value +// - DI &dst[i] +// - R10 &lit[0] +// +// The 24 bytes of stack space is to call runtime·memmove. +// +// The unusual register allocation of local variables, such as R10 for the +// source pointer, matches the allocation used at the call site in encodeBlock, +// which makes it easier to manually inline this function. +TEXT ·emitLiteral(SB), NOSPLIT, $24-56 + MOVQ dst_base+0(FP), DI + MOVQ lit_base+24(FP), R10 + MOVQ lit_len+32(FP), AX + MOVQ AX, DX + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT oneByte + CMPL BX, $256 + JLT twoBytes + +threeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + ADDQ $3, DX + JMP memmove + +twoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + ADDQ $2, DX + JMP memmove + +oneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + ADDQ $1, DX + +memmove: + MOVQ DX, ret+48(FP) + + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + CALL runtime·memmove(SB) + RET + +// ---------------------------------------------------------------------------- + +// func emitCopy(dst []byte, offset, length int) int +// +// All local variables fit into registers. The register allocation: +// - AX length +// - SI &dst[0] +// - DI &dst[i] +// - R11 offset +// +// The unusual register allocation of local variables, such as R11 for the +// offset, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·emitCopy(SB), NOSPLIT, $0-48 + MOVQ dst_base+0(FP), DI + MOVQ DI, SI + MOVQ offset+24(FP), R11 + MOVQ length+32(FP), AX + +loop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT step1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP loop0 + +step1: + // if length > 64 { etc } + CMPL AX, $64 + JLE step2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +step2: + // if length >= 12 || offset >= 2048 { goto step3 } + CMPL AX, $12 + JGE step3 + CMPL R11, $2048 + JGE step3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +step3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + + // Return the number of bytes written. + SUBQ SI, DI + MOVQ DI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func extendMatch(src []byte, i, j int) int +// +// All local variables fit into registers. The register allocation: +// - DX &src[0] +// - SI &src[j] +// - R13 &src[len(src) - 8] +// - R14 &src[len(src)] +// - R15 &src[i] +// +// The unusual register allocation of local variables, such as R15 for a source +// pointer, matches the allocation used at the call site in encodeBlock, which +// makes it easier to manually inline this function. +TEXT ·extendMatch(SB), NOSPLIT, $0-48 + MOVQ src_base+0(FP), DX + MOVQ src_len+8(FP), R14 + MOVQ i+24(FP), R15 + MOVQ j+32(FP), SI + ADDQ DX, R14 + ADDQ DX, R15 + ADDQ DX, SI + MOVQ R14, R13 + SUBQ $8, R13 + +cmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA cmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE bsf + ADDQ $8, R15 + ADDQ $8, SI + JMP cmp8 + +bsf: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +cmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE extendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE extendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP cmp1 + +extendMatchEnd: + // Convert from &src[ret] to ret. + SUBQ DX, SI + MOVQ SI, ret+40(FP) + RET + +// ---------------------------------------------------------------------------- + +// func encodeBlock(dst, src []byte) (d int) +// +// All local variables fit into registers, other than "var table". The register +// allocation: +// - AX . . +// - BX . . +// - CX 56 shift (note that amd64 shifts by non-immediates must use CX). +// - DX 64 &src[0], tableSize +// - SI 72 &src[s] +// - DI 80 &dst[d] +// - R9 88 sLimit +// - R10 . &src[nextEmit] +// - R11 96 prevHash, currHash, nextHash, offset +// - R12 104 &src[base], skip +// - R13 . &src[nextS], &src[len(src) - 8] +// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x +// - R15 112 candidate +// +// The second column (56, 64, etc) is the stack offset to spill the registers +// when calling other functions. We could pack this slightly tighter, but it's +// simpler to have a dedicated spill map independent of the function called. +// +// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An +// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill +// local variables (registers) during calls gives 32768 + 56 + 64 = 32888. +TEXT ·encodeBlock(SB), 0, $32888-56 + MOVQ dst_base+0(FP), DI + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R14 + + // shift, tableSize := uint32(32-8), 1<<8 + MOVQ $24, CX + MOVQ $256, DX + +calcShift: + // for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + // shift-- + // } + CMPQ DX, $16384 + JGE varTable + CMPQ DX, R14 + JGE varTable + SUBQ $1, CX + SHLQ $1, DX + JMP calcShift + +varTable: + // var table [maxTableSize]uint16 + // + // In the asm code, unlike the Go code, we can zero-initialize only the + // first tableSize elements. Each uint16 element is 2 bytes and each MOVOU + // writes 16 bytes, so we can do only tableSize/8 writes instead of the + // 2048 writes that would zero-initialize all of table's 32768 bytes. + SHRQ $3, DX + LEAQ table-32768(SP), BX + PXOR X0, X0 + +memclr: + MOVOU X0, 0(BX) + ADDQ $16, BX + SUBQ $1, DX + JNZ memclr + + // !!! DX = &src[0] + MOVQ SI, DX + + // sLimit := len(src) - inputMargin + MOVQ R14, R9 + SUBQ $15, R9 + + // !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't + // change for the rest of the function. + MOVQ CX, 56(SP) + MOVQ DX, 64(SP) + MOVQ R9, 88(SP) + + // nextEmit := 0 + MOVQ DX, R10 + + // s := 1 + ADDQ $1, SI + + // nextHash := hash(load32(src, s), shift) + MOVL 0(SI), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + +outer: + // for { etc } + + // skip := 32 + MOVQ $32, R12 + + // nextS := s + MOVQ SI, R13 + + // candidate := 0 + MOVQ $0, R15 + +inner0: + // for { etc } + + // s := nextS + MOVQ R13, SI + + // bytesBetweenHashLookups := skip >> 5 + MOVQ R12, R14 + SHRQ $5, R14 + + // nextS = s + bytesBetweenHashLookups + ADDQ R14, R13 + + // skip += bytesBetweenHashLookups + ADDQ R14, R12 + + // if nextS > sLimit { goto emitRemainder } + MOVQ R13, AX + SUBQ DX, AX + CMPQ AX, R9 + JA emitRemainder + + // candidate = int(table[nextHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[nextHash] = uint16(s) + MOVQ SI, AX + SUBQ DX, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // nextHash = hash(load32(src, nextS), shift) + MOVL 0(R13), R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // if load32(src, s) != load32(src, candidate) { continue } break + MOVL 0(SI), AX + MOVL (DX)(R15*1), BX + CMPL AX, BX + JNE inner0 + +fourByteMatch: + // As per the encode_other.go code: + // + // A 4-byte match has been found. We'll later see etc. + + // !!! Jump to a fast path for short (<= 16 byte) literals. See the comment + // on inputMargin in encode.go. + MOVQ SI, AX + SUBQ R10, AX + CMPQ AX, $16 + JLE emitLiteralFastPath + + // ---------------------------------------- + // Begin inline of the emitLiteral call. + // + // d += emitLiteral(dst[d:], src[nextEmit:s]) + + MOVL AX, BX + SUBL $1, BX + + CMPL BX, $60 + JLT inlineEmitLiteralOneByte + CMPL BX, $256 + JLT inlineEmitLiteralTwoBytes + +inlineEmitLiteralThreeBytes: + MOVB $0xf4, 0(DI) + MOVW BX, 1(DI) + ADDQ $3, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralTwoBytes: + MOVB $0xf0, 0(DI) + MOVB BX, 1(DI) + ADDQ $2, DI + JMP inlineEmitLiteralMemmove + +inlineEmitLiteralOneByte: + SHLB $2, BX + MOVB BX, 0(DI) + ADDQ $1, DI + +inlineEmitLiteralMemmove: + // Spill local variables (registers) onto the stack; call; unspill. + // + // copy(dst[i:], lit) + // + // This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push + // DI, R10 and AX as arguments. + MOVQ DI, 0(SP) + MOVQ R10, 8(SP) + MOVQ AX, 16(SP) + ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)". + MOVQ SI, 72(SP) + MOVQ DI, 80(SP) + MOVQ R15, 112(SP) + CALL runtime·memmove(SB) + MOVQ 56(SP), CX + MOVQ 64(SP), DX + MOVQ 72(SP), SI + MOVQ 80(SP), DI + MOVQ 88(SP), R9 + MOVQ 112(SP), R15 + JMP inner1 + +inlineEmitLiteralEnd: + // End inline of the emitLiteral call. + // ---------------------------------------- + +emitLiteralFastPath: + // !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2". + MOVB AX, BX + SUBB $1, BX + SHLB $2, BX + MOVB BX, (DI) + ADDQ $1, DI + + // !!! Implement the copy from lit to dst as a 16-byte load and store. + // (Encode's documentation says that dst and src must not overlap.) + // + // This always copies 16 bytes, instead of only len(lit) bytes, but that's + // OK. Subsequent iterations will fix up the overrun. + // + // Note that on amd64, it is legal and cheap to issue unaligned 8-byte or + // 16-byte loads and stores. This technique probably wouldn't be as + // effective on architectures that are fussier about alignment. + MOVOU 0(R10), X0 + MOVOU X0, 0(DI) + ADDQ AX, DI + +inner1: + // for { etc } + + // base := s + MOVQ SI, R12 + + // !!! offset := base - candidate + MOVQ R12, R11 + SUBQ R15, R11 + SUBQ DX, R11 + + // ---------------------------------------- + // Begin inline of the extendMatch call. + // + // s = extendMatch(src, candidate+4, s+4) + + // !!! R14 = &src[len(src)] + MOVQ src_len+32(FP), R14 + ADDQ DX, R14 + + // !!! R13 = &src[len(src) - 8] + MOVQ R14, R13 + SUBQ $8, R13 + + // !!! R15 = &src[candidate + 4] + ADDQ $4, R15 + ADDQ DX, R15 + + // !!! s += 4 + ADDQ $4, SI + +inlineExtendMatchCmp8: + // As long as we are 8 or more bytes before the end of src, we can load and + // compare 8 bytes at a time. If those 8 bytes are equal, repeat. + CMPQ SI, R13 + JA inlineExtendMatchCmp1 + MOVQ (R15), AX + MOVQ (SI), BX + CMPQ AX, BX + JNE inlineExtendMatchBSF + ADDQ $8, R15 + ADDQ $8, SI + JMP inlineExtendMatchCmp8 + +inlineExtendMatchBSF: + // If those 8 bytes were not equal, XOR the two 8 byte values, and return + // the index of the first byte that differs. The BSF instruction finds the + // least significant 1 bit, the amd64 architecture is little-endian, and + // the shift by 3 converts a bit index to a byte index. + XORQ AX, BX + BSFQ BX, BX + SHRQ $3, BX + ADDQ BX, SI + JMP inlineExtendMatchEnd + +inlineExtendMatchCmp1: + // In src's tail, compare 1 byte at a time. + CMPQ SI, R14 + JAE inlineExtendMatchEnd + MOVB (R15), AX + MOVB (SI), BX + CMPB AX, BX + JNE inlineExtendMatchEnd + ADDQ $1, R15 + ADDQ $1, SI + JMP inlineExtendMatchCmp1 + +inlineExtendMatchEnd: + // End inline of the extendMatch call. + // ---------------------------------------- + + // ---------------------------------------- + // Begin inline of the emitCopy call. + // + // d += emitCopy(dst[d:], base-candidate, s-base) + + // !!! length := s - base + MOVQ SI, AX + SUBQ R12, AX + +inlineEmitCopyLoop0: + // for length >= 68 { etc } + CMPL AX, $68 + JLT inlineEmitCopyStep1 + + // Emit a length 64 copy, encoded as 3 bytes. + MOVB $0xfe, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $64, AX + JMP inlineEmitCopyLoop0 + +inlineEmitCopyStep1: + // if length > 64 { etc } + CMPL AX, $64 + JLE inlineEmitCopyStep2 + + // Emit a length 60 copy, encoded as 3 bytes. + MOVB $0xee, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + SUBL $60, AX + +inlineEmitCopyStep2: + // if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 } + CMPL AX, $12 + JGE inlineEmitCopyStep3 + CMPL R11, $2048 + JGE inlineEmitCopyStep3 + + // Emit the remaining copy, encoded as 2 bytes. + MOVB R11, 1(DI) + SHRL $8, R11 + SHLB $5, R11 + SUBB $4, AX + SHLB $2, AX + ORB AX, R11 + ORB $1, R11 + MOVB R11, 0(DI) + ADDQ $2, DI + JMP inlineEmitCopyEnd + +inlineEmitCopyStep3: + // Emit the remaining copy, encoded as 3 bytes. + SUBL $1, AX + SHLB $2, AX + ORB $2, AX + MOVB AX, 0(DI) + MOVW R11, 1(DI) + ADDQ $3, DI + +inlineEmitCopyEnd: + // End inline of the emitCopy call. + // ---------------------------------------- + + // nextEmit = s + MOVQ SI, R10 + + // if s >= sLimit { goto emitRemainder } + MOVQ SI, AX + SUBQ DX, AX + CMPQ AX, R9 + JAE emitRemainder + + // As per the encode_other.go code: + // + // We could immediately etc. + + // x := load64(src, s-1) + MOVQ -1(SI), R14 + + // prevHash := hash(uint32(x>>0), shift) + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // table[prevHash] = uint16(s-1) + MOVQ SI, AX + SUBQ DX, AX + SUBQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // currHash := hash(uint32(x>>8), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // candidate = int(table[currHash]) + // XXX: MOVWQZX table-32768(SP)(R11*2), R15 + // XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15 + BYTE $0x4e + BYTE $0x0f + BYTE $0xb7 + BYTE $0x7c + BYTE $0x5c + BYTE $0x78 + + // table[currHash] = uint16(s) + ADDQ $1, AX + + // XXX: MOVW AX, table-32768(SP)(R11*2) + // XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2) + BYTE $0x66 + BYTE $0x42 + BYTE $0x89 + BYTE $0x44 + BYTE $0x5c + BYTE $0x78 + + // if uint32(x>>8) == load32(src, candidate) { continue } + MOVL (DX)(R15*1), BX + CMPL R14, BX + JEQ inner1 + + // nextHash = hash(uint32(x>>16), shift) + SHRQ $8, R14 + MOVL R14, R11 + IMULL $0x1e35a7bd, R11 + SHRL CX, R11 + + // s++ + ADDQ $1, SI + + // break out of the inner1 for loop, i.e. continue the outer loop. + JMP outer + +emitRemainder: + // if nextEmit < len(src) { etc } + MOVQ src_len+32(FP), AX + ADDQ DX, AX + CMPQ R10, AX + JEQ encodeBlockEnd + + // d += emitLiteral(dst[d:], src[nextEmit:]) + // + // Push args. + MOVQ DI, 0(SP) + MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative. + MOVQ R10, 24(SP) + SUBQ R10, AX + MOVQ AX, 32(SP) + MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative. + + // Spill local variables (registers) onto the stack; call; unspill. + MOVQ DI, 80(SP) + CALL ·emitLiteral(SB) + MOVQ 80(SP), DI + + // Finish the "d +=" part of "d += emitLiteral(etc)". + ADDQ 48(SP), DI + +encodeBlockEnd: + MOVQ dst_base+0(FP), AX + SUBQ AX, DI + MOVQ DI, d+48(FP) + RET diff --git a/vendor/github.com/klauspost/compress/snappy/encode_other.go b/vendor/github.com/klauspost/compress/snappy/encode_other.go new file mode 100644 index 00000000000..dbcae905e6e --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/encode_other.go @@ -0,0 +1,238 @@ +// Copyright 2016 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build !amd64 appengine !gc noasm + +package snappy + +func load32(b []byte, i int) uint32 { + b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load64(b []byte, i int) uint64 { + b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line. + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +// emitLiteral writes a literal chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= len(lit) && len(lit) <= 65536 +func emitLiteral(dst, lit []byte) int { + i, n := 0, uint(len(lit)-1) + switch { + case n < 60: + dst[0] = uint8(n)<<2 | tagLiteral + i = 1 + case n < 1<<8: + dst[0] = 60<<2 | tagLiteral + dst[1] = uint8(n) + i = 2 + default: + dst[0] = 61<<2 | tagLiteral + dst[1] = uint8(n) + dst[2] = uint8(n >> 8) + i = 3 + } + return i + copy(dst[i:], lit) +} + +// emitCopy writes a copy chunk and returns the number of bytes written. +// +// It assumes that: +// dst is long enough to hold the encoded bytes +// 1 <= offset && offset <= 65535 +// 4 <= length && length <= 65535 +func emitCopy(dst []byte, offset, length int) int { + i := 0 + // The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The + // threshold for this loop is a little higher (at 68 = 64 + 4), and the + // length emitted down below is is a little lower (at 60 = 64 - 4), because + // it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed + // by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as + // a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as + // 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a + // tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an + // encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1. + for length >= 68 { + // Emit a length 64 copy, encoded as 3 bytes. + dst[i+0] = 63<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 64 + } + if length > 64 { + // Emit a length 60 copy, encoded as 3 bytes. + dst[i+0] = 59<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + i += 3 + length -= 60 + } + if length >= 12 || offset >= 2048 { + // Emit the remaining copy, encoded as 3 bytes. + dst[i+0] = uint8(length-1)<<2 | tagCopy2 + dst[i+1] = uint8(offset) + dst[i+2] = uint8(offset >> 8) + return i + 3 + } + // Emit the remaining copy, encoded as 2 bytes. + dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1 + dst[i+1] = uint8(offset) + return i + 2 +} + +// extendMatch returns the largest k such that k <= len(src) and that +// src[i:i+k-j] and src[j:k] have the same contents. +// +// It assumes that: +// 0 <= i && i < j && j <= len(src) +func extendMatch(src []byte, i, j int) int { + for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 { + } + return j +} + +func hash(u, shift uint32) uint32 { + return (u * 0x1e35a7bd) >> shift +} + +// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It +// assumes that the varint-encoded length of the decompressed bytes has already +// been written. +// +// It also assumes that: +// len(dst) >= MaxEncodedLen(len(src)) && +// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize +func encodeBlock(dst, src []byte) (d int) { + // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. + // The table element type is uint16, as s < sLimit and sLimit < len(src) + // and len(src) <= maxBlockSize and maxBlockSize == 65536. + const ( + maxTableSize = 1 << 14 + // tableMask is redundant, but helps the compiler eliminate bounds + // checks. + tableMask = maxTableSize - 1 + ) + shift := uint32(32 - 8) + for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 { + shift-- + } + // In Go, all array elements are zero-initialized, so there is no advantage + // to a smaller tableSize per se. However, it matches the C++ algorithm, + // and in the asm versions of this code, we can get away with zeroing only + // the first tableSize elements. + var table [maxTableSize]uint16 + + // sLimit is when to stop looking for offset/length copies. The inputMargin + // lets us use a fast path for emitLiteral in the main loop, while we are + // looking for copies. + sLimit := len(src) - inputMargin + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := 0 + + // The encoded form must start with a literal, as there are no previous + // bytes to copy, so we start looking for hash matches at s == 1. + s := 1 + nextHash := hash(load32(src, s), shift) + + for { + // Copied from the C++ snappy implementation: + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned (or skipped), look at every third byte, etc.. When a match + // is found, immediately go back to looking at every byte. This is a + // small loss (~5% performance, ~0.1% density) for compressible data + // due to more bookkeeping, but for non-compressible data (such as + // JPEG) it's a huge win since the compressor quickly "realizes" the + // data is incompressible and doesn't bother looking for matches + // everywhere. + // + // The "skip" variable keeps track of how many bytes there are since + // the last match; dividing it by 32 (ie. right-shifting by five) gives + // the number of bytes to move ahead for each iteration. + skip := 32 + + nextS := s + candidate := 0 + for { + s = nextS + bytesBetweenHashLookups := skip >> 5 + nextS = s + bytesBetweenHashLookups + skip += bytesBetweenHashLookups + if nextS > sLimit { + goto emitRemainder + } + candidate = int(table[nextHash&tableMask]) + table[nextHash&tableMask] = uint16(s) + nextHash = hash(load32(src, nextS), shift) + if load32(src, s) == load32(src, candidate) { + break + } + } + + // A 4-byte match has been found. We'll later see if more than 4 bytes + // match. But, prior to the match, src[nextEmit:s] are unmatched. Emit + // them as literal bytes. + d += emitLiteral(dst[d:], src[nextEmit:s]) + + // Call emitCopy, and then see if another emitCopy could be our next + // move. Repeat until we find no match for the input immediately after + // what was consumed by the last emitCopy call. + // + // If we exit this loop normally then we need to call emitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can + // exit this loop via goto if we get close to exhausting the input. + for { + // Invariant: we have a 4-byte match at s, and no need to emit any + // literal bytes prior to s. + base := s + + // Extend the 4-byte match as long as possible. + // + // This is an inlined version of: + // s = extendMatch(src, candidate+4, s+4) + s += 4 + for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 { + } + + d += emitCopy(dst[d:], base-candidate, s-base) + nextEmit = s + if s >= sLimit { + goto emitRemainder + } + + // We could immediately start working at s now, but to improve + // compression we first update the hash table at s-1 and at s. If + // another emitCopy is not our next move, also calculate nextHash + // at s+1. At least on GOARCH=amd64, these three hash calculations + // are faster as one load64 call (with some shifts) instead of + // three load32 calls. + x := load64(src, s-1) + prevHash := hash(uint32(x>>0), shift) + table[prevHash&tableMask] = uint16(s - 1) + currHash := hash(uint32(x>>8), shift) + candidate = int(table[currHash&tableMask]) + table[currHash&tableMask] = uint16(s) + if uint32(x>>8) != load32(src, candidate) { + nextHash = hash(uint32(x>>16), shift) + s++ + break + } + } + } + +emitRemainder: + if nextEmit < len(src) { + d += emitLiteral(dst[d:], src[nextEmit:]) + } + return d +} diff --git a/vendor/github.com/klauspost/compress/snappy/runbench.cmd b/vendor/github.com/klauspost/compress/snappy/runbench.cmd new file mode 100644 index 00000000000..d24eb4b47c3 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/runbench.cmd @@ -0,0 +1,2 @@ +del old.txt +go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt diff --git a/vendor/github.com/klauspost/compress/snappy/snappy.go b/vendor/github.com/klauspost/compress/snappy/snappy.go new file mode 100644 index 00000000000..74a36689e87 --- /dev/null +++ b/vendor/github.com/klauspost/compress/snappy/snappy.go @@ -0,0 +1,98 @@ +// Copyright 2011 The Snappy-Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package snappy implements the Snappy compression format. It aims for very +// high speeds and reasonable compression. +// +// There are actually two Snappy formats: block and stream. They are related, +// but different: trying to decompress block-compressed data as a Snappy stream +// will fail, and vice versa. The block format is the Decode and Encode +// functions and the stream format is the Reader and Writer types. +// +// The block format, the more common case, is used when the complete size (the +// number of bytes) of the original data is known upfront, at the time +// compression starts. The stream format, also known as the framing format, is +// for when that isn't always true. +// +// The canonical, C++ implementation is at https://github.com/google/snappy and +// it only implements the block format. +package snappy + +import ( + "hash/crc32" +) + +/* +Each encoded block begins with the varint-encoded length of the decoded data, +followed by a sequence of chunks. Chunks begin and end on byte boundaries. The +first byte of each chunk is broken into its 2 least and 6 most significant bits +called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag. +Zero means a literal tag. All other values mean a copy tag. + +For literal tags: + - If m < 60, the next 1 + m bytes are literal bytes. + - Otherwise, let n be the little-endian unsigned integer denoted by the next + m - 59 bytes. The next 1 + n bytes after that are literal bytes. + +For copy tags, length bytes are copied from offset bytes ago, in the style of +Lempel-Ziv compression algorithms. In particular: + - For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12). + The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10 + of the offset. The next byte is bits 0-7 of the offset. + - For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65). + The length is 1 + m. The offset is the little-endian unsigned integer + denoted by the next 2 bytes. + - For l == 3, this tag is a legacy format that is no longer issued by most + encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in + [1, 65). The length is 1 + m. The offset is the little-endian unsigned + integer denoted by the next 4 bytes. +*/ +const ( + tagLiteral = 0x00 + tagCopy1 = 0x01 + tagCopy2 = 0x02 + tagCopy4 = 0x03 +) + +const ( + checksumSize = 4 + chunkHeaderSize = 4 + magicChunk = "\xff\x06\x00\x00" + magicBody + magicBody = "sNaPpY" + + // maxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + maxBlockSize = 65536 + + // maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + maxEncodedLenOfMaxBlockSize = 76490 + + obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize + obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func crc(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} diff --git a/vendor/github.com/klauspost/compress/zstd/README.md b/vendor/github.com/klauspost/compress/zstd/README.md new file mode 100644 index 00000000000..bc977a30234 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/README.md @@ -0,0 +1,393 @@ +# zstd + +[Zstandard](https://facebook.github.io/zstd/) is a real-time compression algorithm, providing high compression ratios. +It offers a very wide range of compression / speed trade-off, while being backed by a very fast decoder. +A high performance compression algorithm is implemented. For now focused on speed. + +This package provides [compression](#Compressor) to and [decompression](#Decompressor) of Zstandard content. +Note that custom dictionaries are not supported yet, so if your code relies on that, +you cannot use the package as-is. + +This package is pure Go and without use of "unsafe". +If a significant speedup can be achieved using "unsafe", it may be added as an option later. + +The `zstd` package is provided as open source software using a Go standard license. + +Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors. + +## Installation + +Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`. + +Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd + + +## Compressor + +### Status: + +STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively +used by several projects. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), +kindly supplied by [fuzzit.dev](https://fuzzit.dev/). + +There may still be specific combinations of data types/size/settings that could lead to edge cases, +so as always, testing is recommended. + +For now, a high speed (fastest) and medium-fast (default) compressor has been implemented. + +The "Fastest" compression ratio is roughly equivalent to zstd level 1. +The "Default" compression ratio is roughly equivalent to zstd level 3 (default). + +In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode. +The compression ratio compared to stdlib is around level 3, but usually 3x as fast. + +Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2. + + +### Usage + +An Encoder can be used for either compressing a stream via the +`io.WriteCloser` interface supported by the Encoder or as multiple independent +tasks via the `EncodeAll` function. +Smaller encodes are encouraged to use the EncodeAll function. +Use `NewWriter` to create a new instance that can be used for both. + +To create a writer with default options, do like this: + +```Go +// Compress input to output. +func Compress(in io.Reader, out io.Writer) error { + w, err := NewWriter(output) + if err != nil { + return err + } + _, err := io.Copy(w, input) + if err != nil { + enc.Close() + return err + } + return enc.Close() +} +``` + +Now you can encode by writing data to `enc`. The output will be finished writing when `Close()` is called. +Even if your encode fails, you should still call `Close()` to release any resources that may be held up. + +The above is fine for big encodes. However, whenever possible try to *reuse* the writer. + +To reuse the encoder, you can use the `Reset(io.Writer)` function to change to another output. +This will allow the encoder to reuse all resources and avoid wasteful allocations. + +Currently stream encoding has 'light' concurrency, meaning up to 2 goroutines can be working on part +of a stream. This is independent of the `WithEncoderConcurrency(n)`, but that is likely to change +in the future. So if you want to limit concurrency for future updates, specify the concurrency +you would like. + +You can specify your desired compression level using `WithEncoderLevel()` option. Currently only pre-defined +compression settings can be specified. + +#### Future Compatibility Guarantees + +This will be an evolving project. When using this package it is important to note that both the compression efficiency and speed may change. + +The goal will be to keep the default efficiency at the default zstd (level 3). +However the encoding should never be assumed to remain the same, +and you should not use hashes of compressed output for similarity checks. + +The Encoder can be assumed to produce the same output from the exact same code version. +However, the may be modes in the future that break this, +although they will not be enabled without an explicit option. + +This encoder is not designed to (and will probably never) output the exact same bitstream as the reference encoder. + +Also note, that the cgo decompressor currently does not [report all errors on invalid input](https://github.com/DataDog/zstd/issues/59), +[omits error checks](https://github.com/DataDog/zstd/issues/61), [ignores checksums](https://github.com/DataDog/zstd/issues/43) +and seems to ignore concatenated streams, even though [it is part of the spec](https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frames). + +#### Blocks + +For compressing small blocks, the returned encoder has a function called `EncodeAll(src, dst []byte) []byte`. + +`EncodeAll` will encode all input in src and append it to dst. +This function can be called concurrently, but each call will only run on a single goroutine. + +Encoded blocks can be concatenated and the result will be the combined input stream. +Data compressed with EncodeAll can be decoded with the Decoder, using either a stream or `DecodeAll`. + +Especially when encoding blocks you should take special care to reuse the encoder. +This will effectively make it run without allocations after a warmup period. +To make it run completely without allocations, supply a destination buffer with space for all content. + +```Go +import "github.com/klauspost/compress/zstd" + +// Create a writer that caches compressors. +// For this operation type we supply a nil Reader. +var encoder, _ = zstd.NewWriter(nil) + +// Compress a buffer. +// If you have a destination buffer, the allocation in the call can also be eliminated. +func Compress(src []byte) []byte { + return encoder.EncodeAll(src, make([]byte, 0, len(src))) +} +``` + +You can control the maximum number of concurrent encodes using the `WithEncoderConcurrency(n)` +option when creating the writer. + +Using the Encoder for both a stream and individual blocks concurrently is safe. + +### Performance + +I have collected some speed examples to compare speed and compression against other compressors. + +* `file` is the input file. +* `out` is the compressor used. `zskp` is this package. `gzstd` is gzip standard library. `zstd` is the Datadog cgo library. +* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default". +* `insize`/`outsize` is the input/output size. +* `millis` is the number of milliseconds used for compression. +* `mb/s` is megabytes (2^20 bytes) per second. + +``` +The test data for the Large Text Compression Benchmark is the first +10^9 bytes of the English Wikipedia dump on Mar. 3, 2006. +http://mattmahoney.net/dc/textdata.html + +file out level insize outsize millis mb/s +enwik9 zskp 1 1000000000 343833033 5840 163.30 +enwik9 zskp 2 1000000000 317822183 8449 112.87 +enwik9 gzstd 1 1000000000 382578136 13627 69.98 +enwik9 gzstd 3 1000000000 349139651 22344 42.68 +enwik9 zstd 1 1000000000 357416379 4838 197.12 +enwik9 zstd 3 1000000000 313734522 7556 126.21 + +GOB stream of binary data. Highly compressible. +https://files.klauspost.com/compress/gob-stream.7z + +file out level insize outsize millis mb/s +gob-stream zskp 1 1911399616 234981983 5100 357.42 +gob-stream zskp 2 1911399616 208674003 6698 272.15 +gob-stream gzstd 1 1911399616 357382641 14727 123.78 +gob-stream gzstd 3 1911399616 327835097 17005 107.19 +gob-stream zstd 1 1911399616 250787165 4075 447.22 +gob-stream zstd 3 1911399616 208191888 5511 330.77 + +Highly compressible JSON file. Similar to logs in a lot of ways. +https://files.klauspost.com/compress/adresser.001.gz + +file out level insize outsize millis mb/s +adresser.001 zskp 1 1073741824 18510122 1477 692.83 +adresser.001 zskp 2 1073741824 19831697 1705 600.59 +adresser.001 gzstd 1 1073741824 47755503 3079 332.47 +adresser.001 gzstd 3 1073741824 40052381 3051 335.63 +adresser.001 zstd 1 1073741824 16135896 994 1030.18 +adresser.001 zstd 3 1073741824 17794465 905 1131.49 + +VM Image, Linux mint with a few installed applications: +https://files.klauspost.com/compress/rawstudio-mint14.7z + +file out level insize outsize millis mb/s +rawstudio-mint14.tar zskp 1 8558382592 3648168838 33398 244.38 +rawstudio-mint14.tar zskp 2 8558382592 3376721436 50962 160.16 +rawstudio-mint14.tar gzstd 1 8558382592 3926257486 84712 96.35 +rawstudio-mint14.tar gzstd 3 8558382592 3740711978 176344 46.28 +rawstudio-mint14.tar zstd 1 8558382592 3607859742 27903 292.51 +rawstudio-mint14.tar zstd 3 8558382592 3341710879 46700 174.77 + + +The test data is designed to test archivers in realistic backup scenarios. +http://mattmahoney.net/dc/10gb.html + +file out level insize outsize millis mb/s +10gb.tar zskp 1 10065157632 4883149814 45715 209.97 +10gb.tar zskp 2 10065157632 4638110010 60970 157.44 +10gb.tar gzstd 1 10065157632 5198296126 97769 98.18 +10gb.tar gzstd 3 10065157632 4932665487 313427 30.63 +10gb.tar zstd 1 10065157632 4940796535 40391 237.65 +10gb.tar zstd 3 10065157632 4638618579 52911 181.42 + +Silesia Corpus: +http://sun.aei.polsl.pl/~sdeor/corpus/silesia.zip + +file out level insize outsize millis mb/s +silesia.tar zskp 1 211947520 73025800 1108 182.26 +silesia.tar zskp 2 211947520 67674684 1599 126.41 +silesia.tar gzstd 1 211947520 80007735 2515 80.37 +silesia.tar gzstd 3 211947520 73133380 4259 47.45 +silesia.tar zstd 1 211947520 73513991 933 216.64 +silesia.tar zstd 3 211947520 66793301 1377 146.79 +``` + +### Converters + +As part of the development process a *Snappy* -> *Zstandard* converter was also built. + +This can convert a *framed* [Snappy Stream](https://godoc.org/github.com/golang/snappy#Writer) to a zstd stream. +Note that a single block is not framed. + +Conversion is done by converting the stream directly from Snappy without intermediate full decoding. +Therefore the compression ratio is much less than what can be done by a full decompression +and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without +any errors being generated. +No CRC value is being generated and not all CRC values of the Snappy stream are checked. +However, it provides really fast re-compression of Snappy streams. + + +``` +BenchmarkSnappy_ConvertSilesia-8 1 1156001600 ns/op 183.35 MB/s +Snappy len 103008711 -> zstd len 82687318 + +BenchmarkSnappy_Enwik9-8 1 6472998400 ns/op 154.49 MB/s +Snappy len 508028601 -> zstd len 390921079 +``` + + +```Go + s := zstd.SnappyConverter{} + n, err = s.Convert(input, output) + if err != nil { + fmt.Println("Re-compressed stream to", n, "bytes") + } +``` + +The converter `s` can be reused to avoid allocations, even after errors. + + +## Decompressor + +Staus: STABLE - there may still be subtle bugs, but a wide variety of content has been tested. + +This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz), +kindly supplied by [fuzzit.dev](https://fuzzit.dev/). +The main purpose of the fuzz testing is to ensure that it is not possible to crash the decoder, +or run it past its limits with ANY input provided. + +### Usage + +The package has been designed for two main usages, big streams of data and smaller in-memory buffers. +There are two main usages of the package for these. Both of them are accessed by creating a `Decoder`. + +For streaming use a simple setup could look like this: + +```Go +import "github.com/klauspost/compress/zstd" + +func Decompress(in io.Reader, out io.Writer) error { + d, err := zstd.NewReader(input) + if err != nil { + return err + } + defer d.Close() + + // Copy content... + _, err := io.Copy(out, d) + return err +} +``` + +It is important to use the "Close" function when you no longer need the Reader to stop running goroutines. +See "Allocation-less operation" below. + +For decoding buffers, it could look something like this: + +```Go +import "github.com/klauspost/compress/zstd" + +// Create a reader that caches decompressors. +// For this operation type we supply a nil Reader. +var decoder, _ = zstd.NewReader(nil) + +// Decompress a buffer. We don't supply a destination buffer, +// so it will be allocated by the decoder. +func Decompress(src []byte) ([]byte, error) { + return decoder.DecodeAll(src, nil) +} +``` + +Both of these cases should provide the functionality needed. +The decoder can be used for *concurrent* decompression of multiple buffers. +It will only allow a certain number of concurrent operations to run. +To tweak that yourself use the `WithDecoderConcurrency(n)` option when creating the decoder. + +### Allocation-less operation + +The decoder has been designed to operate without allocations after a warmup. + +This means that you should *store* the decoder for best performance. +To re-use a stream decoder, use the `Reset(r io.Reader) error` to switch to another stream. +A decoder can safely be re-used even if the previous stream failed. + +To release the resources, you must call the `Close()` function on a decoder. +After this it can *no longer be reused*, but all running goroutines will be stopped. +So you *must* use this if you will no longer need the Reader. + +For decompressing smaller buffers a single decoder can be used. +When decoding buffers, you can supply a destination slice with length 0 and your expected capacity. +In this case no unneeded allocations should be made. + +### Concurrency + +The buffer decoder does everything on the same goroutine and does nothing concurrently. +It can however decode several buffers concurrently. Use `WithDecoderConcurrency(n)` to limit that. + +The stream decoder operates on + +* One goroutine reads input and splits the input to several block decoders. +* A number of decoders will decode blocks. +* A goroutine coordinates these blocks and sends history from one to the next. + +So effectively this also means the decoder will "read ahead" and prepare data to always be available for output. + +Since "blocks" are quite dependent on the output of the previous block stream decoding will only have limited concurrency. + +In practice this means that concurrency is often limited to utilizing about 2 cores effectively. + + +### Benchmarks + +These are some examples of performance compared to [datadog cgo library](https://github.com/DataDog/zstd). + +The first two are streaming decodes and the last are smaller inputs. + +``` +BenchmarkDecoderSilesia-8 20 642550210 ns/op 329.85 MB/s 3101 B/op 8 allocs/op +BenchmarkDecoderSilesiaCgo-8 100 384930000 ns/op 550.61 MB/s 451878 B/op 9713 allocs/op + +BenchmarkDecoderEnwik9-2 10 3146000080 ns/op 317.86 MB/s 2649 B/op 9 allocs/op +BenchmarkDecoderEnwik9Cgo-2 20 1905900000 ns/op 524.69 MB/s 1125120 B/op 45785 allocs/op + +BenchmarkDecoder_DecodeAll/z000000.zst-8 200 7049994 ns/op 138.26 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000001.zst-8 100000 19560 ns/op 97.49 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000002.zst-8 5000 297599 ns/op 236.99 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000003.zst-8 2000 725502 ns/op 141.17 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000004.zst-8 200000 9314 ns/op 54.54 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000005.zst-8 10000 137500 ns/op 104.72 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000006.zst-8 500 2316009 ns/op 206.06 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000007.zst-8 20000 64499 ns/op 344.90 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000008.zst-8 50000 24900 ns/op 219.56 MB/s 40 B/op 2 allocs/op +BenchmarkDecoder_DecodeAll/z000009.zst-8 1000 2348999 ns/op 154.01 MB/s 40 B/op 2 allocs/op + +BenchmarkDecoder_DecodeAllCgo/z000000.zst-8 500 4268005 ns/op 228.38 MB/s 1228849 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000001.zst-8 100000 15250 ns/op 125.05 MB/s 2096 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000002.zst-8 10000 147399 ns/op 478.49 MB/s 73776 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000003.zst-8 5000 320798 ns/op 319.27 MB/s 139312 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000004.zst-8 200000 10004 ns/op 50.77 MB/s 560 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000005.zst-8 20000 73599 ns/op 195.64 MB/s 19120 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000006.zst-8 1000 1119003 ns/op 426.48 MB/s 557104 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000007.zst-8 20000 103450 ns/op 215.04 MB/s 71296 B/op 9 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000008.zst-8 100000 20130 ns/op 271.58 MB/s 6192 B/op 3 allocs/op +BenchmarkDecoder_DecodeAllCgo/z000009.zst-8 2000 1123500 ns/op 322.00 MB/s 368688 B/op 3 allocs/op +``` + +This reflects the performance around May 2019, but this may be out of date. + +# Contributions + +Contributions are always welcome. +For new features/fixes, remember to add tests and for performance enhancements include benchmarks. + +For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files. + +For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan). + +This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare. diff --git a/vendor/github.com/klauspost/compress/zstd/bitreader.go b/vendor/github.com/klauspost/compress/zstd/bitreader.go new file mode 100644 index 00000000000..15d79d439fa --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/bitreader.go @@ -0,0 +1,121 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "io" + "math/bits" +) + +// bitReader reads a bitstream in reverse. +// The last set bit indicates the start of the stream and is used +// for aligning the input. +type bitReader struct { + in []byte + off uint // next byte to read is at in[off - 1] + value uint64 // Maybe use [16]byte, but shifting is awkward. + bitsRead uint8 +} + +// init initializes and resets the bit reader. +func (b *bitReader) init(in []byte) error { + if len(in) < 1 { + return errors.New("corrupt stream: too short") + } + b.in = in + b.off = uint(len(in)) + // The highest bit of the last byte indicates where to start + v := in[len(in)-1] + if v == 0 { + return errors.New("corrupt stream, did not find end of stream") + } + b.bitsRead = 64 + b.value = 0 + b.fill() + b.fill() + b.bitsRead += 8 - uint8(highBits(uint32(v))) + return nil +} + +// getBits will return n bits. n can be 0. +func (b *bitReader) getBits(n uint8) int { + if n == 0 /*|| b.bitsRead >= 64 */ { + return 0 + } + return b.getBitsFast(n) +} + +// getBitsFast requires that at least one bit is requested every time. +// There are no checks if the buffer is filled. +func (b *bitReader) getBitsFast(n uint8) int { + const regMask = 64 - 1 + v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask)) + b.bitsRead += n + return int(v) +} + +// fillFast() will make sure at least 32 bits are available. +// There must be at least 4 bytes available. +func (b *bitReader) fillFast() { + if b.bitsRead < 32 { + return + } + // Do single re-slice to avoid bounds checks. + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 +} + +// fill() will make sure at least 32 bits are available. +func (b *bitReader) fill() { + if b.bitsRead < 32 { + return + } + if b.off >= 4 { + v := b.in[b.off-4 : b.off] + low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24) + b.value = (b.value << 32) | uint64(low) + b.bitsRead -= 32 + b.off -= 4 + return + } + for b.off > 0 { + b.value = (b.value << 8) | uint64(b.in[b.off-1]) + b.bitsRead -= 8 + b.off-- + } +} + +// finished returns true if all bits have been read from the bit stream. +func (b *bitReader) finished() bool { + return b.off == 0 && b.bitsRead >= 64 +} + +// overread returns true if more bits have been requested than is on the stream. +func (b *bitReader) overread() bool { + return b.bitsRead > 64 +} + +// remain returns the number of bits remaining. +func (b *bitReader) remain() uint { + return b.off*8 + 64 - uint(b.bitsRead) +} + +// close the bitstream and returns an error if out-of-buffer reads occurred. +func (b *bitReader) close() error { + // Release reference. + b.in = nil + if b.bitsRead > 64 { + return io.ErrUnexpectedEOF + } + return nil +} + +func highBits(val uint32) (n uint32) { + return uint32(bits.Len32(val) - 1) +} diff --git a/vendor/github.com/klauspost/compress/zstd/bitwriter.go b/vendor/github.com/klauspost/compress/zstd/bitwriter.go new file mode 100644 index 00000000000..303ae90f944 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/bitwriter.go @@ -0,0 +1,169 @@ +// Copyright 2018 Klaus Post. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. +// Based on work Copyright (c) 2013, Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +// bitWriter will write bits. +// First bit will be LSB of the first byte of output. +type bitWriter struct { + bitContainer uint64 + nBits uint8 + out []byte +} + +// bitMask16 is bitmasks. Has extra to avoid bounds check. +var bitMask16 = [32]uint16{ + 0, 1, 3, 7, 0xF, 0x1F, + 0x3F, 0x7F, 0xFF, 0x1FF, 0x3FF, 0x7FF, + 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, + 0xFFFF, 0xFFFF} /* up to 16 bits */ + +var bitMask32 = [32]uint32{ + 0, 1, 3, 7, 0xF, 0x1F, 0x3F, 0x7F, 0xFF, + 0x1FF, 0x3FF, 0x7FF, 0xFFF, 0x1FFF, 0x3FFF, 0x7FFF, 0xFFFF, + 0x1ffff, 0x3ffff, 0x7FFFF, 0xfFFFF, 0x1fFFFF, 0x3fFFFF, 0x7fFFFF, 0xffFFFF, + 0x1ffFFFF, 0x3ffFFFF, 0x7ffFFFF, 0xfffFFFF, 0x1fffFFFF, 0x3fffFFFF, 0x7fffFFFF, +} // up to 32 bits + +// addBits16NC will add up to 16 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16NC(value uint16, bits uint8) { + b.bitContainer |= uint64(value&bitMask16[bits&31]) << (b.nBits & 63) + b.nBits += bits +} + +// addBits32NC will add up to 32 bits. +// It will not check if there is space for them, +// so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits32NC(value uint32, bits uint8) { + b.bitContainer |= uint64(value&bitMask32[bits&31]) << (b.nBits & 63) + b.nBits += bits +} + +// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated. +// It will not check if there is space for them, so the caller must ensure that it has flushed recently. +func (b *bitWriter) addBits16Clean(value uint16, bits uint8) { + b.bitContainer |= uint64(value) << (b.nBits & 63) + b.nBits += bits +} + +// flush will flush all pending full bytes. +// There will be at least 56 bits available for writing when this has been called. +// Using flush32 is faster, but leaves less space for writing. +func (b *bitWriter) flush() { + v := b.nBits >> 3 + switch v { + case 0: + case 1: + b.out = append(b.out, + byte(b.bitContainer), + ) + case 2: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + ) + case 3: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + ) + case 4: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + ) + case 5: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + ) + case 6: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + ) + case 7: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + ) + case 8: + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24), + byte(b.bitContainer>>32), + byte(b.bitContainer>>40), + byte(b.bitContainer>>48), + byte(b.bitContainer>>56), + ) + default: + panic(fmt.Errorf("bits (%d) > 64", b.nBits)) + } + b.bitContainer >>= v << 3 + b.nBits &= 7 +} + +// flush32 will flush out, so there are at least 32 bits available for writing. +func (b *bitWriter) flush32() { + if b.nBits < 32 { + return + } + b.out = append(b.out, + byte(b.bitContainer), + byte(b.bitContainer>>8), + byte(b.bitContainer>>16), + byte(b.bitContainer>>24)) + b.nBits -= 32 + b.bitContainer >>= 32 +} + +// flushAlign will flush remaining full bytes and align to next byte boundary. +func (b *bitWriter) flushAlign() { + nbBytes := (b.nBits + 7) >> 3 + for i := uint8(0); i < nbBytes; i++ { + b.out = append(b.out, byte(b.bitContainer>>(i*8))) + } + b.nBits = 0 + b.bitContainer = 0 +} + +// close will write the alignment bit and write the final byte(s) +// to the output. +func (b *bitWriter) close() error { + // End mark + b.addBits16Clean(1, 1) + // flush until next byte. + b.flushAlign() + return nil +} + +// reset and continue writing by appending to out. +func (b *bitWriter) reset(out []byte) { + b.bitContainer = 0 + b.nBits = 0 + b.out = out +} diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go new file mode 100644 index 00000000000..c2f855e75be --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go @@ -0,0 +1,731 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "fmt" + "io" + "sync" + + "github.com/klauspost/compress/huff0" + "github.com/klauspost/compress/zstd/internal/xxhash" +) + +type blockType uint8 + +//go:generate stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex + +const ( + blockTypeRaw blockType = iota + blockTypeRLE + blockTypeCompressed + blockTypeReserved +) + +type literalsBlockType uint8 + +const ( + literalsBlockRaw literalsBlockType = iota + literalsBlockRLE + literalsBlockCompressed + literalsBlockTreeless +) + +const ( + // maxCompressedBlockSize is the biggest allowed compressed block size (128KB) + maxCompressedBlockSize = 128 << 10 + + // Maximum possible block size (all Raw+Uncompressed). + maxBlockSize = (1 << 21) - 1 + + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#literals_section_header + maxCompressedLiteralSize = 1 << 18 + maxRLELiteralSize = 1 << 20 + maxMatchLen = 131074 + maxSequences = 0x7f00 + 0xffff + + // We support slightly less than the reference decoder to be able to + // use ints on 32 bit archs. + maxOffsetBits = 30 +) + +var ( + huffDecoderPool = sync.Pool{New: func() interface{} { + return &huff0.Scratch{} + }} + + fseDecoderPool = sync.Pool{New: func() interface{} { + return &fseDecoder{} + }} +) + +type blockDec struct { + // Raw source data of the block. + data []byte + dataStorage []byte + + // Destination of the decoded data. + dst []byte + + // Buffer for literals data. + literalBuf []byte + + // Window size of the block. + WindowSize uint64 + + history chan *history + input chan struct{} + result chan decodeOutput + sequenceBuf []seq + err error + decWG sync.WaitGroup + + // Block is RLE, this is the size. + RLESize uint32 + tmp [4]byte + + Type blockType + + // Is this the last block of a frame? + Last bool + + // Use less memory + lowMem bool +} + +func (b *blockDec) String() string { + if b == nil { + return "" + } + return fmt.Sprintf("Steam Size: %d, Type: %v, Last: %t, Window: %d", len(b.data), b.Type, b.Last, b.WindowSize) +} + +func newBlockDec(lowMem bool) *blockDec { + b := blockDec{ + lowMem: lowMem, + result: make(chan decodeOutput, 1), + input: make(chan struct{}, 1), + history: make(chan *history, 1), + } + b.decWG.Add(1) + go b.startDecoder() + return &b +} + +// reset will reset the block. +// Input must be a start of a block and will be at the end of the block when returned. +func (b *blockDec) reset(br byteBuffer, windowSize uint64) error { + b.WindowSize = windowSize + tmp := br.readSmall(3) + if tmp == nil { + if debug { + println("Reading block header:", io.ErrUnexpectedEOF) + } + return io.ErrUnexpectedEOF + } + bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16) + b.Last = bh&1 != 0 + b.Type = blockType((bh >> 1) & 3) + // find size. + cSize := int(bh >> 3) + maxSize := maxBlockSize + switch b.Type { + case blockTypeReserved: + return ErrReservedBlockType + case blockTypeRLE: + b.RLESize = uint32(cSize) + if b.lowMem { + maxSize = cSize + } + cSize = 1 + case blockTypeCompressed: + if debug { + println("Data size on stream:", cSize) + } + b.RLESize = 0 + maxSize = maxCompressedBlockSize + if windowSize < maxCompressedBlockSize && b.lowMem { + maxSize = int(windowSize) + } + if cSize > maxCompressedBlockSize || uint64(cSize) > b.WindowSize { + if debug { + printf("compressed block too big: csize:%d block: %+v\n", uint64(cSize), b) + } + return ErrCompressedSizeTooBig + } + default: + b.RLESize = 0 + } + + // Read block data. + if cap(b.dataStorage) < cSize { + if b.lowMem { + b.dataStorage = make([]byte, 0, cSize) + } else { + b.dataStorage = make([]byte, 0, maxBlockSize) + } + } + if cap(b.dst) <= maxSize { + b.dst = make([]byte, 0, maxSize+1) + } + var err error + b.data, err = br.readBig(cSize, b.dataStorage) + if err != nil { + if debug { + println("Reading block:", err, "(", cSize, ")", len(b.data)) + printf("%T", br) + } + return err + } + return nil +} + +// sendEOF will make the decoder send EOF on this frame. +func (b *blockDec) sendErr(err error) { + b.Last = true + b.Type = blockTypeReserved + b.err = err + b.input <- struct{}{} +} + +// Close will release resources. +// Closed blockDec cannot be reset. +func (b *blockDec) Close() { + close(b.input) + close(b.history) + close(b.result) + b.decWG.Wait() +} + +// decodeAsync will prepare decoding the block when it receives input. +// This will separate output and history. +func (b *blockDec) startDecoder() { + defer b.decWG.Done() + for range b.input { + //println("blockDec: Got block input") + switch b.Type { + case blockTypeRLE: + if cap(b.dst) < int(b.RLESize) { + if b.lowMem { + b.dst = make([]byte, b.RLESize) + } else { + b.dst = make([]byte, maxBlockSize) + } + } + o := decodeOutput{ + d: b, + b: b.dst[:b.RLESize], + err: nil, + } + v := b.data[0] + for i := range o.b { + o.b[i] = v + } + hist := <-b.history + hist.append(o.b) + b.result <- o + case blockTypeRaw: + o := decodeOutput{ + d: b, + b: b.data, + err: nil, + } + hist := <-b.history + hist.append(o.b) + b.result <- o + case blockTypeCompressed: + b.dst = b.dst[:0] + err := b.decodeCompressed(nil) + o := decodeOutput{ + d: b, + b: b.dst, + err: err, + } + if debug { + println("Decompressed to", len(b.dst), "bytes, error:", err) + } + b.result <- o + case blockTypeReserved: + // Used for returning errors. + <-b.history + b.result <- decodeOutput{ + d: b, + b: nil, + err: b.err, + } + default: + panic("Invalid block type") + } + if debug { + println("blockDec: Finished block") + } + } +} + +// decodeAsync will prepare decoding the block when it receives the history. +// If history is provided, it will not fetch it from the channel. +func (b *blockDec) decodeBuf(hist *history) error { + switch b.Type { + case blockTypeRLE: + if cap(b.dst) < int(b.RLESize) { + if b.lowMem { + b.dst = make([]byte, b.RLESize) + } else { + b.dst = make([]byte, maxBlockSize) + } + } + b.dst = b.dst[:b.RLESize] + v := b.data[0] + for i := range b.dst { + b.dst[i] = v + } + hist.appendKeep(b.dst) + return nil + case blockTypeRaw: + hist.appendKeep(b.data) + return nil + case blockTypeCompressed: + saved := b.dst + b.dst = hist.b + hist.b = nil + err := b.decodeCompressed(hist) + if debug { + println("Decompressed to total", len(b.dst), "bytes, hash:", xxhash.Sum64(b.dst), "error:", err) + } + hist.b = b.dst + b.dst = saved + return err + case blockTypeReserved: + // Used for returning errors. + return b.err + default: + panic("Invalid block type") + } +} + +// decodeCompressed will start decompressing a block. +// If no history is supplied the decoder will decodeAsync as much as possible +// before fetching from blockDec.history +func (b *blockDec) decodeCompressed(hist *history) error { + in := b.data + delayedHistory := hist == nil + + if delayedHistory { + // We must always grab history. + defer func() { + if hist == nil { + <-b.history + } + }() + } + // There must be at least one byte for Literals_Block_Type and one for Sequences_Section_Header + if len(in) < 2 { + return ErrBlockTooSmall + } + litType := literalsBlockType(in[0] & 3) + var litRegenSize int + var litCompSize int + sizeFormat := (in[0] >> 2) & 3 + var fourStreams bool + switch litType { + case literalsBlockRaw, literalsBlockRLE: + switch sizeFormat { + case 0, 2: + // Regenerated_Size uses 5 bits (0-31). Literals_Section_Header uses 1 byte. + litRegenSize = int(in[0] >> 3) + in = in[1:] + case 1: + // Regenerated_Size uses 12 bits (0-4095). Literals_Section_Header uses 2 bytes. + litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + in = in[2:] + case 3: + // Regenerated_Size uses 20 bits (0-1048575). Literals_Section_Header uses 3 bytes. + if len(in) < 3 { + println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) + return ErrBlockTooSmall + } + litRegenSize = int(in[0]>>4) + (int(in[1]) << 4) + (int(in[2]) << 12) + in = in[3:] + } + case literalsBlockCompressed, literalsBlockTreeless: + switch sizeFormat { + case 0, 1: + // Both Regenerated_Size and Compressed_Size use 10 bits (0-1023). + if len(in) < 3 { + println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) + return ErrBlockTooSmall + } + n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + litRegenSize = int(n & 1023) + litCompSize = int(n >> 10) + fourStreams = sizeFormat == 1 + in = in[3:] + case 2: + fourStreams = true + if len(in) < 4 { + println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) + return ErrBlockTooSmall + } + n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + litRegenSize = int(n & 16383) + litCompSize = int(n >> 14) + in = in[4:] + case 3: + fourStreams = true + if len(in) < 5 { + println("too small: litType:", litType, " sizeFormat", sizeFormat, len(in)) + return ErrBlockTooSmall + } + n := uint64(in[0]>>4) + (uint64(in[1]) << 4) + (uint64(in[2]) << 12) + (uint64(in[3]) << 20) + (uint64(in[4]) << 28) + litRegenSize = int(n & 262143) + litCompSize = int(n >> 18) + in = in[5:] + } + } + if debug { + println("literals type:", litType, "litRegenSize:", litRegenSize, "litCompSize:", litCompSize, "sizeFormat:", sizeFormat, "4X:", fourStreams) + } + var literals []byte + var huff *huff0.Scratch + switch litType { + case literalsBlockRaw: + if len(in) < litRegenSize { + println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litRegenSize) + return ErrBlockTooSmall + } + literals = in[:litRegenSize] + in = in[litRegenSize:] + //printf("Found %d uncompressed literals\n", litRegenSize) + case literalsBlockRLE: + if len(in) < 1 { + println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", 1) + return ErrBlockTooSmall + } + if cap(b.literalBuf) < litRegenSize { + if b.lowMem { + b.literalBuf = make([]byte, litRegenSize) + } else { + if litRegenSize > maxCompressedLiteralSize { + // Exceptional + b.literalBuf = make([]byte, litRegenSize) + } else { + b.literalBuf = make([]byte, litRegenSize, maxCompressedLiteralSize) + + } + } + } + literals = b.literalBuf[:litRegenSize] + v := in[0] + for i := range literals { + literals[i] = v + } + in = in[1:] + if debug { + printf("Found %d RLE compressed literals\n", litRegenSize) + } + case literalsBlockTreeless: + if len(in) < litCompSize { + println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) + return ErrBlockTooSmall + } + // Store compressed literals, so we defer decoding until we get history. + literals = in[:litCompSize] + in = in[litCompSize:] + if debug { + printf("Found %d compressed literals\n", litCompSize) + } + case literalsBlockCompressed: + if len(in) < litCompSize { + println("too small: litType:", litType, " sizeFormat", sizeFormat, "remain:", len(in), "want:", litCompSize) + return ErrBlockTooSmall + } + literals = in[:litCompSize] + in = in[litCompSize:] + huff = huffDecoderPool.Get().(*huff0.Scratch) + var err error + // Ensure we have space to store it. + if cap(b.literalBuf) < litRegenSize { + if b.lowMem { + b.literalBuf = make([]byte, 0, litRegenSize) + } else { + b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) + } + } + if huff == nil { + huff = &huff0.Scratch{} + } + huff.Out = b.literalBuf[:0] + huff, literals, err = huff0.ReadTable(literals, huff) + if err != nil { + println("reading huffman table:", err) + return err + } + // Use our out buffer. + huff.Out = b.literalBuf[:0] + huff.MaxDecodedSize = litRegenSize + if fourStreams { + literals, err = huff.Decompress4X(literals, litRegenSize) + } else { + literals, err = huff.Decompress1X(literals) + } + if err != nil { + println("decoding compressed literals:", err) + return err + } + // Make sure we don't leak our literals buffer + huff.Out = nil + if len(literals) != litRegenSize { + return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) + } + if debug { + printf("Decompressed %d literals into %d bytes\n", litCompSize, litRegenSize) + } + } + + // Decode Sequences + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#sequences-section + if len(in) < 1 { + return ErrBlockTooSmall + } + seqHeader := in[0] + nSeqs := 0 + switch { + case seqHeader == 0: + in = in[1:] + case seqHeader < 128: + nSeqs = int(seqHeader) + in = in[1:] + case seqHeader < 255: + if len(in) < 2 { + return ErrBlockTooSmall + } + nSeqs = int(seqHeader-128)<<8 | int(in[1]) + in = in[2:] + case seqHeader == 255: + if len(in) < 3 { + return ErrBlockTooSmall + } + nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8) + in = in[3:] + } + // Allocate sequences + if cap(b.sequenceBuf) < nSeqs { + if b.lowMem { + b.sequenceBuf = make([]seq, nSeqs) + } else { + // Allocate max + b.sequenceBuf = make([]seq, nSeqs, maxSequences) + } + } else { + // Reuse buffer + b.sequenceBuf = b.sequenceBuf[:nSeqs] + } + var seqs = &sequenceDecs{} + if nSeqs > 0 { + if len(in) < 1 { + return ErrBlockTooSmall + } + br := byteReader{b: in, off: 0} + compMode := br.Uint8() + br.advance(1) + if debug { + printf("Compression modes: 0b%b", compMode) + } + for i := uint(0); i < 3; i++ { + mode := seqCompMode((compMode >> (6 - i*2)) & 3) + if debug { + println("Table", tableIndex(i), "is", mode) + } + var seq *sequenceDec + switch tableIndex(i) { + case tableLiteralLengths: + seq = &seqs.litLengths + case tableOffsets: + seq = &seqs.offsets + case tableMatchLengths: + seq = &seqs.matchLengths + default: + panic("unknown table") + } + switch mode { + case compModePredefined: + seq.fse = &fsePredef[i] + case compModeRLE: + if br.remain() < 1 { + return ErrBlockTooSmall + } + v := br.Uint8() + br.advance(1) + dec := fseDecoderPool.Get().(*fseDecoder) + symb, err := decSymbolValue(v, symbolTableX[i]) + if err != nil { + printf("RLE Transform table (%v) error: %v", tableIndex(i), err) + return err + } + dec.setRLE(symb) + seq.fse = dec + if debug { + printf("RLE set to %+v, code: %v", symb, v) + } + case compModeFSE: + println("Reading table for", tableIndex(i)) + dec := fseDecoderPool.Get().(*fseDecoder) + err := dec.readNCount(&br, uint16(maxTableSymbol[i])) + if err != nil { + println("Read table error:", err) + return err + } + err = dec.transform(symbolTableX[i]) + if err != nil { + println("Transform table error:", err) + return err + } + if debug { + println("Read table ok", "symbolLen:", dec.symbolLen) + } + seq.fse = dec + case compModeRepeat: + seq.repeat = true + } + if br.overread() { + return io.ErrUnexpectedEOF + } + } + in = br.unread() + } + + // Wait for history. + // All time spent after this is critical since it is strictly sequential. + if hist == nil { + hist = <-b.history + if hist.error { + return ErrDecoderClosed + } + } + + // Decode treeless literal block. + if litType == literalsBlockTreeless { + // TODO: We could send the history early WITHOUT the stream history. + // This would allow decoding treeless literials before the byte history is available. + // Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless. + // So not much obvious gain here. + + if hist.huffTree == nil { + return errors.New("literal block was treeless, but no history was defined") + } + // Ensure we have space to store it. + if cap(b.literalBuf) < litRegenSize { + if b.lowMem { + b.literalBuf = make([]byte, 0, litRegenSize) + } else { + b.literalBuf = make([]byte, 0, maxCompressedLiteralSize) + } + } + var err error + // Use our out buffer. + huff = hist.huffTree + huff.Out = b.literalBuf[:0] + huff.MaxDecodedSize = litRegenSize + if fourStreams { + literals, err = huff.Decompress4X(literals, litRegenSize) + } else { + literals, err = huff.Decompress1X(literals) + } + // Make sure we don't leak our literals buffer + huff.Out = nil + if err != nil { + println("decompressing literals:", err) + return err + } + if len(literals) != litRegenSize { + return fmt.Errorf("literal output size mismatch want %d, got %d", litRegenSize, len(literals)) + } + } else { + if hist.huffTree != nil && huff != nil { + huffDecoderPool.Put(hist.huffTree) + hist.huffTree = nil + } + } + if huff != nil { + huff.Out = nil + hist.huffTree = huff + } + if debug { + println("Final literals:", len(literals), "hash:", xxhash.Sum64(literals), "and", nSeqs, "sequences.") + } + + if nSeqs == 0 { + // Decompressed content is defined entirely as Literals Section content. + b.dst = append(b.dst, literals...) + if delayedHistory { + hist.append(literals) + } + return nil + } + + seqs, err := seqs.mergeHistory(&hist.decoders) + if err != nil { + return err + } + if debug { + println("History merged ok") + } + br := &bitReader{} + if err := br.init(in); err != nil { + return err + } + + // TODO: Investigate if sending history without decoders are faster. + // This would allow the sequences to be decoded async and only have to construct stream history. + // If only recent offsets were not transferred, this would be an obvious win. + // Also, if first 3 sequences don't reference recent offsets, all sequences can be decoded. + + if err := seqs.initialize(br, hist, literals, b.dst); err != nil { + println("initializing sequences:", err) + return err + } + hbytes := hist.b + if len(hbytes) > hist.windowSize { + hbytes = hbytes[len(hbytes)-hist.windowSize:] + } + err = seqs.decode(nSeqs, br, hbytes) + if err != nil { + return err + } + if !br.finished() { + return fmt.Errorf("%d extra bits on block, should be 0", br.remain()) + } + + err = br.close() + if err != nil { + printf("Closing sequences: %v, %+v\n", err, *br) + } + if len(b.data) > maxCompressedBlockSize { + return fmt.Errorf("compressed block size too large (%d)", len(b.data)) + } + // Set output and release references. + b.dst = seqs.out + seqs.out, seqs.literals, seqs.hist = nil, nil, nil + + if !delayedHistory { + // If we don't have delayed history, no need to update. + hist.recentOffsets = seqs.prevOffset + return nil + } + if b.Last { + // if last block we don't care about history. + println("Last block, no history returned") + hist.b = hist.b[:0] + return nil + } + hist.append(b.dst) + hist.recentOffsets = seqs.prevOffset + if debug { + println("Finished block with literals:", len(literals), "and", nSeqs, "sequences.") + } + + return nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/blockenc.go b/vendor/github.com/klauspost/compress/zstd/blockenc.go new file mode 100644 index 00000000000..4f0eba22f08 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/blockenc.go @@ -0,0 +1,837 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "fmt" + "math" + "math/bits" + + "github.com/klauspost/compress/huff0" +) + +type blockEnc struct { + size int + literals []byte + sequences []seq + coders seqCoders + litEnc *huff0.Scratch + wr bitWriter + + extraLits int + last bool + + output []byte + recentOffsets [3]uint32 + prevRecentOffsets [3]uint32 +} + +// init should be used once the block has been created. +// If called more than once, the effect is the same as calling reset. +func (b *blockEnc) init() { + if cap(b.literals) < maxCompressedLiteralSize { + b.literals = make([]byte, 0, maxCompressedLiteralSize) + } + const defSeqs = 200 + b.literals = b.literals[:0] + if cap(b.sequences) < defSeqs { + b.sequences = make([]seq, 0, defSeqs) + } + if cap(b.output) < maxCompressedBlockSize { + b.output = make([]byte, 0, maxCompressedBlockSize) + } + if b.coders.mlEnc == nil { + b.coders.mlEnc = &fseEncoder{} + b.coders.mlPrev = &fseEncoder{} + b.coders.ofEnc = &fseEncoder{} + b.coders.ofPrev = &fseEncoder{} + b.coders.llEnc = &fseEncoder{} + b.coders.llPrev = &fseEncoder{} + } + b.litEnc = &huff0.Scratch{WantLogLess: 4} + b.reset(nil) +} + +// initNewEncode can be used to reset offsets and encoders to the initial state. +func (b *blockEnc) initNewEncode() { + b.recentOffsets = [3]uint32{1, 4, 8} + b.litEnc.Reuse = huff0.ReusePolicyNone + b.coders.setPrev(nil, nil, nil) +} + +// reset will reset the block for a new encode, but in the same stream, +// meaning that state will be carried over, but the block content is reset. +// If a previous block is provided, the recent offsets are carried over. +func (b *blockEnc) reset(prev *blockEnc) { + b.extraLits = 0 + b.literals = b.literals[:0] + b.size = 0 + b.sequences = b.sequences[:0] + b.output = b.output[:0] + b.last = false + if prev != nil { + b.recentOffsets = prev.prevRecentOffsets + } +} + +// reset will reset the block for a new encode, but in the same stream, +// meaning that state will be carried over, but the block content is reset. +// If a previous block is provided, the recent offsets are carried over. +func (b *blockEnc) swapEncoders(prev *blockEnc) { + b.coders.swap(&prev.coders) + b.litEnc, prev.litEnc = prev.litEnc, b.litEnc +} + +// blockHeader contains the information for a block header. +type blockHeader uint32 + +// setLast sets the 'last' indicator on a block. +func (h *blockHeader) setLast(b bool) { + if b { + *h = *h | 1 + } else { + const mask = (1 << 24) - 2 + *h = *h & mask + } +} + +// setSize will store the compressed size of a block. +func (h *blockHeader) setSize(v uint32) { + const mask = 7 + *h = (*h)&mask | blockHeader(v<<3) +} + +// setType sets the block type. +func (h *blockHeader) setType(t blockType) { + const mask = 1 | (((1 << 24) - 1) ^ 7) + *h = (*h & mask) | blockHeader(t<<1) +} + +// appendTo will append the block header to a slice. +func (h blockHeader) appendTo(b []byte) []byte { + return append(b, uint8(h), uint8(h>>8), uint8(h>>16)) +} + +// String returns a string representation of the block. +func (h blockHeader) String() string { + return fmt.Sprintf("Type: %d, Size: %d, Last:%t", (h>>1)&3, h>>3, h&1 == 1) +} + +// literalsHeader contains literals header information. +type literalsHeader uint64 + +// setType can be used to set the type of literal block. +func (h *literalsHeader) setType(t literalsBlockType) { + const mask = math.MaxUint64 - 3 + *h = (*h & mask) | literalsHeader(t) +} + +// setSize can be used to set a single size, for uncompressed and RLE content. +func (h *literalsHeader) setSize(regenLen int) { + inBits := bits.Len32(uint32(regenLen)) + // Only retain 2 bits + const mask = 3 + lh := uint64(*h & mask) + switch { + case inBits < 5: + lh |= (uint64(regenLen) << 3) | (1 << 60) + if debug { + got := int(lh>>3) & 0xff + if got != regenLen { + panic(fmt.Sprint("litRegenSize = ", regenLen, "(want) != ", got, "(got)")) + } + } + case inBits < 12: + lh |= (1 << 2) | (uint64(regenLen) << 4) | (2 << 60) + case inBits < 20: + lh |= (3 << 2) | (uint64(regenLen) << 4) | (3 << 60) + default: + panic(fmt.Errorf("internal error: block too big (%d)", regenLen)) + } + *h = literalsHeader(lh) +} + +// setSizes will set the size of a compressed literals section and the input length. +func (h *literalsHeader) setSizes(compLen, inLen int, single bool) { + compBits, inBits := bits.Len32(uint32(compLen)), bits.Len32(uint32(inLen)) + // Only retain 2 bits + const mask = 3 + lh := uint64(*h & mask) + switch { + case compBits <= 10 && inBits <= 10: + if !single { + lh |= 1 << 2 + } + lh |= (uint64(inLen) << 4) | (uint64(compLen) << (10 + 4)) | (3 << 60) + if debug { + const mmask = (1 << 24) - 1 + n := (lh >> 4) & mmask + if int(n&1023) != inLen { + panic(fmt.Sprint("regensize:", int(n&1023), "!=", inLen, inBits)) + } + if int(n>>10) != compLen { + panic(fmt.Sprint("compsize:", int(n>>10), "!=", compLen, compBits)) + } + } + case compBits <= 14 && inBits <= 14: + lh |= (2 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (14 + 4)) | (4 << 60) + if single { + panic("single stream used with more than 10 bits length.") + } + case compBits <= 18 && inBits <= 18: + lh |= (3 << 2) | (uint64(inLen) << 4) | (uint64(compLen) << (18 + 4)) | (5 << 60) + if single { + panic("single stream used with more than 10 bits length.") + } + default: + panic("internal error: block too big") + } + *h = literalsHeader(lh) +} + +// appendTo will append the literals header to a byte slice. +func (h literalsHeader) appendTo(b []byte) []byte { + size := uint8(h >> 60) + switch size { + case 1: + b = append(b, uint8(h)) + case 2: + b = append(b, uint8(h), uint8(h>>8)) + case 3: + b = append(b, uint8(h), uint8(h>>8), uint8(h>>16)) + case 4: + b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24)) + case 5: + b = append(b, uint8(h), uint8(h>>8), uint8(h>>16), uint8(h>>24), uint8(h>>32)) + default: + panic(fmt.Errorf("internal error: literalsHeader has invalid size (%d)", size)) + } + return b +} + +// size returns the output size with currently set values. +func (h literalsHeader) size() int { + return int(h >> 60) +} + +func (h literalsHeader) String() string { + return fmt.Sprintf("Type: %d, SizeFormat: %d, Size: 0x%d, Bytes:%d", literalsBlockType(h&3), (h>>2)&3, h&((1<<60)-1)>>4, h>>60) +} + +// pushOffsets will push the recent offsets to the backup store. +func (b *blockEnc) pushOffsets() { + b.prevRecentOffsets = b.recentOffsets +} + +// pushOffsets will push the recent offsets to the backup store. +func (b *blockEnc) popOffsets() { + b.recentOffsets = b.prevRecentOffsets +} + +// matchOffset will adjust recent offsets and return the adjusted one, +// if it matches a previous offset. +func (b *blockEnc) matchOffset(offset, lits uint32) uint32 { + // Check if offset is one of the recent offsets. + // Adjusts the output offset accordingly. + // Gives a tiny bit of compression, typically around 1%. + if true { + if lits > 0 { + switch offset { + case b.recentOffsets[0]: + offset = 1 + case b.recentOffsets[1]: + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset = 2 + case b.recentOffsets[2]: + b.recentOffsets[2] = b.recentOffsets[1] + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset = 3 + default: + b.recentOffsets[2] = b.recentOffsets[1] + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset += 3 + } + } else { + switch offset { + case b.recentOffsets[1]: + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset = 1 + case b.recentOffsets[2]: + b.recentOffsets[2] = b.recentOffsets[1] + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset = 2 + case b.recentOffsets[0] - 1: + b.recentOffsets[2] = b.recentOffsets[1] + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset = 3 + default: + b.recentOffsets[2] = b.recentOffsets[1] + b.recentOffsets[1] = b.recentOffsets[0] + b.recentOffsets[0] = offset + offset += 3 + } + } + } else { + offset += 3 + } + return offset +} + +// encodeRaw can be used to set the output to a raw representation of supplied bytes. +func (b *blockEnc) encodeRaw(a []byte) { + var bh blockHeader + bh.setLast(b.last) + bh.setSize(uint32(len(a))) + bh.setType(blockTypeRaw) + b.output = bh.appendTo(b.output[:0]) + b.output = append(b.output, a...) + if debug { + println("Adding RAW block, length", len(a)) + } +} + +// encodeRaw can be used to set the output to a raw representation of supplied bytes. +func (b *blockEnc) encodeRawTo(dst, src []byte) []byte { + var bh blockHeader + bh.setLast(b.last) + bh.setSize(uint32(len(src))) + bh.setType(blockTypeRaw) + dst = bh.appendTo(dst) + dst = append(dst, src...) + if debug { + println("Adding RAW block, length", len(src)) + } + return dst +} + +// encodeLits can be used if the block is only litLen. +func (b *blockEnc) encodeLits(raw bool) error { + var bh blockHeader + bh.setLast(b.last) + bh.setSize(uint32(len(b.literals))) + + // Don't compress extremely small blocks + if len(b.literals) < 32 || raw { + if debug { + println("Adding RAW block, length", len(b.literals)) + } + bh.setType(blockTypeRaw) + b.output = bh.appendTo(b.output) + b.output = append(b.output, b.literals...) + return nil + } + + var ( + out []byte + reUsed, single bool + err error + ) + if len(b.literals) >= 1024 { + // Use 4 Streams. + out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) + } else if len(b.literals) > 32 { + // Use 1 stream + single = true + out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) + } else { + err = huff0.ErrIncompressible + } + + switch err { + case huff0.ErrIncompressible: + if debug { + println("Adding RAW block, length", len(b.literals)) + } + bh.setType(blockTypeRaw) + b.output = bh.appendTo(b.output) + b.output = append(b.output, b.literals...) + return nil + case huff0.ErrUseRLE: + if debug { + println("Adding RLE block, length", len(b.literals)) + } + bh.setType(blockTypeRLE) + b.output = bh.appendTo(b.output) + b.output = append(b.output, b.literals[0]) + return nil + default: + return err + case nil: + } + // Compressed... + // Now, allow reuse + b.litEnc.Reuse = huff0.ReusePolicyAllow + bh.setType(blockTypeCompressed) + var lh literalsHeader + if reUsed { + if debug { + println("Reused tree, compressed to", len(out)) + } + lh.setType(literalsBlockTreeless) + } else { + if debug { + println("New tree, compressed to", len(out), "tree size:", len(b.litEnc.OutTable)) + } + lh.setType(literalsBlockCompressed) + } + // Set sizes + lh.setSizes(len(out), len(b.literals), single) + bh.setSize(uint32(len(out) + lh.size() + 1)) + + // Write block headers. + b.output = bh.appendTo(b.output) + b.output = lh.appendTo(b.output) + // Add compressed data. + b.output = append(b.output, out...) + // No sequences. + b.output = append(b.output, 0) + return nil +} + +// fuzzFseEncoder can be used to fuzz the FSE encoder. +func fuzzFseEncoder(data []byte) int { + if len(data) > maxSequences || len(data) < 2 { + return 0 + } + enc := fseEncoder{} + hist := enc.Histogram()[:256] + maxSym := uint8(0) + for i, v := range data { + v = v & 63 + data[i] = v + hist[v]++ + if v > maxSym { + maxSym = v + } + } + if maxSym == 0 { + // All 0 + return 0 + } + maxCount := func(a []uint32) int { + var max uint32 + for _, v := range a { + if v > max { + max = v + } + } + return int(max) + } + cnt := maxCount(hist[:maxSym]) + if cnt == len(data) { + // RLE + return 0 + } + enc.HistogramFinished(maxSym, cnt) + err := enc.normalizeCount(len(data)) + if err != nil { + return 0 + } + _, err = enc.writeCount(nil) + if err != nil { + panic(err) + } + return 1 +} + +// encode will encode the block and append the output in b.output. +func (b *blockEnc) encode(raw bool) error { + if len(b.sequences) == 0 { + return b.encodeLits(raw) + } + // We want some difference + if len(b.literals) > (b.size - (b.size >> 5)) { + return errIncompressible + } + + var bh blockHeader + var lh literalsHeader + bh.setLast(b.last) + bh.setType(blockTypeCompressed) + // Store offset of the block header. Needed when we know the size. + bhOffset := len(b.output) + b.output = bh.appendTo(b.output) + + var ( + out []byte + reUsed, single bool + err error + ) + if len(b.literals) >= 1024 && !raw { + // Use 4 Streams. + out, reUsed, err = huff0.Compress4X(b.literals, b.litEnc) + } else if len(b.literals) > 32 && !raw { + // Use 1 stream + single = true + out, reUsed, err = huff0.Compress1X(b.literals, b.litEnc) + } else { + err = huff0.ErrIncompressible + } + + switch err { + case huff0.ErrIncompressible: + lh.setType(literalsBlockRaw) + lh.setSize(len(b.literals)) + b.output = lh.appendTo(b.output) + b.output = append(b.output, b.literals...) + if debug { + println("Adding literals RAW, length", len(b.literals)) + } + case huff0.ErrUseRLE: + lh.setType(literalsBlockRLE) + lh.setSize(len(b.literals)) + b.output = lh.appendTo(b.output) + b.output = append(b.output, b.literals[0]) + if debug { + println("Adding literals RLE") + } + default: + if debug { + println("Adding literals ERROR:", err) + } + return err + case nil: + // Compressed litLen... + if reUsed { + if debug { + println("reused tree") + } + lh.setType(literalsBlockTreeless) + } else { + if debug { + println("new tree, size:", len(b.litEnc.OutTable)) + } + lh.setType(literalsBlockCompressed) + if debug { + _, _, err := huff0.ReadTable(out, nil) + if err != nil { + panic(err) + } + } + } + lh.setSizes(len(out), len(b.literals), single) + if debug { + printf("Compressed %d literals to %d bytes", len(b.literals), len(out)) + println("Adding literal header:", lh) + } + b.output = lh.appendTo(b.output) + b.output = append(b.output, out...) + b.litEnc.Reuse = huff0.ReusePolicyAllow + if debug { + println("Adding literals compressed") + } + } + // Sequence compression + + // Write the number of sequences + switch { + case len(b.sequences) < 128: + b.output = append(b.output, uint8(len(b.sequences))) + case len(b.sequences) < 0x7f00: // TODO: this could be wrong + n := len(b.sequences) + b.output = append(b.output, 128+uint8(n>>8), uint8(n)) + default: + n := len(b.sequences) - 0x7f00 + b.output = append(b.output, 255, uint8(n), uint8(n>>8)) + } + if debug { + println("Encoding", len(b.sequences), "sequences") + } + b.genCodes() + llEnc := b.coders.llEnc + ofEnc := b.coders.ofEnc + mlEnc := b.coders.mlEnc + err = llEnc.normalizeCount(len(b.sequences)) + if err != nil { + return err + } + err = ofEnc.normalizeCount(len(b.sequences)) + if err != nil { + return err + } + err = mlEnc.normalizeCount(len(b.sequences)) + if err != nil { + return err + } + + // Choose the best compression mode for each type. + // Will evaluate the new vs predefined and previous. + chooseComp := func(cur, prev, preDef *fseEncoder) (*fseEncoder, seqCompMode) { + // See if predefined/previous is better + hist := cur.count[:cur.symbolLen] + nSize := cur.approxSize(hist) + cur.maxHeaderSize() + predefSize := preDef.approxSize(hist) + prevSize := prev.approxSize(hist) + + // Add a small penalty for new encoders. + // Don't bother with extremely small (<2 byte gains). + nSize = nSize + (nSize+2*8*16)>>4 + switch { + case predefSize <= prevSize && predefSize <= nSize || forcePreDef: + if debug { + println("Using predefined", predefSize>>3, "<=", nSize>>3) + } + return preDef, compModePredefined + case prevSize <= nSize: + if debug { + println("Using previous", prevSize>>3, "<=", nSize>>3) + } + return prev, compModeRepeat + default: + if debug { + println("Using new, predef", predefSize>>3, ". previous:", prevSize>>3, ">", nSize>>3, "header max:", cur.maxHeaderSize()>>3, "bytes") + println("tl:", cur.actualTableLog, "symbolLen:", cur.symbolLen, "norm:", cur.norm[:cur.symbolLen], "hist", cur.count[:cur.symbolLen]) + } + return cur, compModeFSE + } + } + + // Write compression mode + var mode uint8 + if llEnc.useRLE { + mode |= uint8(compModeRLE) << 6 + llEnc.setRLE(b.sequences[0].llCode) + if debug { + println("llEnc.useRLE") + } + } else { + var m seqCompMode + llEnc, m = chooseComp(llEnc, b.coders.llPrev, &fsePredefEnc[tableLiteralLengths]) + mode |= uint8(m) << 6 + } + if ofEnc.useRLE { + mode |= uint8(compModeRLE) << 4 + ofEnc.setRLE(b.sequences[0].ofCode) + if debug { + println("ofEnc.useRLE") + } + } else { + var m seqCompMode + ofEnc, m = chooseComp(ofEnc, b.coders.ofPrev, &fsePredefEnc[tableOffsets]) + mode |= uint8(m) << 4 + } + + if mlEnc.useRLE { + mode |= uint8(compModeRLE) << 2 + mlEnc.setRLE(b.sequences[0].mlCode) + if debug { + println("mlEnc.useRLE, code: ", b.sequences[0].mlCode, "value", b.sequences[0].matchLen) + } + } else { + var m seqCompMode + mlEnc, m = chooseComp(mlEnc, b.coders.mlPrev, &fsePredefEnc[tableMatchLengths]) + mode |= uint8(m) << 2 + } + b.output = append(b.output, mode) + if debug { + printf("Compression modes: 0b%b", mode) + } + b.output, err = llEnc.writeCount(b.output) + if err != nil { + return err + } + start := len(b.output) + b.output, err = ofEnc.writeCount(b.output) + if err != nil { + return err + } + if false { + println("block:", b.output[start:], "tablelog", ofEnc.actualTableLog, "maxcount:", ofEnc.maxCount) + fmt.Printf("selected TableLog: %d, Symbol length: %d\n", ofEnc.actualTableLog, ofEnc.symbolLen) + for i, v := range ofEnc.norm[:ofEnc.symbolLen] { + fmt.Printf("%3d: %5d -> %4d \n", i, ofEnc.count[i], v) + } + } + b.output, err = mlEnc.writeCount(b.output) + if err != nil { + return err + } + + // Maybe in block? + wr := &b.wr + wr.reset(b.output) + + var ll, of, ml cState + + // Current sequence + seq := len(b.sequences) - 1 + s := b.sequences[seq] + llEnc.setBits(llBitsTable[:]) + mlEnc.setBits(mlBitsTable[:]) + ofEnc.setBits(nil) + + llTT, ofTT, mlTT := llEnc.ct.symbolTT[:256], ofEnc.ct.symbolTT[:256], mlEnc.ct.symbolTT[:256] + + // We have 3 bounds checks here (and in the loop). + // Since we are iterating backwards it is kinda hard to avoid. + llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] + ll.init(wr, &llEnc.ct, llB) + of.init(wr, &ofEnc.ct, ofB) + wr.flush32() + ml.init(wr, &mlEnc.ct, mlB) + + // Each of these lookups also generates a bounds check. + wr.addBits32NC(s.litLen, llB.outBits) + wr.addBits32NC(s.matchLen, mlB.outBits) + wr.flush32() + wr.addBits32NC(s.offset, ofB.outBits) + if debugSequences { + println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB) + } + seq-- + if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 { + // No need to flush (common) + for seq >= 0 { + s = b.sequences[seq] + wr.flush32() + llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] + // tabelog max is 8 for all. + of.encode(ofB) + ml.encode(mlB) + ll.encode(llB) + wr.flush32() + + // We checked that all can stay within 32 bits + wr.addBits32NC(s.litLen, llB.outBits) + wr.addBits32NC(s.matchLen, mlB.outBits) + wr.addBits32NC(s.offset, ofB.outBits) + + if debugSequences { + println("Encoded seq", seq, s) + } + + seq-- + } + } else { + for seq >= 0 { + s = b.sequences[seq] + wr.flush32() + llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode] + // tabelog max is below 8 for each. + of.encode(ofB) + ml.encode(mlB) + ll.encode(llB) + wr.flush32() + + // ml+ll = max 32 bits total + wr.addBits32NC(s.litLen, llB.outBits) + wr.addBits32NC(s.matchLen, mlB.outBits) + wr.flush32() + wr.addBits32NC(s.offset, ofB.outBits) + + if debugSequences { + println("Encoded seq", seq, s) + } + + seq-- + } + } + ml.flush(mlEnc.actualTableLog) + of.flush(ofEnc.actualTableLog) + ll.flush(llEnc.actualTableLog) + err = wr.close() + if err != nil { + return err + } + b.output = wr.out + + if len(b.output)-3-bhOffset >= b.size { + // Maybe even add a bigger margin. + b.litEnc.Reuse = huff0.ReusePolicyNone + return errIncompressible + } + + // Size is output minus block header. + bh.setSize(uint32(len(b.output)-bhOffset) - 3) + if debug { + println("Rewriting block header", bh) + } + _ = bh.appendTo(b.output[bhOffset:bhOffset]) + b.coders.setPrev(llEnc, mlEnc, ofEnc) + return nil +} + +var errIncompressible = errors.New("incompressible") + +func (b *blockEnc) genCodes() { + if len(b.sequences) == 0 { + // nothing to do + return + } + + if len(b.sequences) > math.MaxUint16 { + panic("can only encode up to 64K sequences") + } + // No bounds checks after here: + llH := b.coders.llEnc.Histogram()[:256] + ofH := b.coders.ofEnc.Histogram()[:256] + mlH := b.coders.mlEnc.Histogram()[:256] + for i := range llH { + llH[i] = 0 + } + for i := range ofH { + ofH[i] = 0 + } + for i := range mlH { + mlH[i] = 0 + } + + var llMax, ofMax, mlMax uint8 + for i, seq := range b.sequences { + v := llCode(seq.litLen) + seq.llCode = v + llH[v]++ + if v > llMax { + llMax = v + } + + v = ofCode(seq.offset) + seq.ofCode = v + ofH[v]++ + if v > ofMax { + ofMax = v + } + + v = mlCode(seq.matchLen) + seq.mlCode = v + mlH[v]++ + if v > mlMax { + mlMax = v + if debugAsserts && mlMax > maxMatchLengthSymbol { + panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen)) + } + } + b.sequences[i] = seq + } + maxCount := func(a []uint32) int { + var max uint32 + for _, v := range a { + if v > max { + max = v + } + } + return int(max) + } + if debugAsserts && mlMax > maxMatchLengthSymbol { + panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d)", mlMax)) + } + if debugAsserts && ofMax > maxOffsetBits { + panic(fmt.Errorf("ofMax > maxOffsetBits (%d)", ofMax)) + } + if debugAsserts && llMax > maxLiteralLengthSymbol { + panic(fmt.Errorf("llMax > maxLiteralLengthSymbol (%d)", llMax)) + } + + b.coders.mlEnc.HistogramFinished(mlMax, maxCount(mlH[:mlMax+1])) + b.coders.ofEnc.HistogramFinished(ofMax, maxCount(ofH[:ofMax+1])) + b.coders.llEnc.HistogramFinished(llMax, maxCount(llH[:llMax+1])) +} diff --git a/vendor/github.com/klauspost/compress/zstd/blocktype_string.go b/vendor/github.com/klauspost/compress/zstd/blocktype_string.go new file mode 100644 index 00000000000..01a01e486e1 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/blocktype_string.go @@ -0,0 +1,85 @@ +// Code generated by "stringer -type=blockType,literalsBlockType,seqCompMode,tableIndex"; DO NOT EDIT. + +package zstd + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[blockTypeRaw-0] + _ = x[blockTypeRLE-1] + _ = x[blockTypeCompressed-2] + _ = x[blockTypeReserved-3] +} + +const _blockType_name = "blockTypeRawblockTypeRLEblockTypeCompressedblockTypeReserved" + +var _blockType_index = [...]uint8{0, 12, 24, 43, 60} + +func (i blockType) String() string { + if i >= blockType(len(_blockType_index)-1) { + return "blockType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _blockType_name[_blockType_index[i]:_blockType_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[literalsBlockRaw-0] + _ = x[literalsBlockRLE-1] + _ = x[literalsBlockCompressed-2] + _ = x[literalsBlockTreeless-3] +} + +const _literalsBlockType_name = "literalsBlockRawliteralsBlockRLEliteralsBlockCompressedliteralsBlockTreeless" + +var _literalsBlockType_index = [...]uint8{0, 16, 32, 55, 76} + +func (i literalsBlockType) String() string { + if i >= literalsBlockType(len(_literalsBlockType_index)-1) { + return "literalsBlockType(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _literalsBlockType_name[_literalsBlockType_index[i]:_literalsBlockType_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[compModePredefined-0] + _ = x[compModeRLE-1] + _ = x[compModeFSE-2] + _ = x[compModeRepeat-3] +} + +const _seqCompMode_name = "compModePredefinedcompModeRLEcompModeFSEcompModeRepeat" + +var _seqCompMode_index = [...]uint8{0, 18, 29, 40, 54} + +func (i seqCompMode) String() string { + if i >= seqCompMode(len(_seqCompMode_index)-1) { + return "seqCompMode(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _seqCompMode_name[_seqCompMode_index[i]:_seqCompMode_index[i+1]] +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[tableLiteralLengths-0] + _ = x[tableOffsets-1] + _ = x[tableMatchLengths-2] +} + +const _tableIndex_name = "tableLiteralLengthstableOffsetstableMatchLengths" + +var _tableIndex_index = [...]uint8{0, 19, 31, 48} + +func (i tableIndex) String() string { + if i >= tableIndex(len(_tableIndex_index)-1) { + return "tableIndex(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _tableIndex_name[_tableIndex_index[i]:_tableIndex_index[i+1]] +} diff --git a/vendor/github.com/klauspost/compress/zstd/bytebuf.go b/vendor/github.com/klauspost/compress/zstd/bytebuf.go new file mode 100644 index 00000000000..658ef78380e --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/bytebuf.go @@ -0,0 +1,127 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "fmt" + "io" + "io/ioutil" +) + +type byteBuffer interface { + // Read up to 8 bytes. + // Returns nil if no more input is available. + readSmall(n int) []byte + + // Read >8 bytes. + // MAY use the destination slice. + readBig(n int, dst []byte) ([]byte, error) + + // Read a single byte. + readByte() (byte, error) + + // Skip n bytes. + skipN(n int) error +} + +// in-memory buffer +type byteBuf []byte + +func (b *byteBuf) readSmall(n int) []byte { + if debugAsserts && n > 8 { + panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) + } + bb := *b + if len(bb) < n { + return nil + } + r := bb[:n] + *b = bb[n:] + return r +} + +func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) { + bb := *b + if len(bb) < n { + return nil, io.ErrUnexpectedEOF + } + r := bb[:n] + *b = bb[n:] + return r, nil +} + +func (b *byteBuf) remain() []byte { + return *b +} + +func (b *byteBuf) readByte() (byte, error) { + bb := *b + if len(bb) < 1 { + return 0, nil + } + r := bb[0] + *b = bb[1:] + return r, nil +} + +func (b *byteBuf) skipN(n int) error { + bb := *b + if len(bb) < n { + return io.ErrUnexpectedEOF + } + *b = bb[n:] + return nil +} + +// wrapper around a reader. +type readerWrapper struct { + r io.Reader + tmp [8]byte +} + +func (r *readerWrapper) readSmall(n int) []byte { + if debugAsserts && n > 8 { + panic(fmt.Errorf("small read > 8 (%d). use readBig", n)) + } + n2, err := io.ReadFull(r.r, r.tmp[:n]) + // We only really care about the actual bytes read. + if n2 != n { + if debug { + println("readSmall: got", n2, "want", n, "err", err) + } + return nil + } + return r.tmp[:n] +} + +func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) { + if cap(dst) < n { + dst = make([]byte, n) + } + n2, err := io.ReadFull(r.r, dst[:n]) + if err == io.EOF && n > 0 { + err = io.ErrUnexpectedEOF + } + return dst[:n2], err +} + +func (r *readerWrapper) readByte() (byte, error) { + n2, err := r.r.Read(r.tmp[:1]) + if err != nil { + return 0, err + } + if n2 != 1 { + return 0, io.ErrUnexpectedEOF + } + return r.tmp[0], nil +} + +func (r *readerWrapper) skipN(n int) error { + n2, err := io.CopyN(ioutil.Discard, r.r, int64(n)) + if n2 != int64(n) { + err = io.ErrUnexpectedEOF + } + return err +} diff --git a/vendor/github.com/klauspost/compress/zstd/bytereader.go b/vendor/github.com/klauspost/compress/zstd/bytereader.go new file mode 100644 index 00000000000..dc4378b6401 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/bytereader.go @@ -0,0 +1,74 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +// byteReader provides a byte reader that reads +// little endian values from a byte stream. +// The input stream is manually advanced. +// The reader performs no bounds checks. +type byteReader struct { + b []byte + off int +} + +// init will initialize the reader and set the input. +func (b *byteReader) init(in []byte) { + b.b = in + b.off = 0 +} + +// advance the stream b n bytes. +func (b *byteReader) advance(n uint) { + b.off += int(n) +} + +// overread returns whether we have advanced too far. +func (b *byteReader) overread() bool { + return b.off > len(b.b) +} + +// Int32 returns a little endian int32 starting at current offset. +func (b byteReader) Int32() int32 { + b2 := b.b[b.off : b.off+4 : b.off+4] + v3 := int32(b2[3]) + v2 := int32(b2[2]) + v1 := int32(b2[1]) + v0 := int32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// Uint8 returns the next byte +func (b *byteReader) Uint8() uint8 { + v := b.b[b.off] + return v +} + +// Uint32 returns a little endian uint32 starting at current offset. +func (b byteReader) Uint32() uint32 { + if r := b.remain(); r < 4 { + // Very rare + v := uint32(0) + for i := 1; i <= r; i++ { + v = (v << 8) | uint32(b.b[len(b.b)-i]) + } + return v + } + b2 := b.b[b.off : b.off+4 : b.off+4] + v3 := uint32(b2[3]) + v2 := uint32(b2[2]) + v1 := uint32(b2[1]) + v0 := uint32(b2[0]) + return v0 | (v1 << 8) | (v2 << 16) | (v3 << 24) +} + +// unread returns the unread portion of the input. +func (b byteReader) unread() []byte { + return b.b[b.off:] +} + +// remain will return the number of bytes remaining. +func (b byteReader) remain() int { + return len(b.b) - b.off +} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder.go b/vendor/github.com/klauspost/compress/zstd/decoder.go new file mode 100644 index 00000000000..2340255051e --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/decoder.go @@ -0,0 +1,518 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "bytes" + "errors" + "io" + "sync" +) + +// Decoder provides decoding of zstandard streams. +// The decoder has been designed to operate without allocations after a warmup. +// This means that you should store the decoder for best performance. +// To re-use a stream decoder, use the Reset(r io.Reader) error to switch to another stream. +// A decoder can safely be re-used even if the previous stream failed. +// To release the resources, you must call the Close() function on a decoder. +type Decoder struct { + o decoderOptions + + // Unreferenced decoders, ready for use. + decoders chan *blockDec + + // Unreferenced decoders, ready for use. + frames chan *frameDec + + // Streams ready to be decoded. + stream chan decodeStream + + // Current read position used for Reader functionality. + current decoderState + + // Custom dictionaries + dicts map[uint32]struct{} + + // streamWg is the waitgroup for all streams + streamWg sync.WaitGroup +} + +// decoderState is used for maintaining state when the decoder +// is used for streaming. +type decoderState struct { + // current block being written to stream. + decodeOutput + + // output in order to be written to stream. + output chan decodeOutput + + // cancel remaining output. + cancel chan struct{} + + flushed bool +} + +var ( + // Check the interfaces we want to support. + _ = io.WriterTo(&Decoder{}) + _ = io.Reader(&Decoder{}) +) + +// NewReader creates a new decoder. +// A nil Reader can be provided in which case Reset can be used to start a decode. +// +// A Decoder can be used in two modes: +// +// 1) As a stream, or +// 2) For stateless decoding using DecodeAll. +// +// Only a single stream can be decoded concurrently, but the same decoder +// can run multiple concurrent stateless decodes. It is even possible to +// use stateless decodes while a stream is being decoded. +// +// The Reset function can be used to initiate a new stream, which is will considerably +// reduce the allocations normally caused by NewReader. +func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) { + initPredefined() + var d Decoder + d.o.setDefault() + for _, o := range opts { + err := o(&d.o) + if err != nil { + return nil, err + } + } + d.current.output = make(chan decodeOutput, d.o.concurrent) + d.current.flushed = true + + // Create decoders + d.decoders = make(chan *blockDec, d.o.concurrent) + d.frames = make(chan *frameDec, d.o.concurrent) + for i := 0; i < d.o.concurrent; i++ { + d.frames <- newFrameDec(d.o) + d.decoders <- newBlockDec(d.o.lowMem) + } + + if r == nil { + return &d, nil + } + return &d, d.Reset(r) +} + +// Read bytes from the decompressed stream into p. +// Returns the number of bytes written and any error that occurred. +// When the stream is done, io.EOF will be returned. +func (d *Decoder) Read(p []byte) (int, error) { + if d.stream == nil { + return 0, errors.New("no input has been initialized") + } + var n int + for { + if len(d.current.b) > 0 { + filled := copy(p, d.current.b) + p = p[filled:] + d.current.b = d.current.b[filled:] + n += filled + } + if len(p) == 0 { + break + } + if len(d.current.b) == 0 { + // We have an error and no more data + if d.current.err != nil { + break + } + if !d.nextBlock(n == 0) { + return n, nil + } + } + } + if len(d.current.b) > 0 { + if debug { + println("returning", n, "still bytes left:", len(d.current.b)) + } + // Only return error at end of block + return n, nil + } + if d.current.err != nil { + d.drainOutput() + } + if debug { + println("returning", n, d.current.err, len(d.decoders)) + } + return n, d.current.err +} + +// Reset will reset the decoder the supplied stream after the current has finished processing. +// Note that this functionality cannot be used after Close has been called. +func (d *Decoder) Reset(r io.Reader) error { + if d.current.err == ErrDecoderClosed { + return d.current.err + } + if r == nil { + return errors.New("nil Reader sent as input") + } + + if d.stream == nil { + d.stream = make(chan decodeStream, 1) + d.streamWg.Add(1) + go d.startStreamDecoder(d.stream) + } + + d.drainOutput() + + // If bytes buffer and < 1MB, do sync decoding anyway. + if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 { + if debug { + println("*bytes.Buffer detected, doing sync decode, len:", bb.Len()) + } + b := bb.Bytes() + var dst []byte + if cap(d.current.b) > 0 { + dst = d.current.b + } + + dst, err := d.DecodeAll(b, dst[:0]) + if err == nil { + err = io.EOF + } + d.current.b = dst + d.current.err = err + d.current.flushed = true + if debug { + println("sync decode to ", len(dst), "bytes, err:", err) + } + return nil + } + + // Remove current block. + d.current.decodeOutput = decodeOutput{} + d.current.err = nil + d.current.cancel = make(chan struct{}) + d.current.flushed = false + d.current.d = nil + + d.stream <- decodeStream{ + r: r, + output: d.current.output, + cancel: d.current.cancel, + } + return nil +} + +// drainOutput will drain the output until errEndOfStream is sent. +func (d *Decoder) drainOutput() { + if d.current.cancel != nil { + println("cancelling current") + close(d.current.cancel) + d.current.cancel = nil + } + if d.current.d != nil { + if debug { + printf("re-adding current decoder %p, decoders: %d", d.current.d, len(d.decoders)) + } + d.decoders <- d.current.d + d.current.d = nil + d.current.b = nil + } + if d.current.output == nil || d.current.flushed { + println("current already flushed") + return + } + for { + select { + case v := <-d.current.output: + if v.d != nil { + if debug { + printf("re-adding decoder %p", v.d) + } + d.decoders <- v.d + } + if v.err == errEndOfStream { + println("current flushed") + d.current.flushed = true + return + } + } + } +} + +// WriteTo writes data to w until there's no more data to write or when an error occurs. +// The return value n is the number of bytes written. +// Any error encountered during the write is also returned. +func (d *Decoder) WriteTo(w io.Writer) (int64, error) { + if d.stream == nil { + return 0, errors.New("no input has been initialized") + } + var n int64 + for { + if len(d.current.b) > 0 { + n2, err2 := w.Write(d.current.b) + n += int64(n2) + if err2 != nil && d.current.err == nil { + d.current.err = err2 + break + } + } + if d.current.err != nil { + break + } + d.nextBlock(true) + } + err := d.current.err + if err != nil { + d.drainOutput() + } + if err == io.EOF { + err = nil + } + return n, err +} + +// DecodeAll allows stateless decoding of a blob of bytes. +// Output will be appended to dst, so if the destination size is known +// you can pre-allocate the destination slice to avoid allocations. +// DecodeAll can be used concurrently. +// The Decoder concurrency limits will be respected. +func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) { + if d.current.err == ErrDecoderClosed { + return dst, ErrDecoderClosed + } + + // Grab a block decoder and frame decoder. + block, frame := <-d.decoders, <-d.frames + defer func() { + if debug { + printf("re-adding decoder: %p", block) + } + d.decoders <- block + frame.rawInput = nil + frame.bBuf = nil + d.frames <- frame + }() + frame.bBuf = input + + for { + err := frame.reset(&frame.bBuf) + if err == io.EOF { + return dst, nil + } + if err != nil { + return dst, err + } + if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) { + return dst, ErrDecoderSizeExceeded + } + if frame.FrameContentSize > 0 && frame.FrameContentSize < 1<<30 { + // Never preallocate moe than 1 GB up front. + if uint64(cap(dst)) < frame.FrameContentSize { + dst2 := make([]byte, len(dst), len(dst)+int(frame.FrameContentSize)) + copy(dst2, dst) + dst = dst2 + } + } + if cap(dst) == 0 { + // Allocate window size * 2 by default if nothing is provided and we didn't get frame content size. + size := frame.WindowSize * 2 + // Cap to 1 MB. + if size > 1<<20 { + size = 1 << 20 + } + dst = make([]byte, 0, size) + } + + dst, err = frame.runDecoder(dst, block) + if err != nil { + return dst, err + } + if len(frame.bBuf) == 0 { + break + } + } + return dst, nil +} + +// nextBlock returns the next block. +// If an error occurs d.err will be set. +// Optionally the function can block for new output. +// If non-blocking mode is used the returned boolean will be false +// if no data was available without blocking. +func (d *Decoder) nextBlock(blocking bool) (ok bool) { + if d.current.d != nil { + if debug { + printf("re-adding current decoder %p", d.current.d) + } + d.decoders <- d.current.d + d.current.d = nil + } + if d.current.err != nil { + // Keep error state. + return blocking + } + + if blocking { + d.current.decodeOutput = <-d.current.output + } else { + select { + case d.current.decodeOutput = <-d.current.output: + default: + return false + } + } + if debug { + println("got", len(d.current.b), "bytes, error:", d.current.err) + } + return true +} + +// Close will release all resources. +// It is NOT possible to reuse the decoder after this. +func (d *Decoder) Close() { + if d.current.err == ErrDecoderClosed { + return + } + d.drainOutput() + if d.stream != nil { + close(d.stream) + d.streamWg.Wait() + d.stream = nil + } + if d.decoders != nil { + close(d.decoders) + for dec := range d.decoders { + dec.Close() + } + d.decoders = nil + } + if d.current.d != nil { + d.current.d.Close() + d.current.d = nil + } + d.current.err = ErrDecoderClosed +} + +// IOReadCloser returns the decoder as an io.ReadCloser for convenience. +// Any changes to the decoder will be reflected, so the returned ReadCloser +// can be reused along with the decoder. +// io.WriterTo is also supported by the returned ReadCloser. +func (d *Decoder) IOReadCloser() io.ReadCloser { + return closeWrapper{d: d} +} + +// closeWrapper wraps a function call as a closer. +type closeWrapper struct { + d *Decoder +} + +// WriteTo forwards WriteTo calls to the decoder. +func (c closeWrapper) WriteTo(w io.Writer) (n int64, err error) { + return c.d.WriteTo(w) +} + +// Read forwards read calls to the decoder. +func (c closeWrapper) Read(p []byte) (n int, err error) { + return c.d.Read(p) +} + +// Close closes the decoder. +func (c closeWrapper) Close() error { + c.d.Close() + return nil +} + +type decodeOutput struct { + d *blockDec + b []byte + err error +} + +type decodeStream struct { + r io.Reader + + // Blocks ready to be written to output. + output chan decodeOutput + + // cancel reading from the input + cancel chan struct{} +} + +// errEndOfStream indicates that everything from the stream was read. +var errEndOfStream = errors.New("end-of-stream") + +// Create Decoder: +// Spawn n block decoders. These accept tasks to decode a block. +// Create goroutine that handles stream processing, this will send history to decoders as they are available. +// Decoders update the history as they decode. +// When a block is returned: +// a) history is sent to the next decoder, +// b) content written to CRC. +// c) return data to WRITER. +// d) wait for next block to return data. +// Once WRITTEN, the decoders reused by the writer frame decoder for re-use. +func (d *Decoder) startStreamDecoder(inStream chan decodeStream) { + defer d.streamWg.Done() + frame := newFrameDec(d.o) + for stream := range inStream { + if debug { + println("got new stream") + } + br := readerWrapper{r: stream.r} + decodeStream: + for { + err := frame.reset(&br) + if debug && err != nil { + println("Frame decoder returned", err) + } + if err != nil { + stream.output <- decodeOutput{ + err: err, + } + break + } + if debug { + println("starting frame decoder") + } + + // This goroutine will forward history between frames. + frame.frameDone.Add(1) + frame.initAsync() + + go frame.startDecoder(stream.output) + decodeFrame: + // Go through all blocks of the frame. + for { + dec := <-d.decoders + select { + case <-stream.cancel: + if !frame.sendErr(dec, io.EOF) { + // To not let the decoder dangle, send it back. + stream.output <- decodeOutput{d: dec} + } + break decodeStream + default: + } + err := frame.next(dec) + switch err { + case io.EOF: + // End of current frame, no error + println("EOF on next block") + break decodeFrame + case nil: + continue + default: + println("block decoder returned", err) + break decodeStream + } + } + // All blocks have started decoding, check if there are more frames. + println("waiting for done") + frame.frameDone.Wait() + println("done waiting...") + } + frame.frameDone.Wait() + println("Sending EOS") + stream.output <- decodeOutput{err: errEndOfStream} + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/decoder_options.go b/vendor/github.com/klauspost/compress/zstd/decoder_options.go new file mode 100644 index 00000000000..2ac9cd2dd30 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/decoder_options.go @@ -0,0 +1,68 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "fmt" + "runtime" +) + +// DOption is an option for creating a decoder. +type DOption func(*decoderOptions) error + +// options retains accumulated state of multiple options. +type decoderOptions struct { + lowMem bool + concurrent int + maxDecodedSize uint64 +} + +func (o *decoderOptions) setDefault() { + *o = decoderOptions{ + // use less ram: true for now, but may change. + lowMem: true, + concurrent: runtime.GOMAXPROCS(0), + } + o.maxDecodedSize = 1 << 63 +} + +// WithDecoderLowmem will set whether to use a lower amount of memory, +// but possibly have to allocate more while running. +func WithDecoderLowmem(b bool) DOption { + return func(o *decoderOptions) error { o.lowMem = b; return nil } +} + +// WithDecoderConcurrency will set the concurrency, +// meaning the maximum number of decoders to run concurrently. +// The value supplied must be at least 1. +// By default this will be set to GOMAXPROCS. +func WithDecoderConcurrency(n int) DOption { + return func(o *decoderOptions) error { + if n <= 0 { + return fmt.Errorf("Concurrency must be at least 1") + } + o.concurrent = n + return nil + } +} + +// WithDecoderMaxMemory allows to set a maximum decoded size for in-memory +// non-streaming operations or maximum window size for streaming operations. +// This can be used to control memory usage of potentially hostile content. +// For streaming operations, the maximum window size is capped at 1<<30 bytes. +// Maximum and default is 1 << 63 bytes. +func WithDecoderMaxMemory(n uint64) DOption { + return func(o *decoderOptions) error { + if n == 0 { + return errors.New("WithDecoderMaxMemory must be at least 1") + } + if n > 1<<63 { + return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63") + } + o.maxDecodedSize = n + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go new file mode 100644 index 00000000000..c120d905481 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go @@ -0,0 +1,518 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +const ( + betterLongTableBits = 19 // Bits used in the long match table + betterLongTableSize = 1 << betterLongTableBits // Size of the table + + // Note: Increasing the short table bits or making the hash shorter + // can actually lead to compression degradation since it will 'steal' more from the + // long match table and match offsets are quite big. + // This greatly depends on the type of input. + betterShortTableBits = 13 // Bits used in the short match table + betterShortTableSize = 1 << betterShortTableBits // Size of the table +) + +type prevEntry struct { + offset int32 + prev int32 +} + +// betterFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches. +// The long match table contains the previous entry with the same hash, +// effectively making it a "chain" of length 2. +// When we find a long match we choose between the two values and select the longest. +// When we find a short match, after checking the long, we check if we can find a long at n+1 +// and that it is longer (lazy matching). +type betterFastEncoder struct { + fastBase + table [betterShortTableSize]tableEntry + longTable [betterLongTableSize]prevEntry +} + +// Encode improves compression... +func (e *betterFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = prevEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + v2 := e.longTable[i].prev + if v < minOff { + v = 0 + v2 = 0 + } else { + v = v - e.cur + e.maxMatchOff + if v2 < minOff { + v2 = 0 + } else { + v2 = v2 - e.cur + e.maxMatchOff + } + } + e.longTable[i] = prevEntry{ + offset: v, + prev: v2, + } + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 9 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + var matched int32 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + off := s + e.cur + e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset} + e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)} + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Index match start+1 (long) -> s - 1 + index0 := s + repOff + s += lenght + repOff + + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + continue + } + const repOff2 = 1 + + // We deviate from the reference encoder and also check offset 2. + // Still slower and not much better, so disabled. + // repIndex = s - offset2 + repOff2 + if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) { + // Consider history as well. + var seq seq + lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 2 + seq.offset = 2 + if debugSequences { + println("repeat sequence 2", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + index0 := s + repOff2 + s += lenght + repOff2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + + // Index skipped... + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + cv = load6432(src, s) + // Swap offsets + offset1, offset2 = offset2, offset1 + continue + } + } + // Find the offsets of our two matches. + coffsetL := candidateL.offset - e.cur + coffsetLP := candidateL.prev - e.cur + + // Check if we have a long match. + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetL+8, src) + 8 + t = coffsetL + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8 + if prevMatch > matched { + matched = prevMatch + t = coffsetLP + } + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + } + break + } + + // Check if we have a long match on prev. + if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) { + // Found a long match, at least 8 bytes. + matched = e.matchlen(s+8, coffsetLP+8, src) + 8 + t = coffsetLP + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + coffsetS := candidateS.offset - e.cur + + // Check if we have a short match. + if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + matched = e.matchlen(s+4, coffsetS+4, src) + 4 + + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, betterLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = candidateL.offset - e.cur + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset} + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("long match (after short)") + } + break + } + } + + // Check prev long... + coffsetL = candidateL.prev - e.cur + if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) { + // Found a long match, at least 8 bytes. + matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8 + if matchedNext > matched { + t = coffsetL + s += checkAt + matched = matchedNext + if debugMatches { + println("prev long match (after short)") + } + break + } + } + t = coffsetS + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the n-byte match as long as possible. + l := matched + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) -> s - 1 + index0 := s - l + 1 + for index0 < s-1 { + cv0 := load6432(src, index0) + cv1 := cv0 >> 8 + h0 := hash8(cv0, betterLongTableBits) + off := index0 + e.cur + e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset} + e.table[hash5(cv1, betterShortTableBits)] = tableEntry{offset: off + 1, val: uint32(cv1)} + index0 += 2 + } + + cv = load6432(src, s) + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, betterShortTableBits) + nextHashL := hash8(cv, betterLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset} + e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + e.Encode(blk, src) +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go new file mode 100644 index 00000000000..5ebead9dc8e --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go @@ -0,0 +1,674 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "fmt" + +const ( + dFastLongTableBits = 17 // Bits used in the long match table + dFastLongTableSize = 1 << dFastLongTableBits // Size of the table + dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + + dFastShortTableBits = tableBits // Bits used in the short match table + dFastShortTableSize = 1 << dFastShortTableBits // Size of the table + dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. +) + +type doubleFastEncoder struct { + fastEncoder + longTable [dFastLongTableSize]tableEntry +} + +// Encode mimmics functionality in zstd_dfast.c +func (e *doubleFastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = tableEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + for i := range e.longTable[:] { + v := e.longTable[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.longTable[i].offset = v + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + // We allow the encoder to optionally turn off repeat offsets across blocks + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.table[nextHashS] = entry + + if canRepeat { + if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + + seq.matchLen = uint32(lenght - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += lenght + repOff + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, lenght) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + } + // Find the offsets of our two matches. + coffsetL := s - (candidateL.offset - e.cur) + coffsetS := s - (candidateS.offset - e.cur) + + // Check if we have a long match. + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + // Check if we have a short match. + if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, dFastLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = s - (candidateL.offset - e.cur) + checkAt + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + s += checkAt + if debugMatches { + println("long match (after short)") + } + break + } + + t = candidateS.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + l := e.matchlen(s+4, t+4, src) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) and start+2 (short) + index0 := s - l + 1 + // Index match end-2 (long) and end-1 (short) + index1 := s - 2 + + cv0 := load6432(src, index0) + cv1 := load6432(src, index1) + te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} + te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} + e.longTable[hash8(cv0, dFastLongTableBits)] = te0 + e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + cv0 >>= 8 + cv1 >>= 8 + te0.offset++ + te1.offset++ + te0.val = uint32(cv0) + te1.val = uint32(cv1) + e.table[hash5(cv0, dFastShortTableBits)] = te0 + e.table[hash5(cv1, dFastShortTableBits)] = te1 + + cv = load6432(src, s) + + if !canRepeat { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + l := 4 + e.matchlen(s+4, o2+4, src) + + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.table[nextHashS] = entry + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *doubleFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + const ( + // Input margin is the number of bytes we read (8) + // and the maximum we will read ahead (2) + inputMargin = 8 + 2 + minNonLiteralBlockSize = 16 + ) + + // Protect against e.cur wraparound. + if e.cur >= bufferReset { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + for i := range e.longTable[:] { + e.longTable[i] = tableEntry{} + } + e.cur = e.maxMatchOff + } + + s := int32(0) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 1. + const stepSize = 1 + + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + var t int32 + for { + + nextHashS := hash5(cv, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + candidateL := e.longTable[nextHashL] + candidateS := e.table[nextHashS] + + const repOff = 1 + repIndex := s - offset1 + repOff + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.table[nextHashS] = entry + + if len(blk.sequences) > 2 { + if load3232(src, repIndex) == uint32(cv>>(repOff*8)) { + // Consider history as well. + var seq seq + //length := 4 + e.matchlen(s+4+repOff, repIndex+4, src) + length := 4 + int32(matchLen(src[s+4+repOff:], src[repIndex+4:])) + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + repOff + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + repOff + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + } + // Find the offsets of our two matches. + coffsetL := s - (candidateL.offset - e.cur) + coffsetS := s - (candidateS.offset - e.cur) + + // Check if we have a long match. + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugMatches { + println("long match") + } + break + } + + // Check if we have a short match. + if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val { + // found a regular match + // See if we can find a long match at s+1 + const checkAt = 1 + cv := load6432(src, s+checkAt) + nextHashL = hash8(cv, dFastLongTableBits) + candidateL = e.longTable[nextHashL] + coffsetL = s - (candidateL.offset - e.cur) + checkAt + + // We can store it, since we have at least a 4 byte match. + e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)} + if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val { + // Found a long match, likely at least 8 bytes. + // Reference encoder checks all 8 bytes, we only check 4, + // but the likelihood of both the first 4 bytes and the hash matching should be enough. + t = candidateL.offset - e.cur + s += checkAt + if debugMatches { + println("long match (after short)") + } + break + } + + t = candidateS.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + if debugMatches { + println("short match") + } + break + } + + // No match found, move forward in input. + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + + // A 4-byte match has been found. Update recent offsets. + // We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlen(s+4, t+4, src) + 4 + l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + + // Write our sequence + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + + // Index match start+1 (long) and start+2 (short) + index0 := s - l + 1 + // Index match end-2 (long) and end-1 (short) + index1 := s - 2 + + cv0 := load6432(src, index0) + cv1 := load6432(src, index1) + te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)} + te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)} + e.longTable[hash8(cv0, dFastLongTableBits)] = te0 + e.longTable[hash8(cv1, dFastLongTableBits)] = te1 + cv0 >>= 8 + cv1 >>= 8 + te0.offset++ + te1.offset++ + te0.val = uint32(cv0) + te1.val = uint32(cv1) + e.table[hash5(cv0, dFastShortTableBits)] = te0 + e.table[hash5(cv1, dFastShortTableBits)] = te1 + + cv = load6432(src, s) + + if len(blk.sequences) <= 2 { + continue + } + + // Check offset 2 + for { + o2 := s - offset2 + if load3232(src, o2) != uint32(cv) { + // Do regular search + break + } + + // Store this, since we have it. + nextHashS := hash5(cv1>>8, dFastShortTableBits) + nextHashL := hash8(cv, dFastLongTableBits) + + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlen(s+4, o2+4, src) + l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + + entry := tableEntry{offset: s + e.cur, val: uint32(cv)} + e.longTable[nextHashL] = entry + e.table[nextHashS] = entry + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + // Finished + break encodeLoop + } + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } + +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_fast.go b/vendor/github.com/klauspost/compress/zstd/enc_fast.go new file mode 100644 index 00000000000..d1d3658e611 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_fast.go @@ -0,0 +1,744 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "fmt" + "math" + "math/bits" + + "github.com/klauspost/compress/zstd/internal/xxhash" +) + +const ( + tableBits = 15 // Bits used in the table + tableSize = 1 << tableBits // Size of the table + tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks. + maxMatchLength = 131074 +) + +type tableEntry struct { + val uint32 + offset int32 +} + +type fastBase struct { + // cur is the offset at the start of hist + cur int32 + // maximum offset. Should be at least 2x block size. + maxMatchOff int32 + hist []byte + crc *xxhash.Digest + tmp [8]byte + blk *blockEnc +} + +type fastEncoder struct { + fastBase + table [tableSize]tableEntry +} + +// CRC returns the underlying CRC writer. +func (e *fastBase) CRC() *xxhash.Digest { + return e.crc +} + +// AppendCRC will append the CRC to the destination slice and return it. +func (e *fastBase) AppendCRC(dst []byte) []byte { + crc := e.crc.Sum(e.tmp[:0]) + dst = append(dst, crc[7], crc[6], crc[5], crc[4]) + return dst +} + +// WindowSize returns the window size of the encoder, +// or a window size small enough to contain the input size, if > 0. +func (e *fastBase) WindowSize(size int) int32 { + if size > 0 && size < int(e.maxMatchOff) { + b := int32(1) << uint(bits.Len(uint(size))) + // Keep minimum window. + if b < 1024 { + b = 1024 + } + return b + } + return e.maxMatchOff +} + +// Block returns the current block. +func (e *fastBase) Block() *blockEnc { + return e.blk +} + +// Encode mimmics functionality in zstd_fast.c +func (e *fastEncoder) Encode(blk *blockEnc, src []byte) { + const ( + inputMargin = 8 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + + // Protect against e.cur wraparound. + for e.cur >= bufferReset { + if len(e.hist) == 0 { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = e.maxMatchOff + break + } + // Shift down everything in the table that isn't already too far away. + minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff + for i := range e.table[:] { + v := e.table[i].offset + if v < minOff { + v = 0 + } else { + v = v - e.cur + e.maxMatchOff + } + e.table[i].offset = v + } + e.cur = e.maxMatchOff + break + } + + s := e.addBlock(src) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + // Override src + src = e.hist + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 2. + const stepSize = 2 + + // TEMPLATE + const hashLog = tableBits + // seems global, but would be nice to tweak. + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // t will contain the match offset when we find one. + // When existing the search loop, we have already checked 4 bytes. + var t int32 + + // We will not use repeat offsets across blocks. + // By not using them for the first 3 matches + canRepeat := len(blk.sequences) > 2 + + for { + if debugAsserts && canRepeat && offset1 == 0 { + panic("offset0 was 0") + } + + nextHash := hash6(cv, hashLog) + nextHash2 := hash6(cv>>8, hashLog) + candidate := e.table[nextHash] + candidate2 := e.table[nextHash2] + repIndex := s - offset1 + 2 + + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} + + if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) { + // Consider history as well. + var seq seq + var length int32 + // length = 4 + e.matchlen(s+6, repIndex+4, src) + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + 2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + sMin := s - e.maxMatchOff + if sMin < 0 { + sMin = 0 + } + for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + 2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + coffset0 := s - (candidate.offset - e.cur) + coffset1 := s - (candidate2.offset - e.cur) + 1 + if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { + // found a regular match + t = candidate.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + break + } + + if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { + // found a regular match + t = candidate2.offset - e.cur + s++ + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + break + } + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // A 4-byte match has been found. We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + if debugAsserts && canRepeat && int(offset1) > len(src) { + panic("invalid offset") + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlen(s+4, t+4, src) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength { + s-- + t-- + l++ + } + + // Write our sequence. + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + // Don't use repeat offsets + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + + // Check offset 2 + if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) { + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlen(s+4, o2+4, src) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Store this, since we have it. + nextHash := hash6(cv, hashLog) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + break encodeLoop + } + // Prepare next loop. + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + blk.recentOffsets[0] = uint32(offset1) + blk.recentOffsets[1] = uint32(offset2) + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +// EncodeNoHist will encode a block with no history and no following blocks. +// Most notable difference is that src will not be copied for history and +// we do not need to check for max match length. +func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) { + const ( + inputMargin = 8 + minNonLiteralBlockSize = 1 + 1 + inputMargin + ) + if debug { + if len(src) > maxBlockSize { + panic("src too big") + } + } + // Protect against e.cur wraparound. + if e.cur >= bufferReset { + for i := range e.table[:] { + e.table[i] = tableEntry{} + } + e.cur = e.maxMatchOff + } + + s := int32(0) + blk.size = len(src) + if len(src) < minNonLiteralBlockSize { + blk.extraLits = len(src) + blk.literals = blk.literals[:len(src)] + copy(blk.literals, src) + return + } + + sLimit := int32(len(src)) - inputMargin + // stepSize is the number of bytes to skip on every main loop iteration. + // It should be >= 2. + const stepSize = 2 + + // TEMPLATE + const hashLog = tableBits + // seems global, but would be nice to tweak. + const kSearchStrength = 8 + + // nextEmit is where in src the next emitLiteral should start from. + nextEmit := s + cv := load6432(src, s) + + // Relative offsets + offset1 := int32(blk.recentOffsets[0]) + offset2 := int32(blk.recentOffsets[1]) + + addLiterals := func(s *seq, until int32) { + if until == nextEmit { + return + } + blk.literals = append(blk.literals, src[nextEmit:until]...) + s.litLen = uint32(until - nextEmit) + } + if debug { + println("recent offsets:", blk.recentOffsets) + } + +encodeLoop: + for { + // t will contain the match offset when we find one. + // When existing the search loop, we have already checked 4 bytes. + var t int32 + + // We will not use repeat offsets across blocks. + // By not using them for the first 3 matches + + for { + nextHash := hash6(cv, hashLog) + nextHash2 := hash6(cv>>8, hashLog) + candidate := e.table[nextHash] + candidate2 := e.table[nextHash2] + repIndex := s - offset1 + 2 + + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)} + + if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) { + // Consider history as well. + var seq seq + // length := 4 + e.matchlen(s+6, repIndex+4, src) + // length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:])) + var length int32 + { + a := src[s+6:] + b := src[repIndex+4:] + endI := len(a) & (math.MaxInt32 - 7) + length = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + length = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + seq.matchLen = uint32(length - zstdMinMatch) + + // We might be able to match backwards. + // Extend as long as we can. + start := s + 2 + // We end the search early, so we don't risk 0 literals + // and have to do special offset treatment. + startLimit := nextEmit + 1 + + sMin := s - e.maxMatchOff + if sMin < 0 { + sMin = 0 + } + for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] { + repIndex-- + start-- + seq.matchLen++ + } + addLiterals(&seq, start) + + // rep 0 + seq.offset = 1 + if debugSequences { + println("repeat sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + s += length + 2 + nextEmit = s + if s >= sLimit { + if debug { + println("repeat ended", s, length) + + } + break encodeLoop + } + cv = load6432(src, s) + continue + } + coffset0 := s - (candidate.offset - e.cur) + coffset1 := s - (candidate2.offset - e.cur) + 1 + if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val { + // found a regular match + t = candidate.offset - e.cur + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + break + } + + if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val { + // found a regular match + t = candidate2.offset - e.cur + s++ + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + if debugAsserts && s-t > e.maxMatchOff { + panic("s - t >e.maxMatchOff") + } + if debugAsserts && t < 0 { + panic("t<0") + } + break + } + s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1)) + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + } + // A 4-byte match has been found. We'll later see if more than 4 bytes. + offset2 = offset1 + offset1 = s - t + + if debugAsserts && s <= t { + panic(fmt.Sprintf("s (%d) <= t (%d)", s, t)) + } + + // Extend the 4-byte match as long as possible. + //l := e.matchlenNoHist(s+4, t+4, src) + 4 + // l := int32(matchLen(src[s+4:], src[t+4:])) + 4 + var l int32 + { + a := src[s+4:] + b := src[t+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Extend backwards + tMin := s - e.maxMatchOff + if tMin < 0 { + tMin = 0 + } + for t > tMin && s > nextEmit && src[t-1] == src[s-1] { + s-- + t-- + l++ + } + + // Write our sequence. + var seq seq + seq.litLen = uint32(s - nextEmit) + seq.matchLen = uint32(l - zstdMinMatch) + if seq.litLen > 0 { + blk.literals = append(blk.literals, src[nextEmit:s]...) + } + // Don't use repeat offsets + seq.offset = uint32(s-t) + 3 + s += l + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + nextEmit = s + if s >= sLimit { + break encodeLoop + } + cv = load6432(src, s) + + // Check offset 2 + if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) { + // We have at least 4 byte match. + // No need to check backwards. We come straight from a match + //l := 4 + e.matchlenNoHist(s+4, o2+4, src) + // l := 4 + int32(matchLen(src[s+4:], src[o2+4:])) + var l int32 + { + a := src[s+4:] + b := src[o2+4:] + endI := len(a) & (math.MaxInt32 - 7) + l = int32(endI) + 4 + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + l = int32(i+bits.TrailingZeros64(diff)>>3) + 4 + break + } + } + } + + // Store this, since we have it. + nextHash := hash6(cv, hashLog) + e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)} + seq.matchLen = uint32(l) - zstdMinMatch + seq.litLen = 0 + // Since litlen is always 0, this is offset 1. + seq.offset = 1 + s += l + nextEmit = s + if debugSequences { + println("sequence", seq, "next s:", s) + } + blk.sequences = append(blk.sequences, seq) + + // Swap offset 1 and 2. + offset1, offset2 = offset2, offset1 + if s >= sLimit { + break encodeLoop + } + // Prepare next loop. + cv = load6432(src, s) + } + } + + if int(nextEmit) < len(src) { + blk.literals = append(blk.literals, src[nextEmit:]...) + blk.extraLits = len(src) - int(nextEmit) + } + if debug { + println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits) + } +} + +func (e *fastBase) addBlock(src []byte) int32 { + if debugAsserts && e.cur > bufferReset { + panic(fmt.Sprintf("ecur (%d) > buffer reset (%d)", e.cur, bufferReset)) + } + // check if we have space already + if len(e.hist)+len(src) > cap(e.hist) { + if cap(e.hist) == 0 { + l := e.maxMatchOff * 2 + // Make it at least 1MB. + if l < 1<<20 { + l = 1 << 20 + } + e.hist = make([]byte, 0, l) + } else { + if cap(e.hist) < int(e.maxMatchOff*2) { + panic("unexpected buffer size") + } + // Move down + offset := int32(len(e.hist)) - e.maxMatchOff + copy(e.hist[0:e.maxMatchOff], e.hist[offset:]) + e.cur += offset + e.hist = e.hist[:e.maxMatchOff] + } + } + s := int32(len(e.hist)) + e.hist = append(e.hist, src...) + return s +} + +// useBlock will replace the block with the provided one, +// but transfer recent offsets from the previous. +func (e *fastBase) UseBlock(enc *blockEnc) { + enc.reset(e.blk) + e.blk = enc +} + +func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 { + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + +func (e *fastBase) matchlen(s, t int32, src []byte) int32 { + if debugAsserts { + if s < 0 { + err := fmt.Sprintf("s (%d) < 0", s) + panic(err) + } + if t < 0 { + err := fmt.Sprintf("s (%d) < 0", s) + panic(err) + } + if s-t > e.maxMatchOff { + err := fmt.Sprintf("s (%d) - t (%d) > maxMatchOff (%d)", s, t, e.maxMatchOff) + panic(err) + } + if len(src)-int(s) > maxCompressedBlockSize { + panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize)) + } + } + + // Extend the match to be as long as possible. + return int32(matchLen(src[s:], src[t:])) +} + +// Reset the encoding table. +func (e *fastBase) Reset() { + if e.blk == nil { + e.blk = &blockEnc{} + e.blk.init() + } else { + e.blk.reset(nil) + } + e.blk.initNewEncode() + if e.crc == nil { + e.crc = xxhash.New() + } else { + e.crc.Reset() + } + if cap(e.hist) < int(e.maxMatchOff*2) { + l := e.maxMatchOff * 2 + // Make it at least 1MB. + if l < 1<<20 { + l = 1 << 20 + } + e.hist = make([]byte, 0, l) + } + // We offset current position so everything will be out of reach. + // If above reset line, history will be purged. + if e.cur < bufferReset { + e.cur += e.maxMatchOff + int32(len(e.hist)) + } + e.hist = e.hist[:0] +} diff --git a/vendor/github.com/klauspost/compress/zstd/enc_params.go b/vendor/github.com/klauspost/compress/zstd/enc_params.go new file mode 100644 index 00000000000..d874116f715 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/enc_params.go @@ -0,0 +1,157 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +/* +// encParams are not really used, just here for reference. +type encParams struct { + // largest match distance : larger == more compression, more memory needed during decompression + windowLog uint8 + + // fully searched segment : larger == more compression, slower, more memory (useless for fast) + chainLog uint8 + + // dispatch table : larger == faster, more memory + hashLog uint8 + + // < nb of searches : larger == more compression, slower + searchLog uint8 + + // < match length searched : larger == faster decompression, sometimes less compression + minMatch uint8 + + // acceptable match size for optimal parser (only) : larger == more compression, slower + targetLength uint32 + + // see ZSTD_strategy definition above + strategy strategy +} + +// strategy defines the algorithm to use when generating sequences. +type strategy uint8 + +const ( + // Compression strategies, listed from fastest to strongest + strategyFast strategy = iota + 1 + strategyDfast + strategyGreedy + strategyLazy + strategyLazy2 + strategyBtlazy2 + strategyBtopt + strategyBtultra + strategyBtultra2 + // note : new strategies _might_ be added in the future. + // Only the order (from fast to strong) is guaranteed + +) + +var defEncParams = [4][]encParams{ + { // "default" - for any srcSize > 256 KB + // W, C, H, S, L, TL, strat + {19, 12, 13, 1, 6, 1, strategyFast}, // base for negative levels + {19, 13, 14, 1, 7, 0, strategyFast}, // level 1 + {20, 15, 16, 1, 6, 0, strategyFast}, // level 2 + {21, 16, 17, 1, 5, 1, strategyDfast}, // level 3 + {21, 18, 18, 1, 5, 1, strategyDfast}, // level 4 + {21, 18, 19, 2, 5, 2, strategyGreedy}, // level 5 + {21, 19, 19, 3, 5, 4, strategyGreedy}, // level 6 + {21, 19, 19, 3, 5, 8, strategyLazy}, // level 7 + {21, 19, 19, 3, 5, 16, strategyLazy2}, // level 8 + {21, 19, 20, 4, 5, 16, strategyLazy2}, // level 9 + {22, 20, 21, 4, 5, 16, strategyLazy2}, // level 10 + {22, 21, 22, 4, 5, 16, strategyLazy2}, // level 11 + {22, 21, 22, 5, 5, 16, strategyLazy2}, // level 12 + {22, 21, 22, 5, 5, 32, strategyBtlazy2}, // level 13 + {22, 22, 23, 5, 5, 32, strategyBtlazy2}, // level 14 + {22, 23, 23, 6, 5, 32, strategyBtlazy2}, // level 15 + {22, 22, 22, 5, 5, 48, strategyBtopt}, // level 16 + {23, 23, 22, 5, 4, 64, strategyBtopt}, // level 17 + {23, 23, 22, 6, 3, 64, strategyBtultra}, // level 18 + {23, 24, 22, 7, 3, 256, strategyBtultra2}, // level 19 + {25, 25, 23, 7, 3, 256, strategyBtultra2}, // level 20 + {26, 26, 24, 7, 3, 512, strategyBtultra2}, // level 21 + {27, 27, 25, 9, 3, 999, strategyBtultra2}, // level 22 + }, + { // for srcSize <= 256 KB + // W, C, H, S, L, T, strat + {18, 12, 13, 1, 5, 1, strategyFast}, // base for negative levels + {18, 13, 14, 1, 6, 0, strategyFast}, // level 1 + {18, 14, 14, 1, 5, 1, strategyDfast}, // level 2 + {18, 16, 16, 1, 4, 1, strategyDfast}, // level 3 + {18, 16, 17, 2, 5, 2, strategyGreedy}, // level 4. + {18, 18, 18, 3, 5, 2, strategyGreedy}, // level 5. + {18, 18, 19, 3, 5, 4, strategyLazy}, // level 6. + {18, 18, 19, 4, 4, 4, strategyLazy}, // level 7 + {18, 18, 19, 4, 4, 8, strategyLazy2}, // level 8 + {18, 18, 19, 5, 4, 8, strategyLazy2}, // level 9 + {18, 18, 19, 6, 4, 8, strategyLazy2}, // level 10 + {18, 18, 19, 5, 4, 12, strategyBtlazy2}, // level 11. + {18, 19, 19, 7, 4, 12, strategyBtlazy2}, // level 12. + {18, 18, 19, 4, 4, 16, strategyBtopt}, // level 13 + {18, 18, 19, 4, 3, 32, strategyBtopt}, // level 14. + {18, 18, 19, 6, 3, 128, strategyBtopt}, // level 15. + {18, 19, 19, 6, 3, 128, strategyBtultra}, // level 16. + {18, 19, 19, 8, 3, 256, strategyBtultra}, // level 17. + {18, 19, 19, 6, 3, 128, strategyBtultra2}, // level 18. + {18, 19, 19, 8, 3, 256, strategyBtultra2}, // level 19. + {18, 19, 19, 10, 3, 512, strategyBtultra2}, // level 20. + {18, 19, 19, 12, 3, 512, strategyBtultra2}, // level 21. + {18, 19, 19, 13, 3, 999, strategyBtultra2}, // level 22. + }, + { // for srcSize <= 128 KB + // W, C, H, S, L, T, strat + {17, 12, 12, 1, 5, 1, strategyFast}, // base for negative levels + {17, 12, 13, 1, 6, 0, strategyFast}, // level 1 + {17, 13, 15, 1, 5, 0, strategyFast}, // level 2 + {17, 15, 16, 2, 5, 1, strategyDfast}, // level 3 + {17, 17, 17, 2, 4, 1, strategyDfast}, // level 4 + {17, 16, 17, 3, 4, 2, strategyGreedy}, // level 5 + {17, 17, 17, 3, 4, 4, strategyLazy}, // level 6 + {17, 17, 17, 3, 4, 8, strategyLazy2}, // level 7 + {17, 17, 17, 4, 4, 8, strategyLazy2}, // level 8 + {17, 17, 17, 5, 4, 8, strategyLazy2}, // level 9 + {17, 17, 17, 6, 4, 8, strategyLazy2}, // level 10 + {17, 17, 17, 5, 4, 8, strategyBtlazy2}, // level 11 + {17, 18, 17, 7, 4, 12, strategyBtlazy2}, // level 12 + {17, 18, 17, 3, 4, 12, strategyBtopt}, // level 13. + {17, 18, 17, 4, 3, 32, strategyBtopt}, // level 14. + {17, 18, 17, 6, 3, 256, strategyBtopt}, // level 15. + {17, 18, 17, 6, 3, 128, strategyBtultra}, // level 16. + {17, 18, 17, 8, 3, 256, strategyBtultra}, // level 17. + {17, 18, 17, 10, 3, 512, strategyBtultra}, // level 18. + {17, 18, 17, 5, 3, 256, strategyBtultra2}, // level 19. + {17, 18, 17, 7, 3, 512, strategyBtultra2}, // level 20. + {17, 18, 17, 9, 3, 512, strategyBtultra2}, // level 21. + {17, 18, 17, 11, 3, 999, strategyBtultra2}, // level 22. + }, + { // for srcSize <= 16 KB + // W, C, H, S, L, T, strat + {14, 12, 13, 1, 5, 1, strategyFast}, // base for negative levels + {14, 14, 15, 1, 5, 0, strategyFast}, // level 1 + {14, 14, 15, 1, 4, 0, strategyFast}, // level 2 + {14, 14, 15, 2, 4, 1, strategyDfast}, // level 3 + {14, 14, 14, 4, 4, 2, strategyGreedy}, // level 4 + {14, 14, 14, 3, 4, 4, strategyLazy}, // level 5. + {14, 14, 14, 4, 4, 8, strategyLazy2}, // level 6 + {14, 14, 14, 6, 4, 8, strategyLazy2}, // level 7 + {14, 14, 14, 8, 4, 8, strategyLazy2}, // level 8. + {14, 15, 14, 5, 4, 8, strategyBtlazy2}, // level 9. + {14, 15, 14, 9, 4, 8, strategyBtlazy2}, // level 10. + {14, 15, 14, 3, 4, 12, strategyBtopt}, // level 11. + {14, 15, 14, 4, 3, 24, strategyBtopt}, // level 12. + {14, 15, 14, 5, 3, 32, strategyBtultra}, // level 13. + {14, 15, 15, 6, 3, 64, strategyBtultra}, // level 14. + {14, 15, 15, 7, 3, 256, strategyBtultra}, // level 15. + {14, 15, 15, 5, 3, 48, strategyBtultra2}, // level 16. + {14, 15, 15, 6, 3, 128, strategyBtultra2}, // level 17. + {14, 15, 15, 7, 3, 256, strategyBtultra2}, // level 18. + {14, 15, 15, 8, 3, 256, strategyBtultra2}, // level 19. + {14, 15, 15, 8, 3, 512, strategyBtultra2}, // level 20. + {14, 15, 15, 9, 3, 512, strategyBtultra2}, // level 21. + {14, 15, 15, 10, 3, 999, strategyBtultra2}, // level 22. + }, +} +*/ diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go new file mode 100644 index 00000000000..af4f00b7342 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/encoder.go @@ -0,0 +1,555 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "crypto/rand" + "fmt" + "io" + rdebug "runtime/debug" + "sync" + + "github.com/klauspost/compress/zstd/internal/xxhash" +) + +// Encoder provides encoding to Zstandard. +// An Encoder can be used for either compressing a stream via the +// io.WriteCloser interface supported by the Encoder or as multiple independent +// tasks via the EncodeAll function. +// Smaller encodes are encouraged to use the EncodeAll function. +// Use NewWriter to create a new instance. +type Encoder struct { + o encoderOptions + encoders chan encoder + state encoderState + init sync.Once +} + +type encoder interface { + Encode(blk *blockEnc, src []byte) + EncodeNoHist(blk *blockEnc, src []byte) + Block() *blockEnc + CRC() *xxhash.Digest + AppendCRC([]byte) []byte + WindowSize(size int) int32 + UseBlock(*blockEnc) + Reset() +} + +type encoderState struct { + w io.Writer + filling []byte + current []byte + previous []byte + encoder encoder + writing *blockEnc + err error + writeErr error + nWritten int64 + headerWritten bool + eofWritten bool + fullFrameWritten bool + + // This waitgroup indicates an encode is running. + wg sync.WaitGroup + // This waitgroup indicates we have a block encoding/writing. + wWg sync.WaitGroup +} + +// NewWriter will create a new Zstandard encoder. +// If the encoder will be used for encoding blocks a nil writer can be used. +func NewWriter(w io.Writer, opts ...EOption) (*Encoder, error) { + initPredefined() + var e Encoder + e.o.setDefault() + for _, o := range opts { + err := o(&e.o) + if err != nil { + return nil, err + } + } + if w != nil { + e.Reset(w) + } + return &e, nil +} + +func (e *Encoder) initialize() { + if e.o.concurrent == 0 { + e.o.setDefault() + } + e.encoders = make(chan encoder, e.o.concurrent) + for i := 0; i < e.o.concurrent; i++ { + e.encoders <- e.o.encoder() + } +} + +// Reset will re-initialize the writer and new writes will encode to the supplied writer +// as a new, independent stream. +func (e *Encoder) Reset(w io.Writer) { + s := &e.state + s.wg.Wait() + s.wWg.Wait() + if cap(s.filling) == 0 { + s.filling = make([]byte, 0, e.o.blockSize) + } + if cap(s.current) == 0 { + s.current = make([]byte, 0, e.o.blockSize) + } + if cap(s.previous) == 0 { + s.previous = make([]byte, 0, e.o.blockSize) + } + if s.encoder == nil { + s.encoder = e.o.encoder() + } + if s.writing == nil { + s.writing = &blockEnc{} + s.writing.init() + } + s.writing.initNewEncode() + s.filling = s.filling[:0] + s.current = s.current[:0] + s.previous = s.previous[:0] + s.encoder.Reset() + s.headerWritten = false + s.eofWritten = false + s.fullFrameWritten = false + s.w = w + s.err = nil + s.nWritten = 0 + s.writeErr = nil +} + +// Write data to the encoder. +// Input data will be buffered and as the buffer fills up +// content will be compressed and written to the output. +// When done writing, use Close to flush the remaining output +// and write CRC if requested. +func (e *Encoder) Write(p []byte) (n int, err error) { + s := &e.state + for len(p) > 0 { + if len(p)+len(s.filling) < e.o.blockSize { + if e.o.crc { + _, _ = s.encoder.CRC().Write(p) + } + s.filling = append(s.filling, p...) + return n + len(p), nil + } + add := p + if len(p)+len(s.filling) > e.o.blockSize { + add = add[:e.o.blockSize-len(s.filling)] + } + if e.o.crc { + _, _ = s.encoder.CRC().Write(add) + } + s.filling = append(s.filling, add...) + p = p[len(add):] + n += len(add) + if len(s.filling) < e.o.blockSize { + return n, nil + } + err := e.nextBlock(false) + if err != nil { + return n, err + } + if debugAsserts && len(s.filling) > 0 { + panic(len(s.filling)) + } + } + return n, nil +} + +// nextBlock will synchronize and start compressing input in e.state.filling. +// If an error has occurred during encoding it will be returned. +func (e *Encoder) nextBlock(final bool) error { + s := &e.state + // Wait for current block. + s.wg.Wait() + if s.err != nil { + return s.err + } + if len(s.filling) > e.o.blockSize { + return fmt.Errorf("block > maxStoreBlockSize") + } + if !s.headerWritten { + // If we have a single block encode, do a sync compression. + if final && len(s.filling) > 0 { + s.current = e.EncodeAll(s.filling, s.current[:0]) + var n2 int + n2, s.err = s.w.Write(s.current) + if s.err != nil { + return s.err + } + s.nWritten += int64(n2) + s.current = s.current[:0] + s.filling = s.filling[:0] + s.headerWritten = true + s.fullFrameWritten = true + return nil + } + + var tmp [maxHeaderSize]byte + fh := frameHeader{ + ContentSize: 0, + WindowSize: uint32(s.encoder.WindowSize(0)), + SingleSegment: false, + Checksum: e.o.crc, + DictID: 0, + } + dst, err := fh.appendTo(tmp[:0]) + if err != nil { + return err + } + s.headerWritten = true + s.wWg.Wait() + var n2 int + n2, s.err = s.w.Write(dst) + if s.err != nil { + return s.err + } + s.nWritten += int64(n2) + } + if s.eofWritten { + // Ensure we only write it once. + final = false + } + + if len(s.filling) == 0 { + // Final block, but no data. + if final { + enc := s.encoder + blk := enc.Block() + blk.reset(nil) + blk.last = true + blk.encodeRaw(nil) + s.wWg.Wait() + _, s.err = s.w.Write(blk.output) + s.nWritten += int64(len(blk.output)) + s.eofWritten = true + } + return s.err + } + + // Move blocks forward. + s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current + s.wg.Add(1) + go func(src []byte) { + if debug { + println("Adding block,", len(src), "bytes, final:", final) + } + defer func() { + if r := recover(); r != nil { + s.err = fmt.Errorf("panic while encoding: %v", r) + rdebug.PrintStack() + } + s.wg.Done() + }() + enc := s.encoder + blk := enc.Block() + enc.Encode(blk, src) + blk.last = final + if final { + s.eofWritten = true + } + // Wait for pending writes. + s.wWg.Wait() + if s.writeErr != nil { + s.err = s.writeErr + return + } + // Transfer encoders from previous write block. + blk.swapEncoders(s.writing) + // Transfer recent offsets to next. + enc.UseBlock(s.writing) + s.writing = blk + s.wWg.Add(1) + go func() { + defer func() { + if r := recover(); r != nil { + s.writeErr = fmt.Errorf("panic while encoding/writing: %v", r) + rdebug.PrintStack() + } + s.wWg.Done() + }() + err := errIncompressible + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + if len(src) != len(blk.literals) || len(src) != e.o.blockSize { + err = blk.encode(e.o.noEntropy) + } + switch err { + case errIncompressible: + if debug { + println("Storing incompressible block as raw") + } + blk.encodeRaw(src) + // In fast mode, we do not transfer offsets, so we don't have to deal with changing the. + case nil: + default: + s.writeErr = err + return + } + _, s.writeErr = s.w.Write(blk.output) + s.nWritten += int64(len(blk.output)) + }() + }(s.current) + return nil +} + +// ReadFrom reads data from r until EOF or error. +// The return value n is the number of bytes read. +// Any error except io.EOF encountered during the read is also returned. +// +// The Copy function uses ReaderFrom if available. +func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) { + if debug { + println("Using ReadFrom") + } + // Maybe handle stuff queued? + e.state.filling = e.state.filling[:e.o.blockSize] + src := e.state.filling + for { + n2, err := r.Read(src) + if e.o.crc { + _, _ = e.state.encoder.CRC().Write(src[:n2]) + } + // src is now the unfilled part... + src = src[n2:] + n += int64(n2) + switch err { + case io.EOF: + e.state.filling = e.state.filling[:len(e.state.filling)-len(src)] + if debug { + println("ReadFrom: got EOF final block:", len(e.state.filling)) + } + return n, e.nextBlock(true) + default: + if debug { + println("ReadFrom: got error:", err) + } + e.state.err = err + return n, err + case nil: + } + if len(src) > 0 { + if debug { + println("ReadFrom: got space left in source:", len(src)) + } + continue + } + err = e.nextBlock(false) + if err != nil { + return n, err + } + e.state.filling = e.state.filling[:e.o.blockSize] + src = e.state.filling + } +} + +// Flush will send the currently written data to output +// and block until everything has been written. +// This should only be used on rare occasions where pushing the currently queued data is critical. +func (e *Encoder) Flush() error { + s := &e.state + if len(s.filling) > 0 { + err := e.nextBlock(false) + if err != nil { + return err + } + } + s.wg.Wait() + s.wWg.Wait() + if s.err != nil { + return s.err + } + return s.writeErr +} + +// Close will flush the final output and close the stream. +// The function will block until everything has been written. +// The Encoder can still be re-used after calling this. +func (e *Encoder) Close() error { + s := &e.state + if s.encoder == nil { + return nil + } + err := e.nextBlock(true) + if err != nil { + return err + } + if e.state.fullFrameWritten { + return s.err + } + s.wg.Wait() + s.wWg.Wait() + + if s.err != nil { + return s.err + } + if s.writeErr != nil { + return s.writeErr + } + + // Write CRC + if e.o.crc && s.err == nil { + // heap alloc. + var tmp [4]byte + _, s.err = s.w.Write(s.encoder.AppendCRC(tmp[:0])) + s.nWritten += 4 + } + + // Add padding with content from crypto/rand.Reader + if s.err == nil && e.o.pad > 0 { + add := calcSkippableFrame(s.nWritten, int64(e.o.pad)) + frame, err := skippableFrame(s.filling[:0], add, rand.Reader) + if err != nil { + return err + } + _, s.err = s.w.Write(frame) + } + return s.err +} + +// EncodeAll will encode all input in src and append it to dst. +// This function can be called concurrently, but each call will only run on a single goroutine. +// If empty input is given, nothing is returned, unless WithZeroFrames is specified. +// Encoded blocks can be concatenated and the result will be the combined input stream. +// Data compressed with EncodeAll can be decoded with the Decoder, +// using either a stream or DecodeAll. +func (e *Encoder) EncodeAll(src, dst []byte) []byte { + if len(src) == 0 { + if e.o.fullZero { + // Add frame header. + fh := frameHeader{ + ContentSize: 0, + WindowSize: MinWindowSize, + SingleSegment: true, + // Adding a checksum would be a waste of space. + Checksum: false, + DictID: 0, + } + dst, _ = fh.appendTo(dst) + + // Write raw block as last one only. + var blk blockHeader + blk.setSize(0) + blk.setType(blockTypeRaw) + blk.setLast(true) + dst = blk.appendTo(dst) + } + return dst + } + e.init.Do(e.initialize) + enc := <-e.encoders + defer func() { + // Release encoder reference to last block. + enc.Reset() + e.encoders <- enc + }() + enc.Reset() + blk := enc.Block() + // Use single segments when above minimum window and below 1MB. + single := len(src) < 1<<20 && len(src) > MinWindowSize + if e.o.single != nil { + single = *e.o.single + } + fh := frameHeader{ + ContentSize: uint64(len(src)), + WindowSize: uint32(enc.WindowSize(len(src))), + SingleSegment: single, + Checksum: e.o.crc, + DictID: 0, + } + + // If less than 1MB, allocate a buffer up front. + if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 { + dst = make([]byte, 0, len(src)) + } + dst, err := fh.appendTo(dst) + if err != nil { + panic(err) + } + + if len(src) <= e.o.blockSize && len(src) <= maxBlockSize { + // Slightly faster with no history and everything in one block. + if e.o.crc { + _, _ = enc.CRC().Write(src) + } + blk.reset(nil) + blk.last = true + enc.EncodeNoHist(blk, src) + + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + err := errIncompressible + oldout := blk.output + if len(blk.literals) != len(src) || len(src) != e.o.blockSize { + // Output directly to dst + blk.output = dst + err = blk.encode(e.o.noEntropy) + } + + switch err { + case errIncompressible: + if debug { + println("Storing incompressible block as raw") + } + dst = blk.encodeRawTo(dst, src) + case nil: + dst = blk.output + default: + panic(err) + } + blk.output = oldout + } else { + for len(src) > 0 { + todo := src + if len(todo) > e.o.blockSize { + todo = todo[:e.o.blockSize] + } + src = src[len(todo):] + if e.o.crc { + _, _ = enc.CRC().Write(todo) + } + blk.reset(nil) + blk.pushOffsets() + enc.Encode(blk, todo) + if len(src) == 0 { + blk.last = true + } + err := errIncompressible + // If we got the exact same number of literals as input, + // assume the literals cannot be compressed. + if len(blk.literals) != len(todo) || len(todo) != e.o.blockSize { + err = blk.encode(e.o.noEntropy) + } + + switch err { + case errIncompressible: + if debug { + println("Storing incompressible block as raw") + } + dst = blk.encodeRawTo(dst, todo) + blk.popOffsets() + case nil: + dst = append(dst, blk.output...) + default: + panic(err) + } + } + } + if e.o.crc { + dst = enc.AppendCRC(dst) + } + // Add padding with content from crypto/rand.Reader + if e.o.pad > 0 { + add := calcSkippableFrame(int64(len(dst)), int64(e.o.pad)) + dst, err = skippableFrame(dst, add, rand.Reader) + if err != nil { + panic(err) + } + } + return dst +} diff --git a/vendor/github.com/klauspost/compress/zstd/encoder_options.go b/vendor/github.com/klauspost/compress/zstd/encoder_options.go new file mode 100644 index 00000000000..3fc03097a67 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/encoder_options.go @@ -0,0 +1,249 @@ +package zstd + +import ( + "errors" + "fmt" + "runtime" + "strings" +) + +// EOption is an option for creating a encoder. +type EOption func(*encoderOptions) error + +// options retains accumulated state of multiple options. +type encoderOptions struct { + concurrent int + level EncoderLevel + single *bool + pad int + blockSize int + windowSize int + crc bool + fullZero bool + noEntropy bool + customWindow bool +} + +func (o *encoderOptions) setDefault() { + *o = encoderOptions{ + // use less ram: true for now, but may change. + concurrent: runtime.GOMAXPROCS(0), + crc: true, + single: nil, + blockSize: 1 << 16, + windowSize: 8 << 20, + level: SpeedDefault, + } +} + +// encoder returns an encoder with the selected options. +func (o encoderOptions) encoder() encoder { + switch o.level { + case SpeedDefault: + return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}} + case SpeedBetterCompression: + return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} + case SpeedFastest: + return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}} + } + panic("unknown compression level") +} + +// WithEncoderCRC will add CRC value to output. +// Output will be 4 bytes larger. +func WithEncoderCRC(b bool) EOption { + return func(o *encoderOptions) error { o.crc = b; return nil } +} + +// WithEncoderConcurrency will set the concurrency, +// meaning the maximum number of decoders to run concurrently. +// The value supplied must be at least 1. +// By default this will be set to GOMAXPROCS. +func WithEncoderConcurrency(n int) EOption { + return func(o *encoderOptions) error { + if n <= 0 { + return fmt.Errorf("concurrency must be at least 1") + } + o.concurrent = n + return nil + } +} + +// WithWindowSize will set the maximum allowed back-reference distance. +// The value must be a power of two between MinWindowSize and MaxWindowSize. +// A larger value will enable better compression but allocate more memory and, +// for above-default values, take considerably longer. +// The default value is determined by the compression level. +func WithWindowSize(n int) EOption { + return func(o *encoderOptions) error { + switch { + case n < MinWindowSize: + return fmt.Errorf("window size must be at least %d", MinWindowSize) + case n > MaxWindowSize: + return fmt.Errorf("window size must be at most %d", MaxWindowSize) + case (n & (n - 1)) != 0: + return errors.New("window size must be a power of 2") + } + + o.windowSize = n + o.customWindow = true + if o.blockSize > o.windowSize { + o.blockSize = o.windowSize + } + return nil + } +} + +// WithEncoderPadding will add padding to all output so the size will be a multiple of n. +// This can be used to obfuscate the exact output size or make blocks of a certain size. +// The contents will be a skippable frame, so it will be invisible by the decoder. +// n must be > 0 and <= 1GB, 1<<30 bytes. +// The padded area will be filled with data from crypto/rand.Reader. +// If `EncodeAll` is used with data already in the destination, the total size will be multiple of this. +func WithEncoderPadding(n int) EOption { + return func(o *encoderOptions) error { + if n <= 0 { + return fmt.Errorf("padding must be at least 1") + } + // No need to waste our time. + if n == 1 { + o.pad = 0 + } + if n > 1<<30 { + return fmt.Errorf("padding must less than 1GB (1<<30 bytes) ") + } + o.pad = n + return nil + } +} + +// EncoderLevel predefines encoder compression levels. +// Only use the constants made available, since the actual mapping +// of these values are very likely to change and your compression could change +// unpredictably when upgrading the library. +type EncoderLevel int + +const ( + speedNotSet EncoderLevel = iota + + // SpeedFastest will choose the fastest reasonable compression. + // This is roughly equivalent to the fastest Zstandard mode. + SpeedFastest + + // SpeedDefault is the default "pretty fast" compression option. + // This is roughly equivalent to the default Zstandard mode (level 3). + SpeedDefault + + // SpeedBetterCompression will yield better compression than the default. + // Currently it is about zstd level 7-8 with ~ 2x-3x the default CPU usage. + // By using this, notice that CPU usage may go up in the future. + SpeedBetterCompression + + // speedLast should be kept as the last actual compression option. + // The is not for external usage, but is used to keep track of the valid options. + speedLast + + // SpeedBestCompression will choose the best available compression option. + // For now this is not implemented. + SpeedBestCompression = SpeedBetterCompression +) + +// EncoderLevelFromString will convert a string representation of an encoding level back +// to a compression level. The compare is not case sensitive. +// If the string wasn't recognized, (false, SpeedDefault) will be returned. +func EncoderLevelFromString(s string) (bool, EncoderLevel) { + for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ { + if strings.EqualFold(s, l.String()) { + return true, l + } + } + return false, SpeedDefault +} + +// EncoderLevelFromZstd will return an encoder level that closest matches the compression +// ratio of a specific zstd compression level. +// Many input values will provide the same compression level. +func EncoderLevelFromZstd(level int) EncoderLevel { + switch { + case level < 3: + return SpeedFastest + case level >= 3 && level < 6: + return SpeedDefault + case level > 5: + return SpeedBetterCompression + } + return SpeedDefault +} + +// String provides a string representation of the compression level. +func (e EncoderLevel) String() string { + switch e { + case SpeedFastest: + return "fastest" + case SpeedDefault: + return "default" + case SpeedBetterCompression: + return "better" + default: + return "invalid" + } +} + +// WithEncoderLevel specifies a predefined compression level. +func WithEncoderLevel(l EncoderLevel) EOption { + return func(o *encoderOptions) error { + switch { + case l <= speedNotSet || l >= speedLast: + return fmt.Errorf("unknown encoder level") + } + o.level = l + if !o.customWindow { + switch o.level { + case SpeedFastest: + o.windowSize = 4 << 20 + case SpeedDefault: + o.windowSize = 8 << 20 + case SpeedBetterCompression: + o.windowSize = 16 << 20 + } + } + return nil + } +} + +// WithZeroFrames will encode 0 length input as full frames. +// This can be needed for compatibility with zstandard usage, +// but is not needed for this package. +func WithZeroFrames(b bool) EOption { + return func(o *encoderOptions) error { + o.fullZero = b + return nil + } +} + +// WithNoEntropyCompression will always skip entropy compression of literals. +// This can be useful if content has matches, but unlikely to benefit from entropy +// compression. Usually the slight speed improvement is not worth enabling this. +func WithNoEntropyCompression(b bool) EOption { + return func(o *encoderOptions) error { + o.noEntropy = b + return nil + } +} + +// WithSingleSegment will set the "single segment" flag when EncodeAll is used. +// If this flag is set, data must be regenerated within a single continuous memory segment. +// In this case, Window_Descriptor byte is skipped, but Frame_Content_Size is necessarily present. +// As a consequence, the decoder must allocate a memory segment of size equal or larger than size of your content. +// In order to preserve the decoder from unreasonable memory requirements, +// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range. +// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB. +// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations. +// If this is not specified, block encodes will automatically choose this based on the input size. +// This setting has no effect on streamed encodes. +func WithSingleSegment(b bool) EOption { + return func(o *encoderOptions) error { + o.single = &b + return nil + } +} diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go new file mode 100644 index 00000000000..780880ebe44 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/framedec.go @@ -0,0 +1,495 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "bytes" + "encoding/hex" + "errors" + "hash" + "io" + "sync" + + "github.com/klauspost/compress/zstd/internal/xxhash" +) + +type frameDec struct { + o decoderOptions + crc hash.Hash64 + offset int64 + + WindowSize uint64 + + // maxWindowSize is the maximum windows size to support. + // should never be bigger than max-int. + maxWindowSize uint64 + + // In order queue of blocks being decoded. + decoding chan *blockDec + + // Frame history passed between blocks + history history + + rawInput byteBuffer + + // Byte buffer that can be reused for small input blocks. + bBuf byteBuf + + FrameContentSize uint64 + frameDone sync.WaitGroup + + DictionaryID uint32 + HasCheckSum bool + SingleSegment bool + + // asyncRunning indicates whether the async routine processes input on 'decoding'. + asyncRunningMu sync.Mutex + asyncRunning bool +} + +const ( + // The minimum Window_Size is 1 KB. + MinWindowSize = 1 << 10 + MaxWindowSize = 1 << 29 +) + +var ( + frameMagic = []byte{0x28, 0xb5, 0x2f, 0xfd} + skippableFrameMagic = []byte{0x2a, 0x4d, 0x18} +) + +func newFrameDec(o decoderOptions) *frameDec { + d := frameDec{ + o: o, + maxWindowSize: MaxWindowSize, + } + if d.maxWindowSize > o.maxDecodedSize { + d.maxWindowSize = o.maxDecodedSize + } + return &d +} + +// reset will read the frame header and prepare for block decoding. +// If nothing can be read from the input, io.EOF will be returned. +// Any other error indicated that the stream contained data, but +// there was a problem. +func (d *frameDec) reset(br byteBuffer) error { + d.HasCheckSum = false + d.WindowSize = 0 + var b []byte + for { + b = br.readSmall(4) + if b == nil { + return io.EOF + } + if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 { + if debug { + println("Not skippable", hex.EncodeToString(b), hex.EncodeToString(skippableFrameMagic)) + } + // Break if not skippable frame. + break + } + // Read size to skip + b = br.readSmall(4) + if b == nil { + println("Reading Frame Size EOF") + return io.ErrUnexpectedEOF + } + n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + println("Skipping frame with", n, "bytes.") + err := br.skipN(int(n)) + if err != nil { + if debug { + println("Reading discarded frame", err) + } + return err + } + } + if !bytes.Equal(b, frameMagic) { + println("Got magic numbers: ", b, "want:", frameMagic) + return ErrMagicMismatch + } + + // Read Frame_Header_Descriptor + fhd, err := br.readByte() + if err != nil { + println("Reading Frame_Header_Descriptor", err) + return err + } + d.SingleSegment = fhd&(1<<5) != 0 + + if fhd&(1<<3) != 0 { + return errors.New("Reserved bit set on frame header") + } + + // Read Window_Descriptor + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor + d.WindowSize = 0 + if !d.SingleSegment { + wd, err := br.readByte() + if err != nil { + println("Reading Window_Descriptor", err) + return err + } + printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3) + windowLog := 10 + (wd >> 3) + windowBase := uint64(1) << windowLog + windowAdd := (windowBase / 8) * uint64(wd&0x7) + d.WindowSize = windowBase + windowAdd + } + + // Read Dictionary_ID + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id + d.DictionaryID = 0 + if size := fhd & 3; size != 0 { + if size == 3 { + size = 4 + } + b = br.readSmall(int(size)) + if b == nil { + if debug { + println("Reading Dictionary_ID", io.ErrUnexpectedEOF) + } + return io.ErrUnexpectedEOF + } + switch size { + case 1: + d.DictionaryID = uint32(b[0]) + case 2: + d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) + case 4: + d.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + } + if debug { + println("Dict size", size, "ID:", d.DictionaryID) + } + if d.DictionaryID != 0 { + return ErrUnknownDictionary + } + } + + // Read Frame_Content_Size + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size + var fcsSize int + v := fhd >> 6 + switch v { + case 0: + if d.SingleSegment { + fcsSize = 1 + } + default: + fcsSize = 1 << v + } + d.FrameContentSize = 0 + if fcsSize > 0 { + b := br.readSmall(fcsSize) + if b == nil { + println("Reading Frame content", io.ErrUnexpectedEOF) + return io.ErrUnexpectedEOF + } + switch fcsSize { + case 1: + d.FrameContentSize = uint64(b[0]) + case 2: + // When FCS_Field_Size is 2, the offset of 256 is added. + d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256 + case 4: + d.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24) + case 8: + d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24) + d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24) + d.FrameContentSize = uint64(d1) | (uint64(d2) << 32) + } + if debug { + println("field size bits:", v, "fcsSize:", fcsSize, "FrameContentSize:", d.FrameContentSize, hex.EncodeToString(b[:fcsSize]), "singleseg:", d.SingleSegment, "window:", d.WindowSize) + } + } + // Move this to shared. + d.HasCheckSum = fhd&(1<<2) != 0 + if d.HasCheckSum { + if d.crc == nil { + d.crc = xxhash.New() + } + d.crc.Reset() + } + + if d.WindowSize == 0 && d.SingleSegment { + // We may not need window in this case. + d.WindowSize = d.FrameContentSize + if d.WindowSize < MinWindowSize { + d.WindowSize = MinWindowSize + } + } + + if d.WindowSize > d.maxWindowSize { + printf("window size %d > max %d\n", d.WindowSize, d.maxWindowSize) + return ErrWindowSizeExceeded + } + // The minimum Window_Size is 1 KB. + if d.WindowSize < MinWindowSize { + println("got window size: ", d.WindowSize) + return ErrWindowSizeTooSmall + } + d.history.windowSize = int(d.WindowSize) + if d.o.lowMem && d.history.windowSize < maxBlockSize { + d.history.maxSize = d.history.windowSize * 2 + } else { + d.history.maxSize = d.history.windowSize + maxBlockSize + } + // history contains input - maybe we do something + d.rawInput = br + return nil +} + +// next will start decoding the next block from stream. +func (d *frameDec) next(block *blockDec) error { + if debug { + printf("decoding new block %p:%p", block, block.data) + } + err := block.reset(d.rawInput, d.WindowSize) + if err != nil { + println("block error:", err) + // Signal the frame decoder we have a problem. + d.sendErr(block, err) + return err + } + block.input <- struct{}{} + if debug { + println("next block:", block) + } + d.asyncRunningMu.Lock() + defer d.asyncRunningMu.Unlock() + if !d.asyncRunning { + return nil + } + if block.Last { + // We indicate the frame is done by sending io.EOF + d.decoding <- block + return io.EOF + } + d.decoding <- block + return nil +} + +// sendEOF will queue an error block on the frame. +// This will cause the frame decoder to return when it encounters the block. +// Returns true if the decoder was added. +func (d *frameDec) sendErr(block *blockDec, err error) bool { + d.asyncRunningMu.Lock() + defer d.asyncRunningMu.Unlock() + if !d.asyncRunning { + return false + } + + println("sending error", err.Error()) + block.sendErr(err) + d.decoding <- block + return true +} + +// checkCRC will check the checksum if the frame has one. +// Will return ErrCRCMismatch if crc check failed, otherwise nil. +func (d *frameDec) checkCRC() error { + if !d.HasCheckSum { + return nil + } + var tmp [4]byte + got := d.crc.Sum64() + // Flip to match file order. + tmp[0] = byte(got >> 0) + tmp[1] = byte(got >> 8) + tmp[2] = byte(got >> 16) + tmp[3] = byte(got >> 24) + + // We can overwrite upper tmp now + want := d.rawInput.readSmall(4) + if want == nil { + println("CRC missing?") + return io.ErrUnexpectedEOF + } + + if !bytes.Equal(tmp[:], want) { + if debug { + println("CRC Check Failed:", tmp[:], "!=", want) + } + return ErrCRCMismatch + } + if debug { + println("CRC ok", tmp[:]) + } + return nil +} + +func (d *frameDec) initAsync() { + if !d.o.lowMem && !d.SingleSegment { + // set max extra size history to 10MB. + d.history.maxSize = d.history.windowSize + maxBlockSize*5 + } + // re-alloc if more than one extra block size. + if d.o.lowMem && cap(d.history.b) > d.history.maxSize+maxBlockSize { + d.history.b = make([]byte, 0, d.history.maxSize) + } + if cap(d.history.b) < d.history.maxSize { + d.history.b = make([]byte, 0, d.history.maxSize) + } + if cap(d.decoding) < d.o.concurrent { + d.decoding = make(chan *blockDec, d.o.concurrent) + } + if debug { + h := d.history + printf("history init. len: %d, cap: %d", len(h.b), cap(h.b)) + } + d.asyncRunningMu.Lock() + d.asyncRunning = true + d.asyncRunningMu.Unlock() +} + +// startDecoder will start decoding blocks and write them to the writer. +// The decoder will stop as soon as an error occurs or at end of frame. +// When the frame has finished decoding the *bufio.Reader +// containing the remaining input will be sent on frameDec.frameDone. +func (d *frameDec) startDecoder(output chan decodeOutput) { + // TODO: Init to dictionary + d.history.reset() + written := int64(0) + + defer func() { + d.asyncRunningMu.Lock() + d.asyncRunning = false + d.asyncRunningMu.Unlock() + + // Drain the currently decoding. + d.history.error = true + flushdone: + for { + select { + case b := <-d.decoding: + b.history <- &d.history + output <- <-b.result + default: + break flushdone + } + } + println("frame decoder done, signalling done") + d.frameDone.Done() + }() + // Get decoder for first block. + block := <-d.decoding + block.history <- &d.history + for { + var next *blockDec + // Get result + r := <-block.result + if r.err != nil { + println("Result contained error", r.err) + output <- r + return + } + if debug { + println("got result, from ", d.offset, "to", d.offset+int64(len(r.b))) + d.offset += int64(len(r.b)) + } + if !block.Last { + // Send history to next block + select { + case next = <-d.decoding: + if debug { + println("Sending ", len(d.history.b), "bytes as history") + } + next.history <- &d.history + default: + // Wait until we have sent the block, so + // other decoders can potentially get the decoder. + next = nil + } + } + + // Add checksum, async to decoding. + if d.HasCheckSum { + n, err := d.crc.Write(r.b) + if err != nil { + r.err = err + if n != len(r.b) { + r.err = io.ErrShortWrite + } + output <- r + return + } + } + written += int64(len(r.b)) + if d.SingleSegment && uint64(written) > d.FrameContentSize { + println("runDecoder: single segment and", uint64(written), ">", d.FrameContentSize) + r.err = ErrFrameSizeExceeded + output <- r + return + } + if block.Last { + r.err = d.checkCRC() + output <- r + return + } + output <- r + if next == nil { + // There was no decoder available, we wait for one now that we have sent to the writer. + if debug { + println("Sending ", len(d.history.b), " bytes as history") + } + next = <-d.decoding + next.history <- &d.history + } + block = next + } +} + +// runDecoder will create a sync decoder that will decode a block of data. +func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) { + // TODO: Init to dictionary + d.history.reset() + saved := d.history.b + + // We use the history for output to avoid copying it. + d.history.b = dst + // Store input length, so we only check new data. + crcStart := len(dst) + var err error + for { + err = dec.reset(d.rawInput, d.WindowSize) + if err != nil { + break + } + if debug { + println("next block:", dec) + } + err = dec.decodeBuf(&d.history) + if err != nil || dec.Last { + break + } + if uint64(len(d.history.b)) > d.o.maxDecodedSize { + err = ErrDecoderSizeExceeded + break + } + if d.SingleSegment && uint64(len(d.history.b)) > d.o.maxDecodedSize { + println("runDecoder: single segment and", uint64(len(d.history.b)), ">", d.o.maxDecodedSize) + err = ErrFrameSizeExceeded + break + } + } + dst = d.history.b + if err == nil { + if d.HasCheckSum { + var n int + n, err = d.crc.Write(dst[crcStart:]) + if err == nil { + if n != len(dst)-crcStart { + err = io.ErrShortWrite + } else { + err = d.checkCRC() + } + } + } + } + d.history.b = saved + return dst, err +} diff --git a/vendor/github.com/klauspost/compress/zstd/frameenc.go b/vendor/github.com/klauspost/compress/zstd/frameenc.go new file mode 100644 index 00000000000..4479cfe18b2 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/frameenc.go @@ -0,0 +1,115 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "fmt" + "io" + "math" + "math/bits" +) + +type frameHeader struct { + ContentSize uint64 + WindowSize uint32 + SingleSegment bool + Checksum bool + DictID uint32 // Not stored. +} + +const maxHeaderSize = 14 + +func (f frameHeader) appendTo(dst []byte) ([]byte, error) { + dst = append(dst, frameMagic...) + var fhd uint8 + if f.Checksum { + fhd |= 1 << 2 + } + if f.SingleSegment { + fhd |= 1 << 5 + } + var fcs uint8 + if f.ContentSize >= 256 { + fcs++ + } + if f.ContentSize >= 65536+256 { + fcs++ + } + if f.ContentSize >= 0xffffffff { + fcs++ + } + fhd |= fcs << 6 + + dst = append(dst, fhd) + if !f.SingleSegment { + const winLogMin = 10 + windowLog := (bits.Len32(f.WindowSize-1) - winLogMin) << 3 + dst = append(dst, uint8(windowLog)) + } + + switch fcs { + case 0: + if f.SingleSegment { + dst = append(dst, uint8(f.ContentSize)) + } + // Unless SingleSegment is set, framessizes < 256 are nto stored. + case 1: + f.ContentSize -= 256 + dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8)) + case 2: + dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8), uint8(f.ContentSize>>16), uint8(f.ContentSize>>24)) + case 3: + dst = append(dst, uint8(f.ContentSize), uint8(f.ContentSize>>8), uint8(f.ContentSize>>16), uint8(f.ContentSize>>24), + uint8(f.ContentSize>>32), uint8(f.ContentSize>>40), uint8(f.ContentSize>>48), uint8(f.ContentSize>>56)) + default: + panic("invalid fcs") + } + return dst, nil +} + +const skippableFrameHeader = 4 + 4 + +// calcSkippableFrame will return a total size to be added for written +// to be divisible by multiple. +// The value will always be > skippableFrameHeader. +// The function will panic if written < 0 or wantMultiple <= 0. +func calcSkippableFrame(written, wantMultiple int64) int { + if wantMultiple <= 0 { + panic("wantMultiple <= 0") + } + if written < 0 { + panic("written < 0") + } + leftOver := written % wantMultiple + if leftOver == 0 { + return 0 + } + toAdd := wantMultiple - leftOver + for toAdd < skippableFrameHeader { + toAdd += wantMultiple + } + return int(toAdd) +} + +// skippableFrame will add a skippable frame with a total size of bytes. +// total should be >= skippableFrameHeader and < math.MaxUint32. +func skippableFrame(dst []byte, total int, r io.Reader) ([]byte, error) { + if total == 0 { + return dst, nil + } + if total < skippableFrameHeader { + return dst, fmt.Errorf("requested skippable frame (%d) < 8", total) + } + if int64(total) > math.MaxUint32 { + return dst, fmt.Errorf("requested skippable frame (%d) > max uint32", total) + } + dst = append(dst, 0x50, 0x2a, 0x4d, 0x18) + f := uint32(total - skippableFrameHeader) + dst = append(dst, uint8(f), uint8(f>>8), uint8(f>>16), uint8(f>>24)) + start := len(dst) + dst = append(dst, make([]byte, f)...) + _, err := io.ReadFull(r, dst[start:]) + return dst, err +} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_decoder.go b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go new file mode 100644 index 00000000000..e002be98b9b --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/fse_decoder.go @@ -0,0 +1,384 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "fmt" +) + +const ( + tablelogAbsoluteMax = 9 +) + +const ( + /*!MEMORY_USAGE : + * Memory usage formula : N->2^N Bytes (examples : 10 -> 1KB; 12 -> 4KB ; 16 -> 64KB; 20 -> 1MB; etc.) + * Increasing memory usage improves compression ratio + * Reduced memory usage can improve speed, due to cache effect + * Recommended max value is 14, for 16KB, which nicely fits into Intel x86 L1 cache */ + maxMemoryUsage = 11 + + maxTableLog = maxMemoryUsage - 2 + maxTablesize = 1 << maxTableLog + maxTableMask = (1 << maxTableLog) - 1 + minTablelog = 5 + maxSymbolValue = 255 +) + +// fseDecoder provides temporary storage for compression and decompression. +type fseDecoder struct { + dt [maxTablesize]decSymbol // Decompression table. + symbolLen uint16 // Length of active part of the symbol table. + actualTableLog uint8 // Selected tablelog. + maxBits uint8 // Maximum number of additional bits + + // used for table creation to avoid allocations. + stateTable [256]uint16 + norm [maxSymbolValue + 1]int16 + preDefined bool +} + +// tableStep returns the next table index. +func tableStep(tableSize uint32) uint32 { + return (tableSize >> 1) + (tableSize >> 3) + 3 +} + +// readNCount will read the symbol distribution so decoding tables can be constructed. +func (s *fseDecoder) readNCount(b *byteReader, maxSymbol uint16) error { + var ( + charnum uint16 + previous0 bool + ) + if b.remain() < 4 { + return errors.New("input too small") + } + bitStream := b.Uint32() + nbBits := uint((bitStream & 0xF) + minTablelog) // extract tableLog + if nbBits > tablelogAbsoluteMax { + println("Invalid tablelog:", nbBits) + return errors.New("tableLog too large") + } + bitStream >>= 4 + bitCount := uint(4) + + s.actualTableLog = uint8(nbBits) + remaining := int32((1 << nbBits) + 1) + threshold := int32(1 << nbBits) + gotTotal := int32(0) + nbBits++ + + for remaining > 1 && charnum <= maxSymbol { + if previous0 { + //println("prev0") + n0 := charnum + for (bitStream & 0xFFFF) == 0xFFFF { + //println("24 x 0") + n0 += 24 + if r := b.remain(); r > 5 { + b.advance(2) + bitStream = b.Uint32() >> bitCount + } else { + // end of bit stream + bitStream >>= 16 + bitCount += 16 + } + } + //printf("bitstream: %d, 0b%b", bitStream&3, bitStream) + for (bitStream & 3) == 3 { + n0 += 3 + bitStream >>= 2 + bitCount += 2 + } + n0 += uint16(bitStream & 3) + bitCount += 2 + + if n0 > maxSymbolValue { + return errors.New("maxSymbolValue too small") + } + //println("inserting ", n0-charnum, "zeroes from idx", charnum, "ending before", n0) + for charnum < n0 { + s.norm[uint8(charnum)] = 0 + charnum++ + } + + if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + b.advance(bitCount >> 3) + bitCount &= 7 + bitStream = b.Uint32() >> bitCount + } else { + bitStream >>= 2 + } + } + + max := (2*threshold - 1) - remaining + var count int32 + + if int32(bitStream)&(threshold-1) < max { + count = int32(bitStream) & (threshold - 1) + if debugAsserts && nbBits < 1 { + panic("nbBits underflow") + } + bitCount += nbBits - 1 + } else { + count = int32(bitStream) & (2*threshold - 1) + if count >= threshold { + count -= max + } + bitCount += nbBits + } + + // extra accuracy + count-- + if count < 0 { + // -1 means +1 + remaining += count + gotTotal -= count + } else { + remaining -= count + gotTotal += count + } + s.norm[charnum&0xff] = int16(count) + charnum++ + previous0 = count == 0 + for remaining < threshold { + nbBits-- + threshold >>= 1 + } + + //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "remain:", b.remain()) + if r := b.remain(); r >= 7 || r+int(bitCount>>3) >= 4 { + b.advance(bitCount >> 3) + bitCount &= 7 + } else { + bitCount -= (uint)(8 * (len(b.b) - 4 - b.off)) + b.off = len(b.b) - 4 + //println("b.off:", b.off, "len:", len(b.b), "bc:", bitCount, "iend", iend) + } + bitStream = b.Uint32() >> (bitCount & 31) + //printf("bitstream is now: 0b%b", bitStream) + } + s.symbolLen = charnum + if s.symbolLen <= 1 { + return fmt.Errorf("symbolLen (%d) too small", s.symbolLen) + } + if s.symbolLen > maxSymbolValue+1 { + return fmt.Errorf("symbolLen (%d) too big", s.symbolLen) + } + if remaining != 1 { + return fmt.Errorf("corruption detected (remaining %d != 1)", remaining) + } + if bitCount > 32 { + return fmt.Errorf("corruption detected (bitCount %d > 32)", bitCount) + } + if gotTotal != 1<> 3) + // println(s.norm[:s.symbolLen], s.symbolLen) + return s.buildDtable() +} + +// decSymbol contains information about a state entry, +// Including the state offset base, the output symbol and +// the number of bits to read for the low part of the destination state. +// Using a composite uint64 is faster than a struct with separate members. +type decSymbol uint64 + +func newDecSymbol(nbits, addBits uint8, newState uint16, baseline uint32) decSymbol { + return decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32) +} + +func (d decSymbol) nbBits() uint8 { + return uint8(d) +} + +func (d decSymbol) addBits() uint8 { + return uint8(d >> 8) +} + +func (d decSymbol) newState() uint16 { + return uint16(d >> 16) +} + +func (d decSymbol) baseline() uint32 { + return uint32(d >> 32) +} + +func (d decSymbol) baselineInt() int { + return int(d >> 32) +} + +func (d *decSymbol) set(nbits, addBits uint8, newState uint16, baseline uint32) { + *d = decSymbol(nbits) | (decSymbol(addBits) << 8) | (decSymbol(newState) << 16) | (decSymbol(baseline) << 32) +} + +func (d *decSymbol) setNBits(nBits uint8) { + const mask = 0xffffffffffffff00 + *d = (*d & mask) | decSymbol(nBits) +} + +func (d *decSymbol) setAddBits(addBits uint8) { + const mask = 0xffffffffffff00ff + *d = (*d & mask) | (decSymbol(addBits) << 8) +} + +func (d *decSymbol) setNewState(state uint16) { + const mask = 0xffffffff0000ffff + *d = (*d & mask) | decSymbol(state)<<16 +} + +func (d *decSymbol) setBaseline(baseline uint32) { + const mask = 0xffffffff + *d = (*d & mask) | decSymbol(baseline)<<32 +} + +func (d *decSymbol) setExt(addBits uint8, baseline uint32) { + const mask = 0xffff00ff + *d = (*d & mask) | (decSymbol(addBits) << 8) | (decSymbol(baseline) << 32) +} + +// decSymbolValue returns the transformed decSymbol for the given symbol. +func decSymbolValue(symb uint8, t []baseOffset) (decSymbol, error) { + if int(symb) >= len(t) { + return 0, fmt.Errorf("rle symbol %d >= max %d", symb, len(t)) + } + lu := t[symb] + return newDecSymbol(0, lu.addBits, 0, lu.baseLine), nil +} + +// setRLE will set the decoder til RLE mode. +func (s *fseDecoder) setRLE(symbol decSymbol) { + s.actualTableLog = 0 + s.maxBits = symbol.addBits() + s.dt[0] = symbol +} + +// buildDtable will build the decoding table. +func (s *fseDecoder) buildDtable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + symbolNext := s.stateTable[:256] + + // Init, lay down lowprob symbols + { + for i, v := range s.norm[:s.symbolLen] { + if v == -1 { + s.dt[highThreshold].setAddBits(uint8(i)) + highThreshold-- + symbolNext[i] = 1 + } else { + symbolNext[i] = uint16(v) + } + } + } + // Spread symbols + { + tableMask := tableSize - 1 + step := tableStep(tableSize) + position := uint32(0) + for ss, v := range s.norm[:s.symbolLen] { + for i := 0; i < int(v); i++ { + s.dt[position].setAddBits(uint8(ss)) + position = (position + step) & tableMask + for position > highThreshold { + // lowprob area + position = (position + step) & tableMask + } + } + } + if position != 0 { + // position must reach all cells once, otherwise normalizedCounter is incorrect + return errors.New("corrupted input (position != 0)") + } + } + + // Build Decoding table + { + tableSize := uint16(1 << s.actualTableLog) + for u, v := range s.dt[:tableSize] { + symbol := v.addBits() + nextState := symbolNext[symbol] + symbolNext[symbol] = nextState + 1 + nBits := s.actualTableLog - byte(highBits(uint32(nextState))) + s.dt[u&maxTableMask].setNBits(nBits) + newState := (nextState << nBits) - tableSize + if newState > tableSize { + return fmt.Errorf("newState (%d) outside table size (%d)", newState, tableSize) + } + if newState == uint16(u) && nBits == 0 { + // Seems weird that this is possible with nbits > 0. + return fmt.Errorf("newState (%d) == oldState (%d) and no bits", newState, u) + } + s.dt[u&maxTableMask].setNewState(newState) + } + } + return nil +} + +// transform will transform the decoder table into a table usable for +// decoding without having to apply the transformation while decoding. +// The state will contain the base value and the number of bits to read. +func (s *fseDecoder) transform(t []baseOffset) error { + tableSize := uint16(1 << s.actualTableLog) + s.maxBits = 0 + for i, v := range s.dt[:tableSize] { + add := v.addBits() + if int(add) >= len(t) { + return fmt.Errorf("invalid decoding table entry %d, symbol %d >= max (%d)", i, v.addBits(), len(t)) + } + lu := t[add] + if lu.addBits > s.maxBits { + s.maxBits = lu.addBits + } + v.setExt(lu.addBits, lu.baseLine) + s.dt[i] = v + } + return nil +} + +type fseState struct { + dt []decSymbol + state decSymbol +} + +// Initialize and decodeAsync first state and symbol. +func (s *fseState) init(br *bitReader, tableLog uint8, dt []decSymbol) { + s.dt = dt + br.fill() + s.state = dt[br.getBits(tableLog)] +} + +// next returns the current symbol and sets the next state. +// At least tablelog bits must be available in the bit reader. +func (s *fseState) next(br *bitReader) { + lowBits := uint16(br.getBits(s.state.nbBits())) + s.state = s.dt[s.state.newState()+lowBits] +} + +// finished returns true if all bits have been read from the bitstream +// and the next state would require reading bits from the input. +func (s *fseState) finished(br *bitReader) bool { + return br.finished() && s.state.nbBits() > 0 +} + +// final returns the current state symbol without decoding the next. +func (s *fseState) final() (int, uint8) { + return s.state.baselineInt(), s.state.addBits() +} + +// final returns the current state symbol without decoding the next. +func (s decSymbol) final() (int, uint8) { + return s.baselineInt(), s.addBits() +} + +// nextFast returns the next symbol and sets the next state. +// This can only be used if no symbols are 0 bits. +// At least tablelog bits must be available in the bit reader. +func (s *fseState) nextFast(br *bitReader) (uint32, uint8) { + lowBits := uint16(br.getBitsFast(s.state.nbBits())) + s.state = s.dt[s.state.newState()+lowBits] + return s.state.baseline(), s.state.addBits() +} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_encoder.go b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go new file mode 100644 index 00000000000..aa9eba88b80 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/fse_encoder.go @@ -0,0 +1,726 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "fmt" + "math" +) + +const ( + // For encoding we only support up to + maxEncTableLog = 8 + maxEncTablesize = 1 << maxTableLog + maxEncTableMask = (1 << maxTableLog) - 1 + minEncTablelog = 5 + maxEncSymbolValue = maxMatchLengthSymbol +) + +// Scratch provides temporary storage for compression and decompression. +type fseEncoder struct { + symbolLen uint16 // Length of active part of the symbol table. + actualTableLog uint8 // Selected tablelog. + ct cTable // Compression tables. + maxCount int // count of the most probable symbol + zeroBits bool // no bits has prob > 50%. + clearCount bool // clear count + useRLE bool // This encoder is for RLE + preDefined bool // This encoder is predefined. + reUsed bool // Set to know when the encoder has been reused. + rleVal uint8 // RLE Symbol + maxBits uint8 // Maximum output bits after transform. + + // TODO: Technically zstd should be fine with 64 bytes. + count [256]uint32 + norm [256]int16 +} + +// cTable contains tables used for compression. +type cTable struct { + tableSymbol []byte + stateTable []uint16 + symbolTT []symbolTransform +} + +// symbolTransform contains the state transform for a symbol. +type symbolTransform struct { + deltaNbBits uint32 + deltaFindState int16 + outBits uint8 +} + +// String prints values as a human readable string. +func (s symbolTransform) String() string { + return fmt.Sprintf("{deltabits: %08x, findstate:%d outbits:%d}", s.deltaNbBits, s.deltaFindState, s.outBits) +} + +// Histogram allows to populate the histogram and skip that step in the compression, +// It otherwise allows to inspect the histogram when compression is done. +// To indicate that you have populated the histogram call HistogramFinished +// with the value of the highest populated symbol, as well as the number of entries +// in the most populated entry. These are accepted at face value. +// The returned slice will always be length 256. +func (s *fseEncoder) Histogram() []uint32 { + return s.count[:] +} + +// HistogramFinished can be called to indicate that the histogram has been populated. +// maxSymbol is the index of the highest set symbol of the next data segment. +// maxCount is the number of entries in the most populated entry. +// These are accepted at face value. +func (s *fseEncoder) HistogramFinished(maxSymbol uint8, maxCount int) { + s.maxCount = maxCount + s.symbolLen = uint16(maxSymbol) + 1 + s.clearCount = maxCount != 0 +} + +// prepare will prepare and allocate scratch tables used for both compression and decompression. +func (s *fseEncoder) prepare() (*fseEncoder, error) { + if s == nil { + s = &fseEncoder{} + } + s.useRLE = false + if s.clearCount && s.maxCount == 0 { + for i := range s.count { + s.count[i] = 0 + } + s.clearCount = false + } + return s, nil +} + +// allocCtable will allocate tables needed for compression. +// If existing tables a re big enough, they are simply re-used. +func (s *fseEncoder) allocCtable() { + tableSize := 1 << s.actualTableLog + // get tableSymbol that is big enough. + if cap(s.ct.tableSymbol) < int(tableSize) { + s.ct.tableSymbol = make([]byte, tableSize) + } + s.ct.tableSymbol = s.ct.tableSymbol[:tableSize] + + ctSize := tableSize + if cap(s.ct.stateTable) < ctSize { + s.ct.stateTable = make([]uint16, ctSize) + } + s.ct.stateTable = s.ct.stateTable[:ctSize] + + if cap(s.ct.symbolTT) < 256 { + s.ct.symbolTT = make([]symbolTransform, 256) + } + s.ct.symbolTT = s.ct.symbolTT[:256] +} + +// buildCTable will populate the compression table so it is ready to be used. +func (s *fseEncoder) buildCTable() error { + tableSize := uint32(1 << s.actualTableLog) + highThreshold := tableSize - 1 + var cumul [256]int16 + + s.allocCtable() + tableSymbol := s.ct.tableSymbol[:tableSize] + // symbol start positions + { + cumul[0] = 0 + for ui, v := range s.norm[:s.symbolLen-1] { + u := byte(ui) // one less than reference + if v == -1 { + // Low proba symbol + cumul[u+1] = cumul[u] + 1 + tableSymbol[highThreshold] = u + highThreshold-- + } else { + cumul[u+1] = cumul[u] + v + } + } + // Encode last symbol separately to avoid overflowing u + u := int(s.symbolLen - 1) + v := s.norm[s.symbolLen-1] + if v == -1 { + // Low proba symbol + cumul[u+1] = cumul[u] + 1 + tableSymbol[highThreshold] = byte(u) + highThreshold-- + } else { + cumul[u+1] = cumul[u] + v + } + if uint32(cumul[s.symbolLen]) != tableSize { + return fmt.Errorf("internal error: expected cumul[s.symbolLen] (%d) == tableSize (%d)", cumul[s.symbolLen], tableSize) + } + cumul[s.symbolLen] = int16(tableSize) + 1 + } + // Spread symbols + s.zeroBits = false + { + step := tableStep(tableSize) + tableMask := tableSize - 1 + var position uint32 + // if any symbol > largeLimit, we may have 0 bits output. + largeLimit := int16(1 << (s.actualTableLog - 1)) + for ui, v := range s.norm[:s.symbolLen] { + symbol := byte(ui) + if v > largeLimit { + s.zeroBits = true + } + for nbOccurrences := int16(0); nbOccurrences < v; nbOccurrences++ { + tableSymbol[position] = symbol + position = (position + step) & tableMask + for position > highThreshold { + position = (position + step) & tableMask + } /* Low proba area */ + } + } + + // Check if we have gone through all positions + if position != 0 { + return errors.New("position!=0") + } + } + + // Build table + table := s.ct.stateTable + { + tsi := int(tableSize) + for u, v := range tableSymbol { + // TableU16 : sorted by symbol order; gives next state value + table[cumul[v]] = uint16(tsi + u) + cumul[v]++ + } + } + + // Build Symbol Transformation Table + { + total := int16(0) + symbolTT := s.ct.symbolTT[:s.symbolLen] + tableLog := s.actualTableLog + tl := (uint32(tableLog) << 16) - (1 << tableLog) + for i, v := range s.norm[:s.symbolLen] { + switch v { + case 0: + case -1, 1: + symbolTT[i].deltaNbBits = tl + symbolTT[i].deltaFindState = int16(total - 1) + total++ + default: + maxBitsOut := uint32(tableLog) - highBit(uint32(v-1)) + minStatePlus := uint32(v) << maxBitsOut + symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus + symbolTT[i].deltaFindState = int16(total - v) + total += v + } + } + if total != int16(tableSize) { + return fmt.Errorf("total mismatch %d (got) != %d (want)", total, tableSize) + } + } + return nil +} + +var rtbTable = [...]uint32{0, 473195, 504333, 520860, 550000, 700000, 750000, 830000} + +func (s *fseEncoder) setRLE(val byte) { + s.allocCtable() + s.actualTableLog = 0 + s.ct.stateTable = s.ct.stateTable[:1] + s.ct.symbolTT[val] = symbolTransform{ + deltaFindState: 0, + deltaNbBits: 0, + } + if debug { + println("setRLE: val", val, "symbolTT", s.ct.symbolTT[val]) + } + s.rleVal = val + s.useRLE = true +} + +// setBits will set output bits for the transform. +// if nil is provided, the number of bits is equal to the index. +func (s *fseEncoder) setBits(transform []byte) { + if s.reUsed || s.preDefined { + return + } + if s.useRLE { + if transform == nil { + s.ct.symbolTT[s.rleVal].outBits = s.rleVal + s.maxBits = s.rleVal + return + } + s.maxBits = transform[s.rleVal] + s.ct.symbolTT[s.rleVal].outBits = s.maxBits + return + } + if transform == nil { + for i := range s.ct.symbolTT[:s.symbolLen] { + s.ct.symbolTT[i].outBits = uint8(i) + } + s.maxBits = uint8(s.symbolLen - 1) + return + } + s.maxBits = 0 + for i, v := range transform[:s.symbolLen] { + s.ct.symbolTT[i].outBits = v + if v > s.maxBits { + // We could assume bits always going up, but we play safe. + s.maxBits = v + } + } +} + +// normalizeCount will normalize the count of the symbols so +// the total is equal to the table size. +// If successful, compression tables will also be made ready. +func (s *fseEncoder) normalizeCount(length int) error { + if s.reUsed { + return nil + } + s.optimalTableLog(length) + var ( + tableLog = s.actualTableLog + scale = 62 - uint64(tableLog) + step = (1 << 62) / uint64(length) + vStep = uint64(1) << (scale - 20) + stillToDistribute = int16(1 << tableLog) + largest int + largestP int16 + lowThreshold = (uint32)(length >> tableLog) + ) + if s.maxCount == length { + s.useRLE = true + return nil + } + s.useRLE = false + for i, cnt := range s.count[:s.symbolLen] { + // already handled + // if (count[s] == s.length) return 0; /* rle special case */ + + if cnt == 0 { + s.norm[i] = 0 + continue + } + if cnt <= lowThreshold { + s.norm[i] = -1 + stillToDistribute-- + } else { + proba := (int16)((uint64(cnt) * step) >> scale) + if proba < 8 { + restToBeat := vStep * uint64(rtbTable[proba]) + v := uint64(cnt)*step - (uint64(proba) << scale) + if v > restToBeat { + proba++ + } + } + if proba > largestP { + largestP = proba + largest = i + } + s.norm[i] = proba + stillToDistribute -= proba + } + } + + if -stillToDistribute >= (s.norm[largest] >> 1) { + // corner case, need another normalization method + err := s.normalizeCount2(length) + if err != nil { + return err + } + if debugAsserts { + err = s.validateNorm() + if err != nil { + return err + } + } + return s.buildCTable() + } + s.norm[largest] += stillToDistribute + if debugAsserts { + err := s.validateNorm() + if err != nil { + return err + } + } + return s.buildCTable() +} + +// Secondary normalization method. +// To be used when primary method fails. +func (s *fseEncoder) normalizeCount2(length int) error { + const notYetAssigned = -2 + var ( + distributed uint32 + total = uint32(length) + tableLog = s.actualTableLog + lowThreshold = uint32(total >> tableLog) + lowOne = uint32((total * 3) >> (tableLog + 1)) + ) + for i, cnt := range s.count[:s.symbolLen] { + if cnt == 0 { + s.norm[i] = 0 + continue + } + if cnt <= lowThreshold { + s.norm[i] = -1 + distributed++ + total -= cnt + continue + } + if cnt <= lowOne { + s.norm[i] = 1 + distributed++ + total -= cnt + continue + } + s.norm[i] = notYetAssigned + } + toDistribute := (1 << tableLog) - distributed + + if (total / toDistribute) > lowOne { + // risk of rounding to zero + lowOne = uint32((total * 3) / (toDistribute * 2)) + for i, cnt := range s.count[:s.symbolLen] { + if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) { + s.norm[i] = 1 + distributed++ + total -= cnt + continue + } + } + toDistribute = (1 << tableLog) - distributed + } + if distributed == uint32(s.symbolLen)+1 { + // all values are pretty poor; + // probably incompressible data (should have already been detected); + // find max, then give all remaining points to max + var maxV int + var maxC uint32 + for i, cnt := range s.count[:s.symbolLen] { + if cnt > maxC { + maxV = i + maxC = cnt + } + } + s.norm[maxV] += int16(toDistribute) + return nil + } + + if total == 0 { + // all of the symbols were low enough for the lowOne or lowThreshold + for i := uint32(0); toDistribute > 0; i = (i + 1) % (uint32(s.symbolLen)) { + if s.norm[i] > 0 { + toDistribute-- + s.norm[i]++ + } + } + return nil + } + + var ( + vStepLog = 62 - uint64(tableLog) + mid = uint64((1 << (vStepLog - 1)) - 1) + rStep = (((1 << vStepLog) * uint64(toDistribute)) + mid) / uint64(total) // scale on remaining + tmpTotal = mid + ) + for i, cnt := range s.count[:s.symbolLen] { + if s.norm[i] == notYetAssigned { + var ( + end = tmpTotal + uint64(cnt)*rStep + sStart = uint32(tmpTotal >> vStepLog) + sEnd = uint32(end >> vStepLog) + weight = sEnd - sStart + ) + if weight < 1 { + return errors.New("weight < 1") + } + s.norm[i] = int16(weight) + tmpTotal = end + } + } + return nil +} + +// optimalTableLog calculates and sets the optimal tableLog in s.actualTableLog +func (s *fseEncoder) optimalTableLog(length int) { + tableLog := uint8(maxEncTableLog) + minBitsSrc := highBit(uint32(length)) + 1 + minBitsSymbols := highBit(uint32(s.symbolLen-1)) + 2 + minBits := uint8(minBitsSymbols) + if minBitsSrc < minBitsSymbols { + minBits = uint8(minBitsSrc) + } + + maxBitsSrc := uint8(highBit(uint32(length-1))) - 2 + if maxBitsSrc < tableLog { + // Accuracy can be reduced + tableLog = maxBitsSrc + } + if minBits > tableLog { + tableLog = minBits + } + // Need a minimum to safely represent all symbol values + if tableLog < minEncTablelog { + tableLog = minEncTablelog + } + if tableLog > maxEncTableLog { + tableLog = maxEncTableLog + } + s.actualTableLog = tableLog +} + +// validateNorm validates the normalized histogram table. +func (s *fseEncoder) validateNorm() (err error) { + var total int + for _, v := range s.norm[:s.symbolLen] { + if v >= 0 { + total += int(v) + } else { + total -= int(v) + } + } + defer func() { + if err == nil { + return + } + fmt.Printf("selected TableLog: %d, Symbol length: %d\n", s.actualTableLog, s.symbolLen) + for i, v := range s.norm[:s.symbolLen] { + fmt.Printf("%3d: %5d -> %4d \n", i, s.count[i], v) + } + }() + if total != (1 << s.actualTableLog) { + return fmt.Errorf("warning: Total == %d != %d", total, 1<> 3) + 3 + 2 + + // Write Table Size + bitStream = uint32(tableLog - minEncTablelog) + bitCount = uint(4) + remaining = int16(tableSize + 1) /* +1 for extra accuracy */ + threshold = int16(tableSize) + nbBits = uint(tableLog + 1) + outP = len(out) + ) + if cap(out) < outP+maxHeaderSize { + out = append(out, make([]byte, maxHeaderSize*3)...) + out = out[:len(out)-maxHeaderSize*3] + } + out = out[:outP+maxHeaderSize] + + // stops at 1 + for remaining > 1 { + if previous0 { + start := charnum + for s.norm[charnum] == 0 { + charnum++ + } + for charnum >= start+24 { + start += 24 + bitStream += uint32(0xFFFF) << bitCount + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + } + for charnum >= start+3 { + start += 3 + bitStream += 3 << bitCount + bitCount += 2 + } + bitStream += uint32(charnum-start) << bitCount + bitCount += 2 + if bitCount > 16 { + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + bitCount -= 16 + } + } + + count := s.norm[charnum] + charnum++ + max := (2*threshold - 1) - remaining + if count < 0 { + remaining += count + } else { + remaining -= count + } + count++ // +1 for extra accuracy + if count >= threshold { + count += max // [0..max[ [max..threshold[ (...) [threshold+max 2*threshold[ + } + bitStream += uint32(count) << bitCount + bitCount += nbBits + if count < max { + bitCount-- + } + + previous0 = count == 1 + if remaining < 1 { + return nil, errors.New("internal error: remaining < 1") + } + for remaining < threshold { + nbBits-- + threshold >>= 1 + } + + if bitCount > 16 { + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += 2 + bitStream >>= 16 + bitCount -= 16 + } + } + + if outP+2 > len(out) { + return nil, fmt.Errorf("internal error: %d > %d, maxheader: %d, sl: %d, tl: %d, normcount: %v", outP+2, len(out), maxHeaderSize, s.symbolLen, int(tableLog), s.norm[:s.symbolLen]) + } + out[outP] = byte(bitStream) + out[outP+1] = byte(bitStream >> 8) + outP += int((bitCount + 7) / 8) + + if charnum > s.symbolLen { + return nil, errors.New("internal error: charnum > s.symbolLen") + } + return out[:outP], nil +} + +// Approximate symbol cost, as fractional value, using fixed-point format (accuracyLog fractional bits) +// note 1 : assume symbolValue is valid (<= maxSymbolValue) +// note 2 : if freq[symbolValue]==0, @return a fake cost of tableLog+1 bits * +func (s *fseEncoder) bitCost(symbolValue uint8, accuracyLog uint32) uint32 { + minNbBits := s.ct.symbolTT[symbolValue].deltaNbBits >> 16 + threshold := (minNbBits + 1) << 16 + if debugAsserts { + if !(s.actualTableLog < 16) { + panic("!s.actualTableLog < 16") + } + // ensure enough room for renormalization double shift + if !(uint8(accuracyLog) < 31-s.actualTableLog) { + panic("!uint8(accuracyLog) < 31-s.actualTableLog") + } + } + tableSize := uint32(1) << s.actualTableLog + deltaFromThreshold := threshold - (s.ct.symbolTT[symbolValue].deltaNbBits + tableSize) + // linear interpolation (very approximate) + normalizedDeltaFromThreshold := (deltaFromThreshold << accuracyLog) >> s.actualTableLog + bitMultiplier := uint32(1) << accuracyLog + if debugAsserts { + if s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold { + panic("s.ct.symbolTT[symbolValue].deltaNbBits+tableSize > threshold") + } + if normalizedDeltaFromThreshold > bitMultiplier { + panic("normalizedDeltaFromThreshold > bitMultiplier") + } + } + return (minNbBits+1)*bitMultiplier - normalizedDeltaFromThreshold +} + +// Returns the cost in bits of encoding the distribution in count using ctable. +// Histogram should only be up to the last non-zero symbol. +// Returns an -1 if ctable cannot represent all the symbols in count. +func (s *fseEncoder) approxSize(hist []uint32) uint32 { + if int(s.symbolLen) < len(hist) { + // More symbols than we have. + return math.MaxUint32 + } + if s.useRLE { + // We will never reuse RLE encoders. + return math.MaxUint32 + } + const kAccuracyLog = 8 + badCost := (uint32(s.actualTableLog) + 1) << kAccuracyLog + var cost uint32 + for i, v := range hist { + if v == 0 { + continue + } + if s.norm[i] == 0 { + return math.MaxUint32 + } + bitCost := s.bitCost(uint8(i), kAccuracyLog) + if bitCost > badCost { + return math.MaxUint32 + } + cost += v * bitCost + } + return cost >> kAccuracyLog +} + +// maxHeaderSize returns the maximum header size in bits. +// This is not exact size, but we want a penalty for new tables anyway. +func (s *fseEncoder) maxHeaderSize() uint32 { + if s.preDefined { + return 0 + } + if s.useRLE { + return 8 + } + return (((uint32(s.symbolLen) * uint32(s.actualTableLog)) >> 3) + 3) * 8 +} + +// cState contains the compression state of a stream. +type cState struct { + bw *bitWriter + stateTable []uint16 + state uint16 +} + +// init will initialize the compression state to the first symbol of the stream. +func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) { + c.bw = bw + c.stateTable = ct.stateTable + if len(c.stateTable) == 1 { + // RLE + c.stateTable[0] = uint16(0) + c.state = 0 + return + } + nbBitsOut := (first.deltaNbBits + (1 << 15)) >> 16 + im := int32((nbBitsOut << 16) - first.deltaNbBits) + lu := (im >> nbBitsOut) + int32(first.deltaFindState) + c.state = c.stateTable[lu] + return +} + +// encode the output symbol provided and write it to the bitstream. +func (c *cState) encode(symbolTT symbolTransform) { + nbBitsOut := (uint32(c.state) + symbolTT.deltaNbBits) >> 16 + dstState := int32(c.state>>(nbBitsOut&15)) + int32(symbolTT.deltaFindState) + c.bw.addBits16NC(c.state, uint8(nbBitsOut)) + c.state = c.stateTable[dstState] +} + +// flush will write the tablelog to the output and flush the remaining full bytes. +func (c *cState) flush(tableLog uint8) { + c.bw.flush32() + c.bw.addBits16NC(c.state, tableLog) +} diff --git a/vendor/github.com/klauspost/compress/zstd/fse_predefined.go b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go new file mode 100644 index 00000000000..6c17dc17f4f --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/fse_predefined.go @@ -0,0 +1,158 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "fmt" + "math" + "sync" +) + +var ( + // fsePredef are the predefined fse tables as defined here: + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions + // These values are already transformed. + fsePredef [3]fseDecoder + + // fsePredefEnc are the predefined encoder based on fse tables as defined here: + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions + // These values are already transformed. + fsePredefEnc [3]fseEncoder + + // symbolTableX contain the transformations needed for each type as defined in + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets + symbolTableX [3][]baseOffset + + // maxTableSymbol is the biggest supported symbol for each table type + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#the-codes-for-literals-lengths-match-lengths-and-offsets + maxTableSymbol = [3]uint8{tableLiteralLengths: maxLiteralLengthSymbol, tableOffsets: maxOffsetLengthSymbol, tableMatchLengths: maxMatchLengthSymbol} + + // bitTables is the bits table for each table. + bitTables = [3][]byte{tableLiteralLengths: llBitsTable[:], tableOffsets: nil, tableMatchLengths: mlBitsTable[:]} +) + +type tableIndex uint8 + +const ( + // indexes for fsePredef and symbolTableX + tableLiteralLengths tableIndex = 0 + tableOffsets tableIndex = 1 + tableMatchLengths tableIndex = 2 + + maxLiteralLengthSymbol = 35 + maxOffsetLengthSymbol = 30 + maxMatchLengthSymbol = 52 +) + +// baseOffset is used for calculating transformations. +type baseOffset struct { + baseLine uint32 + addBits uint8 +} + +// fillBase will precalculate base offsets with the given bit distributions. +func fillBase(dst []baseOffset, base uint32, bits ...uint8) { + if len(bits) != len(dst) { + panic(fmt.Sprintf("len(dst) (%d) != len(bits) (%d)", len(dst), len(bits))) + } + for i, bit := range bits { + if base > math.MaxInt32 { + panic(fmt.Sprintf("invalid decoding table, base overflows int32")) + } + + dst[i] = baseOffset{ + baseLine: base, + addBits: bit, + } + base += 1 << bit + } +} + +var predef sync.Once + +func initPredefined() { + predef.Do(func() { + // Literals length codes + tmp := make([]baseOffset, 36) + for i := range tmp[:16] { + tmp[i] = baseOffset{ + baseLine: uint32(i), + addBits: 0, + } + } + fillBase(tmp[16:], 16, 1, 1, 1, 1, 2, 2, 3, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + symbolTableX[tableLiteralLengths] = tmp + + // Match length codes + tmp = make([]baseOffset, 53) + for i := range tmp[:32] { + tmp[i] = baseOffset{ + // The transformation adds the 3 length. + baseLine: uint32(i) + 3, + addBits: 0, + } + } + fillBase(tmp[32:], 35, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16) + symbolTableX[tableMatchLengths] = tmp + + // Offset codes + tmp = make([]baseOffset, maxOffsetBits+1) + tmp[1] = baseOffset{ + baseLine: 1, + addBits: 1, + } + fillBase(tmp[2:], 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30) + symbolTableX[tableOffsets] = tmp + + // Fill predefined tables and transform them. + // https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#default-distributions + for i := range fsePredef[:] { + f := &fsePredef[i] + switch tableIndex(i) { + case tableLiteralLengths: + // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L243 + f.actualTableLog = 6 + copy(f.norm[:], []int16{4, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 1, 1, 1, 1, 1, + -1, -1, -1, -1}) + f.symbolLen = 36 + case tableOffsets: + // https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L281 + f.actualTableLog = 5 + copy(f.norm[:], []int16{ + 1, 1, 1, 1, 1, 1, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1, -1}) + f.symbolLen = 29 + case tableMatchLengths: + //https://github.com/facebook/zstd/blob/ededcfca57366461021c922720878c81a5854a0a/lib/decompress/zstd_decompress_block.c#L304 + f.actualTableLog = 6 + copy(f.norm[:], []int16{ + 1, 4, 3, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, + -1, -1, -1, -1, -1}) + f.symbolLen = 53 + } + if err := f.buildDtable(); err != nil { + panic(fmt.Errorf("building table %v: %v", tableIndex(i), err)) + } + if err := f.transform(symbolTableX[i]); err != nil { + panic(fmt.Errorf("building table %v: %v", tableIndex(i), err)) + } + f.preDefined = true + + // Create encoder as well + enc := &fsePredefEnc[i] + copy(enc.norm[:], f.norm[:]) + enc.symbolLen = f.symbolLen + enc.actualTableLog = f.actualTableLog + if err := enc.buildCTable(); err != nil { + panic(fmt.Errorf("building encoding table %v: %v", tableIndex(i), err)) + } + enc.setBits(bitTables[i]) + enc.preDefined = true + } + }) +} diff --git a/vendor/github.com/klauspost/compress/zstd/hash.go b/vendor/github.com/klauspost/compress/zstd/hash.go new file mode 100644 index 00000000000..4a752067fc9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/hash.go @@ -0,0 +1,77 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +const ( + prime3bytes = 506832829 + prime4bytes = 2654435761 + prime5bytes = 889523592379 + prime6bytes = 227718039650203 + prime7bytes = 58295818150454627 + prime8bytes = 0xcf1bbcdcb7a56463 +) + +// hashLen returns a hash of the lowest l bytes of u for a size size of h bytes. +// l must be >=4 and <=8. Any other value will return hash for 4 bytes. +// h should always be <32. +// Preferably h and l should be a constant. +// FIXME: This does NOT get resolved, if 'mls' is constant, +// so this cannot be used. +func hashLen(u uint64, hashLog, mls uint8) uint32 { + switch mls { + case 5: + return hash5(u, hashLog) + case 6: + return hash6(u, hashLog) + case 7: + return hash7(u, hashLog) + case 8: + return hash8(u, hashLog) + default: + return hash4x64(u, hashLog) + } +} + +// hash3 returns the hash of the lower 3 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash3(u uint32, h uint8) uint32 { + return ((u << (32 - 24)) * prime3bytes) >> ((32 - h) & 31) +} + +// hash4 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4(u uint32, h uint8) uint32 { + return (u * prime4bytes) >> ((32 - h) & 31) +} + +// hash4x64 returns the hash of the lowest 4 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <32. +func hash4x64(u uint64, h uint8) uint32 { + return (uint32(u) * prime4bytes) >> ((32 - h) & 31) +} + +// hash5 returns the hash of the lowest 5 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash5(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 40)) * prime5bytes) >> ((64 - h) & 63)) +} + +// hash6 returns the hash of the lowest 6 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash6(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 48)) * prime6bytes) >> ((64 - h) & 63)) +} + +// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash7(u uint64, h uint8) uint32 { + return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & 63)) +} + +// hash8 returns the hash of u to fit in a hash table with h bits. +// Preferably h should be a constant and should always be <64. +func hash8(u uint64, h uint8) uint32 { + return uint32((u * prime8bytes) >> ((64 - h) & 63)) +} diff --git a/vendor/github.com/klauspost/compress/zstd/history.go b/vendor/github.com/klauspost/compress/zstd/history.go new file mode 100644 index 00000000000..e8c419bd533 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/history.go @@ -0,0 +1,73 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "github.com/klauspost/compress/huff0" +) + +// history contains the information transferred between blocks. +type history struct { + b []byte + huffTree *huff0.Scratch + recentOffsets [3]int + decoders sequenceDecs + windowSize int + maxSize int + error bool +} + +// reset will reset the history to initial state of a frame. +// The history must already have been initialized to the desired size. +func (h *history) reset() { + h.b = h.b[:0] + h.error = false + h.recentOffsets = [3]int{1, 4, 8} + if f := h.decoders.litLengths.fse; f != nil && !f.preDefined { + fseDecoderPool.Put(f) + } + if f := h.decoders.offsets.fse; f != nil && !f.preDefined { + fseDecoderPool.Put(f) + } + if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined { + fseDecoderPool.Put(f) + } + h.decoders = sequenceDecs{} + if h.huffTree != nil { + huffDecoderPool.Put(h.huffTree) + } + h.huffTree = nil + //printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b)) +} + +// append bytes to history. +// This function will make sure there is space for it, +// if the buffer has been allocated with enough extra space. +func (h *history) append(b []byte) { + if len(b) >= h.windowSize { + // Discard all history by simply overwriting + h.b = h.b[:h.windowSize] + copy(h.b, b[len(b)-h.windowSize:]) + return + } + + // If there is space, append it. + if len(b) < cap(h.b)-len(h.b) { + h.b = append(h.b, b...) + return + } + + // Move data down so we only have window size left. + // We know we have less than window size in b at this point. + discard := len(b) + len(h.b) - h.windowSize + copy(h.b, h.b[discard:]) + h.b = h.b[:h.windowSize] + copy(h.b[h.windowSize-len(b):], b) +} + +// append bytes to history without ever discarding anything. +func (h *history) appendKeep(b []byte) { + h.b = append(h.b, b...) +} diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt new file mode 100644 index 00000000000..24b53065f40 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/LICENSE.txt @@ -0,0 +1,22 @@ +Copyright (c) 2016 Caleb Spare + +MIT License + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md new file mode 100644 index 00000000000..69aa3bb587c --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/README.md @@ -0,0 +1,58 @@ +# xxhash + +VENDORED: Go to [github.com/cespare/xxhash](https://github.com/cespare/xxhash) for original package. + + +[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) +[![Build Status](https://travis-ci.org/cespare/xxhash.svg?branch=master)](https://travis-ci.org/cespare/xxhash) + +xxhash is a Go implementation of the 64-bit +[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a +high-quality hashing algorithm that is much faster than anything in the Go +standard library. + +This package provides a straightforward API: + +``` +func Sum64(b []byte) uint64 +func Sum64String(s string) uint64 +type Digest struct{ ... } + func New() *Digest +``` + +The `Digest` type implements hash.Hash64. Its key methods are: + +``` +func (*Digest) Write([]byte) (int, error) +func (*Digest) WriteString(string) (int, error) +func (*Digest) Sum64() uint64 +``` + +This implementation provides a fast pure-Go implementation and an even faster +assembly implementation for amd64. + +## Benchmarks + +Here are some quick benchmarks comparing the pure-Go and assembly +implementations of Sum64. + +| input size | purego | asm | +| --- | --- | --- | +| 5 B | 979.66 MB/s | 1291.17 MB/s | +| 100 B | 7475.26 MB/s | 7973.40 MB/s | +| 4 KB | 17573.46 MB/s | 17602.65 MB/s | +| 10 MB | 17131.46 MB/s | 17142.16 MB/s | + +These numbers were generated on Ubuntu 18.04 with an Intel i7-8700K CPU using +the following commands under Go 1.11.2: + +``` +$ go test -tags purego -benchtime 10s -bench '/xxhash,direct,bytes' +$ go test -benchtime 10s -bench '/xxhash,direct,bytes' +``` + +## Projects using this package + +- [InfluxDB](https://github.com/influxdata/influxdb) +- [Prometheus](https://github.com/prometheus/prometheus) +- [FreeCache](https://github.com/coocood/freecache) diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go new file mode 100644 index 00000000000..426b9cac786 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash.go @@ -0,0 +1,238 @@ +// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described +// at http://cyan4973.github.io/xxHash/. +// THIS IS VENDORED: Go to github.com/cespare/xxhash for original package. + +package xxhash + +import ( + "encoding/binary" + "errors" + "math/bits" +) + +const ( + prime1 uint64 = 11400714785074694791 + prime2 uint64 = 14029467366897019727 + prime3 uint64 = 1609587929392839161 + prime4 uint64 = 9650029242287828579 + prime5 uint64 = 2870177450012600261 +) + +// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where +// possible in the Go code is worth a small (but measurable) performance boost +// by avoiding some MOVQs. Vars are needed for the asm and also are useful for +// convenience in the Go code in a few places where we need to intentionally +// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the +// result overflows a uint64). +var ( + prime1v = prime1 + prime2v = prime2 + prime3v = prime3 + prime4v = prime4 + prime5v = prime5 +) + +// Digest implements hash.Hash64. +type Digest struct { + v1 uint64 + v2 uint64 + v3 uint64 + v4 uint64 + total uint64 + mem [32]byte + n int // how much of mem is used +} + +// New creates a new Digest that computes the 64-bit xxHash algorithm. +func New() *Digest { + var d Digest + d.Reset() + return &d +} + +// Reset clears the Digest's state so that it can be reused. +func (d *Digest) Reset() { + d.v1 = prime1v + prime2 + d.v2 = prime2 + d.v3 = 0 + d.v4 = -prime1v + d.total = 0 + d.n = 0 +} + +// Size always returns 8 bytes. +func (d *Digest) Size() int { return 8 } + +// BlockSize always returns 32 bytes. +func (d *Digest) BlockSize() int { return 32 } + +// Write adds more data to d. It always returns len(b), nil. +func (d *Digest) Write(b []byte) (n int, err error) { + n = len(b) + d.total += uint64(n) + + if d.n+n < 32 { + // This new data doesn't even fill the current block. + copy(d.mem[d.n:], b) + d.n += n + return + } + + if d.n > 0 { + // Finish off the partial block. + copy(d.mem[d.n:], b) + d.v1 = round(d.v1, u64(d.mem[0:8])) + d.v2 = round(d.v2, u64(d.mem[8:16])) + d.v3 = round(d.v3, u64(d.mem[16:24])) + d.v4 = round(d.v4, u64(d.mem[24:32])) + b = b[32-d.n:] + d.n = 0 + } + + if len(b) >= 32 { + // One or more full blocks left. + nw := writeBlocks(d, b) + b = b[nw:] + } + + // Store any remaining partial block. + copy(d.mem[:], b) + d.n = len(b) + + return +} + +// Sum appends the current hash to b and returns the resulting slice. +func (d *Digest) Sum(b []byte) []byte { + s := d.Sum64() + return append( + b, + byte(s>>56), + byte(s>>48), + byte(s>>40), + byte(s>>32), + byte(s>>24), + byte(s>>16), + byte(s>>8), + byte(s), + ) +} + +// Sum64 returns the current hash. +func (d *Digest) Sum64() uint64 { + var h uint64 + + if d.total >= 32 { + v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = d.v3 + prime5 + } + + h += d.total + + i, end := 0, d.n + for ; i+8 <= end; i += 8 { + k1 := round(0, u64(d.mem[i:i+8])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if i+4 <= end { + h ^= uint64(u32(d.mem[i:i+4])) * prime1 + h = rol23(h)*prime2 + prime3 + i += 4 + } + for i < end { + h ^= uint64(d.mem[i]) * prime5 + h = rol11(h) * prime1 + i++ + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return h +} + +const ( + magic = "xxh\x06" + marshaledSize = len(magic) + 8*5 + 32 +) + +// MarshalBinary implements the encoding.BinaryMarshaler interface. +func (d *Digest) MarshalBinary() ([]byte, error) { + b := make([]byte, 0, marshaledSize) + b = append(b, magic...) + b = appendUint64(b, d.v1) + b = appendUint64(b, d.v2) + b = appendUint64(b, d.v3) + b = appendUint64(b, d.v4) + b = appendUint64(b, d.total) + b = append(b, d.mem[:d.n]...) + b = b[:len(b)+len(d.mem)-d.n] + return b, nil +} + +// UnmarshalBinary implements the encoding.BinaryUnmarshaler interface. +func (d *Digest) UnmarshalBinary(b []byte) error { + if len(b) < len(magic) || string(b[:len(magic)]) != magic { + return errors.New("xxhash: invalid hash state identifier") + } + if len(b) != marshaledSize { + return errors.New("xxhash: invalid hash state size") + } + b = b[len(magic):] + b, d.v1 = consumeUint64(b) + b, d.v2 = consumeUint64(b) + b, d.v3 = consumeUint64(b) + b, d.v4 = consumeUint64(b) + b, d.total = consumeUint64(b) + copy(d.mem[:], b) + b = b[len(d.mem):] + d.n = int(d.total % uint64(len(d.mem))) + return nil +} + +func appendUint64(b []byte, x uint64) []byte { + var a [8]byte + binary.LittleEndian.PutUint64(a[:], x) + return append(b, a[:]...) +} + +func consumeUint64(b []byte) ([]byte, uint64) { + x := u64(b) + return b[8:], x +} + +func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } +func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } + +func round(acc, input uint64) uint64 { + acc += input * prime2 + acc = rol31(acc) + acc *= prime1 + return acc +} + +func mergeRound(acc, val uint64) uint64 { + val = round(0, val) + acc ^= val + acc = acc*prime1 + prime4 + return acc +} + +func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } +func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } +func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } +func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } +func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } +func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } +func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } +func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go new file mode 100644 index 00000000000..35318d7c46c --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.go @@ -0,0 +1,13 @@ +// +build !appengine +// +build gc +// +build !purego + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +// +//go:noescape +func Sum64(b []byte) uint64 + +//go:noescape +func writeBlocks(*Digest, []byte) int diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s new file mode 100644 index 00000000000..2c9c5357a14 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_amd64.s @@ -0,0 +1,215 @@ +// +build !appengine +// +build gc +// +build !purego + +#include "textflag.h" + +// Register allocation: +// AX h +// CX pointer to advance through b +// DX n +// BX loop end +// R8 v1, k1 +// R9 v2 +// R10 v3 +// R11 v4 +// R12 tmp +// R13 prime1v +// R14 prime2v +// R15 prime4v + +// round reads from and advances the buffer pointer in CX. +// It assumes that R13 has prime1v and R14 has prime2v. +#define round(r) \ + MOVQ (CX), R12 \ + ADDQ $8, CX \ + IMULQ R14, R12 \ + ADDQ R12, r \ + ROLQ $31, r \ + IMULQ R13, r + +// mergeRound applies a merge round on the two registers acc and val. +// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. +#define mergeRound(acc, val) \ + IMULQ R14, val \ + ROLQ $31, val \ + IMULQ R13, val \ + XORQ val, acc \ + IMULQ R13, acc \ + ADDQ R15, acc + +// func Sum64(b []byte) uint64 +TEXT ·Sum64(SB), NOSPLIT, $0-32 + // Load fixed primes. + MOVQ ·prime1v(SB), R13 + MOVQ ·prime2v(SB), R14 + MOVQ ·prime4v(SB), R15 + + // Load slice. + MOVQ b_base+0(FP), CX + MOVQ b_len+8(FP), DX + LEAQ (CX)(DX*1), BX + + // The first loop limit will be len(b)-32. + SUBQ $32, BX + + // Check whether we have at least one block. + CMPQ DX, $32 + JLT noBlocks + + // Set up initial state (v1, v2, v3, v4). + MOVQ R13, R8 + ADDQ R14, R8 + MOVQ R14, R9 + XORQ R10, R10 + XORQ R11, R11 + SUBQ R13, R11 + + // Loop until CX > BX. +blockLoop: + round(R8) + round(R9) + round(R10) + round(R11) + + CMPQ CX, BX + JLE blockLoop + + MOVQ R8, AX + ROLQ $1, AX + MOVQ R9, R12 + ROLQ $7, R12 + ADDQ R12, AX + MOVQ R10, R12 + ROLQ $12, R12 + ADDQ R12, AX + MOVQ R11, R12 + ROLQ $18, R12 + ADDQ R12, AX + + mergeRound(AX, R8) + mergeRound(AX, R9) + mergeRound(AX, R10) + mergeRound(AX, R11) + + JMP afterBlocks + +noBlocks: + MOVQ ·prime5v(SB), AX + +afterBlocks: + ADDQ DX, AX + + // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. + ADDQ $24, BX + + CMPQ CX, BX + JG fourByte + +wordLoop: + // Calculate k1. + MOVQ (CX), R8 + ADDQ $8, CX + IMULQ R14, R8 + ROLQ $31, R8 + IMULQ R13, R8 + + XORQ R8, AX + ROLQ $27, AX + IMULQ R13, AX + ADDQ R15, AX + + CMPQ CX, BX + JLE wordLoop + +fourByte: + ADDQ $4, BX + CMPQ CX, BX + JG singles + + MOVL (CX), R8 + ADDQ $4, CX + IMULQ R13, R8 + XORQ R8, AX + + ROLQ $23, AX + IMULQ R14, AX + ADDQ ·prime3v(SB), AX + +singles: + ADDQ $4, BX + CMPQ CX, BX + JGE finalize + +singlesLoop: + MOVBQZX (CX), R12 + ADDQ $1, CX + IMULQ ·prime5v(SB), R12 + XORQ R12, AX + + ROLQ $11, AX + IMULQ R13, AX + + CMPQ CX, BX + JL singlesLoop + +finalize: + MOVQ AX, R12 + SHRQ $33, R12 + XORQ R12, AX + IMULQ R14, AX + MOVQ AX, R12 + SHRQ $29, R12 + XORQ R12, AX + IMULQ ·prime3v(SB), AX + MOVQ AX, R12 + SHRQ $32, R12 + XORQ R12, AX + + MOVQ AX, ret+24(FP) + RET + +// writeBlocks uses the same registers as above except that it uses AX to store +// the d pointer. + +// func writeBlocks(d *Digest, b []byte) int +TEXT ·writeBlocks(SB), NOSPLIT, $0-40 + // Load fixed primes needed for round. + MOVQ ·prime1v(SB), R13 + MOVQ ·prime2v(SB), R14 + + // Load slice. + MOVQ arg1_base+8(FP), CX + MOVQ arg1_len+16(FP), DX + LEAQ (CX)(DX*1), BX + SUBQ $32, BX + + // Load vN from d. + MOVQ arg+0(FP), AX + MOVQ 0(AX), R8 // v1 + MOVQ 8(AX), R9 // v2 + MOVQ 16(AX), R10 // v3 + MOVQ 24(AX), R11 // v4 + + // We don't need to check the loop condition here; this function is + // always called with at least one block of data to process. +blockLoop: + round(R8) + round(R9) + round(R10) + round(R11) + + CMPQ CX, BX + JLE blockLoop + + // Copy vN back to d. + MOVQ R8, 0(AX) + MOVQ R9, 8(AX) + MOVQ R10, 16(AX) + MOVQ R11, 24(AX) + + // The number of bytes written is CX minus the old base pointer. + SUBQ arg1_base+8(FP), CX + MOVQ CX, ret+32(FP) + + RET diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go new file mode 100644 index 00000000000..4a5a821603e --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_other.go @@ -0,0 +1,76 @@ +// +build !amd64 appengine !gc purego + +package xxhash + +// Sum64 computes the 64-bit xxHash digest of b. +func Sum64(b []byte) uint64 { + // A simpler version would be + // d := New() + // d.Write(b) + // return d.Sum64() + // but this is faster, particularly for small inputs. + + n := len(b) + var h uint64 + + if n >= 32 { + v1 := prime1v + prime2 + v2 := prime2 + v3 := uint64(0) + v4 := -prime1v + for len(b) >= 32 { + v1 = round(v1, u64(b[0:8:len(b)])) + v2 = round(v2, u64(b[8:16:len(b)])) + v3 = round(v3, u64(b[16:24:len(b)])) + v4 = round(v4, u64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + h = mergeRound(h, v1) + h = mergeRound(h, v2) + h = mergeRound(h, v3) + h = mergeRound(h, v4) + } else { + h = prime5 + } + + h += uint64(n) + + i, end := 0, len(b) + for ; i+8 <= end; i += 8 { + k1 := round(0, u64(b[i:i+8:len(b)])) + h ^= k1 + h = rol27(h)*prime1 + prime4 + } + if i+4 <= end { + h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 + h = rol23(h)*prime2 + prime3 + i += 4 + } + for ; i < end; i++ { + h ^= uint64(b[i]) * prime5 + h = rol11(h) * prime1 + } + + h ^= h >> 33 + h *= prime2 + h ^= h >> 29 + h *= prime3 + h ^= h >> 32 + + return h +} + +func writeBlocks(d *Digest, b []byte) int { + v1, v2, v3, v4 := d.v1, d.v2, d.v3, d.v4 + n := len(b) + for len(b) >= 32 { + v1 = round(v1, u64(b[0:8:len(b)])) + v2 = round(v2, u64(b[8:16:len(b)])) + v3 = round(v3, u64(b[16:24:len(b)])) + v4 = round(v4, u64(b[24:32:len(b)])) + b = b[32:len(b):len(b)] + } + d.v1, d.v2, d.v3, d.v4 = v1, v2, v3, v4 + return n - len(b) +} diff --git a/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go new file mode 100644 index 00000000000..6f3b0cb1026 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/internal/xxhash/xxhash_safe.go @@ -0,0 +1,11 @@ +package xxhash + +// Sum64String computes the 64-bit xxHash digest of s. +func Sum64String(s string) uint64 { + return Sum64([]byte(s)) +} + +// WriteString adds more data to d. It always returns len(s), nil. +func (d *Digest) WriteString(s string) (n int, err error) { + return d.Write([]byte(s)) +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec.go b/vendor/github.com/klauspost/compress/zstd/seqdec.go new file mode 100644 index 00000000000..15a45f7b501 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqdec.go @@ -0,0 +1,402 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "errors" + "fmt" + "io" +) + +type seq struct { + litLen uint32 + matchLen uint32 + offset uint32 + + // Codes are stored here for the encoder + // so they only have to be looked up once. + llCode, mlCode, ofCode uint8 +} + +func (s seq) String() string { + if s.offset <= 3 { + if s.offset == 0 { + return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset: INVALID (0)") + } + return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset, " (repeat)") + } + return fmt.Sprint("litLen:", s.litLen, ", matchLen:", s.matchLen+zstdMinMatch, ", offset:", s.offset-3, " (new)") +} + +type seqCompMode uint8 + +const ( + compModePredefined seqCompMode = iota + compModeRLE + compModeFSE + compModeRepeat +) + +type sequenceDec struct { + // decoder keeps track of the current state and updates it from the bitstream. + fse *fseDecoder + state fseState + repeat bool +} + +// init the state of the decoder with input from stream. +func (s *sequenceDec) init(br *bitReader) error { + if s.fse == nil { + return errors.New("sequence decoder not defined") + } + s.state.init(br, s.fse.actualTableLog, s.fse.dt[:1<= 0; i-- { + if br.overread() { + printf("reading sequence %d, exceeded available data\n", seqs-i) + return io.ErrUnexpectedEOF + } + var litLen, matchOff, matchLen int + if br.off > 4+((maxOffsetBits+16+16)>>3) { + litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState) + br.fillFast() + } else { + litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState) + br.fill() + } + + if debugSequences { + println("Seq", seqs-i-1, "Litlen:", litLen, "matchOff:", matchOff, "(abs) matchLen:", matchLen) + } + + if litLen > len(s.literals) { + return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", litLen, len(s.literals)) + } + size := litLen + matchLen + len(s.out) + if size-startSize > maxBlockSize { + return fmt.Errorf("output (%d) bigger than max block size", size) + } + if size > cap(s.out) { + // Not enough size, will be extremely rarely triggered, + // but could be if destination slice is too small for sync operations. + // We add maxBlockSize to the capacity. + s.out = append(s.out, make([]byte, maxBlockSize)...) + s.out = s.out[:len(s.out)-maxBlockSize] + } + if matchLen > maxMatchLen { + return fmt.Errorf("match len (%d) bigger than max allowed length", matchLen) + } + if matchOff > len(s.out)+len(hist)+litLen { + return fmt.Errorf("match offset (%d) bigger than current history (%d)", matchOff, len(s.out)+len(hist)+litLen) + } + if matchOff == 0 && matchLen > 0 { + return fmt.Errorf("zero matchoff and matchlen > 0") + } + + s.out = append(s.out, s.literals[:litLen]...) + s.literals = s.literals[litLen:] + out := s.out + + // Copy from history. + // TODO: Blocks without history could be made to ignore this completely. + if v := matchOff - len(s.out); v > 0 { + // v is the start position in history from end. + start := len(s.hist) - v + if matchLen > v { + // Some goes into current block. + // Copy remainder of history + out = append(out, s.hist[start:]...) + matchOff -= v + matchLen -= v + } else { + out = append(out, s.hist[start:start+matchLen]...) + matchLen = 0 + } + } + // We must be in current buffer now + if matchLen > 0 { + start := len(s.out) - matchOff + if matchLen <= len(s.out)-start { + // No overlap + out = append(out, s.out[start:start+matchLen]...) + } else { + // Overlapping copy + // Extend destination slice and copy one byte at the time. + out = out[:len(out)+matchLen] + src := out[start : start+matchLen] + // Destination is the space we just added. + dst := out[len(out)-matchLen:] + dst = dst[:len(src)] + for i := range src { + dst[i] = src[i] + } + } + } + s.out = out + if i == 0 { + // This is the last sequence, so we shouldn't update state. + break + } + + // Manually inlined, ~ 5-20% faster + // Update all 3 states at once. Approx 20% faster. + nBits := llState.nbBits() + mlState.nbBits() + ofState.nbBits() + if nBits == 0 { + llState = llTable[llState.newState()&maxTableMask] + mlState = mlTable[mlState.newState()&maxTableMask] + ofState = ofTable[ofState.newState()&maxTableMask] + } else { + bits := br.getBitsFast(nBits) + lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31)) + llState = llTable[(llState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits >> (ofState.nbBits() & 31)) + lowBits &= bitMask[mlState.nbBits()&15] + mlState = mlTable[(mlState.newState()+lowBits)&maxTableMask] + + lowBits = uint16(bits) & bitMask[ofState.nbBits()&15] + ofState = ofTable[(ofState.newState()+lowBits)&maxTableMask] + } + } + + // Add final literals + s.out = append(s.out, s.literals...) + return nil +} + +// update states, at least 27 bits must be available. +func (s *sequenceDecs) update(br *bitReader) { + // Max 8 bits + s.litLengths.state.next(br) + // Max 9 bits + s.matchLengths.state.next(br) + // Max 8 bits + s.offsets.state.next(br) +} + +var bitMask [16]uint16 + +func init() { + for i := range bitMask[:] { + bitMask[i] = uint16((1 << uint(i)) - 1) + } +} + +// update states, at least 27 bits must be available. +func (s *sequenceDecs) updateAlt(br *bitReader) { + // Update all 3 states at once. Approx 20% faster. + a, b, c := s.litLengths.state.state, s.matchLengths.state.state, s.offsets.state.state + + nBits := a.nbBits() + b.nbBits() + c.nbBits() + if nBits == 0 { + s.litLengths.state.state = s.litLengths.state.dt[a.newState()] + s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()] + s.offsets.state.state = s.offsets.state.dt[c.newState()] + return + } + bits := br.getBitsFast(nBits) + lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31)) + s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits] + + lowBits = uint16(bits >> (c.nbBits() & 31)) + lowBits &= bitMask[b.nbBits()&15] + s.matchLengths.state.state = s.matchLengths.state.dt[b.newState()+lowBits] + + lowBits = uint16(bits) & bitMask[c.nbBits()&15] + s.offsets.state.state = s.offsets.state.dt[c.newState()+lowBits] +} + +// nextFast will return new states when there are at least 4 unused bytes left on the stream when done. +func (s *sequenceDecs) nextFast(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { + // Final will not read from stream. + ll, llB := llState.final() + ml, mlB := mlState.final() + mo, moB := ofState.final() + + // extra bits are stored in reverse order. + br.fillFast() + mo += br.getBits(moB) + if s.maxBits > 32 { + br.fillFast() + } + ml += br.getBits(mlB) + ll += br.getBits(llB) + + if moB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = mo + return + } + // mo = s.adjustOffset(mo, ll, moB) + // Inlined for rather big speedup + if ll == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + mo++ + } + + if mo == 0 { + mo = s.prevOffset[0] + return + } + var temp int + if mo == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[mo] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("temp was 0") + temp = 1 + } + + if mo != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + mo = temp + return +} + +func (s *sequenceDecs) next(br *bitReader, llState, mlState, ofState decSymbol) (ll, mo, ml int) { + // Final will not read from stream. + ll, llB := llState.final() + ml, mlB := mlState.final() + mo, moB := ofState.final() + + // extra bits are stored in reverse order. + br.fill() + if s.maxBits <= 32 { + mo += br.getBits(moB) + ml += br.getBits(mlB) + ll += br.getBits(llB) + } else { + mo += br.getBits(moB) + br.fill() + // matchlength+literal length, max 32 bits + ml += br.getBits(mlB) + ll += br.getBits(llB) + + } + mo = s.adjustOffset(mo, ll, moB) + return +} + +func (s *sequenceDecs) adjustOffset(offset, litLen int, offsetB uint8) int { + if offsetB > 1 { + s.prevOffset[2] = s.prevOffset[1] + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = offset + return offset + } + + if litLen == 0 { + // There is an exception though, when current sequence's literals_length = 0. + // In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2, + // an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte. + offset++ + } + + if offset == 0 { + return s.prevOffset[0] + } + var temp int + if offset == 3 { + temp = s.prevOffset[0] - 1 + } else { + temp = s.prevOffset[offset] + } + + if temp == 0 { + // 0 is not valid; input is corrupted; force offset to 1 + println("temp was 0") + temp = 1 + } + + if offset != 1 { + s.prevOffset[2] = s.prevOffset[1] + } + s.prevOffset[1] = s.prevOffset[0] + s.prevOffset[0] = temp + return temp +} + +// mergeHistory will merge history. +func (s *sequenceDecs) mergeHistory(hist *sequenceDecs) (*sequenceDecs, error) { + for i := uint(0); i < 3; i++ { + var sNew, sHist *sequenceDec + switch i { + default: + // same as "case 0": + sNew = &s.litLengths + sHist = &hist.litLengths + case 1: + sNew = &s.offsets + sHist = &hist.offsets + case 2: + sNew = &s.matchLengths + sHist = &hist.matchLengths + } + if sNew.repeat { + if sHist.fse == nil { + return nil, fmt.Errorf("sequence stream %d, repeat requested, but no history", i) + } + continue + } + if sNew.fse == nil { + return nil, fmt.Errorf("sequence stream %d, no fse found", i) + } + if sHist.fse != nil && !sHist.fse.preDefined { + fseDecoderPool.Put(sHist.fse) + } + sHist.fse = sNew.fse + } + return hist, nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/seqenc.go b/vendor/github.com/klauspost/compress/zstd/seqenc.go new file mode 100644 index 00000000000..36bcc3cc02e --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/seqenc.go @@ -0,0 +1,115 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import "math/bits" + +type seqCoders struct { + llEnc, ofEnc, mlEnc *fseEncoder + llPrev, ofPrev, mlPrev *fseEncoder +} + +// swap coders with another (block). +func (s *seqCoders) swap(other *seqCoders) { + *s, *other = *other, *s +} + +// setPrev will update the previous encoders to the actually used ones +// and make sure a fresh one is in the main slot. +func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) { + compareSwap := func(used *fseEncoder, current, prev **fseEncoder) { + // We used the new one, more current to history and reuse the previous history + if *current == used { + *prev, *current = *current, *prev + c := *current + p := *prev + c.reUsed = false + p.reUsed = true + return + } + if used == *prev { + return + } + // Ensure we cannot reuse by accident + prevEnc := *prev + prevEnc.symbolLen = 0 + return + } + compareSwap(ll, &s.llEnc, &s.llPrev) + compareSwap(ml, &s.mlEnc, &s.mlPrev) + compareSwap(of, &s.ofEnc, &s.ofPrev) +} + +func highBit(val uint32) (n uint32) { + return uint32(bits.Len32(val) - 1) +} + +var llCodeTable = [64]byte{0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 16, 17, 17, 18, 18, 19, 19, + 20, 20, 20, 20, 21, 21, 21, 21, + 22, 22, 22, 22, 22, 22, 22, 22, + 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, + 24, 24, 24, 24, 24, 24, 24, 24} + +// Up to 6 bits +const maxLLCode = 35 + +// llBitsTable translates from ll code to number of bits. +var llBitsTable = [maxLLCode + 1]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 6, 7, 8, 9, 10, 11, 12, + 13, 14, 15, 16} + +// llCode returns the code that represents the literal length requested. +func llCode(litLength uint32) uint8 { + const llDeltaCode = 19 + if litLength <= 63 { + // Compiler insists on bounds check (Go 1.12) + return llCodeTable[litLength&63] + } + return uint8(highBit(litLength)) + llDeltaCode +} + +var mlCodeTable = [128]byte{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, + 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 36, 36, 37, 37, 37, 37, + 38, 38, 38, 38, 38, 38, 38, 38, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, + 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, 41, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42} + +// Up to 6 bits +const maxMLCode = 52 + +// mlBitsTable translates from ml code to number of bits. +var mlBitsTable = [maxMLCode + 1]byte{ + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 2, 2, 3, 3, + 4, 4, 5, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16} + +// note : mlBase = matchLength - MINMATCH; +// because it's the format it's stored in seqStore->sequences +func mlCode(mlBase uint32) uint8 { + const mlDeltaCode = 36 + if mlBase <= 127 { + // Compiler insists on bounds check (Go 1.12) + return mlCodeTable[mlBase&127] + } + return uint8(highBit(mlBase)) + mlDeltaCode +} + +func ofCode(offset uint32) uint8 { + // A valid offset will always be > 0. + return uint8(bits.Len32(offset) - 1) +} diff --git a/vendor/github.com/klauspost/compress/zstd/snappy.go b/vendor/github.com/klauspost/compress/zstd/snappy.go new file mode 100644 index 00000000000..356956ba256 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/snappy.go @@ -0,0 +1,436 @@ +// Copyright 2019+ Klaus Post. All rights reserved. +// License information can be found in the LICENSE file. +// Based on work by Yann Collet, released under BSD License. + +package zstd + +import ( + "encoding/binary" + "errors" + "hash/crc32" + "io" + + "github.com/klauspost/compress/huff0" + "github.com/klauspost/compress/snappy" +) + +const ( + snappyTagLiteral = 0x00 + snappyTagCopy1 = 0x01 + snappyTagCopy2 = 0x02 + snappyTagCopy4 = 0x03 +) + +const ( + snappyChecksumSize = 4 + snappyMagicBody = "sNaPpY" + + // snappyMaxBlockSize is the maximum size of the input to encodeBlock. It is not + // part of the wire format per se, but some parts of the encoder assume + // that an offset fits into a uint16. + // + // Also, for the framing format (Writer type instead of Encode function), + // https://github.com/google/snappy/blob/master/framing_format.txt says + // that "the uncompressed data in a chunk must be no longer than 65536 + // bytes". + snappyMaxBlockSize = 65536 + + // snappyMaxEncodedLenOfMaxBlockSize equals MaxEncodedLen(snappyMaxBlockSize), but is + // hard coded to be a const instead of a variable, so that obufLen can also + // be a const. Their equivalence is confirmed by + // TestMaxEncodedLenOfMaxBlockSize. + snappyMaxEncodedLenOfMaxBlockSize = 76490 +) + +const ( + chunkTypeCompressedData = 0x00 + chunkTypeUncompressedData = 0x01 + chunkTypePadding = 0xfe + chunkTypeStreamIdentifier = 0xff +) + +var ( + // ErrSnappyCorrupt reports that the input is invalid. + ErrSnappyCorrupt = errors.New("snappy: corrupt input") + // ErrSnappyTooLarge reports that the uncompressed length is too large. + ErrSnappyTooLarge = errors.New("snappy: decoded block is too large") + // ErrSnappyUnsupported reports that the input isn't supported. + ErrSnappyUnsupported = errors.New("snappy: unsupported input") + + errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length") +) + +// SnappyConverter can read SnappyConverter-compressed streams and convert them to zstd. +// Conversion is done by converting the stream directly from Snappy without intermediate +// full decoding. +// Therefore the compression ratio is much less than what can be done by a full decompression +// and compression, and a faulty Snappy stream may lead to a faulty Zstandard stream without +// any errors being generated. +// No CRC value is being generated and not all CRC values of the Snappy stream are checked. +// However, it provides really fast recompression of Snappy streams. +// The converter can be reused to avoid allocations, even after errors. +type SnappyConverter struct { + r io.Reader + err error + buf []byte + block *blockEnc +} + +// Convert the Snappy stream supplied in 'in' and write the zStandard stream to 'w'. +// If any error is detected on the Snappy stream it is returned. +// The number of bytes written is returned. +func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) { + initPredefined() + r.err = nil + r.r = in + if r.block == nil { + r.block = &blockEnc{} + r.block.init() + } + r.block.initNewEncode() + if len(r.buf) != snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize { + r.buf = make([]byte, snappyMaxEncodedLenOfMaxBlockSize+snappyChecksumSize) + } + r.block.litEnc.Reuse = huff0.ReusePolicyNone + var written int64 + var readHeader bool + { + var header []byte + var n int + header, r.err = frameHeader{WindowSize: snappyMaxBlockSize}.appendTo(r.buf[:0]) + + n, r.err = w.Write(header) + if r.err != nil { + return written, r.err + } + written += int64(n) + } + + for { + if !r.readFull(r.buf[:4], true) { + // Add empty last block + r.block.reset(nil) + r.block.last = true + err := r.block.encodeLits(false) + if err != nil { + return written, err + } + n, err := w.Write(r.block.output) + if err != nil { + return written, err + } + written += int64(n) + + return written, r.err + } + chunkType := r.buf[0] + if !readHeader { + if chunkType != chunkTypeStreamIdentifier { + println("chunkType != chunkTypeStreamIdentifier", chunkType) + r.err = ErrSnappyCorrupt + return written, r.err + } + readHeader = true + } + chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16 + if chunkLen > len(r.buf) { + println("chunkLen > len(r.buf)", chunkType) + r.err = ErrSnappyUnsupported + return written, r.err + } + + // The chunk types are specified at + // https://github.com/google/snappy/blob/master/framing_format.txt + switch chunkType { + case chunkTypeCompressedData: + // Section 4.2. Compressed data (chunk type 0x00). + if chunkLen < snappyChecksumSize { + println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize) + r.err = ErrSnappyCorrupt + return written, r.err + } + buf := r.buf[:chunkLen] + if !r.readFull(buf, false) { + return written, r.err + } + //checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + buf = buf[snappyChecksumSize:] + + n, hdr, err := snappyDecodedLen(buf) + if err != nil { + r.err = err + return written, r.err + } + buf = buf[hdr:] + if n > snappyMaxBlockSize { + println("n > snappyMaxBlockSize", n, snappyMaxBlockSize) + r.err = ErrSnappyCorrupt + return written, r.err + } + r.block.reset(nil) + r.block.pushOffsets() + if err := decodeSnappy(r.block, buf); err != nil { + r.err = err + return written, r.err + } + if r.block.size+r.block.extraLits != n { + printf("invalid size, want %d, got %d\n", n, r.block.size+r.block.extraLits) + r.err = ErrSnappyCorrupt + return written, r.err + } + err = r.block.encode(false) + switch err { + case errIncompressible: + r.block.popOffsets() + r.block.reset(nil) + r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen]) + if err != nil { + println("snappy.Decode:", err) + return written, err + } + err = r.block.encodeLits(false) + if err != nil { + return written, err + } + case nil: + default: + return written, err + } + + n, r.err = w.Write(r.block.output) + if r.err != nil { + return written, err + } + written += int64(n) + continue + case chunkTypeUncompressedData: + if debug { + println("Uncompressed, chunklen", chunkLen) + } + // Section 4.3. Uncompressed data (chunk type 0x01). + if chunkLen < snappyChecksumSize { + println("chunkLen < snappyChecksumSize", chunkLen, snappyChecksumSize) + r.err = ErrSnappyCorrupt + return written, r.err + } + r.block.reset(nil) + buf := r.buf[:snappyChecksumSize] + if !r.readFull(buf, false) { + return written, r.err + } + checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24 + // Read directly into r.decoded instead of via r.buf. + n := chunkLen - snappyChecksumSize + if n > snappyMaxBlockSize { + println("n > snappyMaxBlockSize", n, snappyMaxBlockSize) + r.err = ErrSnappyCorrupt + return written, r.err + } + r.block.literals = r.block.literals[:n] + if !r.readFull(r.block.literals, false) { + return written, r.err + } + if snappyCRC(r.block.literals) != checksum { + println("literals crc mismatch") + r.err = ErrSnappyCorrupt + return written, r.err + } + err := r.block.encodeLits(false) + if err != nil { + return written, err + } + n, r.err = w.Write(r.block.output) + if r.err != nil { + return written, err + } + written += int64(n) + continue + + case chunkTypeStreamIdentifier: + if debug { + println("stream id", chunkLen, len(snappyMagicBody)) + } + // Section 4.1. Stream identifier (chunk type 0xff). + if chunkLen != len(snappyMagicBody) { + println("chunkLen != len(snappyMagicBody)", chunkLen, len(snappyMagicBody)) + r.err = ErrSnappyCorrupt + return written, r.err + } + if !r.readFull(r.buf[:len(snappyMagicBody)], false) { + return written, r.err + } + for i := 0; i < len(snappyMagicBody); i++ { + if r.buf[i] != snappyMagicBody[i] { + println("r.buf[i] != snappyMagicBody[i]", r.buf[i], snappyMagicBody[i], i) + r.err = ErrSnappyCorrupt + return written, r.err + } + } + continue + } + + if chunkType <= 0x7f { + // Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f). + println("chunkType <= 0x7f") + r.err = ErrSnappyUnsupported + return written, r.err + } + // Section 4.4 Padding (chunk type 0xfe). + // Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd). + if !r.readFull(r.buf[:chunkLen], false) { + return written, r.err + } + } +} + +// decodeSnappy writes the decoding of src to dst. It assumes that the varint-encoded +// length of the decompressed bytes has already been read. +func decodeSnappy(blk *blockEnc, src []byte) error { + //decodeRef(make([]byte, snappyMaxBlockSize), src) + var s, length int + lits := blk.extraLits + var offset uint32 + for s < len(src) { + switch src[s] & 0x03 { + case snappyTagLiteral: + x := uint32(src[s] >> 2) + switch { + case x < 60: + s++ + case x == 60: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, src) + return ErrSnappyCorrupt + } + x = uint32(src[s-1]) + case x == 61: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, src) + return ErrSnappyCorrupt + } + x = uint32(src[s-2]) | uint32(src[s-1])<<8 + case x == 62: + s += 4 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, src) + return ErrSnappyCorrupt + } + x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16 + case x == 63: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, src) + return ErrSnappyCorrupt + } + x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + if x > snappyMaxBlockSize { + println("x > snappyMaxBlockSize", x, snappyMaxBlockSize) + return ErrSnappyCorrupt + } + length = int(x) + 1 + if length <= 0 { + println("length <= 0 ", length) + + return errUnsupportedLiteralLength + } + //if length > snappyMaxBlockSize-d || uint32(length) > len(src)-s { + // return ErrSnappyCorrupt + //} + + blk.literals = append(blk.literals, src[s:s+length]...) + //println(length, "litLen") + lits += length + s += length + continue + + case snappyTagCopy1: + s += 2 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, len(src)) + return ErrSnappyCorrupt + } + length = 4 + int(src[s-2])>>2&0x7 + offset = uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]) + + case snappyTagCopy2: + s += 3 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, len(src)) + return ErrSnappyCorrupt + } + length = 1 + int(src[s-3])>>2 + offset = uint32(src[s-2]) | uint32(src[s-1])<<8 + + case snappyTagCopy4: + s += 5 + if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line. + println("uint(s) > uint(len(src)", s, len(src)) + return ErrSnappyCorrupt + } + length = 1 + int(src[s-5])>>2 + offset = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24 + } + + if offset <= 0 || blk.size+lits < int(offset) /*|| length > len(blk)-d */ { + println("offset <= 0 || blk.size+lits < int(offset)", offset, blk.size+lits, int(offset), blk.size, lits) + + return ErrSnappyCorrupt + } + + // Check if offset is one of the recent offsets. + // Adjusts the output offset accordingly. + // Gives a tiny bit of compression, typically around 1%. + if false { + offset = blk.matchOffset(offset, uint32(lits)) + } else { + offset += 3 + } + + blk.sequences = append(blk.sequences, seq{ + litLen: uint32(lits), + offset: offset, + matchLen: uint32(length) - zstdMinMatch, + }) + blk.size += length + lits + lits = 0 + } + blk.extraLits = lits + return nil +} + +func (r *SnappyConverter) readFull(p []byte, allowEOF bool) (ok bool) { + if _, r.err = io.ReadFull(r.r, p); r.err != nil { + if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) { + r.err = ErrSnappyCorrupt + } + return false + } + return true +} + +var crcTable = crc32.MakeTable(crc32.Castagnoli) + +// crc implements the checksum specified in section 3 of +// https://github.com/google/snappy/blob/master/framing_format.txt +func snappyCRC(b []byte) uint32 { + c := crc32.Update(0, crcTable, b) + return uint32(c>>15|c<<17) + 0xa282ead8 +} + +// snappyDecodedLen returns the length of the decoded block and the number of bytes +// that the length header occupied. +func snappyDecodedLen(src []byte) (blockLen, headerLen int, err error) { + v, n := binary.Uvarint(src) + if n <= 0 || v > 0xffffffff { + return 0, 0, ErrSnappyCorrupt + } + + const wordSize = 32 << (^uint(0) >> 32 & 1) + if wordSize == 32 && v > 0x7fffffff { + return 0, 0, ErrSnappyTooLarge + } + return int(v), n, nil +} diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go new file mode 100644 index 00000000000..0807719c8b9 --- /dev/null +++ b/vendor/github.com/klauspost/compress/zstd/zstd.go @@ -0,0 +1,144 @@ +// Package zstd provides decompression of zstandard files. +// +// For advanced usage and examples, go to the README: https://github.com/klauspost/compress/tree/master/zstd#zstd +package zstd + +import ( + "errors" + "log" + "math" + "math/bits" +) + +// enable debug printing +const debug = false + +// Enable extra assertions. +const debugAsserts = debug || false + +// print sequence details +const debugSequences = false + +// print detailed matching information +const debugMatches = false + +// force encoder to use predefined tables. +const forcePreDef = false + +// zstdMinMatch is the minimum zstd match length. +const zstdMinMatch = 3 + +// Reset the buffer offset when reaching this. +const bufferReset = math.MaxInt32 - MaxWindowSize + +var ( + // ErrReservedBlockType is returned when a reserved block type is found. + // Typically this indicates wrong or corrupted input. + ErrReservedBlockType = errors.New("invalid input: reserved block type encountered") + + // ErrCompressedSizeTooBig is returned when a block is bigger than allowed. + // Typically this indicates wrong or corrupted input. + ErrCompressedSizeTooBig = errors.New("invalid input: compressed size too big") + + // ErrBlockTooSmall is returned when a block is too small to be decoded. + // Typically returned on invalid input. + ErrBlockTooSmall = errors.New("block too small") + + // ErrMagicMismatch is returned when a "magic" number isn't what is expected. + // Typically this indicates wrong or corrupted input. + ErrMagicMismatch = errors.New("invalid input: magic number mismatch") + + // ErrWindowSizeExceeded is returned when a reference exceeds the valid window size. + // Typically this indicates wrong or corrupted input. + ErrWindowSizeExceeded = errors.New("window size exceeded") + + // ErrWindowSizeTooSmall is returned when no window size is specified. + // Typically this indicates wrong or corrupted input. + ErrWindowSizeTooSmall = errors.New("invalid input: window size was too small") + + // ErrDecoderSizeExceeded is returned if decompressed size exceeds the configured limit. + ErrDecoderSizeExceeded = errors.New("decompressed size exceeds configured limit") + + // ErrUnknownDictionary is returned if the dictionary ID is unknown. + // For the time being dictionaries are not supported. + ErrUnknownDictionary = errors.New("unknown dictionary") + + // ErrFrameSizeExceeded is returned if the stated frame size is exceeded. + // This is only returned if SingleSegment is specified on the frame. + ErrFrameSizeExceeded = errors.New("frame size exceeded") + + // ErrCRCMismatch is returned if CRC mismatches. + ErrCRCMismatch = errors.New("CRC check failed") + + // ErrDecoderClosed will be returned if the Decoder was used after + // Close has been called. + ErrDecoderClosed = errors.New("decoder used after Close") +) + +func println(a ...interface{}) { + if debug { + log.Println(a...) + } +} + +func printf(format string, a ...interface{}) { + if debug { + log.Printf(format, a...) + } +} + +// matchLenFast does matching, but will not match the last up to 7 bytes. +func matchLenFast(a, b []byte) int { + endI := len(a) & (math.MaxInt32 - 7) + for i := 0; i < endI; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + return i + bits.TrailingZeros64(diff)>>3 + } + } + return endI +} + +// matchLen returns the maximum length. +// a must be the shortest of the two. +// The function also returns whether all bytes matched. +func matchLen(a, b []byte) int { + b = b[:len(a)] + for i := 0; i < len(a)-7; i += 8 { + if diff := load64(a, i) ^ load64(b, i); diff != 0 { + return i + (bits.TrailingZeros64(diff) >> 3) + } + } + + checked := (len(a) >> 3) << 3 + a = a[checked:] + b = b[checked:] + for i := range a { + if a[i] != b[i] { + return i + checked + } + } + return len(a) + checked +} + +func load3232(b []byte, i int32) uint32 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:4] + return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 +} + +func load6432(b []byte, i int32) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} + +func load64(b []byte, i int) uint64 { + // Help the compiler eliminate bounds checks on the read so it can be done in a single read. + b = b[i:] + b = b[:8] + return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 | + uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 +} diff --git a/vendor/github.com/pierrec/lz4/v4/.gitignore b/vendor/github.com/pierrec/lz4/v4/.gitignore new file mode 100644 index 00000000000..5d7e88de0a3 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/.gitignore @@ -0,0 +1,36 @@ +# Created by https://www.gitignore.io/api/macos + +### macOS ### +*.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# End of https://www.gitignore.io/api/macos + +cmd/*/*exe +.idea + +fuzz/*.zip diff --git a/vendor/github.com/pierrec/lz4/v4/LICENSE b/vendor/github.com/pierrec/lz4/v4/LICENSE new file mode 100644 index 00000000000..bd899d8353d --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/LICENSE @@ -0,0 +1,28 @@ +Copyright (c) 2015, Pierre Curto +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of xxHash nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/vendor/github.com/pierrec/lz4/v4/README.md b/vendor/github.com/pierrec/lz4/v4/README.md new file mode 100644 index 00000000000..df027e2c301 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/README.md @@ -0,0 +1,90 @@ +# lz4 : LZ4 compression in pure Go + +[![Go Reference](https://pkg.go.dev/badge/github.com/pierrec/lz4/v4.svg)](https://pkg.go.dev/github.com/pierrec/lz4/v4) +[![CI](https://github.com/pierrec/lz4/workflows/ci/badge.svg)](https://github.com/pierrec/lz4/actions) +[![Go Report Card](https://goreportcard.com/badge/github.com/pierrec/lz4)](https://goreportcard.com/report/github.com/pierrec/lz4) +[![GitHub tag (latest SemVer)](https://img.shields.io/github/tag/pierrec/lz4.svg?style=social)](https://github.com/pierrec/lz4/tags) + +## Overview + +This package provides a streaming interface to [LZ4 data streams](http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html) as well as low level compress and uncompress functions for LZ4 data blocks. +The implementation is based on the reference C [one](https://github.com/lz4/lz4). + +## Install + +Assuming you have the go toolchain installed: + +``` +go get github.com/pierrec/lz4/v4 +``` + +There is a command line interface tool to compress and decompress LZ4 files. + +``` +go install github.com/pierrec/lz4/v4/cmd/lz4c +``` + +Usage + +``` +Usage of lz4c: + -version + print the program version + +Subcommands: +Compress the given files or from stdin to stdout. +compress [arguments] [ ...] + -bc + enable block checksum + -l int + compression level (0=fastest) + -sc + disable stream checksum + -size string + block max size [64K,256K,1M,4M] (default "4M") + +Uncompress the given files or from stdin to stdout. +uncompress [arguments] [ ...] + +``` + + +## Example + +``` +// Compress and uncompress an input string. +s := "hello world" +r := strings.NewReader(s) + +// The pipe will uncompress the data from the writer. +pr, pw := io.Pipe() +zw := lz4.NewWriter(pw) +zr := lz4.NewReader(pr) + +go func() { + // Compress the input string. + _, _ = io.Copy(zw, r) + _ = zw.Close() // Make sure the writer is closed + _ = pw.Close() // Terminate the pipe +}() + +_, _ = io.Copy(os.Stdout, zr) + +// Output: +// hello world +``` + +## Contributing + +Contributions are very welcome for bug fixing, performance improvements...! + +- Open an issue with a proper description +- Send a pull request with appropriate test case(s) + +## Contributors + +Thanks to all [contributors](https://github.com/pierrec/lz4/graphs/contributors) so far! + +Special thanks to [@Zariel](https://github.com/Zariel) for his asm implementation of the decoder. + +Special thanks to [@klauspost](https://github.com/klauspost) for his work on optimizing the code. diff --git a/vendor/github.com/pierrec/lz4/v4/go.mod b/vendor/github.com/pierrec/lz4/v4/go.mod new file mode 100644 index 00000000000..42229b29672 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/go.mod @@ -0,0 +1,3 @@ +module github.com/pierrec/lz4/v4 + +go 1.14 diff --git a/vendor/github.com/pierrec/lz4/v4/go.sum b/vendor/github.com/pierrec/lz4/v4/go.sum new file mode 100644 index 00000000000..e69de29bb2d diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/block.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/block.go new file mode 100644 index 00000000000..f3826494308 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/block.go @@ -0,0 +1,469 @@ +package lz4block + +import ( + "encoding/binary" + "math/bits" + "sync" + + "github.com/pierrec/lz4/v4/internal/lz4errors" +) + +const ( + // The following constants are used to setup the compression algorithm. + minMatch = 4 // the minimum size of the match sequence size (4 bytes) + winSizeLog = 16 // LZ4 64Kb window size limit + winSize = 1 << winSizeLog + winMask = winSize - 1 // 64Kb window of previous data for dependent blocks + + // hashLog determines the size of the hash table used to quickly find a previous match position. + // Its value influences the compression speed and memory usage, the lower the faster, + // but at the expense of the compression ratio. + // 16 seems to be the best compromise for fast compression. + hashLog = 16 + htSize = 1 << hashLog + + mfLimit = 10 + minMatch // The last match cannot start within the last 14 bytes. +) + +func recoverBlock(e *error) { + if r := recover(); r != nil && *e == nil { + *e = lz4errors.ErrInvalidSourceShortBuffer + } +} + +// blockHash hashes the lower 6 bytes into a value < htSize. +func blockHash(x uint64) uint32 { + const prime6bytes = 227718039650203 + return uint32(((x << (64 - 48)) * prime6bytes) >> (64 - hashLog)) +} + +func CompressBlockBound(n int) int { + return n + n/255 + 16 +} + +func UncompressBlock(src, dst []byte) (int, error) { + if len(src) == 0 { + return 0, nil + } + if di := decodeBlock(dst, src); di >= 0 { + return di, nil + } + return 0, lz4errors.ErrInvalidSourceShortBuffer +} + +type Compressor struct { + // Offsets are at most 64kiB, so we can store only the lower 16 bits of + // match positions: effectively, an offset from some 64kiB block boundary. + // + // When we retrieve such an offset, we interpret it as relative to the last + // block boundary si &^ 0xffff, or the one before, (si &^ 0xffff) - 0x10000, + // depending on which of these is inside the current window. If a table + // entry was generated more than 64kiB back in the input, we find out by + // inspecting the input stream. + table [htSize]uint16 + + needsReset bool +} + +// Get returns the position of a presumptive match for the hash h. +// The match may be a false positive due to a hash collision or an old entry. +// If si < winSize, the return value may be negative. +func (c *Compressor) get(h uint32, si int) int { + h &= htSize - 1 + i := int(c.table[h]) + i += si &^ winMask + if i >= si { + // Try previous 64kiB block (negative when in first block). + i -= winSize + } + return i +} + +func (c *Compressor) put(h uint32, si int) { + h &= htSize - 1 + c.table[h] = uint16(si) +} + +var compressorPool = sync.Pool{New: func() interface{} { return new(Compressor) }} + +func CompressBlock(src, dst []byte) (int, error) { + c := compressorPool.Get().(*Compressor) + n, err := c.CompressBlock(src, dst) + compressorPool.Put(c) + return n, err +} + +func (c *Compressor) CompressBlock(src, dst []byte) (int, error) { + if c.needsReset { + // Zero out reused table to avoid non-deterministic output (issue #65). + c.table = [htSize]uint16{} + } + c.needsReset = true // Only false on first call. + + // Return 0, nil only if the destination buffer size is < CompressBlockBound. + isNotCompressible := len(dst) < CompressBlockBound(len(src)) + + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. + // This significantly speeds up incompressible data and usually has very small impact on compression. + // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) + const adaptSkipLog = 7 + + // si: Current position of the search. + // anchor: Position of the current literals. + var si, di, anchor int + sn := len(src) - mfLimit + if sn <= 0 { + goto lastLiterals + } + + // Fast scan strategy: the hash table only stores the last 4 bytes sequences. + for si < sn { + // Hash the next 6 bytes (sequence)... + match := binary.LittleEndian.Uint64(src[si:]) + h := blockHash(match) + h2 := blockHash(match >> 8) + + // We check a match at s, s+1 and s+2 and pick the first one we get. + // Checking 3 only requires us to load the source one. + ref := c.get(h, si) + ref2 := c.get(h2, si) + c.put(h, si) + c.put(h2, si+1) + + offset := si - ref + + if offset <= 0 || offset >= winSize || uint32(match) != binary.LittleEndian.Uint32(src[ref:]) { + // No match. Start calculating another hash. + // The processor can usually do this out-of-order. + h = blockHash(match >> 16) + ref3 := c.get(h, si+2) + + // Check the second match at si+1 + si += 1 + offset = si - ref2 + + if offset <= 0 || offset >= winSize || uint32(match>>8) != binary.LittleEndian.Uint32(src[ref2:]) { + // No match. Check the third match at si+2 + si += 1 + offset = si - ref3 + c.put(h, si) + + if offset <= 0 || offset >= winSize || uint32(match>>16) != binary.LittleEndian.Uint32(src[ref3:]) { + // Skip one extra byte (at si+3) before we check 3 matches again. + si += 2 + (si-anchor)>>adaptSkipLog + continue + } + } + } + + // Match found. + lLen := si - anchor // Literal length. + // We already matched 4 bytes. + mLen := 4 + + // Extend backwards if we can, reducing literals. + tOff := si - offset - 1 + for lLen > 0 && tOff >= 0 && src[si-1] == src[tOff] { + si-- + tOff-- + lLen-- + mLen++ + } + + // Add the match length, so we continue search at the end. + // Use mLen to store the offset base. + si, mLen = si+mLen, si+minMatch + + // Find the longest match by looking by batches of 8 bytes. + for si+8 < sn { + x := binary.LittleEndian.Uint64(src[si:]) ^ binary.LittleEndian.Uint64(src[si-offset:]) + if x == 0 { + si += 8 + } else { + // Stop is first non-zero byte. + si += bits.TrailingZeros64(x) >> 3 + break + } + } + + mLen = si - mLen + if mLen < 0xF { + dst[di] = byte(mLen) + } else { + dst[di] = 0xF + } + + // Encode literals length. + if lLen < 0xF { + dst[di] |= byte(lLen << 4) + } else { + dst[di] |= 0xF0 + di++ + l := lLen - 0xF + for ; l >= 0xFF; l -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(l) + } + di++ + + // Literals. + if di+lLen > len(dst) { + return 0, lz4errors.ErrInvalidSourceShortBuffer + } + copy(dst[di:di+lLen], src[anchor:anchor+lLen]) + di += lLen + 2 + anchor = si + + // Encode offset. + if di > len(dst) { + return 0, lz4errors.ErrInvalidSourceShortBuffer + } + dst[di-2], dst[di-1] = byte(offset), byte(offset>>8) + + // Encode match length part 2. + if mLen >= 0xF { + for mLen -= 0xF; mLen >= 0xFF && di < len(dst); mLen -= 0xFF { + dst[di] = 0xFF + di++ + } + if di >= len(dst) { + return 0, lz4errors.ErrInvalidSourceShortBuffer + } + dst[di] = byte(mLen) + di++ + } + // Check if we can load next values. + if si >= sn { + break + } + // Hash match end-2 + h = blockHash(binary.LittleEndian.Uint64(src[si-2:])) + c.put(h, si-2) + } + +lastLiterals: + if isNotCompressible && anchor == 0 { + // Incompressible. + return 0, nil + } + + // Last literals. + if di >= len(dst) { + return 0, lz4errors.ErrInvalidSourceShortBuffer + } + lLen := len(src) - anchor + if lLen < 0xF { + dst[di] = byte(lLen << 4) + } else { + dst[di] = 0xF0 + di++ + for lLen -= 0xF; lLen >= 0xFF && di < len(dst); lLen -= 0xFF { + dst[di] = 0xFF + di++ + } + if di >= len(dst) { + return 0, lz4errors.ErrInvalidSourceShortBuffer + } + dst[di] = byte(lLen) + } + di++ + + // Write the last literals. + if isNotCompressible && di >= anchor { + // Incompressible. + return 0, nil + } + if di+len(src)-anchor > len(dst) { + return 0, lz4errors.ErrInvalidSourceShortBuffer + } + di += copy(dst[di:di+len(src)-anchor], src[anchor:]) + return di, nil +} + +// blockHash hashes 4 bytes into a value < winSize. +func blockHashHC(x uint32) uint32 { + const hasher uint32 = 2654435761 // Knuth multiplicative hash. + return x * hasher >> (32 - winSizeLog) +} + +type CompressorHC struct { + // hashTable: stores the last position found for a given hash + // chainTable: stores previous positions for a given hash + hashTable, chainTable [htSize]int + needsReset bool +} + +var compressorHCPool = sync.Pool{New: func() interface{} { return new(CompressorHC) }} + +func CompressBlockHC(src, dst []byte, depth CompressionLevel) (int, error) { + c := compressorHCPool.Get().(*CompressorHC) + n, err := c.CompressBlock(src, dst, depth) + compressorHCPool.Put(c) + return n, err +} + +func (c *CompressorHC) CompressBlock(src, dst []byte, depth CompressionLevel) (_ int, err error) { + if c.needsReset { + // Zero out reused table to avoid non-deterministic output (issue #65). + c.hashTable = [htSize]int{} + c.chainTable = [htSize]int{} + } + c.needsReset = true // Only false on first call. + + defer recoverBlock(&err) + + // Return 0, nil only if the destination buffer size is < CompressBlockBound. + isNotCompressible := len(dst) < CompressBlockBound(len(src)) + + // adaptSkipLog sets how quickly the compressor begins skipping blocks when data is incompressible. + // This significantly speeds up incompressible data and usually has very small impact on compression. + // bytes to skip = 1 + (bytes since last match >> adaptSkipLog) + const adaptSkipLog = 7 + + var si, di, anchor int + sn := len(src) - mfLimit + if sn <= 0 { + goto lastLiterals + } + + if depth == 0 { + depth = winSize + } + + for si < sn { + // Hash the next 4 bytes (sequence). + match := binary.LittleEndian.Uint32(src[si:]) + h := blockHashHC(match) + + // Follow the chain until out of window and give the longest match. + mLen := 0 + offset := 0 + for next, try := c.hashTable[h], depth; try > 0 && next > 0 && si-next < winSize; next, try = c.chainTable[next&winMask], try-1 { + // The first (mLen==0) or next byte (mLen>=minMatch) at current match length + // must match to improve on the match length. + if src[next+mLen] != src[si+mLen] { + continue + } + ml := 0 + // Compare the current position with a previous with the same hash. + for ml < sn-si { + x := binary.LittleEndian.Uint64(src[next+ml:]) ^ binary.LittleEndian.Uint64(src[si+ml:]) + if x == 0 { + ml += 8 + } else { + // Stop is first non-zero byte. + ml += bits.TrailingZeros64(x) >> 3 + break + } + } + if ml < minMatch || ml <= mLen { + // Match too small (>adaptSkipLog + continue + } + + // Match found. + // Update hash/chain tables with overlapping bytes: + // si already hashed, add everything from si+1 up to the match length. + winStart := si + 1 + if ws := si + mLen - winSize; ws > winStart { + winStart = ws + } + for si, ml := winStart, si+mLen; si < ml; { + match >>= 8 + match |= uint32(src[si+3]) << 24 + h := blockHashHC(match) + c.chainTable[si&winMask] = c.hashTable[h] + c.hashTable[h] = si + si++ + } + + lLen := si - anchor + si += mLen + mLen -= minMatch // Match length does not include minMatch. + + if mLen < 0xF { + dst[di] = byte(mLen) + } else { + dst[di] = 0xF + } + + // Encode literals length. + if lLen < 0xF { + dst[di] |= byte(lLen << 4) + } else { + dst[di] |= 0xF0 + di++ + l := lLen - 0xF + for ; l >= 0xFF; l -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(l) + } + di++ + + // Literals. + copy(dst[di:di+lLen], src[anchor:anchor+lLen]) + di += lLen + anchor = si + + // Encode offset. + di += 2 + dst[di-2], dst[di-1] = byte(offset), byte(offset>>8) + + // Encode match length part 2. + if mLen >= 0xF { + for mLen -= 0xF; mLen >= 0xFF; mLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(mLen) + di++ + } + } + + if isNotCompressible && anchor == 0 { + // Incompressible. + return 0, nil + } + + // Last literals. +lastLiterals: + lLen := len(src) - anchor + if lLen < 0xF { + dst[di] = byte(lLen << 4) + } else { + dst[di] = 0xF0 + di++ + lLen -= 0xF + for ; lLen >= 0xFF; lLen -= 0xFF { + dst[di] = 0xFF + di++ + } + dst[di] = byte(lLen) + } + di++ + + // Write the last literals. + if isNotCompressible && di >= anchor { + // Incompressible. + return 0, nil + } + di += copy(dst[di:di+len(src)-anchor], src[anchor:]) + return di, nil +} diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/blocks.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/blocks.go new file mode 100644 index 00000000000..e6cf88d71c0 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/blocks.go @@ -0,0 +1,88 @@ +// Package lz4block provides LZ4 BlockSize types and pools of buffers. +package lz4block + +import "sync" + +const ( + Block64Kb uint32 = 1 << (16 + iota*2) + Block256Kb + Block1Mb + Block4Mb + Block8Mb = 2 * Block4Mb + legacyBlockSize = Block8Mb + Block8Mb/255 + 16 // CompressBound(Block8Mb) +) + +var ( + BlockPool64K = sync.Pool{New: func() interface{} { return make([]byte, Block64Kb) }} + BlockPool256K = sync.Pool{New: func() interface{} { return make([]byte, Block256Kb) }} + BlockPool1M = sync.Pool{New: func() interface{} { return make([]byte, Block1Mb) }} + BlockPool4M = sync.Pool{New: func() interface{} { return make([]byte, Block4Mb) }} + BlockPool8M = sync.Pool{New: func() interface{} { return make([]byte, legacyBlockSize) }} +) + +func Index(b uint32) BlockSizeIndex { + switch b { + case Block64Kb: + return 4 + case Block256Kb: + return 5 + case Block1Mb: + return 6 + case Block4Mb: + return 7 + case Block8Mb: // only valid in legacy mode + return 3 + } + return 0 +} + +func IsValid(b uint32) bool { + return Index(b) > 0 +} + +type BlockSizeIndex uint8 + +func (b BlockSizeIndex) IsValid() bool { + switch b { + case 4, 5, 6, 7: + return true + } + return false +} + +func (b BlockSizeIndex) Get() []byte { + var buf interface{} + switch b { + case 4: + buf = BlockPool64K.Get() + case 5: + buf = BlockPool256K.Get() + case 6: + buf = BlockPool1M.Get() + case 7: + buf = BlockPool4M.Get() + case 3: + buf = BlockPool8M.Get() + } + return buf.([]byte) +} + +func Put(buf []byte) { + // Safeguard: do not allow invalid buffers. + switch c := cap(buf); uint32(c) { + case Block64Kb: + BlockPool64K.Put(buf[:c]) + case Block256Kb: + BlockPool256K.Put(buf[:c]) + case Block1Mb: + BlockPool1M.Put(buf[:c]) + case Block4Mb: + BlockPool4M.Put(buf[:c]) + case legacyBlockSize: + BlockPool8M.Put(buf[:c]) + } +} + +type CompressionLevel uint32 + +const Fast CompressionLevel = 0 diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s new file mode 100644 index 00000000000..be79faa3fe8 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_amd64.s @@ -0,0 +1,369 @@ +// +build !appengine +// +build gc +// +build !noasm + +#include "textflag.h" + +// AX scratch +// BX scratch +// CX scratch +// DX token +// +// DI &dst +// SI &src +// R8 &dst + len(dst) +// R9 &src + len(src) +// R11 &dst +// R12 short output end +// R13 short input end +// func decodeBlock(dst, src []byte) int +// using 50 bytes of stack currently +TEXT ·decodeBlock(SB), NOSPLIT, $64-56 + MOVQ dst_base+0(FP), DI + MOVQ DI, R11 + MOVQ dst_len+8(FP), R8 + ADDQ DI, R8 + + MOVQ src_base+24(FP), SI + MOVQ src_len+32(FP), R9 + CMPQ R9, $0 + JE err_corrupt + ADDQ SI, R9 + + // shortcut ends + // short output end + MOVQ R8, R12 + SUBQ $32, R12 + // short input end + MOVQ R9, R13 + SUBQ $16, R13 + +loop: + // for si < len(src) + CMPQ SI, R9 + JGE end + + // token := uint32(src[si]) + MOVBQZX (SI), DX + INCQ SI + + // lit_len = token >> 4 + // if lit_len > 0 + // CX = lit_len + MOVQ DX, CX + SHRQ $4, CX + + // if lit_len != 0xF + CMPQ CX, $0xF + JEQ lit_len_loop_pre + CMPQ DI, R12 + JGE lit_len_loop_pre + CMPQ SI, R13 + JGE lit_len_loop_pre + + // copy shortcut + + // A two-stage shortcut for the most common case: + // 1) If the literal length is 0..14, and there is enough space, + // enter the shortcut and copy 16 bytes on behalf of the literals + // (in the fast mode, only 8 bytes can be safely copied this way). + // 2) Further if the match length is 4..18, copy 18 bytes in a similar + // manner; but we ensure that there's enough space in the output for + // those 18 bytes earlier, upon entering the shortcut (in other words, + // there is a combined check for both stages). + + // copy literal + MOVOU (SI), X0 + MOVOU X0, (DI) + ADDQ CX, DI + ADDQ CX, SI + + MOVQ DX, CX + ANDQ $0xF, CX + + // The second stage: prepare for match copying, decode full info. + // If it doesn't work out, the info won't be wasted. + // offset := uint16(data[:2]) + MOVWQZX (SI), DX + ADDQ $2, SI + + MOVQ DI, AX + SUBQ DX, AX + CMPQ AX, DI + JGT err_short_buf + + // if we can't do the second stage then jump straight to read the + // match length, we already have the offset. + CMPQ CX, $0xF + JEQ match_len_loop_pre + CMPQ DX, $8 + JLT match_len_loop_pre + CMPQ AX, R11 + JLT err_short_buf + + // memcpy(op + 0, match + 0, 8); + MOVQ (AX), BX + MOVQ BX, (DI) + // memcpy(op + 8, match + 8, 8); + MOVQ 8(AX), BX + MOVQ BX, 8(DI) + // memcpy(op +16, match +16, 2); + MOVW 16(AX), BX + MOVW BX, 16(DI) + + LEAQ 4(DI)(CX*1), DI // minmatch + + // shortcut complete, load next token + JMP loop + +lit_len_loop_pre: + // if lit_len > 0 + CMPQ CX, $0 + JEQ offset + CMPQ CX, $0xF + JNE copy_literal + +lit_len_loop: + // for src[si] == 0xFF + CMPB (SI), $0xFF + JNE lit_len_finalise + + // bounds check src[si+1] + LEAQ 1(SI), AX + CMPQ AX, R9 + JGT err_short_buf + + // lit_len += 0xFF + ADDQ $0xFF, CX + INCQ SI + JMP lit_len_loop + +lit_len_finalise: + // lit_len += int(src[si]) + // si++ + MOVBQZX (SI), AX + ADDQ AX, CX + INCQ SI + +copy_literal: + // bounds check src and dst + LEAQ (SI)(CX*1), AX + CMPQ AX, R9 + JGT err_short_buf + + LEAQ (DI)(CX*1), AX + CMPQ AX, R8 + JGT err_short_buf + + // whats a good cut off to call memmove? + CMPQ CX, $16 + JGT memmove_lit + + // if len(dst[di:]) < 16 + MOVQ R8, AX + SUBQ DI, AX + CMPQ AX, $16 + JLT memmove_lit + + // if len(src[si:]) < 16 + MOVQ R9, AX + SUBQ SI, AX + CMPQ AX, $16 + JLT memmove_lit + + MOVOU (SI), X0 + MOVOU X0, (DI) + + JMP finish_lit_copy + +memmove_lit: + // memmove(to, from, len) + MOVQ DI, 0(SP) + MOVQ SI, 8(SP) + MOVQ CX, 16(SP) + // spill + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) // need len to inc SI, DI after + MOVB DX, 48(SP) + CALL runtime·memmove(SB) + + // restore registers + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + MOVB 48(SP), DX + + // recalc initial values + MOVQ dst_base+0(FP), R8 + MOVQ R8, R11 + ADDQ dst_len+8(FP), R8 + MOVQ src_base+24(FP), R9 + ADDQ src_len+32(FP), R9 + MOVQ R8, R12 + SUBQ $32, R12 + MOVQ R9, R13 + SUBQ $16, R13 + +finish_lit_copy: + ADDQ CX, SI + ADDQ CX, DI + + CMPQ SI, R9 + JGE end + +offset: + // CX := mLen + // free up DX to use for offset + MOVQ DX, CX + + LEAQ 2(SI), AX + CMPQ AX, R9 + JGT err_short_buf + + // offset + // DX := int(src[si]) | int(src[si+1])<<8 + MOVWQZX (SI), DX + ADDQ $2, SI + + // 0 offset is invalid + CMPQ DX, $0 + JEQ err_corrupt + + ANDB $0xF, CX + +match_len_loop_pre: + // if mlen != 0xF + CMPB CX, $0xF + JNE copy_match + +match_len_loop: + // for src[si] == 0xFF + // lit_len += 0xFF + CMPB (SI), $0xFF + JNE match_len_finalise + + // bounds check src[si+1] + LEAQ 1(SI), AX + CMPQ AX, R9 + JGT err_short_buf + + ADDQ $0xFF, CX + INCQ SI + JMP match_len_loop + +match_len_finalise: + // lit_len += int(src[si]) + // si++ + MOVBQZX (SI), AX + ADDQ AX, CX + INCQ SI + +copy_match: + // mLen += minMatch + ADDQ $4, CX + + // check we have match_len bytes left in dst + // di+match_len < len(dst) + LEAQ (DI)(CX*1), AX + CMPQ AX, R8 + JGT err_short_buf + + // DX = offset + // CX = match_len + // BX = &dst + (di - offset) + MOVQ DI, BX + SUBQ DX, BX + + // check BX is within dst + // if BX < &dst + CMPQ BX, R11 + JLT err_short_buf + + // if offset + match_len < di + LEAQ (BX)(CX*1), AX + CMPQ DI, AX + JGT copy_interior_match + + // AX := len(dst[:di]) + // MOVQ DI, AX + // SUBQ R11, AX + + // copy 16 bytes at a time + // if di-offset < 16 copy 16-(di-offset) bytes to di + // then do the remaining + +copy_match_loop: + // for match_len >= 0 + // dst[di] = dst[i] + // di++ + // i++ + MOVB (BX), AX + MOVB AX, (DI) + INCQ DI + INCQ BX + DECQ CX + + CMPQ CX, $0 + JGT copy_match_loop + + JMP loop + +copy_interior_match: + CMPQ CX, $16 + JGT memmove_match + + // if len(dst[di:]) < 16 + MOVQ R8, AX + SUBQ DI, AX + CMPQ AX, $16 + JLT memmove_match + + MOVOU (BX), X0 + MOVOU X0, (DI) + + ADDQ CX, DI + JMP loop + +memmove_match: + // memmove(to, from, len) + MOVQ DI, 0(SP) + MOVQ BX, 8(SP) + MOVQ CX, 16(SP) + // spill + MOVQ DI, 24(SP) + MOVQ SI, 32(SP) + MOVQ CX, 40(SP) // need len to inc SI, DI after + CALL runtime·memmove(SB) + + // restore registers + MOVQ 24(SP), DI + MOVQ 32(SP), SI + MOVQ 40(SP), CX + + // recalc initial values + MOVQ dst_base+0(FP), R8 + MOVQ R8, R11 // TODO: make these sensible numbers + ADDQ dst_len+8(FP), R8 + MOVQ src_base+24(FP), R9 + ADDQ src_len+32(FP), R9 + MOVQ R8, R12 + SUBQ $32, R12 + MOVQ R9, R13 + SUBQ $16, R13 + + ADDQ CX, DI + JMP loop + +err_corrupt: + MOVQ $-1, ret+48(FP) + RET + +err_short_buf: + MOVQ $-2, ret+48(FP) + RET + +end: + SUBQ R11, DI + MOVQ DI, ret+48(FP) + RET diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm.s b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm.s new file mode 100644 index 00000000000..64be9adcaa8 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_arm.s @@ -0,0 +1,197 @@ +// +build gc +// +build !noasm + +#include "textflag.h" + +// Register allocation. +#define dst R0 +#define dstorig R1 +#define src R2 +#define dstend R3 +#define srcend R4 +#define match R5 // Match address. +#define token R6 +#define len R7 // Literal and match lengths. +#define offset R6 // Match offset; overlaps with token. +#define tmp1 R8 +#define tmp2 R9 +#define tmp3 R12 + +#define minMatch $4 + +// func decodeBlock(dst, src []byte) int +TEXT ·decodeBlock(SB), NOFRAME|NOSPLIT, $-4-28 + MOVW dst_base +0(FP), dst + MOVW dst_len +4(FP), dstend + MOVW src_base+12(FP), src + MOVW src_len +16(FP), srcend + + CMP $0, srcend + BEQ shortSrc + + ADD dst, dstend + ADD src, srcend + + MOVW dst, dstorig + +loop: + // Read token. Extract literal length. + MOVBU.P 1(src), token + MOVW token >> 4, len + CMP $15, len + BNE readLitlenDone + +readLitlenLoop: + CMP src, srcend + BEQ shortSrc + MOVBU.P 1(src), tmp1 + ADD tmp1, len + CMP $255, tmp1 + BEQ readLitlenLoop + +readLitlenDone: + CMP $0, len + BEQ copyLiteralDone + + // Bounds check dst+len and src+len. + ADD dst, len, tmp1 + CMP dstend, tmp1 + //BHI shortDst // Uncomment for distinct error codes. + ADD src, len, tmp2 + CMP.LS srcend, tmp2 + BHI shortSrc + + // Copy literal. + CMP $4, len + BLO copyLiteralFinish + + // Copy 0-3 bytes until src is aligned. + TST $1, src + MOVBU.NE.P 1(src), tmp1 + MOVB.NE.P tmp1, 1(dst) + SUB.NE $1, len + + TST $2, src + MOVHU.NE.P 2(src), tmp2 + MOVB.NE.P tmp2, 1(dst) + MOVW.NE tmp2 >> 8, tmp1 + MOVB.NE.P tmp1, 1(dst) + SUB.NE $2, len + + B copyLiteralLoopCond + +copyLiteralLoop: + // Aligned load, unaligned write. + MOVW.P 4(src), tmp1 + MOVW tmp1 >> 8, tmp2 + MOVB tmp2, 1(dst) + MOVW tmp1 >> 16, tmp3 + MOVB tmp3, 2(dst) + MOVW tmp1 >> 24, tmp2 + MOVB tmp2, 3(dst) + MOVB.P tmp1, 4(dst) +copyLiteralLoopCond: + // Loop until len-4 < 0. + SUB.S $4, len + BPL copyLiteralLoop + + // Restore len, which is now negative. + ADD $4, len + +copyLiteralFinish: + // Copy remaining 0-3 bytes. + TST $2, len + MOVHU.NE.P 2(src), tmp2 + MOVB.NE.P tmp2, 1(dst) + MOVW.NE tmp2 >> 8, tmp1 + MOVB.NE.P tmp1, 1(dst) + TST $1, len + MOVBU.NE.P 1(src), tmp1 + MOVB.NE.P tmp1, 1(dst) + +copyLiteralDone: + CMP src, srcend + BEQ end + + // Initial part of match length. + // This frees up the token register for reuse as offset. + AND $15, token, len + + // Read offset. + ADD $2, src + CMP srcend, src + BHI shortSrc + MOVBU -2(src), offset + MOVBU -1(src), tmp1 + ORR tmp1 << 8, offset + CMP $0, offset + BEQ corrupt + + // Read rest of match length. + CMP $15, len + BNE readMatchlenDone + +readMatchlenLoop: + CMP src, srcend + BEQ shortSrc + MOVBU.P 1(src), tmp1 + ADD tmp1, len + CMP $255, tmp1 + BEQ readMatchlenLoop + +readMatchlenDone: + // Bounds check dst+len+minMatch and match = dst-offset. + ADD dst, len, tmp1 + ADD minMatch, tmp1 + CMP dstend, tmp1 + //BHI shortDst // Uncomment for distinct error codes. + SUB offset, dst, match + CMP.LS match, dstorig + BHI corrupt + + // Since len+minMatch is at least four, we can do a 4× unrolled + // byte copy loop. Using MOVW instead of four byte loads is faster, + // but to remain portable we'd have to align match first, which is + // too expensive. By alternating loads and stores, we also handle + // the case offset < 4. +copyMatch4: + SUB.S $4, len + MOVBU.P 4(match), tmp1 + MOVB.P tmp1, 4(dst) + MOVBU -3(match), tmp2 + MOVB tmp2, -3(dst) + MOVBU -2(match), tmp3 + MOVB tmp3, -2(dst) + MOVBU -1(match), tmp1 + MOVB tmp1, -1(dst) + BPL copyMatch4 + + // Restore len, which is now negative. + ADD.S $4, len + BEQ copyMatchDone + +copyMatch: + // Finish with a byte-at-a-time copy. + SUB.S $1, len + MOVBU.P 1(match), tmp2 + MOVB.P tmp2, 1(dst) + BNE copyMatch + +copyMatchDone: + CMP src, srcend + BNE loop + +end: + SUB dstorig, dst, tmp1 + MOVW tmp1, ret+24(FP) + RET + + // The three error cases have distinct labels so we can put different + // return codes here when debugging, or if the error returns need to + // be changed. +shortDst: +shortSrc: +corrupt: + MOVW $-1, tmp1 + MOVW tmp1, ret+24(FP) + RET diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_asm.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_asm.go new file mode 100644 index 00000000000..e26f8cd613e --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_asm.go @@ -0,0 +1,9 @@ +// +build amd64 arm +// +build !appengine +// +build gc +// +build !noasm + +package lz4block + +//go:noescape +func decodeBlock(dst, src []byte) int diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go new file mode 100644 index 00000000000..52df2f2b8ec --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4block/decode_other.go @@ -0,0 +1,108 @@ +// +build !amd64,!arm appengine !gc noasm + +package lz4block + +import "encoding/binary" + +func decodeBlock(dst, src []byte) (ret int) { + // Restrict capacities so we don't read or write out of bounds. + dst = dst[:len(dst):len(dst)] + src = src[:len(src):len(src)] + + const hasError = -2 + defer func() { + if recover() != nil { + ret = hasError + } + }() + + var si, di uint + for { + // Literals and match lengths (token). + b := uint(src[si]) + si++ + + // Literals. + if lLen := b >> 4; lLen > 0 { + switch { + case lLen < 0xF && si+16 < uint(len(src)): + // Shortcut 1 + // if we have enough room in src and dst, and the literals length + // is small enough (0..14) then copy all 16 bytes, even if not all + // are part of the literals. + copy(dst[di:], src[si:si+16]) + si += lLen + di += lLen + if mLen := b & 0xF; mLen < 0xF { + // Shortcut 2 + // if the match length (4..18) fits within the literals, then copy + // all 18 bytes, even if not all are part of the literals. + mLen += 4 + if offset := u16(src[si:]); mLen <= offset { + i := di - offset + end := i + 18 + if end > uint(len(dst)) { + // The remaining buffer may not hold 18 bytes. + // See https://github.com/pierrec/lz4/issues/51. + end = uint(len(dst)) + } + copy(dst[di:], dst[i:end]) + si += 2 + di += mLen + continue + } + } + case lLen == 0xF: + for src[si] == 0xFF { + lLen += 0xFF + si++ + } + lLen += uint(src[si]) + si++ + fallthrough + default: + copy(dst[di:di+lLen], src[si:si+lLen]) + si += lLen + di += lLen + } + } + if si == uint(len(src)) { + return int(di) + } else if si > uint(len(src)) { + return hasError + } + + offset := u16(src[si:]) + if offset == 0 { + return hasError + } + si += 2 + + // Match. + mLen := b & 0xF + if mLen == 0xF { + for src[si] == 0xFF { + mLen += 0xFF + si++ + } + mLen += uint(src[si]) + si++ + } + mLen += minMatch + + // Copy the match. + expanded := dst[di-offset:] + if mLen > offset { + // Efficiently copy the match dst[di-offset:di] into the dst slice. + bytesToCopy := offset * (mLen / offset) + for n := offset; n <= bytesToCopy+offset; n *= 2 { + copy(expanded[n:], expanded[:n]) + } + di += bytesToCopy + mLen -= bytesToCopy + } + di += uint(copy(dst[di:di+mLen], expanded[:mLen])) + } +} + +func u16(p []byte) uint { return uint(binary.LittleEndian.Uint16(p)) } diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4errors/errors.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4errors/errors.go new file mode 100644 index 00000000000..710ea42812e --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4errors/errors.go @@ -0,0 +1,19 @@ +package lz4errors + +type Error string + +func (e Error) Error() string { return string(e) } + +const ( + ErrInvalidSourceShortBuffer Error = "lz4: invalid source or destination buffer too short" + ErrInvalidFrame Error = "lz4: bad magic number" + ErrInternalUnhandledState Error = "lz4: unhandled state" + ErrInvalidHeaderChecksum Error = "lz4: invalid header checksum" + ErrInvalidBlockChecksum Error = "lz4: invalid block checksum" + ErrInvalidFrameChecksum Error = "lz4: invalid frame checksum" + ErrOptionInvalidCompressionLevel Error = "lz4: invalid compression level" + ErrOptionClosedOrError Error = "lz4: cannot apply options on closed or in error object" + ErrOptionInvalidBlockSize Error = "lz4: invalid block size" + ErrOptionNotApplicable Error = "lz4: option not applicable" + ErrWriterNotClosed Error = "lz4: writer not closed" +) diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/block.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/block.go new file mode 100644 index 00000000000..c7b929fdfec --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/block.go @@ -0,0 +1,335 @@ +package lz4stream + +import ( + "encoding/binary" + "fmt" + "io" + "sync" + + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" + "github.com/pierrec/lz4/v4/internal/xxh32" +) + +type Blocks struct { + Block *FrameDataBlock + Blocks chan chan *FrameDataBlock + mu sync.Mutex + err error +} + +func (b *Blocks) initW(f *Frame, dst io.Writer, num int) { + if num == 1 { + b.Blocks = nil + b.Block = NewFrameDataBlock(f) + return + } + b.Block = nil + if cap(b.Blocks) != num { + b.Blocks = make(chan chan *FrameDataBlock, num) + } + // goroutine managing concurrent block compression goroutines. + go func() { + // Process next block compression item. + for c := range b.Blocks { + // Read the next compressed block result. + // Waiting here ensures that the blocks are output in the order they were sent. + // The incoming channel is always closed as it indicates to the caller that + // the block has been processed. + block := <-c + if block == nil { + // Notify the block compression routine that we are done with its result. + // This is used when a sentinel block is sent to terminate the compression. + close(c) + return + } + // Do not attempt to write the block upon any previous failure. + if b.err == nil { + // Write the block. + if err := block.Write(f, dst); err != nil { + // Keep the first error. + b.err = err + // All pending compression goroutines need to shut down, so we need to keep going. + } + } + close(c) + } + }() +} + +func (b *Blocks) close(f *Frame, num int) error { + if num == 1 { + if b.Block != nil { + b.Block.Close(f) + } + err := b.err + b.err = nil + return err + } + if b.Blocks == nil { + err := b.err + b.err = nil + return err + } + c := make(chan *FrameDataBlock) + b.Blocks <- c + c <- nil + <-c + err := b.err + b.err = nil + return err +} + +// ErrorR returns any error set while uncompressing a stream. +func (b *Blocks) ErrorR() error { + b.mu.Lock() + defer b.mu.Unlock() + return b.err +} + +// initR returns a channel that streams the uncompressed blocks if in concurrent +// mode and no error. When the channel is closed, check for any error with b.ErrorR. +// +// If not in concurrent mode, the uncompressed block is b.Block and the returned error +// needs to be checked. +func (b *Blocks) initR(f *Frame, num int, src io.Reader) (chan []byte, error) { + size := f.Descriptor.Flags.BlockSizeIndex() + if num == 1 { + b.Blocks = nil + b.Block = NewFrameDataBlock(f) + return nil, nil + } + b.Block = nil + blocks := make(chan chan []byte, num) + // data receives the uncompressed blocks. + data := make(chan []byte) + // Read blocks from the source sequentially + // and uncompress them concurrently. + + // In legacy mode, accrue the uncompress sizes in cum. + var cum uint32 + go func() { + var cumx uint32 + var err error + for b.ErrorR() == nil { + block := NewFrameDataBlock(f) + cumx, err = block.Read(f, src, 0) + if err != nil { + block.Close(f) + break + } + // Recheck for an error as reading may be slow and uncompressing is expensive. + if b.ErrorR() != nil { + block.Close(f) + break + } + c := make(chan []byte) + blocks <- c + go func() { + defer block.Close(f) + data, err := block.Uncompress(f, size.Get(), false) + if err != nil { + b.closeR(err) + } else { + c <- data + } + }() + } + // End the collection loop and the data channel. + c := make(chan []byte) + blocks <- c + c <- nil // signal the collection loop that we are done + <-c // wait for the collect loop to complete + if f.isLegacy() && cum == cumx { + err = io.EOF + } + b.closeR(err) + close(data) + }() + // Collect the uncompressed blocks and make them available + // on the returned channel. + go func(leg bool) { + defer close(blocks) + for c := range blocks { + buf := <-c + if buf == nil { + // Signal to end the loop. + close(c) + return + } + // Perform checksum now as the blocks are received in order. + if f.Descriptor.Flags.ContentChecksum() { + _, _ = f.checksum.Write(buf) + } + if leg { + cum += uint32(len(buf)) + } + data <- buf + close(c) + } + }(f.isLegacy()) + return data, nil +} + +// closeR safely sets the error on b if not already set. +func (b *Blocks) closeR(err error) { + b.mu.Lock() + if b.err == nil { + b.err = err + } + b.mu.Unlock() +} + +func NewFrameDataBlock(f *Frame) *FrameDataBlock { + buf := f.Descriptor.Flags.BlockSizeIndex().Get() + return &FrameDataBlock{Data: buf, data: buf} +} + +type FrameDataBlock struct { + Size DataBlockSize + Data []byte // compressed or uncompressed data (.data or .src) + Checksum uint32 + data []byte // buffer for compressed data + src []byte // uncompressed data + err error // used in concurrent mode +} + +func (b *FrameDataBlock) Close(f *Frame) { + b.Size = 0 + b.Checksum = 0 + b.err = nil + if b.data != nil { + // Block was not already closed. + lz4block.Put(b.data) + b.Data = nil + b.data = nil + b.src = nil + } +} + +// Block compression errors are ignored since the buffer is sized appropriately. +func (b *FrameDataBlock) Compress(f *Frame, src []byte, level lz4block.CompressionLevel) *FrameDataBlock { + data := b.data + if f.isLegacy() { + data = data[:cap(data)] + } else { + data = data[:len(src)] // trigger the incompressible flag in CompressBlock + } + var n int + switch level { + case lz4block.Fast: + n, _ = lz4block.CompressBlock(src, data) + default: + n, _ = lz4block.CompressBlockHC(src, data, level) + } + if n == 0 { + b.Size.UncompressedSet(true) + b.Data = src + } else { + b.Size.UncompressedSet(false) + b.Data = data[:n] + } + b.Size.sizeSet(len(b.Data)) + b.src = src // keep track of the source for content checksum + + if f.Descriptor.Flags.BlockChecksum() { + b.Checksum = xxh32.ChecksumZero(src) + } + return b +} + +func (b *FrameDataBlock) Write(f *Frame, dst io.Writer) error { + // Write is called in the same order as blocks are compressed, + // so content checksum must be done here. + if f.Descriptor.Flags.ContentChecksum() { + _, _ = f.checksum.Write(b.src) + } + buf := f.buf[:] + binary.LittleEndian.PutUint32(buf, uint32(b.Size)) + if _, err := dst.Write(buf[:4]); err != nil { + return err + } + + if _, err := dst.Write(b.Data); err != nil { + return err + } + + if b.Checksum == 0 { + return nil + } + binary.LittleEndian.PutUint32(buf, b.Checksum) + _, err := dst.Write(buf[:4]) + return err +} + +// Read updates b with the next block data, size and checksum if available. +func (b *FrameDataBlock) Read(f *Frame, src io.Reader, cum uint32) (uint32, error) { + x, err := f.readUint32(src) + if err != nil { + return 0, err + } + if f.isLegacy() { + switch x { + case frameMagicLegacy: + // Concatenated legacy frame. + return b.Read(f, src, cum) + case cum: + // Only works in non concurrent mode, for concurrent mode + // it is handled separately. + // Linux kernel format appends the total uncompressed size at the end. + return 0, io.EOF + } + } else if x == 0 { + // Marker for end of stream. + return 0, io.EOF + } + b.Size = DataBlockSize(x) + + size := b.Size.size() + if size > cap(b.data) { + return x, lz4errors.ErrOptionInvalidBlockSize + } + b.data = b.data[:size] + if _, err := io.ReadFull(src, b.data); err != nil { + return x, err + } + if f.Descriptor.Flags.BlockChecksum() { + sum, err := f.readUint32(src) + if err != nil { + return 0, err + } + b.Checksum = sum + } + return x, nil +} + +func (b *FrameDataBlock) Uncompress(f *Frame, dst []byte, sum bool) ([]byte, error) { + if b.Size.Uncompressed() { + n := copy(dst, b.data) + dst = dst[:n] + } else { + n, err := lz4block.UncompressBlock(b.data, dst) + if err != nil { + return nil, err + } + dst = dst[:n] + } + if f.Descriptor.Flags.BlockChecksum() { + if c := xxh32.ChecksumZero(dst); c != b.Checksum { + err := fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidBlockChecksum, c, b.Checksum) + return nil, err + } + } + if sum && f.Descriptor.Flags.ContentChecksum() { + _, _ = f.checksum.Write(dst) + } + return dst, nil +} + +func (f *Frame) readUint32(r io.Reader) (x uint32, err error) { + if _, err = io.ReadFull(r, f.buf[:4]); err != nil { + return + } + x = binary.LittleEndian.Uint32(f.buf[:4]) + return +} diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame.go new file mode 100644 index 00000000000..cfbd5674d9d --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame.go @@ -0,0 +1,200 @@ +// Package lz4stream provides the types that support reading and writing LZ4 data streams. +package lz4stream + +import ( + "encoding/binary" + "fmt" + "io" + "io/ioutil" + + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" + "github.com/pierrec/lz4/v4/internal/xxh32" +) + +//go:generate go run gen.go + +const ( + frameMagic uint32 = 0x184D2204 + frameSkipMagic uint32 = 0x184D2A50 + frameMagicLegacy uint32 = 0x184C2102 +) + +func NewFrame() *Frame { + return &Frame{} +} + +type Frame struct { + buf [15]byte // frame descriptor needs at most 4(magic)+4+8+1=11 bytes + Magic uint32 + Descriptor FrameDescriptor + Blocks Blocks + Checksum uint32 + checksum xxh32.XXHZero +} + +// Reset allows reusing the Frame. +// The Descriptor configuration is not modified. +func (f *Frame) Reset(num int) { + f.Magic = 0 + f.Descriptor.Checksum = 0 + f.Descriptor.ContentSize = 0 + _ = f.Blocks.close(f, num) + f.Checksum = 0 +} + +func (f *Frame) InitW(dst io.Writer, num int, legacy bool) { + if legacy { + f.Magic = frameMagicLegacy + idx := lz4block.Index(lz4block.Block8Mb) + f.Descriptor.Flags.BlockSizeIndexSet(idx) + } else { + f.Magic = frameMagic + f.Descriptor.initW() + } + f.Blocks.initW(f, dst, num) + f.checksum.Reset() +} + +func (f *Frame) CloseW(dst io.Writer, num int) error { + if err := f.Blocks.close(f, num); err != nil { + return err + } + if f.isLegacy() { + return nil + } + buf := f.buf[:0] + // End mark (data block size of uint32(0)). + buf = append(buf, 0, 0, 0, 0) + if f.Descriptor.Flags.ContentChecksum() { + buf = f.checksum.Sum(buf) + } + _, err := dst.Write(buf) + return err +} + +func (f *Frame) isLegacy() bool { + return f.Magic == frameMagicLegacy +} + +func (f *Frame) InitR(src io.Reader, num int) (chan []byte, error) { + if f.Magic > 0 { + // Header already read. + return nil, nil + } + +newFrame: + var err error + if f.Magic, err = f.readUint32(src); err != nil { + return nil, err + } + switch m := f.Magic; { + case m == frameMagic || m == frameMagicLegacy: + // All 16 values of frameSkipMagic are valid. + case m>>8 == frameSkipMagic>>8: + skip, err := f.readUint32(src) + if err != nil { + return nil, err + } + if _, err := io.CopyN(ioutil.Discard, src, int64(skip)); err != nil { + return nil, err + } + goto newFrame + default: + return nil, lz4errors.ErrInvalidFrame + } + if err := f.Descriptor.initR(f, src); err != nil { + return nil, err + } + f.checksum.Reset() + return f.Blocks.initR(f, num, src) +} + +func (f *Frame) CloseR(src io.Reader) (err error) { + if f.isLegacy() { + return nil + } + if !f.Descriptor.Flags.ContentChecksum() { + return nil + } + if f.Checksum, err = f.readUint32(src); err != nil { + return err + } + if c := f.checksum.Sum32(); c != f.Checksum { + return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidFrameChecksum, c, f.Checksum) + } + return nil +} + +type FrameDescriptor struct { + Flags DescriptorFlags + ContentSize uint64 + Checksum uint8 +} + +func (fd *FrameDescriptor) initW() { + fd.Flags.VersionSet(1) + fd.Flags.BlockIndependenceSet(true) +} + +func (fd *FrameDescriptor) Write(f *Frame, dst io.Writer) error { + if fd.Checksum > 0 { + // Header already written. + return nil + } + + buf := f.buf[:4] + // Write the magic number here even though it belongs to the Frame. + binary.LittleEndian.PutUint32(buf, f.Magic) + if !f.isLegacy() { + buf = buf[:4+2] + binary.LittleEndian.PutUint16(buf[4:], uint16(fd.Flags)) + + if fd.Flags.Size() { + buf = buf[:4+2+8] + binary.LittleEndian.PutUint64(buf[4+2:], fd.ContentSize) + } + fd.Checksum = descriptorChecksum(buf[4:]) + buf = append(buf, fd.Checksum) + } + + _, err := dst.Write(buf) + return err +} + +func (fd *FrameDescriptor) initR(f *Frame, src io.Reader) error { + if f.isLegacy() { + idx := lz4block.Index(lz4block.Block8Mb) + f.Descriptor.Flags.BlockSizeIndexSet(idx) + return nil + } + // Read the flags and the checksum, hoping that there is not content size. + buf := f.buf[:3] + if _, err := io.ReadFull(src, buf); err != nil { + return err + } + descr := binary.LittleEndian.Uint16(buf) + fd.Flags = DescriptorFlags(descr) + if fd.Flags.Size() { + // Append the 8 missing bytes. + buf = buf[:3+8] + if _, err := io.ReadFull(src, buf[3:]); err != nil { + return err + } + fd.ContentSize = binary.LittleEndian.Uint64(buf[2:]) + } + fd.Checksum = buf[len(buf)-1] // the checksum is the last byte + buf = buf[:len(buf)-1] // all descriptor fields except checksum + if c := descriptorChecksum(buf); fd.Checksum != c { + return fmt.Errorf("%w: got %x; expected %x", lz4errors.ErrInvalidHeaderChecksum, c, fd.Checksum) + } + // Validate the elements that can be. + if idx := fd.Flags.BlockSizeIndex(); !idx.IsValid() { + return lz4errors.ErrOptionInvalidBlockSize + } + return nil +} + +func descriptorChecksum(buf []byte) byte { + return byte(xxh32.ChecksumZero(buf) >> 8) +} diff --git a/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame_gen.go b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame_gen.go new file mode 100644 index 00000000000..d33a6be95c3 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/lz4stream/frame_gen.go @@ -0,0 +1,103 @@ +// Code generated by `gen.exe`. DO NOT EDIT. + +package lz4stream + +import "github.com/pierrec/lz4/v4/internal/lz4block" + +// DescriptorFlags is defined as follow: +// field bits +// ----- ---- +// _ 2 +// ContentChecksum 1 +// Size 1 +// BlockChecksum 1 +// BlockIndependence 1 +// Version 2 +// _ 4 +// BlockSizeIndex 3 +// _ 1 +type DescriptorFlags uint16 + +// Getters. +func (x DescriptorFlags) ContentChecksum() bool { return x>>2&1 != 0 } +func (x DescriptorFlags) Size() bool { return x>>3&1 != 0 } +func (x DescriptorFlags) BlockChecksum() bool { return x>>4&1 != 0 } +func (x DescriptorFlags) BlockIndependence() bool { return x>>5&1 != 0 } +func (x DescriptorFlags) Version() uint16 { return uint16(x >> 6 & 0x3) } +func (x DescriptorFlags) BlockSizeIndex() lz4block.BlockSizeIndex { + return lz4block.BlockSizeIndex(x >> 12 & 0x7) +} + +// Setters. +func (x *DescriptorFlags) ContentChecksumSet(v bool) *DescriptorFlags { + const b = 1 << 2 + if v { + *x = *x&^b | b + } else { + *x &^= b + } + return x +} +func (x *DescriptorFlags) SizeSet(v bool) *DescriptorFlags { + const b = 1 << 3 + if v { + *x = *x&^b | b + } else { + *x &^= b + } + return x +} +func (x *DescriptorFlags) BlockChecksumSet(v bool) *DescriptorFlags { + const b = 1 << 4 + if v { + *x = *x&^b | b + } else { + *x &^= b + } + return x +} +func (x *DescriptorFlags) BlockIndependenceSet(v bool) *DescriptorFlags { + const b = 1 << 5 + if v { + *x = *x&^b | b + } else { + *x &^= b + } + return x +} +func (x *DescriptorFlags) VersionSet(v uint16) *DescriptorFlags { + *x = *x&^(0x3<<6) | (DescriptorFlags(v) & 0x3 << 6) + return x +} +func (x *DescriptorFlags) BlockSizeIndexSet(v lz4block.BlockSizeIndex) *DescriptorFlags { + *x = *x&^(0x7<<12) | (DescriptorFlags(v) & 0x7 << 12) + return x +} + +// Code generated by `gen.exe`. DO NOT EDIT. + +// DataBlockSize is defined as follow: +// field bits +// ----- ---- +// size 31 +// Uncompressed 1 +type DataBlockSize uint32 + +// Getters. +func (x DataBlockSize) size() int { return int(x & 0x7FFFFFFF) } +func (x DataBlockSize) Uncompressed() bool { return x>>31&1 != 0 } + +// Setters. +func (x *DataBlockSize) sizeSet(v int) *DataBlockSize { + *x = *x&^0x7FFFFFFF | DataBlockSize(v)&0x7FFFFFFF + return x +} +func (x *DataBlockSize) UncompressedSet(v bool) *DataBlockSize { + const b = 1 << 31 + if v { + *x = *x&^b | b + } else { + *x &^= b + } + return x +} diff --git a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero.go b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero.go new file mode 100644 index 00000000000..8d3206a87c5 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero.go @@ -0,0 +1,212 @@ +// Package xxh32 implements the very fast XXH hashing algorithm (32 bits version). +// (https://github.com/Cyan4973/XXH/) +package xxh32 + +import ( + "encoding/binary" +) + +const ( + prime1 uint32 = 2654435761 + prime2 uint32 = 2246822519 + prime3 uint32 = 3266489917 + prime4 uint32 = 668265263 + prime5 uint32 = 374761393 + + primeMask = 0xFFFFFFFF + prime1plus2 = uint32((uint64(prime1) + uint64(prime2)) & primeMask) // 606290984 + prime1minus = uint32((-int64(prime1)) & primeMask) // 1640531535 +) + +// XXHZero represents an xxhash32 object with seed 0. +type XXHZero struct { + v [4]uint32 + totalLen uint64 + buf [16]byte + bufused int +} + +// Sum appends the current hash to b and returns the resulting slice. +// It does not change the underlying hash state. +func (xxh XXHZero) Sum(b []byte) []byte { + h32 := xxh.Sum32() + return append(b, byte(h32), byte(h32>>8), byte(h32>>16), byte(h32>>24)) +} + +// Reset resets the Hash to its initial state. +func (xxh *XXHZero) Reset() { + xxh.v[0] = prime1plus2 + xxh.v[1] = prime2 + xxh.v[2] = 0 + xxh.v[3] = prime1minus + xxh.totalLen = 0 + xxh.bufused = 0 +} + +// Size returns the number of bytes returned by Sum(). +func (xxh *XXHZero) Size() int { + return 4 +} + +// BlockSizeIndex gives the minimum number of bytes accepted by Write(). +func (xxh *XXHZero) BlockSize() int { + return 1 +} + +// Write adds input bytes to the Hash. +// It never returns an error. +func (xxh *XXHZero) Write(input []byte) (int, error) { + if xxh.totalLen == 0 { + xxh.Reset() + } + n := len(input) + m := xxh.bufused + + xxh.totalLen += uint64(n) + + r := len(xxh.buf) - m + if n < r { + copy(xxh.buf[m:], input) + xxh.bufused += len(input) + return n, nil + } + + var buf *[16]byte + if m != 0 { + // some data left from previous update + buf = &xxh.buf + c := copy(buf[m:], input) + n -= c + input = input[c:] + } + update(&xxh.v, buf, input) + xxh.bufused = copy(xxh.buf[:], input[n-n%16:]) + + return n, nil +} + +// Portable version of update. This updates v by processing all of buf +// (if not nil) and all full 16-byte blocks of input. +func updateGo(v *[4]uint32, buf *[16]byte, input []byte) { + // Causes compiler to work directly from registers instead of stack: + v1, v2, v3, v4 := v[0], v[1], v[2], v[3] + + if buf != nil { + v1 = rol13(v1+binary.LittleEndian.Uint32(buf[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(buf[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(buf[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(buf[12:])*prime2) * prime1 + } + + for ; len(input) >= 16; input = input[16:] { + sub := input[:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1 + } + v[0], v[1], v[2], v[3] = v1, v2, v3, v4 +} + +// Sum32 returns the 32 bits Hash value. +func (xxh *XXHZero) Sum32() uint32 { + h32 := uint32(xxh.totalLen) + if h32 >= 16 { + h32 += rol1(xxh.v[0]) + rol7(xxh.v[1]) + rol12(xxh.v[2]) + rol18(xxh.v[3]) + } else { + h32 += prime5 + } + + p := 0 + n := xxh.bufused + buf := xxh.buf + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(buf[p:p+4]) * prime3 + h32 = rol17(h32) * prime4 + } + for ; p < n; p++ { + h32 += uint32(buf[p]) * prime5 + h32 = rol11(h32) * prime1 + } + + h32 ^= h32 >> 15 + h32 *= prime2 + h32 ^= h32 >> 13 + h32 *= prime3 + h32 ^= h32 >> 16 + + return h32 +} + +// Portable version of ChecksumZero. +func checksumZeroGo(input []byte) uint32 { + n := len(input) + h32 := uint32(n) + + if n < 16 { + h32 += prime5 + } else { + v1 := prime1plus2 + v2 := prime2 + v3 := uint32(0) + v4 := prime1minus + p := 0 + for n := n - 16; p <= n; p += 16 { + sub := input[p:][:16] //BCE hint for compiler + v1 = rol13(v1+binary.LittleEndian.Uint32(sub[:])*prime2) * prime1 + v2 = rol13(v2+binary.LittleEndian.Uint32(sub[4:])*prime2) * prime1 + v3 = rol13(v3+binary.LittleEndian.Uint32(sub[8:])*prime2) * prime1 + v4 = rol13(v4+binary.LittleEndian.Uint32(sub[12:])*prime2) * prime1 + } + input = input[p:] + n -= p + h32 += rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) + } + + p := 0 + for n := n - 4; p <= n; p += 4 { + h32 += binary.LittleEndian.Uint32(input[p:p+4]) * prime3 + h32 = rol17(h32) * prime4 + } + for p < n { + h32 += uint32(input[p]) * prime5 + h32 = rol11(h32) * prime1 + p++ + } + + h32 ^= h32 >> 15 + h32 *= prime2 + h32 ^= h32 >> 13 + h32 *= prime3 + h32 ^= h32 >> 16 + + return h32 +} + +func rol1(u uint32) uint32 { + return u<<1 | u>>31 +} + +func rol7(u uint32) uint32 { + return u<<7 | u>>25 +} + +func rol11(u uint32) uint32 { + return u<<11 | u>>21 +} + +func rol12(u uint32) uint32 { + return u<<12 | u>>20 +} + +func rol13(u uint32) uint32 { + return u<<13 | u>>19 +} + +func rol17(u uint32) uint32 { + return u<<17 | u>>15 +} + +func rol18(u uint32) uint32 { + return u<<18 | u>>14 +} diff --git a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.go b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.go new file mode 100644 index 00000000000..0978b2665bd --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.go @@ -0,0 +1,11 @@ +// +build !noasm + +package xxh32 + +// ChecksumZero returns the 32-bit hash of input. +// +//go:noescape +func ChecksumZero(input []byte) uint32 + +//go:noescape +func update(v *[4]uint32, buf *[16]byte, input []byte) diff --git a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.s b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.s new file mode 100644 index 00000000000..0e9f146a36a --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_arm.s @@ -0,0 +1,259 @@ +// +build !noasm + +#include "textflag.h" + +#define prime1 $2654435761 +#define prime2 $2246822519 +#define prime3 $3266489917 +#define prime4 $668265263 +#define prime5 $374761393 + +#define prime1plus2 $606290984 +#define prime1minus $1640531535 + +// Register allocation. +#define p R0 +#define n R1 +#define h R2 +#define v1 R2 // Alias for h. +#define v2 R3 +#define v3 R4 +#define v4 R5 +#define x1 R6 +#define x2 R7 +#define x3 R8 +#define x4 R9 + +// We need the primes in registers. The 16-byte loop only uses prime{1,2}. +#define prime1r R11 +#define prime2r R12 +#define prime3r R3 // The rest can alias v{2-4}. +#define prime4r R4 +#define prime5r R5 + +// Update round macros. These read from and increment p. + +#define round16aligned \ + MOVM.IA.W (p), [x1, x2, x3, x4] \ + \ + MULA x1, prime2r, v1, v1 \ + MULA x2, prime2r, v2, v2 \ + MULA x3, prime2r, v3, v3 \ + MULA x4, prime2r, v4, v4 \ + \ + MOVW v1 @> 19, v1 \ + MOVW v2 @> 19, v2 \ + MOVW v3 @> 19, v3 \ + MOVW v4 @> 19, v4 \ + \ + MUL prime1r, v1 \ + MUL prime1r, v2 \ + MUL prime1r, v3 \ + MUL prime1r, v4 \ + +#define round16unaligned \ + MOVBU.P 16(p), x1 \ + MOVBU -15(p), x2 \ + ORR x2 << 8, x1 \ + MOVBU -14(p), x3 \ + MOVBU -13(p), x4 \ + ORR x4 << 8, x3 \ + ORR x3 << 16, x1 \ + \ + MULA x1, prime2r, v1, v1 \ + MOVW v1 @> 19, v1 \ + MUL prime1r, v1 \ + \ + MOVBU -12(p), x1 \ + MOVBU -11(p), x2 \ + ORR x2 << 8, x1 \ + MOVBU -10(p), x3 \ + MOVBU -9(p), x4 \ + ORR x4 << 8, x3 \ + ORR x3 << 16, x1 \ + \ + MULA x1, prime2r, v2, v2 \ + MOVW v2 @> 19, v2 \ + MUL prime1r, v2 \ + \ + MOVBU -8(p), x1 \ + MOVBU -7(p), x2 \ + ORR x2 << 8, x1 \ + MOVBU -6(p), x3 \ + MOVBU -5(p), x4 \ + ORR x4 << 8, x3 \ + ORR x3 << 16, x1 \ + \ + MULA x1, prime2r, v3, v3 \ + MOVW v3 @> 19, v3 \ + MUL prime1r, v3 \ + \ + MOVBU -4(p), x1 \ + MOVBU -3(p), x2 \ + ORR x2 << 8, x1 \ + MOVBU -2(p), x3 \ + MOVBU -1(p), x4 \ + ORR x4 << 8, x3 \ + ORR x3 << 16, x1 \ + \ + MULA x1, prime2r, v4, v4 \ + MOVW v4 @> 19, v4 \ + MUL prime1r, v4 \ + + +// func ChecksumZero([]byte) uint32 +TEXT ·ChecksumZero(SB), NOFRAME|NOSPLIT, $-4-16 + MOVW input_base+0(FP), p + MOVW input_len+4(FP), n + + MOVW prime1, prime1r + MOVW prime2, prime2r + + // Set up h for n < 16. It's tempting to say {ADD prime5, n, h} + // here, but that's a pseudo-op that generates a load through R11. + MOVW prime5, prime5r + ADD prime5r, n, h + CMP $0, n + BEQ end + + // We let n go negative so we can do comparisons with SUB.S + // instead of separate CMP. + SUB.S $16, n + BMI loop16done + + MOVW prime1plus2, v1 + MOVW prime2, v2 + MOVW $0, v3 + MOVW prime1minus, v4 + + TST $3, p + BNE loop16unaligned + +loop16aligned: + SUB.S $16, n + round16aligned + BPL loop16aligned + B loop16finish + +loop16unaligned: + SUB.S $16, n + round16unaligned + BPL loop16unaligned + +loop16finish: + MOVW v1 @> 31, h + ADD v2 @> 25, h + ADD v3 @> 20, h + ADD v4 @> 14, h + + // h += len(input) with v2 as temporary. + MOVW input_len+4(FP), v2 + ADD v2, h + +loop16done: + ADD $16, n // Restore number of bytes left. + + SUB.S $4, n + MOVW prime3, prime3r + BMI loop4done + MOVW prime4, prime4r + + TST $3, p + BNE loop4unaligned + +loop4aligned: + SUB.S $4, n + + MOVW.P 4(p), x1 + MULA prime3r, x1, h, h + MOVW h @> 15, h + MUL prime4r, h + + BPL loop4aligned + B loop4done + +loop4unaligned: + SUB.S $4, n + + MOVBU.P 4(p), x1 + MOVBU -3(p), x2 + ORR x2 << 8, x1 + MOVBU -2(p), x3 + ORR x3 << 16, x1 + MOVBU -1(p), x4 + ORR x4 << 24, x1 + + MULA prime3r, x1, h, h + MOVW h @> 15, h + MUL prime4r, h + + BPL loop4unaligned + +loop4done: + ADD.S $4, n // Restore number of bytes left. + BEQ end + + MOVW prime5, prime5r + +loop1: + SUB.S $1, n + + MOVBU.P 1(p), x1 + MULA prime5r, x1, h, h + MOVW h @> 21, h + MUL prime1r, h + + BNE loop1 + +end: + MOVW prime3, prime3r + EOR h >> 15, h + MUL prime2r, h + EOR h >> 13, h + MUL prime3r, h + EOR h >> 16, h + + MOVW h, ret+12(FP) + RET + + +// func update(v *[4]uint64, buf *[16]byte, p []byte) +TEXT ·update(SB), NOFRAME|NOSPLIT, $-4-20 + MOVW v+0(FP), p + MOVM.IA (p), [v1, v2, v3, v4] + + MOVW prime1, prime1r + MOVW prime2, prime2r + + // Process buf, if not nil. + MOVW buf+4(FP), p + CMP $0, p + BEQ noBuffered + + round16aligned + +noBuffered: + MOVW input_base +8(FP), p + MOVW input_len +12(FP), n + + SUB.S $16, n + BMI end + + TST $3, p + BNE loop16unaligned + +loop16aligned: + SUB.S $16, n + round16aligned + BPL loop16aligned + B end + +loop16unaligned: + SUB.S $16, n + round16unaligned + BPL loop16unaligned + +end: + MOVW v+0(FP), p + MOVM.IA [v1, v2, v3, v4], (p) + RET diff --git a/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_other.go b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_other.go new file mode 100644 index 00000000000..c96b59b8c3f --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/internal/xxh32/xxh32zero_other.go @@ -0,0 +1,10 @@ +// +build !arm noasm + +package xxh32 + +// ChecksumZero returns the 32-bit hash of input. +func ChecksumZero(input []byte) uint32 { return checksumZeroGo(input) } + +func update(v *[4]uint32, buf *[16]byte, input []byte) { + updateGo(v, buf, input) +} diff --git a/vendor/github.com/pierrec/lz4/v4/lz4.go b/vendor/github.com/pierrec/lz4/v4/lz4.go new file mode 100644 index 00000000000..c585d4064f5 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/lz4.go @@ -0,0 +1,147 @@ +// Package lz4 implements reading and writing lz4 compressed data. +// +// The package supports both the LZ4 stream format, +// as specified in http://fastcompression.blogspot.fr/2013/04/lz4-streaming-format-final.html, +// and the LZ4 block format, defined at +// http://fastcompression.blogspot.fr/2011/05/lz4-explained.html. +// +// See https://github.com/lz4/lz4 for the reference C implementation. +package lz4 + +import ( + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" +) + +func _() { + // Safety checks for duplicated elements. + var x [1]struct{} + _ = x[lz4block.CompressionLevel(Fast)-lz4block.Fast] + _ = x[Block64Kb-BlockSize(lz4block.Block64Kb)] + _ = x[Block256Kb-BlockSize(lz4block.Block256Kb)] + _ = x[Block1Mb-BlockSize(lz4block.Block1Mb)] + _ = x[Block4Mb-BlockSize(lz4block.Block4Mb)] +} + +// CompressBlockBound returns the maximum size of a given buffer of size n, when not compressible. +func CompressBlockBound(n int) int { + return lz4block.CompressBlockBound(n) +} + +// UncompressBlock uncompresses the source buffer into the destination one, +// and returns the uncompressed size. +// +// The destination buffer must be sized appropriately. +// +// An error is returned if the source data is invalid or the destination buffer is too small. +func UncompressBlock(src, dst []byte) (int, error) { + return lz4block.UncompressBlock(src, dst) +} + +// A Compressor compresses data into the LZ4 block format. +// It uses a fast compression algorithm. +// +// A Compressor is not safe for concurrent use by multiple goroutines. +// +// Use a Writer to compress into the LZ4 stream format. +type Compressor struct{ c lz4block.Compressor } + +// CompressBlock compresses the source buffer src into the destination dst. +// +// If compression is successful, the first return value is the size of the +// compressed data, which is always >0. +// +// If dst has length at least CompressBlockBound(len(src)), compression always +// succeeds. Otherwise, the first return value is zero. The error return is +// non-nil if the compressed data does not fit in dst, but it might fit in a +// larger buffer that is still smaller than CompressBlockBound(len(src)). The +// return value (0, nil) means the data is likely incompressible and a buffer +// of length CompressBlockBound(len(src)) should be passed in. +func (c *Compressor) CompressBlock(src, dst []byte) (int, error) { + return c.c.CompressBlock(src, dst) +} + +// CompressBlock compresses the source buffer into the destination one. +// This is the fast version of LZ4 compression and also the default one. +// +// The argument hashTable is scratch space for a hash table used by the +// compressor. If provided, it should have length at least 1<<16. If it is +// shorter (or nil), CompressBlock allocates its own hash table. +// +// The size of the compressed data is returned. +// +// If the destination buffer size is lower than CompressBlockBound and +// the compressed size is 0 and no error, then the data is incompressible. +// +// An error is returned if the destination buffer is too small. + +// CompressBlock is equivalent to Compressor.CompressBlock. +// The final argument is ignored and should be set to nil. +// +// This function is deprecated. Use a Compressor instead. +func CompressBlock(src, dst []byte, _ []int) (int, error) { + return lz4block.CompressBlock(src, dst) +} + +// A CompressorHC compresses data into the LZ4 block format. +// Its compression ratio is potentially better than that of a Compressor, +// but it is also slower and requires more memory. +// +// A Compressor is not safe for concurrent use by multiple goroutines. +// +// Use a Writer to compress into the LZ4 stream format. +type CompressorHC struct { + // Level is the maximum search depth for compression. + // Values <= 0 mean no maximum. + Level CompressionLevel + c lz4block.CompressorHC +} + +// CompressBlock compresses the source buffer src into the destination dst. +// +// If compression is successful, the first return value is the size of the +// compressed data, which is always >0. +// +// If dst has length at least CompressBlockBound(len(src)), compression always +// succeeds. Otherwise, the first return value is zero. The error return is +// non-nil if the compressed data does not fit in dst, but it might fit in a +// larger buffer that is still smaller than CompressBlockBound(len(src)). The +// return value (0, nil) means the data is likely incompressible and a buffer +// of length CompressBlockBound(len(src)) should be passed in. +func (c *CompressorHC) CompressBlock(src, dst []byte) (int, error) { + return c.c.CompressBlock(src, dst, lz4block.CompressionLevel(c.Level)) +} + +// CompressBlockHC is equivalent to CompressorHC.CompressBlock. +// The final two arguments are ignored and should be set to nil. +// +// This function is deprecated. Use a CompressorHC instead. +func CompressBlockHC(src, dst []byte, depth CompressionLevel, _, _ []int) (int, error) { + return lz4block.CompressBlockHC(src, dst, lz4block.CompressionLevel(depth)) +} + +const ( + // ErrInvalidSourceShortBuffer is returned by UncompressBlock or CompressBLock when a compressed + // block is corrupted or the destination buffer is not large enough for the uncompressed data. + ErrInvalidSourceShortBuffer = lz4errors.ErrInvalidSourceShortBuffer + // ErrInvalidFrame is returned when reading an invalid LZ4 archive. + ErrInvalidFrame = lz4errors.ErrInvalidFrame + // ErrInternalUnhandledState is an internal error. + ErrInternalUnhandledState = lz4errors.ErrInternalUnhandledState + // ErrInvalidHeaderChecksum is returned when reading a frame. + ErrInvalidHeaderChecksum = lz4errors.ErrInvalidHeaderChecksum + // ErrInvalidBlockChecksum is returned when reading a frame. + ErrInvalidBlockChecksum = lz4errors.ErrInvalidBlockChecksum + // ErrInvalidFrameChecksum is returned when reading a frame. + ErrInvalidFrameChecksum = lz4errors.ErrInvalidFrameChecksum + // ErrOptionInvalidCompressionLevel is returned when the supplied compression level is invalid. + ErrOptionInvalidCompressionLevel = lz4errors.ErrOptionInvalidCompressionLevel + // ErrOptionClosedOrError is returned when an option is applied to a closed or in error object. + ErrOptionClosedOrError = lz4errors.ErrOptionClosedOrError + // ErrOptionInvalidBlockSize is returned when + ErrOptionInvalidBlockSize = lz4errors.ErrOptionInvalidBlockSize + // ErrOptionNotApplicable is returned when trying to apply an option to an object not supporting it. + ErrOptionNotApplicable = lz4errors.ErrOptionNotApplicable + // ErrWriterNotClosed is returned when attempting to reset an unclosed writer. + ErrWriterNotClosed = lz4errors.ErrWriterNotClosed +) diff --git a/vendor/github.com/pierrec/lz4/v4/options.go b/vendor/github.com/pierrec/lz4/v4/options.go new file mode 100644 index 00000000000..4e1b6703b57 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/options.go @@ -0,0 +1,213 @@ +package lz4 + +import ( + "fmt" + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" + "reflect" + "runtime" +) + +//go:generate go run golang.org/x/tools/cmd/stringer -type=BlockSize,CompressionLevel -output options_gen.go + +type ( + applier interface { + Apply(...Option) error + private() + } + // Option defines the parameters to setup an LZ4 Writer or Reader. + Option func(applier) error +) + +// String returns a string representation of the option with its parameter(s). +func (o Option) String() string { + return o(nil).Error() +} + +// Default options. +var ( + DefaultBlockSizeOption = BlockSizeOption(Block4Mb) + DefaultChecksumOption = ChecksumOption(true) + DefaultConcurrency = ConcurrencyOption(1) + defaultOnBlockDone = OnBlockDoneOption(nil) +) + +const ( + Block64Kb BlockSize = 1 << (16 + iota*2) + Block256Kb + Block1Mb + Block4Mb +) + +// BlockSizeIndex defines the size of the blocks to be compressed. +type BlockSize uint32 + +// BlockSizeOption defines the maximum size of compressed blocks (default=Block4Mb). +func BlockSizeOption(size BlockSize) Option { + return func(a applier) error { + switch w := a.(type) { + case nil: + s := fmt.Sprintf("BlockSizeOption(%s)", size) + return lz4errors.Error(s) + case *Writer: + size := uint32(size) + if !lz4block.IsValid(size) { + return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidBlockSize, size) + } + w.frame.Descriptor.Flags.BlockSizeIndexSet(lz4block.Index(size)) + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +// BlockChecksumOption enables or disables block checksum (default=false). +func BlockChecksumOption(flag bool) Option { + return func(a applier) error { + switch w := a.(type) { + case nil: + s := fmt.Sprintf("BlockChecksumOption(%v)", flag) + return lz4errors.Error(s) + case *Writer: + w.frame.Descriptor.Flags.BlockChecksumSet(flag) + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +// ChecksumOption enables/disables all blocks or content checksum (default=true). +func ChecksumOption(flag bool) Option { + return func(a applier) error { + switch w := a.(type) { + case nil: + s := fmt.Sprintf("ChecksumOption(%v)", flag) + return lz4errors.Error(s) + case *Writer: + w.frame.Descriptor.Flags.ContentChecksumSet(flag) + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +// SizeOption sets the size of the original uncompressed data (default=0). It is useful to know the size of the +// whole uncompressed data stream. +func SizeOption(size uint64) Option { + return func(a applier) error { + switch w := a.(type) { + case nil: + s := fmt.Sprintf("SizeOption(%d)", size) + return lz4errors.Error(s) + case *Writer: + w.frame.Descriptor.Flags.SizeSet(size > 0) + w.frame.Descriptor.ContentSize = size + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +// ConcurrencyOption sets the number of go routines used for compression. +// If n <= 0, then the output of runtime.GOMAXPROCS(0) is used. +func ConcurrencyOption(n int) Option { + if n <= 0 { + n = runtime.GOMAXPROCS(0) + } + return func(a applier) error { + switch rw := a.(type) { + case nil: + s := fmt.Sprintf("ConcurrencyOption(%d)", n) + return lz4errors.Error(s) + case *Writer: + rw.num = n + return nil + case *Reader: + rw.num = n + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +// CompressionLevel defines the level of compression to use. The higher the better, but slower, compression. +type CompressionLevel uint32 + +const ( + Fast CompressionLevel = 0 + Level1 CompressionLevel = 1 << (8 + iota) + Level2 + Level3 + Level4 + Level5 + Level6 + Level7 + Level8 + Level9 +) + +// CompressionLevelOption defines the compression level (default=Fast). +func CompressionLevelOption(level CompressionLevel) Option { + return func(a applier) error { + switch w := a.(type) { + case nil: + s := fmt.Sprintf("CompressionLevelOption(%s)", level) + return lz4errors.Error(s) + case *Writer: + switch level { + case Fast, Level1, Level2, Level3, Level4, Level5, Level6, Level7, Level8, Level9: + default: + return fmt.Errorf("%w: %d", lz4errors.ErrOptionInvalidCompressionLevel, level) + } + w.level = lz4block.CompressionLevel(level) + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +func onBlockDone(int) {} + +// OnBlockDoneOption is triggered when a block has been processed. For a Writer, it is when is has been compressed, +// for a Reader, it is when it has been uncompressed. +func OnBlockDoneOption(handler func(size int)) Option { + if handler == nil { + handler = onBlockDone + } + return func(a applier) error { + switch rw := a.(type) { + case nil: + s := fmt.Sprintf("OnBlockDoneOption(%s)", reflect.TypeOf(handler).String()) + return lz4errors.Error(s) + case *Writer: + rw.handler = handler + return nil + case *Reader: + rw.handler = handler + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} + +// LegacyOption provides support for writing LZ4 frames in the legacy format. +// +// See https://github.com/lz4/lz4/blob/dev/doc/lz4_Frame_format.md#legacy-frame. +// +// NB. compressed Linux kernel images use a tweaked LZ4 legacy format where +// the compressed stream is followed by the original (uncompressed) size of +// the kernel (https://events.static.linuxfound.org/sites/events/files/lcjpcojp13_klee.pdf). +// This is also supported as a special case. +func LegacyOption(legacy bool) Option { + return func(a applier) error { + switch rw := a.(type) { + case nil: + s := fmt.Sprintf("LegacyOption(%v)", legacy) + return lz4errors.Error(s) + case *Writer: + rw.legacy = legacy + return nil + } + return lz4errors.ErrOptionNotApplicable + } +} diff --git a/vendor/github.com/pierrec/lz4/v4/options_gen.go b/vendor/github.com/pierrec/lz4/v4/options_gen.go new file mode 100644 index 00000000000..2de814909ef --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/options_gen.go @@ -0,0 +1,92 @@ +// Code generated by "stringer -type=BlockSize,CompressionLevel -output options_gen.go"; DO NOT EDIT. + +package lz4 + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[Block64Kb-65536] + _ = x[Block256Kb-262144] + _ = x[Block1Mb-1048576] + _ = x[Block4Mb-4194304] +} + +const ( + _BlockSize_name_0 = "Block64Kb" + _BlockSize_name_1 = "Block256Kb" + _BlockSize_name_2 = "Block1Mb" + _BlockSize_name_3 = "Block4Mb" +) + +func (i BlockSize) String() string { + switch { + case i == 65536: + return _BlockSize_name_0 + case i == 262144: + return _BlockSize_name_1 + case i == 1048576: + return _BlockSize_name_2 + case i == 4194304: + return _BlockSize_name_3 + default: + return "BlockSize(" + strconv.FormatInt(int64(i), 10) + ")" + } +} +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[Fast-0] + _ = x[Level1-512] + _ = x[Level2-1024] + _ = x[Level3-2048] + _ = x[Level4-4096] + _ = x[Level5-8192] + _ = x[Level6-16384] + _ = x[Level7-32768] + _ = x[Level8-65536] + _ = x[Level9-131072] +} + +const ( + _CompressionLevel_name_0 = "Fast" + _CompressionLevel_name_1 = "Level1" + _CompressionLevel_name_2 = "Level2" + _CompressionLevel_name_3 = "Level3" + _CompressionLevel_name_4 = "Level4" + _CompressionLevel_name_5 = "Level5" + _CompressionLevel_name_6 = "Level6" + _CompressionLevel_name_7 = "Level7" + _CompressionLevel_name_8 = "Level8" + _CompressionLevel_name_9 = "Level9" +) + +func (i CompressionLevel) String() string { + switch { + case i == 0: + return _CompressionLevel_name_0 + case i == 512: + return _CompressionLevel_name_1 + case i == 1024: + return _CompressionLevel_name_2 + case i == 2048: + return _CompressionLevel_name_3 + case i == 4096: + return _CompressionLevel_name_4 + case i == 8192: + return _CompressionLevel_name_5 + case i == 16384: + return _CompressionLevel_name_6 + case i == 32768: + return _CompressionLevel_name_7 + case i == 65536: + return _CompressionLevel_name_8 + case i == 131072: + return _CompressionLevel_name_9 + default: + return "CompressionLevel(" + strconv.FormatInt(int64(i), 10) + ")" + } +} diff --git a/vendor/github.com/pierrec/lz4/v4/reader.go b/vendor/github.com/pierrec/lz4/v4/reader.go new file mode 100644 index 00000000000..403aaf697a3 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/reader.go @@ -0,0 +1,243 @@ +package lz4 + +import ( + "io" + + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" + "github.com/pierrec/lz4/v4/internal/lz4stream" +) + +var readerStates = []aState{ + noState: newState, + errorState: newState, + newState: readState, + readState: closedState, + closedState: newState, +} + +// NewReader returns a new LZ4 frame decoder. +func NewReader(r io.Reader) *Reader { + return newReader(r, false) +} + +func newReader(r io.Reader, legacy bool) *Reader { + zr := &Reader{frame: lz4stream.NewFrame()} + zr.state.init(readerStates) + _ = zr.Apply(DefaultConcurrency, defaultOnBlockDone) + zr.Reset(r) + return zr +} + +// Reader allows reading an LZ4 stream. +type Reader struct { + state _State + src io.Reader // source reader + num int // concurrency level + frame *lz4stream.Frame // frame being read + data []byte // block buffer allocated in non concurrent mode + reads chan []byte // pending data + idx int // size of pending data + handler func(int) + cum uint32 +} + +func (*Reader) private() {} + +func (r *Reader) Apply(options ...Option) (err error) { + defer r.state.check(&err) + switch r.state.state { + case newState: + case errorState: + return r.state.err + default: + return lz4errors.ErrOptionClosedOrError + } + for _, o := range options { + if err = o(r); err != nil { + return + } + } + return +} + +// Size returns the size of the underlying uncompressed data, if set in the stream. +func (r *Reader) Size() int { + switch r.state.state { + case readState, closedState: + if r.frame.Descriptor.Flags.Size() { + return int(r.frame.Descriptor.ContentSize) + } + } + return 0 +} + +func (r *Reader) isNotConcurrent() bool { + return r.num == 1 +} + +func (r *Reader) init() error { + data, err := r.frame.InitR(r.src, r.num) + if err != nil { + return err + } + r.reads = data + r.idx = 0 + size := r.frame.Descriptor.Flags.BlockSizeIndex() + r.data = size.Get() + r.cum = 0 + return nil +} + +func (r *Reader) Read(buf []byte) (n int, err error) { + defer r.state.check(&err) + switch r.state.state { + case readState: + case closedState, errorState: + return 0, r.state.err + case newState: + // First initialization. + if err = r.init(); r.state.next(err) { + return + } + default: + return 0, r.state.fail() + } + for len(buf) > 0 { + var bn int + if r.idx == 0 { + if r.isNotConcurrent() { + bn, err = r.read(buf) + } else { + lz4block.Put(r.data) + r.data = <-r.reads + if len(r.data) == 0 { + // No uncompressed data: something went wrong or we are done. + err = r.frame.Blocks.ErrorR() + } + } + switch err { + case nil: + case io.EOF: + if er := r.frame.CloseR(r.src); er != nil { + err = er + } + lz4block.Put(r.data) + r.data = nil + return + default: + return + } + } + if bn == 0 { + // Fill buf with buffered data. + bn = copy(buf, r.data[r.idx:]) + r.idx += bn + if r.idx == len(r.data) { + // All data read, get ready for the next Read. + r.idx = 0 + } + } + buf = buf[bn:] + n += bn + r.handler(bn) + } + return +} + +// read uncompresses the next block as follow: +// - if buf has enough room, the block is uncompressed into it directly +// and the lenght of used space is returned +// - else, the uncompress data is stored in r.data and 0 is returned +func (r *Reader) read(buf []byte) (int, error) { + block := r.frame.Blocks.Block + _, err := block.Read(r.frame, r.src, r.cum) + if err != nil { + return 0, err + } + var direct bool + dst := r.data[:cap(r.data)] + if len(buf) >= len(dst) { + // Uncompress directly into buf. + direct = true + dst = buf + } + dst, err = block.Uncompress(r.frame, dst, true) + if err != nil { + return 0, err + } + r.cum += uint32(len(dst)) + if direct { + return len(dst), nil + } + r.data = dst + return 0, nil +} + +// Reset clears the state of the Reader r such that it is equivalent to its +// initial state from NewReader, but instead writing to writer. +// No access to reader is performed. +// +// w.Close must be called before Reset. +func (r *Reader) Reset(reader io.Reader) { + if r.data != nil { + lz4block.Put(r.data) + r.data = nil + } + r.frame.Reset(r.num) + r.state.reset() + r.src = reader + r.reads = nil +} + +// WriteTo efficiently uncompresses the data from the Reader underlying source to w. +func (r *Reader) WriteTo(w io.Writer) (n int64, err error) { + switch r.state.state { + case closedState, errorState: + return 0, r.state.err + case newState: + if err = r.init(); r.state.next(err) { + return + } + default: + return 0, r.state.fail() + } + defer r.state.nextd(&err) + + var data []byte + if r.isNotConcurrent() { + size := r.frame.Descriptor.Flags.BlockSizeIndex() + data = size.Get() + defer lz4block.Put(data) + } + for { + var bn int + var dst []byte + if r.isNotConcurrent() { + bn, err = r.read(data) + dst = data[:bn] + } else { + lz4block.Put(dst) + dst = <-r.reads + bn = len(dst) + if bn == 0 { + // No uncompressed data: something went wrong or we are done. + err = r.frame.Blocks.ErrorR() + } + } + switch err { + case nil: + case io.EOF: + err = r.frame.CloseR(r.src) + return + default: + return + } + r.handler(bn) + bn, err = w.Write(dst) + n += int64(bn) + if err != nil { + return + } + } +} diff --git a/vendor/github.com/pierrec/lz4/v4/state.go b/vendor/github.com/pierrec/lz4/v4/state.go new file mode 100644 index 00000000000..d94f04d05eb --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/state.go @@ -0,0 +1,75 @@ +package lz4 + +import ( + "errors" + "fmt" + "io" + + "github.com/pierrec/lz4/v4/internal/lz4errors" +) + +//go:generate go run golang.org/x/tools/cmd/stringer -type=aState -output state_gen.go + +const ( + noState aState = iota // uninitialized reader + errorState // unrecoverable error encountered + newState // instantiated object + readState // reading data + writeState // writing data + closedState // all done +) + +type ( + aState uint8 + _State struct { + states []aState + state aState + err error + } +) + +func (s *_State) init(states []aState) { + s.states = states + s.state = states[0] +} + +func (s *_State) reset() { + s.state = s.states[0] + s.err = nil +} + +// next sets the state to the next one unless it is passed a non nil error. +// It returns whether or not it is in error. +func (s *_State) next(err error) bool { + if err != nil { + s.err = fmt.Errorf("%s: %w", s.state, err) + s.state = errorState + return true + } + s.state = s.states[s.state] + return false +} + +// nextd is like next but for defers. +func (s *_State) nextd(errp *error) bool { + return errp != nil && s.next(*errp) +} + +// check sets s in error if not already in error and if the error is not nil or io.EOF, +func (s *_State) check(errp *error) { + if s.state == errorState || errp == nil { + return + } + if err := *errp; err != nil { + s.err = fmt.Errorf("%w[%s]", err, s.state) + if !errors.Is(err, io.EOF) { + s.state = errorState + } + } +} + +func (s *_State) fail() error { + s.state = errorState + s.err = fmt.Errorf("%w[%s]", lz4errors.ErrInternalUnhandledState, s.state) + return s.err +} diff --git a/vendor/github.com/pierrec/lz4/v4/state_gen.go b/vendor/github.com/pierrec/lz4/v4/state_gen.go new file mode 100644 index 00000000000..75fb8289243 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/state_gen.go @@ -0,0 +1,28 @@ +// Code generated by "stringer -type=aState -output state_gen.go"; DO NOT EDIT. + +package lz4 + +import "strconv" + +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. + var x [1]struct{} + _ = x[noState-0] + _ = x[errorState-1] + _ = x[newState-2] + _ = x[readState-3] + _ = x[writeState-4] + _ = x[closedState-5] +} + +const _aState_name = "noStateerrorStatenewStatereadStatewriteStateclosedState" + +var _aState_index = [...]uint8{0, 7, 17, 25, 34, 44, 55} + +func (i aState) String() string { + if i >= aState(len(_aState_index)-1) { + return "aState(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _aState_name[_aState_index[i]:_aState_index[i+1]] +} diff --git a/vendor/github.com/pierrec/lz4/v4/writer.go b/vendor/github.com/pierrec/lz4/v4/writer.go new file mode 100644 index 00000000000..44a43d251b0 --- /dev/null +++ b/vendor/github.com/pierrec/lz4/v4/writer.go @@ -0,0 +1,233 @@ +package lz4 + +import ( + "io" + + "github.com/pierrec/lz4/v4/internal/lz4block" + "github.com/pierrec/lz4/v4/internal/lz4errors" + "github.com/pierrec/lz4/v4/internal/lz4stream" +) + +var writerStates = []aState{ + noState: newState, + newState: writeState, + writeState: closedState, + closedState: newState, + errorState: newState, +} + +// NewWriter returns a new LZ4 frame encoder. +func NewWriter(w io.Writer) *Writer { + zw := &Writer{frame: lz4stream.NewFrame()} + zw.state.init(writerStates) + _ = zw.Apply(DefaultBlockSizeOption, DefaultChecksumOption, DefaultConcurrency, defaultOnBlockDone) + zw.Reset(w) + return zw +} + +// Writer allows writing an LZ4 stream. +type Writer struct { + state _State + src io.Writer // destination writer + level lz4block.CompressionLevel // how hard to try + num int // concurrency level + frame *lz4stream.Frame // frame being built + data []byte // pending data + idx int // size of pending data + handler func(int) + legacy bool +} + +func (*Writer) private() {} + +func (w *Writer) Apply(options ...Option) (err error) { + defer w.state.check(&err) + switch w.state.state { + case newState: + case errorState: + return w.state.err + default: + return lz4errors.ErrOptionClosedOrError + } + for _, o := range options { + if err = o(w); err != nil { + return + } + } + w.Reset(w.src) + return +} + +func (w *Writer) isNotConcurrent() bool { + return w.num == 1 +} + +// init sets up the Writer when in newState. It does not change the Writer state. +func (w *Writer) init() error { + w.frame.InitW(w.src, w.num, w.legacy) + if true || !w.isNotConcurrent() { + size := w.frame.Descriptor.Flags.BlockSizeIndex() + w.data = size.Get() + } + w.idx = 0 + return w.frame.Descriptor.Write(w.frame, w.src) +} + +func (w *Writer) Write(buf []byte) (n int, err error) { + defer w.state.check(&err) + switch w.state.state { + case writeState: + case closedState, errorState: + return 0, w.state.err + case newState: + if err = w.init(); w.state.next(err) { + return + } + default: + return 0, w.state.fail() + } + + zn := len(w.data) + for len(buf) > 0 { + if w.idx == 0 && len(buf) >= zn { + // Avoid a copy as there is enough data for a block. + if err = w.write(buf[:zn], false); err != nil { + return + } + n += zn + buf = buf[zn:] + continue + } + // Accumulate the data to be compressed. + m := copy(w.data[w.idx:], buf) + n += m + w.idx += m + buf = buf[m:] + + if w.idx < len(w.data) { + // Buffer not filled. + return + } + + // Buffer full. + if err = w.write(w.data, true); err != nil { + return + } + if !w.isNotConcurrent() { + size := w.frame.Descriptor.Flags.BlockSizeIndex() + w.data = size.Get() + } + w.idx = 0 + } + return +} + +func (w *Writer) write(data []byte, safe bool) error { + if w.isNotConcurrent() { + block := w.frame.Blocks.Block + err := block.Compress(w.frame, data, w.level).Write(w.frame, w.src) + w.handler(len(block.Data)) + return err + } + c := make(chan *lz4stream.FrameDataBlock) + w.frame.Blocks.Blocks <- c + go func(c chan *lz4stream.FrameDataBlock, data []byte, safe bool) { + b := lz4stream.NewFrameDataBlock(w.frame) + c <- b.Compress(w.frame, data, w.level) + <-c + w.handler(len(b.Data)) + b.Close(w.frame) + if safe { + // safe to put it back as the last usage of it was FrameDataBlock.Write() called before c is closed + lz4block.Put(data) + } + }(c, data, safe) + + return nil +} + +// Close closes the Writer, flushing any unwritten data to the underlying io.Writer, +// but does not close the underlying io.Writer. +func (w *Writer) Close() (err error) { + switch w.state.state { + case writeState: + case errorState: + return w.state.err + default: + return nil + } + defer w.state.nextd(&err) + if w.idx > 0 { + // Flush pending data, disable w.data freeing as it is done later on. + if err = w.write(w.data[:w.idx], false); err != nil { + return err + } + w.idx = 0 + } + err = w.frame.CloseW(w.src, w.num) + // It is now safe to free the buffer. + if w.data != nil { + lz4block.Put(w.data) + w.data = nil + } + return +} + +// Reset clears the state of the Writer w such that it is equivalent to its +// initial state from NewWriter, but instead writing to writer. +// Reset keeps the previous options unless overwritten by the supplied ones. +// No access to writer is performed. +// +// w.Close must be called before Reset or pending data may be dropped. +func (w *Writer) Reset(writer io.Writer) { + w.frame.Reset(w.num) + w.state.reset() + w.src = writer +} + +// ReadFrom efficiently reads from r and compressed into the Writer destination. +func (w *Writer) ReadFrom(r io.Reader) (n int64, err error) { + switch w.state.state { + case closedState, errorState: + return 0, w.state.err + case newState: + if err = w.init(); w.state.next(err) { + return + } + default: + return 0, w.state.fail() + } + defer w.state.check(&err) + + size := w.frame.Descriptor.Flags.BlockSizeIndex() + var done bool + var rn int + data := size.Get() + if w.isNotConcurrent() { + // Keep the same buffer for the whole process. + defer lz4block.Put(data) + } + for !done { + rn, err = io.ReadFull(r, data) + switch err { + case nil: + case io.EOF, io.ErrUnexpectedEOF: // read may be partial + done = true + default: + return + } + n += int64(rn) + err = w.write(data[:rn], true) + if err != nil { + return + } + w.handler(rn) + if !done && !w.isNotConcurrent() { + // The buffer will be returned automatically by go routines (safe=true) + // so get a new one fo the next round. + data = size.Get() + } + } + err = w.Close() + return +} diff --git a/vendor/modules.txt b/vendor/modules.txt index b68212c008f..050a1634946 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -397,6 +397,7 @@ github.com/golang/protobuf/ptypes/struct github.com/golang/protobuf/ptypes/timestamp github.com/golang/protobuf/ptypes/wrappers # github.com/golang/snappy v0.0.2 +## explicit github.com/golang/snappy # github.com/golangci/check v0.0.0-20180506172741-cfe4005ccda2 github.com/golangci/check/cmd/structcheck @@ -645,6 +646,15 @@ github.com/julienschmidt/httprouter # github.com/kisielk/gotool v1.0.0 github.com/kisielk/gotool github.com/kisielk/gotool/internal/load +# github.com/klauspost/compress v1.10.5 +## explicit +github.com/klauspost/compress/flate +github.com/klauspost/compress/fse +github.com/klauspost/compress/gzip +github.com/klauspost/compress/huff0 +github.com/klauspost/compress/snappy +github.com/klauspost/compress/zstd +github.com/klauspost/compress/zstd/internal/xxhash # github.com/klauspost/cpuid v1.3.1 github.com/klauspost/cpuid # github.com/konsorten/go-windows-terminal-sequences v1.0.3 @@ -770,6 +780,13 @@ github.com/pborman/uuid github.com/pelletier/go-toml # github.com/phayes/checkstyle v0.0.0-20170904204023-bfd46e6a821d github.com/phayes/checkstyle +# github.com/pierrec/lz4/v4 v4.1.3 +## explicit +github.com/pierrec/lz4/v4 +github.com/pierrec/lz4/v4/internal/lz4block +github.com/pierrec/lz4/v4/internal/lz4errors +github.com/pierrec/lz4/v4/internal/lz4stream +github.com/pierrec/lz4/v4/internal/xxh32 # github.com/pkg/errors v0.9.1 ## explicit github.com/pkg/errors