This repository has been archived by the owner on Aug 23, 2023. It is now read-only.

Commit
Introduce and default to a second chunk format including chunkspan
By default, all new chunks will be written in the new format. This means
the second byte of the chunk specifies the chunkspan.

We chose a lookup table because:
- Using log2 notation to define the chunk size would optimize for space,
  but it would force us to deal with awkward numbers.
- Using a static multiple of seconds would only allow a relatively
  narrow range of chunk lengths to be represented.

The lookup table combines the advantages of both approaches
while avoiding their disadvantages.
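The scheme described above can be sketched as follows. This is an illustrative simplification, not the commit's actual code: byte 0 carries the format identifier, byte 1 the span code, which indexes a lookup table of spans in seconds (truncated here for brevity; the real table has 32 entries).

```go
package main

import "fmt"

// Truncated, illustrative copy of the span lookup table (seconds).
var chunkSpans = []uint32{1, 5, 10, 15, 20, 30, 60, 90, 120, 180, 300, 600}

const (
	formatStandardGoTsz         = 0 // old format: no span byte
	formatStandardGoTszWithSpan = 1 // new format: second byte is the span code
)

// encode prepends the format and span-code bytes to the compressed payload.
func encode(spanCode uint8, payload []byte) []byte {
	return append([]byte{formatStandardGoTszWithSpan, spanCode}, payload...)
}

// decode returns the chunk span in seconds (0 for the old format) and the payload.
func decode(b []byte) (uint32, []byte) {
	if b[0] == formatStandardGoTsz {
		return 0, b[1:]
	}
	return chunkSpans[b[1]], b[2:]
}

func main() {
	buf := encode(10, []byte{0xde, 0xad}) // code 10 -> 300s = 5min
	span, payload := decode(buf)
	fmt.Println(span, len(payload)) // 300 2
}
```

Because the span code is a table index rather than a log2 exponent or a raw multiple, one byte can address any curated set of human-friendly spans.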
replay authored and Dieterbe committed Dec 14, 2016
1 parent 60bf930 commit 0021004
Showing 12 changed files with 103 additions and 9 deletions.
3 changes: 2 additions & 1 deletion docs/config.md
@@ -52,12 +52,13 @@ gc-interval = 1h
# in clusters, best to assure the primary has saved all the data that a newly warmup instance will need to query, to prevent gaps in charts
warm-up-period = 1h
# settings for rollups (aggregation for archives)
-# comma-separated of archive specifications.
# comma-separated list of archive specifications.
# archive specification is of the form: aggSpan:chunkSpan:numChunks:TTL[:ready as bool. default true]
# with these aggregation rules: 5min:1h:2:3mon,1h:6h:2:1y:false
# 5 min of data, store in a chunk that lasts 1hour, keep 2 chunks in memory, keep for 3months in cassandra
# 1hr worth of data, in chunks of 6 hours, 2 chunks in mem, keep for 1 year, but this series is not ready yet for querying.
# When running a cluster of metrictank instances, all instances should have the same agg-settings.
# chunk spans must be valid values as described here https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md
agg-settings =
```

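The archive-specification format documented above (`aggSpan:chunkSpan:numChunks:TTL[:ready]`) can be illustrated with a hedged sketch; the tiny duration parser and struct names below are assumptions for illustration, not metrictank's actual parsing code.

```go
package main

import (
	"fmt"
	"strconv"
	"strings"
)

// Illustrative unit table for the suffixes used in the sample rules.
var units = map[string]uint32{
	"sec": 1, "min": 60, "h": 3600, "d": 24 * 3600,
	"mon": 28 * 24 * 3600, "y": 365 * 24 * 3600,
}

// seconds converts strings like "5min" or "1y" to seconds; plain integers pass through.
func seconds(s string) uint32 {
	for suffix, mult := range units {
		if strings.HasSuffix(s, suffix) {
			n, _ := strconv.Atoi(strings.TrimSuffix(s, suffix))
			return uint32(n) * mult
		}
	}
	n, _ := strconv.Atoi(s)
	return uint32(n)
}

type aggSetting struct {
	aggSpan, chunkSpan, numChunks, ttl uint32
	ready                              bool
}

// parseSpec parses one aggSpan:chunkSpan:numChunks:TTL[:ready] specification.
func parseSpec(spec string) aggSetting {
	f := strings.Split(spec, ":")
	as := aggSetting{seconds(f[0]), seconds(f[1]), seconds(f[2]), seconds(f[3]), true}
	if len(f) == 5 {
		as.ready, _ = strconv.ParseBool(f[4])
	}
	return as
}

func main() {
	// the two sample rules from the config comment above
	for _, spec := range strings.Split("5min:1h:2:3mon,1h:6h:2:1y:false", ",") {
		fmt.Printf("%+v\n", parseSpec(spec))
	}
}
```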
3 changes: 3 additions & 0 deletions docs/consolidation.md
@@ -107,6 +107,9 @@ should be able to transition from one band's maxT to the next minT
must cleanly multiply between one another (why again?)
try to minimize storage overhead of each band

SPAN CHOICE
As described in the page [Data knobs](https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md#valid-chunk-spans), only a finite set of values are valid chunk spans. This applies to rolled up chunks as well.

RETENTION:
should at the minimum be maxT otherwise what's the point
shouldn't exceed the next band's minT, because that data wouldn't be used very much
12 changes: 11 additions & 1 deletion docs/data-knobs.md
@@ -19,7 +19,7 @@ For more details, see the [go-tsz eval program](https://github.com/dgryski/go-ts

### Basic guideline

-`chunkspan` is how long of a timeframe should be covered by your chunks. E.g. you could store anywhere between 10 minutes to 24 hours worth of data in a chunk (chunks for each raw metric).
`chunkspan` is how long of a timeframe should be covered by your chunks. E.g. you could store anywhere between 1 second to 24 hours worth of data in a chunk (chunks for each raw metric).
`numchunks` is simply how many chunks should be retained in RAM. (for each raw metric)

figuring out optimal configuration for the `chunkspan` and `numchunks` is not trivial.
@@ -44,7 +44,17 @@ Note:

Several factors come into play that may affect the above recommendation:

#### Valid chunk spans

Chunkspans can be set to one of the following:
```
1sec, 5sec, 10sec, 15sec, 20sec, 30sec, 60sec, 90sec, 2min, 3min, 5min, 10min, 15min, 20min,
30min, 45min, 1h, 90min, 2h, 150min, 3h, 4h, 5h, 6h, 7h, 8h, 9h, 10h, 12h, 15h, 18h, 24h
```
This list can be extended in the future.
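A deployment can enforce this at start-up, as this commit does in `metrictank.go`. A minimal sketch, with the table truncated for brevity and the function name being illustrative:

```go
package main

import "fmt"

// Truncated, illustrative copy of the valid spans (seconds); the real table has 32 entries.
var validChunkSpans = map[uint32]bool{
	1: true, 5: true, 10: true, 60: true, 90: true,
	1800: true, 3600: true, 5400: true, 86400: true,
}

// checkChunkSpan rejects spans that are not in the lookup table.
func checkChunkSpan(span uint32) error {
	if !validChunkSpans[span] {
		return fmt.Errorf("chunkSpan %d is not a valid value", span)
	}
	return nil
}

func main() {
	fmt.Println(checkChunkSpan(5400)) // 90min is in the table
	fmt.Println(checkChunkSpan(2400)) // 40min is not: error
}
```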

#### Rollups remove the need to keep large number of higher resolution chunks

If you roll-up data for archival storage, those chunks will also be in memory as per your configuration.
Querying for large timeframes may use the consolidated chunks in RAM, and keeping
extra raw (or higher-resolution) data in RAM becomes pointless, putting an upper bound on how many chunks to keep.
2 changes: 2 additions & 0 deletions mdata/aggmetric.go
@@ -356,6 +356,7 @@ func (a *AggMetric) persist(pos int) {
chunk: chunk,
ttl: a.ttl,
timestamp: time.Now(),
span: a.ChunkSpan,
}

// if we recently became the primary, there may be older chunks
@@ -375,6 +376,7 @@ func (a *AggMetric) persist(pos int) {
chunk: previousChunk,
ttl: a.ttl,
timestamp: time.Now(),
span: a.ChunkSpan,
})
previousPos--
if previousPos < 0 {
1 change: 1 addition & 0 deletions mdata/chunk/format.go
@@ -10,4 +10,5 @@ type Format uint8
// identifier of message format
const (
FormatStandardGoTsz Format = iota
FormatStandardGoTszWithSpan
)
49 changes: 49 additions & 0 deletions mdata/chunk/spans.go
@@ -0,0 +1,49 @@
package chunk

type SpanCode uint8

const min = 60
const hour = 60 * min

var ChunkSpans = [32]uint32{
1,
5,
10,
15,
20,
30,
60, // 1m
90, // 1.5m
2 * 60, // 2m
3 * 60, // 3m
5 * 60, // 5m
10 * 60, // 10m
15 * 60, // 15m
20 * 60, // 20m
30 * 60, // 30m
45 * 60, // 45m
3600, // 1h
90 * 60, // 1.5h
2 * 3600, // 2h
150 * 60, // 2.5h
3 * 3600, // 3h
4 * 3600, // 4h
5 * 3600, // 5h
6 * 3600, // 6h
7 * 3600, // 7h
8 * 3600, // 8h
9 * 3600, // 9h
10 * 3600, // 10h
12 * 3600, // 12h
15 * 3600, // 15h
18 * 3600, // 18h
24 * 3600, // 24h
}

var RevChunkSpans = make(map[uint32]SpanCode, len(ChunkSpans))

func init() {
for k, v := range ChunkSpans {
RevChunkSpans[v] = SpanCode(k)
}
}
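The `init` above builds the reverse map so the write path can turn a span in seconds into its one-byte code in O(1). A sketch of the round-trip, with the table truncated for brevity:

```go
package main

import "fmt"

type SpanCode uint8

// Truncated copy of the table for illustration; the real one has 32 entries.
var chunkSpans = []uint32{1, 5, 10, 15, 20, 30, 60, 90}

// revChunkSpans maps a span in seconds back to its code (table index).
var revChunkSpans = make(map[uint32]SpanCode, len(chunkSpans))

func init() {
	for k, v := range chunkSpans {
		revChunkSpans[v] = SpanCode(k)
	}
}

func main() {
	code, ok := revChunkSpans[90]
	fmt.Println(code, ok)         // 7 true
	fmt.Println(chunkSpans[code]) // 90: the lookup round-trips
	_, ok = revChunkSpans[100]
	fmt.Println(ok)               // false: 100s is not a valid span
}
```

The `ok` check on the reverse lookup is what lets the write path detect an invalid span before persisting, as the store code below does.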
1 change: 1 addition & 0 deletions mdata/cwr.go
@@ -20,4 +20,5 @@ type ChunkWriteRequest struct {
chunk *chunk.Chunk
ttl uint32
timestamp time.Time
span uint32
}
24 changes: 20 additions & 4 deletions mdata/store_cassandra.go
@@ -204,9 +204,16 @@ func (c *cassandraStore) processWriteQueue(queue chan *ChunkWriteRequest, meter

data := cwr.chunk.Series.Bytes()
chunkSizeAtSave.Value(int64(len(data)))
-version := chunk.FormatStandardGoTsz
version := chunk.FormatStandardGoTszWithSpan
buf := new(bytes.Buffer)
-binary.Write(buf, binary.LittleEndian, uint8(version))
binary.Write(buf, binary.LittleEndian, version)

spanCode, ok := chunk.RevChunkSpans[cwr.span]
if !ok {
// it's probably better to panic than to persist the chunk with a wrong length
panic(fmt.Sprintf("Chunk span invalid: %d", cwr.span))
}
binary.Write(buf, binary.LittleEndian, spanCode)
buf.Write(data)
success := false
attempts := 0
@@ -359,11 +366,20 @@ func (c *cassandraStore) Search(key string, start, end uint32) ([]iter.Iter, err
log.Error(3, errChunkTooSmall.Error())
return iters, errChunkTooSmall
}
-if chunk.Format(b[0]) != chunk.FormatStandardGoTsz {
switch chunk.Format(b[0]) {
case chunk.FormatStandardGoTsz:
b = b[1:]
case chunk.FormatStandardGoTszWithSpan:
if int(b[1]) >= len(chunk.ChunkSpans) {
log.Error(3, "corrupt data, chunk span code %d is not known", chunk.SpanCode(b[1]))
}
// getting the chunk span: _ = chunk.ChunkSpans[chunk.SpanCode(b[1])]
b = b[2:]
default:
log.Error(3, errUnknownChunkFormat.Error())
return iters, errUnknownChunkFormat
}
-it, err := tsz.NewIterator(b[1:])
it, err := tsz.NewIterator(b)
if err != nil {
log.Error(3, "failed to unpack cassandra payload. %s", err)
return iters, err
3 changes: 2 additions & 1 deletion metrictank-sample.ini
@@ -32,12 +32,13 @@ gc-interval = 1h
warm-up-period = 1h

# settings for rollups (aggregation for archives)
-# comma-separated of archive specifications.
# comma-separated list of archive specifications.
# archive specification is of the form: aggSpan:chunkSpan:numChunks:TTL[:ready as bool. default true]
# with these aggregation rules: 5min:1h:2:3mon,1h:6h:2:1y:false
# 5 min of data, store in a chunk that lasts 1hour, keep 2 chunks in memory, keep for 3months in cassandra
# 1hr worth of data, in chunks of 6 hours, 2 chunks in mem, keep for 1 year, but this series is not ready yet for querying.
# When running a cluster of metrictank instances, all instances should have the same agg-settings.
# chunk spans must be valid values as described here https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md
agg-settings =

## metric data storage in cassandra ##
8 changes: 8 additions & 0 deletions metrictank.go
@@ -253,6 +253,10 @@ func main() {
if (mdata.Month_sec % chunkSpan) != 0 {
log.Fatal(4, "chunkSpan must fit without remainders into month_sec (28*24*60*60)")
}
_, ok := chunk.RevChunkSpans[chunkSpan]
if !ok {
log.Fatal(4, "chunkSpan %s is not a valid value (https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md#valid-chunk-spans)", *chunkSpanStr)
}

set := strings.Split(*aggSettings, ",")
finalSettings := make([]mdata.AggSetting, 0)
@@ -272,6 +276,10 @@ func main() {
if (mdata.Month_sec % aggChunkSpan) != 0 {
log.Fatal(4, "aggChunkSpan must fit without remainders into month_sec (28*24*60*60)")
}
_, ok := chunk.RevChunkSpans[aggChunkSpan]
if !ok {
log.Fatal(4, "aggChunkSpan %s is not a valid value (https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md#valid-chunk-spans)", fields[1])
}
highestChunkSpan = util.Max(highestChunkSpan, aggChunkSpan)
ready := true
if len(fields) == 5 {
3 changes: 2 additions & 1 deletion scripts/config/metrictank-docker.ini
@@ -29,12 +29,13 @@ gc-interval = 1h
warm-up-period = 1h

# settings for rollups (aggregation for archives)
-# comma-separated of archive specifications.
# comma-separated list of archive specifications.
# archive specification is of the form: aggSpan:chunkSpan:numChunks:TTL[:ready as bool. default true]
# with these aggregation rules: 5min:1h:2:3mon,1h:6h:2:1y:false
# 5 min of data, store in a chunk that lasts 1hour, keep 2 chunks in memory, keep for 3months in cassandra
# 1hr worth of data, in chunks of 6 hours, 2 chunks in mem, keep for 1 year, but this series is not ready yet for querying.
# When running a cluster of metrictank instances, all instances should have the same agg-settings.
# chunk spans must be valid values as described here https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md
agg-settings =

## metric data storage in cassandra ##
3 changes: 2 additions & 1 deletion scripts/config/metrictank-package.ini
@@ -29,12 +29,13 @@ gc-interval = 1h
warm-up-period = 1h

# settings for rollups (aggregation for archives)
-# comma-separated of archive specifications.
# comma-separated list of archive specifications.
# archive specification is of the form: aggSpan:chunkSpan:numChunks:TTL[:ready as bool. default true]
# with these aggregation rules: 5min:1h:2:3mon,1h:6h:2:1y:false
# 5 min of data, store in a chunk that lasts 1hour, keep 2 chunks in memory, keep for 3months in cassandra
# 1hr worth of data, in chunks of 6 hours, 2 chunks in mem, keep for 1 year, but this series is not ready yet for querying.
# When running a cluster of metrictank instances, all instances should have the same agg-settings.
# chunk spans must be valid values as described here https://github.com/raintank/metrictank/blob/master/docs/data-knobs.md
agg-settings =

## metric data storage in cassandra ##
