Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add a format for object-size providing a weighted random based on configured buckets #342

Merged
merged 2 commits into from
Oct 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,8 @@ By default warp uploads random data.

### Object Size

#### Fixed File Size

Most benchmarks use the `--obj.size` parameter to decide the size of objects to upload.

Different benchmark types will have different default values.
Expand Down Expand Up @@ -241,6 +243,20 @@ The average object size will be close to `--obj.size` multiplied by 0.179151.

To get a value for `--obj.size` multiply the desired average object size by 5.582 to get a maximum value.

#### Bucketed File Size

The `--obj.size` parameter also accepts a string value that describes size buckets.
Using that format enables bucketed object sizes and takes precedence over
random object sizes that `--obj.randsize` would otherwise enable.

The format of the string is a comma-separated list of colon-separated pairs, each describing a bucket and its weight.
Within each bucket, the size distribution is uniform.

E.g.: the value `4096:10740,8192:1685,16384:1623` will generate objects whose size is chosen
between 0 and 4096 with a weight of 10740, between 4096 and 8192 with a weight of 1685,
or between 8192 and 16384 with a weight of 1623.


## Automatic Termination
Adding `--autoterm` parameter will enable automatic termination when results are considered stable.
To detect a stable setup, warp continuously downsamples the current data to
Expand Down
50 changes: 31 additions & 19 deletions cli/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ import (

"github.com/minio/cli"
"github.com/minio/warp/pkg/generator"

hist "github.com/jfsmig/prng/histogram"
)

var genFlags = []cli.Flag{
Expand Down Expand Up @@ -83,28 +85,38 @@ func newGenSource(ctx *cli.Context, sizeField string) func() generator.Source {
generator.WithCustomPrefix(ctx.String("prefix")),
generator.WithPrefixSize(prefixSize),
}
tokens := strings.Split(ctx.String(sizeField), ",")
switch len(tokens) {
case 1:
size, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid obj.size specified")
}
opts = append(opts, generator.WithSize(int64(size)))
case 2:
minSize, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid min obj.size specified")
if strings.IndexRune(ctx.String(sizeField), ':') > 0 {
if _, err := hist.ParseCSV(ctx.String(sizeField)); err != nil {
fatalIf(probe.NewError(err), "Invalid histogram format for the size parameter")
} else {
opts = append(opts, generator.WithSizeHistograms(ctx.String(sizeField)))
}
maxSize, err := toSize(tokens[1])
if err != nil {
fatalIf(probe.NewError(err), "Invalid max obj.size specified")
} else {
tokens := strings.Split(ctx.String(sizeField), ",")
switch len(tokens) {
case 1:
size, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid obj.size specified")
}
opts = append(opts, generator.WithSize(int64(size)))
case 2:
minSize, err := toSize(tokens[0])
if err != nil {
fatalIf(probe.NewError(err), "Invalid min obj.size specified")
}
maxSize, err := toSize(tokens[1])
if err != nil {
fatalIf(probe.NewError(err), "Invalid max obj.size specified")
}
opts = append(opts, generator.WithMinMaxSize(int64(minSize), int64(maxSize)))
default:
fatalIf(probe.NewError(fmt.Errorf("unexpected obj.size specified: %s", ctx.String(sizeField))), "Invalid obj.size parameter")
}
opts = append(opts, generator.WithMinMaxSize(int64(minSize), int64(maxSize)))
default:
fatalIf(probe.NewError(fmt.Errorf("unexpected obj.size specified: %s", ctx.String(sizeField))), "Invalid obj.size parameter")

opts = append([]generator.Option{g.Apply()}, append(opts, generator.WithRandomSize(ctx.Bool("obj.randsize")))...)
}
opts = append([]generator.Option{g.Apply()}, append(opts, generator.WithRandomSize(ctx.Bool("obj.randsize")))...)

src, err := generator.NewFn(opts...)
fatalIf(probe.NewError(err), "Unable to create data generator")
return src
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ require (
github.com/dustin/go-humanize v1.0.1
github.com/fatih/color v1.17.0
github.com/influxdata/influxdb-client-go/v2 v2.13.0
github.com/jfsmig/prng v0.0.2
github.com/klauspost/compress v1.17.9
github.com/minio/cli v1.24.2
github.com/minio/madmin-go/v3 v3.0.51
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ github.com/influxdata/influxdb-client-go/v2 v2.13.0 h1:ioBbLmR5NMbAjP4UVA5r9b5xG
github.com/influxdata/influxdb-client-go/v2 v2.13.0/go.mod h1:k+spCbt9hcvqvUiz0sr5D8LolXHqAAOfPw9v/RIRHl4=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf h1:7JTmneyiNEwVBOHSjoMxiWAqB992atOeepeFYegn5RU=
github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf/go.mod h1:xaLFMmpvUxqXtVkUJfg9QmT88cDaCJ3ZKgdZ78oO8Qo=
github.com/jfsmig/prng v0.0.2 h1:aZun+YgmBnUyhqvI+EDjwmOYc1kCPsihdEr9V/1YlGA=
github.com/jfsmig/prng v0.0.2/go.mod h1:bz1fX1aizp8/Lu1thLzfirh5uExjC1lVwB8SSt6ExpE=
github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
Expand Down
29 changes: 25 additions & 4 deletions pkg/generator/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ package generator
import (
"errors"
"math/rand"

hist "github.com/jfsmig/prng/histogram"
)

// Options provides options.
Expand All @@ -33,6 +35,10 @@ type Options struct {
totalSize int64
randomPrefix int
randSize bool

// Activates the use of a distribution of sizes
flagSizesDistribution bool
sizesDistribution hist.Int64Distribution
}

// OptionApplier allows to abstract generator options.
Expand All @@ -42,6 +48,9 @@ type OptionApplier interface {

// getSize will return a size for an object.
func (o Options) getSize(rng *rand.Rand) int64 {
if o.flagSizesDistribution {
return o.sizesDistribution.Poll(rng)
}
if !o.randSize {
return o.totalSize
}
Expand All @@ -59,20 +68,32 @@ func defaultOptions() Options {
return o
}

// WithSizeHistograms makes the generator draw object sizes from a weighted
// distribution instead of a fixed or random size.
//
// encoded is passed unchanged to hist.ParseCSV. If parsing fails, the error is
// returned as-is and the options are left untouched. On success the parsed
// distribution is stored and flagSizesDistribution is set, which makes getSize
// poll the distribution before considering any other size setting.
func WithSizeHistograms(encoded string) Option {
	return func(o *Options) error {
		// Parse into a local first so that a failed parse never overwrites a
		// distribution that was configured earlier.
		dist, err := hist.ParseCSV(encoded)
		if err != nil {
			return err
		}
		o.sizesDistribution = dist
		o.flagSizesDistribution = true
		return nil
	}
}

// WithMinMaxSize sets the min and max size of the generated data.
func WithMinMaxSize(min, max int64) Option {
return func(o *Options) error {
if min <= 0 {
return errors.New("WithSize: minSize must be >= 0")
return errors.New("WithMinMaxSize: minSize must be >= 0")
}
if max < 0 {
return errors.New("WithSize: maxSize must be > 0")
return errors.New("WithMinMaxSize: maxSize must be > 0")
}
if min > max {
return errors.New("WithSize: minSize must be < maxSize")
return errors.New("WithMinMaxSize: minSize must be < maxSize")
}
if o.randSize && max < 256 {
return errors.New("WithSize: random sized objects should be at least 256 bytes")
return errors.New("WithMinMaxSize: random sized objects should be at least 256 bytes")
}

o.totalSize = max
Expand Down
Loading