From f7b4e4ef141b34572e547ad9f885382b3118a0e0 Mon Sep 17 00:00:00 2001 From: Chris LaPointe Date: Sat, 10 Sep 2022 16:23:31 -0400 Subject: [PATCH] Sorting (#79) Add display sorting across most aggregators to support text, numeric, contextual, date-parsing, and by aggregated value. This commit breaks backwards compatibility by replacing --sort-key and --reverse in many aggregators with simply --sort=name --- cmd/bargraph.go | 12 +-- cmd/heatmap.go | 16 ++- cmd/helpers/sorting.go | 95 ++++++++++++++++++ cmd/helpers/sorting_test.go | 93 +++++++++++++++++ cmd/histo.go | 36 +++---- cmd/tabulate.go | 44 ++++---- docs/usage/aggregators.md | 24 +++++ pkg/aggregation/counter.go | 51 ++-------- pkg/aggregation/counter_test.go | 7 +- pkg/aggregation/countersubkey.go | 23 ++--- pkg/aggregation/countersubkey_test.go | 10 +- pkg/aggregation/sorting/contextual.go | 105 ++++++++++++++++++++ pkg/aggregation/sorting/contextual_test.go | 28 ++++++ pkg/aggregation/sorting/dates.go | 39 ++++++++ pkg/aggregation/sorting/dates_test.go | 38 +++++++ pkg/aggregation/sorting/namevalue.go | 29 ++++++ pkg/aggregation/sorting/namevalue_test.go | 41 ++++++++ pkg/aggregation/sorting/sorter.go | 50 ++++++++++ pkg/aggregation/sorting/sorter_test.go | 73 ++++++++++++++ pkg/aggregation/sorting/strings.go | 22 ++++ pkg/aggregation/sorting/strings_test.go | 47 +++++++++ pkg/aggregation/table.go | 51 ++-------- pkg/aggregation/table_test.go | 13 +-- pkg/multiterm/termrenderers/heatmap.go | 7 +- pkg/multiterm/termrenderers/heatmap_test.go | 7 +- 25 files changed, 787 insertions(+), 174 deletions(-) create mode 100644 cmd/helpers/sorting.go create mode 100644 cmd/helpers/sorting_test.go create mode 100644 pkg/aggregation/sorting/contextual.go create mode 100644 pkg/aggregation/sorting/contextual_test.go create mode 100644 pkg/aggregation/sorting/dates.go create mode 100644 pkg/aggregation/sorting/dates_test.go create mode 100644 pkg/aggregation/sorting/namevalue.go create mode 100644 
pkg/aggregation/sorting/namevalue_test.go create mode 100644 pkg/aggregation/sorting/sorter.go create mode 100644 pkg/aggregation/sorting/sorter_test.go create mode 100644 pkg/aggregation/sorting/strings.go create mode 100644 pkg/aggregation/sorting/strings_test.go diff --git a/cmd/bargraph.go b/cmd/bargraph.go index 4dd7936..6ad8866 100644 --- a/cmd/bargraph.go +++ b/cmd/bargraph.go @@ -16,8 +16,8 @@ go run . bars -sz -m "\[(.+?)\].*\" (\d+)" -e "{$ {buckettime {1} year nginx} {2 func bargraphFunction(c *cli.Context) error { var ( - stacked = c.Bool("stacked") - reverseSort = c.Bool("reverse") + stacked = c.Bool("stacked") + sortName = c.String(helpers.DefaultSortFlag.Name) ) counter := aggregation.NewSubKeyCounter() @@ -26,12 +26,13 @@ func bargraphFunction(c *cli.Context) error { batcher := helpers.BuildBatcherFromArguments(c) ext := helpers.BuildExtractorFromArguments(c, batcher) + sorter := helpers.BuildSorterOrFail(sortName) helpers.RunAggregationLoop(ext, counter, func() { line := 0 writer.SetKeys(counter.SubKeys()...) - for _, row := range counter.ItemsSorted(reverseSort) { + for _, row := range counter.ItemsSorted(sorter) { writer.WriteBar(line, row.Name, row.Item.Items()...) 
line++ } @@ -61,10 +62,7 @@ func bargraphCommand() *cli.Command { Aliases: []string{"s"}, Usage: "Display bargraph as stacked", }, - &cli.BoolFlag{ - Name: "reverse", - Usage: "Reverses the display sort-order", - }, + helpers.DefaultSortFlag, }, }) } diff --git a/cmd/heatmap.go b/cmd/heatmap.go index 68737ce..e070838 100644 --- a/cmd/heatmap.go +++ b/cmd/heatmap.go @@ -21,12 +21,16 @@ func heatmapFunction(c *cli.Context) error { minVal = c.Int64("min") maxFixed = c.IsSet("max") maxVal = c.Int64("max") + sortRows = c.String("sort-rows") + sortCols = c.String("sort-cols") ) counter := aggregation.NewTable(delim) batcher := helpers.BuildBatcherFromArguments(c) ext := helpers.BuildExtractorFromArguments(c, batcher) + rowSorter := helpers.BuildSorterOrFail(sortRows) + colSorter := helpers.BuildSorterOrFail(sortCols) writer := termrenderers.NewHeatmap(multiterm.New(), numRows, numCols) @@ -37,7 +41,7 @@ func heatmapFunction(c *cli.Context) error { } helpers.RunAggregationLoop(ext, counter, func() { - writer.WriteTable(counter) + writer.WriteTable(counter, rowSorter, colSorter) writer.WriteFooter(0, helpers.FWriteExtractorSummary(ext, counter.ParseErrors(), fmt.Sprintf("(R: %v; C: %v)", color.Wrapi(color.Yellow, counter.RowCount()), color.Wrapi(color.BrightBlue, counter.ColumnCount())))) writer.WriteFooter(1, batcher.StatusString()) @@ -80,6 +84,16 @@ func heatmapCommand() *cli.Command { Name: "max", Usage: "Sets the upper bounds of the heatmap (default: auto)", }, + &cli.StringFlag{ + Name: "sort-rows", + Usage: helpers.DefaultSortFlag.Usage, + Value: helpers.DefaultSortFlag.Value, + }, + &cli.StringFlag{ + Name: "sort-cols", + Usage: helpers.DefaultSortFlag.Usage, + Value: helpers.DefaultSortFlag.Value, + }, }, }) } diff --git a/cmd/helpers/sorting.go b/cmd/helpers/sorting.go new file mode 100644 index 0000000..fe32fcc --- /dev/null +++ b/cmd/helpers/sorting.go @@ -0,0 +1,95 @@ +package helpers + +import ( + "errors" + "fmt" + "rare/pkg/aggregation/sorting" + 
"rare/pkg/logger" + "rare/pkg/stringSplitter" + "strings" + + "github.com/urfave/cli/v2" +) + +var DefaultSortFlag = &cli.StringFlag{ + Name: "sort", + Usage: "Sorting method for display (value, text, numeric, contextual, date)", + Value: "numeric", +} + +// Create a sort flag with a different default value +func DefaultSortFlagWithDefault(dflt string) *cli.StringFlag { + if _, err := lookupSorter(dflt); err != nil { + panic(err) + } + + flag := *DefaultSortFlag + flag.Value = dflt + return &flag +} + +func BuildSorterOrFail(fullName string) sorting.NameValueSorter { + sorter, err := BuildSorter(fullName) + if err != nil { + logger.Fatal(err) + } + return sorter +} + +func BuildSorter(fullName string) (sorting.NameValueSorter, error) { + name, reverse, err := parseSort(fullName) + if err != nil { + return nil, fmt.Errorf("error parsing sort: %v", err) + } + + sorter, err := lookupSorter(name) + if err != nil { + return nil, fmt.Errorf("unknown sort: %s", name) + } + if reverse { + sorter = sorting.Reverse(sorter) + } + return sorter, nil +} + +func parseSort(name string) (realname string, reverse bool, err error) { + splitter := stringSplitter.Splitter{ + S: name, + Delim: ":", + } + + realname = strings.ToLower(splitter.Next()) + reverse = (realname == "value") // Value defaults descending + + if modifier, hasModifier := splitter.NextOk(); hasModifier { + switch strings.ToLower(modifier) { + case "rev", "reverse": + reverse = !reverse + case "desc": + reverse = true + case "asc": + reverse = false + default: + return "", false, errors.New("invalid sort modifier") + } + } + + return +} + +func lookupSorter(name string) (sorting.NameValueSorter, error) { + name = strings.ToLower(name) + switch name { + case "text", "": + return sorting.ValueNilSorter(sorting.ByName), nil + case "numeric": + return sorting.ValueNilSorter(sorting.ByNameSmart), nil + case "contextual", "context": + return sorting.ValueNilSorter(sorting.ByContextual()), nil + case "date": + return 
sorting.ValueNilSorter(sorting.ByDateWithContextual()), nil + case "value": + return sorting.ValueSorterEx(sorting.ByName), nil + } + return nil, errors.New("unknown sort") +} diff --git a/cmd/helpers/sorting_test.go b/cmd/helpers/sorting_test.go new file mode 100644 index 0000000..82d2743 --- /dev/null +++ b/cmd/helpers/sorting_test.go @@ -0,0 +1,93 @@ +package helpers + +import ( + "rare/pkg/aggregation/sorting" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestBuildSorter(t *testing.T) { + assert.NotNil(t, BuildSorterOrFail("text")) + assert.NotNil(t, BuildSorterOrFail("numeric")) + assert.NotNil(t, BuildSorterOrFail("contextual")) + assert.NotNil(t, BuildSorterOrFail("value")) + assert.NotNil(t, BuildSorterOrFail("value:reverse")) +} + +func TestOrderResults(t *testing.T) { + assertSortEquals(t, "text", 1, 4, 2, 0, 3) + assertSortEquals(t, "text:asc", 1, 4, 2, 0, 3) + assertSortEquals(t, "text:reverse", 3, 0, 2, 4, 1) + assertSortEquals(t, "text:desc", 3, 0, 2, 4, 1) + + assertSortEquals(t, "numeric", 1, 4, 2, 0, 3) + assertSortEquals(t, "numeric:asc", 1, 4, 2, 0, 3) + assertSortEquals(t, "numeric:reverse", 3, 0, 2, 4, 1) + assertSortEquals(t, "numeric:desc", 3, 0, 2, 4, 1) + + assertSortEquals(t, "value", 3, 2, 1, 0, 4) + assertSortEquals(t, "value:desc", 3, 2, 1, 0, 4) + assertSortEquals(t, "value:reverse", 4, 0, 1, 2, 3) + assertSortEquals(t, "value:asc", 4, 0, 1, 2, 3) +} + +func TestInvalidSortNames(t *testing.T) { + sorter, err := BuildSorter("bla") + assert.Nil(t, sorter) + assert.Error(t, err) + + sorter, err = BuildSorter("numeric:bla") + assert.Nil(t, sorter) + assert.Error(t, err) +} + +// Given a hardcoded set of values, and a sort name assert the order is as expected +func assertSortEquals(t *testing.T, sortName string, order ...int) { + sorter, err := BuildSorter(sortName) + assert.NoError(t, err) + + type orderedPair struct { + sorting.NameValuePair + id int + } + + vals := []orderedPair{ + {sorting.NameValuePair{Name: "qef", 
Value: 5}, 0}, + {sorting.NameValuePair{Name: "abc", Value: 12}, 1}, + {sorting.NameValuePair{Name: "egf", Value: 52}, 2}, + {sorting.NameValuePair{Name: "zac", Value: 52}, 3}, + {sorting.NameValuePair{Name: "bbb", Value: 3}, 4}, + } + + if len(order) != len(vals) { + panic("bad test") + } + + sorting.SortBy(vals, sorter, func(obj orderedPair) sorting.NameValuePair { + return obj.NameValuePair + }) + + for i := 0; i < len(vals); i++ { + assert.Equal(t, order[i], vals[i].id) + } + +} + +func TestDefaultSortResolves(t *testing.T) { + sortName, _, err := parseSort(DefaultSortFlag.Value) + assert.NoError(t, err) + + sorter, sorterErr := lookupSorter(sortName) + assert.NoError(t, sorterErr) + assert.NotNil(t, sorter) +} + +func TestBuildSortFlag(t *testing.T) { + flag := DefaultSortFlagWithDefault("contextual") + assert.Equal(t, "contextual", flag.Value) + + assert.Panics(t, func() { + DefaultSortFlagWithDefault("fake") + }) +} diff --git a/cmd/histo.go b/cmd/histo.go index c70d5a4..14fe6aa 100644 --- a/cmd/histo.go +++ b/cmd/histo.go @@ -5,6 +5,7 @@ import ( "os" "rare/cmd/helpers" "rare/pkg/aggregation" + "rare/pkg/aggregation/sorting" "rare/pkg/color" "rare/pkg/multiterm" "rare/pkg/multiterm/termrenderers" @@ -12,13 +13,8 @@ import ( "github.com/urfave/cli/v2" ) -func writeHistoOutput(writer *termrenderers.HistoWriter, counter *aggregation.MatchCounter, count int, reverse bool, sortByKey bool, atLeast int64) { - var items []aggregation.MatchPair - if sortByKey { - items = counter.ItemsSortedByKey(count, reverse) - } else { - items = counter.ItemsSorted(count, reverse) - } +func writeHistoOutput(writer *termrenderers.HistoWriter, counter *aggregation.MatchCounter, count int, sorter sorting.NameValueSorter, atLeast int64) { + items := counter.ItemsSortedBy(count, sorter) line := 0 writer.UpdateSamples(counter.Count()) for _, match := range items { @@ -32,12 +28,11 @@ func writeHistoOutput(writer *termrenderers.HistoWriter, counter *aggregation.Ma func histoFunction(c 
*cli.Context) error { var ( - topItems = c.Int("n") - reverseSort = c.Bool("reverse") - sortByKey = c.Bool("sk") - atLeast = c.Int64("atleast") - extra = c.Bool("extra") - all = c.Bool("all") + topItems = c.Int("n") + atLeast = c.Int64("atleast") + extra = c.Bool("extra") + all = c.Bool("all") + sortName = c.String(helpers.DefaultSortFlag.Name) ) counter := aggregation.NewCounter() @@ -47,6 +42,7 @@ func histoFunction(c *cli.Context) error { batcher := helpers.BuildBatcherFromArguments(c) ext := helpers.BuildExtractorFromArguments(c, batcher) + sorter := helpers.BuildSorterOrFail(sortName) progressString := func() string { return helpers.FWriteExtractorSummary(ext, @@ -55,7 +51,7 @@ func histoFunction(c *cli.Context) error { } helpers.RunAggregationLoop(ext, counter, func() { - writeHistoOutput(writer, counter, topItems, reverseSort, sortByKey, atLeast) + writeHistoOutput(writer, counter, topItems, sorter, atLeast) writer.WriteFooter(0, progressString()) writer.WriteFooter(1, batcher.StatusString()) }) @@ -66,7 +62,7 @@ func histoFunction(c *cli.Context) error { fmt.Println("Full Table:") vterm := multiterm.NewVirtualTerm() vWriter := termrenderers.NewHistogram(vterm, counter.GroupCount()) - writeHistoOutput(vWriter, counter, counter.GroupCount(), reverseSort, sortByKey, atLeast) + writeHistoOutput(vWriter, counter, counter.GroupCount(), sorter, atLeast) vterm.WriteToOutput(os.Stdout) fmt.Println(progressString()) @@ -121,15 +117,7 @@ func histogramCommand() *cli.Command { Usage: "Only show results if there are at least this many samples", Value: 0, }, - &cli.BoolFlag{ - Name: "reverse", - Usage: "Reverses the display sort-order", - }, - &cli.BoolFlag{ - Name: "sortkey", - Aliases: []string{"sk"}, - Usage: "Sort by key, rather than value", - }, + helpers.DefaultSortFlagWithDefault("value"), }, }) } diff --git a/cmd/tabulate.go b/cmd/tabulate.go index c37e947..76db110 100644 --- a/cmd/tabulate.go +++ b/cmd/tabulate.go @@ -22,12 +22,13 @@ func minColSlice(count int, 
cols []string) []string { func tabulateFunction(c *cli.Context) error { var ( - delim = c.String("delim") - numRows = c.Int("num") - numCols = c.Int("cols") - sortByKeys = c.Bool("sortkey") - rowtotals = c.Bool("rowtotal") || c.Bool("x") - coltotals = c.Bool("coltotal") || c.Bool("x") + delim = c.String("delim") + numRows = c.Int("num") + numCols = c.Int("cols") + rowtotals = c.Bool("rowtotal") || c.Bool("x") + coltotals = c.Bool("coltotal") || c.Bool("x") + sortRows = c.String("sort-rows") + sortCols = c.String("sort-cols") ) counter := aggregation.NewTable(delim) @@ -35,14 +36,11 @@ func tabulateFunction(c *cli.Context) error { batcher := helpers.BuildBatcherFromArguments(c) ext := helpers.BuildExtractorFromArguments(c, batcher) + rowSorter := helpers.BuildSorterOrFail(sortRows) + colSorter := helpers.BuildSorterOrFail(sortCols) helpers.RunAggregationLoop(ext, counter, func() { - var cols []string - if sortByKeys { - cols = counter.OrderedColumnsByName() - } else { - cols = counter.OrderedColumns() - } + cols := counter.OrderedColumns(colSorter) cols = minColSlice(numCols, cols) // Cap columns // Write header row @@ -58,12 +56,7 @@ func tabulateFunction(c *cli.Context) error { } // Write each row - var rows []*aggregation.TableRow - if sortByKeys { - rows = counter.OrderedRowsByName() - } else { - rows = counter.OrderedRows() - } + rows := counter.OrderedRows(rowSorter) line := 1 for i := 0; i < len(rows) && i < numRows; i++ { @@ -132,11 +125,6 @@ func tabulateCommand() *cli.Command { Usage: "Number of columns to display", Value: 10, }, - &cli.BoolFlag{ - Name: "sortkey", - Aliases: []string{"sk"}, - Usage: "Sort rows by key name rather than by values", - }, &cli.BoolFlag{ Name: "rowtotal", Usage: "Show row totals", @@ -150,6 +138,16 @@ func tabulateCommand() *cli.Command { Aliases: []string{"x"}, Usage: "Display row and column totals", }, + &cli.StringFlag{ + Name: "sort-rows", + Usage: helpers.DefaultSortFlag.Usage, + Value: "value", + }, + &cli.StringFlag{ + 
Name: "sort-cols", + Usage: helpers.DefaultSortFlag.Usage, + Value: "value", + }, }, }) } diff --git a/docs/usage/aggregators.md b/docs/usage/aggregators.md index e4a1c09..6a52cfe 100644 --- a/docs/usage/aggregators.md +++ b/docs/usage/aggregators.md @@ -210,3 +210,27 @@ Matched: 1,035,666 / 1,035,666 (R: 8; C: 61) ``` ![Gif of heatmap](../images/heatmap.gif) + +## Sorting + +Many of the aggregators support changing the order in which the data is displayed. You +can change this from the default either by setting the `--sort` flag or `--sort-rows` and `--sort-cols` +flags for tables. + +These are the supported sorters: + +* `text` -- Pure alphanumeric sort. Fastest, but can sort numbers oddly (eg. would sort 1, 11, 2, ...) +* `numeric` -- Attempts to parse the value as numeric. If unable to parse, falls back to alphanumeric (Default) +* `contextual` -- Tries to use context to be smart about sorting, eg if it sees a month or weekday name, will sort by that. Falls back to numeric +* `date` -- Parses the value as if it were a date. Falls back to contextual +* `value` -- Orders the results based on their aggregated *value*. eg. would put the most frequent item at the top. 
Defaults to descending order + +### Modifiers + +In addition to the sorting method, you can also modify the sort by adding a colon and the modifier, eg: `numeric:desc` + +These are the supported modifiers: + +* `:reverse` -- Reverse of the "default" +* `:asc` -- Ascending order +* `:desc` -- Descending order diff --git a/pkg/aggregation/counter.go b/pkg/aggregation/counter.go index d816cca..7f2d4b2 100644 --- a/pkg/aggregation/counter.go +++ b/pkg/aggregation/counter.go @@ -1,9 +1,9 @@ package aggregation import ( + "rare/pkg/aggregation/sorting" "rare/pkg/expressions" "rare/pkg/stringSplitter" - "sort" "strconv" ) @@ -90,54 +90,17 @@ func minSlice(items []MatchPair, count int) []MatchPair { return items[:count] } -func (s *MatchCounter) ItemsSorted(count int, reverse bool) []MatchPair { +func (s *MatchCounter) ItemsSortedBy(count int, sorter sorting.NameValueSorter) []MatchPair { items := s.Items() - - sorter := func(i, j int) bool { - c0 := items[i].Item.count - c1 := items[j].Item.count - if c0 == c1 { - return items[i].Name < items[j].Name + sorting.SortBy(items, sorter, func(obj MatchPair) sorting.NameValuePair { + return sorting.NameValuePair{ + Name: obj.Name, + Value: obj.Item.count, } - return c0 > c1 - } - - if reverse { - sort.Slice(items, func(i, j int) bool { - return !sorter(i, j) - }) - } else { - sort.Slice(items, sorter) - } + }) return minSlice(items, count) } -func (s *MatchCounter) ItemsSortedByKey(count int, reverse bool) []MatchPair { - items := s.Items() - - smartKeySort := func(i, j int) bool { - num0, err0 := strconv.ParseFloat(items[i].Name, 64) - num1, err1 := strconv.ParseFloat(items[j].Name, 64) - if err0 != nil || err1 != nil { - return items[i].Name < items[j].Name - } - return num0 < num1 - } - - if reverse { - sort.Slice(items, func(i, j int) bool { - return !smartKeySort(i, j) - }) - } else { - sort.Slice(items, smartKeySort) - } - return minSlice(items, count) -} - -func (s *MatchCounter) ItemsTop(count int) []MatchPair { - return 
s.ItemsSorted(count, false) -} - func (s *MatchItem) Count() int64 { return s.count } diff --git a/pkg/aggregation/counter_test.go b/pkg/aggregation/counter_test.go index 96e4983..14b6922 100644 --- a/pkg/aggregation/counter_test.go +++ b/pkg/aggregation/counter_test.go @@ -1,6 +1,7 @@ package aggregation import ( + "rare/pkg/aggregation/sorting" "testing" "github.com/stretchr/testify/assert" @@ -25,7 +26,7 @@ func TestInOrderItems(t *testing.T) { val.Sample("abc") val.Sample("qq") - items := val.ItemsTop(2) + items := val.ItemsSortedBy(2, sorting.NVValueSorter) assert.Equal(t, 2, len(items), "Expected top 2") assert.Equal(t, "abc", items[0].Name) @@ -43,7 +44,7 @@ func TestInOrderItemsByKey(t *testing.T) { val.Sample("qq\x002") val.Sample("qq\x00bad") - items := val.ItemsSortedByKey(3, false) + items := val.ItemsSortedBy(3, sorting.ValueNilSorter(sorting.ByName)) assert.Equal(t, 3, len(items)) assert.Equal(t, 3, val.GroupCount()) @@ -55,7 +56,7 @@ func TestInOrderItemsByKey(t *testing.T) { assert.Equal(t, int64(3), items[1].Item.Count()) assert.Equal(t, "test", items[2].Name) - reverseSort := val.ItemsSortedByKey(3, true) + reverseSort := val.ItemsSortedBy(3, sorting.Reverse(sorting.ValueNilSorter(sorting.ByName))) assert.Equal(t, 3, len(reverseSort)) assert.Equal(t, "test", reverseSort[0].Name) } diff --git a/pkg/aggregation/countersubkey.go b/pkg/aggregation/countersubkey.go index 739cf74..1f7b037 100644 --- a/pkg/aggregation/countersubkey.go +++ b/pkg/aggregation/countersubkey.go @@ -1,9 +1,9 @@ package aggregation import ( + "rare/pkg/aggregation/sorting" "rare/pkg/expressions" "rare/pkg/stringSplitter" - "sort" "strconv" ) @@ -138,21 +138,14 @@ func (s *SubKeyCounter) Items() []SubKeyNamedItem { return ret } -func (s *SubKeyCounter) ItemsSorted(reverse bool) []SubKeyNamedItem { +func (s *SubKeyCounter) ItemsSorted(sorter sorting.NameValueSorter) []SubKeyNamedItem { items := s.Items() - - sorter := func(i, j int) bool { - return items[i].Name < items[j].Name - 
} - - if reverse { - sort.Slice(items, func(i, j int) bool { - return !sorter(i, j) - }) - } else { - sort.Slice(items, sorter) - } - + sorting.SortBy(items, sorter, func(obj SubKeyNamedItem) sorting.NameValuePair { + return sorting.NameValuePair{ + Name: obj.Name, + Value: obj.Item.count, + } + }) return items } diff --git a/pkg/aggregation/countersubkey_test.go b/pkg/aggregation/countersubkey_test.go index 859ea3e..441b15d 100644 --- a/pkg/aggregation/countersubkey_test.go +++ b/pkg/aggregation/countersubkey_test.go @@ -2,6 +2,7 @@ package aggregation import ( "fmt" + "rare/pkg/aggregation/sorting" "testing" "github.com/stretchr/testify/assert" @@ -12,8 +13,7 @@ func TestSubKeyEmpty(t *testing.T) { assert.Equal(t, uint64(0), sk.ParseErrors()) assert.Len(t, sk.SubKeys(), 0) assert.Len(t, sk.Items(), 0) - assert.Len(t, sk.ItemsSorted(false), 0) - assert.Len(t, sk.ItemsSorted(true), 0) + assert.Len(t, sk.ItemsSorted(sorting.NVNameSorter), 0) } func TestSubKeyWithOnlyKeys(t *testing.T) { @@ -25,7 +25,7 @@ func TestSubKeyWithOnlyKeys(t *testing.T) { assert.Len(t, sk.SubKeys(), 1) assert.Len(t, sk.Items(), 2) - items := sk.ItemsSorted(false) + items := sk.ItemsSorted(sorting.NVNameSorter) assert.Equal(t, "test", items[0].Name) assert.Equal(t, int64(1), items[0].Item.Count()) assert.Equal(t, "test2", items[1].Name) @@ -42,7 +42,7 @@ func TestSubKeyWithSubKeys(t *testing.T) { assert.Len(t, sk.SubKeys(), 2) assert.Len(t, sk.Items(), 2) - items := sk.ItemsSorted(false) + items := sk.ItemsSorted(sorting.NVNameSorter) assert.Len(t, items[0].Item.Items(), 2) assert.Len(t, items[1].Item.Items(), 2) } @@ -54,7 +54,7 @@ func TestComplexSubKeys(t *testing.T) { sk.SampleValue("test", "100", 3) sk.SampleValue("test", "200", 1) - items := sk.ItemsSorted(false) + items := sk.ItemsSorted(sorting.NVNameSorter) assert.Equal(t, items[0].Name, "test") assert.Len(t, items[0].Item.Items(), 3) assert.Equal(t, items[0].Item.Items()[0], int64(3)) diff --git 
a/pkg/aggregation/sorting/contextual.go b/pkg/aggregation/sorting/contextual.go new file mode 100644 index 0000000..511ee7c --- /dev/null +++ b/pkg/aggregation/sorting/contextual.go @@ -0,0 +1,105 @@ +package sorting + +import "strings" + +type sortSet map[string]int + +// But what about localization?! +// Unfortunately (or fortunately) golang only can encode dates in english, which helps for the time being + +var weekdays = sortSet{ + "sunday": 0, + "monday": 1, + "tuesday": 2, + "wednesday": 3, + "thursday": 4, + "friday": 5, + "saturday": 6, + + "sun": 0, + "mon": 1, + "tue": 2, + "tues": 2, + "wed": 3, + "thu": 4, + "thur": 4, + "thurs": 4, + "fri": 5, + "sat": 6, +} + +var months = sortSet{ + "january": 0, + "jan": 0, + "february": 1, + "feb": 1, + "march": 2, + "mar": 2, + "april": 3, + "apr": 3, + "may": 4, + "june": 5, + "jun": 5, + "july": 6, + "jul": 6, + "august": 7, + "aug": 7, + "september": 8, + "sep": 8, + "sept": 8, + "october": 9, + "oct": 9, + "november": 10, + "nov": 10, + "december": 11, + "dec": 11, +} + +var sortSets = [...]sortSet{ + weekdays, + months, +} + +func ByContextualEx(fallbackSort NameSorter) NameSorter { + var set sortSet + fallback := false + + return func(a, b string) bool { + if !fallback && set == nil { + set = inferSortSetByValue(a) + if set == nil { + fallback = true + } + } + + // Try using the set + if !fallback { + lowerA := strings.ToLower(a) + lowerB := strings.ToLower(b) + v0, ok0 := set[lowerA] + v1, ok1 := set[lowerB] + if !ok0 || !ok1 { + fallback = true + } else { + return v0 < v1 + } + } + + // Fallback + return fallbackSort(a, b) + } +} + +func ByContextual() NameSorter { + return ByContextualEx(ByNameSmart) +} + +func inferSortSetByValue(val string) sortSet { + val = strings.ToLower(val) + for _, set := range sortSets { + if _, ok := set[val]; ok { + return set + } + } + return nil +} diff --git a/pkg/aggregation/sorting/contextual_test.go b/pkg/aggregation/sorting/contextual_test.go new file mode 100644 index 
0000000..e51eb8e --- /dev/null +++ b/pkg/aggregation/sorting/contextual_test.go @@ -0,0 +1,28 @@ +package sorting + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDayOfWeekSort(t *testing.T) { + list := []string{ + "wed", + "tues", + "mon", + "thurs", + } + sorter := ByContextual() + Sort(list, sorter) + + assert.Equal(t, []string{"mon", "tues", "wed", "thurs"}, list) +} + +func TestFallbackSort(t *testing.T) { + list := []string{"wed", "abc", "00"} + sorter := ByContextual() + Sort(list, sorter) + + assert.Equal(t, []string{"00", "abc", "wed"}, list) +} diff --git a/pkg/aggregation/sorting/dates.go b/pkg/aggregation/sorting/dates.go new file mode 100644 index 0000000..554eeac --- /dev/null +++ b/pkg/aggregation/sorting/dates.go @@ -0,0 +1,39 @@ +package sorting + +import ( + "time" + + "github.com/araddon/dateparse" +) + +func ByDate(fallbackSort NameSorter) NameSorter { + format := "" + fallback := false + + return func(a, b string) bool { + if !fallback { + if format == "" { + var err error + if format, err = dateparse.ParseFormat(a); err != nil { + fallback = true + } + } + + if format != "" { + d0, err0 := time.Parse(format, a) + d1, err1 := time.Parse(format, b) + if err0 == nil && err1 == nil { + return d0.Before(d1) + } else { + fallback = true + } + } + } + + return fallbackSort(a, b) + } +} + +func ByDateWithContextual() NameSorter { + return ByDate(ByContextual()) +} diff --git a/pkg/aggregation/sorting/dates_test.go b/pkg/aggregation/sorting/dates_test.go new file mode 100644 index 0000000..8de9dc8 --- /dev/null +++ b/pkg/aggregation/sorting/dates_test.go @@ -0,0 +1,38 @@ +package sorting + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestDateSort(t *testing.T) { + vals := []string{"2022-09-03", "2022-09-02", "2021-09-01"} + + Sort(vals, ByDate(func(a, b string) bool { + panic("fail") + })) + + assert.Equal(t, []string{"2021-09-01", "2022-09-02", "2022-09-03"}, vals) +} + +func 
TestDateFallback(t *testing.T) { + vals := []string{"2022-09-03", "2022-09-02", "notadate", "2021-09-01"} + + fellback := false + Sort(vals, ByDate(func(a, b string) bool { + fellback = true + return ByName(a, b) + })) + + assert.True(t, fellback) + assert.Equal(t, []string{"2021-09-01", "2022-09-02", "2022-09-03", "notadate"}, vals) +} + +func TestByDateWithContextual(t *testing.T) { + vals := []string{"2022-09-03", "2022-09-02", "2021-09-01"} + + Sort(vals, ByDateWithContextual()) + + assert.Equal(t, []string{"2021-09-01", "2022-09-02", "2022-09-03"}, vals) +} diff --git a/pkg/aggregation/sorting/namevalue.go b/pkg/aggregation/sorting/namevalue.go new file mode 100644 index 0000000..ab1bc26 --- /dev/null +++ b/pkg/aggregation/sorting/namevalue.go @@ -0,0 +1,29 @@ +package sorting + +type NameValuePair struct { + Name string + Value int64 +} + +type NameValueSorter Sorter[NameValuePair] + +func ValueSorterEx(fallback NameSorter) NameValueSorter { + return func(a, b NameValuePair) bool { + if a.Value == b.Value { + return fallback(a.Name, b.Name) + } + return a.Value < b.Value + } +} + +func ValueNilSorter(sorter NameSorter) NameValueSorter { + return func(a, b NameValuePair) bool { + return sorter(a.Name, b.Name) + } +} + +var ( + NVValueSorter = Reverse(ValueSorterEx(Reverse(ByName))) + NVNameSorter = ValueNilSorter(ByName) + NVSmartSorter = ValueNilSorter(ByNameSmart) +) diff --git a/pkg/aggregation/sorting/namevalue_test.go b/pkg/aggregation/sorting/namevalue_test.go new file mode 100644 index 0000000..4dc092b --- /dev/null +++ b/pkg/aggregation/sorting/namevalue_test.go @@ -0,0 +1,41 @@ +package sorting + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNameValueSorter(t *testing.T) { + arr := []NameValuePair{ + {"b", 123}, + {"q", 44}, + {"a", 44}, + } + + Sort(arr, NVValueSorter) + + expected := []NameValuePair{ + {"b", 123}, + {"a", 44}, + {"q", 44}, + } + assert.Equal(t, expected, arr) +} + +func TestNameValueNilSorter(t 
*testing.T) { + arr := []NameValuePair{ + {"b", 123}, + {"q", 44}, + {"a", 44}, + } + + Sort(arr, ValueNilSorter(ByName)) + + expected := []NameValuePair{ + {"a", 44}, + {"b", 123}, + {"q", 44}, + } + assert.Equal(t, expected, arr) +} diff --git a/pkg/aggregation/sorting/sorter.go b/pkg/aggregation/sorting/sorter.go new file mode 100644 index 0000000..4e8dc10 --- /dev/null +++ b/pkg/aggregation/sorting/sorter.go @@ -0,0 +1,50 @@ +package sorting + +import "sort" + +type Sorter[T any] func(a, b T) bool + +// Wrapped sorter (so sort package doesn't have to reflect in sort.Slice) +// Slight performance increase + +type wrappedSorter[T any] struct { + arr []T + less func(a, b T) bool +} + +var _ sort.Interface = &wrappedSorter[string]{} + +func (s *wrappedSorter[T]) Len() int { + return len(s.arr) +} + +func (s *wrappedSorter[T]) Swap(i, j int) { + s.arr[i], s.arr[j] = s.arr[j], s.arr[i] +} + +func (s *wrappedSorter[T]) Less(i, j int) bool { + return s.less(s.arr[i], s.arr[j]) +} + +// Sorting helpers + +// Sort an array that can be sorted by Sorter +func Sort[TElem any, TSort ~func(a, b TElem) bool](arr []TElem, sorter TSort) { + ws := wrappedSorter[TElem]{arr, sorter} + sort.Sort(&ws) +} + +// SortBy sorts an array of elements by a sub-element (obtained via extractor) that can be sorted by sorter +func SortBy[TElem any, TBy any, TSort ~func(a, b TBy) bool](arr []TElem, sorter TSort, extractor func(obj TElem) TBy) { + ws := wrappedSorter[TElem]{arr, func(a, b TElem) bool { + return sorter(extractor(a), extractor(b)) + }} + sort.Sort(&ws) +} + +// Reverse a sorter (`not` the comparer) +func Reverse[TElem any, TSort ~func(a, b TElem) bool](sorter TSort) TSort { + return func(a, b TElem) bool { + return !sorter(a, b) + } +} diff --git a/pkg/aggregation/sorting/sorter_test.go b/pkg/aggregation/sorting/sorter_test.go new file mode 100644 index 0000000..5eb5fbf --- /dev/null +++ b/pkg/aggregation/sorting/sorter_test.go @@ -0,0 +1,73 @@ +package sorting + +import ( + "sort" + "testing" + + 
"github.com/stretchr/testify/assert" +) + +func TestWrappedSorter(t *testing.T) { + ws := wrappedSorter[int]{ + arr: []int{5, 2, 3, 1}, + less: func(a, b int) bool { return a < b }, + } + assert.Equal(t, 4, ws.Len()) + assert.True(t, ws.less(1, 2)) + ws.Swap(1, 2) + assert.Equal(t, []int{5, 3, 2, 1}, ws.arr) + + sort.Sort(&ws) + assert.Equal(t, []int{1, 2, 3, 5}, ws.arr) +} + +func TestSort(t *testing.T) { + arr := []int{5, 4, 1, 2, 3} + Sort(arr, func(a, b int) bool { return a < b }) + assert.Equal(t, []int{1, 2, 3, 4, 5}, arr) +} + +func TestReverseSort(t *testing.T) { + arr := []int{5, 4, 1, 2, 3} + Sort(arr, Reverse(func(a, b int) bool { return a < b })) + assert.Equal(t, []int{5, 4, 3, 2, 1}, arr) +} + +func TestSortBy(t *testing.T) { + type w struct { + a string + } + list := []w{ + {"b"}, + {"c"}, + {"d"}, + {"a"}, + {"b"}, + } + SortBy(list, ByName, func(obj w) string { return obj.a }) + + assert.Equal(t, []w{ + {"a"}, + {"b"}, + {"b"}, + {"c"}, + {"d"}, + }, list) +} + +// BenchmarkExtractSort-4 3838752 329.0 ns/op 64 B/op 2 allocs/op +func BenchmarkExtractSort(b *testing.B) { + type wrappedStruct struct { + s string + } + list := []wrappedStruct{ + {"b"}, + {"c"}, + {"d"}, + {"e"}, + {"f"}, + } + for i := 0; i < b.N; i++ { + SortBy(list, ByName, func(obj wrappedStruct) string { return obj.s }) + } +} diff --git a/pkg/aggregation/sorting/strings.go b/pkg/aggregation/sorting/strings.go new file mode 100644 index 0000000..471d369 --- /dev/null +++ b/pkg/aggregation/sorting/strings.go @@ -0,0 +1,22 @@ +package sorting + +import ( + "strconv" +) + +type NameSorter Sorter[string] + +// Sorters + +func ByName(a, b string) bool { + return a < b +} + +func ByNameSmart(a, b string) bool { + v0, err0 := strconv.ParseFloat(a, 64) + v1, err1 := strconv.ParseFloat(b, 64) + if err0 == nil && err1 == nil { + return v0 < v1 + } + return a < b +} diff --git a/pkg/aggregation/sorting/strings_test.go b/pkg/aggregation/sorting/strings_test.go new file mode 100644 index 
0000000..99b7458 --- /dev/null +++ b/pkg/aggregation/sorting/strings_test.go @@ -0,0 +1,47 @@ +package sorting + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNameSort(t *testing.T) { + assert.True(t, ByName("a", "b")) +} + +func TestNameSmartSort(t *testing.T) { + assert.True(t, ByNameSmart("a", "b")) + assert.True(t, ByNameSmart("0.0", "1.0")) + assert.True(t, ByNameSmart("1", "b")) +} + +func TestSortStrings(t *testing.T) { + arr := []string{"b", "c", "a", "q"} + Sort(arr, ByNameSmart) + assert.Equal(t, []string{"a", "b", "c", "q"}, arr) +} + +func TestSortStringsBy(t *testing.T) { + type wrapper struct { + s string + } + arr := []wrapper{ + {"b"}, + {"a"}, + {"c"}, + } + SortBy(arr, ByName, func(w wrapper) string { return w.s }) + + assert.Equal(t, "a", arr[0].s) + assert.Equal(t, "b", arr[1].s) + assert.Equal(t, "c", arr[2].s) +} + +// wrapped BenchmarkStringSort-4 6859735 177.0 ns/op 32 B/op 1 allocs/op +func BenchmarkStringSort(b *testing.B) { + list := []string{"b", "c", "d", "e", "f"} + for i := 0; i < b.N; i++ { + Sort(list, ByName) + } +} diff --git a/pkg/aggregation/table.go b/pkg/aggregation/table.go index 1be0be8..0906edb 100644 --- a/pkg/aggregation/table.go +++ b/pkg/aggregation/table.go @@ -2,8 +2,8 @@ package aggregation import ( "math" + "rare/pkg/aggregation/sorting" "rare/pkg/stringSplitter" - "sort" "strconv" ) @@ -84,29 +84,14 @@ func (s *TableAggregator) Columns() []string { return keys } -// OrderedColumns returns columns ordered by the column's value first -func (s *TableAggregator) OrderedColumns() []string { +func (s *TableAggregator) OrderedColumns(sorter sorting.NameValueSorter) []string { keys := s.Columns() - - sort.Slice(keys, func(i, j int) bool { - c0 := s.cols[keys[i]] - c1 := s.cols[keys[j]] - if c0 == c1 { - return keys[i] < keys[j] + sorting.SortBy(keys, sorter, func(name string) sorting.NameValuePair { + return sorting.NameValuePair{ + Name: name, + Value: s.cols[name], } - return c0 > c1 }) - 
- return keys -} - -func (s *TableAggregator) OrderedColumnsByName() []string { - keys := s.Columns() - - sort.Slice(keys, func(i, j int) bool { - return keys[i] < keys[j] - }) - return keys } @@ -124,28 +109,14 @@ func (s *TableAggregator) Rows() []*TableRow { return rows } -// OrderedRows returns rows ordered first by the sum of the row, and then by name if equal -func (s *TableAggregator) OrderedRows() []*TableRow { +func (s *TableAggregator) OrderedRows(sorter sorting.NameValueSorter) []*TableRow { rows := s.Rows() - - sort.Slice(rows, func(i, j int) bool { - if rows[i].sum == rows[j].sum { - return rows[i].name < rows[j].name + sorting.SortBy(rows, sorter, func(obj *TableRow) sorting.NameValuePair { + return sorting.NameValuePair{ + Name: obj.name, + Value: obj.sum, } - return rows[i].sum > rows[j].sum }) - - return rows -} - -// OrderedRowsByName orders rows by name -func (s *TableAggregator) OrderedRowsByName() []*TableRow { - rows := s.Rows() - - sort.Slice(rows, func(i, j int) bool { - return rows[i].name < rows[j].name - }) - return rows } diff --git a/pkg/aggregation/table_test.go b/pkg/aggregation/table_test.go index 763a67e..f8710ea 100644 --- a/pkg/aggregation/table_test.go +++ b/pkg/aggregation/table_test.go @@ -1,6 +1,7 @@ package aggregation import ( + "rare/pkg/aggregation/sorting" "testing" "github.com/stretchr/testify/assert" @@ -15,9 +16,9 @@ func TestSimpleTable(t *testing.T) { table.Sample("b c") table.Sample("b b q") // invalid - assert.Equal(t, []string{"b", "a"}, table.OrderedColumns()) + assert.Equal(t, []string{"b", "a"}, table.OrderedColumns(sorting.NVValueSorter)) - rows := table.OrderedRows() + rows := table.OrderedRows(sorting.NVValueSorter) assert.Equal(t, 2, len(rows)) assert.Equal(t, "c", rows[0].Name()) assert.Equal(t, "b", rows[1].Name()) @@ -31,7 +32,7 @@ func TestSimpleTable(t *testing.T) { assert.Contains(t, table.Columns(), "a") assert.Contains(t, table.Columns(), "b") - assert.Equal(t, []string{"b", "a"}, 
table.OrderedColumns()) + assert.Equal(t, []string{"b", "a"}, table.OrderedColumns(sorting.NVValueSorter)) assert.Equal(t, uint64(1), table.ParseErrors()) // Col totals @@ -55,15 +56,15 @@ func TestTableMultiIncrement(t *testing.T) { table.Sample("b c -1") // Row names and col vals - rows := table.OrderedRowsByName() + rows := table.OrderedRows(sorting.NVNameSorter) assert.Equal(t, "b", rows[0].Name()) assert.Equal(t, int64(1), rows[0].Value("a")) assert.Equal(t, "c", rows[1].Name()) assert.Equal(t, int64(5), rows[1].Value("b")) // Column names - assert.Equal(t, []string{"a", "b"}, table.OrderedColumnsByName()) - assert.Equal(t, []string{"b", "a"}, table.OrderedColumns()) + assert.Equal(t, []string{"a", "b"}, table.OrderedColumns(sorting.NVNameSorter)) + assert.Equal(t, []string{"b", "a"}, table.OrderedColumns(sorting.NVValueSorter)) // Totals assert.Equal(t, int64(5), table.ColTotal("b")) diff --git a/pkg/multiterm/termrenderers/heatmap.go b/pkg/multiterm/termrenderers/heatmap.go index 9a92849..de4b8f3 100644 --- a/pkg/multiterm/termrenderers/heatmap.go +++ b/pkg/multiterm/termrenderers/heatmap.go @@ -2,6 +2,7 @@ package termrenderers import ( "rare/pkg/aggregation" + "rare/pkg/aggregation/sorting" "rare/pkg/color" "rare/pkg/humanize" "rare/pkg/multiterm" @@ -28,15 +29,15 @@ func NewHeatmap(term multiterm.MultilineTerm, rows, cols int) *Heatmap { } } -func (s *Heatmap) WriteTable(agg *aggregation.TableAggregator) { +func (s *Heatmap) WriteTable(agg *aggregation.TableAggregator, rowSorter, colSorter sorting.NameValueSorter) { s.UpdateMinMaxFromData(agg) // Write header - colNames := agg.OrderedColumnsByName() // TODO: Smart? eg. by number? + colNames := agg.OrderedColumns(colSorter) colCount := s.WriteHeader(colNames...) // Each row... 
- rows := agg.OrderedRowsByName() + rows := agg.OrderedRows(rowSorter) rowCount := mini(len(rows), s.rowCount) for i := 0; i < rowCount; i++ { s.WriteRow(i, rows[i], colNames[:colCount]) diff --git a/pkg/multiterm/termrenderers/heatmap_test.go b/pkg/multiterm/termrenderers/heatmap_test.go index b0f49da..155b49d 100644 --- a/pkg/multiterm/termrenderers/heatmap_test.go +++ b/pkg/multiterm/termrenderers/heatmap_test.go @@ -2,6 +2,7 @@ package termrenderers import ( "rare/pkg/aggregation" + "rare/pkg/aggregation/sorting" "rare/pkg/multiterm" "testing" @@ -16,7 +17,7 @@ func TestSimpleHeatmap(t *testing.T) { agg.Sample("test abc") hm.maxRowKeyWidth = 4 - hm.WriteTable(agg) + hm.WriteTable(agg, sorting.NVNameSorter, sorting.NVNameSorter) assert.Equal(t, 3, vt.LineCount()) assert.Equal(t, " - 1 - 1 - 1", vt.Get(0)) @@ -35,7 +36,7 @@ func TestUnicodeHeatmap(t *testing.T) { agg.Sample("qef test") hm.maxRowKeyWidth = 4 - hm.WriteTable(agg) + hm.WriteTable(agg, sorting.NVNameSorter, sorting.NVNameSorter) assert.Equal(t, 4, vt.LineCount()) assert.Equal(t, " - 0 - 0 9 1", vt.Get(0)) @@ -59,7 +60,7 @@ func TestCompressedHeatmap(t *testing.T) { agg.Sample("test abc4") hm.maxRowKeyWidth = 4 - hm.WriteTable(agg) + hm.WriteTable(agg, sorting.NVNameSorter, sorting.NVNameSorter) hm.WriteFooter(0, "footer") assert.Equal(t, 6, vt.LineCount())