-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathsort.go
224 lines (198 loc) · 5.97 KB
/
sort.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
package csv
import (
"fmt"
"github.com/wildducktheories/go-csv/utils"
"sort"
)
// An adapter that converts a slice of CSV records into an instance of sort.Interface using the
// specified comparators, in order, to compare records.
type Sortable struct {
Keys []string
Data []Record
Comparators []SortComparator
}
// An implementation of sort.Interface.Len()
func (b *Sortable) Len() int {
return len(b.Data)
}
// An implementation of sort.Interface.Swap()
func (b *Sortable) Swap(i, j int) {
b.Data[i], b.Data[j] = b.Data[j], b.Data[i]
}
// An implementation of sort.Interface.Less()
func (b *Sortable) Less(i, j int) bool {
for _, c := range b.Comparators {
if c(i, j) {
return true
} else if c(j, i) {
return false
}
}
return false
}
// Derives a SortProcess from the receiver. Note that it isn't safe
// to run multiple processes derived from the same Sortable at the same
// time.
func (b *Sortable) AsSortProcess() *SortProcess {
return &SortProcess{
Keys: b.Keys,
AsSort: func(data []Record) sort.Interface {
b.Data = data
return b
},
}
}
// Answer a comparator for the field named k, using the string comparator specified by less.
func (b *Sortable) Comparator(k string, less StringComparator) SortComparator {
return func(i, j int) bool {
return less(b.Data[i].Get(k), b.Data[j].Get(k))
}
}
// Answers true if l is less than r, according to a lexical comparison
func LessStrings(l, r string) bool {
return l < r
}
// Answers true if the numeric value of l is less than r according to a numerical
// comparison (if l and r are both parseable as floats) or according to a lexical
// comparison otherwise.
func LessNumericStrings(l, r string) bool {
var lf, rf float64
if _, err := fmt.Sscanf(l, "%f", &lf); err != nil {
return LessStrings(l, r)
} else if _, err := fmt.Sscanf(r, "%f", &rf); err != nil {
return LessStrings(l, r)
} else {
return lf < rf
}
}
// Specifies the keys to be used by a CSV sort.
type SortKeys struct {
Keys []string // list of columns to use for sorting
Numeric []string // list of columns for which a numerical string comparison is used
Reversed []string // list of columns for which the comparison is reversed
}
// Answer a Sort for the specified slice of CSV records, using the comparators derived from the
// keys specified by the receiver.
func (p *SortKeys) AsSort(data []Record) sort.Interface {
return p.AsSortable(data)
}
// Answer a Sortable whose comparators have been initialized with string or numerical string
// comparators according the specification of the receiver.
func (p *SortKeys) AsSortable(data []Record) *Sortable {
bk := &Sortable{
Keys: p.Keys,
Data: data,
Comparators: make([]SortComparator, len(p.Keys), len(p.Keys)),
}
for x, c := range p.AsRecordComparators() {
c := c
bk.Comparators[x] = func(i, j int) bool {
return c(bk.Data[i], bk.Data[j])
}
}
return bk
}
// Derive a SortProcess from the receiver.
func (p *SortKeys) AsSortProcess() *SortProcess {
return &SortProcess{
AsSort: p.AsSort,
Keys: p.Keys,
}
}
// Derive a StringProjection from the sort keys.
func (p *SortKeys) AsStringProjection() StringProjection {
return func(r Record) []string {
result := make([]string, len(p.Keys))
for i, k := range p.Keys {
result[i] = r.Get(k)
}
return result
}
}
// Answers a comparator that can compare two slices.
func (p *SortKeys) AsStringSliceComparator() StringSliceComparator {
numeric := utils.NewIndex(p.Numeric)
reverseIndex := utils.NewIndex(p.Reversed)
comparators := make([]StringComparator, len(p.Keys))
for i, k := range p.Keys {
if numeric.Contains(k) {
comparators[i] = LessNumericStrings
} else {
comparators[i] = LessStrings
}
if reverseIndex.Contains(k) {
f := comparators[i]
comparators[i] = func(l, r string) bool {
return !f(l, r)
}
}
}
return AsStringSliceComparator(comparators)
}
// Answers a slice of comparators that can compare two records.
func (p *SortKeys) AsRecordComparators() []RecordComparator {
numeric := utils.NewIndex(p.Numeric)
reverseIndex := utils.NewIndex(p.Reversed)
comparators := make([]RecordComparator, len(p.Keys))
for i, k := range p.Keys {
k := k
if numeric.Contains(k) {
comparators[i] = func(l, r Record) bool {
return LessNumericStrings(l.Get(k), r.Get(k))
}
} else {
comparators[i] = func(l, r Record) bool {
return LessStrings(l.Get(k), r.Get(k))
}
}
if reverseIndex.Contains(k) {
f := comparators[i]
comparators[i] = func(l, r Record) bool {
return !f(l, r)
}
}
}
return comparators
}
// Answers a comparator that can compare two records.
func (p *SortKeys) AsRecordComparator() RecordComparator {
return AsRecordComparator(p.AsRecordComparators())
}
// A process, which given a CSV reader, sorts a stream of Records using the sort
// specified by the result of the AsSort function. The stream is checked to verify
// that it has the specified keys.
type SortProcess struct {
AsSort func(data []Record) sort.Interface
Keys []string
}
// Run the sort process specified by the receiver against the specified CSV reader,
// writing the results to a Writer constructed from the specified builder.
// Termination of the sort process is signalled by writing nil or at most one error
// into the specified error channel.
// It is an error to apply the receiving process to a reader whose Header is not
// a strict superset of the receiver's Keys.
func (p *SortProcess) Run(reader Reader, builder WriterBuilder, errCh chan<- error) {
errCh <- func() (err error) {
defer reader.Close()
keys := p.Keys
// get the data header
dataHeader := reader.Header()
writer := builder(dataHeader)
defer writer.Close(err)
_, x, _ := utils.Intersect(keys, dataHeader)
if len(x) != 0 {
return fmt.Errorf("invalid keys: %v", x)
}
if all, err := ReadAll(reader); err != nil {
return err
} else {
sort.Sort(p.AsSort(all))
for _, e := range all {
if err := writer.Write(e); err != nil {
return err
}
}
}
return nil
}()
}