Skip to content

Commit

Permalink
Add feature to specify few columns to consider for value hash
Browse files Browse the repository at this point in the history
  • Loading branch information
aswinkarthik committed Apr 16, 2018
1 parent 25df6d8 commit 7adecca
Show file tree
Hide file tree
Showing 7 changed files with 139 additions and 48 deletions.
34 changes: 22 additions & 12 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import (
"io"
"log"
"os"

"github.com/aswinkarthik93/csvdiff/pkg/digest"
)

var config Config
Expand All @@ -13,33 +15,41 @@ func init() {
}

type Config struct {
KeyPositions []int
Base string
Delta string
Additions string
Modifications string
PrimaryKeyPositions []int
ValueColumnPositions []int
Base string
Delta string
Additions string
Modifications string
}

func (c Config) GetKeyPositions() []int {
if len(c.KeyPositions) > 0 {
return c.KeyPositions
// GetPrimaryKeys returns the configured primary key column positions.
// When none are configured it falls back to the single position 0.
func (c *Config) GetPrimaryKeys() digest.Positions {
	if len(c.PrimaryKeyPositions) == 0 {
		return digest.Positions{0}
	}
	return c.PrimaryKeyPositions
}

func (c Config) GetBaseReader() io.Reader {
// GetValueColumns returns the configured value column positions.
// When none are configured it returns an empty, non-nil Positions.
func (c *Config) GetValueColumns() digest.Positions {
	if len(c.ValueColumnPositions) == 0 {
		return digest.Positions{}
	}
	return c.ValueColumnPositions
}

// GetBaseReader returns the io.Reader produced by getReader for c.Base.
func (c *Config) GetBaseReader() io.Reader {
return getReader(c.Base)
}

func (c Config) GetDeltaReader() io.Reader {
// GetDeltaReader returns the io.Reader produced by getReader for c.Delta.
func (c *Config) GetDeltaReader() io.Reader {
return getReader(c.Delta)
}

func (c Config) AdditionsWriter() io.WriteCloser {
// AdditionsWriter returns the io.WriteCloser produced by getWriter for
// c.Additions.
func (c *Config) AdditionsWriter() io.WriteCloser {
return getWriter(c.Additions)
}

func (c Config) ModificationsWriter() io.WriteCloser {
// ModificationsWriter returns the io.WriteCloser produced by getWriter for
// c.Modifications.
func (c *Config) ModificationsWriter() io.WriteCloser {
return getWriter(c.Modifications)
}

Expand Down
25 changes: 20 additions & 5 deletions cmd/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,28 @@ package cmd
import (
"testing"

"github.com/aswinkarthik93/csvdiff/pkg/digest"
"github.com/stretchr/testify/assert"
)

func TestGetKeyPositions(t *testing.T) {
config := Config{KeyPositions: []int{0, 1}}
assert.Equal(t, []int{0, 1}, config.GetKeyPositions())
func TestPrimaryKeyPositions(t *testing.T) {
config := Config{PrimaryKeyPositions: []int{0, 1}}
assert.Equal(t, digest.Positions([]int{0, 1}), config.GetPrimaryKeys())

config = Config{KeyPositions: []int{}}
assert.Equal(t, []int{0}, config.GetKeyPositions())
config = Config{PrimaryKeyPositions: []int{}}
assert.Equal(t, digest.Positions([]int{0}), config.GetPrimaryKeys())

config = Config{}
assert.Equal(t, digest.Positions([]int{0}), config.GetPrimaryKeys())
}

// TestValueColumnPositions checks that GetValueColumns echoes configured
// positions and yields an empty, non-nil Positions when nothing is set.
func TestValueColumnPositions(t *testing.T) {
	cases := []struct {
		cfg  Config
		want digest.Positions
	}{
		{cfg: Config{ValueColumnPositions: []int{0, 1}}, want: digest.Positions{0, 1}},
		{cfg: Config{ValueColumnPositions: []int{}}, want: digest.Positions{}},
		{cfg: Config{}, want: digest.Positions{}},
	}

	for _, tc := range cases {
		assert.Equal(t, tc.want, tc.cfg.GetValueColumns())
	}
}
19 changes: 5 additions & 14 deletions cmd/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ import (
"fmt"
"io"
"log"
"os"
"sync"

"github.com/aswinkarthik93/csvdiff/pkg/digest"
Expand Down Expand Up @@ -54,7 +53,8 @@ func init() {

digestCmd.Flags().StringVarP(&config.Base, "base", "b", "", "The base csv file")
digestCmd.Flags().StringVarP(&config.Delta, "delta", "d", "", "The delta csv file")
digestCmd.Flags().IntSliceVarP(&config.KeyPositions, "key-positions", "k", []int{0}, "Primary key positions of the Input CSV as comma separated values Eg: 1,2")
digestCmd.Flags().IntSliceVarP(&config.PrimaryKeyPositions, "primary-key", "p", []int{0}, "Primary key positions of the Input CSV as comma separated values Eg: 1,2")
digestCmd.Flags().IntSliceVarP(&config.ValueColumnPositions, "value-columns", "", []int{}, "Value key positions of the Input CSV as comma separated values Eg: 1,2. Default is entire row")
digestCmd.Flags().BoolVarP(&debug, "debug", "", false, "Debug mode")
digestCmd.Flags().StringVarP(&config.Additions, "additions", "a", "STDOUT", "Output stream for the additions in delta file")
digestCmd.Flags().StringVarP(&config.Modifications, "modifications", "m", "STDOUT", "Output stream for the modifications in delta file")
Expand All @@ -70,18 +70,9 @@ func run() {
log.Fatal(err)
}

baseConfig := digest.DigestConfig{
KeyPositions: config.GetKeyPositions(),
Reader: config.GetBaseReader(),
Writer: os.Stdout,
}
baseConfig := digest.NewConfig(config.GetBaseReader(), false, config.GetPrimaryKeys(), config.GetValueColumns())

deltaConfig := digest.DigestConfig{
KeyPositions: config.GetKeyPositions(),
Reader: config.GetDeltaReader(),
Writer: os.Stdout,
SourceMap: true,
}
deltaConfig := digest.NewConfig(config.GetDeltaReader(), true, config.GetPrimaryKeys(), config.GetValueColumns())

var wg sync.WaitGroup
baseChannel := make(chan message)
Expand All @@ -104,7 +95,7 @@ type message struct {
sourceMap map[uint64]string
}

func generateInBackground(name string, config digest.DigestConfig, wg *sync.WaitGroup, channel chan<- message) {
func generateInBackground(name string, config *digest.Config, wg *sync.WaitGroup, channel chan<- message) {
digest, sourceMap, err := digest.Create(config)
if err != nil {
panic(err)
Expand Down
32 changes: 20 additions & 12 deletions pkg/digest/digest.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"github.com/cespare/xxhash"
)

// Separator is the delimiter placed between CSV fields when they are joined
// into a single string.
const Separator = ","

// Digest represents the binding of the key of each csv line
// and the digest that gets created for the entire line
type Digest struct {
Expand All @@ -18,28 +20,34 @@ type Digest struct {

// CreateDigest creates a Digest for each line of csv.
// There will be one Digest per line
func CreateDigest(csv []string, keyPositions []int) Digest {
keyCsv := make([]string, len(keyPositions))
for i, pos := range keyPositions {
keyCsv[i] = csv[pos]
}

row := strings.Join(csv, ",")
key := xxhash.Sum64String(strings.Join(keyCsv, ","))
digest := xxhash.Sum64String(row)
func CreateDigest(csv []string, pKey Positions, pRow Positions) Digest {
	// The key and value hashes are computed independently from the columns
	// selected by pKey and pRow; Row always carries the complete record.
	keyHash := xxhash.Sum64String(pKey.MapToValue(csv))
	valueHash := xxhash.Sum64String(pRow.MapToValue(csv))

	return Digest{
		Key:   keyHash,
		Value: valueHash,
		Row:   strings.Join(csv, Separator),
	}
}

type DigestConfig struct {
type Config struct {
KeyPositions []int
Key Positions
Value Positions
Reader io.Reader
Writer io.Writer
SourceMap bool
}

func Create(config DigestConfig) (map[uint64]uint64, map[uint64]string, error) {
// NewConfig returns a pointer to a Config populated with the given reader,
// source-map flag, primary key positions and value column positions.
// Fields not covered by the arguments are left at their zero values.
func NewConfig(r io.Reader, createSourceMap bool, primaryKey Positions, valueColumns Positions) *Config {
	cfg := new(Config)
	cfg.Reader = r
	cfg.SourceMap = createSourceMap
	cfg.Key = primaryKey
	cfg.Value = valueColumns
	return cfg
}

func Create(config *Config) (map[uint64]uint64, map[uint64]string, error) {
reader := csv.NewReader(config.Reader)

output := make(map[uint64]uint64)
Expand All @@ -52,7 +60,7 @@ func Create(config DigestConfig) (map[uint64]uint64, map[uint64]string, error) {
}
return nil, nil, err
}
digest := CreateDigest(line, config.KeyPositions)
digest := CreateDigest(line, config.Key, config.Value)
output[digest.Key] = digest.Value
if config.SourceMap {
sourceMap[digest.Key] = digest.Row
Expand Down
12 changes: 7 additions & 5 deletions pkg/digest/digest_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,27 @@ func TestCreateDigest(t *testing.T) {

expectedDigest := Digest{Key: firstKey, Value: firstLineDigest, Row: firstLine}

actualDigest := CreateDigest(strings.Split(firstLine, ","), []int{0})
actualDigest := CreateDigest(strings.Split(firstLine, Separator), []int{0}, []int{})

assert.Equal(t, expectedDigest, actualDigest)
}

func TestDigestForFile(t *testing.T) {
firstLine := "1,first-line"
firstLine := "1,first-line,some-columne,friday"
firstKey := xxhash.Sum64String("1")
firstDigest := xxhash.Sum64String(firstLine)

secondLine := "2,second-line"
secondLine := "2,second-line,nobody-needs-this,saturday"
secondKey := xxhash.Sum64String("2")
secondDigest := xxhash.Sum64String(secondLine)

var outputBuffer bytes.Buffer

testConfig := DigestConfig{
testConfig := &Config{
Reader: strings.NewReader(firstLine + "\n" + secondLine),
Writer: &outputBuffer,
KeyPositions: []int{0},
Key: []int{0},
SourceMap: true,
}

Expand All @@ -49,10 +50,11 @@ func TestDigestForFile(t *testing.T) {
assert.Equal(t, expectedSourceMap, sourceMap)

// No source map
testConfigWithoutSourceMap := DigestConfig{
testConfigWithoutSourceMap := &Config{
Reader: strings.NewReader(firstLine + "\n" + secondLine),
Writer: &outputBuffer,
KeyPositions: []int{0},
Key: []int{0},
SourceMap: false,
}

Expand Down
24 changes: 24 additions & 0 deletions pkg/digest/positions.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
package digest

import "strings"

// Positions is a list of indices used to pick individual fields out of a
// CSV record.
type Positions []int

// MapToValue joins the fields of csv found at the positions in p, in the
// order p lists them, using Separator. When p is empty the whole record is
// joined instead. Every position must be a valid index into csv.
func (p Positions) MapToValue(csv []string) string {
	if p.Length() == 0 {
		return strings.Join(csv, Separator)
	}

	selected := make([]string, 0, p.Length())
	for _, idx := range p.Items() {
		selected = append(selected, csv[idx])
	}
	return strings.Join(selected, Separator)
}

// Length reports how many positions p holds.
func (p Positions) Length() int {
	return len(p)
}

// Items exposes p as a plain []int. The returned slice shares its backing
// array with p.
func (p Positions) Items() []int {
	return p
}
41 changes: 41 additions & 0 deletions pkg/digest/positions_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package digest

import (
"strings"
"testing"

"github.com/stretchr/testify/assert"
)

// TestPositionsMapValues checks that only the selected fields are joined.
func TestPositionsMapValues(t *testing.T) {
	record := []string{"zero", "one", "two", "three"}
	selected := Positions{0, 3}

	assert.Equal(t, "zero,three", selected.MapToValue(record))
}

// TestPositionsMapValuesReturnsCompleteStringCsvIfEmpty checks that an empty
// Positions maps to the whole record joined with Separator.
func TestPositionsMapValuesReturnsCompleteStringCsvIfEmpty(t *testing.T) {
	record := []string{"zero", "one", "two", "three"}
	none := Positions{}

	want := strings.Join(record, Separator)
	assert.Equal(t, want, none.MapToValue(record))
}

func TestPositionsLength(t *testing.T) {
positions := Positions([]int{0, 3})

assert.Equal(t, 2, positions.Length())
}

func TestPositionsItems(t *testing.T) {
items := []int{0, 3}
positions := Positions(items)

assert.Equal(t, items, positions.Items())
}

0 comments on commit 7adecca

Please sign in to comment.