Skip to content

Commit

Permalink
allow custom delimiter usage
Browse files Browse the repository at this point in the history
  • Loading branch information
xiorcal authored and aswinkarthik committed Oct 17, 2019
1 parent 32862fe commit 27c8242
Show file tree
Hide file tree
Showing 14 changed files with 153 additions and 78 deletions.
32 changes: 19 additions & 13 deletions cmd/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@ package cmd
import (
"encoding/csv"
"fmt"
"github.com/spf13/afero"
"io"
"strings"

"github.com/spf13/afero"

"github.com/aswinkarthik/csvdiff/pkg/digest"
)

Expand All @@ -22,6 +23,7 @@ type Context struct {
baseFile afero.File
deltaFile afero.File
recordCount int
separator rune
}

// NewContext can take all CLI flags and create a cmd.Context
Expand All @@ -36,13 +38,14 @@ func NewContext(
format string,
baseFilename string,
deltaFilename string,
separator rune,
) (*Context, error) {
baseRecordCount, err := getColumnsCount(fs, baseFilename)
baseRecordCount, err := getColumnsCount(fs, baseFilename, separator)
if err != nil {
return nil, fmt.Errorf("error in base-file: %v", err)
}

deltaRecordCount, err := getColumnsCount(fs, deltaFilename)
deltaRecordCount, err := getColumnsCount(fs, deltaFilename, separator)
if err != nil {
return nil, fmt.Errorf("error in delta-file: %v", err)
}
Expand Down Expand Up @@ -77,6 +80,7 @@ func NewContext(
baseFile: baseFile,
deltaFile: deltaFile,
recordCount: baseRecordCount,
separator: separator,
}

if err := ctx.validate(); err != nil {
Expand Down Expand Up @@ -174,14 +178,14 @@ func assertAll(elements []int, assertFn func(element int) bool) bool {
return true
}

func getColumnsCount(fs afero.Fs, filename string) (int, error) {
func getColumnsCount(fs afero.Fs, filename string, separator rune) (int, error) {
base, err := fs.Open(filename)
if err != nil {
return 0, err
}
defer base.Close()
csvReader := csv.NewReader(base)

csvReader.Comma = separator
record, err := csvReader.Read()
if err != nil {
if err == io.EOF {
Expand All @@ -197,21 +201,23 @@ func getColumnsCount(fs afero.Fs, filename string) (int, error) {
// that is needed to start the diff process.
//
// The returned value is a digest.Config whose Reader is the base file held by
// this Context; the key, value and include positions and the separator are
// copied from the Context unchanged. The error result is always nil here.
func (c *Context) BaseDigestConfig() (digest.Config, error) {
return digest.Config{
Reader: c.baseFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Reader: c.baseFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Separator: c.separator,
}, nil
}

// DeltaDigestConfig creates a digest.Config from cmd.Context
// that is needed to start the diff process.
//
// The returned config's Reader is the delta file held by this Context; the
// key, value and include positions and the separator are copied from the
// Context unchanged. The error result is always nil here.
func (c *Context) DeltaDigestConfig() (digest.Config, error) {
return digest.Config{
Reader: c.deltaFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Reader: c.deltaFile,
Value: c.valueColumnPositions,
Key: c.primaryKeyPositions,
Include: c.includeColumnPositions,
Separator: c.separator,
}, nil
}

Expand Down
16 changes: 16 additions & 0 deletions cmd/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ func TestPrimaryKeyPositions(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.NoError(t, err)
assert.Equal(t, tt.out, ctx.GetPrimaryKeys())
Expand Down Expand Up @@ -89,6 +90,7 @@ func TestValueColumnPositions(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.NoError(t, err)
assert.Equal(t, tt.out, ctx.GetValueColumns())
Expand All @@ -114,6 +116,7 @@ func TestNewContext(t *testing.T) {
"",
"/base.csv",
"/delta.csv",
',',
)

assert.EqualError(t, err, "validation failed: specified format is not valid")
Expand All @@ -129,6 +132,7 @@ func TestNewContext(t *testing.T) {
"rowmark",
"/base.csv",
"/delta.csv",
',',
)

assert.NoError(t, err)
Expand All @@ -144,6 +148,7 @@ func TestNewContext(t *testing.T) {
"jSOn",
"/base.csv",
"/delta.csv",
',',
)

assert.NoError(t, err)
Expand All @@ -162,6 +167,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.EqualError(t, err, "error in base-file: open "+string(os.PathSeparator)+"base.csv: file does not exist")
})
Expand All @@ -182,6 +188,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.EqualError(t, err, "error in base-file: unable to process headers from csv file. EOF reached. invalid CSV file")
})
Expand All @@ -202,6 +209,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.EqualError(t, err, "error in delta-file: unable to process headers from csv file. EOF reached. invalid CSV file")
})
Expand All @@ -219,6 +227,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.NoError(t, err)
})
Expand Down Expand Up @@ -246,6 +255,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)

assert.EqualError(t, err, "validation failed: --primary-key positions are out of bounds")
Expand All @@ -261,6 +271,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)

assert.EqualError(t, err, "validation failed: --include positions are out of bounds")
Expand All @@ -276,6 +287,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)

assert.EqualError(t, err, "validation failed: --columns positions are out of bounds")
Expand All @@ -297,6 +309,7 @@ func TestNewContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.EqualError(t, err, "base-file and delta-file columns count do not match")
})
Expand All @@ -315,6 +328,7 @@ func TestNewContext(t *testing.T) {
"jSOn",
"/base.csv",
"/delta.csv",
',',
)

assert.EqualError(t, err, "only one of --columns or --ignore-columns")
Expand All @@ -338,6 +352,7 @@ func TestConfig_DigestConfig(t *testing.T) {
"jSOn",
"/base.csv",
"/delta.csv",
',',
)
assert.NoError(t, err)

Expand Down Expand Up @@ -372,6 +387,7 @@ func TestConfig_DigestConfig(t *testing.T) {
"jSOn",
"/base.csv",
"/delta.csv",
',',
)
assert.NoError(t, err)

Expand Down
28 changes: 26 additions & 2 deletions cmd/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@ package cmd

import (
"fmt"
"github.com/fatih/color"
"github.com/spf13/afero"
"io"
"os"
"strings"
"time"
"unicode/utf8"

"github.com/fatih/color"
"github.com/spf13/afero"

"github.com/aswinkarthik/csvdiff/pkg/digest"
"github.com/spf13/cobra"
Expand Down Expand Up @@ -60,7 +62,12 @@ Most suitable for csv files created from database tables`,
fs := afero.NewOsFs()
baseFilename := args[0]
deltaFilename := args[1]
runeSeparator, err := parseSeparator(separator)
fmt.Printf("root.go ####### %c, %q\n", runeSeparator, runeSeparator)

if err != nil {
return err
}
ctx, err := NewContext(
fs,
primaryKeyPositions,
Expand All @@ -70,6 +77,7 @@ Most suitable for csv files created from database tables`,
format,
baseFilename,
deltaFilename,
runeSeparator,
)

if err != nil {
Expand Down Expand Up @@ -118,6 +126,7 @@ var (
ignoreValueColumnPositions []int
includeColumnPositions []int
format string
separator string
)

func init() {
Expand All @@ -128,6 +137,7 @@ func init() {
rootCmd.Flags().IntSliceVarP(&ignoreValueColumnPositions, "ignore-columns", "", []int{}, "Inverse of --columns flag. This cannot be used if --columns are specified")
rootCmd.Flags().IntSliceVarP(&includeColumnPositions, "include", "", []int{}, "Include positions in CSV to display Eg: 1,2. Default is entire row")
rootCmd.Flags().StringVarP(&format, "format", "o", "diff", fmt.Sprintf("Available (%s)", strings.Join(allFormats, "|")))
rootCmd.Flags().StringVarP(&separator, "separator", "s", ",", "use specific separator")

rootCmd.Flags().BoolVarP(&timed, "time", "", false, "Measure time")
}
Expand All @@ -136,3 +146,17 @@ func timeTrack(start time.Time, name string) {
elapsed := time.Since(start)
_, _ = fmt.Fprintln(os.Stderr, fmt.Sprintf("%s took %s", name, elapsed))
}

// parseSeparator converts a separator given as a string (the --separator flag
// value) into the single rune used as the CSV delimiter.
//
// A value that starts with the two characters backslash and 't' is accepted as
// an escaped spelling of the tab character. Otherwise the first UTF-8 encoded
// rune of sep is returned; any characters after it are ignored.
//
// An error is returned when the first rune decodes to utf8.RuneError, which
// happens for an empty string, for input that does not start with valid UTF-8,
// and for a literal U+FFFD. On error the returned rune is a space and must not
// be used.
func parseSeparator(sep string) (rune, error) {
	if strings.HasPrefix(sep, "\\t") {
		return '\t', nil
	}

	runesep, _ := utf8.DecodeRuneInString(sep)
	if runesep == utf8.RuneError {
		// Report the argument that was actually parsed, not the package-level
		// flag variable, so the message is correct for every caller.
		return ' ', fmt.Errorf("Unable to use %v (%q) as a separator", sep, sep)
	}

	return runesep, nil
}
6 changes: 4 additions & 2 deletions cmd/root_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,12 @@ package cmd

import (
"bytes"
"os"
"testing"

"github.com/aswinkarthik/csvdiff/pkg/digest"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"os"
"testing"
)

func TestRunContext(t *testing.T) {
Expand Down Expand Up @@ -41,6 +42,7 @@ func TestRunContext(t *testing.T) {
"json",
"/base.csv",
"/delta.csv",
',',
)
assert.NoError(t, err)

Expand Down
6 changes: 6 additions & 0 deletions examples/no_comma.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
15 12 wordpress.com com 207790 792348 wordpress.com com 15 12 207589 791634
43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818
69 48 aol.com com 97543 225532 aol.com com 70 49 97328 224491
1615 905 proboards.com com 19833 33110 proboards.com com 1613 902 19835 33135
1616 906 ccleaner.com com 19831 32507 ccleaner.com com 1614 903 19834 32463
1617 907 doodle.com com 19827 32902 doodle.com com 1621 909 19787 32822
4 changes: 4 additions & 0 deletions examples/no_comma_delta.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
15 12 wordpress.com com 207790 792348 wordpress.com com 15 12 207589 791634
43 1 europa.eu eu 116613 353412 europa.eu eu 41 1 119129 359818
69 1048 aol.com com 97543 225532 aol.com com 70 49 97328 224491
24564 907 completely-newsite.com com 19827 32902 completely-newsite.com com 1621 909 19787 32822
19 changes: 11 additions & 8 deletions pkg/digest/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@ import "io"
// Value: The Value positions that needs to be compared for diff
// Include: Include these positions in output. It is Value positions by default.
type Config struct {
Key Positions
Value Positions
Include Positions
Reader io.Reader
Key Positions
Value Positions
Include Positions
Reader io.Reader
// Separator is a single rune stored alongside the reader and positions.
// NOTE(review): presumably the CSV field delimiter applied when Reader is
// parsed — confirm against the code that consumes Config.
Separator rune
}

// NewConfig creates an instance of Config struct.
Expand All @@ -21,15 +22,17 @@ func NewConfig(
primaryKey Positions,
valueColumns Positions,
includeColumns Positions,
separator rune,
) *Config {
if len(includeColumns) == 0 {
includeColumns = valueColumns
}

return &Config{
Reader: r,
Key: primaryKey,
Value: valueColumns,
Include: includeColumns,
Reader: r,
Key: primaryKey,
Value: valueColumns,
Include: includeColumns,
Separator: separator,
}
}
10 changes: 6 additions & 4 deletions pkg/digest/diff_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@ func TestDiff(t *testing.T) {

t.Run("default config", func(t *testing.T) {
baseConfig := &digest.Config{
Reader: strings.NewReader(base),
Key: []int{0},
Reader: strings.NewReader(base),
Key: []int{0},
Separator: ',',
}

deltaConfig := &digest.Config{
Reader: strings.NewReader(delta),
Key: []int{0},
Reader: strings.NewReader(delta),
Key: []int{0},
Separator: ',',
}

expected := digest.Differences{
Expand Down
Loading

0 comments on commit 27c8242

Please sign in to comment.