-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Mstislav Bobakov
committed
Feb 2, 2017
0 parents
commit f38079e
Showing
359 changed files
with
143,690 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
vendor/bin | ||
vendor/pkg | ||
stutterlog |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
# stutterlog | ||
**stutterlog** is a dead simple tool for finding duplicates in a log. | ||
It uses the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance) to find nearly identical log lines. | ||
|
||
### Install | ||
```go get github.com/mbobakov/stutterlog``` | ||
|
||
or download it from the releases page | ||
### Usage | ||
Usage of ./stutterlog: | ||
-debug | ||
Debug output. WARNING! this cause huge output | ||
-dist int | ||
Minimal Damerau-Levenshtein distance to match (default 20) | ||
-top int | ||
How many lines will be shown (default 10) | ||
### Examples | ||
Let me show how it works. For example, we have a log file like this: | ||
|
||
2015-03-26T01:27:38-04:00 debug trash message with tr@sH | ||
2015-03-26T01:27:39-04:00 info status of id=1 is OK | ||
2015-03-26T01:27:42-04:00 info status of id=2 is OK | ||
2015-03-26T01:27:44-04:00 debug trash message with t4H | ||
2015-03-26T01:27:44-04:00 info status of id=3 is OK | ||
2015-03-26T01:27:45-04:00 debug trash message with t7H | ||
|
||
You can pass logs via stdin or as argument: | ||
|
||
mbobakov$> cat test | ./stutterlog | ||
---------------------------------- | ||
- Line Example: '2015-03-26T01:27:38-04:00 debug trash message with tr@sH' | ||
Matches: 2 | ||
Average Levenstein distance: 5.000000 | ||
---------------------------------- | ||
- Line Example: '2015-03-26T01:27:39-04:00 info status of id=1 is OK' | ||
Matches: 2 | ||
Average Levenstein distance: 3.000000 | ||
|
||
### Performance | ||
On 2 GHz Intel Core i7 / 8 GB 1600 MHz DDR3 / SSD, with 1000 log lines of 512 characters each: | ||
|
||
mbobakov$> time ./stutterlog 1000linedFile.log > /dev/null | ||
1.07 real 1.10 user 0.02 sys |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
package main | ||
|
||
import ( | ||
"bufio" | ||
"io" | ||
"log" | ||
|
||
"github.com/masatana/go-textdistance" | ||
) | ||
|
||
func (r *results) processInput(input io.Reader, debug bool) { | ||
var ( | ||
match bool | ||
bestMatchIndx int | ||
bestMatchCoeff int | ||
) | ||
scanner := bufio.NewScanner(input) | ||
for scanner.Scan() { | ||
match = false | ||
bestMatchIndx = 0 | ||
bestMatchCoeff = *damerauLevenshteinDistance | ||
currString := scanner.Text() | ||
for i, res := range *r { | ||
dld := textdistance.DamerauLevenshteinDistance(res.source, currString) | ||
if dld > *damerauLevenshteinDistance { | ||
continue | ||
} | ||
match = true | ||
if dld < bestMatchCoeff { | ||
bestMatchCoeff = dld | ||
bestMatchIndx = i | ||
if debug { | ||
log.Printf(" Log line '%s' seems like '%s' because Damerau-Levenshtein distance is '%d' \n", currString, res.source, dld) | ||
} | ||
} | ||
} | ||
if !match { | ||
*r = append(*r, result{source: currString}) | ||
continue | ||
} | ||
if (*r)[bestMatchIndx].matches != 0 { | ||
(*r)[bestMatchIndx].avgLevDist = ((*r)[bestMatchIndx].avgLevDist + float32(bestMatchCoeff)) / 2 | ||
} else { | ||
(*r)[bestMatchIndx].avgLevDist = float32(bestMatchCoeff) | ||
} | ||
(*r)[bestMatchIndx].matches++ | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
package main | ||
|
||
import ( | ||
"io" | ||
"os" | ||
"reflect" | ||
"testing" | ||
) | ||
|
||
func Test_results_processInput(t *testing.T) { | ||
tests := []struct { | ||
name string | ||
expect results | ||
input io.Reader | ||
}{ | ||
{name: "testCase1", expect: results{result{source: "test", matches: 3, avgLevDist: 1}}}, | ||
} | ||
for _, tt := range tests { | ||
t.Run(tt.name, func(t *testing.T) { | ||
f, err := os.Open("testdata/" + tt.name) | ||
if err != nil { | ||
t.Error(err) | ||
} | ||
tt.input = f | ||
got := new(results) | ||
got.processInput(tt.input, false) | ||
if !reflect.DeepEqual(*got, tt.expect) { | ||
t.Errorf("%s:\nGot '%+v'\nExpect: '%+v'", tt.name, *got, tt.expect) | ||
} | ||
}) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
) | ||
|
||
func consoleOut(r result) []byte { | ||
delimeter := "----------------------------------\n" | ||
return []byte(fmt.Sprintf("%s\t- Line Example: '%s'\n\tMatches: %d\n\tAverage Levenstein distance: %f\n", delimeter, r.source, r.matches, r.avgLevDist)) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
{ | ||
"memo": "56a3660465438fb10fc3e514d77a1571927570e4d50f0f55e85cdb8eea76359a", | ||
"projects": [ | ||
{ | ||
"name": "github.com/deckarep/golang-set", | ||
"version": "v1.5", | ||
"revision": "52ba7f28f319e9d560acbf47c7fcf9b9ee7aa7d3", | ||
"packages": [ | ||
"." | ||
] | ||
}, | ||
{ | ||
"name": "github.com/fatih/color", | ||
"version": "v1.2", | ||
"revision": "34e4ee095d12986a2cef5ddb9aeb3b8cfcfea17c", | ||
"packages": [ | ||
"." | ||
] | ||
}, | ||
{ | ||
"name": "github.com/masatana/go-textdistance", | ||
"branch": "master", | ||
"revision": "15c30c968fddf3e2c5c4175fbdb4f9eebbbb8802", | ||
"packages": [ | ||
"." | ||
] | ||
}, | ||
{ | ||
"name": "github.com/mattn/go-colorable", | ||
"version": "v0.0.6", | ||
"revision": "ed8eb9e318d7a84ce5915b495b7d35e0cfe7b5a8", | ||
"packages": [ | ||
"." | ||
] | ||
}, | ||
{ | ||
"name": "github.com/mattn/go-isatty", | ||
"version": "v0.0.1", | ||
"revision": "3a115632dcd687f9c8cd01679c83a06a0e21c1f3", | ||
"packages": [ | ||
"." | ||
] | ||
}, | ||
{ | ||
"name": "github.com/pkg/errors", | ||
"version": "v0.8.0", | ||
"revision": "645ef00459ed84a119197bfb8d8205042c6df63d", | ||
"packages": [ | ||
"." | ||
] | ||
}, | ||
{ | ||
"name": "golang.org/x/sys", | ||
"branch": "master", | ||
"revision": "7a6e5648d140666db5d920909e082ca00a87ba2c", | ||
"packages": [ | ||
"unix" | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
package main | ||
|
||
import ( | ||
"flag" | ||
"log" | ||
"os" | ||
) | ||
|
||
// Command-line flags.
var (
	// damerauLevenshteinDistance is an upper bound: lines whose distance
	// exceeds it are never treated as duplicates.
	damerauLevenshteinDistance = flag.Int("dist", 20, "Maximal Damerau-Levenshtein distance to match")
	// top limits how many results are printed.
	top = flag.Int("top", 10, "How many lines will be shown")
	// debug switches on verbose logging of match decisions.
	debug = flag.Bool("debug", false, "Debug output. WARNING! this cause huge output")
)
|
||
func main() { | ||
flag.Parse() | ||
|
||
input, err := detectInput() | ||
if err != nil { | ||
log.Fatalf("Please use stdin or first parameter as input.\nErr: '%s'\n", err) | ||
} | ||
it := make(results, 0) | ||
it.processInput(input, *debug) | ||
|
||
err = it.print(os.Stdout, *top, consoleOut) | ||
if err != nil { | ||
log.Fatalf("Print error.\nErr: '%s'\n", err) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
{ | ||
"dependencies": { | ||
"github.com/fatih/color": {}, | ||
"github.com/masatana/go-textdistance": {}, | ||
"github.com/olekukonko/tablewriter": {} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package main | ||
|
||
import ( | ||
"io" | ||
"sort" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
type (
	// results is the list of line groups collected from the input. Through
	// the methods below it can be sorted with the sort package.
	results []result

	// result describes one group of similar lines.
	result struct {
		source     string  // line the group is built around
		matches    int64   // number of other lines matched to source
		avgLevDist float32 // average distance of the matched lines to source
	}
)

// Len reports how many results are stored.
func (r *results) Len() int {
	return len(*r)
}

// Less orders results by match count, highest first.
func (r *results) Less(i, j int) bool {
	left, right := (*r)[i], (*r)[j]
	return left.matches > right.matches
}

// Swap exchanges the results at positions i and j.
func (r *results) Swap(i, j int) {
	items := *r
	items[i], items[j] = items[j], items[i]
}
|
||
func (r *results) print(out io.Writer, top int, fmtr func(r result) []byte) error { | ||
sort.Sort(r) | ||
for i := 0; i < top; i++ { | ||
if i+1 > r.Len() { | ||
break | ||
} | ||
_, err := out.Write(fmtr((*r)[i])) | ||
if err != nil { | ||
return errors.Wrap(err, "Coudn't print result") | ||
} | ||
} | ||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
test | ||
test1 | ||
test2 | ||
Test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
package main | ||
|
||
import ( | ||
"io" | ||
"os" | ||
|
||
"flag" | ||
|
||
"github.com/pkg/errors" | ||
) | ||
|
||
// detectInput returns io.Reader with input data | ||
// TODO(mbobakov): Close file gracefully | ||
func detectInput() (io.Reader, error) { | ||
stats, err := os.Stdin.Stat() | ||
if err != nil { | ||
return nil, errors.Wrap(err, "file.Stat() in detectInput") | ||
} | ||
|
||
if stats.Mode()&os.ModeNamedPipe != 0 { | ||
return os.Stdin, nil | ||
} | ||
if len(flag.Args()) < 1 { | ||
return nil, errors.New("No inputs found") | ||
} | ||
if len(flag.Args()) > 1 { | ||
return nil, errors.Errorf("Multiple input files is not supported. Your choise is '%v'", flag.Args()[1:]) | ||
} | ||
f, err := os.Open(flag.Args()[0]) | ||
if err != nil { | ||
return nil, errors.Wrap(err, "Coudn't open file") | ||
} | ||
return f, nil | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
Oops, something went wrong.