Skip to content

Commit

Permalink
Done)
Browse files Browse the repository at this point in the history
  • Loading branch information
Mstislav Bobakov committed Feb 2, 2017
0 parents commit f38079e
Show file tree
Hide file tree
Showing 359 changed files with 143,690 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
vendor/bin
vendor/pkg
stutterlog
43 changes: 43 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# stutterlog
**stutterlog** is a dead-simple tool for finding duplicates in a log.
It uses the [Damerau-Levenshtein distance](https://en.wikipedia.org/wiki/Damerau–Levenshtein_distance) to find nearly identical log lines.

### Install
`go get github.com/mbobakov/stutterlog`

or download a binary from the releases page.
### Usage
Usage of ./stutterlog:
-debug
Debug output. WARNING! this cause huge output
-dist int
Minimal Damerau-Levenshtein distance to match (default 20)
-top int
How many lines will be shown (default 10)
### Examples
Let me show how it works. For example, suppose we have a log file like this:

2015-03-26T01:27:38-04:00 debug trash message with tr@sH
2015-03-26T01:27:39-04:00 info status of id=1 is OK
2015-03-26T01:27:42-04:00 info status of id=2 is OK
2015-03-26T01:27:44-04:00 debug trash message with t4H
2015-03-26T01:27:44-04:00 info status of id=3 is OK
2015-03-26T01:27:45-04:00 debug trash message with t7H

You can pass the log via stdin or as an argument:

mbobakov$> cat test | ./stutterlog
----------------------------------
- Line Example: '2015-03-26T01:27:38-04:00 debug trash message with tr@sH'
Matches: 2
Average Levenstein distance: 5.000000
----------------------------------
- Line Example: '2015-03-26T01:27:39-04:00 info status of id=1 is OK'
Matches: 2
Average Levenstein distance: 3.000000

### Performance
On a 2 GHz Intel Core i7 / 8 GB 1600 MHz DDR3 / SSD, with 1000 log lines of 512 characters each:

mbobakov$> time ./stutterlog 1000linedFile.log > /dev/null
1.07 real 1.10 user 0.02 sys
48 changes: 48 additions & 0 deletions calculate.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package main

import (
"bufio"
"io"
"log"

"github.com/masatana/go-textdistance"
)

// processInput reads input line by line and groups similar lines in r.
//
// Every line is compared with the source line of each group collected so far
// using the Damerau-Levenshtein distance. A line whose distance to at least
// one group does not exceed *damerauLevenshteinDistance is counted as a match
// for the closest such group (the earliest one on ties). Any other line starts
// a new group with zero matches.
//
// For each group, avgLevDist is kept as the arithmetic mean of the distances
// of all lines matched to it.
//
// When debug is true, each time a closer group is found for a line the pair is
// logged. A read error reported by the scanner (for example a line longer than
// the scanner's buffer) is logged; the groups collected before the error are
// kept.
func (r *results) processInput(input io.Reader, debug bool) {
	threshold := *damerauLevenshteinDistance
	scanner := bufio.NewScanner(input)
	for scanner.Scan() {
		line := scanner.Text()

		// best == -1 means "no group within the threshold yet". Tracking the
		// index explicitly, instead of seeding the best distance with the
		// threshold, keeps a line whose distance equals the threshold from
		// being credited to group 0.
		best, bestDist := -1, 0
		for i := range *r {
			src := (*r)[i].source
			dld := textdistance.DamerauLevenshteinDistance(src, line)
			if dld > threshold {
				continue
			}
			if best >= 0 && dld >= bestDist {
				// Not strictly closer than the current candidate.
				continue
			}
			best, bestDist = i, dld
			if debug {
				log.Printf(" Log line '%s' seems like '%s' because Damerau-Levenshtein distance is '%d' \n", line, src, dld)
			}
		}

		if best < 0 {
			*r = append(*r, result{source: line})
			continue
		}

		// Running mean: avg_n = avg_(n-1) + (d - avg_(n-1)) / n.
		hit := &(*r)[best]
		hit.matches++
		hit.avgLevDist += (float32(bestDist) - hit.avgLevDist) / float32(hit.matches)
	}
	if err := scanner.Err(); err != nil {
		// The signature has no error result, so report the failure instead of
		// silently presenting truncated input as complete.
		log.Printf("reading input: %v", err)
	}
}
32 changes: 32 additions & 0 deletions calculate_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
package main

import (
"io"
"os"
"reflect"
"testing"
)

// Test_results_processInput feeds each fixture file from testdata/ (the file
// is named after the test case) through processInput and compares the
// collected groups with the expected ones.
func Test_results_processInput(t *testing.T) {
	tests := []struct {
		name   string
		expect results
		input  io.Reader
	}{
		{name: "testCase1", expect: results{result{source: "test", matches: 3, avgLevDist: 1}}},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			f, err := os.Open("testdata/" + tt.name)
			if err != nil {
				// Without the fixture there is nothing meaningful to check;
				// stop here instead of running processInput on a nil file.
				t.Fatal(err)
			}
			defer f.Close()

			tt.input = f
			got := new(results)
			got.processInput(tt.input, false)
			if !reflect.DeepEqual(*got, tt.expect) {
				t.Errorf("%s:\nGot '%+v'\nExpect: '%+v'", tt.name, *got, tt.expect)
			}
		})
	}
}
10 changes: 10 additions & 0 deletions formatters.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package main

import (
"fmt"
)

// consoleOut renders a single result as a text block: a dashed separator line
// followed by tab-indented lines with the example source line, the number of
// matches and the average distance.
func consoleOut(r result) []byte {
	const layout = "%s\t- Line Example: '%s'\n" +
		"\tMatches: %d\n" +
		"\tAverage Levenstein distance: %f\n"

	separator := "----------------------------------\n"
	text := fmt.Sprintf(layout, separator, r.source, r.matches, r.avgLevDist)
	return []byte(text)
}
61 changes: 61 additions & 0 deletions lock.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
{
"memo": "56a3660465438fb10fc3e514d77a1571927570e4d50f0f55e85cdb8eea76359a",
"projects": [
{
"name": "github.com/deckarep/golang-set",
"version": "v1.5",
"revision": "52ba7f28f319e9d560acbf47c7fcf9b9ee7aa7d3",
"packages": [
"."
]
},
{
"name": "github.com/fatih/color",
"version": "v1.2",
"revision": "34e4ee095d12986a2cef5ddb9aeb3b8cfcfea17c",
"packages": [
"."
]
},
{
"name": "github.com/masatana/go-textdistance",
"branch": "master",
"revision": "15c30c968fddf3e2c5c4175fbdb4f9eebbbb8802",
"packages": [
"."
]
},
{
"name": "github.com/mattn/go-colorable",
"version": "v0.0.6",
"revision": "ed8eb9e318d7a84ce5915b495b7d35e0cfe7b5a8",
"packages": [
"."
]
},
{
"name": "github.com/mattn/go-isatty",
"version": "v0.0.1",
"revision": "3a115632dcd687f9c8cd01679c83a06a0e21c1f3",
"packages": [
"."
]
},
{
"name": "github.com/pkg/errors",
"version": "v0.8.0",
"revision": "645ef00459ed84a119197bfb8d8205042c6df63d",
"packages": [
"."
]
},
{
"name": "golang.org/x/sys",
"branch": "master",
"revision": "7a6e5648d140666db5d920909e082ca00a87ba2c",
"packages": [
"unix"
]
}
]
}
29 changes: 29 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package main

import (
"flag"
"log"
"os"
)

// Command-line flags.
var (
	// damerauLevenshteinDistance is the largest Damerau-Levenshtein distance
	// at which a line is still treated as a match for an existing group;
	// lines farther away than this are not matched.
	damerauLevenshteinDistance = flag.Int("dist", 20, "Maximal Damerau-Levenshtein distance to match")
	// top limits how many groups are printed.
	top = flag.Int("top", 10, "How many lines will be shown")
	// debug enables logging of candidate matches while processing.
	debug = flag.Bool("debug", false, "Debug output. WARNING! this cause huge output")
)

// main parses the command-line flags, selects the input source, groups
// similar log lines and writes the most frequent groups to stdout. Any failure
// terminates the process through log.Fatalf.
func main() {
	flag.Parse()

	src, err := detectInput()
	if err != nil {
		log.Fatalf("Please use stdin or first parameter as input.\nErr: '%s'\n", err)
	}

	groups := results{}
	groups.processInput(src, *debug)

	if err := groups.print(os.Stdout, *top, consoleOut); err != nil {
		log.Fatalf("Print error.\nErr: '%s'\n", err)
	}
}
7 changes: 7 additions & 0 deletions manifest.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"dependencies": {
"github.com/fatih/color": {},
"github.com/masatana/go-textdistance": {},
"github.com/olekukonko/tablewriter": {}
}
}
34 changes: 34 additions & 0 deletions results.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package main

import (
"io"
"sort"

"github.com/pkg/errors"
)

// results is a collection of line groups. *results implements sort.Interface,
// ordering groups by descending number of matches.
type results []result

// result describes one group of similar log lines.
type result struct {
	// source is the line that represents the group.
	source string
	// matches is the number of lines counted as matching source.
	matches int64
	// avgLevDist is the average distance recorded for the matched lines.
	avgLevDist float32
}

// Len reports the number of groups.
func (r *results) Len() int {
	return len(*r)
}

// Less reports whether group i has more matches than group j, so that sorting
// puts the most frequent groups first.
func (r *results) Less(i, j int) bool {
	groups := *r
	return groups[j].matches < groups[i].matches
}

// Swap exchanges the groups at positions i and j.
func (r *results) Swap(i, j int) {
	groups := *r
	groups[i], groups[j] = groups[j], groups[i]
}

// print sorts r in place (most matches first) and writes at most top entries
// to out, each rendered by fmtr. It stops at the first write error and returns
// it wrapped; otherwise it returns nil.
func (r *results) print(out io.Writer, top int, fmtr func(r result) []byte) error {
	sort.Sort(r)
	for i, entry := range *r {
		if i >= top {
			break
		}
		if _, err := out.Write(fmtr(entry)); err != nil {
			return errors.Wrap(err, "Coudn't print result")
		}
	}
	return nil
}
4 changes: 4 additions & 0 deletions testdata/testCase1
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
test
test1
test2
Test
34 changes: 34 additions & 0 deletions utils.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package main

import (
"io"
"os"

"flag"

"github.com/pkg/errors"
)

// detectInput returns an io.Reader with the input data.
//
// Sources are tried in this order:
//  1. stdin, when it is a pipe (`cat app.log | stutterlog`);
//  2. the file named by the single positional argument;
//  3. stdin, when no argument is given and stdin is redirected from a regular
//     file (`stutterlog < app.log`).
//
// An error is returned when stdin cannot be inspected, when no source is
// found, when more than one positional argument is given, or when the file
// cannot be opened.
// TODO(mbobakov): Close file gracefully
func detectInput() (io.Reader, error) {
	stats, err := os.Stdin.Stat()
	if err != nil {
		return nil, errors.Wrap(err, "file.Stat() in detectInput")
	}

	mode := stats.Mode()
	if mode&os.ModeNamedPipe != 0 {
		return os.Stdin, nil
	}

	args := flag.Args()
	switch {
	case len(args) == 0 && mode.IsRegular():
		// Shell redirection from a file is not a pipe, but it is still input
		// supplied via stdin.
		return os.Stdin, nil
	case len(args) == 0:
		return nil, errors.New("No inputs found")
	case len(args) > 1:
		return nil, errors.Errorf("Multiple input files is not supported. Your choise is '%v'", args[1:])
	}

	f, err := os.Open(args[0])
	if err != nil {
		return nil, errors.Wrap(err, "Coudn't open file")
	}
	return f, nil
}
22 changes: 22 additions & 0 deletions vendor/github.com/deckarep/golang-set/.gitignore

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions vendor/github.com/deckarep/golang-set/.travis.yml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

22 changes: 22 additions & 0 deletions vendor/github.com/deckarep/golang-set/LICENSE

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit f38079e

Please sign in to comment.