Skip to content

Commit

Permalink
Merge branch 'urls'
Browse files Browse the repository at this point in the history
  • Loading branch information
osvik committed Oct 22, 2017
2 parents 921999e + db6e27c commit d53ea78
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Download both files as csvs to the same folder as the script. Then, using the co
./ecompare -data=emails -A=fileA.csv -B=fileB.csv
```

* `-data` specifies the type of data to compare. It can be `emails`, `sha256` or `dni` (Spanish ID numbers)
* `-data` specifies the type of data to compare. It can be `emails`, `sha256`, `urls` or `dni` (Spanish ID numbers)
* `-A` and `-B` specify the names of both files.

#### Get details about the comparison
Expand Down
2 changes: 1 addition & 1 deletion doc.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ Download both files as csvs to the same folder as the script. Then, using the co
./ecompare -data=emails -A=fileA.csv -B=fileB.csv
-data specifies the type of data to compare. It can be emails, sha256 or dni (Spanish ID numbers)
-data specifies the type of data to compare. It can be emails, sha256, urls or dni (Spanish ID numbers)
-A and -B specify the names of both files.
Expand Down
5 changes: 5 additions & 0 deletions ecompare.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@ const shaRegex string = `[A-Fa-f0-9]{64}`

// dninieRegex matches a Spanish DNI/NIE-style identifier: an optional leading
// ASCII letter, 7 or 8 digits, and a final control letter in either case.
//
// The leading class is [A-Za-z] rather than [A-z]: the range A-z also spans
// the punctuation that sits between 'Z' and 'a' in ASCII ([ \ ] ^ _ and the
// backquote), which would let a stray symbol be captured as part of the
// identifier.
const dninieRegex string = `[A-Za-z]?\d{7,8}[TRWAGMYFPDXBNJZSQVHLCKEtrwagmyfpdxbnjzsqvhlcke]`

// urlsRegex matches http:// or https:// URLs made of a host (lowercase
// letters, digits, dots and hyphens), a dot, a 2-6 character suffix of
// lowercase letters and dots, and an optional path of slashes, word
// characters, dots and hyphens. Query strings and fragments are not part of
// the match.
//
// The path class excludes the space character on purpose: with a space in
// the class an unanchored match runs on into the words that follow the URL
// and can swallow the scheme of the next URL, so that URL is never reported.
const urlsRegex string = `https?://([\da-z\.-]+)\.([a-z\.]{2,6})([/\w\.-]*)*/?`

// debug is a package-level pointer to a bool.
// NOTE(review): presumably it holds the value of the -debug command-line
// flag; confirm against where main assigns it.
var debug *bool

func main() {
Expand Down Expand Up @@ -49,6 +51,9 @@ func main() {
case "dni":
aMap = searchInStringToMap(aFile, dninieRegex)
bMap = searchInStringToMap(bFile, dninieRegex)
case "urls":
aMap = searchInStringToMapCS(aFile, urlsRegex)
bMap = searchInStringToMapCS(bFile, urlsRegex)
default:
aMap = searchInStringToMap(aFile, emailRegex)
bMap = searchInStringToMap(bFile, emailRegex)
Expand Down
15 changes: 15 additions & 0 deletions functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,21 @@ func searchInStringToMap(total string, expression string) map[string]bool {
return a
}

// searchInStringToMapCS scans total for every match of the regular expression
// and returns the distinct matches as map keys, each stored exactly as it
// appears in total (no case folding is applied here). Every value in the
// returned map is false. It panics when expression does not compile.
func searchInStringToMapCS(total string, expression string) map[string]bool {
	// Deferred first so the call to timeTrack receives the function's start time.
	defer timeTrack(time.Now(), "searchInStringToMapCS")

	re, compileErr := regexp.Compile(expression)
	if compileErr != nil {
		panic(compileErr)
	}

	found := map[string]bool{}
	for _, match := range re.FindAllString(total, -1) {
		// Repeated matches collapse onto the same key.
		found[match] = false
	}
	return found
}

// Compare compares two maps whose keys are the words to search for and whose boolean values start as false. A value is changed to true when its key also exists in the other map.
func Compare(a map[string]bool, b map[string]bool) (map[string]bool, map[string]bool) {
defer timeTrack(time.Now(), "Compare")
Expand Down
4 changes: 2 additions & 2 deletions help.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ func helpMe() {
Script to compare unique data from two text files, named A and B
- unique data includes emails or sha256
- unique data includes emails, sha256, urls or dni
- text files include csv, txt, sql or html
Use the options as in this example:
Expand All @@ -30,7 +30,7 @@ Each time the script runs it overwrites this 3 files.
Comand line options:
-help Display this help
-data=emails What to compare in the files. It can be "emails", "sha256" or "dni". By default it compares emails.
-data=emails What to compare in the files. It can be "emails", "sha256", "urls" or "dni". By default it compares emails.
-A=fileA.csv File A name
-B=fileB.csv File B name
-debug=true Debug the script
Expand Down

0 comments on commit d53ea78

Please sign in to comment.