From 10293941d7bbb5f2f7d6ab4717411132dd3000d4 Mon Sep 17 00:00:00 2001
From: Osvaldo Gago
Date: Sun, 22 Oct 2017 10:01:57 +0200
Subject: [PATCH 1/4] Create case sensitive maps for the urls

---
 functions.go | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/functions.go b/functions.go
index 6898de8..38f7ac8 100644
--- a/functions.go
+++ b/functions.go
@@ -42,6 +42,21 @@ func searchInStringToMap(total string, expression string) map[string]bool {
 	return a
 }
 
+// searchInStringToMapCS Reads a string and returns all matches (case sensitive) in the regular expression as map keys
+func searchInStringToMapCS(total string, expression string) map[string]bool {
+	defer timeTrack(time.Now(), "searchInStringToMap")
+	r, err := regexp.Compile(expression)
+	if err != nil {
+		panic(err)
+	}
+	allMatches := r.FindAllString(total, -1)
+	a := make(map[string]bool)
+	for _, v := range allMatches {
+		a[v] = false
+	}
+	return a
+}
+
 // Compare Compares 2 maps with words as what to search and boleans false value. Transforms in true when the key exists in the other map.
 func Compare(a map[string]bool, b map[string]bool) (map[string]bool, map[string]bool) {
 	defer timeTrack(time.Now(), "Compare")

From 7a636f49e5bdbdd9586b0bef135fbcc9f3665c61 Mon Sep 17 00:00:00 2001
From: Osvaldo Gago
Date: Sun, 22 Oct 2017 10:04:10 +0200
Subject: [PATCH 2/4] Regular expression and process when -data=urls

---
 ecompare.go | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/ecompare.go b/ecompare.go
index 920478c..4ec28c7 100644
--- a/ecompare.go
+++ b/ecompare.go
@@ -13,6 +13,8 @@ const shaRegex string = `[A-Fa-f0-9]{64}`
 
 const dninieRegex string = `[A-z]?\d{7,8}[TRWAGMYFPDXBNJZSQVHLCKEtrwagmyfpdxbnjzsqvhlcke]`
 
+const urlsRegex string = `https?://([\da-z\.-]+)\.([a-z\.]{2,6})([/\w \.-]*)*/?`
+
 var debug *bool
 
 func main() {
@@ -49,6 +51,9 @@ func main() {
 	case "dni":
 		aMap = searchInStringToMap(aFile, dninieRegex)
 		bMap = searchInStringToMap(bFile, dninieRegex)
+	case "urls":
+		aMap = searchInStringToMapCS(aFile, urlsRegex)
+		bMap = searchInStringToMapCS(bFile, urlsRegex)
 	default:
 		aMap = searchInStringToMap(aFile, emailRegex)
 		bMap = searchInStringToMap(bFile, emailRegex)

From 2556233a4bd5c48943f954b49e240fad430ceaf6 Mon Sep 17 00:00:00 2001
From: Osvaldo Gago
Date: Sun, 22 Oct 2017 10:10:07 +0200
Subject: [PATCH 3/4] Correctly name the function when debugging

---
 functions.go | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/functions.go b/functions.go
index 38f7ac8..bf512bd 100644
--- a/functions.go
+++ b/functions.go
@@ -44,7 +44,7 @@ func searchInStringToMap(total string, expression string) map[string]bool {
 
 // searchInStringToMapCS Reads a string and returns all matches (case sensitive) in the regular expression as map keys
 func searchInStringToMapCS(total string, expression string) map[string]bool {
-	defer timeTrack(time.Now(), "searchInStringToMap")
+	defer timeTrack(time.Now(), "searchInStringToMapCS")
 	r, err := regexp.Compile(expression)
 	if err != nil {
 		panic(err)

From db6e27c27411c67071337e8800e54c55b94767f5 Mon Sep 17 00:00:00 2001
From: Osvaldo Gago
Date: Sun, 22 Oct 2017 10:11:21 +0200
Subject: [PATCH 4/4] Add urls to readme, help and doc

---
 README.md | 2 +-
 doc.go    | 2 +-
 help.go   | 4 ++--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index fd482ee..88714d1 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ Download both files as csvs to the same folder as the script. Then, using the co
 ./ecompare -data=emails -A=fileA.csv -B=fileB.csv
 ```
 
-* `-data` specifies the type of data to compare. It can be `emails`, `sha256` or `dni` (Spanish ID numbers)
+* `-data` specifies the type of data to compare. It can be `emails`, `sha256`, `urls` or `dni` (Spanish ID numbers)
 * `-A` and `-B` specify the names of both files.
 
 #### Get details about the comparison
diff --git a/doc.go b/doc.go
index c056c95..66040e2 100644
--- a/doc.go
+++ b/doc.go
@@ -30,7 +30,7 @@ Download both files as csvs to the same folder as the script. Then, using the co
 
 	./ecompare -data=emails -A=fileA.csv -B=fileB.csv
 
-	-data specifies the type of data to compare. It can be emails, sha256 or dni (Spanish ID numbers)
+	-data specifies the type of data to compare. It can be emails, sha256, urls or dni (Spanish ID numbers)
 	-A and -B specify the names of both files.
 
 
diff --git a/help.go b/help.go
index b029fc9..d855429 100644
--- a/help.go
+++ b/help.go
@@ -10,7 +10,7 @@ func helpMe() {
 
 Script to compare unique data from two text files, named A and B
 
-- unique data includes emails or sha256
+- unique data includes emails, sha256, urls or dni
 - text files include csv, txt, sql or html
 
 Use the options as in this example:
@@ -30,7 +30,7 @@ Each time the script runs it overwrites this 3 files.
 Comand line options:
 
 -help Display this help
--data=emails What to compare in the files. It can be "emails", "sha256" or "dni". By default it compares emails.
+-data=emails What to compare in the files. It can be "emails", "sha256", "urls" or "dni". By default it compares emails.
 -A=fileA.csv File A name
 -B=fileB.csv File B name
 -debug=true Debug the script