diff --git a/README.md b/README.md index fd482ee..88714d1 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Download both files as csvs to the same folder as the script. Then, using the co ./ecompare -data=emails -A=fileA.csv -B=fileB.csv ``` -* `-data` specifies the type of data to compare. It can be `emails`, `sha256` or `dni` (Spanish ID numbers) +* `-data` specifies the type of data to compare. It can be `emails`, `sha256`, `urls` or `dni` (Spanish ID numbers) * `-A` and `-B` specify the names of both files. #### Get details about the comparison diff --git a/doc.go b/doc.go index c056c95..66040e2 100644 --- a/doc.go +++ b/doc.go @@ -30,7 +30,7 @@ Download both files as csvs to the same folder as the script. Then, using the co ./ecompare -data=emails -A=fileA.csv -B=fileB.csv - -data specifies the type of data to compare. It can be emails, sha256 or dni (Spanish ID numbers) + -data specifies the type of data to compare. It can be emails, sha256, urls or dni (Spanish ID numbers) -A and -B specify the names of both files. 
diff --git a/ecompare.go b/ecompare.go index 920478c..4ec28c7 100644 --- a/ecompare.go +++ b/ecompare.go @@ -13,6 +13,8 @@ const shaRegex string = `[A-Fa-f0-9]{64}` const dninieRegex string = `[A-z]?\d{7,8}[TRWAGMYFPDXBNJZSQVHLCKEtrwagmyfpdxbnjzsqvhlcke]` +const urlsRegex string = `https?://([\da-z\.-]+)\.([a-z\.]{2,6})([/\w \.-]*)*/?` + var debug *bool func main() { @@ -49,6 +51,9 @@ func main() { case "dni": aMap = searchInStringToMap(aFile, dninieRegex) bMap = searchInStringToMap(bFile, dninieRegex) + case "urls": + aMap = searchInStringToMapCS(aFile, urlsRegex) + bMap = searchInStringToMapCS(bFile, urlsRegex) default: aMap = searchInStringToMap(aFile, emailRegex) bMap = searchInStringToMap(bFile, emailRegex) diff --git a/functions.go b/functions.go index 6898de8..bf512bd 100644 --- a/functions.go +++ b/functions.go @@ -42,6 +42,21 @@ func searchInStringToMap(total string, expression string) map[string]bool { return a } +// searchInStringToMapCS reads a string and returns all case-sensitive matches of the regular expression as map keys +func searchInStringToMapCS(total string, expression string) map[string]bool { + defer timeTrack(time.Now(), "searchInStringToMapCS") + r, err := regexp.Compile(expression) + if err != nil { + panic(err) + } + allMatches := r.FindAllString(total, -1) + a := make(map[string]bool) + for _, v := range allMatches { + a[v] = false + } + return a +} + // Compare Compares 2 maps with words as what to search and boleans false value. Transforms in true when the key exists in the other map. 
func Compare(a map[string]bool, b map[string]bool) (map[string]bool, map[string]bool) { defer timeTrack(time.Now(), "Compare") diff --git a/help.go b/help.go index b029fc9..d855429 100644 --- a/help.go +++ b/help.go @@ -10,7 +10,7 @@ func helpMe() { Script to compare unique data from two text files, named A and B -- unique data includes emails or sha256 +- unique data includes emails, sha256, urls or dni - text files include csv, txt, sql or html Use the options as in this example: @@ -30,7 +30,7 @@ Each time the script runs it overwrites this 3 files. Comand line options: -help Display this help --data=emails What to compare in the files. It can be "emails", "sha256" or "dni". By default it compares emails. +-data=emails What to compare in the files. It can be "emails", "sha256", "urls" or "dni". By default it compares emails. -A=fileA.csv File A name -B=fileB.csv File B name -debug=true Debug the script