Skip to content

Commit

Permalink
standardize email pattern (#3524)
Browse files Browse the repository at this point in the history
* standardize email pattern

* added email pattern test cases

* added negative test cases for email pattern
  • Loading branch information
kashifkhan0771 authored Oct 30, 2024
1 parent d6ce0f8 commit 84ec670
Show file tree
Hide file tree
Showing 38 changed files with 2,249 additions and 1,239 deletions.
2 changes: 1 addition & 1 deletion pkg/common/patterns.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import (
"strings"
)

const EmailPattern = `\b(?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])\b`
const EmailPattern = `\b((?:[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\x60{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\]))\b`
const SubDomainPattern = `\b([A-Za-z0-9](?:[A-Za-z0-9\-]{0,61}[A-Za-z0-9])?)\b`
const UUIDPattern = `\b([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})\b`
const UUIDPatternUpperCase = `\b([0-9A-Z]{8}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{4}-[0-9A-Z]{12})\b`
Expand Down
44 changes: 43 additions & 1 deletion pkg/common/patterns_test.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
package common

import (
"github.com/stretchr/testify/assert"
"regexp"
"testing"

"github.com/stretchr/testify/assert"
)

const (
Expand All @@ -13,6 +14,47 @@ const (
passwordRegex = `(?im)(?:pass|password)\S{0,40}?[:=\s]{1,3}[ '"=]{0,1}([^:^<>;.*&|£\n\s]{4,40})`
)

func TestEmailRegexCheck(t *testing.T) {
testEmails := `
// positive cases
standard email = john.doe@example.com
subdomain email = jane_doe123@sub.domain.co.us
organization email = alice.smith@test.org
test email = bob@test.name
with tag email = user.name+tag@domain.com
hyphen domain = info@my-site.net
service email = contact@web-service.io
underscore email = example_user@domain.info
departement email = first.last@department.company.edu
alphanumeric email = user1234@domain.co
local server email = admin@local-server.local
dot email = test.email@my-email-service.xyz
special char email = special@characters.com
support email = support@customer-service.org
// negative cases
not an email = abc.123@z
looks like email = test@user <- no domain
email but not = user12@service.COM <- capital letters not supported for domain
random text = here's some information about local-user@edu user
`

expectedStr := []string{
"john.doe@example.com", "jane_doe123@sub.domain.co.us",
"alice.smith@test.org", "bob@test.name", "user.name+tag@domain.com",
"info@my-site.net", "contact@web-service.io", "example_user@domain.info",
"first.last@department.company.edu", "user1234@domain.co", "admin@local-server.local",
"test.email@my-email-service.xyz", "special@characters.com", "support@customer-service.org",
}

emailRegex := regexp.MustCompile(EmailPattern)

emailMatches := emailRegex.FindAllString(testEmails, -1)

assert.Exactly(t, emailMatches, expectedStr)

}

func TestUsernameRegexCheck(t *testing.T) {
usernameRegexPat := UsernameRegexCheck(usernamePattern)

Expand Down
22 changes: 11 additions & 11 deletions pkg/detectors/checkvist/checkvist.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,18 @@ package checkvist

import (
"context"
regexp "github.com/wasilibs/go-re2"
"net/http"
"net/url"
"strings"

regexp "github.com/wasilibs/go-re2"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

type Scanner struct{
type Scanner struct {
detectors.DefaultMultiPartCredentialProvider
}

Expand All @@ -24,7 +25,7 @@ var (

// Make sure that your group is surrounded in boundary characters such as below to reduce false positives.
keyPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + `\b([0-9a-zA-Z]{14})\b`)
emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + `\b([\w\.-]+@[\w-]+\.[\w\.-]{2,5})\b`)
emailPat = regexp.MustCompile(detectors.PrefixRegex([]string{"checkvist"}) + common.EmailPattern)
)

// Keywords are used for efficiently pre-filtering chunks.
Expand All @@ -38,14 +39,13 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
dataStr := string(data)

matches := keyPat.FindAllStringSubmatch(dataStr, -1)
emailMatches := emailPat.FindAllStringSubmatch(dataStr, -1)

for _, emailMatch := range emailMatches {
if len(emailMatch) != 2 {
continue
}
resEmailMatch := strings.TrimSpace(emailMatch[1])
uniqueEmailMatches := make(map[string]struct{})
for _, match := range emailPat.FindAllStringSubmatch(dataStr, -1) {
uniqueEmailMatches[strings.TrimSpace(match[1])] = struct{}{}
}

for emailMatch := range uniqueEmailMatches {
for _, match := range matches {
if len(match) != 2 {
continue
Expand All @@ -55,12 +55,12 @@ func (s Scanner) FromData(ctx context.Context, verify bool, data []byte) (result
s1 := detectors.Result{
DetectorType: detectorspb.DetectorType_Checkvist,
Raw: []byte(resMatch),
RawV2: []byte(resMatch + resEmailMatch),
RawV2: []byte(resMatch + emailMatch),
}

if verify {
payload := url.Values{}
payload.Add("username", resEmailMatch)
payload.Add("username", emailMatch)
payload.Add("remote_key", resMatch)

req, err := http.NewRequestWithContext(ctx, "GET", "https://checkvist.com/auth/login.json?version=2", strings.NewReader(payload.Encode()))
Expand Down
118 changes: 118 additions & 0 deletions pkg/detectors/checkvist/checkvist_integration_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
//go:build detectors
// +build detectors

package checkvist

import (
"context"
"fmt"
"testing"
"time"

"github.com/kylelemons/godebug/pretty"

"github.com/trufflesecurity/trufflehog/v3/pkg/common"
"github.com/trufflesecurity/trufflehog/v3/pkg/detectors"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/detectorspb"
)

func TestCheckvist_FromChunk(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*5)
defer cancel()
testSecrets, err := common.GetSecret(ctx, "trufflehog-testing", "detectors2")
if err != nil {
t.Fatalf("could not get test secrets from GCP: %s", err)
}
user := testSecrets.MustGetField("CHECKVIST_EMAIL")
secret := testSecrets.MustGetField("CHECKVIST")
inactiveSecret := testSecrets.MustGetField("CHECKVIST_INACTIVE")

type args struct {
ctx context.Context
data []byte
verify bool
}
tests := []struct {
name string
s Scanner
args args
want []detectors.Result
wantErr bool
}{
{
name: "found, verified",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within", user, secret)),
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_Checkvist,
Verified: true,
},
},
wantErr: false,
},
{
name: "found, unverified",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte(fmt.Sprintf("You can find a checkvist user %s with checkvist secret %s within but not valid", user, inactiveSecret)), // the secret would satisfy the regex but not pass validation
verify: true,
},
want: []detectors.Result{
{
DetectorType: detectorspb.DetectorType_Checkvist,
Verified: false,
},
},
wantErr: false,
},
{
name: "not found",
s: Scanner{},
args: args{
ctx: context.Background(),
data: []byte("You cannot find the secret within"),
verify: true,
},
want: nil,
wantErr: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
s := Scanner{}
got, err := s.FromData(tt.args.ctx, tt.args.verify, tt.args.data)
if (err != nil) != tt.wantErr {
t.Errorf("Checkvist.FromData() error = %v, wantErr %v", err, tt.wantErr)
return
}
for i := range got {
if len(got[i].Raw) == 0 {
t.Fatalf("no raw secret present: \n %+v", got[i])
}
got[i].Raw = nil
}
if diff := pretty.Compare(got, tt.want); diff != "" {
t.Errorf("Checkvist.FromData() %s diff: (-got +want)\n%s", tt.name, diff)
}
})
}
}

func BenchmarkFromData(benchmark *testing.B) {
ctx := context.Background()
s := Scanner{}
for name, data := range detectors.MustGetBenchmarkData() {
benchmark.Run(name, func(b *testing.B) {
b.ResetTimer()
for n := 0; n < b.N; n++ {
s.FromData(ctx, false, data)
}
})
}
}
Loading

0 comments on commit 84ec670

Please sign in to comment.