Skip to content

Commit

Permalink
fix(panic): fix panic when censoring utf-8 (#64)
Browse files Browse the repository at this point in the history
* fix(panic): fix panic when censoring utf-8

Signed-off-by: stephenduke-care <stephen.duke@care.com>

* fix(panic): fix panic when censoring utf-8

Signed-off-by: stephenduke-care <stephen.duke@care.com>

---------

Signed-off-by: stephenduke-care <stephen.duke@care.com>
  • Loading branch information
stephenduke-care authored Sep 21, 2023
1 parent 8607c2b commit f76b6ad
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 33 deletions.
66 changes: 33 additions & 33 deletions goaway.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,63 +143,63 @@ func (g *ProfanityDetector) indexToRune(s string, index int) int {
count := 0
for i := range s {
if i == index {
return count
break
}
if i < index {
count++
}
count++
}
return -1
return count
}

// Censor takes in a string (word or sentence) and tries to censor all profanities found.
func (g *ProfanityDetector) Censor(s string) string {
censored := []rune(s)
var originalIndexes []int
s, originalIndexes = g.sanitize(s, true)
// Check for false negatives
for _, word := range g.falseNegatives {
runeWordLength := 0

g.checkProfanity(&s, &originalIndexes, &censored, g.falseNegatives, &runeWordLength)
g.removeFalsePositives(&s, &originalIndexes, &runeWordLength)
g.checkProfanity(&s, &originalIndexes, &censored, g.profanities, &runeWordLength)

return string(censored)
}

func (g *ProfanityDetector) checkProfanity(s *string, originalIndexes *[]int, censored *[]rune, wordList []string, runeWordLength *int) {
for _, word := range wordList {
currentIndex := 0
*runeWordLength = len([]rune(word))
for currentIndex != -1 {
if foundIndex := strings.Index(s[currentIndex:], word); foundIndex != -1 {
for i := 0; i < len([]rune(word)); i++ {
runeIndex := g.indexToRune(string(censored), currentIndex+foundIndex+i)
censored[originalIndexes[runeIndex]] = '*'
if foundIndex := strings.Index((*s)[currentIndex:], word); foundIndex != -1 {
for i := 0; i < *runeWordLength; i++ {
runeIndex := g.indexToRune(*s, currentIndex+foundIndex) + i
if runeIndex < len(*originalIndexes) {
(*censored)[(*originalIndexes)[runeIndex]] = '*'
}
}
currentIndex += foundIndex + len([]rune(word))
currentIndex += foundIndex + len([]byte(word))
} else {
break
}
}
}
// Remove false positives
}

func (g *ProfanityDetector) removeFalsePositives(s *string, originalIndexes *[]int, runeWordLength *int) {
for _, word := range g.falsePositives {
currentIndex := 0
*runeWordLength = len([]rune(word))
for currentIndex != -1 {
if foundIndex := strings.Index(s[currentIndex:], word); foundIndex != -1 {
foundRuneIndex := g.indexToRune(s, foundIndex)
originalIndexes = append(originalIndexes[:foundRuneIndex], originalIndexes[foundRuneIndex+len(word):]...)
currentIndex += foundIndex + len([]rune(word))
} else {
break
}
}
s = strings.Replace(s, word, "", -1)
}
// Check for profanities
for _, word := range g.profanities {
currentIndex := 0
for currentIndex != -1 {
if foundIndex := strings.Index(s[currentIndex:], word); foundIndex != -1 {
for i := 0; i < len([]rune(word)); i++ {
runeIndex := g.indexToRune(string(censored), currentIndex+foundIndex+i)
censored[originalIndexes[runeIndex]] = '*'
}
currentIndex += foundIndex + len([]rune(word))
if foundIndex := strings.Index((*s)[currentIndex:], word); foundIndex != -1 {
foundRuneIndex := g.indexToRune(*s, foundIndex)
*originalIndexes = append((*originalIndexes)[:foundRuneIndex], (*originalIndexes)[foundRuneIndex+*runeWordLength:]...)
currentIndex += foundIndex + len([]byte(word))
} else {
break
}
}
*s = strings.Replace(*s, word, "", -1)
}
return string(censored)
}

func (g ProfanityDetector) sanitize(s string, rememberOriginalIndexes bool) (string, []int) {
Expand Down
21 changes: 21 additions & 0 deletions goaway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -655,3 +655,24 @@ func TestSanitizeWithoutSanitizingLeetSpeak(t *testing.T) {
t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString)
}
}

// TestDefaultDriver_UTF8 checks that detection and censoring work on
// multi-byte UTF-8 input. The custom dictionary is built so that all three
// word lists overlap on the same prefix: the false negative must be censored,
// the false positive must be left untouched, and the plain profanities (one
// multi-byte, one ASCII) must each be replaced rune-for-rune with '*'.
func TestDefaultDriver_UTF8(t *testing.T) {
	detector := NewProfanityDetector().WithCustomDictionary(
		[]string{"anal", "あほ"}, // profanities
		[]string{"あほほ"}, // falsePositives
		[]string{"あほほし"}, // falseNegatives
	)

	unsanitizedString := "いい加減にしろ あほほし あほほ あほ anal ほ"
	expectedString := "いい加減にしろ **** あほほ ** **** ほ"

	// The sentence contains profanities, so detection must report true.
	isProfane := detector.IsProfane(unsanitizedString)
	if !isProfane {
		t.Error("Expected true, got false from sentence", unsanitizedString)
	}

	// Each censored word is replaced by one '*' per rune (not per byte), so
	// the censored output keeps the same rune length as the input.
	sanitizedString := detector.Censor(unsanitizedString)
	if sanitizedString != expectedString {
		t.Errorf("Expected '%s', got '%s'", expectedString, sanitizedString)
	}
}

0 comments on commit f76b6ad

Please sign in to comment.