Skip to content

Commit

Permalink
Merge pull request #1 from LyricTian/dev
Browse files Browse the repository at this point in the history
增加敏感词字符替换处理
  • Loading branch information
LyricTian committed Jun 8, 2016
2 parents f504489 + d0963bd commit a32361e
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 30 deletions.
5 changes: 5 additions & 0 deletions filter.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,4 +29,9 @@ type DirtyFilter interface {
// 返回可读流中出现的敏感词及出现次数,如果敏感词不存在则返回nil
// 如果出现异常,则返回error
FilterReaderResult(reader io.Reader, excludes ...rune) (map[string]int, error)

// Replace 使用字符替换文本中的敏感词
// delim 替换的字符
// 如果出现异常,则返回error
Replace(text string, delim rune) (string, error)
}
60 changes: 60 additions & 0 deletions nodefilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,15 @@ func (nf *nodeFilter) FilterReaderResult(reader io.Reader, excludes ...rune) (ma
return data, nil
}

// Replace returns a copy of text in which every rune position reported by
// doIndexes (the positions that belong to a matched sensitive word) is
// overwritten with delim. Positions are rune indexes, not byte offsets, so
// each matched character is replaced by exactly one delim regardless of its
// encoded width. The returned error is always nil.
func (nf *nodeFilter) Replace(text string, delim rune) (string, error) {
	runes := []rune(text)
	for _, pos := range nf.doIndexes(runes) {
		runes[pos] = delim
	}
	return string(runes), nil
}

func (nf *nodeFilter) checkExclude(u rune, excludes ...rune) bool {
if len(excludes) == 0 {
return false
Expand Down Expand Up @@ -188,3 +197,54 @@ func (nf *nodeFilter) doFilter(uchars []rune, data map[string]int) {
data[result[i]] = c + 1
}
}

// doIndexes scans uchars against the word trie rooted at nf.root and returns,
// in ascending order, the index of every rune that lies inside at least one
// matched word. It returns nil when nothing matches.
//
// Every position is tried as a possible word start. From each start the trie
// is followed for as long as consecutive runes have a matching child node;
// whenever a node flagged as a word end is reached, the span from the start
// up to that rune is marked as covered. A walk that stops before reaching any
// word end marks nothing.
func (nf *nodeFilter) doIndexes(uchars []rune) (idexs []int) {
	// covered[k] is true once rune k is known to belong to a matched word.
	// One flag per rune removes the need to rescan the result slice for
	// duplicates each time overlapping matches are recorded.
	covered := make([]bool, len(uchars))

	for start := range uchars {
		n := nf.root
		// from is the first position of the current walk not yet marked.
		from := start
		for pos := start; pos < len(uchars); pos++ {
			next, ok := n.child[uchars[pos]]
			if !ok {
				break
			}
			n = next
			if !n.end {
				continue
			}
			// A word ends here; keep walking afterwards because a longer
			// word may share this prefix.
			for k := from; k <= pos; k++ {
				covered[k] = true
			}
			from = pos + 1
		}
	}

	for k, hit := range covered {
		if hit {
			idexs = append(idexs, k)
		}
	}
	return idexs
}

// appendTo returns dst extended with every element of src that does not
// already occur in dst, keeping src's order. Membership is checked against
// dst as it was on entry, so values repeated within src itself are each
// appended.
func (nf *nodeFilter) appendTo(dst, src []int) []int {
	var fresh []int
candidates:
	for _, v := range src {
		for _, have := range dst {
			if have == v {
				// Already present in dst; move on to the next candidate.
				continue candidates
			}
		}
		fresh = append(fresh, v)
	}
	return append(dst, fresh...)
}
11 changes: 11 additions & 0 deletions nodefilter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package filter_test

import (
"bytes"
"strings"

"github.com/antlinker/go-dirtyfilter"
. "github.com/onsi/ginkgo"
Expand Down Expand Up @@ -73,4 +74,14 @@ var _ = Describe("使用节点过滤器过滤敏感词数据", func() {
Expect(result).To(Equal(map[string]int{"陈水扁": 1}))
})

It("替换文本中的敏感词数据", func() {
nodeFilter = filter.NewNodeFilter([]string{"共产主义"})
data, err := nodeFilter.Replace(filterText, '*')
if err != nil {
Fail(err.Error())
return
}
Expect(data).To(Equal(strings.Replace(filterText, "共产主义", "****", 1)))
})

})
2 changes: 1 addition & 1 deletion sample/memory/main.go → sample/memory_sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import (
)

var (
filterText = `我是需要过滤的内容,内容为:**文@@件,需要过滤。。。`
filterText = `我是需要过滤的内容,内容为:**文*@@件**名,需要过滤。。。`
)

func main() {
Expand Down
29 changes: 0 additions & 29 deletions wercker.yml

This file was deleted.

0 comments on commit a32361e

Please sign in to comment.