diff --git a/dag.go b/dag.go index 6fbcd90..dd15cb9 100644 --- a/dag.go +++ b/dag.go @@ -123,6 +123,7 @@ func (seg *Segmenter) Analyze(text []string, t1 string, by ...bool) (az []Analyz return } +// getDag gets a directed acyclic graph (DAG) from a slice of runes (containing Unicode characters) func (seg *Segmenter) getDag(runes []rune) map[int][]int { dag := make(map[int][]int) n := len(runes) diff --git a/dict_1.16.go b/dict_1.16.go index b388892..e90fceb 100644 --- a/dict_1.16.go +++ b/dict_1.16.go @@ -87,7 +87,7 @@ func (seg *Segmenter) LoadDictEmbed(dict ...string) (err error) { return seg.loadZh() } -// LoadDictStr load the dictionary from string +// LoadDictStr load the dictionary from dict path func (seg *Segmenter) LoadDictStr(dict string) error { if seg.Dict == nil { seg.Dict = NewDict() @@ -153,7 +153,7 @@ func (seg *Segmenter) LoadStopEmbed(dict ...string) (err error) { return seg.LoadStopStr(stopDict) } -// LoadDictStr load the stop dictionary from string +// LoadStopStr load the stop dictionary from dict path func (seg *Segmenter) LoadStopStr(dict string) error { if seg.StopWordMap == nil { seg.StopWordMap = make(map[string]bool) diff --git a/dict_util.go b/dict_util.go index 72c0afe..58bf42c 100644 --- a/dict_util.go +++ b/dict_util.go @@ -29,7 +29,7 @@ import ( ) var ( - // ToLower set alpha tolower + // ToLower set alpha to lowercase ToLower = true ) @@ -238,7 +238,7 @@ func (seg *Segmenter) GetIdfPath(files ...string) []string { return files } -// Read read the dict flie +// Read read the dict file func (seg *Segmenter) Read(file string) error { if !seg.SkipLog { log.Printf("Load the gse dictionary: \"%s\" ", file) diff --git a/dictionary.go b/dictionary.go index e30a6d6..31a85bb 100755 --- a/dictionary.go +++ b/dictionary.go @@ -131,7 +131,7 @@ func (dict *Dictionary) Find(word []byte) (float64, string, bool) { } // Value find word in the dictionary -// retrun the word's value and id +// return the word's value and id func (dict *Dictionary) 
Value(word []byte) (val, id int, err error) { id, err = dict.trie.Jump(word, id) if err != nil { diff --git a/examples/dict/main.go b/examples/dict/main.go index 32fd5d6..e7f1be0 100644 --- a/examples/dict/main.go +++ b/examples/dict/main.go @@ -67,7 +67,7 @@ func segment() { segments := seg.Segment(text1) // fmt.Println(gse.ToString(segments, false)) fmt.Println(gse.ToString(segments)) - //"旧金山湾/n 金门大桥/nz " + // "旧金山湾/n 金门大桥/nz " // 搜索模式主要用于给搜索引擎提供尽可能多的关键字 segs := seg.ModeSegment(text1, true) diff --git a/gse.go b/gse.go index dae8b6a..084d9ba 100644 --- a/gse.go +++ b/gse.go @@ -141,14 +141,14 @@ func (seg *Segmenter) HMMCutMod(str string, prob ...map[rune]float64) []string { return hmm.Cut(str) } -// Slice use modeSegment segment retrun []string +// Slice use modeSegment segment return []string // using search mode if searchMode is true func (seg *Segmenter) Slice(s string, searchMode ...bool) []string { segs := seg.ModeSegment([]byte(s), searchMode...) return ToSlice(segs, searchMode...) } -// Slice use modeSegment segment retrun string +// String use modeSegment segment return string // using search mode if searchMode is true func (seg *Segmenter) String(s string, searchMode ...bool) string { segs := seg.ModeSegment([]byte(s), searchMode...) 
diff --git a/hmm/idf/idf.go b/hmm/idf/idf.go index c43f405..65e1f42 100644 --- a/hmm/idf/idf.go +++ b/hmm/idf/idf.go @@ -58,7 +58,7 @@ func (i *Idf) NumTokens() int { return i.seg.Dict.NumTokens() } -// TotalFreq reruen the IDF total frequency +// TotalFreq return the IDF total frequency func (i *Idf) TotalFreq() float64 { return i.seg.Dict.TotalFreq() } diff --git a/hmm/pos/dict.go b/hmm/pos/dict.go index 7e0ba91..cde3159 100644 --- a/hmm/pos/dict.go +++ b/hmm/pos/dict.go @@ -44,12 +44,12 @@ func (d *Dict) updateLogTotal() { d.logTotal = math.Log(d.total) } -// Freq find the word return the frequency and existenced +// Freq find the word return the word's freq, pos and existence func (d *Dict) Freq(key string) (float64, string, bool) { return d.Seg.Find(key) } -// Pos find the key return the POS and existenced +// Pos find the key return the POS and existence func (d *Dict) Pos(key string) (string, bool) { value, _, _ := d.Seg.Value(key) if value == 0 { diff --git a/seg_utils.go b/seg_utils.go index cb9797f..4ad16c4 100755 --- a/seg_utils.go +++ b/seg_utils.go @@ -20,7 +20,7 @@ import ( "fmt" ) -// ToString converts a segments slice to string retrun the string +// ToString converts a segments slice to string return the string // // two output modes: // @@ -75,7 +75,7 @@ func tokenToBytes(token *Token) (output []byte) { return } -// ToSlice converts a segments to slice retrun string slice +// ToSlice converts a segments to slice return string slice func ToSlice(segs []Segment, searchMode ...bool) (output []string) { var mode bool if len(searchMode) > 0 { @@ -158,7 +158,7 @@ func tokenToPos(token *Token) (output []SegPos) { return } -// let make multiple []Text into one string ooutput +// let make multiple []Text into one string output func textToString(text []Text) (output string) { for _, word := range text { output += string(word) @@ -171,7 +171,7 @@ func textSliceToString(text []Text) string { return Join(text) } -// retrun total length of text slice +// return 
total length of text slice func textSliceByteLen(text []Text) (length int) { for _, word := range text { length += len(word) diff --git a/segmenter.go b/segmenter.go index bbd7c95..0cd1125 100755 --- a/segmenter.go +++ b/segmenter.go @@ -43,7 +43,7 @@ type Segmenter struct { LoadNoFreq bool // MinTokenFreq load min freq token MinTokenFreq float64 - // TextFreq add token frenquency when not specified freq + // TextFreq add token frequency when not specified freq TextFreq string // SkipLog set skip log print @@ -65,15 +65,15 @@ type jumper struct { token *Token } -// Segment use shortest path to segment the text +// Segment use the shortest path to segment the text // // input parameter: // -// bytes UTF8 text []byte +// bytes UTF8 text []byte // // output: // -// []Segment retrun segments result +// []Segment return segments result func (seg *Segmenter) Segment(bytes []byte) []Segment { return seg.internalSegment(bytes, false) } @@ -237,7 +237,7 @@ func (seg *Segmenter) SplitTextToWords(text Text) []Text { current += size } - // procsss last byte is alpha and num + // process last byte is alpha and num if inAlphanumeric && !seg.AlphaNum { if current != 0 { output = append(output, toLow(text[alphanumericStart:current])) diff --git a/token.go b/token.go index 2a30f49..5df6ed5 100755 --- a/token.go +++ b/token.go @@ -50,12 +50,12 @@ func (s *Segment) Start() int { return s.start } -// End retrun the end byte position of the segment (not including this) +// End return the end byte position of the segment (not including this) func (s *Segment) End() int { return s.end } -// Token retrun the segment token information +// Token return the segment token information func (s *Segment) Token() *Token { return s.token } @@ -71,13 +71,13 @@ type Token struct { // a segment string,it's []Text text []Text - // a frenquency of the token + // a frequency of the token freq float64 // part of speech label pos string - // log2(total frequency/this segment frenquency),equal to 
log2(1/p(segment))), + // log2(total frequency/this segment frequency), equal to log2(1/p(segment)), // used by the short path as the path length of the clause in dynamic programming. // Solving for the maximum of prod(p(segment)) is equivalent to solving for the minimum of // the minimum of sum(distance(segment)),