Skip to content

Commit

Permalink
Calculate scores immediately
Browse files Browse the repository at this point in the history
  • Loading branch information
cyradin committed Jun 18, 2024
1 parent b890b18 commit 6cd47f3
Showing 1 changed file with 14 additions and 32 deletions.
46 changes: 14 additions & 32 deletions dictionary.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,24 +85,21 @@ func (d *dictionary) find(word string, n int) []match {
}

bm := d.alphabet.encode([]rune(word))

candidates := d.getCandidates(word, bm)
result := calcScores([]rune(word), candidates)
sort.Slice(candidates, func(i, j int) bool { return candidates[i].Score > candidates[j].Score })

if len(result) < n {
return result
if len(candidates) < n {
return candidates
}

return result[0:n]
return candidates[0:n]
}

// сandidate is an intermediate lookup result: a dictionary word together with
// the raw numbers that are later passed to calcScore.
//
// NOTE(review): the first letter of this identifier looks like a Cyrillic "с"
// (U+0441) rather than a Latin "c" — confirm before typing the name by hand.
type сandidate struct {
	// Word is the dictionary word under consideration (filled from docWord).
	Word string
	// Distance is the computed distance to the searched word; entries whose
	// distance exceeds d.maxErrors are never added.
	// Count is the value stored for the word in d.counts.
	Distance, Count int
}
func (d *dictionary) getCandidates(word string, bmSrc bitmap.Bitmap32) []match {
result := make([]match, 0, 50)

func (d *dictionary) getCandidates(word string, bmSrc bitmap.Bitmap32) []сandidate {
result := make([]сandidate, 0, 50)
wordRunes := []rune(word)

// "exact match" OR "candidate has all the same letters as the word but in different order"
key := sum(bmSrc)
Expand All @@ -117,10 +114,9 @@ func (d *dictionary) getCandidates(word string, bmSrc bitmap.Bitmap32) []сandid
if distance > d.maxErrors {
continue
}
result = append(result, сandidate{
Word: docWord,
Count: d.counts[id],
Distance: distance,
result = append(result, match{
Value: docWord,
Score: calcScore(wordRunes, []rune(docWord), distance, d.counts[id]),
})
}
// the most common mistake is a transposition of letters.
Expand All @@ -142,10 +138,9 @@ func (d *dictionary) getCandidates(word string, bmSrc bitmap.Bitmap32) []сandid
if distance > d.maxErrors {
continue
}
result = append(result, сandidate{
Word: docWord,
Count: d.counts[id],
Distance: distance,
result = append(result, match{
Value: docWord,
Score: calcScore(wordRunes, []rune(docWord), distance, d.counts[id]),
})
}
}
Expand Down Expand Up @@ -190,19 +185,6 @@ func (d *dictionary) computeCandidateBitmaps(bmSrc bitmap.Bitmap32) map[uint64]s
return bitmaps
}

// calcScores turns every candidate into a match by scoring it against src and
// returns the matches ordered from highest to lowest Score.
//
// The result is always non-nil and holds exactly one entry per candidate.
// candidates is only read. Ordering uses sort.Slice, which is not stable, so
// entries with equal scores come back in no guaranteed relative order.
func calcScores(src []rune, candidates []сandidate) []match {
	scored := make([]match, 0, len(candidates))
	for idx := range candidates {
		cand := &candidates[idx]
		score := calcScore(src, []rune(cand.Word), cand.Distance, cand.Count)
		scored = append(scored, match{Value: cand.Word, Score: score})
	}

	// Descending order: the best-scoring match comes first.
	byScoreDesc := func(a, b int) bool { return scored[a].Score > scored[b].Score }
	sort.Slice(scored, byScoreDesc)

	return scored
}

func calcScore(src []rune, candidate []rune, distance int, cnt int) float64 {
mult := math.Log1p(float64(cnt))
// if first letters are the same, increase score
Expand Down

0 comments on commit 6cd47f3

Please sign in to comment.