lithammer · lithammer · Dec 9, 2019 · Dec 7, 2019 · Dec 9, 2019
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
@@ -1,5 +1,5 @@
 name: Go
-on: [push]
+on: [push, pull_request]
 jobs:
   test:
     strategy:

diff --git a/fuzzy/fuzzy.go b/fuzzy/fuzzy.go
@@ -3,27 +3,48 @@
 package fuzzy
 
 import (
+	"bytes"
 	"unicode"
 	"unicode/utf8"
+
+	"golang.org/x/text/runes"
+	"golang.org/x/text/transform"
+	"golang.org/x/text/unicode/norm"
 )
 
-var noop = func(r rune) rune { return r }
+var foldTransformer = unicodeFoldTransformer{}
+var noopTransformer = transform.Nop
+var normalizeTransformer = transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
+var normalizeFoldTransformer = transform.Chain(normalizeTransformer, foldTransformer)
 
 // Match returns true if source matches target using a fuzzy-searching
 // algorithm. Note that it doesn't implement Levenshtein distance (see
 // RankMatch instead), but rather a simplified version where there's no
 // approximation. The method will return true only if each character in the
 // source can be found in the target and occurs after the preceding matches.
 func Match(source, target string) bool {
-	return match(source, target, noop)
+	return match(source, target, noopTransformer)
 }
 
 // MatchFold is a case-insensitive version of Match.
 func MatchFold(source, target string) bool {
-	return match(source, target, unicode.ToLower)
+	return match(source, target, foldTransformer)
+}
+
+// MatchNormalized is a unicode-normalized version of Match.
+func MatchNormalized(source, target string) bool {
+	return match(source, target, normalizeTransformer)
 }
 
-func match(source, target string, fn func(rune) rune) bool {
+// MatchNormalizedFold is a unicode-normalized and case-insensitive version of Match.
+func MatchNormalizedFold(source, target string) bool {
+	return match(source, target, normalizeFoldTransformer)
+}
+
+func match(source, target string, transformer transform.Transformer) bool {
+	source = stringTransform(source, transformer)
+	target = stringTransform(target, transformer)
+
 	lenDiff := len(target) - len(source)
 
 	if lenDiff < 0 {
@@ -37,7 +58,7 @@ func match(source, target string, fn func(rune) rune) bool {
 Outer:
 	for _, r1 := range source {
 		for i, r2 := range target {
-			if fn(r1) == fn(r2) {
+			if r1 == r2 {
 				target = target[i+utf8.RuneLen(r2):]
 				continue Outer
 			}
@@ -50,19 +71,29 @@ Outer:
 
 // Find will return a list of strings in targets that fuzzy matches source.
 func Find(source string, targets []string) []string {
-	return find(source, targets, noop)
+	return find(source, targets, noopTransformer)
 }
 
 // FindFold is a case-insensitive version of Find.
 func FindFold(source string, targets []string) []string {
-	return find(source, targets, unicode.ToLower)
+	return find(source, targets, foldTransformer)
 }
 
-func find(source string, targets []string, fn func(rune) rune) []string {
+// FindNormalized is a unicode-normalized version of Find.
+func FindNormalized(source string, targets []string) []string {
+	return find(source, targets, normalizeTransformer)
+}
+
+// FindNormalizedFold is a unicode-normalized and case-insensitive version of Find.
+func FindNormalizedFold(source string, targets []string) []string {
+	return find(source, targets, normalizeFoldTransformer)
+}
+
+func find(source string, targets []string, transformer transform.Transformer) []string {
 	var matches []string
 
 	for _, target := range targets {
-		if match(source, target, fn) {
+		if match(source, target, transformer) {
 			matches = append(matches, target)
 		}
 	}
@@ -77,21 +108,34 @@ func find(source string, targets []string, fn func(rune) rune) []string {
 // the Levenshtein calculation, only deletions need be considered, required
 // additions and substitutions would fail the match test.
 func RankMatch(source, target string) int {
-	return rank(source, target, noop)
+	return rank(source, target, noopTransformer)
 }
 
 // RankMatchFold is a case-insensitive version of RankMatch.
 func RankMatchFold(source, target string) int {
-	return rank(source, target, unicode.ToLower)
+	return rank(source, target, foldTransformer)
 }
 
-func rank(source, target string, fn func(rune) rune) int {
+// RankMatchNormalized is a unicode-normalized version of RankMatch.
+func RankMatchNormalized(source, target string) int {
+	return rank(source, target, normalizeTransformer)
+}
+
+// RankMatchNormalizedFold is a unicode-normalized and case-insensitive version of RankMatch.
+func RankMatchNormalizedFold(source, target string) int {
+	return rank(source, target, normalizeFoldTransformer)
+}
+
+func rank(source, target string, transformer transform.Transformer) int {
 	lenDiff := len(target) - len(source)
 
 	if lenDiff < 0 {
 		return -1
 	}
 
+	source = stringTransform(source, transformer)
+	target = stringTransform(target, transformer)
+
 	if lenDiff == 0 && source == target {
 		return 0
 	}
@@ -101,7 +145,7 @@ func rank(source, target string, fn func(rune) rune) int {
 Outer:
 	for _, r1 := range source {
 		for i, r2 := range target {
-			if fn(r1) == fn(r2) {
+			if r1 == r2 {
 				target = target[i+utf8.RuneLen(r2):]
 				continue Outer
 			} else {
@@ -120,23 +164,29 @@ Outer:
 // RankFind is similar to Find, except it will also rank all matches using
 // Levenshtein distance.
 func RankFind(source string, targets []string) Ranks {
-	var r Ranks
-
-	for index, target := range targets {
-		if match(source, target, noop) {
-			distance := LevenshteinDistance(source, target)
-			r = append(r, Rank{source, target, distance, index})
-		}
-	}
-	return r
+	return rankFind(source, targets, noopTransformer)
 }
 
 // RankFindFold is a case-insensitive version of RankFind.
 func RankFindFold(source string, targets []string) Ranks {
+	return rankFind(source, targets, foldTransformer)
+}
+
+// RankFindNormalized is a unicode-normalized version of RankFind.
+func RankFindNormalized(source string, targets []string) Ranks {
+	return rankFind(source, targets, normalizeTransformer)
+}
+
+// RankFindNormalizedFold is a unicode-normalized and case-insensitive version of RankFind.
+func RankFindNormalizedFold(source string, targets []string) Ranks {
+	return rankFind(source, targets, normalizeFoldTransformer)
+}
+
+func rankFind(source string, targets []string, transformer transform.Transformer) Ranks {
 	var r Ranks
 
 	for index, target := range targets {
-		if match(source, target, unicode.ToLower) {
+		if match(source, target, transformer) {
 			distance := LevenshteinDistance(source, target)
 			r = append(r, Rank{source, target, distance, index})
 		}
@@ -171,3 +221,33 @@ func (r Ranks) Swap(i, j int) {
 func (r Ranks) Less(i, j int) bool {
 	return r[i].Distance < r[j].Distance
 }
+
+func stringTransform(s string, t transform.Transformer) (transformed string) {
+	var err error
+	transformed, _, err = transform.String(t, s)
+	if err != nil {
+		transformed = s
+	}
+
+	return
+}
+
+type unicodeFoldTransformer struct{}
+
+func (unicodeFoldTransformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
+	runes := bytes.Runes(src)
+	var lowerRunes []rune
+	for _, r := range runes {
+		lowerRunes = append(lowerRunes, unicode.ToLower(r))
+	}
+
+	srcBytes := []byte(string(lowerRunes))
+	n := copy(dst, srcBytes)
+	if n < len(srcBytes) {
+		err = transform.ErrShortDst
+	}
+
+	return n, n, err
+}
+
+func (unicodeFoldTransformer) Reset() {}