-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: prebuilt gob-files for faster loading (#12)
* tmp: ac[string][]index -> []string * feat: ac[string]int -> [][]string * tmp: using ahocorasick lookup * tried using openacid/slim * feat: added dghubble to the mix * feat: scrapped any notion of a trie, but made a prebuilt datastructure instead * fix: removed commented out package
- Loading branch information
Showing
29 changed files
with
240 additions
and
1,279 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
dicts/it/data.go filter=lfs diff=lfs merge=lfs -text | ||
dicts/sv/data.go filter=lfs diff=lfs merge=lfs -text | ||
dicts/de/data.go filter=lfs diff=lfs merge=lfs -text | ||
dicts/en/data.go filter=lfs diff=lfs merge=lfs -text | ||
dicts/es/data.go filter=lfs diff=lfs merge=lfs -text | ||
dicts/fr/data.go filter=lfs diff=lfs merge=lfs -text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
data | ||
|
||
vendor | ||
.vscode | ||
# Testing and benchmarks | ||
*.out | ||
*.test | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,107 @@ | ||
package main | ||
|
||
import ( | ||
"encoding/gob" | ||
"fmt" | ||
"io/ioutil" | ||
"os" | ||
"sort" | ||
"strings" | ||
) | ||
|
||
// localStorage is the prebuilt dictionary structure that gets gob-encoded to
// the output file. Both fields are exported so encoding/gob can see them.
type localStorage struct {
	// Lookup maps a lower-cased word (a base or one of its forms) to the
	// index of its entry in Words.
	Lookup map[string]int

	// Words holds the distinct lists of base words; several Lookup keys may
	// share the same index when their lists are identical.
	Words [][]string
}
|
||
func main() { | ||
if len(os.Args) != 3 { | ||
fmt.Println("usage: cedar_creator [input] [output]") | ||
os.Exit(1) | ||
} | ||
inName, outName := os.Args[1], os.Args[2] | ||
f, err := os.Open(inName) | ||
if err != nil { | ||
fmt.Println(err) | ||
os.Exit(1) | ||
} | ||
defer f.Close() | ||
b, err := ioutil.ReadAll(f) | ||
if err != nil { | ||
fmt.Println(err) | ||
os.Exit(1) | ||
} | ||
|
||
ls := &localStorage{} | ||
m := make(map[string][]string) | ||
for _, line := range strings.Split(strings.TrimSpace(string(b)), "\n") { | ||
parts := strings.Split(strings.TrimSpace(line), "\t") | ||
if len(parts) == 2 { | ||
base := strings.ToLower(parts[0]) | ||
form := strings.ToLower(parts[1]) | ||
add(m, form, base) | ||
add(m, base, base) | ||
} else { | ||
fmt.Printf("the line >%s< is odd\n", line) | ||
} | ||
} | ||
joined2Index := make(map[string]int) | ||
var forms []string | ||
ls.Lookup = make(map[string]int) | ||
for k, v := range m { | ||
lookup := strings.Join(v, "|") | ||
index, ok := joined2Index[lookup] | ||
if !ok { | ||
index = len(ls.Words) | ||
joined2Index[lookup] = index | ||
ls.Words = append(ls.Words, v) | ||
} | ||
forms = append(forms, k) | ||
ls.Lookup[k] = index | ||
} | ||
|
||
count := 0.0 | ||
for _, form := range forms { | ||
if _, found := ls.Lookup[form]; !found { | ||
count++ | ||
} | ||
} | ||
if count > 0 { | ||
fmt.Printf("Couldn't find %f%% of the keys entered\n", count/float64(len(forms))*100) | ||
os.Exit(1) | ||
} | ||
|
||
f, err = os.Create(outName) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer f.Close() | ||
err = gob.NewEncoder(f).Encode(ls) | ||
if err != nil { | ||
fmt.Println(err) | ||
os.Exit(1) | ||
} | ||
fmt.Println("Words in dict:", len(forms)) | ||
fmt.Println("Saved to", outName, "and all is good") | ||
} | ||
|
||
// add records value under key in m. A key seen for the first time gets a
// one-element slice; otherwise value is appended only if it is not already
// present, and the slice is re-sorted before being stored back.
func add(m map[string][]string, key, value string) {
	existing, present := m[key]
	if !present {
		m[key] = []string{value}
		return
	}
	for _, have := range existing {
		if have == value {
			// Already recorded; nothing to change.
			return
		}
	}
	existing = append(existing, value)
	sort.Strings(existing)
	m[key] = existing
}
|
||
// contains reports whether value is an element of values.
// A nil or empty slice never contains anything.
func contains(values []string, value string) bool {
	for i := 0; i < len(values); i++ {
		if values[i] == value {
			return true
		}
	}
	return false
}
Git LFS file not shown
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
package dicts | ||
|
||
// LanguagePack is the contract every language dictionary package is expected
// to satisfy.
type LanguagePack interface {
	// GetResource returns the raw bytes of the language's dictionary
	// resource, or an error if they cannot be provided.
	// NOTE(review): presumably the gob-encoded prebuilt data; confirm
	// against an implementation.
	GetResource() ([]byte, error)

	// GetLocale returns the locale identifier for this language pack.
	GetLocale() string
}
Git LFS file not shown
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Git LFS file not shown
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Git LFS file not shown
Oops, something went wrong.