From 01cc7e4d8d4cef467824ed0a8132c4522a8f4d53 Mon Sep 17 00:00:00 2001 From: vk Date: Mon, 8 Nov 2021 20:07:56 +0200 Subject: [PATCH] Work optimization: part #2 --- src/sheldon_dictionary.erl | 153 ++++++++++++++++++++----------------- 1 file changed, 83 insertions(+), 70 deletions(-) diff --git a/src/sheldon_dictionary.erl b/src/sheldon_dictionary.erl index 695b9f3..57e8841 100644 --- a/src/sheldon_dictionary.erl +++ b/src/sheldon_dictionary.erl @@ -33,7 +33,7 @@ -export_type([language/0]). --define(CHARS, "abcdefghijklmnopqrstuvwxyz-"). +%-define(CHARS, "abcdefghijklmnopqrstuvwxyz-"). -type language() :: eng. @@ -133,7 +133,14 @@ bazinga_name(Lang) -> fill_cashe(EtsName, Source) -> {ok, SourceBin} = file:read_file(Source), Words = re:split(SourceBin, "\n"), % one word per line - [persistent_term:put({EtsName, Word}, Word) || Word <- Words], + EtsName = ets:new(EtsName, [named_table, bag, {read_concurrency, true}]), + + [begin + persistent_term:put({EtsName, Word}, Word), % FIXMI + LWord = binary_to_list(Word), + [ets:insert(EtsName, {lists:delete(Char, LWord), LWord}) || Char <- LWord] + end + || Word <- Words], ok. %%%=================================================================== @@ -142,71 +149,77 @@ fill_cashe(EtsName, Source) -> -spec candidates(string(), language()) -> [string()]. candidates(WordStr, Lang) -> - Word = list_to_binary(string:to_lower(WordStr)), - Acc = edits1(Word, []), - Result = edits2(Acc, []), - DictName = dictionary_name(Lang), - Candidates = know([Word | Result], Lang, DictName, []), - Candidates. - --spec know([binary()], language(), atom(), list()) -> [string()]. -know([], _Lang, _, Acc) -> - Acc; -know([Word | Words], Lang, DictName, Acc) -> - case persistent_term:get({DictName, Word}, undefined) of - undefined -> - know(Words, Lang, DictName, Acc); - Word -> - know(Words, Lang, DictName, [binary_to_list(Word) | Acc]) - end. - --spec edits1(binary(), [binary()]) -> [binary()]. -edits1(WordBinary, Acc0) -> - Word = binary_to_list(WordBinary), - Splits = [lists:split(I, Word) || I <- lists:seq(0, length(Word))], - Acc1 = deletes(Splits, Acc0), - Acc2 = transposes(Splits, Acc1), - Acc3 = replaces(Splits, Acc2), - inserts(Splits, Acc3). - --spec deletes([tuple()], list()) -> list(). -deletes([], Acc) -> - Acc; -deletes([{Left, [_ | Right]} | Splits], Acc) -> - deletes(Splits, [iolist_to_binary([Left, Right]) | Acc]); -deletes([_ | Splits], Acc) -> - deletes(Splits, Acc). - --spec transposes([tuple()], list()) -> list(). -transposes([], Acc) -> - Acc; -transposes([{Left, [A, B | Right]} | Splits], Acc) -> - transposes(Splits, [iolist_to_binary([Left, B, A, Right]) | Acc]); -transposes([_ | Splits], Acc) -> - transposes(Splits, Acc). - --spec replaces([tuple()], list()) -> list(). -replaces([], Acc) -> - Acc; -replaces([{Left, [_ | Right]} | Splits], Acc) -> - replaces(Splits, chars(?CHARS, Left, Right, Acc)); -replaces([_ | Splits], Acc) -> - replaces(Splits, Acc). - --spec inserts([tuple()], list()) -> list(). -inserts([], Acc) -> - Acc; -inserts([{Left, Right} | Splits], Acc) -> - inserts(Splits, chars(?CHARS, Left, Right, Acc)). - --spec chars([integer()], string(), string(), list()) -> list(). -chars([], _, _, Acc) -> - Acc; -chars([Char | Chars], Left, Right, Acc) -> - chars(Chars, Left, Right, [iolist_to_binary([Left, Char, Right]) | Acc]). - --spec edits2([binary()], [binary()]) -> [binary()]. -edits2([], Acc) -> - Acc; -edits2([H | T], Acc) -> - edits2(T, edits1(H, Acc)). + Word = string:to_lower(WordStr), + MaybeWords = [Word | [lists:delete(Char, Word) || Char <- Word]], + lists:usort([V + || {_, V} + <- lists:flatten([begin ets:lookup(dictionary_name(Lang), MW) end + || MW <- MaybeWords])]). + +%Acc = edits1(Word, []), +%Result = edits2(Acc, []), +%DictName = dictionary_name(Lang), +%Candidates = know([Word | Result], Lang, DictName, []), +%Candidates. + +%-spec know([binary()], language(), atom(), list()) -> [string()]. +%know([], _Lang, _, Acc) -> +% Acc; +%know([Word | Words], Lang, DictName, Acc) -> +% case persistent_term:get({DictName, Word}, undefined) of +% undefined -> +% know(Words, Lang, DictName, Acc); +% Word -> +% know(Words, Lang, DictName, [binary_to_list(Word) | Acc]) +% end. +% +%-spec edits1(binary(), [binary()]) -> [binary()]. +%edits1(WordBinary, Acc0) -> +% Word = binary_to_list(WordBinary), +% Splits = [lists:split(I, Word) || I <- lists:seq(0, length(Word))], +% Acc1 = deletes(Splits, Acc0), +% Acc2 = transposes(Splits, Acc1), +% Acc3 = replaces(Splits, Acc2), +% inserts(Splits, Acc3). +% +%-spec deletes([tuple()], list()) -> list(). +%deletes([], Acc) -> +% Acc; +%deletes([{Left, [_ | Right]} | Splits], Acc) -> +% deletes(Splits, [iolist_to_binary([Left, Right]) | Acc]); +%deletes([_ | Splits], Acc) -> +% deletes(Splits, Acc). +% +%-spec transposes([tuple()], list()) -> list(). +%transposes([], Acc) -> +% Acc; +%transposes([{Left, [A, B | Right]} | Splits], Acc) -> +% transposes(Splits, [iolist_to_binary([Left, B, A, Right]) | Acc]); +%transposes([_ | Splits], Acc) -> +% transposes(Splits, Acc). +% +%-spec replaces([tuple()], list()) -> list(). +%replaces([], Acc) -> +% Acc; +%replaces([{Left, [_ | Right]} | Splits], Acc) -> +% replaces(Splits, chars(?CHARS, Left, Right, Acc)); +%replaces([_ | Splits], Acc) -> +% replaces(Splits, Acc). +% +%-spec inserts([tuple()], list()) -> list(). +%inserts([], Acc) -> +% Acc; +%inserts([{Left, Right} | Splits], Acc) -> +% inserts(Splits, chars(?CHARS, Left, Right, Acc)). +% +%-spec chars([integer()], string(), string(), list()) -> list(). +%chars([], _, _, Acc) -> +% Acc; +%chars([Char | Chars], Left, Right, Acc) -> +% chars(Chars, Left, Right, [iolist_to_binary([Left, Char, Right]) | Acc]). +% +%-spec edits2([binary()], [binary()]) -> [binary()]. +%edits2([], Acc) -> +% Acc; +%edits2([H | T], Acc) -> +% edits2(T, edits1(H, Acc)).