Skip to content

Commit

Permalink
Work optimization: part #2
Browse files Browse the repository at this point in the history
  • Loading branch information
vkatsuba committed Nov 8, 2021
1 parent bb7ff53 commit 01cc7e4
Showing 1 changed file with 83 additions and 70 deletions.
153 changes: 83 additions & 70 deletions src/sheldon_dictionary.erl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

-export_type([language/0]).

-define(CHARS, "abcdefghijklmnopqrstuvwxyz-").
%-define(CHARS, "abcdefghijklmnopqrstuvwxyz-").

-type language() :: eng.

Expand Down Expand Up @@ -133,7 +133,14 @@ bazinga_name(Lang) ->
%% Loads the word list stored at Source (one word per line) into two caches:
%%   * persistent_term, under the key {EtsName, Word}, for exact lookups;
%%   * a named ETS bag table EtsName, holding {Deletion, Word} rows where
%%     Deletion is Word with exactly one character removed.
%% Crashes (badmatch) if the file cannot be read, and (badarg) if a table
%% named EtsName already exists. Returns ok.
fill_cashe(EtsName, Source) ->
    {ok, SourceBin} = file:read_file(Source),
    %% One word per line. A trailing newline makes re:split/2 return an empty
    %% chunk, which is not a word and must not end up in the dictionary.
    Words = [Word || Word <- re:split(SourceBin, "\n"), Word =/= <<>>],
    EtsName = ets:new(EtsName, [named_table, bag, {read_concurrency, true}]),
    lists:foreach(fun(Word) -> cache_word(EtsName, Word) end, Words),
    ok.

%% Stores a single dictionary word in both caches.
-spec cache_word(atom(), binary()) -> ok.
cache_word(EtsName, Word) ->
    persistent_term:put({EtsName, Word}, Word), % FIXME
    LWord = binary_to_list(Word),
    true = ets:insert(EtsName, [{Deletion, LWord} || Deletion <- word_deletions(LWord)]),
    ok.

%% Returns every distinct string obtained by removing the character at one
%% position of Word. Deleting by position (rather than with lists:delete/2,
%% which only ever removes the first occurrence of a character) keeps the
%% variants for repeated letters, e.g. "abca" -> "abc".
-spec word_deletions(string()) -> [string()].
word_deletions(Word) ->
    lists:usort([Left ++ Right
                 || N <- lists:seq(1, length(Word)),
                    {Left, [_ | Right]} <- [lists:split(N - 1, Word)]]).

%%%===================================================================
Expand All @@ -142,71 +149,77 @@ fill_cashe(EtsName, Source) ->

%% Returns the dictionary words reachable from WordStr: the lower-cased word
%% itself plus everything produced by applying edits1/2 and then edits2/2,
%% filtered through know/4 against the dictionary for Lang.
-spec candidates(string(), language()) -> [string()].
candidates(WordStr, Lang) ->
    Lowered = list_to_binary(string:to_lower(WordStr)),
    FirstPass = edits1(Lowered, []),
    SecondPass = edits2(FirstPass, []),
    know([Lowered | SecondPass], Lang, dictionary_name(Lang), []).

%% Walks Words and keeps those stored in persistent_term under
%% {DictName, Word}; each hit is converted to a string and prepended to Acc.
%% The language argument is accepted but not used.
-spec know([binary()], language(), atom(), list()) -> [string()].
know(Words, _Lang, DictName, Acc) ->
    lists:foldl(fun(Word, Known) ->
                   case persistent_term:get({DictName, Word}, undefined) of
                       undefined ->
                           Known;
                       Word ->
                           [binary_to_list(Word) | Known]
                   end
                end,
                Acc,
                Words).

%% Generates the single-edit variants of WordBinary and prepends them to Acc0.
%% The word is cut at every position into {Left, Right} pairs, which are then
%% fed through deletes/2, transposes/2, replaces/2 and inserts/2 in that order.
-spec edits1(binary(), [binary()]) -> [binary()].
edits1(WordBinary, Acc0) ->
    Chars = binary_to_list(WordBinary),
    Positions = lists:seq(0, length(Chars)),
    Splits = lists:map(fun(N) -> lists:split(N, Chars) end, Positions),
    WithDeletes = deletes(Splits, Acc0),
    WithTransposes = transposes(Splits, WithDeletes),
    inserts(Splits, replaces(Splits, WithTransposes)).

%% For every split whose right part is non-empty, drops the first character of
%% that right part and prepends the rejoined binary to Acc. Other splits are
%% skipped.
-spec deletes([tuple()], list()) -> list().
deletes(Splits, Acc) ->
    lists:foldl(fun({Left, [_ | Right]}, Built) ->
                       [iolist_to_binary([Left, Right]) | Built];
                   (_, Built) ->
                       Built
                end,
                Acc,
                Splits).

%% For every split whose right part has at least two characters, swaps those
%% two leading characters and prepends the rejoined binary to Acc. Other
%% splits are skipped.
-spec transposes([tuple()], list()) -> list().
transposes(Splits, Acc) ->
    lists:foldl(fun({Left, [First, Second | Right]}, Built) ->
                       [iolist_to_binary([Left, Second, First, Right]) | Built];
                   (_, Built) ->
                       Built
                end,
                Acc,
                Splits).

%% For every split whose right part is non-empty, substitutes each character
%% of ?CHARS for the first character of that right part (via chars/4) and
%% accumulates the results onto Acc. Other splits are skipped.
-spec replaces([tuple()], list()) -> list().
replaces(Splits, Acc) ->
    lists:foldl(fun({Left, [_ | Right]}, Built) ->
                       chars(?CHARS, Left, Right, Built);
                   (_, Built) ->
                       Built
                end,
                Acc,
                Splits).

%% For every split, places each character of ?CHARS between the left and right
%% parts (via chars/4) and accumulates the results onto Acc.
-spec inserts([tuple()], list()) -> list().
inserts(Splits, Acc) ->
    lists:foldl(fun({Left, Right}, Built) -> chars(?CHARS, Left, Right, Built) end,
                Acc,
                Splits).

%% For each character in the first argument, builds the binary
%% Left ++ [Char] ++ Right and prepends it to Acc.
-spec chars([integer()], string(), string(), list()) -> list().
chars(Chars, Left, Right, Acc) ->
    lists:foldl(fun(Char, Built) -> [iolist_to_binary([Left, Char, Right]) | Built] end,
                Acc,
                Chars).

%% Applies edits1/2 to every word in the list, threading the accumulator
%% through, so Acc ends up holding the edits of all given words.
-spec edits2([binary()], [binary()]) -> [binary()].
edits2(Words, Acc) ->
    lists:foldl(fun edits1/2, Acc, Words).
%% Added-side body from this diff; presumably the new body of candidates/2,
%% since it reads WordStr and Lang — confirm against the committed file.
%% Lower-cases the input, builds the word itself plus one variant per
%% character (that character removed with lists:delete/2), looks every variant
%% up in the ETS table named by dictionary_name(Lang), and returns the sorted,
%% de-duplicated second elements of the matching tuples.
Word = string:to_lower(WordStr),
%% NOTE(review): lists:delete/2 removes only the FIRST occurrence of Char, so
%% for repeated letters the same variant is produced more than once and the
%% deletion at the later position is never generated — confirm this matches
%% how the table keys are built.
MaybeWords = [Word | [lists:delete(Char, Word) || Char <- Word]],
%% NOTE(review): dictionary_name(Lang) is re-evaluated for every lookup below.
lists:usort([V
|| {_, V}
<- lists:flatten([begin ets:lookup(dictionary_name(Lang), MW) end
|| MW <- MaybeWords])]).

%Acc = edits1(Word, []),
%Result = edits2(Acc, []),
%DictName = dictionary_name(Lang),
%Candidates = know([Word | Result], Lang, DictName, []),
%Candidates.

%-spec know([binary()], language(), atom(), list()) -> [string()].
%know([], _Lang, _, Acc) ->
% Acc;
%know([Word | Words], Lang, DictName, Acc) ->
% case persistent_term:get({DictName, Word}, undefined) of
% undefined ->
% know(Words, Lang, DictName, Acc);
% Word ->
% know(Words, Lang, DictName, [binary_to_list(Word) | Acc])
% end.
%
%-spec edits1(binary(), [binary()]) -> [binary()].
%edits1(WordBinary, Acc0) ->
% Word = binary_to_list(WordBinary),
% Splits = [lists:split(I, Word) || I <- lists:seq(0, length(Word))],
% Acc1 = deletes(Splits, Acc0),
% Acc2 = transposes(Splits, Acc1),
% Acc3 = replaces(Splits, Acc2),
% inserts(Splits, Acc3).
%
%-spec deletes([tuple()], list()) -> list().
%deletes([], Acc) ->
% Acc;
%deletes([{Left, [_ | Right]} | Splits], Acc) ->
% deletes(Splits, [iolist_to_binary([Left, Right]) | Acc]);
%deletes([_ | Splits], Acc) ->
% deletes(Splits, Acc).
%
%-spec transposes([tuple()], list()) -> list().
%transposes([], Acc) ->
% Acc;
%transposes([{Left, [A, B | Right]} | Splits], Acc) ->
% transposes(Splits, [iolist_to_binary([Left, B, A, Right]) | Acc]);
%transposes([_ | Splits], Acc) ->
% transposes(Splits, Acc).
%
%-spec replaces([tuple()], list()) -> list().
%replaces([], Acc) ->
% Acc;
%replaces([{Left, [_ | Right]} | Splits], Acc) ->
% replaces(Splits, chars(?CHARS, Left, Right, Acc));
%replaces([_ | Splits], Acc) ->
% replaces(Splits, Acc).
%
%-spec inserts([tuple()], list()) -> list().
%inserts([], Acc) ->
% Acc;
%inserts([{Left, Right} | Splits], Acc) ->
% inserts(Splits, chars(?CHARS, Left, Right, Acc)).
%
%-spec chars([integer()], string(), string(), list()) -> list().
%chars([], _, _, Acc) ->
% Acc;
%chars([Char | Chars], Left, Right, Acc) ->
% chars(Chars, Left, Right, [iolist_to_binary([Left, Char, Right]) | Acc]).
%
%-spec edits2([binary()], [binary()]) -> [binary()].
%edits2([], Acc) ->
% Acc;
%edits2([H | T], Acc) ->
% edits2(T, edits1(H, Acc)).

0 comments on commit 01cc7e4

Please sign in to comment.