Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: speed up word look up. #5984

Merged
merged 3 commits into from
Jul 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import type { SuggestionCollector, SuggestionResult } from 'cspell-trie-lib';
import { CASE_INSENSITIVE_PREFIX, CompoundWordsMethod } from 'cspell-trie-lib';
import { genSequence } from 'gensequence';

import { isDefined } from '../util/util.js';
import * as Defaults from './defaults.js';
Expand Down Expand Up @@ -138,7 +137,7 @@ function isWordInAnyDictionary(
word: string,
options: SearchOptions,
): SpellingDictionary | undefined {
return genSequence(dicts).first((dict) => dict.has(word, options));
return dicts.find((dict) => dict.has(word, options));
}

function findInAnyDictionary(
Expand All @@ -160,15 +159,15 @@ function isNoSuggestWordInAnyDictionary(
word: string,
options: HasOptions,
): SpellingDictionary | undefined {
return genSequence(dicts).first((dict) => dict.isNoSuggestWord(word, options));
return dicts.find((dict) => dict.isNoSuggestWord(word, options));
}

function isWordForbiddenInAnyDictionary(
dicts: SpellingDictionary[],
word: string,
ignoreCase: boolean | undefined,
): SpellingDictionary | undefined {
return genSequence(dicts).first((dict) => dict.isForbidden(word, ignoreCase));
return dicts.find((dict) => dict.isForbidden(word, ignoreCase));
}

export function isSpellingDictionaryCollection(dict: SpellingDictionary): dict is SpellingDictionaryCollection {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import type {
} from 'cspell-trie-lib';
import { CompoundWordsMethod, decodeTrie, suggestionCollector } from 'cspell-trie-lib';

import { autoCache, createCache01 } from '../util/AutoCache.js';
import { clean } from '../util/clean.js';
import { createMapper, createRepMapper } from '../util/repMap.js';
import * as Defaults from './defaults.js';
Expand All @@ -27,7 +26,6 @@ const findWordOptionsCaseSensitive: FindWordOptions = Object.freeze({ caseSensit
const findWordOptionsNotCaseSensitive: FindWordOptions = Object.freeze({ caseSensitive: false });

export class SpellingDictionaryFromTrie implements SpellingDictionary {
static readonly cachedWordsLimit = 50_000;
private _size = 0;
readonly knownWords = new Set<string>();
readonly unknownWords = new Set<string>();
Expand Down Expand Up @@ -95,9 +93,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
return { useCompounds, ignoreCase };
}

private _find = findCache((word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) =>
this.findAnyForm(word, useCompounds, ignoreCase),
);
private _find = (word: string, useCompounds: number | boolean | undefined, ignoreCase: boolean) =>
this.findAnyForm(word, useCompounds, ignoreCase);

private findAnyForm(
word: string,
Expand Down Expand Up @@ -147,12 +144,8 @@ export class SpellingDictionaryFromTrie implements SpellingDictionary {
}

public isForbidden(word: string, _ignoreCaseAndAccents?: boolean): boolean {
return this._isForbidden(word);
}

private _isForbidden = autoCache((word: string): boolean => {
return this.trie.isForbiddenWord(word);
});
}

public suggest(word: string, suggestOptions: SuggestOptions = {}): SuggestionResult[] {
return this._suggest(word, suggestOptions);
Expand Down Expand Up @@ -214,38 +207,6 @@ export function createSpellingDictionaryFromTrieFile(
return new SpellingDictionaryFromTrie(trie, name, options, source);
}

type FindFunction = (
word: string,
useCompounds: number | boolean | undefined,
ignoreCase: boolean,
) => FindAnyFormResult | undefined;

interface CachedFind {
useCompounds: number | boolean | undefined;
ignoreCase: boolean;
findResult: FindAnyFormResult | undefined;
}

function findCache(fn: FindFunction, size = 2000): FindFunction {
const cache = createCache01<CachedFind>(size);

function find(
word: string,
useCompounds: number | boolean | undefined,
ignoreCase: boolean,
): FindAnyFormResult | undefined {
const r = cache.get(word);
if (r !== undefined && r.useCompounds === useCompounds && r.ignoreCase === ignoreCase) {
return r.findResult;
}
const findResult = fn(word, useCompounds, ignoreCase);
cache.set(word, { useCompounds, ignoreCase, findResult });
return findResult;
}

return find;
}

function* outerWordForms(word: string, mapWord: (word: string) => string[]): Iterable<string> {
// Only generate the needed forms.
const sent = new Set<string>();
Expand Down
12 changes: 6 additions & 6 deletions packages/cspell-dictionary/src/perf/has.perf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,28 +57,28 @@ suite('dictionary has Not', async (test) => {
const dict3 = createSpellingDictionary(words3, 'test3', import.meta.url);
const dictCol = createCollection([dict, dict2, dict3], 'test-collection');

test('dictionary has 100k words', () => {
test('dictionary has not 100k words', () => {
checkWords(dict, missingWords, false);
});

test('dictionary has 100k words (2nd time)', () => {
test('dictionary has not 100k words (2nd time)', () => {
checkWords(dict, missingWords, false);
});

test('collection has 100k words', () => {
test('collection has not 100k words', () => {
checkWords(dictCol, missingWords, false);
});

test('iTrie has 100k words', () => {
test('iTrie has not 100k words', () => {
checkWords(iTrie, missingWords, false);
});

test('iTrie.hasWord has 100k words', () => {
test('iTrie.hasWord has not 100k words', () => {
const dict = { has: (word: string) => iTrie.hasWord(word, true) };
checkWords(dict, missingWords, false);
});

test('iTrie.data has 100k words', () => {
test('iTrie.data has not 100k words', () => {
checkWords(iTrie.data, missingWords, false);
});
});
Expand Down
54 changes: 54 additions & 0 deletions packages/cspell-dictionary/src/perf/misc.perf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { genSequence } from 'gensequence';
import { loremIpsum } from 'lorem-ipsum';
import { suite } from 'perf-insight';

suite('Array Primitives', async (test) => {
const words = genWords(20);
const toFind = [...words, 'not-a-word'];
const iterations = 1000;

test('Array.find', () => {
for (let i = 0; i < iterations; ++i) {
for (const word of toFind) {
words.find((w) => w === word);
}
}
});

test('genSequence.first', () => {
for (let i = 0; i < iterations; ++i) {
for (const word of toFind) {
genSequence(words).first((w) => w === word);
}
}
});
});

function genWords(count: number, includeForbidden = true): string[] {
const setOfWords = new Set(loremIpsum({ count }).split(' '));

if (includeForbidden) {
setOfWords.add('!forbidden');
setOfWords.add('!bad-word');
setOfWords.add('!rejection');
}

while (setOfWords.size < count) {
const words = [...setOfWords];
for (const a of words) {
for (const b of words) {
if (a !== b) {
setOfWords.add(a + b);
}
if (setOfWords.size >= count) {
break;
}
}
if (setOfWords.size >= count) {
break;
}
}
}

return [...setOfWords];
}
40 changes: 26 additions & 14 deletions packages/cspell-dictionary/src/util/AutoCache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,50 +10,62 @@ export interface CacheStats {
swaps: number;
}

class Cache01<R> implements CacheStats {
private count = 0;
private cache0: Record<string, R> = Object.create(null);
private cache1: Record<string, R> = Object.create(null);

abstract class Cache01<R> implements CacheStats {
hits = 0;
misses = 0;
swaps = 0;

constructor(readonly maxSize: number) {}

abstract get(key: string): R | undefined;
abstract set(key: string, value: R): this;
}

class Cache01Map<R> extends Cache01<R> implements CacheStats {
private count = 0;
private cache0: Map<string, R> = new Map();
private cache1: Map<string, R> = new Map();

constructor(maxSize: number) {
super(maxSize);
}

get(key: string): R | undefined {
const cache0 = this.cache0;
const cache1 = this.cache1;
if (key in cache0) {
let found = cache0.get(key);
if (found !== undefined) {
++this.hits;
return cache0[key];
return found;
}
if (key in cache1) {
found = cache1.get(key);
if (found !== undefined) {
++this.hits;
++this.count;
const r = cache1[key];
cache0[key] = r;
return r;
cache0.set(key, found);
return found;
}
++this.misses;
return undefined;
}

set(key: string, value: R): this {
if (this.count >= this.maxSize) {
const c = this.cache1;
this.cache1 = this.cache0;
this.cache0 = Object.create(null);
this.cache0 = c;
c.clear();
this.swaps++;
this.count = 0;
}
++this.count;
this.cache0[key] = value;
this.cache0.set(key, value);
return this;
}
}

export function createCache01<R>(size: number): Cache01<R> {
return new Cache01(size);
return new Cache01Map(size);
}

export function autoCache<R>(fn: (p: string) => R, size = CACHE_SIZE): AutoCache<R> {
Expand Down
5 changes: 5 additions & 0 deletions packages/cspell-trie-lib/api/api.d.ts

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 3 additions & 12 deletions packages/cspell-trie-lib/src/lib/ITrie.ts
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ export interface ITrie {

export class ITrieImpl implements ITrie {
private _info: TrieInfo;
private _findOptionsDefaults: PartialFindOptions;
private hasForbidden: boolean;
private root: ITrieNodeRoot;
private count?: number;
Expand All @@ -127,11 +126,6 @@ export class ITrieImpl implements ITrie {
this.root = data.getRoot();
this._info = mergeOptionalWithDefaults(data.info);
this.hasForbidden = data.hasForbiddenWords();
this._findOptionsDefaults = {
caseInsensitivePrefix: this._info.stripCaseAndAccentsPrefix,
compoundFix: this._info.compoundCharacter,
forbidPrefix: this._info.forbiddenWordPrefix,
};
}

/**
Expand Down Expand Up @@ -195,7 +189,7 @@ export class ITrieImpl implements ITrie {
: defaultLegacyMinCompoundLength;
const findOptions = this.createFindOptions({
legacyMinCompoundLength: len,
matchCase: options.caseSensitive,
matchCase: options.caseSensitive || false,
});
return findLegacyCompound(this.root, word, findOptions);
}
Expand Down Expand Up @@ -290,11 +284,8 @@ export class ITrieImpl implements ITrie {
return new ITrieImpl(root, undefined);
}

private createFindOptions(options: PartialFindOptions = {}): FindOptions {
const findOptions = createFindOptions({
...this._findOptionsDefaults,
...options,
});
private createFindOptions(options: PartialFindOptions | undefined): FindOptions {
const findOptions = createFindOptions(options);
return findOptions;
}

Expand Down
5 changes: 1 addition & 4 deletions packages/cspell-trie-lib/src/lib/ITrieNode/FindOptions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,7 @@ import type { CompoundModes } from './CompoundModes.js';
export interface FindOptions {
matchCase: boolean;
compoundMode: CompoundModes;
forbidPrefix: string;
compoundFix: string;
caseInsensitivePrefix: string;
legacyMinCompoundLength: number;
legacyMinCompoundLength?: number;
}

export type PartialFindOptions = PartialWithUndefined<FindOptions> | undefined;
6 changes: 6 additions & 0 deletions packages/cspell-trie-lib/src/lib/ITrieNode/ITrieNode.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ export interface ITrieNode {
has(char: string): boolean;
/** `true` iff this node has children */
hasChildren(): boolean;
/** check if a word exists within this node. */
findExact?: ((word: string) => boolean) | undefined;
}

export interface ITrieNodeRoot extends ITrieNode {
Expand All @@ -75,4 +77,8 @@ export interface ITrieNodeRoot extends ITrieNode {
find?: ((word: string, strict: boolean) => FindResult | undefined) | undefined;

isForbidden?: ((word: string) => boolean) | undefined;

forbidPrefix: string;
compoundFix: string;
caseInsensitivePrefix: string;
}
Loading