Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dev: Support Legacy dictionary type #1840

Merged
merged 1 commit into from
Oct 6, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions cspell.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
"$ref": "#/definitions/ReplaceMap",
"description": "Replacement pairs"
},
"type": {
"$ref": "#/definitions/DictionaryFileTypes",
"default": "S",
"description": "Type of file: S - single word per line, W - each line can contain one or more words separated by space, C - each line is treated like code (Camel Case is allowed). Default is S. C is the slowest to load due to the need to split each line based upon code splitting rules."
},
"useCompounds": {
"description": "Use Compounds",
"type": "boolean"
Expand Down Expand Up @@ -108,6 +113,11 @@
],
"description": "Defines the scope for when words will be added to the dictionary. Scope values: `user`, `workspace`, `folder`"
},
"type": {
"$ref": "#/definitions/DictionaryFileTypes",
"default": "S",
"description": "Type of file: S - single word per line, W - each line can contain one or more words separated by space, C - each line is treated like code (Camel Case is allowed). Default is S. C is the slowest to load due to the need to split each line based upon code splitting rules."
},
"useCompounds": {
"description": "Use Compounds",
"type": "boolean"
Expand Down Expand Up @@ -143,6 +153,11 @@
"$ref": "#/definitions/ReplaceMap",
"description": "Replacement pairs"
},
"type": {
"$ref": "#/definitions/DictionaryFileTypes",
"default": "S",
"description": "Type of file: S - single word per line, W - each line can contain one or more words separated by space, C - each line is treated like code (Camel Case is allowed). Default is S. C is the slowest to load due to the need to split each line based upon code splitting rules."
},
"useCompounds": {
"description": "Use Compounds",
"type": "boolean"
Expand All @@ -154,6 +169,15 @@
],
"type": "object"
},
"DictionaryFileTypes": {
"enum": [
"S",
"W",
"C",
"T"
],
"type": "string"
},
"DictionaryId": {
"description": "This is the name of a dictionary.\n\nName Format:\n- Must contain at least 1 number or letter.\n- spaces are allowed.\n- Leading and trailing space will be removed.\n- Names ARE case-sensitive\n- Must not contain `*`, `!`, `;`, `,`, `{`, `}`, `[`, `]`, `~`",
"pattern": "^(?=[^!*,;{}[\\]~\\n]+$)(?=(.*\\w)).+$",
Expand Down
1 change: 1 addition & 0 deletions packages/cspell-lib/samples/words.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@ cherry
left-right
Geschäft
aujourd’hui
class:name
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,21 @@ describe('Validate DictionaryLoader', () => {
/**
 * Normalize a string to Unicode Normalization Form C.
 *
 * @param s - the text to normalize.
 * @returns the result of `s.normalize('NFC')`.
 */
function nfc(s: string): string {
    const form = 'NFC';
    const composed = s.normalize(form);
    return composed;
}
// cspell:ignore aujourd’hui
const csharp = require.resolve('@cspell/dict-csharp/csharp.txt.gz');
test.each`
testCase | file | options | word | maxAge | hasWord | hasErrors
${'sample words'} | ${sample('words.txt')} | ${{}} | ${'apple'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{}} | ${'class:name'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{}} | ${'left-right'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 5 }} | ${'apple'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'S' }} | ${'pear'} | ${undefined} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'C' }} | ${'strawberry'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'C' }} | ${'left-right'} | ${1} | ${false} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'C' }} | ${'left'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'C' }} | ${'class:name'} | ${1} | ${false} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'C' }} | ${'name'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{ type: 'C' }} | ${'aujourd’hui'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{}} | ${'tree'} | ${1} | ${false} | ${false}
${'unknown loader'} | ${sample('words.txt')} | ${{ type: 5 }} | ${'apple'} | ${1} | ${true} | ${false}
${'sample words'} | ${sample('words.txt')} | ${{}} | ${'left-right'} | ${1} | ${true} | ${false}
Expand Down
26 changes: 20 additions & 6 deletions packages/cspell-lib/src/SpellingDictionary/DictionaryLoader.ts
Original file line number Diff line number Diff line change
@@ -1,17 +1,18 @@
import type { DictionaryDefinitionPreferred } from '@cspell/cspell-types';
import type { DictionaryDefinitionPreferred, DictionaryFileTypes } from '@cspell/cspell-types';
import { stat } from 'fs-extra';
import * as path from 'path';
import { readLines } from '../util/fileReader';
import { createFailedToLoadDictionary, createSpellingDictionary } from './createSpellingDictionary';
import { SpellingDictionary } from './SpellingDictionary';
import { SpellingDictionaryLoadError } from './SpellingDictionaryError';
import { createSpellingDictionaryTrie } from './SpellingDictionaryFromTrie';
import { genSequence } from 'gensequence';

const MAX_AGE = 10000;

const loaders: Loaders = {
S: loadSimpleWordList,
C: loadSimpleWordList,
C: legacyWordList,
T: loadTrie,
default: loadSimpleWordList,
};
Expand Down Expand Up @@ -52,7 +53,7 @@ export function loadDictionary(uri: string, options: DictionaryDefinitionPreferr
const importantOptionKeys: (keyof DictionaryDefinitionPreferred)[] = ['noSuggest', 'useCompounds'];

function calcKey(uri: string, options: DictionaryDefinitionPreferred) {
const loaderType = determineType(uri);
const loaderType = determineType(uri, options);
const optValues = importantOptionKeys.map((k) => options[k]?.toString() || '');
const parts = [uri, loaderType].concat(optValues);

Expand Down Expand Up @@ -107,18 +108,31 @@ function loadEntry(uri: string, options: LoadOptions, now = Date.now()): CacheEn
};
}

function determineType(uri: string): LoaderType {
const defType = uri.endsWith('.trie.gz') ? 'T' : uri.endsWith('.txt.gz') ? 'S' : 'S';
function determineType(uri: string, opts: Pick<LoadOptions, 'type'>): LoaderType {
const t: DictionaryFileTypes = (opts.type && opts.type in loaders && opts.type) || 'S';
const defLoaderType = t as LoaderType;
const defType = uri.endsWith('.trie.gz') ? 'T' : uri.endsWith('.txt.gz') ? defLoaderType : defLoaderType;
const regTrieTest = /\.trie\b/i;
return regTrieTest.test(uri) ? 'T' : defType;
}

function load(uri: string, options: LoadOptions): Promise<SpellingDictionary> {
const type = determineType(uri);
const type = determineType(uri, options);
const loader = loaders[type] || loaders.default;
return loader(uri, options);
}

/**
 * Loader for the legacy `C` dictionary file type.
 *
 * Reads the file line by line, strips everything from a `#` to the end of the
 * line, splits what remains on any run of characters that are not word
 * characters, Unicode letters, Unicode marks, or apostrophes (`'` / `’`),
 * drops empty fragments, and builds a spelling dictionary from the result.
 *
 * @param filename - path of the word list to read; also passed as the dictionary source.
 * @param options - load options, forwarded to `determineName` and `createSpellingDictionary`.
 * @returns the dictionary produced by `createSpellingDictionary`.
 */
async function legacyWordList(filename: string, options: LoadOptions) {
    const rawLines = await readLines(filename);

    // Remove comments
    const stripComment = (text: string) => text.replace(/#.*/g, '');
    // Split on everything else
    const splitIntoWords = (text: string) => text.split(/[^\w\p{L}\p{M}'’]+/gu);
    // Splitting can yield empty strings at line edges; discard them.
    const isNonEmpty = (candidate: string) => !!candidate;

    const words = genSequence(rawLines)
        .map((line) => stripComment(line))
        .concatMap((line) => splitIntoWords(line))
        .filter((word) => isNonEmpty(word));

    const dictionaryName = determineName(filename, options);
    return createSpellingDictionary(words, dictionaryName, filename, options);
}

async function loadSimpleWordList(filename: string, options: LoadOptions) {
const lines = await readLines(filename);
return createSpellingDictionary(lines, determineName(filename, options), filename, options);
Expand Down
24 changes: 24 additions & 0 deletions packages/cspell-types/cspell.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
"$ref": "#/definitions/ReplaceMap",
"description": "Replacement pairs"
},
"type": {
"$ref": "#/definitions/DictionaryFileTypes",
"default": "S",
"description": "Type of file: S - single word per line, W - each line can contain one or more words separated by space, C - each line is treated like code (Camel Case is allowed). Default is S. C is the slowest to load due to the need to split each line based upon code splitting rules."
},
"useCompounds": {
"description": "Use Compounds",
"type": "boolean"
Expand Down Expand Up @@ -108,6 +113,11 @@
],
"description": "Defines the scope for when words will be added to the dictionary. Scope values: `user`, `workspace`, `folder`"
},
"type": {
"$ref": "#/definitions/DictionaryFileTypes",
"default": "S",
"description": "Type of file: S - single word per line, W - each line can contain one or more words separated by space, C - each line is treated like code (Camel Case is allowed). Default is S. C is the slowest to load due to the need to split each line based upon code splitting rules."
},
"useCompounds": {
"description": "Use Compounds",
"type": "boolean"
Expand Down Expand Up @@ -143,6 +153,11 @@
"$ref": "#/definitions/ReplaceMap",
"description": "Replacement pairs"
},
"type": {
"$ref": "#/definitions/DictionaryFileTypes",
"default": "S",
"description": "Type of file: S - single word per line, W - each line can contain one or more words separated by space, C - each line is treated like code (Camel Case is allowed). Default is S. C is the slowest to load due to the need to split each line based upon code splitting rules."
},
"useCompounds": {
"description": "Use Compounds",
"type": "boolean"
Expand All @@ -154,6 +169,15 @@
],
"type": "object"
},
"DictionaryFileTypes": {
"enum": [
"S",
"W",
"C",
"T"
],
"type": "string"
},
"DictionaryId": {
"description": "This is the name of a dictionary.\n\nName Format:\n- Must contain at least 1 number or letter.\n- spaces are allowed.\n- Leading and trailing space will be removed.\n- Names ARE case-sensitive\n- Must not contain `*`, `!`, `;`, `,`, `{`, `}`, `[`, `]`, `~`",
"pattern": "^(?=[^!*,;{}[\\]~\\n]+$)(?=(.*\\w)).+$",
Expand Down
10 changes: 10 additions & 0 deletions packages/cspell-types/src/CSpellSettingsDef.ts
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,16 @@ export interface DictionaryDefinitionBase {
* possible suggestions.
*/
noSuggest?: boolean;
/**
* Type of file:
* S - single word per line,
* W - each line can contain one or more words separated by space,
* C - each line is treated like code (Camel Case is allowed).
* Default is S.
* C is the slowest to load due to the need to split each line based upon code splitting rules.
* @default "S"
*/
type?: DictionaryFileTypes;
}

export interface DictionaryDefinitionPreferred extends DictionaryDefinitionBase {
Expand Down