From 3c8012e39796b16b5f6ada0454b90e5098857bbb Mon Sep 17 00:00:00 2001 From: Jason Dent Date: Fri, 16 Aug 2024 20:21:36 +0200 Subject: [PATCH] feat: Add filetype detection package. (#6103) --- .prettierrc.json | 5 +- cspell.code-workspace | 1 + .../RustPython/RustPython/report.yaml | 5 +- .../RustPython/RustPython/snapshot.txt | 3 +- .../snapshots/eslint/eslint/report.yaml | 6 +- .../snapshots/eslint/eslint/snapshot.txt | 2 +- .../googleapis/google-cloud-cpp/report.yaml | 4 +- .../googleapis/google-cloud-cpp/snapshot.txt | 2 +- packages/cspell-filetypes/CHANGELOG.md | 4 + packages/cspell-filetypes/LICENSE | 21 + packages/cspell-filetypes/README.md | 17 + packages/cspell-filetypes/package.json | 51 +++ .../scripts/extract-filetypes-from-package.js | 35 ++ .../cspell-filetypes/scripts/sortFileTypes.js | 76 ++++ packages/cspell-filetypes/src/definitions.ts | 375 ++++++++++++++++++ .../src/filetypes.test.ts} | 69 +++- packages/cspell-filetypes/src/filetypes.ts | 224 +++++++++++ packages/cspell-filetypes/src/index.ts | 11 + packages/cspell-filetypes/src/types.ts | 18 + packages/cspell-filetypes/tsconfig.json | 8 + packages/cspell-lib/api/api.d.ts | 12 +- packages/cspell-lib/package.json | 1 + .../src/lib/Document/isBinaryDoc.ts | 2 +- packages/cspell-lib/src/lib/LanguageIds.ts | 359 ----------------- .../cspell-lib/src/lib/Models/TextDocument.ts | 2 +- packages/cspell-lib/src/lib/fileTypes.ts | 6 + packages/cspell-lib/src/lib/index.ts | 2 +- packages/cspell-lib/src/lib/suggestions.ts | 2 +- .../determineTextDocumentSettings.ts | 2 +- packages/cspell-lib/src/lib/trace.ts | 2 +- pnpm-lock.yaml | 5 + 31 files changed, 926 insertions(+), 406 deletions(-) create mode 100644 packages/cspell-filetypes/CHANGELOG.md create mode 100644 packages/cspell-filetypes/LICENSE create mode 100644 packages/cspell-filetypes/README.md create mode 100644 packages/cspell-filetypes/package.json create mode 100644 packages/cspell-filetypes/scripts/extract-filetypes-from-package.js create mode 100644 packages/cspell-filetypes/scripts/sortFileTypes.js create mode 100644 packages/cspell-filetypes/src/definitions.ts rename packages/{cspell-lib/src/lib/LanguageIds.test.ts => cspell-filetypes/src/filetypes.test.ts} (66%) create mode 100644 packages/cspell-filetypes/src/filetypes.ts create mode 100644 packages/cspell-filetypes/src/index.ts create mode 100644 packages/cspell-filetypes/src/types.ts create mode 100644 packages/cspell-filetypes/tsconfig.json delete mode 100644 packages/cspell-lib/src/lib/LanguageIds.ts create mode 100644 packages/cspell-lib/src/lib/fileTypes.ts diff --git a/.prettierrc.json b/.prettierrc.json index 8127dc1ead0..37f652521dc 100644 --- a/.prettierrc.json +++ b/.prettierrc.json @@ -9,7 +9,10 @@ } }, { - "files": "**/src/GlobMatcher.test.ts", + "files": [ + "**/src/GlobMatcher.test.ts", + "packages/cspell-filetypes/**/definitions.ts" + ], "options": { "printWidth": 180 } diff --git a/cspell.code-workspace b/cspell.code-workspace index 45c23f07718..4323b25fca1 100644 --- a/cspell.code-workspace +++ b/cspell.code-workspace @@ -11,6 +11,7 @@ { "path": "packages/cspell-config-lib" }, { "path": "packages/cspell-dictionary" }, { "path": "packages/cspell-eslint-plugin" }, + { "path": "packages/cspell-filetypes" }, { "path": "packages/cspell-gitignore" }, { "path": "packages/cspell-glob" }, { "path": "packages/cspell-grammar" }, diff --git a/integration-tests/snapshots/RustPython/RustPython/report.yaml b/integration-tests/snapshots/RustPython/RustPython/report.yaml index 815d2349cce..1ffed229a10 100644 --- a/integration-tests/snapshots/RustPython/RustPython/report.yaml +++ b/integration-tests/snapshots/RustPython/RustPython/report.yaml @@ -4,8 +4,8 @@ Url: https://github.com/RustPython/RustPython.git Args: '["**"]' Summary: files: 621 - filesWithIssues: 327 - issues: 2971 + filesWithIssues: 326 + issues: 2968 errors: 0 Errors: [] @@ -100,7 +100,6 @@ issues: - "DEVELOPMENT.md:24:20 rustup U - If you have `rustup` on your system, enter" - "DEVELOPMENT.md:113:5 speedscope U The speedscope json format (default" - "DEVELOPMENT.md:131:26 subcrates U src`: using the other subcrates to bring rustpython" - - "Dockerfile.bin:3:1 WORKDIR U WORKDIR /rustpython" - "Dockerfile.bin:13:5 RUSTPYTHONPATH U ENV RUSTPYTHONPATH /usr/lib/rustpython" - "example_projects/aheui-rust.md:1:3 aheui U # aheui-rust" - "example_projects/aheui-rust.md:4:14 frozenlib U - Creating a frozenlib: https://github.com" diff --git a/integration-tests/snapshots/RustPython/RustPython/snapshot.txt b/integration-tests/snapshots/RustPython/RustPython/snapshot.txt index 106e667457a..4f95decf4b0 100644 --- a/integration-tests/snapshots/RustPython/RustPython/snapshot.txt +++ b/integration-tests/snapshots/RustPython/RustPython/snapshot.txt @@ -3,7 +3,7 @@ Repository: RustPython/RustPython Url: "https://github.com/RustPython/RustPython.git" Args: ["**"] Lines: - CSpell: Files checked: 621, Issues found: 2971 in 327 files. + CSpell: Files checked: 621, Issues found: 2968 in 326 files. exit code: 1 Cargo.toml:150:1 - Unknown word (getrandom) -- getrandom = "0.2.12" Cargo.toml:160:119 - Unknown word (zerocopy) -- "socket", "sched", "zerocopy", "dir", "hostname" @@ -18,7 +18,6 @@ DEVELOPMENT.md:113:5 - Unknown word (speedscope) -- The speedscope json form DEVELOPMENT.md:131:26 - Unknown word (subcrates) -- src`: using the other subcrates to bring rustpython DEVELOPMENT.md:24:20 - Unknown word (rustup) -- - If you have `rustup` on your system, enter Dockerfile.bin:13:5 - Unknown word (RUSTPYTHONPATH) -- ENV RUSTPYTHONPATH /usr/lib/rustpython -Dockerfile.bin:3:1 - Unknown word (WORKDIR) -- WORKDIR /rustpython LICENSE-logo:139:15 - Unknown word (sublicensable) -- non-sublicensable, non-exclusive, irrevocable LICENSE-logo:205:34 - Unknown word (waivable) -- under any voluntary or waivable statutory or compulsory LICENSE-logo:36:7 - Unknown word (creativecommons) -- wiki.creativecommons.org/Considerations_for diff --git a/integration-tests/snapshots/eslint/eslint/report.yaml b/integration-tests/snapshots/eslint/eslint/report.yaml index 5a012088074..f8c8f12d967 100644 --- a/integration-tests/snapshots/eslint/eslint/report.yaml +++ b/integration-tests/snapshots/eslint/eslint/report.yaml @@ -4,8 +4,8 @@ Url: https://github.com/eslint/eslint Args: '[".","--config=../../../config/eslint/cspell.config.yaml","--issues-summary-report","--exclude=bin/**","--exclude=CHANGELOG.md","--exclude=_data","--exclude=tests/bench/large.js","--exclude=docs/src/_includes","--exclude=docs/src/assets/{fonts,s?css,images}"]' Summary: files: 1981 - filesWithIssues: 330 - issues: 1774 + filesWithIssues: 329 + issues: 1773 errors: 0 Errors: [] @@ -686,7 +686,7 @@ issuesSummary: - "text: egal, count: 2, files: 1," - "text: elapseds, count: 1, files: 1," - "text: ELEVENTEEN, count: 2, files: 1," - - "text: eleventy, count: 4, files: 4," + - "text: eleventy, count: 3, files: 3," - "text: embertest, count: 1, files: 1," - "text: emtpy, count: 1, files: 1," - "text: endfor, count: 8, files: 4," diff --git a/integration-tests/snapshots/eslint/eslint/snapshot.txt b/integration-tests/snapshots/eslint/eslint/snapshot.txt index 98418592a5f..c3bc2793166 100644 --- a/integration-tests/snapshots/eslint/eslint/snapshot.txt +++ b/integration-tests/snapshots/eslint/eslint/snapshot.txt @@ -3,7 +3,7 @@ Repository: eslint/eslint Url: "https://github.com/eslint/eslint" Args: [".","--config=../../../config/eslint/cspell.config.yaml","--issues-summary-report","--exclude=bin/**","--exclude=CHANGELOG.md","--exclude=_data","--exclude=tests/bench/large.js","--exclude=docs/src/_includes","--exclude=docs/src/assets/{fonts,s?css,images}"] Lines: - CSpell: Files checked: 1981, Issues found: 1774 in 330 files. + CSpell: Files checked: 1981, Issues found: 1773 in 329 files. exit code: 1 Makefile.js:132:88 - Unknown word (ined) -- followed by the string "ined". Makefile.js:343:12 - Unknown word (gensite) -- target.gensite(); diff --git a/integration-tests/snapshots/googleapis/google-cloud-cpp/report.yaml b/integration-tests/snapshots/googleapis/google-cloud-cpp/report.yaml index 1b5c51f69c0..ea86a478945 100644 --- a/integration-tests/snapshots/googleapis/google-cloud-cpp/report.yaml +++ b/integration-tests/snapshots/googleapis/google-cloud-cpp/report.yaml @@ -4,8 +4,8 @@ Url: https://github.com/googleapis/google-cloud-cpp.git Args: '["--config=${repoConfig}/cspell.json","**/*","-e","{*.BUILD,BUILD,CHANGELOG.md,*.sh,*.cfg,*.ps1,Dockerfile.*,*.Dockerfile,*.{yaml,xml,json,cmake}}"]' Summary: files: 19462 - filesWithIssues: 12311 - issues: 99911 + filesWithIssues: 12310 + issues: 99907 errors: 0 Errors: [] diff --git a/integration-tests/snapshots/googleapis/google-cloud-cpp/snapshot.txt b/integration-tests/snapshots/googleapis/google-cloud-cpp/snapshot.txt index 72e99ad717f..2c9994fa21d 100644 --- a/integration-tests/snapshots/googleapis/google-cloud-cpp/snapshot.txt +++ b/integration-tests/snapshots/googleapis/google-cloud-cpp/snapshot.txt @@ -3,7 +3,7 @@ Repository: googleapis/google-cloud-cpp Url: "https://github.com/googleapis/google-cloud-cpp.git" Args: ["--config=../../../../config/repositories/googleapis/google-cloud-cpp/cspell.json","**/*","-e","{*.BUILD,BUILD,CHANGELOG.md,*.sh,*.cfg,*.ps1,Dockerfile.*,*.Dockerfile,*.{yaml,xml,json,cmake}}"] Lines: - CSpell: Files checked: 19462, Issues found: 99911 in 12311 files. + CSpell: Files checked: 19462, Issues found: 99907 in 12310 files. exit code: 1 ARCHITECTURE.md:110:20 - Unknown word (bigtable) -- - Bigtable has a [`bigtable::DataClient`](/google ARCHITECTURE.md:110:3 - Unknown word (Bigtable) -- - Bigtable has a [`bigtable::DataClient diff --git a/packages/cspell-filetypes/CHANGELOG.md b/packages/cspell-filetypes/CHANGELOG.md new file mode 100644 index 00000000000..e4d87c4d45c --- /dev/null +++ b/packages/cspell-filetypes/CHANGELOG.md @@ -0,0 +1,4 @@ +# Change Log + +All notable changes to this project will be documented in this file. +See [Conventional Commits](https://conventionalcommits.org) for commit guidelines. diff --git a/packages/cspell-filetypes/LICENSE b/packages/cspell-filetypes/LICENSE new file mode 100644 index 00000000000..d697c613a49 --- /dev/null +++ b/packages/cspell-filetypes/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Jason Dent + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/packages/cspell-filetypes/README.md b/packages/cspell-filetypes/README.md new file mode 100644 index 00000000000..58bbfba3eb8 --- /dev/null +++ b/packages/cspell-filetypes/README.md @@ -0,0 +1,17 @@ +# `@cspell/filetypes` + +A library to help determine the type of a file. + +## Install + +```sh +npm install -S @cspell/filetypes +``` + +## Usage + +```ts +import { findMatchingFileTypes } from '@cspell/filetypes'; + +console.log(findMatchingFileTypes('code.js')); // outputs: [ 'javascript' ] +``` diff --git a/packages/cspell-filetypes/package.json b/packages/cspell-filetypes/package.json new file mode 100644 index 00000000000..a9c63e9c253 --- /dev/null +++ b/packages/cspell-filetypes/package.json @@ -0,0 +1,51 @@ +{ + "name": "@cspell/filetypes", + "version": "8.13.3", + "description": "Library to determine file types based upon the file name.", + "keywords": [ + "cspell", + "filetypes" + ], + "author": "Jason Dent ", + "homepage": "https://github.com/streetsidesoftware/cspell/tree/main/packages/cspell-filetypes#readme", + "license": "MIT", + "type": "module", + "sideEffects": false, + "exports": { + ".": "./dist/index.js" + }, + "directories": { + "dist": "dist" + }, + "typings": "dist/index.d.ts", + "files": [ + "dist", + "!**/*.tsbuildInfo", + "!**/__mocks__", + "!**/*.test.*", + "!**/*.spec.*", + "!**/*.map" + ], + "scripts": { + "build": "tsc -p .", + "watch": "tsc -p . -w", + "clean": "shx rm -rf dist temp coverage \"*.tsbuildInfo\"", + "clean-build": "pnpm run clean && pnpm run build", + "sort-filetypes": "pnpm build && node scripts/sortFileTypes.js && prettier --write src/definitions.ts", + "coverage": "vitest run --coverage", + "test-watch": "vitest", + "test": "vitest run" + }, + "repository": { + "type": "git", + "url": "https://github.com/streetsidesoftware/cspell.git", + "directory": "packages/cspell-filetypes" + }, + "bugs": { + "url": "https://github.com/streetsidesoftware/cspell/labels/filetype" + }, + "engines": { + "node": ">=18" + }, + "dependencies": {} +} diff --git a/packages/cspell-filetypes/scripts/extract-filetypes-from-package.js b/packages/cspell-filetypes/scripts/extract-filetypes-from-package.js new file mode 100644 index 00000000000..01933f1c90e --- /dev/null +++ b/packages/cspell-filetypes/scripts/extract-filetypes-from-package.js @@ -0,0 +1,35 @@ +import { readFile } from 'node:fs/promises'; + +async function processFiles(files) { + const defs = []; + + for (const file of files) { + const content = await readFile(file, 'utf8'); + const pkg = JSON.parse(content); + + const languages = pkg.contributes?.languages; + if (!languages?.length) { + continue; + } + + for (const def of languages) { + if (!def.id) { + continue; + } + const filenames = [...(def.filenamePatterns || []), ...(def.filenames || [])]; + defs.push({ + id: def.id, + extensions: def.extensions || [], + filenames: filenames.length ? filenames : undefined, + }); + } + } + + return defs; +} + +const packageFiles = process.argv.slice(2); + +const defs = await processFiles(packageFiles); + +console.log(JSON.stringify(defs, undefined, 2)); diff --git a/packages/cspell-filetypes/scripts/sortFileTypes.js b/packages/cspell-filetypes/scripts/sortFileTypes.js new file mode 100644 index 00000000000..9cec1227969 --- /dev/null +++ b/packages/cspell-filetypes/scripts/sortFileTypes.js @@ -0,0 +1,76 @@ +import { readFile, writeFile } from 'node:fs/promises'; + +import { definitions } from '../dist/definitions.js'; + +const urlFile = new URL('../src/definitions.ts', import.meta.url); + +/** + * @typedef {import('../dist/types.js').FileTypeDefinition} FileTypeDefinition + */ + +/** + * + * @param {FileTypeDefinition} a + * @param {FileTypeDefinition} b + */ +function compare(a, b) { + if (a.format !== b.format) { + return !a.format || a.format === 'Text' ? -1 : 1; + } + return a.id.localeCompare(b.id); +} + +const fieldOrder = ['id', 'extensions', 'filenames', 'format', 'description', 'comment']; + +/** + * + * @param {FileTypeDefinition} def + * @returns {FileTypeDefinition} + */ +function normalizeDef(def) { + const entries = Object.entries(def).sort(([a], [b]) => fieldOrder.indexOf(a) - fieldOrder.indexOf(b)); + def = Object.fromEntries(entries); + def.extensions = [...new Set(def.extensions.map((a) => (a.startsWith('.') ? a : '.' + a)).sort())]; + def.filenames = def.filenames ? [...new Set(def.filenames.sort())] : undefined; + return def; +} + +const defs = dedupe(definitions).sort(compare).map(normalizeDef); + +async function updateFile() { + const content = await readFile(urlFile, 'utf8'); + const newLines = `export const definitions: FileTypeDefinitions = ${JSON.stringify(defs)};\n`; + const start = content.indexOf('export const definitions:'); + const end = content.indexOf('];\n', start); + const output = content.slice(0, start) + newLines + content.slice(end + 3); + await writeFile(urlFile, output, 'utf8'); +} + +/** + * + * @param {FileTypeDefinition[]} defs + * @returns {FileTypeDefinition[]} + */ +function dedupe(defs) { + /** @type {Map} */ + const map = new Map(); + for (const def of defs) { + const key = def.id; + const existing = map.get(key); + if (!existing) { + map.set(key, def); + } else { + existing.extensions = [...existing.extensions, ...def.extensions]; + existing.filenames = [...(existing.filenames || []), ...(def.filenames || [])]; + if (!existing.filenames.length) { + delete existing.filenames; + } + existing.format ??= def.format; + existing.description ??= def.description; + existing.comment ??= def.comment; + } + } + return [...map.values()]; +} + +updateFile(); diff --git a/packages/cspell-filetypes/src/definitions.ts b/packages/cspell-filetypes/src/definitions.ts new file mode 100644 index 00000000000..aeb60a7f4ed --- /dev/null +++ b/packages/cspell-filetypes/src/definitions.ts @@ -0,0 +1,375 @@ +import type { FileTypeDefinitions } from './types.js'; + +export const definitions: FileTypeDefinitions = [ + { id: 'ada', extensions: ['.adb', '.ads'] }, + { id: 'apiblueprint', extensions: ['.apib', '.apiblueprint'] }, + { id: 'argdown', extensions: ['.ad', '.adown', '.argdn', '.argdown'] }, + { id: 'asciidoc', extensions: ['.adoc', '.asc', '.asciidoc'] }, + { id: 'bat', extensions: ['.bat', '.cmd'] }, + { id: 'bazel', extensions: ['.bazel', '.bzl'] }, + { id: 'bibtex', extensions: ['.bib'] }, + { id: 'bicep', extensions: ['.bicep'] }, + { id: 'c', extensions: ['.c', '.i'] }, + { id: 'cache_files', extensions: [], filenames: ['.DS_Store', '.cspellcache', '.eslintcache'] }, + { id: 'clojure', extensions: ['.clj', '.cljc', '.cljs', '.cljx', '.clojure', '.edn'] }, + { id: 'cmake', extensions: ['.cmake'], filenames: ['CMakeLists.txt'] }, + { id: 'coffeescript', extensions: ['.coffee', '.cson', '.iced'] }, + { + id: 'cpp', + extensions: [ + '.c++', + '.c++m', + '.cc', + '.ccm', + '.cpp', + '.cppm', + '.cxx', + '.cxxm', + '.h', + '.h++', + '.h.in', + '.hh', + '.hpp', + '.hpp.in', + '.hxx', + '.ii', + '.inl', + '.ino', + '.ipp', + '.ixx', + '.mm', + '.tpp', + '.txx', + ], + }, + { id: 'cpp_embedded_latex', extensions: [] }, + { id: 'csharp', extensions: ['.cake', '.cs', '.csx'] }, + { id: 'css', extensions: ['.css'] }, + { id: 'cuda-cpp', extensions: ['.cu', '.cuh'] }, + { id: 'dart', extensions: ['.dart'] }, + { id: 'dhall', extensions: ['.dhall'] }, + { id: 'diff', extensions: ['.diff', '.patch', '.rej'] }, + { id: 'dockercompose', extensions: [], filenames: ['*docker*compose*.yaml', '*docker*compose*.yml', 'compose.*.yaml', 'compose.*.yml', 'compose.yaml', 'compose.yml'] }, + { + id: 'dockerfile', + extensions: ['.containerfile', '.dockerfile'], + filenames: ['*.Dockerfile.*', 'Containerfile', 'Containerfile.*', 'Dockerfile', 'Dockerfile.*', 'Dockerfile.dev', 'dockerfile'], + }, + { id: 'elisp', extensions: ['.el'] }, + { id: 'elixir', extensions: ['.ex', '.exs'] }, + { id: 'elm', extensions: ['.elm'] }, + { id: 'erb', extensions: ['.erb', '.html.erb', '.rhtml'] }, + { id: 'fsharp', extensions: ['.fs', '.fsi', '.fsscript', '.fsx'] }, + { id: 'git-commit', extensions: [], filenames: ['COMMIT_EDITMSG', 'MERGE_MSG'] }, + { id: 'git-rebase', extensions: [], filenames: ['git-rebase-todo'] }, + { id: 'github-issues', extensions: ['.github-issues'] }, + { id: 'go', extensions: ['.go'] }, + { id: 'godot', extensions: ['.gd', '.godot', '.tres', '.tscn'] }, + { id: 'gradle', extensions: ['.gradle'] }, + { id: 'groovy', extensions: ['.gradle', '.groovy', '.gvy', '.jenkinsfile', '.nf'], filenames: ['Jenkinsfile', 'Jenkinsfile*'] }, + { id: 'haml', extensions: ['.haml'] }, + { id: 'handlebars', extensions: ['.handlebars', '.hbs', '.hjs'] }, + { id: 'haskell', extensions: ['.hs', '.lhs'] }, + { id: 'haxe', extensions: ['.hx'] }, + { id: 'hlsl', extensions: ['.cginc', '.compute', '.fx', '.fxh', '.hlsl', '.hlsli', '.psh', '.vsh'] }, + { id: 'html', extensions: ['.asp', '.aspx', '.ejs', '.htm', '.html', '.jshtm', '.jsp', '.mdoc', '.rhtml', '.shtml', '.volt', '.vue', '.xht', '.xhtml'] }, + { id: 'ignore', extensions: ['.git-blame-ignore-revs', '.gitignore', '.gitignore_global', '.npmignore'], filenames: ['.vscodeignore'] }, + { id: 'ini', extensions: ['.conf', '.ini'] }, + { id: 'jade', extensions: ['.jade', '.pug'] }, + { id: 'java', extensions: ['.jav', '.java'] }, + { id: 'javascript', extensions: ['.cjs', '.es6', '.js', '.mjs', '.pac'], filenames: ['jakefile'] }, + { id: 'javascriptreact', extensions: ['.jsx'] }, + { id: 'jinja', extensions: ['.jinja'] }, + { + id: 'json', + extensions: [ + '.babelrc', + '.bowerrc', + '.code-profile', + '.css.map', + '.eslintrc', + '.geojson', + '.har', + '.ipynb', + '.js.map', + '.jscsrc', + '.jshintrc', + '.jslintrc', + '.json', + '.jsonc', + '.jsonld', + '.ts.map', + '.tsbuildinfo', + '.vuerc', + '.webmanifest', + ], + filenames: ['.watchmanconfig', 'composer.lock'], + }, + { + id: 'jsonc', + extensions: [ + '.babelrc', + '.code-workspace', + '.color-theme.json', + '.eslintrc', + '.eslintrc.json', + '.hintrc', + '.icon-theme.json', + '.jsfmtrc', + '.jshintrc', + '.jsonc', + '.language-configuration.json', + '.swcrc', + ], + filenames: [ + '.babelrc.json', + '.code-workspace', + '.devcontainer.json', + '.ember-cli', + 'argv.json', + 'babel.config.json', + 'devcontainer.json', + 'extensions.json', + 'jsconfig-*.json', + 'jsconfig.*.json', + 'jsconfig.json', + 'keybindings.json', + 'launch.json', + 'profiles.json', + 'settings.json', + 'tasks.json', + 'tsconfig-*.json', + 'tsconfig.*.json', + 'tsconfig.json', + 'typedoc.json', + ], + }, + { id: 'jsonl', extensions: ['.jsonl'] }, + { id: 'jsx-tags', extensions: [] }, + { id: 'julia', extensions: ['.jl'] }, + { id: 'juliamarkdown', extensions: ['.jmd'] }, + { id: 'jungle', extensions: ['.jungle'] }, + { id: 'kotlin', extensions: ['.kt'] }, + { id: 'latex', extensions: ['.ctx', '.ltx', '.tex'] }, + { id: 'less', extensions: ['.less'] }, + { id: 'lisp', extensions: ['.fasl', '.l', '.lisp', '.lsp'] }, + { id: 'literate haskell', extensions: ['.lhs'] }, + { id: 'lock', extensions: ['.lock'], filenames: ['Cargo.lock', 'berksfile.lock', 'composer.lock', 'package-lock.json'] }, + { id: 'log', extensions: ['.log'], filenames: ['*.log.?'] }, + { id: 'lua', extensions: ['.lua'] }, + { id: 'makefile', extensions: ['.mak', '.mk'], filenames: ['GNUmakefile', 'Makefile', 'OCamlMakefile', 'makefile'] }, + { id: 'map', extensions: ['.map', '.css.map', '.ts.map', '.js.map'] }, + { id: 'markdown', extensions: ['.markdn', '.markdown', '.md', '.mdown', '.mdtext', '.mdtxt', '.mdwn', '.mkd', '.workbook'] }, + { id: 'markdown_latex_combined', extensions: [] }, + { id: 'markdown-math', extensions: [] }, + { id: 'mdx', extensions: ['.mdx'] }, + { id: 'monkeyc', extensions: ['.mb', '.mc'] }, + { id: 'mustache', extensions: ['.mst', '.mu', '.mustache', '.stache'] }, + { id: 'nix', extensions: ['.nix'] }, + { id: 'nunjucks', extensions: ['.nj', '.njk', '.nunj', '.nunjs', '.nunjucks', '.tmpl', '.tpl'] }, + { id: 'objective-c', extensions: ['.m'] }, + { id: 'objective-cpp', extensions: ['.mm'] }, + { id: 'ocaml', extensions: ['.eliom', '.eliomi', '.ml', '.mli', '.mll', '.mly'] }, + { id: 'pdf', extensions: ['.pdf'] }, + { id: 'pem', extensions: ['.pem', '.private-key.pem'] }, + { id: 'pem-private-key', extensions: ['.private-key.pem'] }, + { id: 'perl', extensions: ['.PL', '.pl', '.pm', '.pod', '.psgi', '.t'] }, + { id: 'perl6', extensions: ['.nqp', '.p6', '.pl6', '.pm6'] }, + { id: 'php', extensions: ['.ctp', '.php', '.php4', '.php5', '.phtml'] }, + { id: 'plaintext', extensions: ['.txt'] }, + { id: 'powershell', extensions: ['.ps1', '.psd1', '.psm1', '.psrc', '.pssc'] }, + { + id: 'properties', + extensions: ['.cfg', '.conf', '.directory', '.editorconfig', '.gitattributes', '.gitconfig', '.gitmodules', '.npmrc', '.properties', '.repo'], + filenames: ['.env', 'gitconfig'], + }, + { id: 'puppet', extensions: ['.puppet'] }, + { id: 'purescript', extensions: ['.purs'] }, + { id: 'python', extensions: ['.cpy', '.gyp', '.gypi', '.ipy', '.py', '.pyi', '.pyt', '.pyw', '.rpy'], filenames: ['SConscript', 'SConstruct'] }, + { id: 'r', extensions: ['.R', '.r', '.rhistory', '.rprofile', '.rt'] }, + { id: 'raku', extensions: ['.nqp', '.p6', '.pl6', '.pm6', '.raku', '.rakudoc', '.rakumod', '.rakutest'] }, + { id: 'razor', extensions: ['.cshtml', '.razor'] }, + { id: 'rescript', extensions: ['.res', '.resi'] }, + { id: 'restructuredtext', extensions: ['.rst'] }, + { id: 'rsa', extensions: ['.pub'], filenames: ['id_rsa', 'id_rsa.pub'] }, + { + id: 'ruby', + extensions: ['.erb', '.gemspec', '.podspec', '.rake', '.rb', '.rbi', '.rbx', '.rjs', '.ru'], + filenames: [ + 'Gemfile', + 'appfile', + 'appraisals', + 'berksfile', + 'berksfile.lock', + 'brewfile', + 'capfile', + 'cheffile', + 'dangerfile', + 'deliverfile', + 'fastfile', + 'gemfile', + 'guardfile', + 'gymfile', + 'hobofile', + 'matchfile', + 'podfile', + 'puppetfile', + 'rakefile', + 'rantfile', + 'scanfile', + 'snapfile', + 'thorfile', + 'vagrantfile', + ], + }, + { id: 'rust', extensions: ['.rs'] }, + { id: 'sass', extensions: ['.sass'] }, + { id: 'scala', extensions: ['.sbt', '.sc', '.scala'] }, + { id: 'scss', extensions: ['.scss'] }, + { id: 'search-result', extensions: ['.code-search'] }, + { id: 'shaderlab', extensions: ['.cginc', '.shader'] }, + { + id: 'shellscript', + extensions: [ + '.Xsession', + '.bash', + '.bash_aliases', + '.bash_login', + '.bash_logout', + '.bash_profile', + '.bashrc', + '.csh', + '.cshrc', + '.ebuild', + '.eclass', + '.fish', + '.install', + '.ksh', + '.profile', + '.sh', + '.tcshrc', + '.xprofile', + '.xsession', + '.xsessionrc', + '.yash_profile', + '.yashrc', + '.zlogin', + '.zlogout', + '.zprofile', + '.zsh', + '.zsh-theme', + '.zshenv', + '.zshrc', + ], + filenames: ['.env.*', '.envrc', '.hushlogin', 'APKBUILD', 'PKGBUILD', 'bashrc_Apple_Terminal', 'zlogin', 'zlogout', 'zprofile', 'zshenv', 'zshrc', 'zshrc_Apple_Terminal'], + }, + { id: 'snippets', extensions: ['.code-snippets'] }, + { id: 'sql', extensions: ['.dsql', '.sql'] }, + { id: 'stylus', extensions: ['.styl'] }, + { id: 'svelte', extensions: ['.svelte'] }, + { id: 'swift', extensions: ['.swift'] }, + { id: 'terraform', extensions: ['.hcl', '.tf', '.tf.json', '.tfvars'] }, + { id: 'tex', extensions: ['.bbx', '.cbx', '.cls', '.sty'] }, + { id: 'tfvars', extensions: ['.tfvars'], description: 'Terraform Variables' }, + { id: 'todo', extensions: [], filenames: ['todo'] }, + { id: 'toml', extensions: ['.toml'], filenames: ['Cargo.lock', 'Cargo.toml'] }, + { id: 'typescript', extensions: ['.cts', '.mts', '.ts'] }, + { id: 'typescriptreact', extensions: ['.tsx'] }, + { id: 'typst', extensions: ['.typst'] }, + { id: 'vala', extensions: ['.vala'] }, + { id: 'vb', extensions: ['.bas', '.brs', '.vb', '.vba', '.vbs'] }, + { id: 'vue', extensions: ['.vue'] }, + { + id: 'xml', + extensions: [ + '.ascx', + '.atom', + '.axaml', + '.axml', + '.bpmn', + '.config', + '.cpt', + '.csl', + '.csproj', + '.csproj.user', + '.dita', + '.ditamap', + '.dtd', + '.dtml', + '.ent', + '.fsproj', + '.fxml', + '.iml', + '.isml', + '.jmx', + '.launch', + '.menu', + '.mod', + '.mxml', + '.nuspec', + '.opml', + '.owl', + '.proj', + '.props', + '.pt', + '.publishsettings', + '.pubxml', + '.pubxml.user', + '.rbxlx', + '.rbxmx', + '.rdf', + '.rng', + '.rss', + '.shproj', + '.storyboard', + '.svg', + '.targets', + '.tld', + '.tmx', + '.vbproj', + '.vbproj.user', + '.vcxproj', + '.vcxproj.filters', + '.wsdl', + '.wxi', + '.wxl', + '.wxs', + '.xaml', + '.xbl', + '.xib', + '.xlf', + '.xliff', + '.xml', + '.xoml', + '.xpdl', + '.xsd', + '.xul', + ], + }, + { id: 'xsl', extensions: ['.xsl', '.xslt'] }, + { id: 'yaml', extensions: ['.cff', '.eyaml', '.eyml', '.yaml', '.yaml-tmlanguage', '.yaml-tmpreferences', '.yaml-tmtheme', '.yml'] }, + { id: 'binary', extensions: ['.bin', '.cur', '.dll', '.eot', '.exe', '.gz', '.lib', '.o', '.obj', '.phar', '.zip'], format: 'Binary' }, + { id: 'dll', extensions: ['.dll'], format: 'Binary' }, + { id: 'exe', extensions: ['.exe'], format: 'Binary' }, + { id: 'fonts', extensions: ['.ttf', '.woff', '.woff2'], format: 'Binary' }, + { id: 'gzip', extensions: ['.gz'], format: 'Binary' }, + { + id: 'image', + extensions: ['.bmp', '.exr', '.gif', '.heic', '.ico', '.jpeg', '.jpg', '.pbm', '.pgm', '.png', '.ppm', '.ras', '.sgi', '.tiff', '.webp', '.xbm'], + format: 'Binary', + description: 'Some image extensions', + }, + { id: 'jar', extensions: ['.jar'], format: 'Binary' }, + { id: 'mdb', extensions: ['.mdb'], format: 'Binary', description: 'Microsoft Access DB' }, + { id: 'object-file', extensions: ['.o', '.obj'], format: 'Binary' }, + { id: 'spv', extensions: ['.spv'], format: 'Binary', description: 'SPSS Output Document' }, + { id: 'trie', extensions: ['.trie'], format: 'Binary', description: 'CSpell dictionary file.' }, + { id: 'video', extensions: ['.avi', '.flv', '.mkv', '.mov', '.mp4', '.mpeg', '.mpg', '.wmv'], format: 'Binary' }, + { id: 'webm', extensions: ['.webm'], format: 'Binary', description: 'WebM is an audiovisual media file format.' }, + { id: 'wheel', extensions: ['.whl'], format: 'Binary' }, +]; + +// cspell:ignoreRegExp /id: '.*?'/g +// cspell:ignoreRegExp /extensions: \[[^\]]*?\]/g +// cspell:ignoreRegExp /filenames: \[[^\]]*?\]/g +// cspell:ignore SPSS diff --git a/packages/cspell-lib/src/lib/LanguageIds.test.ts b/packages/cspell-filetypes/src/filetypes.test.ts similarity index 66% rename from packages/cspell-lib/src/lib/LanguageIds.test.ts rename to packages/cspell-filetypes/src/filetypes.test.ts index d107e8df1da..4981391b32f 100644 --- a/packages/cspell-lib/src/lib/LanguageIds.test.ts +++ b/packages/cspell-filetypes/src/filetypes.test.ts @@ -1,6 +1,7 @@ import { describe, expect, test } from 'vitest'; -import * as LangId from './LanguageIds.js'; +import { definitions } from './definitions.js'; +import * as LangId from './filetypes.js'; describe('Validate LanguageIds', () => { test.each` @@ -13,7 +14,7 @@ describe('Validate LanguageIds', () => { ${'hs'} | ${['haskell']} ${'PNG'} | ${['image']} `('getLanguagesForExt $ext', ({ ext, expected }) => { - expect(LangId.getLanguagesForExt(ext)).toEqual(expected); + expect(LangId.getFileTypesForExt(ext)).toEqual(expected); }); test.each` @@ -22,43 +23,54 @@ describe('Validate LanguageIds', () => { ${'base.r'} | ${['r']} ${'base.R'} | ${['r']} ${'doc.tex'} | ${['latex']} + ${'Dockerfile.bin'} | ${['dockerfile']} + ${'aws.Dockerfile'} | ${['dockerfile']} ${'image.jpg'} | ${['image']} ${'workspace.code-workspace'} | ${['jsonc']} + ${'.code-workspace'} | ${['jsonc']} ${'.cspellcache'} | ${['cache_files']} ${'Gemfile'} | ${['ruby']} - ${'path/Gemfile'} | ${[]} + ${'path/Gemfile'} | ${['ruby']} + ${'Cargo.lock'} | ${['lock', 'toml']} + ${'.errors.log.2'} | ${['log']} ${'my-cert.pem'} | ${['pem']} ${'my-private-cert.private-key.pem'} | ${['pem', 'pem-private-key']} ${'Dockerfile'} | ${['dockerfile']} ${'Dockerfile.dev'} | ${['dockerfile']} + ${'docker.aws.compose.yaml'} | ${['dockercompose']} + ${'composer.lock'} | ${['json', 'lock']} ${'code.jl'} | ${['julia']} + ${'code.ts.map'} | ${['json', 'map']} `('getLanguagesForBasename $filename', ({ filename, expected }) => { - expect(LangId.getLanguagesForBasename(filename)).toEqual(expected); + expect(LangId.findMatchingFileTypes(filename)).toEqual(expected); }); test('that all extensions start with a .', () => { - for (const def of LangId.languageExtensionDefinitions) { + for (const def of definitions) { const extsWithoutPeriod = def.extensions.filter((ext) => ext[0] !== '.'); expect(extsWithoutPeriod).toEqual([]); } }); test.each` - ext | expected - ${'.md'} | ${false} - ${'.exe'} | ${true} - ${'.obj'} | ${true} - ${'.dll'} | ${true} - ${'.gif'} | ${true} - ${'.jpeg'} | ${true} - ${'.jpg'} | ${true} - ${'.txt'} | ${false} - ${'md'} | ${false} - ${'exe'} | ${true} - ${'obj'} | ${true} - ${'dll'} | ${true} - ${'gif'} | ${true} - ${'txt'} | ${false} + ext | expected + ${'.md'} | ${false} + ${'.exe'} | ${true} + ${'.obj'} | ${true} + ${'.dll'} | ${true} + ${'.gif'} | ${true} + ${'.jpeg'} | ${true} + ${'.jpg'} | ${true} + ${'.txt'} | ${false} + ${'md'} | ${false} + ${'exe'} | ${true} + ${'obj'} | ${true} + ${'.EXE'} | ${true} + ${'.bin'} | ${true} + ${'dll'} | ${true} + ${'gif'} | ${true} + ${'txt'} | ${false} + ${'unknown'} | ${false} `('isBinaryExt $ext => $expected', ({ ext, expected }) => { expect(LangId.isBinaryExt(ext)).toBe(expected); }); @@ -75,6 +87,8 @@ describe('Validate LanguageIds', () => { ${'.txt'} | ${false} ${'md'} | ${false} ${'exe'} | ${true} + ${'.EXE'} | ${true} + ${'.bin'} | ${true} ${'obj'} | ${true} ${'dll'} | ${true} ${'gif'} | ${true} @@ -95,6 +109,7 @@ describe('Validate LanguageIds', () => { ${'image.gif'} | ${true} ${'picture.jpeg'} | ${true} ${'picture.jpg'} | ${true} + ${'Cargo.lock'} | ${true} ${'doc.txt'} | ${false} ${'lock'} | ${false} ${'Gemfile'} | ${false} @@ -117,11 +132,23 @@ describe('Validate LanguageIds', () => { ${'picture.jpg'} | ${true} ${'doc.txt'} | ${false} ${'lock'} | ${false} + ${'Cargo.lock'} | ${false} ${'Gemfile'} | ${false} ${'.cspellcache'} | ${false} ${'my-video.webm'} | ${true} ${'my-logo.svg'} | ${false} - `('isGeneratedExt $filename => $expected', ({ filename, expected }) => { + `('isBinaryFile $filename => $expected', ({ filename, expected }) => { expect(LangId.isBinaryFile(filename)).toBe(expected); }); + + test.each` + filetype | expected + ${'typescript'} | ${false} + ${'gzip'} | ${true} + ${'unknown'} | ${false} + `('isBinaryFileType $filetype => $expected', ({ filetype, expected }) => { + expect(LangId.isBinaryFileType(filetype)).toBe(expected); + }); }); + +// cspell:ignore dockercompose diff --git a/packages/cspell-filetypes/src/filetypes.ts b/packages/cspell-filetypes/src/filetypes.ts new file mode 100644 index 00000000000..97c199d4572 --- /dev/null +++ b/packages/cspell-filetypes/src/filetypes.ts @@ -0,0 +1,224 @@ +import { definitions } from './definitions.js'; +import { FileTypeDefinition, FileTypeDefinitions, FileTypeId } from './types.js'; + +type ExtensionToFileTypeIdMapSet = Map>; +type ExtensionToFileTypeIdMap = Map; + +const binaryFormatIds = definitions.filter((d) => d.format === 'Binary').map((d) => d.id); + +export const binaryLanguages = new Set(['binary', 'image', 'video', 'fonts', ...binaryFormatIds]); + +export const generatedFiles = new Set([ + ...binaryLanguages, + 'map', + 'lock', + 'pdf', + 'cache_files', + 'rsa', + 'pem', + 'trie', + 'log', +]); + +export const languageIds: FileTypeId[] = definitions.map(({ id }) => id); + +const mapExtensionToSetOfLanguageIds: ExtensionToFileTypeIdMapSet = buildLanguageExtensionMapSet(definitions); +const mapExtensionToLanguageIds: ExtensionToFileTypeIdMap = + buildExtensionToLanguageIdMap(mapExtensionToSetOfLanguageIds); + +interface RegExpMatchToFileTypeId { + regexp: RegExp; + id: FileTypeId; +} + +const idsWithRegExp: RegExpMatchToFileTypeId[] = definitions.map(defToRegExp).filter((f) => !!f); + +/** + * Checks to see if a file type is considered to be a binary file type. + * @param ext - the file extension to check + * @returns true if the file type is known to be binary. + */ +export function isBinaryExt(ext: string): boolean { + return isBinaryFileType(getFileTypesForExt(ext)); +} + +/** + * Checks to see if a file type is considered to be a binary file type. + * @param filename - the filename to check + * @returns true if the file type is known to be binary. + */ +export function isBinaryFile(filename: string): boolean { + filename = basename(filename); + return isBinaryFileType(findMatchingFileTypes(filename)); +} + +/** + * Checks to see if a file type is considered to be a binary file type. + * @param fileTypeId - the file type id to check + * @returns true if the file type is known to be binary. + */ +export function isBinaryFileType(fileTypeId: FileTypeId | FileTypeId[] | Iterable): boolean { + return doesSetContainAnyOf(binaryLanguages, fileTypeId); +} + +/** + * Check if a file extension is associated with generated file.. Generated files are files that are not typically edited by a human. + * Example: + * - package-lock.json + * @param ext - the file extension to check. + * @returns true if the file type known to be generated. + */ +export function isGeneratedExt(ext: string): boolean { + return isFileTypeGenerated(getFileTypesForExt(ext)); +} + +/** + * Check if a file is auto generated. Generated files are files that are not typically edited by a human. + * Example: + * - package-lock.json + * @param filename - the full filename to check + * @returns true if the file type known to be generated. + */ +export function isGeneratedFile(filename: string): boolean { + return isFileTypeGenerated(findMatchingFileTypes(filename)); +} + +/** + * Check if a file type is auto generated. Generated files are files that are not typically edited by a human. + * Example: + * - package-lock.json + * @param fileTypeId - the file type id to check + * @returns true if the file type known to be generated. + */ +export function isFileTypeGenerated(fileTypeId: FileTypeId | FileTypeId[] | Iterable): boolean { + return doesSetContainAnyOf(generatedFiles, fileTypeId); +} + +function doesSetContainAnyOf( + setOfIds: Set, + fileTypeId: FileTypeId | FileTypeId[] | Iterable, +): boolean { + if (typeof fileTypeId === 'string') { + return setOfIds.has(fileTypeId); + } + for (const id of fileTypeId) { + if (setOfIds.has(id)) { + return true; + } + } + return false; +} + +function buildLanguageExtensionMapSet(defs: FileTypeDefinitions): ExtensionToFileTypeIdMapSet { + return defs.reduce((map, def) => { + function addId(value: string) { + autoResolve(map, value, () => new Set()).add(def.id); + } + + def.extensions.forEach(addId); + def.filenames?.forEach((filename) => (typeof filename === 'string' ? addId(filename) : undefined)); + return map; + }, new Map>()); +} + +function buildExtensionToLanguageIdMap(map: ExtensionToFileTypeIdMapSet): ExtensionToFileTypeIdMap { + return new Map([...map].map(([k, s]) => [k, [...s]])); +} + +function _getLanguagesForExt(ext: string): string[] | undefined { + return mapExtensionToLanguageIds.get(ext) || mapExtensionToLanguageIds.get('.' + ext); +} + +/** + * Tries to find a matching language for a given file extension. + * @param ext - the file extension to look up. + * @returns an array of language ids that match the extension. The array is empty if no matches are found. + */ +export function getFileTypesForExt(ext: string): FileTypeId[] { + return _getLanguagesForExt(ext) || _getLanguagesForExt(ext.toLowerCase()) || []; +} + +function matchPatternsToFilename(basename: string): FileTypeId[] { + return idsWithRegExp.filter(({ regexp }) => regexp.test(basename)).map(({ id }) => id); +} + +function _getLanguagesForBasename(basename: string): string[] | undefined { + const found = mapExtensionToLanguageIds.get(basename); + if (found) return found; + + const patternMatches = matchPatternsToFilename(basename); + if (patternMatches.length) return patternMatches; + + for (let pos = basename.indexOf('.'); pos >= 0; pos = basename.indexOf('.', pos + 1)) { + const ids = mapExtensionToLanguageIds.get(basename.slice(pos)); + if (ids) return ids; + } + + return undefined; +} + +/** + * Find the matching file types for a given filename. + * @param filename - the full filename + * @returns an array of language ids that match the filename. The array is empty if no matches are found. + */ +export function findMatchingFileTypes(filename: string): FileTypeId[] { + filename = basename(filename); + return _getLanguagesForBasename(filename) || _getLanguagesForBasename(filename.toLowerCase()) || []; +} + +const regExpPathSep = /[\\/]/g; + +function basename(filename: string): string { + return regExpPathSep.test(filename) ? filename.split(regExpPathSep).slice(-1).join('') : filename; +} + +export function autoResolve(map: Map, key: K, resolve: (k: K) => V): V { + const found = map.get(key); + if (found !== undefined || map.has(key)) return found as V; + const value = resolve(key); + map.set(key, value); + return value; +} + +function escapeRegEx(s: string): string { + return s.replaceAll(/[|\\{}()[\]^$+*?.]/g, '\\$&').replaceAll('-', '\\x2d'); +} + +function stringOrGlob(s: string): string | RegExp { + return s.includes('*') ? simpleGlob(s) : s; +} + +function simpleGlob(s: string): RegExp { + s = s.replaceAll('**', '*'); + let pattern = ''; + for (const char of s) { + switch (char) { + case '?': { + pattern += '.'; + break; + } + case '*': { + pattern += '.*'; + break; + } + default: { + pattern += escapeRegEx(char); + } + } + } + return new RegExp(pattern); +} + +function defToRegExp(def: FileTypeDefinition): RegExpMatchToFileTypeId | undefined { + if (!def.filenames) return undefined; + const regExps = def.filenames + .map(stringOrGlob) + .map((f) => (f instanceof RegExp ? f : undefined)) + .filter((f) => !!f); + + if (!regExps.length) return undefined; + + const regexp = new RegExp(regExps.map((r) => r.source).join('|')); + return { regexp, id: def.id }; +} diff --git a/packages/cspell-filetypes/src/index.ts b/packages/cspell-filetypes/src/index.ts new file mode 100644 index 00000000000..32a81207238 --- /dev/null +++ b/packages/cspell-filetypes/src/index.ts @@ -0,0 +1,11 @@ +export { + findMatchingFileTypes, + getFileTypesForExt, + isBinaryExt, + isBinaryFile, + isBinaryFileType, + isFileTypeGenerated, + isGeneratedExt, + isGeneratedFile, +} from './filetypes.js'; +export type { FileTypeId } from './types.js'; diff --git a/packages/cspell-filetypes/src/types.ts b/packages/cspell-filetypes/src/types.ts new file mode 100644 index 00000000000..a78b11f6d80 --- /dev/null +++ b/packages/cspell-filetypes/src/types.ts @@ -0,0 +1,18 @@ +export type FileTypeId = string; + +export interface FileTypeExtensionDefinition { + id: FileTypeId; + /** List of extensions starting with '.' */ + extensions: string[]; + /** Filenames that do not have an extension or have a different type than their implied extension */ + filenames?: string[]; + /** Indicates that it is a Text or Binary file type. */ + format?: 'Text' | 'Binary'; + /** Optional Description */ + description?: string; + /** Optional Comment */ + comment?: string; +} + +export type FileTypeDefinition = FileTypeExtensionDefinition; +export type FileTypeDefinitions = FileTypeDefinition[]; diff --git a/packages/cspell-filetypes/tsconfig.json b/packages/cspell-filetypes/tsconfig.json new file mode 100644 index 00000000000..89f72354670 --- /dev/null +++ b/packages/cspell-filetypes/tsconfig.json @@ -0,0 +1,8 @@ +{ + "extends": "../../tsconfig.esm.json", + "compilerOptions": { + "types": ["node"], + "outDir": "dist" + }, + "include": ["src/**/*.ts", "src/**/*.test.ts"] +} diff --git a/packages/cspell-lib/api/api.d.ts b/packages/cspell-lib/api/api.d.ts index eb4a962c20d..e5d2f18f075 100644 --- a/packages/cspell-lib/api/api.d.ts +++ b/packages/cspell-lib/api/api.d.ts @@ -5,6 +5,8 @@ import { VFileSystem } from 'cspell-io'; export { FSCapabilityFlags, VFileSystemProvider, VirtualFS, asyncIterableToArray, readFileText as readFile, readFileTextSync as readFileSync, writeToFile, writeToFileIterable, writeToFileIterableP } from 'cspell-io'; import { SpellingDictionaryCollection, SuggestOptions, SuggestionResult, CachingDictionary } from 'cspell-dictionary'; export { SpellingDictionary, SpellingDictionaryCollection, SuggestOptions, SuggestionCollector, SuggestionResult, createSpellingDictionary, createCollection as createSpellingDictionaryCollection } from 'cspell-dictionary'; +import { FileTypeId } from '@cspell/filetypes'; +export { findMatchingFileTypes as getLanguageIdsForBaseFilename, getFileTypesForExt as getLanguagesForExt } from '@cspell/filetypes'; import { WeightMap } from 'cspell-trie-lib'; export { CompoundWordsMethod } from 'cspell-trie-lib'; import { CSpellConfigFile } from 'cspell-config-lib'; @@ -255,10 +257,6 @@ declare function isSpellingDictionaryLoadError(e: Error): e is SpellingDictionar */ declare function getDictionary(settings: CSpellUserSettings): Promise; -type LanguageId = string; -declare function getLanguagesForExt(ext: string): string[]; -declare function getLanguagesForBasename(basename: string): string[]; - interface PerfTimer { readonly name: string; readonly startTime: number; @@ -606,7 +604,7 @@ interface SuggestionOptions extends FromSuggestOptions { /** * languageId to use when determining file type. */ - languageId?: LanguageId | LanguageId[]; + languageId?: FileTypeId | FileTypeId[]; /** * Locale to use. */ @@ -948,7 +946,7 @@ interface TraceResult extends DictionaryTraceResult { dictActive: boolean; } interface TraceOptions { - languageId?: LanguageId | LanguageId[]; + languageId?: FileTypeId | FileTypeId[]; locale?: LocaleId; ignoreCase?: boolean; allowCompoundWords?: boolean; @@ -1059,4 +1057,4 @@ declare namespace textApi_d { export { textApi_d_calculateTextDocumentOffsets as calculateTextDocumentOffsets, textApi_d_camelToSnake as camelToSnake, textApi_d_cleanText as cleanText, textApi_d_cleanTextOffset as cleanTextOffset, textApi_d_extractLinesOfText as extractLinesOfText, textApi_d_extractPossibleWordsFromTextOffset as extractPossibleWordsFromTextOffset, textApi_d_extractText as extractText, textApi_d_extractWordsFromCode as extractWordsFromCode, textApi_d_extractWordsFromCodeTextOffset as extractWordsFromCodeTextOffset, textApi_d_extractWordsFromText as extractWordsFromText, textApi_d_extractWordsFromTextOffset as extractWordsFromTextOffset, textApi_d_isFirstCharacterLower as isFirstCharacterLower, textApi_d_isFirstCharacterUpper as isFirstCharacterUpper, textApi_d_isLowerCase as isLowerCase, textApi_d_isUpperCase as isUpperCase, textApi_d_lcFirst as lcFirst, textApi_d_match as match, textApi_d_matchCase as matchCase, textApi_d_matchStringToTextOffset as matchStringToTextOffset, textApi_d_matchToTextOffset as matchToTextOffset, textApi_d_removeAccents as removeAccents, textApi_d_snakeToCamel as snakeToCamel, textApi_d_splitCamelCaseWord as splitCamelCaseWord, textApi_d_splitCamelCaseWordWithOffset as splitCamelCaseWordWithOffset, textApi_d_stringToRegExp as stringToRegExp, textApi_d_textOffset as textOffset, textApi_d_ucFirst as ucFirst }; } -export { type CheckTextInfo, type ConfigurationDependencies, type CreateTextDocumentParams, type DetermineFinalDocumentSettingsResult, type Document, DocumentValidator, type DocumentValidatorOptions, ENV_CSPELL_GLOB_ROOT, type ExcludeFilesGlobMap, type ExclusionFunction, exclusionHelper_d as ExclusionHelper, type FeatureFlag, FeatureFlags, ImportError, type ImportFileRefWithError$1 as ImportFileRefWithError, IncludeExcludeFlag, type IncludeExcludeOptions, index_link_d as Link, type Logger, type PerfTimer, type SpellCheckFileOptions, type SpellCheckFilePerf, type SpellCheckFileResult, SpellingDictionaryLoadError, type SuggestedWord, SuggestionError, type SuggestionOptions, type SuggestionsForWordResult, textApi_d as Text, type TextDocument, type TextDocumentLine, type TextDocumentRef, type TextInfoItem, type TraceOptions, type TraceResult, type TraceWordResult, UnknownFeatureFlagError, type ValidationIssue, calcOverrideSettings, checkFilenameMatchesExcludeGlob as checkFilenameMatchesGlob, checkText, checkTextDocument, clearCachedFiles, clearCaches, combineTextAndLanguageSettings, combineTextAndLanguageSettings as constructSettingsForText, createConfigLoader, createPerfTimer, createTextDocument, currentSettingsFileVersion, defaultConfigFilenames, defaultFileName, defaultFileName as defaultSettingsFilename, determineFinalDocumentSettings, extractDependencies, extractImportErrors, fileToDocument, fileToTextDocument, finalizeSettings, getCachedFileSize, getDefaultBundledSettingsAsync, getDefaultConfigLoader, getDefaultSettings, getDictionary, getGlobalSettings, getGlobalSettingsAsync, getLanguagesForBasename as getLanguageIdsForBaseFilename, getLanguagesForExt, getLogger, getSources, getSystemFeatureFlags, getVirtualFS, isBinaryFile, isSpellingDictionaryLoadError, loadConfig, loadPnP, mergeInDocSettings, mergeSettings, readRawSettings, readSettings, readSettingsFiles, refreshDictionaryCache, resolveFile, searchForConfig, sectionCSpell, setLogger, shouldCheckDocument, spellCheckDocument, spellCheckFile, suggestionsForWord, suggestionsForWords, traceWords, traceWordsAsync, updateTextDocument, validateText }; +export { type CheckTextInfo, type ConfigurationDependencies, type CreateTextDocumentParams, type DetermineFinalDocumentSettingsResult, type Document, DocumentValidator, type DocumentValidatorOptions, ENV_CSPELL_GLOB_ROOT, type ExcludeFilesGlobMap, type ExclusionFunction, exclusionHelper_d as ExclusionHelper, type FeatureFlag, FeatureFlags, ImportError, type ImportFileRefWithError$1 as ImportFileRefWithError, IncludeExcludeFlag, type IncludeExcludeOptions, index_link_d as Link, type Logger, type PerfTimer, type SpellCheckFileOptions, type SpellCheckFilePerf, type SpellCheckFileResult, SpellingDictionaryLoadError, type SuggestedWord, SuggestionError, type SuggestionOptions, type SuggestionsForWordResult, textApi_d as Text, type TextDocument, type TextDocumentLine, type TextDocumentRef, type TextInfoItem, type TraceOptions, type TraceResult, type TraceWordResult, UnknownFeatureFlagError, type ValidationIssue, calcOverrideSettings, checkFilenameMatchesExcludeGlob as checkFilenameMatchesGlob, checkText, checkTextDocument, clearCachedFiles, clearCaches, combineTextAndLanguageSettings, combineTextAndLanguageSettings as constructSettingsForText, createConfigLoader, createPerfTimer, createTextDocument, currentSettingsFileVersion, defaultConfigFilenames, defaultFileName, defaultFileName as defaultSettingsFilename, determineFinalDocumentSettings, extractDependencies, extractImportErrors, fileToDocument, fileToTextDocument, finalizeSettings, getCachedFileSize, getDefaultBundledSettingsAsync, getDefaultConfigLoader, getDefaultSettings, getDictionary, getGlobalSettings, getGlobalSettingsAsync, getLogger, getSources, getSystemFeatureFlags, getVirtualFS, isBinaryFile, isSpellingDictionaryLoadError, loadConfig, loadPnP, mergeInDocSettings, mergeSettings, readRawSettings, readSettings, readSettingsFiles, refreshDictionaryCache, resolveFile, searchForConfig, sectionCSpell, setLogger, shouldCheckDocument, spellCheckDocument, spellCheckFile, suggestionsForWord, suggestionsForWords, traceWords, traceWordsAsync, updateTextDocument, validateText }; diff --git a/packages/cspell-lib/package.json b/packages/cspell-lib/package.json index d6b5e660bf5..363493d2a06 100644 --- a/packages/cspell-lib/package.json +++ b/packages/cspell-lib/package.json @@ -63,6 +63,7 @@ "@cspell/cspell-resolver": "workspace:*", "@cspell/cspell-types": "workspace:*", "@cspell/dynamic-import": "workspace:*", + "@cspell/filetypes": "workspace:*", "@cspell/strong-weak-map": "workspace:*", "@cspell/url": "workspace:*", "clear-module": "^4.1.2", diff --git a/packages/cspell-lib/src/lib/Document/isBinaryDoc.ts b/packages/cspell-lib/src/lib/Document/isBinaryDoc.ts index 0bbf65043e3..747956a287c 100644 --- a/packages/cspell-lib/src/lib/Document/isBinaryDoc.ts +++ b/packages/cspell-lib/src/lib/Document/isBinaryDoc.ts @@ -1,4 +1,4 @@ -import { getLanguagesForBasename, isGenerated } from '../LanguageIds.js'; +import { getLanguagesForBasename, isGenerated } from '../fileTypes.js'; import type { Uri } from '../util/Uri.js'; import { basename, toUri } from '../util/Uri.js'; import type { Document } from './Document.js'; diff --git a/packages/cspell-lib/src/lib/LanguageIds.ts b/packages/cspell-lib/src/lib/LanguageIds.ts deleted file mode 100644 index 13656038ca0..00000000000 --- a/packages/cspell-lib/src/lib/LanguageIds.ts +++ /dev/null @@ -1,359 +0,0 @@ -/** - * The data for this file was constructed from: - * ``` - * cd ~/projects/clones/vscode/extensions - * find . -type f -iname package.json -exec pcregrep -M '(?:"id":.*)|(?:"extensions":[^\]]+)' {} \; > ~/projects/cspell/src/languageIds.txt` - * ``` - */ - -import { autoResolve } from './util/AutoResolve.js'; - -// cspell:ignore cljs cljx cson iname pcregrep fsscript fasl gradle shtml xhtml mdoc aspx jshtm gitconfig bowerrc -// cspell:ignore jshintrc jscsrc eslintrc babelrc webmanifest mdown markdn psgi phtml pssc psrc gypi rhistory -// cspell:ignore rprofile cshtml gemspec cginc ebuild zshrc zprofile zlogin zlogout zshenv dsql ascx axml -// cspell:ignore bpmn csproj dita ditamap dtml fsproj fxml isml mxml adoc -// cspell:ignore purescript purs dhall SPSS tfvars - -export interface LanguageExtensionDefinition { - id: string; - /** List of extensions starting with '.' */ - extensions: string[]; - /** Filenames that do not have an extension or have a different type than their implied extension */ - filenames?: string[]; - /** Indicates that it is a Text or Binary file type. */ - format?: 'Text' | 'Binary'; - /** Optional Description */ - description?: string; -} -export type LanguageDefinition = LanguageExtensionDefinition; -export type LanguageDefinitions = LanguageDefinition[]; -export type ExtensionToLanguageIdMapSet = Map>; -export type ExtensionToLanguageIdMap = Map; - -export const languageExtensionDefinitions: LanguageDefinitions = [ - { id: 'ada', extensions: ['.adb', '.ads'] }, - { id: 'apiblueprint', extensions: ['.apib', '.apiblueprint'] }, - { id: 'asciidoc', extensions: ['.adoc', '.asc', '.asciidoc'] }, - { id: 'bat', extensions: ['.bat', '.cmd'] }, - { id: 'clojure', extensions: ['.clj', '.cljs', '.cljx', '.clojure', '.edn'] }, - { id: 'coffeescript', extensions: ['.coffee', '.cson'] }, - { id: 'c', extensions: ['.c'] }, - // cspell:ignore cmake - { id: 'cmake', extensions: ['.cmake'], filenames: ['CMakeLists.txt'] }, - { - id: 'cpp', - extensions: ['.cpp', '.cc', '.cxx', '.hpp', '.hh', '.hxx', '.h', '.mm', '.ino', '.inl'], - }, - { id: 'csharp', extensions: ['.cs'] }, - { id: 'css', extensions: ['.css'] }, - { id: 'dhall', extensions: ['.dhall'] }, - { id: 'diff', extensions: ['.diff', '.patch', '.rej'] }, - { id: 'dockerfile', extensions: ['.dockerfile'], filenames: ['Dockerfile', 'dockerfile', 'Dockerfile.dev'] }, - { id: 'elisp', extensions: ['.el'] }, - { id: 'elixir', extensions: ['.ex', '.exs'] }, - { id: 'fsharp', extensions: ['.fs', '.fsi', '.fsx', '.fsscript'] }, - { id: 'go', extensions: ['.go'] }, - { id: 'groovy', extensions: ['.groovy', '.gvy', '.gradle'] }, - { id: 'handlebars', extensions: ['.handlebars', '.hbs'] }, - { id: 'haskell', extensions: ['.hs', '.lhs'] }, - { - id: 'html', - extensions: ['.html', '.htm', '.shtml', '.xhtml', '.mdoc', '.jsp', '.asp', '.aspx', '.jshtm', '.volt', '.vue'], - }, - { id: 'ini', extensions: ['.ini', '.conf'] }, - { id: 'properties', extensions: ['.properties', '.gitconfig', '.cfg', '.conf'] }, - { id: 'jade', extensions: ['.jade', '.pug'] }, - { id: 'java', extensions: ['.java', '.jav'] }, - { id: 'javascriptreact', extensions: ['.jsx'] }, - { id: 'javascript', extensions: ['.js', '.mjs', '.es6', '.cjs'] }, - { - id: 'json', - extensions: ['.json', '.jsonc', '.bowerrc', '.jshintrc', '.jscsrc', '.eslintrc', '.babelrc', '.webmanifest'], - }, - { id: 'jsonc', extensions: ['.jsonc'] }, - { id: 'jsonc', extensions: ['.code-workspace'], filenames: ['.code-workspace'] }, - { id: 'julia', extensions: ['.jl'] }, - { id: 'jungle', extensions: ['.jungle'] }, - { id: 'less', extensions: ['.less'] }, - { id: 'lisp', extensions: ['.lisp', '.lsp', '.l', '.fasl'] }, - { id: 'literate haskell', extensions: ['.lhs'] }, - { id: 'lua', extensions: ['.lua'] }, - { id: 'makefile', extensions: ['.mk'], filenames: ['makefile'] }, - { id: 'markdown', extensions: ['.md', '.mdown', '.markdown', '.markdn'] }, - { id: 'mdx', extensions: ['.mdx'] }, - { id: 'monkeyc', extensions: ['.mc', '.mb'] }, - { id: 'objective-c', extensions: ['.m'] }, - { id: 'perl', extensions: ['.pl', '.pm', '.pod', '.t', '.PL', '.psgi'] }, - { id: 'perl6', extensions: ['.p6', '.pl6', '.pm6', '.nqp'] }, - { id: 'php', extensions: ['.php', '.php4', '.php5', '.phtml', '.ctp'] }, - { id: 'plaintext', extensions: ['.txt'] }, - { id: 'powershell', extensions: ['.ps1', '.psm1', '.psd1', '.pssc', '.psrc'] }, - { id: 'purescript', extensions: ['.purs'] }, - { id: 'python', extensions: ['.py', '.rpy', '.pyw', '.cpy', '.gyp', '.gypi'] }, - { id: 'r', extensions: ['.r', '.R', '.rhistory', '.rprofile', '.rt'] }, - { id: 'razor', extensions: ['.cshtml'] }, - { id: 'ruby', extensions: ['.rb', '.rbx', '.rjs', '.gemspec', '.rake', '.ru'] }, - { id: 'ruby', extensions: [], filenames: ['Gemfile'] }, - { id: 'rust', extensions: ['.rs'] }, - { id: 'scala', extensions: ['.scala', '.sc', '.sbt'] }, - { id: 'scss', extensions: ['.scss'] }, - { id: 'shaderlab', extensions: ['.shader', '.cginc'] }, - { - id: 'shellscript', - extensions: [ - '.sh', - '.bash', - '.bashrc', - '.bash_aliases', - '.bash_profile', - '.bash_login', - '.ebuild', - '.install', - '.profile', - '.bash_logout', - '.zsh', - '.zshrc', - '.zprofile', - '.zlogin', - '.zlogout', - '.zshenv', - '.zsh-theme', - ], - }, - { id: 'sql', extensions: ['.sql', '.dsql'] }, - { id: 'svelte', extensions: ['.svelte'] }, - { id: 'swift', extensions: ['.swift'] }, - { id: 'toml', extensions: ['.toml'] }, - { id: 'terraform', extensions: ['.tf', '.tf.json'] }, - { id: 'tfvars', extensions: ['.tfvars'], description: 'Terraform Variables' }, - { id: 'typescript', extensions: ['.ts', '.cts', '.mts'] }, - { id: 'typescriptreact', extensions: ['.tsx'] }, - { id: 'vb', extensions: ['.vb', '.brs', '.vbs', '.bas'] }, - { id: 'vue', extensions: ['.vue'] }, - { - id: 'xml', - extensions: [ - '.ascx', - '.atom', - '.axml', - '.bpmn', - '.config', - '.cpt', - '.csl', - '.csproj.user', - '.csproj', - '.dita', - '.ditamap', - '.dtd', - '.dtml', - '.ent', - '.fsproj', - '.fxml', - '.iml', - '.isml', - '.jmx', - '.launch', - '.menu', - '.mod', - '.mxml', - '.nuspec', - '.opml', - '.owl', - '.proj', - '.pt', - '.pubxml.user', - '.pubxml', - '.rdf', - '.rng', - '.rss', - '.shproj', - '.storyboard', - '.svg', - '.targets', - '.tld', - '.tmx', - '.vbproj.user', - '.vbproj', - '.vcxproj.filters', - '.vcxproj', - '.wsdl', - '.wxi', - '.wxl', - '.wxs', - '.xaml', - '.xbl', - '.xib', - '.xlf', - '.xliff', - '.xml', - '.xoml', - '.xpdl', - '.xsd', - '.xul', - ], - }, - { id: 'wheel', extensions: ['.whl'], format: 'Binary' }, - { id: 'xsl', extensions: ['.xsl', '.xslt'] }, - { id: 'yaml', extensions: ['.eyaml', '.eyml', '.yaml', '.yml'] }, - { id: 'latex', extensions: ['.tex'] }, - { id: 'map', extensions: ['.map'] }, - { id: 'pdf', extensions: ['.pdf'] }, - { id: 'rsa', extensions: ['.pub'], filenames: ['id_rsa', 'id_rsa.pub'] }, - { id: 'pem', extensions: ['.private-key.pem', '.pem'] }, - { id: 'pem-private-key', extensions: ['.private-key.pem'] }, - - // - // Special file types used to prevent spell checking. - // - { - id: 'image', - extensions: [ - '.bmp', - '.exr', - '.gif', - '.heic', - '.ico', - '.jpeg', - '.jpg', - '.pbm', - '.pgm', - '.png', - '.ppm', - '.ras', - '.sgi', - '.tiff', - '.webp', - '.xbm', - ], - format: 'Binary', - description: 'Some image extensions', - }, - // cspell:ignore woff - { - id: 'binary', - extensions: ['.gz', '.exe', '.dll', '.lib', '.obj', '.o', '.eot', '.cur', '.zip'], - format: 'Binary', - }, - { - id: 'fonts', - extensions: ['.ttf', '.woff', '.woff2'], - format: 'Binary', - }, - { - id: 'video', - extensions: ['.mov', '.mpg', '.mpeg', '.mp4', '.avi', '.wmv', '.mkv', '.flv'], - format: 'Binary', - }, - { - id: 'lock', - extensions: ['.lock'], - filenames: ['package-lock.json'], - }, - { - id: 'cache_files', - extensions: [], - // cspell:ignore eslintcache - filenames: ['.cspellcache', '.DS_Store', '.eslintcache'], - }, - { id: 'dll', extensions: ['.dll'], format: 'Binary' }, - { id: 'exe', extensions: ['.exe'], format: 'Binary' }, - { id: 'object-file', extensions: ['.o', '.obj'], format: 'Binary' }, - { id: 'jar', extensions: ['.jar'], format: 'Binary' }, - { id: 'spv', extensions: ['.spv'], format: 'Binary', description: 'SPSS Output Document' }, - { id: 'mdb', extensions: ['.mdb'], format: 'Binary', description: 'Microsoft Access DB' }, - { id: 'webm', extensions: ['.webm'], format: 'Binary', description: 'WebM is an audiovisual media file format.' }, - { id: 'trie', extensions: ['.trie'], format: 'Binary', description: 'CSpell dictionary file.' }, -]; - -export type LanguageId = string; - -const binaryFormatIds = languageExtensionDefinitions.filter((d) => d.format === 'Binary').map((d) => d.id); -export const binaryLanguages = new Set(['binary', 'image', 'video', 'fonts', ...binaryFormatIds]); - -export const generatedFiles = new Set([...binaryLanguages, 'map', 'lock', 'pdf', 'cache_files', 'rsa', 'pem', 'trie']); - -export const languageIds: LanguageId[] = languageExtensionDefinitions.map(({ id }) => id); - -const mapExtensionToSetOfLanguageIds: ExtensionToLanguageIdMapSet = - buildLanguageExtensionMapSet(languageExtensionDefinitions); -const mapExtensionToLanguageIds: ExtensionToLanguageIdMap = - buildExtensionToLanguageIdMap(mapExtensionToSetOfLanguageIds); - -export function isBinaryExt(ext: string): boolean { - return isBinary(getLanguagesForExt(ext)); -} - -export function isBinaryFile(basename: string): boolean { - return isBinary(getLanguagesForBasename(basename)); -} - -export function isBinary(languageId: LanguageId | LanguageId[] | Iterable): boolean { - return doesSetContainAnyOf(binaryLanguages, languageId); -} - -export function isGeneratedExt(ext: string): boolean { - return isGenerated(getLanguagesForExt(ext)); -} - -export function isGeneratedFile(basename: string): boolean { - return isGenerated(getLanguagesForBasename(basename)); -} - -export function isGenerated(languageId: LanguageId | LanguageId[] | Iterable): boolean { - return doesSetContainAnyOf(generatedFiles, languageId); -} - -function doesSetContainAnyOf( - setOfIds: Set, - languageId: LanguageId | LanguageId[] | Iterable, -): boolean { - if (typeof languageId === 'string') { - return setOfIds.has(languageId); - } - for (const id of languageId) { - if (setOfIds.has(id)) { - return true; - } - } - return false; -} - -export function buildLanguageExtensionMapSet(defs: LanguageDefinitions): ExtensionToLanguageIdMapSet { - return defs.reduce((map, def) => { - function addId(value: string) { - autoResolve(map, value, () => new Set()).add(def.id); - } - - def.extensions.forEach(addId); - def.filenames?.forEach(addId); - return map; - }, new Map>()); -} - -function buildExtensionToLanguageIdMap(map: ExtensionToLanguageIdMapSet): ExtensionToLanguageIdMap { - return new Map([...map].map(([k, s]) => [k, [...s]])); -} - -function _getLanguagesForExt(ext: string): string[] | undefined { - return mapExtensionToLanguageIds.get(ext) || mapExtensionToLanguageIds.get('.' + ext); -} - -export function getLanguagesForExt(ext: string): string[] { - return _getLanguagesForExt(ext) || _getLanguagesForExt(ext.toLowerCase()) || []; -} - -function _getLanguagesForBasename(basename: string): string[] | undefined { - const found = mapExtensionToLanguageIds.get(basename); - if (found) return found; - - for (let pos = basename.indexOf('.'); pos >= 0; pos = basename.indexOf('.', pos + 1)) { - const ids = mapExtensionToLanguageIds.get(basename.slice(pos)); - if (ids) return ids; - } - - return undefined; -} - -export function getLanguagesForBasename(basename: string): string[] { - return _getLanguagesForBasename(basename) || _getLanguagesForBasename(basename.toLowerCase()) || []; -} diff --git a/packages/cspell-lib/src/lib/Models/TextDocument.ts b/packages/cspell-lib/src/lib/Models/TextDocument.ts index d126e0e3aa6..70c3e790f32 100644 --- a/packages/cspell-lib/src/lib/Models/TextDocument.ts +++ b/packages/cspell-lib/src/lib/Models/TextDocument.ts @@ -4,7 +4,7 @@ import { toFileURL, toURL } from '@cspell/url'; import { TextDocument as VsTextDocument } from 'vscode-languageserver-textdocument'; import { getFileSystem } from '../fileSystem.js'; -import { getLanguagesForBasename } from '../LanguageIds.js'; +import { getLanguagesForBasename } from '../fileTypes.js'; import * as Uri from '../util/Uri.js'; export type DocumentUri = Uri.Uri | URL | string; diff --git a/packages/cspell-lib/src/lib/fileTypes.ts b/packages/cspell-lib/src/lib/fileTypes.ts new file mode 100644 index 00000000000..a530479be13 --- /dev/null +++ b/packages/cspell-lib/src/lib/fileTypes.ts @@ -0,0 +1,6 @@ +export type { FileTypeId, FileTypeId as LanguageId } from '@cspell/filetypes'; +export { + findMatchingFileTypes as getLanguagesForBasename, + getFileTypesForExt as getLanguagesForExt, + isFileTypeGenerated as isGenerated, +} from '@cspell/filetypes'; diff --git a/packages/cspell-lib/src/lib/index.ts b/packages/cspell-lib/src/lib/index.ts index 1832e1a1783..2eb336adf2f 100644 --- a/packages/cspell-lib/src/lib/index.ts +++ b/packages/cspell-lib/src/lib/index.ts @@ -7,7 +7,7 @@ export { FeatureFlag, FeatureFlags, getSystemFeatureFlags, UnknownFeatureFlagErr export type { VFileSystemProvider, VirtualFS } from './fileSystem.js'; export { FSCapabilityFlags, getVirtualFS } from './fileSystem.js'; export { getDictionary } from './getDictionary.js'; -export { getLanguagesForBasename as getLanguageIdsForBaseFilename, getLanguagesForExt } from './LanguageIds.js'; +export { getLanguagesForBasename as getLanguageIdsForBaseFilename, getLanguagesForExt } from './fileTypes.js'; export type { CreateTextDocumentParams, TextDocument, diff --git a/packages/cspell-lib/src/lib/suggestions.ts b/packages/cspell-lib/src/lib/suggestions.ts index 7bb4e3e9af0..f54b4c4feff 100644 --- a/packages/cspell-lib/src/lib/suggestions.ts +++ b/packages/cspell-lib/src/lib/suggestions.ts @@ -2,7 +2,7 @@ import assert from 'node:assert'; import type { CSpellSettings, LocaleId } from '@cspell/cspell-types'; -import type { LanguageId } from './LanguageIds.js'; +import type { LanguageId } from './fileTypes.js'; import { finalizeSettings, getDefaultSettings, getGlobalSettingsAsync, mergeSettings } from './Settings/index.js'; import { calcSettingsForLanguageId, diff --git a/packages/cspell-lib/src/lib/textValidation/determineTextDocumentSettings.ts b/packages/cspell-lib/src/lib/textValidation/determineTextDocumentSettings.ts index c3729364e9d..ec0ade62778 100644 --- a/packages/cspell-lib/src/lib/textValidation/determineTextDocumentSettings.ts +++ b/packages/cspell-lib/src/lib/textValidation/determineTextDocumentSettings.ts @@ -2,7 +2,7 @@ import * as path from 'node:path'; import type { CSpellUserSettings } from '@cspell/cspell-types'; -import { getLanguagesForBasename } from '../LanguageIds.js'; +import { getLanguagesForBasename } from '../fileTypes.js'; import type { CSpellSettingsInternal } from '../Models/CSpellSettingsInternalDef.js'; import type { TextDocument, TextDocumentRef } from '../Models/TextDocument.js'; import { calcOverrideSettings, getDefaultSettings, getGlobalSettings, mergeSettings } from '../Settings/index.js'; diff --git a/packages/cspell-lib/src/lib/trace.ts b/packages/cspell-lib/src/lib/trace.ts index e7854f8c816..97ede9804ec 100644 --- a/packages/cspell-lib/src/lib/trace.ts +++ b/packages/cspell-lib/src/lib/trace.ts @@ -1,7 +1,7 @@ import type { CSpellSettings, DictionaryId, LocaleId } from '@cspell/cspell-types'; import { genSequence } from 'gensequence'; -import type { LanguageId } from './LanguageIds.js'; +import type { LanguageId } from './fileTypes.js'; import type { CSpellSettingsInternal } from './Models/CSpellSettingsInternalDef.js'; import { toInternalSettings } from './Settings/CSpellSettingsServer.js'; import { finalizeSettings, mergeSettings } from './Settings/index.js'; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 0a82471d802..e51d148a675 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -569,6 +569,8 @@ importers: specifier: ^2.3.0 version: 2.3.0 + packages/cspell-filetypes: {} + packages/cspell-gitignore: dependencies: '@cspell/url': @@ -649,6 +651,9 @@ importers: '@cspell/dynamic-import': specifier: workspace:* version: link:../dynamic-import + '@cspell/filetypes': + specifier: workspace:* + version: link:../cspell-filetypes '@cspell/strong-weak-map': specifier: workspace:* version: link:../cspell-strong-weak-map