From 821a6de98434d204a68f24f61aeb38bf03cef745 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Wed, 1 Dec 2021 19:37:18 +0100 Subject: [PATCH 01/13] Upd keyword pattern for case-insensitive configuration --- packages/langium/src/parser/token-builder.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index f0848d291..9e502670c 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -80,6 +80,13 @@ export class DefaultTokenBuilder implements TokenBuilder { } protected buildKeywordPattern(keyword: Keyword): TokenPattern { + if (/\w+/.test(keyword.value)) { + const regexLetters: string[] = []; + for (const letter of keyword.value) { + regexLetters.push(`[${letter.toLowerCase()}${letter.toUpperCase()}]`); + } + return new RegExp(regexLetters.join('')); + } return keyword.value; } From 0290881f62307e9be374d698b935f364494a28d1 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Thu, 2 Dec 2021 19:39:07 +0100 Subject: [PATCH 02/13] Add caseInsensitive flag in LangiumConfig --- .../src/language-server/generated/module.ts | 3 ++- .../src/language-server/generated/module.ts | 3 ++- .../src/language-server/generated/module.ts | 3 ++- packages/langium-cli/langium-config-schema.json | 4 ++++ .../langium-cli/src/generator/module-generator.ts | 3 ++- packages/langium-cli/src/package.ts | 2 ++ packages/langium/src/grammar/generated/module.ts | 3 ++- packages/langium/src/grammar/language-meta-data.ts | 1 + .../langium/src/parser/langium-parser-builder.ts | 2 +- packages/langium/src/parser/token-builder.ts | 14 +++++++------- 10 files changed, 25 insertions(+), 13 deletions(-) diff --git a/examples/arithmetics/src/language-server/generated/module.ts b/examples/arithmetics/src/language-server/generated/module.ts index ac2714d3e..dffb3da0e 100644 --- a/examples/arithmetics/src/language-server/generated/module.ts +++ 
b/examples/arithmetics/src/language-server/generated/module.ts @@ -9,7 +9,8 @@ import { ArithmeticsGrammar } from './grammar'; export const ArithmeticsLanguageMetaData: LanguageMetaData = { languageId: 'arithmetics', - fileExtensions: ['.calc'] + fileExtensions: ['.calc'], + caseInsensitive: false }; export const ArithmeticsGeneratedSharedModule: Module = { diff --git a/examples/domainmodel/src/language-server/generated/module.ts b/examples/domainmodel/src/language-server/generated/module.ts index aa93e79dc..f9f7ad32b 100644 --- a/examples/domainmodel/src/language-server/generated/module.ts +++ b/examples/domainmodel/src/language-server/generated/module.ts @@ -9,7 +9,8 @@ import { DomainModelGrammar } from './grammar'; export const DomainModelLanguageMetaData: LanguageMetaData = { languageId: 'domain-model', - fileExtensions: ['.dmodel'] + fileExtensions: ['.dmodel'], + caseInsensitive: false }; export const parserConfig: IParserConfig = { diff --git a/examples/statemachine/src/language-server/generated/module.ts b/examples/statemachine/src/language-server/generated/module.ts index 7bb5804a6..04dde94cd 100644 --- a/examples/statemachine/src/language-server/generated/module.ts +++ b/examples/statemachine/src/language-server/generated/module.ts @@ -9,7 +9,8 @@ import { StatemachineGrammar } from './grammar'; export const StatemachineLanguageMetaData: LanguageMetaData = { languageId: 'statemachine', - fileExtensions: ['.statemachine'] + fileExtensions: ['.statemachine'], + caseInsensitive: false }; export const StatemachineGeneratedSharedModule: Module = { diff --git a/packages/langium-cli/langium-config-schema.json b/packages/langium-cli/langium-config-schema.json index d42bbb1b7..f8cb5d138 100644 --- a/packages/langium-cli/langium-config-schema.json +++ b/packages/langium-cli/langium-config-schema.json @@ -69,6 +69,10 @@ } ] }, + "caseInsensitive": { + "description": "Enable case-insensitive keywords parsing", + "type": "boolean" + }, "textMate": { "description": 
"An object to describe the textMate grammar properties", "type": "object", diff --git a/packages/langium-cli/src/generator/module-generator.ts b/packages/langium-cli/src/generator/module-generator.ts index 36dcd8006..50af53374 100644 --- a/packages/langium-cli/src/generator/module-generator.ts +++ b/packages/langium-cli/src/generator/module-generator.ts @@ -40,7 +40,8 @@ export function generateModule(grammars: langium.Grammar[], config: LangiumConfi node.append('export const ', grammar.name, 'LanguageMetaData: LanguageMetaData = {', NL); node.indent(metaData => { metaData.append(`languageId: '${config.id}',`, NL); - metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}]`, NL); + metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}],`, NL); + metaData.append(`caseInsensitive: ${config.caseInsensitive ? true : false}`, NL); }); node.append('};', NL, NL); } diff --git a/packages/langium-cli/src/package.ts b/packages/langium-cli/src/package.ts index 21502b32d..9fa80ff60 100644 --- a/packages/langium-cli/src/package.ts +++ b/packages/langium-cli/src/package.ts @@ -36,6 +36,8 @@ export interface LangiumLanguageConfig { grammar: string /** File extensions with leading `.` */ fileExtensions?: string[] + /** Enable case-insensitive keywords parsing */ + caseInsensitive?: boolean /** Enable generating a TextMate syntax highlighting file */ textMate?: { /** Output path to syntax highlighting file */ diff --git a/packages/langium/src/grammar/generated/module.ts b/packages/langium/src/grammar/generated/module.ts index 7d10660ca..7f997f77b 100644 --- a/packages/langium/src/grammar/generated/module.ts +++ b/packages/langium/src/grammar/generated/module.ts @@ -11,7 +11,8 @@ import { LangiumGrammarGrammar } from './grammar'; export const LangiumGrammarLanguageMetaData: LanguageMetaData = { languageId: 'langium', - fileExtensions: 
['.langium'] + fileExtensions: ['.langium'], + caseInsensitive: false }; export const LangiumGrammarGeneratedSharedModule: Module = { diff --git a/packages/langium/src/grammar/language-meta-data.ts b/packages/langium/src/grammar/language-meta-data.ts index a0e73f1c9..a43651c70 100644 --- a/packages/langium/src/grammar/language-meta-data.ts +++ b/packages/langium/src/grammar/language-meta-data.ts @@ -7,4 +7,5 @@ export interface LanguageMetaData { languageId: string; fileExtensions: string[]; + caseInsensitive: boolean; } diff --git a/packages/langium/src/parser/langium-parser-builder.ts b/packages/langium/src/parser/langium-parser-builder.ts index 49f5c5ecf..fc55bd5c1 100644 --- a/packages/langium/src/parser/langium-parser-builder.ts +++ b/packages/langium/src/parser/langium-parser-builder.ts @@ -31,7 +31,7 @@ type Method = () => void; export function createLangiumParser(services: LangiumServices): LangiumParser { const grammar = services.Grammar; const tokens = new Map(); - const buildTokens = services.parser.TokenBuilder.buildTokens(grammar); + const buildTokens = services.parser.TokenBuilder.buildTokens(grammar, services.LanguageMetaData.caseInsensitive); buildTokens.forEach(e => { tokens.set(e.name, e); }); diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index 9e502670c..f798026d2 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -12,7 +12,7 @@ import { partialMatches } from '../utils/regex-util'; import { stream } from '../utils/stream'; export interface TokenBuilder { - buildTokens(grammar: Grammar): TokenType[]; + buildTokens(grammar: Grammar, caseInsensitive: boolean): TokenType[]; } export class DefaultTokenBuilder implements TokenBuilder { @@ -21,7 +21,7 @@ export class DefaultTokenBuilder implements TokenBuilder { protected readonly KEYWORD_SUFFIX = '_KEYWORD'; protected readonly TERMINAL_SUFFIX = '_TERMINAL'; - buildTokens(grammar: 
Grammar): TokenType[] { + buildTokens(grammar: Grammar, caseInsensitive: boolean): TokenType[] { const tokenMap = new Map(); const terminalsTokens: TokenType[] = []; const terminals = Array.from(stream(grammar.rules).filter(isTerminalRule)); @@ -37,7 +37,7 @@ export class DefaultTokenBuilder implements TokenBuilder { .sort((a, b) => b.value.length - a.value.length); for (const keyword of keywords) { - const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap); + const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap, caseInsensitive); tokens.push(keywordToken); tokenMap.set(keyword.value + this.KEYWORD_SUFFIX, keywordToken); } @@ -74,13 +74,13 @@ export class DefaultTokenBuilder implements TokenBuilder { return token; } - protected buildKeywordToken(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map): TokenType { + protected buildKeywordToken(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map, caseInsensitive: boolean): TokenType { const longerAlt = this.findLongerAlt(keyword, keywords, terminals, tokenMap); - return { name: keyword.value, PATTERN: this.buildKeywordPattern(keyword), LONGER_ALT: longerAlt }; + return { name: keyword.value, PATTERN: this.buildKeywordPattern(keyword, caseInsensitive), LONGER_ALT: longerAlt }; } - protected buildKeywordPattern(keyword: Keyword): TokenPattern { - if (/\w+/.test(keyword.value)) { + protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern { + if (caseInsensitive && /\w+/.test(keyword.value)) { const regexLetters: string[] = []; for (const letter of keyword.value) { regexLetters.push(`[${letter.toLowerCase()}${letter.toUpperCase()}]`); From 8835965f8b51124ddeedf24637cff5af5e7712f9 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Thu, 2 Dec 2021 19:40:40 +0100 Subject: [PATCH 03/13] Make arithmetics example case-insensitive --- examples/arithmetics/example/example.calc | 10 +++++-----
examples/arithmetics/langium-config.json | 1 + .../src/language-server/generated/module.ts | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/examples/arithmetics/example/example.calc b/examples/arithmetics/example/example.calc index 595e5f954..23e6407f0 100644 --- a/examples/arithmetics/example/example.calc +++ b/examples/arithmetics/example/example.calc @@ -1,14 +1,14 @@ -module example1 +MODULE example1 -def y: 1 + 3 - 99828932 / 2 + 2 - 1; +Def y: 1 + 3 - 99828932 / 2 + 2 - 1; -def x: 12 / 3 - 1; +DEF x: 12 / 3 - 1; x * 2 - 4; -def t: 4; +deF t: 4; -def func(t, x): +DEF func(t, x): t * t * t + x; func(t, x); diff --git a/examples/arithmetics/langium-config.json b/examples/arithmetics/langium-config.json index 4439b7a7d..4603058ec 100644 --- a/examples/arithmetics/langium-config.json +++ b/examples/arithmetics/langium-config.json @@ -4,6 +4,7 @@ "id": "arithmetics", "grammar": "src/language-server/arithmetics.langium", "fileExtensions": [".calc"], + "caseInsensitive": true, "textMate": { "out": "syntaxes/arithmetics.tmLanguage.json" } diff --git a/examples/arithmetics/src/language-server/generated/module.ts b/examples/arithmetics/src/language-server/generated/module.ts index dffb3da0e..5c3ac3367 100644 --- a/examples/arithmetics/src/language-server/generated/module.ts +++ b/examples/arithmetics/src/language-server/generated/module.ts @@ -10,7 +10,7 @@ import { ArithmeticsGrammar } from './grammar'; export const ArithmeticsLanguageMetaData: LanguageMetaData = { languageId: 'arithmetics', fileExtensions: ['.calc'], - caseInsensitive: false + caseInsensitive: true }; export const ArithmeticsGeneratedSharedModule: Module = { From 2123e194b0992eaa820279fe31fcdd8abcd5ea34 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Thu, 2 Dec 2021 20:55:10 +0100 Subject: [PATCH 04/13] Add highlighting of case-insensitive keywords --- .../syntaxes/arithmetics.tmLanguage.json | 2 +- .../langium-cli/src/generator/textmate-generator.ts | 8 ++++---- 
packages/langium/src/parser/token-builder.ts | 13 ++++--------- packages/langium/src/utils/regex-util.ts | 11 +++++++++++ 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json b/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json index cdcd6f4dd..21f6ca2fc 100644 --- a/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json +++ b/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json @@ -10,7 +10,7 @@ }, { "name": "keyword.control.arithmetics", - "match": "\\b(def|module)\\b" + "match": "\\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\\b" } ], "repository": { diff --git a/packages/langium-cli/src/generator/textmate-generator.ts b/packages/langium-cli/src/generator/textmate-generator.ts index 0643c3e96..7ad385421 100644 --- a/packages/langium-cli/src/generator/textmate-generator.ts +++ b/packages/langium-cli/src/generator/textmate-generator.ts @@ -5,7 +5,7 @@ ******************************************************************************/ import * as langium from 'langium'; -import { escapeRegExp, getTerminalParts, isCommentTerminal, isTerminalRule, terminalRegex } from 'langium'; +import { escapeRegExp, getCaseInsensitivePattern, getTerminalParts, isCommentTerminal, isTerminalRule, terminalRegex } from 'langium'; import { LangiumLanguageConfig } from '../package'; import { collectKeywords } from './util'; @@ -118,14 +118,14 @@ function getControlKeywords(grammar: langium.Grammar, pack: LangiumLanguageConfi const regex = /[A-Za-z]/; const controlKeywords = collectKeywords(grammar).filter(kw => regex.test(kw)); const keywords = controlKeywords.map(escapeRegExp); - const groups = groupKeywords(keywords); + const groups = groupKeywords(keywords, pack.caseInsensitive); return { 'name': `keyword.control.${pack.id}`, 'match': groups.join('|') }; } -function groupKeywords(keywords: string[]): string[] { +function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined): string[] { const 
groups: { letter: string[], leftSpecial: string[], @@ -136,7 +136,7 @@ function groupKeywords(keywords: string[]): string[] { keywords.forEach(keyword => { if (/\w/.test(keyword[0])) { if (/\w/.test(keyword[keyword.length - 1])) { - groups.letter.push(keyword); + groups.letter.push(caseInsensitive ? getCaseInsensitivePattern(keyword) : keyword); } else { groups.rightSpecial.push(keyword); } diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index f798026d2..eb39fafc4 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -8,7 +8,7 @@ import { Lexer, TokenPattern, TokenType } from 'chevrotain'; import { terminalRegex } from '..'; import { Grammar, isKeyword, isTerminalRule, Keyword, TerminalRule } from '../grammar/generated/ast'; import { streamAllContents } from '../utils/ast-util'; -import { partialMatches } from '../utils/regex-util'; +import { getCaseInsensitivePattern, partialMatches } from '../utils/regex-util'; import { stream } from '../utils/stream'; export interface TokenBuilder { @@ -80,14 +80,9 @@ export class DefaultTokenBuilder implements TokenBuilder { } protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern { - if (caseInsensitive && /\w+/.test(keyword.value)) { - const regexLetters: string[] = []; - for (const letter of keyword.value) { - regexLetters.push(`[${letter.toLowerCase()}${letter.toUpperCase()}]`); - } - return new RegExp(regexLetters.join('')); - } - return keyword.value; + return caseInsensitive && /\w+/.test(keyword.value) ? 
+ new RegExp(getCaseInsensitivePattern(keyword.value)) : + keyword.value; } protected findLongerAlt(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map): TokenType[] { diff --git a/packages/langium/src/utils/regex-util.ts b/packages/langium/src/utils/regex-util.ts index 514a539b3..ca3ae3636 100644 --- a/packages/langium/src/utils/regex-util.ts +++ b/packages/langium/src/utils/regex-util.ts @@ -114,6 +114,17 @@ export function escapeRegExp(value: string): string { return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } +export function getCaseInsensitivePattern(keyword: string): string { + if (/\w+/.test(keyword)) { + const regexLetters: string[] = []; + for (const letter of keyword) { + regexLetters.push(`[${letter.toLowerCase()}${letter.toUpperCase()}]`); + } + return regexLetters.join(''); + } + return keyword; +} + /** * Determines whether the given input has a partial match with the specified regex. * @param regex The regex to partially match against From 4de6aac14f6cc46211735cac92102a0ccf304843 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 3 Dec 2021 10:41:13 +0100 Subject: [PATCH 05/13] Fix arguments in buildTokens --- packages/langium/src/parser/token-builder.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index eb39fafc4..0bcad5246 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -12,7 +12,7 @@ import { getCaseInsensitivePattern, partialMatches } from '../utils/regex-util'; import { stream } from '../utils/stream'; export interface TokenBuilder { - buildTokens(grammar: Grammar, caseInsensitive: boolean): TokenType[]; + buildTokens(grammar: Grammar, caseInsensitive?: boolean): TokenType[]; } export class DefaultTokenBuilder implements TokenBuilder { @@ -21,7 +21,7 @@ export class DefaultTokenBuilder implements TokenBuilder { protected readonly 
KEYWORD_SUFFIX = '_KEYWORD'; protected readonly TERMINAL_SUFFIX = '_TERMINAL'; - buildTokens(grammar: Grammar, caseInsensitive: boolean): TokenType[] { + buildTokens(grammar: Grammar, caseInsensitive = false): TokenType[] { const tokenMap = new Map(); const terminalsTokens: TokenType[] = []; const terminals = Array.from(stream(grammar.rules).filter(isTerminalRule)); From 0cba5eaafe076d9d3e4161a2b5053ad800f637a1 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 3 Dec 2021 18:31:36 +0100 Subject: [PATCH 06/13] Fix word pattern --- packages/langium/src/parser/token-builder.ts | 2 +- packages/langium/src/utils/regex-util.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index 0bcad5246..7d33f5df9 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -80,7 +80,7 @@ export class DefaultTokenBuilder implements TokenBuilder { } protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern { - return caseInsensitive && /\w+/.test(keyword.value) ? + return caseInsensitive && /^\w+$/.test(keyword.value) ? 
new RegExp(getCaseInsensitivePattern(keyword.value)) : keyword.value; } diff --git a/packages/langium/src/utils/regex-util.ts b/packages/langium/src/utils/regex-util.ts index ca3ae3636..53cd7d945 100644 --- a/packages/langium/src/utils/regex-util.ts +++ b/packages/langium/src/utils/regex-util.ts @@ -115,7 +115,7 @@ export function escapeRegExp(value: string): string { } export function getCaseInsensitivePattern(keyword: string): string { - if (/\w+/.test(keyword)) { + if (/^\w+$/.test(keyword)) { const regexLetters: string[] = []; for (const letter of keyword) { regexLetters.push(`[${letter.toLowerCase()}${letter.toUpperCase()}]`); From f48a92d3ecde34020e7b73d0b20f5208a8d05b3f Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 3 Dec 2021 19:30:54 +0100 Subject: [PATCH 07/13] Add case-insensitive parsing of keywords with special symbols --- .../langium-cli/src/generator/textmate-generator.ts | 12 ++++++------ packages/langium/src/parser/token-builder.ts | 2 +- packages/langium/src/utils/regex-util.ts | 11 ++++------- 3 files changed, 11 insertions(+), 14 deletions(-) diff --git a/packages/langium-cli/src/generator/textmate-generator.ts b/packages/langium-cli/src/generator/textmate-generator.ts index 7ad385421..93685d964 100644 --- a/packages/langium-cli/src/generator/textmate-generator.ts +++ b/packages/langium-cli/src/generator/textmate-generator.ts @@ -117,8 +117,7 @@ function getRepository(grammar: langium.Grammar, config: LangiumLanguageConfig): function getControlKeywords(grammar: langium.Grammar, pack: LangiumLanguageConfig): Pattern { const regex = /[A-Za-z]/; const controlKeywords = collectKeywords(grammar).filter(kw => regex.test(kw)); - const keywords = controlKeywords.map(escapeRegExp); - const groups = groupKeywords(keywords, pack.caseInsensitive); + const groups = groupKeywords(controlKeywords, pack.caseInsensitive); return { 'name': `keyword.control.${pack.id}`, 'match': groups.join('|') @@ -134,17 +133,18 @@ function groupKeywords(keywords: string[], 
caseInsensitive: boolean | undefined) } = {letter: [], leftSpecial: [], rightSpecial: [], special: []}; keywords.forEach(keyword => { + const keywordPattern = caseInsensitive ? getCaseInsensitivePattern(keyword) : escapeRegExp(keyword); if (/\w/.test(keyword[0])) { if (/\w/.test(keyword[keyword.length - 1])) { - groups.letter.push(caseInsensitive ? getCaseInsensitivePattern(keyword) : keyword); + groups.letter.push(keywordPattern); } else { - groups.rightSpecial.push(keyword); + groups.rightSpecial.push(keywordPattern); } } else { if ((/\w/).test(keyword[keyword.length - 1])) { - groups.leftSpecial.push(keyword); + groups.leftSpecial.push(keywordPattern); } else { - groups.special.push(keyword); + groups.special.push(keywordPattern); } } }); diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index 7d33f5df9..913f5146c 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -80,7 +80,7 @@ export class DefaultTokenBuilder implements TokenBuilder { } protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern { - return caseInsensitive && /^\w+$/.test(keyword.value) ? + return caseInsensitive ? 
new RegExp(getCaseInsensitivePattern(keyword.value)) : keyword.value; } diff --git a/packages/langium/src/utils/regex-util.ts b/packages/langium/src/utils/regex-util.ts index 53cd7d945..3cf6ed66b 100644 --- a/packages/langium/src/utils/regex-util.ts +++ b/packages/langium/src/utils/regex-util.ts @@ -115,14 +115,11 @@ export function escapeRegExp(value: string): string { } export function getCaseInsensitivePattern(keyword: string): string { - if (/^\w+$/.test(keyword)) { - const regexLetters: string[] = []; - for (const letter of keyword) { - regexLetters.push(`[${letter.toLowerCase()}${letter.toUpperCase()}]`); - } - return regexLetters.join(''); + const regexLetters: string[] = []; + for (const letter of keyword) { + regexLetters.push(/\w/.test(letter) ? `[${letter.toLowerCase()}${letter.toUpperCase()}]` : escapeRegExp(letter)); } - return keyword; + return regexLetters.join(''); } /** From 9e356f7774b8f71fa6d08ffdbcc9d64374a015b3 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 3 Dec 2021 19:37:59 +0100 Subject: [PATCH 08/13] Add tests --- .../langium/test/parser/token-builder.test.ts | 62 ++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/packages/langium/test/parser/token-builder.test.ts b/packages/langium/test/parser/token-builder.test.ts index 42ecdf55e..ce84b4b16 100644 --- a/packages/langium/test/parser/token-builder.test.ts +++ b/packages/langium/test/parser/token-builder.test.ts @@ -4,7 +4,7 @@ * terms of the MIT License, which is available in the project root. 
******************************************************************************/ -import { TokenType } from '@chevrotain/types'; +import { TokenPattern, TokenType } from '@chevrotain/types'; import { createLangiumGrammarServices, Grammar } from '../../src'; import { parseHelper } from '../../src/test'; @@ -53,3 +53,63 @@ describe('tokenBuilder#longerAlts', () => { }); }); + +let implementPattern: TokenPattern | undefined; +let strangePattern: TokenPattern | undefined; +let abcPattern: TokenPattern | undefined; +let abPattern: TokenPattern | undefined; +let aPattern: TokenPattern | undefined; +let booleanTerminalPattern: TokenPattern | undefined; +let abTerminalPattern: TokenPattern | undefined; + +describe('tokenBuilder#caseInsensitivePattern', () => { + beforeAll(async () => { + const text = ` + grammar test + Main: 'A' 'ab' 'AbC' | Implement | '\\strange\\'; + Implement: '@implement' AB; + terminal BOOLEAN returns boolean: /true|false/; + terminal AB: /ABD?/; + `; + const grammar = (await parseHelper(services)(text)).document.parseResult.value; + const tokens = tokenBuilder.buildTokens(grammar, true); + const patterns = tokens.map(token => token.PATTERN); + + implementPattern = patterns[0]; + strangePattern = patterns[1]; + abcPattern = patterns[2]; + abPattern = patterns[3]; + aPattern = patterns[4]; + booleanTerminalPattern = patterns[5]; + abTerminalPattern = patterns[6]; + }); + + test('should create from keyword with special symbols', () => { + expect(implementPattern).toEqual(new RegExp(/@[iI][mM][pP][lL][eE][mM][eE][nN][tT]/)); + }); + + test('should create from keyword with special escape symbols', () => { + expect(strangePattern).toEqual(new RegExp(/\\[sS][tT][rR][aA][nN][gG][eE]\\/)); + }); + + test('should create from mixed-case word', () => { + expect(abcPattern).toEqual(new RegExp(/[aA][bB][cC]/)); + }); + + test('should create from lower-case word', () => { + expect(abPattern).toEqual(new RegExp(/[aA][bB]/)); + }); + + test('should create from 
upper-case word', () => { + expect(aPattern).toEqual(new RegExp(/[aA]/)); + }); + + test('should ignore terminals', () => { + expect(booleanTerminalPattern).toEqual(new RegExp(/true|false/)); + }); + + test('should ignore terminals with ?', () => { + expect(abTerminalPattern).toEqual(new RegExp(/ABD?/)); + }); + +}); From 8d6ed780e4b4c407130e755bba70e509064065c8 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 3 Dec 2021 19:45:36 +0100 Subject: [PATCH 09/13] TMP: add keywords with special symbols in arithmetics example --- examples/arithmetics/example/example.calc | 4 ++++ .../src/language-server/arithmetics.langium | 2 +- .../src/language-server/generated/grammar.ts | 15 +++++++++++++++ .../syntaxes/arithmetics.tmLanguage.json | 2 +- 4 files changed, 21 insertions(+), 2 deletions(-) diff --git a/examples/arithmetics/example/example.calc b/examples/arithmetics/example/example.calc index 23e6407f0..fd681f7a3 100644 --- a/examples/arithmetics/example/example.calc +++ b/examples/arithmetics/example/example.calc @@ -1,5 +1,9 @@ MODULE example1 +@ImPlement +\straNge\ +xX^(#*&^)$#*%*!^)}{:>:: loadedArithmeticsGrammar ||(loa "$refText": "Evaluation" }, "elements": [] + }, + { + "$type": "Keyword", + "value": "@Implement", + "elements": [] + }, + { + "$type": "Keyword", + "value": "\\\\strange\\\\", + "elements": [] + }, + { + "$type": "Keyword", + "value": "xx^(#*&^)$#*%*!^)}{:>::<[qQ])\\b|\\B(@[iI][mM][pP][lL][eE][mM][eE][nN][tT])\\b|\\B(\\\\[sS][tT][rR][aA][nN][gG][eE]\\\\)\\B" } ], "repository": { From 784728439f4f216627df99db1d3df8befa8dd767 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 10 Dec 2021 13:13:29 +0100 Subject: [PATCH 10/13] Fix tests after multi-language support --- packages/langium/test/parser/token-builder.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/langium/test/parser/token-builder.test.ts b/packages/langium/test/parser/token-builder.test.ts index ce84b4b16..1b564ab3f 100644 --- 
a/packages/langium/test/parser/token-builder.test.ts +++ b/packages/langium/test/parser/token-builder.test.ts @@ -71,7 +71,7 @@ describe('tokenBuilder#caseInsensitivePattern', () => { terminal BOOLEAN returns boolean: /true|false/; terminal AB: /ABD?/; `; - const grammar = (await parseHelper(services)(text)).document.parseResult.value; + const grammar = (await parseHelper(grammarServices)(text)).document.parseResult.value; const tokens = tokenBuilder.buildTokens(grammar, true); const patterns = tokens.map(token => token.PATTERN); From e6e8da210b4b2bc339810fbfa1e077ae08c3c65f Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 10 Dec 2021 13:26:13 +0100 Subject: [PATCH 11/13] Fix notes --- packages/langium-cli/src/generator/module-generator.ts | 2 +- packages/langium/src/utils/regex-util.ts | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/packages/langium-cli/src/generator/module-generator.ts b/packages/langium-cli/src/generator/module-generator.ts index 50af53374..df4d52724 100644 --- a/packages/langium-cli/src/generator/module-generator.ts +++ b/packages/langium-cli/src/generator/module-generator.ts @@ -41,7 +41,7 @@ export function generateModule(grammars: langium.Grammar[], config: LangiumConfi node.indent(metaData => { metaData.append(`languageId: '${config.id}',`, NL); metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}],`, NL); - metaData.append(`caseInsensitive: ${config.caseInsensitive ? 
true : false}`, NL); + metaData.append(`caseInsensitive: ${!!config.caseInsensitive}`, NL); }); node.append('};', NL, NL); } diff --git a/packages/langium/src/utils/regex-util.ts b/packages/langium/src/utils/regex-util.ts index 3cf6ed66b..09e70a350 100644 --- a/packages/langium/src/utils/regex-util.ts +++ b/packages/langium/src/utils/regex-util.ts @@ -115,11 +115,9 @@ export function escapeRegExp(value: string): string { } export function getCaseInsensitivePattern(keyword: string): string { - const regexLetters: string[] = []; - for (const letter of keyword) { - regexLetters.push(/\w/.test(letter) ? `[${letter.toLowerCase()}${letter.toUpperCase()}]` : escapeRegExp(letter)); - } - return regexLetters.join(''); + return Array.prototype.map.call(keyword, letter => + /\w/.test(letter) ? `[${letter.toLowerCase()}${letter.toUpperCase()}]` : escapeRegExp(letter) + ).join(''); } /** From bf7380254e7ea1d9ea377f204e7b8d976adb67ba Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 10 Dec 2021 13:28:56 +0100 Subject: [PATCH 12/13] Clean the arithmetics example --- examples/arithmetics/example/example.calc | 8 ++------ .../src/language-server/arithmetics.langium | 2 +- .../src/language-server/generated/grammar.ts | 15 --------------- .../syntaxes/arithmetics.tmLanguage.json | 2 +- 4 files changed, 4 insertions(+), 23 deletions(-) diff --git a/examples/arithmetics/example/example.calc b/examples/arithmetics/example/example.calc index fd681f7a3..5bbfa86f3 100644 --- a/examples/arithmetics/example/example.calc +++ b/examples/arithmetics/example/example.calc @@ -1,8 +1,4 @@ -MODULE example1 - -@ImPlement -\straNge\ -xX^(#*&^)$#*%*!^)}{:>:: loadedArithmeticsGrammar ||(loa "$refText": "Evaluation" }, "elements": [] - }, - { - "$type": "Keyword", - "value": "@Implement", - "elements": [] - }, - { - "$type": "Keyword", - "value": "\\\\strange\\\\", - "elements": [] - }, - { - "$type": "Keyword", - "value": 
"xx^(#*&^)$#*%*!^)}{:>::<[qQ])\\b|\\B(@[iI][mM][pP][lL][eE][mM][eE][nN][tT])\\b|\\B(\\\\[sS][tT][rR][aA][nN][gG][eE]\\\\)\\B" + "match": "\\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\\b" } ], "repository": { From 2b4b3b906200ac649b1a66851e9b1a7acc15c205 Mon Sep 17 00:00:00 2001 From: Pluralia Date: Fri, 10 Dec 2021 16:29:13 +0100 Subject: [PATCH 13/13] Replace boolean argument with object --- packages/langium/src/parser/langium-parser-builder.ts | 2 +- packages/langium/src/parser/token-builder.ts | 6 +++--- packages/langium/test/parser/token-builder.test.ts | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/packages/langium/src/parser/langium-parser-builder.ts b/packages/langium/src/parser/langium-parser-builder.ts index fc55bd5c1..2a72d9360 100644 --- a/packages/langium/src/parser/langium-parser-builder.ts +++ b/packages/langium/src/parser/langium-parser-builder.ts @@ -31,7 +31,7 @@ type Method = () => void; export function createLangiumParser(services: LangiumServices): LangiumParser { const grammar = services.Grammar; const tokens = new Map(); - const buildTokens = services.parser.TokenBuilder.buildTokens(grammar, services.LanguageMetaData.caseInsensitive); + const buildTokens = services.parser.TokenBuilder.buildTokens(grammar, { caseInsensitive: services.LanguageMetaData.caseInsensitive }); buildTokens.forEach(e => { tokens.set(e.name, e); }); diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index 913f5146c..c55564a6d 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -12,7 +12,7 @@ import { getCaseInsensitivePattern, partialMatches } from '../utils/regex-util'; import { stream } from '../utils/stream'; export interface TokenBuilder { - buildTokens(grammar: Grammar, caseInsensitive?: boolean): TokenType[]; + buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenType[]; } export class DefaultTokenBuilder 
implements TokenBuilder { @@ -21,7 +21,7 @@ export class DefaultTokenBuilder implements TokenBuilder { protected readonly KEYWORD_SUFFIX = '_KEYWORD'; protected readonly TERMINAL_SUFFIX = '_TERMINAL'; - buildTokens(grammar: Grammar, caseInsensitive = false): TokenType[] { + buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenType[] { const tokenMap = new Map(); const terminalsTokens: TokenType[] = []; const terminals = Array.from(stream(grammar.rules).filter(isTerminalRule)); @@ -37,7 +37,7 @@ export class DefaultTokenBuilder implements TokenBuilder { .sort((a, b) => b.value.length - a.value.length); for (const keyword of keywords) { - const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap, caseInsensitive); + const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap, !!options?.caseInsensitive); tokens.push(keywordToken); tokenMap.set(keyword.value + this.KEYWORD_SUFFIX, keywordToken); } diff --git a/packages/langium/test/parser/token-builder.test.ts b/packages/langium/test/parser/token-builder.test.ts index 1b564ab3f..553493fef 100644 --- a/packages/langium/test/parser/token-builder.test.ts +++ b/packages/langium/test/parser/token-builder.test.ts @@ -72,7 +72,7 @@ describe('tokenBuilder#caseInsensitivePattern', () => { terminal AB: /ABD?/; `; const grammar = (await parseHelper(grammarServices)(text)).document.parseResult.value; - const tokens = tokenBuilder.buildTokens(grammar, true); + const tokens = tokenBuilder.buildTokens(grammar, { caseInsensitive: true }); const patterns = tokens.map(token => token.PATTERN); implementPattern = patterns[0];