diff --git a/examples/arithmetics/example/example.calc b/examples/arithmetics/example/example.calc index 595e5f954..5bbfa86f3 100644 --- a/examples/arithmetics/example/example.calc +++ b/examples/arithmetics/example/example.calc @@ -1,14 +1,14 @@ -module example1 +Module example1 -def y: 1 + 3 - 99828932 / 2 + 2 - 1; +Def y: 1 + 3 - 99828932 / 2 + 2 - 1; -def x: 12 / 3 - 1; +DEF x: 12 / 3 - 1; x * 2 - 4; def t: 4; -def func(t, x): +DEF func(t, x): t * t * t + x; func(t, x); diff --git a/examples/arithmetics/langium-config.json b/examples/arithmetics/langium-config.json index 4439b7a7d..4603058ec 100644 --- a/examples/arithmetics/langium-config.json +++ b/examples/arithmetics/langium-config.json @@ -4,6 +4,7 @@ "id": "arithmetics", "grammar": "src/language-server/arithmetics.langium", "fileExtensions": [".calc"], + "caseInsensitive": true, "textMate": { "out": "syntaxes/arithmetics.tmLanguage.json" } diff --git a/examples/arithmetics/src/language-server/generated/module.ts b/examples/arithmetics/src/language-server/generated/module.ts index ac2714d3e..5c3ac3367 100644 --- a/examples/arithmetics/src/language-server/generated/module.ts +++ b/examples/arithmetics/src/language-server/generated/module.ts @@ -9,7 +9,8 @@ import { ArithmeticsGrammar } from './grammar'; export const ArithmeticsLanguageMetaData: LanguageMetaData = { languageId: 'arithmetics', - fileExtensions: ['.calc'] + fileExtensions: ['.calc'], + caseInsensitive: true }; export const ArithmeticsGeneratedSharedModule: Module = { diff --git a/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json b/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json index cdcd6f4dd..21f6ca2fc 100644 --- a/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json +++ b/examples/arithmetics/syntaxes/arithmetics.tmLanguage.json @@ -10,7 +10,7 @@ }, { "name": "keyword.control.arithmetics", - "match": "\\b(def|module)\\b" + "match": "\\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\\b" } ], "repository": { diff --git a/examples/domainmodel/src/language-server/generated/module.ts b/examples/domainmodel/src/language-server/generated/module.ts index aa93e79dc..f9f7ad32b 100644 --- a/examples/domainmodel/src/language-server/generated/module.ts +++ b/examples/domainmodel/src/language-server/generated/module.ts @@ -9,7 +9,8 @@ import { DomainModelGrammar } from './grammar'; export const DomainModelLanguageMetaData: LanguageMetaData = { languageId: 'domain-model', - fileExtensions: ['.dmodel'] + fileExtensions: ['.dmodel'], + caseInsensitive: false }; export const parserConfig: IParserConfig = { diff --git a/examples/statemachine/src/language-server/generated/module.ts b/examples/statemachine/src/language-server/generated/module.ts index 7bb5804a6..04dde94cd 100644 --- a/examples/statemachine/src/language-server/generated/module.ts +++ b/examples/statemachine/src/language-server/generated/module.ts @@ -9,7 +9,8 @@ import { StatemachineGrammar } from './grammar'; export const StatemachineLanguageMetaData: LanguageMetaData = { languageId: 'statemachine', - fileExtensions: ['.statemachine'] + fileExtensions: ['.statemachine'], + caseInsensitive: false }; export const StatemachineGeneratedSharedModule: Module = { diff --git a/packages/langium-cli/langium-config-schema.json b/packages/langium-cli/langium-config-schema.json index d42bbb1b7..f8cb5d138 100644 --- a/packages/langium-cli/langium-config-schema.json +++ b/packages/langium-cli/langium-config-schema.json @@ -69,6 +69,10 @@ } ] }, + "caseInsensitive": { + "description": "Enable case-insensitive keywords parsing", + "type": "boolean" + }, "textMate": { "description": "An object to describe the textMate grammar properties", "type": "object", diff --git a/packages/langium-cli/src/generator/module-generator.ts b/packages/langium-cli/src/generator/module-generator.ts index 36dcd8006..df4d52724 100644 --- a/packages/langium-cli/src/generator/module-generator.ts +++ b/packages/langium-cli/src/generator/module-generator.ts @@ -40,7 +40,8 @@ export function generateModule(grammars: langium.Grammar[], config: LangiumConfi node.append('export const ', grammar.name, 'LanguageMetaData: LanguageMetaData = {', NL); node.indent(metaData => { metaData.append(`languageId: '${config.id}',`, NL); - metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}]`, NL); + metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}],`, NL); + metaData.append(`caseInsensitive: ${!!config.caseInsensitive}`, NL); }); node.append('};', NL, NL); } diff --git a/packages/langium-cli/src/generator/textmate-generator.ts b/packages/langium-cli/src/generator/textmate-generator.ts index 0643c3e96..93685d964 100644 --- a/packages/langium-cli/src/generator/textmate-generator.ts +++ b/packages/langium-cli/src/generator/textmate-generator.ts @@ -5,7 +5,7 @@ ******************************************************************************/ import * as langium from 'langium'; -import { escapeRegExp, getTerminalParts, isCommentTerminal, isTerminalRule, terminalRegex } from 'langium'; +import { escapeRegExp, getCaseInsensitivePattern, getTerminalParts, isCommentTerminal, isTerminalRule, terminalRegex } from 'langium'; import { LangiumLanguageConfig } from '../package'; import { collectKeywords } from './util'; @@ -117,15 +117,14 @@ function getRepository(grammar: langium.Grammar, config: LangiumLanguageConfig): function getControlKeywords(grammar: langium.Grammar, pack: LangiumLanguageConfig): Pattern { const regex = /[A-Za-z]/; const controlKeywords = collectKeywords(grammar).filter(kw => regex.test(kw)); - const keywords = controlKeywords.map(escapeRegExp); - const groups = groupKeywords(keywords); + const groups = groupKeywords(controlKeywords, pack.caseInsensitive); return { 'name': `keyword.control.${pack.id}`, 'match': groups.join('|') }; } -function groupKeywords(keywords: string[]): string[] { +function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined): string[] { const groups: { letter: string[], leftSpecial: string[], @@ -134,17 +133,18 @@ function groupKeywords(keywords: string[]): string[] { } = {letter: [], leftSpecial: [], rightSpecial: [], special: []}; keywords.forEach(keyword => { + const keywordPattern = caseInsensitive ? getCaseInsensitivePattern(keyword) : escapeRegExp(keyword); if (/\w/.test(keyword[0])) { if (/\w/.test(keyword[keyword.length - 1])) { - groups.letter.push(keyword); + groups.letter.push(keywordPattern); } else { - groups.rightSpecial.push(keyword); + groups.rightSpecial.push(keywordPattern); } } else { if ((/\w/).test(keyword[keyword.length - 1])) { - groups.leftSpecial.push(keyword); + groups.leftSpecial.push(keywordPattern); } else { - groups.special.push(keyword); + groups.special.push(keywordPattern); } } }); diff --git a/packages/langium-cli/src/package.ts b/packages/langium-cli/src/package.ts index 21502b32d..9fa80ff60 100644 --- a/packages/langium-cli/src/package.ts +++ b/packages/langium-cli/src/package.ts @@ -36,6 +36,8 @@ export interface LangiumLanguageConfig { grammar: string /** File extensions with leading `.` */ fileExtensions?: string[] + /** Enable case-insensitive keywords parsing */ + caseInsensitive?: boolean /** Enable generating a TextMate syntax highlighting file */ textMate?: { /** Output path to syntax highlighting file */ diff --git a/packages/langium/src/grammar/generated/module.ts b/packages/langium/src/grammar/generated/module.ts index 7d10660ca..7f997f77b 100644 --- a/packages/langium/src/grammar/generated/module.ts +++ b/packages/langium/src/grammar/generated/module.ts @@ -11,7 +11,8 @@ import { LangiumGrammarGrammar } from './grammar'; export const LangiumGrammarLanguageMetaData: LanguageMetaData = { languageId: 'langium', - fileExtensions: ['.langium'] + fileExtensions: ['.langium'], + caseInsensitive: false }; export const LangiumGrammarGeneratedSharedModule: Module = { diff --git a/packages/langium/src/grammar/language-meta-data.ts b/packages/langium/src/grammar/language-meta-data.ts index a0e73f1c9..a43651c70 100644 --- a/packages/langium/src/grammar/language-meta-data.ts +++ b/packages/langium/src/grammar/language-meta-data.ts @@ -7,4 +7,5 @@ export interface LanguageMetaData { languageId: string; fileExtensions: string[]; + caseInsensitive: boolean; } diff --git a/packages/langium/src/parser/langium-parser-builder.ts b/packages/langium/src/parser/langium-parser-builder.ts index 49f5c5ecf..2a72d9360 100644 --- a/packages/langium/src/parser/langium-parser-builder.ts +++ b/packages/langium/src/parser/langium-parser-builder.ts @@ -31,7 +31,7 @@ type Method = () => void; export function createLangiumParser(services: LangiumServices): LangiumParser { const grammar = services.Grammar; const tokens = new Map(); - const buildTokens = services.parser.TokenBuilder.buildTokens(grammar); + const buildTokens = services.parser.TokenBuilder.buildTokens(grammar, { caseInsensitive: services.LanguageMetaData.caseInsensitive }); buildTokens.forEach(e => { tokens.set(e.name, e); }); diff --git a/packages/langium/src/parser/token-builder.ts b/packages/langium/src/parser/token-builder.ts index f0848d291..c55564a6d 100644 --- a/packages/langium/src/parser/token-builder.ts +++ b/packages/langium/src/parser/token-builder.ts @@ -8,11 +8,11 @@ import { Lexer, TokenPattern, TokenType } from 'chevrotain'; import { terminalRegex } from '..'; import { Grammar, isKeyword, isTerminalRule, Keyword, TerminalRule } from '../grammar/generated/ast'; import { streamAllContents } from '../utils/ast-util'; -import { partialMatches } from '../utils/regex-util'; +import { getCaseInsensitivePattern, partialMatches } from '../utils/regex-util'; import { stream } from '../utils/stream'; export interface TokenBuilder { - buildTokens(grammar: Grammar): TokenType[]; + buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenType[]; } export class DefaultTokenBuilder implements TokenBuilder { @@ -21,7 +21,7 @@ export class DefaultTokenBuilder implements TokenBuilder { protected readonly KEYWORD_SUFFIX = '_KEYWORD'; protected readonly TERMINAL_SUFFIX = '_TERMINAL'; - buildTokens(grammar: Grammar): TokenType[] { + buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenType[] { const tokenMap = new Map(); const terminalsTokens: TokenType[] = []; const terminals = Array.from(stream(grammar.rules).filter(isTerminalRule)); @@ -37,7 +37,7 @@ export class DefaultTokenBuilder implements TokenBuilder { .sort((a, b) => b.value.length - a.value.length); for (const keyword of keywords) { - const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap); + const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap, !!options?.caseInsensitive); tokens.push(keywordToken); tokenMap.set(keyword.value + this.KEYWORD_SUFFIX, keywordToken); } @@ -74,13 +74,15 @@ export class DefaultTokenBuilder implements TokenBuilder { return token; } - protected buildKeywordToken(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map): TokenType { + protected buildKeywordToken(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map, caseInsensitive: boolean): TokenType { const longerAlt = this.findLongerAlt(keyword, keywords, terminals, tokenMap); - return { name: keyword.value, PATTERN: this.buildKeywordPattern(keyword), LONGER_ALT: longerAlt }; + return { name: keyword.value, PATTERN: this.buildKeywordPattern(keyword, caseInsensitive), LONGER_ALT: longerAlt }; } - protected buildKeywordPattern(keyword: Keyword): TokenPattern { - return keyword.value; + protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern { + return caseInsensitive ? + new RegExp(getCaseInsensitivePattern(keyword.value)) : + keyword.value; } protected findLongerAlt(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map): TokenType[] { diff --git a/packages/langium/src/utils/regex-util.ts b/packages/langium/src/utils/regex-util.ts index 514a539b3..09e70a350 100644 --- a/packages/langium/src/utils/regex-util.ts +++ b/packages/langium/src/utils/regex-util.ts @@ -114,6 +114,12 @@ export function escapeRegExp(value: string): string { return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); } +export function getCaseInsensitivePattern(keyword: string): string { + return Array.prototype.map.call(keyword, letter => + /\w/.test(letter) ? `[${letter.toLowerCase()}${letter.toUpperCase()}]` : escapeRegExp(letter) + ).join(''); +} + /** * Determines whether the given input has a partial match with the specified regex. * @param regex The regex to partially match against diff --git a/packages/langium/test/parser/token-builder.test.ts b/packages/langium/test/parser/token-builder.test.ts index 42ecdf55e..553493fef 100644 --- a/packages/langium/test/parser/token-builder.test.ts +++ b/packages/langium/test/parser/token-builder.test.ts @@ -4,7 +4,7 @@ * terms of the MIT License, which is available in the project root. ******************************************************************************/ -import { TokenType } from '@chevrotain/types'; +import { TokenPattern, TokenType } from '@chevrotain/types'; import { createLangiumGrammarServices, Grammar } from '../../src'; import { parseHelper } from '../../src/test'; @@ -53,3 +53,63 @@ describe('tokenBuilder#longerAlts', () => { }); }); + +let implementPattern: TokenPattern | undefined; +let strangePattern: TokenPattern | undefined; +let abcPattern: TokenPattern | undefined; +let abPattern: TokenPattern | undefined; +let aPattern: TokenPattern | undefined; +let booleanTerminalPattern: TokenPattern | undefined; +let abTerminalPattern: TokenPattern | undefined; + +describe('tokenBuilder#caseInsensitivePattern', () => { + beforeAll(async () => { + const text = ` + grammar test + Main: 'A' 'ab' 'AbC' | Implement | '\\strange\\'; + Implement: '@implement' AB; + terminal BOOLEAN returns boolean: /true|false/; + terminal AB: /ABD?/; + `; + const grammar = (await parseHelper(grammarServices)(text)).document.parseResult.value; + const tokens = tokenBuilder.buildTokens(grammar, { caseInsensitive: true }); + const patterns = tokens.map(token => token.PATTERN); + + implementPattern = patterns[0]; + strangePattern = patterns[1]; + abcPattern = patterns[2]; + abPattern = patterns[3]; + aPattern = patterns[4]; + booleanTerminalPattern = patterns[5]; + abTerminalPattern = patterns[6]; + }); + + test('should create from keyword with special symbols', () => { + expect(implementPattern).toEqual(new RegExp(/@[iI][mM][pP][lL][eE][mM][eE][nN][tT]/)); + }); + + test('should create from keyword with special escape symbols', () => { + expect(strangePattern).toEqual(new RegExp(/\\[sS][tT][rR][aA][nN][gG][eE]\\/)); + }); + + test('should create from mixed-case word', () => { + expect(abcPattern).toEqual(new RegExp(/[aA][bB][cC]/)); + }); + + test('should create from lower-case word', () => { + expect(abPattern).toEqual(new RegExp(/[aA][bB]/)); + }); + + test('should create from upper-case word', () => { + expect(aPattern).toEqual(new RegExp(/[aA]/)); + }); + + test('should ignore terminals', () => { + expect(booleanTerminalPattern).toEqual(new RegExp(/true|false/)); + }); + + test('should ignore terminals with ?', () => { + expect(abTerminalPattern).toEqual(new RegExp(/ABD?/)); + }); + +});