Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add case-insensitive configuration for keywords parsing #316

Merged
merged 13 commits into from
Dec 13, 2021
8 changes: 4 additions & 4 deletions examples/arithmetics/example/example.calc
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
module example1
Module example1

def y: 1 + 3 - 99828932 / 2 + 2 - 1;
Def y: 1 + 3 - 99828932 / 2 + 2 - 1;

def x: 12 / 3 - 1;
DEF x: 12 / 3 - 1;

x * 2 - 4;

def t: 4;

def func(t, x):
DEF func(t, x):
t * t * t + x;

func(t, x);
Expand Down
1 change: 1 addition & 0 deletions examples/arithmetics/langium-config.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
"id": "arithmetics",
"grammar": "src/language-server/arithmetics.langium",
"fileExtensions": [".calc"],
"caseInsensitive": true,
"textMate": {
"out": "syntaxes/arithmetics.tmLanguage.json"
}
Expand Down
3 changes: 2 additions & 1 deletion examples/arithmetics/src/language-server/generated/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import { ArithmeticsGrammar } from './grammar';

export const ArithmeticsLanguageMetaData: LanguageMetaData = {
languageId: 'arithmetics',
fileExtensions: ['.calc']
fileExtensions: ['.calc'],
caseInsensitive: true
};

export const ArithmeticsGeneratedSharedModule: Module<LangiumSharedServices, LangiumGeneratedSharedServices> = {
Expand Down
2 changes: 1 addition & 1 deletion examples/arithmetics/syntaxes/arithmetics.tmLanguage.json
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
},
{
"name": "keyword.control.arithmetics",
"match": "\\b(def|module)\\b"
"match": "\\b([dD][eE][fF]|[mM][oO][dD][uU][lL][eE])\\b"
}
],
"repository": {
Expand Down
3 changes: 2 additions & 1 deletion examples/domainmodel/src/language-server/generated/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import { DomainModelGrammar } from './grammar';

export const DomainModelLanguageMetaData: LanguageMetaData = {
languageId: 'domain-model',
fileExtensions: ['.dmodel']
fileExtensions: ['.dmodel'],
caseInsensitive: false
};

export const parserConfig: IParserConfig = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ import { StatemachineGrammar } from './grammar';

export const StatemachineLanguageMetaData: LanguageMetaData = {
languageId: 'statemachine',
fileExtensions: ['.statemachine']
fileExtensions: ['.statemachine'],
caseInsensitive: false
};

export const StatemachineGeneratedSharedModule: Module<LangiumSharedServices, LangiumGeneratedSharedServices> = {
Expand Down
4 changes: 4 additions & 0 deletions packages/langium-cli/langium-config-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@
}
]
},
"caseInsensitive": {
"description": "Enable case-insensitive keywords parsing",
"type": "boolean"
},
"textMate": {
"description": "An object to describe the textMate grammar properties",
"type": "object",
Expand Down
3 changes: 2 additions & 1 deletion packages/langium-cli/src/generator/module-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ export function generateModule(grammars: langium.Grammar[], config: LangiumConfi
node.append('export const ', grammar.name, 'LanguageMetaData: LanguageMetaData = {', NL);
node.indent(metaData => {
metaData.append(`languageId: '${config.id}',`, NL);
metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}]`, NL);
metaData.append(`fileExtensions: [${config.fileExtensions && config.fileExtensions.map(e => appendQuotesAndDot(e)).join(', ')}],`, NL);
metaData.append(`caseInsensitive: ${!!config.caseInsensitive}`, NL);
});
node.append('};', NL, NL);
}
Expand Down
16 changes: 8 additions & 8 deletions packages/langium-cli/src/generator/textmate-generator.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
******************************************************************************/

import * as langium from 'langium';
import { escapeRegExp, getTerminalParts, isCommentTerminal, isTerminalRule, terminalRegex } from 'langium';
import { escapeRegExp, getCaseInsensitivePattern, getTerminalParts, isCommentTerminal, isTerminalRule, terminalRegex } from 'langium';
import { LangiumLanguageConfig } from '../package';
import { collectKeywords } from './util';

Expand Down Expand Up @@ -117,15 +117,14 @@ function getRepository(grammar: langium.Grammar, config: LangiumLanguageConfig):
function getControlKeywords(grammar: langium.Grammar, pack: LangiumLanguageConfig): Pattern {
const regex = /[A-Za-z]/;
const controlKeywords = collectKeywords(grammar).filter(kw => regex.test(kw));
const keywords = controlKeywords.map(escapeRegExp);
const groups = groupKeywords(keywords);
const groups = groupKeywords(controlKeywords, pack.caseInsensitive);
return {
'name': `keyword.control.${pack.id}`,
'match': groups.join('|')
};
}

function groupKeywords(keywords: string[]): string[] {
function groupKeywords(keywords: string[], caseInsensitive: boolean | undefined): string[] {
const groups: {
letter: string[],
leftSpecial: string[],
Expand All @@ -134,17 +133,18 @@ function groupKeywords(keywords: string[]): string[] {
} = {letter: [], leftSpecial: [], rightSpecial: [], special: []};

keywords.forEach(keyword => {
const keywordPattern = caseInsensitive ? getCaseInsensitivePattern(keyword) : escapeRegExp(keyword);
if (/\w/.test(keyword[0])) {
if (/\w/.test(keyword[keyword.length - 1])) {
groups.letter.push(keyword);
groups.letter.push(keywordPattern);
} else {
groups.rightSpecial.push(keyword);
groups.rightSpecial.push(keywordPattern);
}
} else {
if ((/\w/).test(keyword[keyword.length - 1])) {
groups.leftSpecial.push(keyword);
groups.leftSpecial.push(keywordPattern);
} else {
groups.special.push(keyword);
groups.special.push(keywordPattern);
}
}
});
Expand Down
2 changes: 2 additions & 0 deletions packages/langium-cli/src/package.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ export interface LangiumLanguageConfig {
grammar: string
/** File extensions with leading `.` */
fileExtensions?: string[]
/** Enable case-insensitive keywords parsing */
caseInsensitive?: boolean
/** Enable generating a TextMate syntax highlighting file */
textMate?: {
/** Output path to syntax highlighting file */
Expand Down
3 changes: 2 additions & 1 deletion packages/langium/src/grammar/generated/module.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ import { LangiumGrammarGrammar } from './grammar';

export const LangiumGrammarLanguageMetaData: LanguageMetaData = {
languageId: 'langium',
fileExtensions: ['.langium']
fileExtensions: ['.langium'],
caseInsensitive: false
};

export const LangiumGrammarGeneratedSharedModule: Module<LangiumSharedServices, LangiumGeneratedSharedServices> = {
Expand Down
1 change: 1 addition & 0 deletions packages/langium/src/grammar/language-meta-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,5 @@
/**
 * Static description of a language, emitted by the CLI module generator as a
 * `<GrammarName>LanguageMetaData` constant in each generated `module.ts`.
 */
export interface LanguageMetaData {
/** Identifier of the language, e.g. `'arithmetics'` or `'domain-model'`. */
languageId: string;
/** File extensions of the language, each with a leading `.` (e.g. `'.calc'`). */
fileExtensions: string[];
/**
 * Whether keywords are matched regardless of letter case. The parser builder
 * forwards this flag to `TokenBuilder.buildTokens` as its `caseInsensitive` option.
 */
caseInsensitive: boolean;
}
2 changes: 1 addition & 1 deletion packages/langium/src/parser/langium-parser-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ type Method = () => void;
export function createLangiumParser(services: LangiumServices): LangiumParser {
const grammar = services.Grammar;
const tokens = new Map<string, TokenType>();
const buildTokens = services.parser.TokenBuilder.buildTokens(grammar);
const buildTokens = services.parser.TokenBuilder.buildTokens(grammar, { caseInsensitive: services.LanguageMetaData.caseInsensitive });
buildTokens.forEach(e => {
tokens.set(e.name, e);
});
Expand Down
18 changes: 10 additions & 8 deletions packages/langium/src/parser/token-builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ import { Lexer, TokenPattern, TokenType } from 'chevrotain';
import { terminalRegex } from '..';
import { Grammar, isKeyword, isTerminalRule, Keyword, TerminalRule } from '../grammar/generated/ast';
import { streamAllContents } from '../utils/ast-util';
import { partialMatches } from '../utils/regex-util';
import { getCaseInsensitivePattern, partialMatches } from '../utils/regex-util';
import { stream } from '../utils/stream';

export interface TokenBuilder {
buildTokens(grammar: Grammar): TokenType[];
buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenType[];
}

export class DefaultTokenBuilder implements TokenBuilder {
Expand All @@ -21,7 +21,7 @@ export class DefaultTokenBuilder implements TokenBuilder {
protected readonly KEYWORD_SUFFIX = '_KEYWORD';
protected readonly TERMINAL_SUFFIX = '_TERMINAL';

buildTokens(grammar: Grammar): TokenType[] {
buildTokens(grammar: Grammar, options?: { caseInsensitive?: boolean }): TokenType[] {
const tokenMap = new Map<string, TokenType>();
const terminalsTokens: TokenType[] = [];
const terminals = Array.from(stream(grammar.rules).filter(isTerminalRule));
Expand All @@ -37,7 +37,7 @@ export class DefaultTokenBuilder implements TokenBuilder {
.sort((a, b) => b.value.length - a.value.length);

for (const keyword of keywords) {
const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap);
const keywordToken = this.buildKeywordToken(keyword, keywords, terminals, tokenMap, !!options?.caseInsensitive);
tokens.push(keywordToken);
tokenMap.set(keyword.value + this.KEYWORD_SUFFIX, keywordToken);
}
Expand Down Expand Up @@ -74,13 +74,15 @@ export class DefaultTokenBuilder implements TokenBuilder {
return token;
}

protected buildKeywordToken(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map<string, TokenType>): TokenType {
protected buildKeywordToken(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map<string, TokenType>, caseInsensitive: boolean): TokenType {
const longerAlt = this.findLongerAlt(keyword, keywords, terminals, tokenMap);
return { name: keyword.value, PATTERN: this.buildKeywordPattern(keyword), LONGER_ALT: longerAlt };
return { name: keyword.value, PATTERN: this.buildKeywordPattern(keyword, caseInsensitive), LONGER_ALT: longerAlt };
}

protected buildKeywordPattern(keyword: Keyword): TokenPattern {
return keyword.value;
protected buildKeywordPattern(keyword: Keyword, caseInsensitive: boolean): TokenPattern {
return caseInsensitive ?
new RegExp(getCaseInsensitivePattern(keyword.value)) :
keyword.value;
}

protected findLongerAlt(keyword: Keyword, keywords: Keyword[], terminals: TerminalRule[], tokenMap: Map<string, TokenType>): TokenType[] {
Expand Down
6 changes: 6 additions & 0 deletions packages/langium/src/utils/regex-util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,12 @@ export function escapeRegExp(value: string): string {
return value.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Builds a regular-expression source that matches `keyword` regardless of letter case
 * without relying on the `i` flag, so the result can be embedded in larger patterns.
 *
 * Every character that has distinct lower- and upper-case forms is turned into a
 * two-character class (`a` becomes `[aA]`); every other character is passed through
 * `escapeRegExp`, so digits, `_` and punctuation are emitted as plain (escaped) literals.
 *
 * @param keyword The literal keyword text.
 * @returns The pattern source, e.g. `[dD][eE][fF]` for `def`, or `''` for an empty keyword.
 */
export function getCaseInsensitivePattern(keyword: string): string {
    let pattern = '';
    for (let index = 0; index < keyword.length; index++) {
        const letter = keyword.charAt(index);
        const lower = letter.toLowerCase();
        const upper = letter.toUpperCase();
        // Decide by the actual case mapping instead of `\w`, which only knows ASCII and would
        // leave letters such as 'ü' case-sensitive. Multi-unit mappings (e.g. 'ß' -> 'SS') are
        // excluded because a character class built from them would match the wrong characters.
        const hasCasePair = lower !== upper && lower.length === 1 && upper.length === 1;
        pattern += hasCasePair ? `[${lower}${upper}]` : escapeRegExp(letter);
    }
    return pattern;
}

/**
* Determines whether the given input has a partial match with the specified regex.
* @param regex The regex to partially match against
Expand Down
62 changes: 61 additions & 1 deletion packages/langium/test/parser/token-builder.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* terms of the MIT License, which is available in the project root.
******************************************************************************/

import { TokenType } from '@chevrotain/types';
import { TokenPattern, TokenType } from '@chevrotain/types';
import { createLangiumGrammarServices, Grammar } from '../../src';
import { parseHelper } from '../../src/test';

Expand Down Expand Up @@ -53,3 +53,63 @@ describe('tokenBuilder#longerAlts', () => {
});

});

describe('tokenBuilder#caseInsensitivePattern', () => {

    // Patterns of the tokens built once in `beforeAll`. This suite relies on the token
    // order observed by index: the five keywords from longest to shortest, followed by
    // the two terminals.
    let implementPattern: TokenPattern | undefined;
    let strangePattern: TokenPattern | undefined;
    let abcPattern: TokenPattern | undefined;
    let abPattern: TokenPattern | undefined;
    let aPattern: TokenPattern | undefined;
    let booleanTerminalPattern: TokenPattern | undefined;
    let abTerminalPattern: TokenPattern | undefined;

    beforeAll(async () => {
        const grammarText = `
grammar test
Main: 'A' 'ab' 'AbC' | Implement | '\\strange\\';
Implement: '@implement' AB;
terminal BOOLEAN returns boolean: /true|false/;
terminal AB: /ABD?/;
`;
        const parse = parseHelper<Grammar>(grammarServices);
        const parsed = await parse(grammarText);
        const grammar = parsed.document.parseResult.value;
        const tokens = tokenBuilder.buildTokens(grammar, { caseInsensitive: true });

        [
            implementPattern,
            strangePattern,
            abcPattern,
            abPattern,
            aPattern,
            booleanTerminalPattern,
            abTerminalPattern
        ] = tokens.map(token => token.PATTERN);
    });

    test('should create from keyword with special symbols', () => {
        expect(implementPattern).toEqual(/@[iI][mM][pP][lL][eE][mM][eE][nN][tT]/);
    });

    test('should create from keyword with special escape symbols', () => {
        expect(strangePattern).toEqual(/\\[sS][tT][rR][aA][nN][gG][eE]\\/);
    });

    test('should create from mixed-case word', () => {
        expect(abcPattern).toEqual(/[aA][bB][cC]/);
    });

    test('should create from lower-case word', () => {
        expect(abPattern).toEqual(/[aA][bB]/);
    });

    test('should create from upper-case word', () => {
        expect(aPattern).toEqual(/[aA]/);
    });

    // Terminal rules keep their own regular expressions even when keywords are case-insensitive.
    test('should ignore terminals', () => {
        expect(booleanTerminalPattern).toEqual(/true|false/);
    });

    test('should ignore terminals with ?', () => {
        expect(abTerminalPattern).toEqual(/ABD?/);
    });

});