Implement error reporting in monaco for painless language (#84695)

elastic · Dec 8, 2020 · f8edc51 · f8edc51
1 parent 5cc9bf8
commit f8edc51
Show file tree

Hide file tree

Showing 28 changed files with 9,075 additions and 16 deletions.
diff --git a/.eslintignore b/.eslintignore
@@ -44,3 +44,4 @@ snapshots.js
 /packages/kbn-ui-framework/doc_site/build
 /packages/kbn-ui-framework/generator-kui/*/templates/
 /packages/kbn-ui-shared-deps/flot_charts
+/packages/kbn-monaco/src/painless/antlr
diff --git a/package.json b/package.json
@@ -157,6 +157,7 @@
     "angular-resource": "1.8.0",
     "angular-sanitize": "^1.8.0",
     "angular-ui-ace": "0.2.3",
+    "antlr4ts": "^0.5.0-alpha.3",
     "apollo-cache-inmemory": "1.6.2",
     "apollo-client": "^2.3.8",
     "apollo-link-http": "^1.5.16",
@@ -576,6 +577,7 @@
     "angular-recursion": "^1.0.5",
     "angular-route": "^1.8.0",
     "angular-sortable-view": "^0.0.17",
+    "antlr4ts-cli": "^0.5.0-alpha.3",
     "apidoc": "^0.25.0",
     "apidoc-markdown": "^5.1.8",
     "apollo-link": "^1.2.3",

diff --git a/packages/kbn-monaco/package.json b/packages/kbn-monaco/package.json
@@ -6,7 +6,8 @@
   "license": "Apache-2.0",
   "scripts": {
     "build": "node ./scripts/build.js",
-    "kbn:bootstrap": "yarn build --dev"
+    "kbn:bootstrap": "yarn build --dev",
+    "build:antlr4ts": "../../node_modules/antlr4ts-cli/antlr4ts ./src/painless/antlr/painless_lexer.g4 ./src/painless/antlr/painless_parser.g4 && node ./scripts/fix_generated_antlr.js"
   },
   "devDependencies": {
     "@kbn/babel-preset": "link:../kbn-babel-preset",
@@ -15,4 +16,4 @@
   "dependencies": {
     "@kbn/i18n": "link:../kbn-i18n"
   }
-}
+}
diff --git a/packages/kbn-monaco/scripts/fix_generated_antlr.js b/packages/kbn-monaco/scripts/fix_generated_antlr.js
@@ -0,0 +1,66 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+const { join } = require('path');
+const { readdirSync, readFileSync, writeFileSync, renameSync } = require('fs');
+const ora = require('ora');
+
+const generatedAntlrFolder = join(__dirname, '..', 'src', 'painless', 'antlr');
+
+const generatedAntlrFolderContents = readdirSync(generatedAntlrFolder);
+
+const log = ora('Updating generated antlr grammar').start();
+
+// The generated TS produces some TS linting errors
+// This script adds a //@ts-nocheck comment at the top of each generated file
+// so that the errors can be ignored for now
+generatedAntlrFolderContents
+  .filter((file) => {
+    const fileExtension = file.split('.')[1];
+    return fileExtension.includes('ts');
+  })
+  .forEach((file) => {
+    try {
+      const fileContentRows = readFileSync(join(generatedAntlrFolder, file), 'utf8')
+        .toString()
+        .split('\n');
+
+      fileContentRows.unshift('// @ts-nocheck');
+
+      const filePath = join(generatedAntlrFolder, file);
+      const fileContent = fileContentRows.join('\n');
+
+      writeFileSync(filePath, fileContent, { encoding: 'utf8' });
+    } catch (err) {
+      return log.fail(err.message);
+    }
+  });
+
+// Rename generated parserListener file to snakecase to satisfy file casing check
+// There doesn't appear to be a way to fix this OOTB with antlr4ts-cli
+try {
+  renameSync(
+    join(generatedAntlrFolder, 'painless_parserListener.ts'),
+    join(generatedAntlrFolder, 'painless_parser_listener.ts')
+  );
+} catch (err) {
+  log.warn(`Unable to rename parserListener file to snakecase: ${err.message}`);
+}
+
+log.succeed('Updated generated antlr grammar successfully');
diff --git a/packages/kbn-monaco/src/painless/README.md b/packages/kbn-monaco/src/painless/README.md
@@ -8,7 +8,7 @@ This folder contains the language definitions for Painless used by the Monaco ed
 
 Initializes the worker proxy service when the Painless language is first needed. It also exports the [suggestion provider](https://microsoft.github.io/monaco-editor/api/interfaces/monaco.languages.completionitemprovider.html) needed for autocompletion.
 
-### ./services
+### ./lib
 This directory exports two services:
 
 1. Worker proxy: Responsible for holding a reference to the Monaco-provided proxy getter.
@@ -32,12 +32,15 @@ Contains the Monarch-specific language tokenization rules for Painless.
 
 ### ./worker
 
-The worker proxy and worker instantiation code used in both the main thread and the worker thread. The logic for providing autocomplete suggestions resides here.
+The worker proxy and worker instantiation code used in both the main thread and the worker thread. The logic for providing autocomplete suggestions and error reporting resides here.
 
 ### ./autocomplete_definitions
 
 This directory is generated by a script and should not be changed manually. Read [Updating autocomplete definitions](#updating-autocomplete-definitions) for more information.
 
+### ./antlr
+This directory contains the Painless lexer and grammar rules, as well as the generated TypeScript code. Read [Compiling ANTLR](#compiling-antlr) for more information.
+
 ## Example usage
 
 ```
@@ -102,4 +105,20 @@ node scripts/generate_autocomplete --branch <branch_name>
   - `score`
   - `string_script_field_script_field`
 
-To add additional contexts, edit the `supportedContexts` constant in `kbn-monaco/scripts/constants.js`.
+To add additional contexts, edit the `supportedContexts` constant in `kbn-monaco/scripts/constants.js`.
+
+## Compiling ANTLR
+
+[ANTLR](https://www.antlr.org/) reports lexical and syntax errors out of the box, which we can use to set error markers in Monaco.
+
+Elasticsearch has defined [lexer and parser grammars](https://github.com/elastic/elasticsearch/tree/master/modules/lang-painless/src/main/antlr) for the Painless language. For now, these rules have been largely copied from ES to Kibana and reside in the `antlr` directory with the `.g4` file extension. We then use [antlr4ts](https://github.com/tunnelvisionlabs/antlr4ts) to generate a lexer and a parser in TypeScript.
+
+To regenerate the lexer and parser, run the following script:
+
+```
+npm run build:antlr4ts
+```
+
+*Note:* This script should only need to be run if a change has been made to `painless_lexer.g4` or `painless_parser.g4`.
+
+*Note:* There is a manual change made to the `sempred()` method in the generated `painless_lexer.ts`. This needs further investigation, but there appears to be an offset between the rule index and the token value. Without this manual change, ANTLR incorrectly reports an error when a `/` or a regex is used in a script. A comment in the generated code marks the change. Because regenerating the lexer overwrites `painless_lexer.ts`, the manual change has to be re-applied after running the script above.
diff --git a/packages/kbn-monaco/src/painless/antlr/painless_lexer.g4 b/packages/kbn-monaco/src/painless/antlr/painless_lexer.g4
@@ -0,0 +1,122 @@
+/*
+ * Licensed to Elasticsearch B.V. under one or more contributor
+ * license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright
+ * ownership. Elasticsearch B.V. licenses this file to you under
+ * the Apache License, Version 2.0 (the "License"); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+// Lexer (tokenizer) rules for the Painless scripting language.
+lexer grammar painless_lexer;
+
+// Whitespace and comments are matched and then dropped ("-> skip"),
+// so they are never emitted as tokens.
+WS: [ \t\n\r]+ -> skip;
+// Matches "//" line comments and "/* ... */" block comments (non-greedy).
+// As written, a "//" comment has to be terminated by a \n or \r character.
+COMMENT: ( '//' .*? [\n\r] | '/*' .*? '*/' ) -> skip;
+
+// Punctuation and keyword tokens.
+// Note the naming: LBRACK/RBRACK are the curly braces '{' '}' while
+// LBRACE/RBRACE are the square brackets '[' ']'.
+LBRACK:    '{';
+RBRACK:    '}';
+LBRACE:    '[';
+RBRACE:    ']';
+LP:        '(';
+RP:        ')';
+// We switch modes after a dot to ensure there are not conflicts
+// between shortcuts and decimal values.  Without the mode switch
+// shortcuts such as id.0.0 will fail because 0.0 will be interpreted
+// as a decimal value instead of two individual list-style shortcuts.
+DOT:       '.'  -> mode(AFTER_DOT);
+NSDOT:     '?.' -> mode(AFTER_DOT);
+COMMA:     ',';
+SEMICOLON: ';';
+// Keywords
+IF:        'if';
+IN:        'in';
+ELSE:      'else';
+WHILE:     'while';
+DO:        'do';
+FOR:       'for';
+CONTINUE:  'continue';
+BREAK:     'break';
+RETURN:    'return';
+NEW:       'new';
+TRY:       'try';
+CATCH:     'catch';
+THROW:     'throw';
+THIS:      'this';
+INSTANCEOF: 'instanceof';
+
+// Unary, arithmetic, shift, comparison, bitwise, boolean and other operator tokens.
+BOOLNOT: '!';
+BWNOT:   '~';
+MUL:     '*';
+// "{ ... }?" is a semantic predicate: a '/' is only tokenized as DIV when
+// this.isSlashRegex() is false (the REGEX rule uses the opposite condition).
+// isSlashRegex() is not defined in this grammar.
+// NOTE(review): presumably provided by the lexer implementation - confirm.
+DIV:     '/' { this.isSlashRegex() == false }?;
+REM:     '%';
+ADD:     '+';
+SUB:     '-';
+LSH:     '<<';
+RSH:     '>>';
+USH:     '>>>';
+LT:      '<';
+LTE:     '<=';
+GT:      '>';
+GTE:     '>=';
+EQ:      '==';
+EQR:     '===';
+NE:      '!=';
+NER:     '!==';
+BWAND:   '&';
+XOR:     '^';
+BWOR:    '|';
+BOOLAND: '&&';
+BOOLOR:  '||';
+COND:    '?';
+COLON:   ':';
+ELVIS:   '?:';
+REF:     '::';
+ARROW:   '->';
+FIND:    '=~';
+MATCH:   '==~';
+INCR:    '++';
+DECR:    '--';
+
+// Plain assignment followed by the compound assignment operators
+// (the operator characters with a trailing '=').
+ASSIGN: '=';
+AADD:   '+=';
+ASUB:   '-=';
+AMUL:   '*=';
+ADIV:   '/=';
+AREM:   '%=';
+AAND:   '&=';
+AXOR:   '^=';
+AOR:    '|=';
+ALSH:   '<<=';
+ARSH:   '>>=';
+AUSH:   '>>>=';
+
+// Numeric literals. OCTAL and HEX accept an optional l/L suffix,
+// INTEGER an optional l/L/f/F/d/D suffix and DECIMAL an optional f/F/d/D suffix.
+OCTAL: '0' [0-7]+ [lL]?;
+HEX: '0' [xX] [0-9a-fA-F]+ [lL]?;
+INTEGER: ( '0' | [1-9] [0-9]* ) [lLfFdD]?;
+// The fractional part and the exponent are both optional.
+DECIMAL: ( '0' | [1-9] [0-9]* ) (DOT [0-9]+)? ( [eE] [+\-]? [0-9]+ )? [fFdD]?;
+
+// Double- or single-quoted strings; \" (or \') and \\ are the recognized escapes.
+STRING: ( '"' ( '\\"' | '\\\\' | ~[\\"] )*? '"' ) | ( '\'' ( '\\\'' | '\\\\' | ~[\\'] )*? '\'' );
+// A regex literal: '/', a non-empty body that cannot contain a newline, '/', then
+// optional flag characters. The trailing semantic predicate only accepts the match
+// when this.isSlashRegex() is true (the DIV rule uses the opposite condition).
+REGEX: '/' ( '\\' ~'\n' | ~('/' | '\n') )+? '/' [cilmsUux]* { this.isSlashRegex() }?;
+
+TRUE:  'true';
+FALSE: 'false';
+
+NULL: 'null';
+
+PRIMITIVE: 'boolean' | 'byte' | 'short' | 'char' | 'int' | 'long' | 'float' | 'double';
+DEF: 'def';
+
+// Generic identifier. It is declared after the keyword and type rules of this grammar,
+// so those win when both match the same text (ANTLR prefers the rule listed first
+// for matches of equal length).
+ID: [_a-zA-Z] [_a-zA-Z0-9]*;
+
+// Lexer mode entered by the DOT and NSDOT rules. Only an integer or an identifier
+// is recognized here, and matching either one switches back to DEFAULT_MODE.
+mode AFTER_DOT;
+
+DOTINTEGER: ( '0' | [1-9] [0-9]* ) -> mode(DEFAULT_MODE);
+DOTID: [_a-zA-Z] [_a-zA-Z0-9]*     -> mode(DEFAULT_MODE);