Change messages and Gherkin parser/pickle compiler to retain step key…

…word (#1741) * Expand the messages protocol with keyword types Extracted from #1741 for ease of review on request of @aurelien-reeves, implements the `messages` side of #768. * Incorporate review comments * Treat all translations associated with multiple keywords as 'General' keywordType * Incorporate latest reviews * Change the enum in the Pickle as per your last comment * Add translation of But/And to the preceding keyword in the pickle compiler * Rename the "keywordType" field in the Pickle to `type` * Adjust Perl implementation over changed naming of keyword type General * Use Cucumber::Message's constant declarations instead of hard-coded strings * Align Perl's TokenMatcher implementation with Ruby's * Port initial Perl implementation to Ruby * Fix test failures from the latest refactoring * Add step keyword types to token and AST test data * Fix error found while fixing test data * Make sure the 'keywordType' is correctly populated Since it's legal to start a scenario or even a background with a conjunction, it's fair to say that the keyword type is unknown until a keyword with known type is encountered. * Update testdata with keyword and step types Sync to the implementations which have been adapted. * Follow Ruby's token_matcher structure in Perl And fix a syntax error using constants. * Eliminate warnings during Perl tests Based on running tests on the converted test data, fix a warning about an undefined value being used. * Rewrite Perl's Pickle compiler to follow Ruby's structure more * Expand 'Conjunction's into their implied keyword type * Convert Perl/Ruby token matcher to pattern available in JS&Java too * Port Ruby/Perl implementation of keyword retention to Java * Sync JavaScript testdata * Progress on JS (works, except Markdown?) 
* Implement keywordType in Markdown Also align the Classic Gherkin implementation * Use hash lookups to get the keyword's StepKeywordType At the suggestion of @ciaranmcnulty, simplify the loop in `match_StepLine` by using a hash lookup. * Add comment as to why the StepKeywordType is overridden to UNKNOWN * Fix typo (capitalization error) * Update and sync Gherkin-In-Markdown test data * Include background steps when the scenario *has* steps * Rename 'keywordType' to 'matchedKeywordType' Change suggested by @ciaranmcnulty to align the field with the names of the other fields which are only available after the token matched. * Synchronise testdata * Adjust PickleCompiler for 'keywordType' type change Now that 'keywordType' is no longer required, its type changed to Optional, which introduces an additional level of indirection. * Collect all applicable types per step keyword When more than one mapping exists, return the UNKNOWN step keyword type to indicate that the keyword's meaning isn't exact. * Temporarily fix broken json-to-messages test We need to upgrade json-to-messages to use ci-environment rather than the former create-meta. In the meantime, to prevent being stuck, we make the test pass * Restore original jsonToMessages test * Report changes from #1972 on gherkin testdata * Add support for keyword type in go * Add StepKeywordType support to GherkinDialect * Add support of step keyword type to php * revert to c# 9 * Implement PickleStep type in php * [dotnet] try build keyword type * [python] Add support of step keyword type * hopefully fixing .NET finally... 
* Clarify order of operations when computing lastKeywordType * Clarify type of keywordType list * Add PR to CHANGELOG.md * Remove useless 'if' condition * Add again the check * Add more changelog entries for Go Co-authored-by: Ciaran McNulty <mail@ciaranmcnulty.com> Co-authored-by: aurelien-reeves <aurelien.reeves@smartbear.com> Co-authored-by: aurelien-reeves <aurelien.reeves@hiptest.net> Co-authored-by: Aslak Hellesøy <1000+aslakhellesoy@users.noreply.github.com> Co-authored-by: Aslak Hellesøy <aslak.hellesoy@gmail.com> Co-authored-by: Gaspar Nagy <gaspar.nagy@gmail.com> Co-authored-by: Björn Rasmusson <B.Rasmusson@computer.org>
cucumber · May 25, 2022 · 087019a · 087019a
1 parent 08e0a57
commit 087019a
Show file tree

Hide file tree

Showing 7 changed files with 49 additions and 15 deletions.
diff --git a/gherkin/ast_builder.py b/gherkin/ast_builder.py
@@ -98,6 +98,7 @@ def transform_node(self, node):
                 'id': self.id_generator.get_next_id(),
                 'location': self.get_location(step_line),
                 'keyword': step_line.matched_keyword,
+                'keywordType': step_line.matched_keyword_type,
                 'text': step_line.matched_text,
                 step_argument_type: step_argument
             })

diff --git a/gherkin/gherkin-languages.json b/gherkin/gherkin-languages.json
@@ -967,9 +967,12 @@
     ],
     "when": [
       "* ",
-      "Tha ",
-      "Þa ",
-      "Ða "
+      "Bæþsealf ",
+      "Bæþsealfa ",
+      "Bæþsealfe ",
+      "Ciricæw ",
+      "Ciricæwe ",
+      "Ciricæwa "
     ]
   },
   "en-pirate": {
@@ -2395,7 +2398,7 @@
     "and": [
       "* ",
       "र ",
-      "अनी "
+      "अनि "
     ],
     "background": [
       "पृष्ठभूमी"
@@ -3459,7 +3462,7 @@
     ],
     "given": [
       "* ",
-      "Агар "
+      "Belgilangan "
     ],
     "name": "Uzbek",
     "native": "Узбекча",

diff --git a/gherkin/pickles/compiler.py b/gherkin/pickles/compiler.py
@@ -57,9 +57,12 @@ def _compile_rule(self, uri, feature_tags, feature_background_steps, rule, langu
 
     def _compile_scenario(self, uri, inherited_tags, background_steps, scenario, language, pickles):
         tags = list(inherited_tags) + list(scenario['tags'])
+        last_keyword_type = 'Unknown'
         steps = list()
         if scenario['steps']:
-            steps.extend(self._pickle_steps(background_steps + scenario['steps']))
+            for step in background_steps + scenario['steps']:
+                last_keyword_type = last_keyword_type if step['keywordType'] == 'Conjunction' else step['keywordType']
+                steps.append(self._pickle_step(step, last_keyword_type))
 
         pickle = {
             'astNodeIds': [scenario['id']],
@@ -79,13 +82,17 @@ def _compile_scenario_outline(self, uri, inherited_tags, background_steps, scena
 
             for values in examples['tableBody']:
                 value_cells = values['cells']
+                tags = list(inherited_tags) + list(scenario['tags']) + list(examples['tags'])
+                last_keyword_type = None
                 steps = list()
                 if scenario['steps']:
-                    steps.extend(self._pickle_steps(background_steps))
-                tags = list(inherited_tags) + list(scenario['tags']) + list(examples['tags'])
+                    for step in background_steps:
+                        last_keyword_type = last_keyword_type if step['keywordType'] == 'Conjunction' else step['keywordType']
+                        steps.append(self._pickle_step(step, last_keyword_type))
 
                 if scenario['steps']:
                     for outline_step in scenario['steps']:
+                        last_keyword_type = last_keyword_type if outline_step['keywordType'] == 'Conjunction' else outline_step['keywordType']
                         step_text = self._interpolate(
                             outline_step['text'],
                             variable_cells,
@@ -97,6 +104,7 @@ def _compile_scenario_outline(self, uri, inherited_tags, background_steps, scena
                         _pickle_step = {
                             'astNodeIds': [outline_step['id'], values['id']],
                             'id': self.id_generator.get_next_id(),
+                            'type': last_keyword_type,
                             'text': step_text
                         }
                         if argument is not None:
@@ -154,13 +162,11 @@ def _interpolate(self, name, variable_cells, value_cells):
             )
         return name
 
-    def _pickle_steps(self, steps):
-        return [self._pickle_step(step)for step in steps]
-
-    def _pickle_step(self, step):
+    def _pickle_step(self, step, keyword_type):
         pickle_step = {
             'astNodeIds': [step['id']],
             'id': self.id_generator.get_next_id(),
+            'type': keyword_type,
             'text': step['text'],
         }
         argument = self._create_pickle_arguments(

diff --git a/gherkin/token_formatter_builder.py b/gherkin/token_formatter_builder.py
@@ -30,7 +30,12 @@ def _format_token(self, token):
             ')',
             token.matched_type,
             ':',
-            (token.matched_keyword if token.matched_keyword else ''),
+            ''.join([
+                '(',
+                token.matched_keyword_type if token.matched_keyword_type else '',
+                ')',
+                token.matched_keyword
+            ]) if token.matched_keyword else '',
             '/',
             (token.matched_text if token.matched_text else ""),
             '/',

diff --git a/gherkin/token_matcher.py b/gherkin/token_matcher.py
@@ -1,4 +1,5 @@
 import re
+from collections import defaultdict
 from .dialect import Dialect
 from .errors import NoSuchLanguageException
 
@@ -49,7 +50,12 @@ def match_StepLine(self, token):
                     self.dialect.but_keywords)
         for keyword in (k for k in keywords if token.line.startswith(k)):
             title = token.line.get_rest_trimmed(len(keyword))
-            self._set_token_matched(token, 'StepLine', title, keyword)
+            keyword_types = self.keyword_types[keyword]
+            if len(keyword_types) == 1:
+                keyword_type = keyword_types[0]
+            else:
+                keyword_type = 'Unknown'
+            self._set_token_matched(token, 'StepLine', title, keyword, keyword_type=keyword_type)
             return True
 
         return False
@@ -134,13 +140,14 @@ def _match_title_line(self, token, token_type, keywords):
         return False
 
     def _set_token_matched(self, token, matched_type, text=None,
-                           keyword=None, indent=None, items=None):
+                           keyword=None, keyword_type=None, indent=None, items=None):
         if items is None:
             items = []
         token.matched_type = matched_type
         # text == '' should not result in None
         token.matched_text = text.rstrip('\r\n') if text is not None else None
         token.matched_keyword = keyword
+        token.matched_keyword_type = keyword_type
         if indent is not None:
             token.matched_indent = indent
         else:
@@ -156,6 +163,15 @@ def _change_dialect(self, dialect_name, location=None):
 
         self.dialect_name = dialect_name
         self.dialect = dialect
+        self.keyword_types = defaultdict(list)
+        for keyword in self.dialect.given_keywords:
+            self.keyword_types[keyword].append('Context')
+        for keyword in self.dialect.when_keywords:
+            self.keyword_types[keyword].append('Action')
+        for keyword in self.dialect.then_keywords:
+            self.keyword_types[keyword].append('Outcome')
+        for keyword in self.dialect.and_keywords + self.dialect.but_keywords:
+            self.keyword_types[keyword].append('Conjunction')
 
     def _unescaped_docstring(self, text):
         if self._active_doc_string_separator == '"""':

diff --git a/test/gherkin_test.py b/test/gherkin_test.py
@@ -60,6 +60,7 @@ def test_parse_feature_after_parser_error():
             'text': u'x',
             'location': {'column': 5, 'line': 3},
             'keyword': u'Given ',
+            'keywordType': u'Context',
             'docString': {
                 'content': u'closed docstring',
                 'delimiter': '"""',

diff --git a/test/pickles_test/compiler_test.py b/test/pickles_test/compiler_test.py
@@ -34,6 +34,7 @@ def test_compiles_a_scenario():
               {
                 "id": "2",
                 "astNodeIds": ["0"],
+                "type": "Context",
                 "text": "passing"
               }
             ],
@@ -73,6 +74,7 @@ def test_compiles_a_scenario_outline_with_i18n_characters():
               {
                 "id": "5",
                 "astNodeIds": ["0", "2"],
+                "type": "Context",
                 "text": "passing"
               }
             ],