diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py index d4d2cd7798cb..d8d6f480e5a2 100644 --- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py +++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py @@ -1,7 +1,7 @@ import logging from collections import OrderedDict from pathlib import Path -from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional +from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional, Callable import rasa.shared.data from rasa.shared.core.domain import Domain @@ -470,39 +470,40 @@ def process_training_examples_by_key( training_examples: Dict, key_name: Text, key_examples: Text, - example_extraction_predicate=lambda x: x, + example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, ) -> List[OrderedDict]: - result = [] + intents = [] - for entity_key, examples in training_examples.items(): + for intent_name, examples in training_examples.items(): converted, intent_metadata = RasaYAMLWriter._convert_training_examples( examples, example_extraction_predicate ) - next_item = OrderedDict() - next_item[key_name] = entity_key + intent = OrderedDict() + intent[key_name] = intent_name if intent_metadata: - next_item[KEY_METADATA] = intent_metadata + intent[KEY_METADATA] = intent_metadata - render_as_objects = True in [KEY_METADATA in ex for ex in converted] + render_as_objects = any(KEY_METADATA in ex for ex in converted) if render_as_objects: rendered = RasaYAMLWriter._render_training_examples_as_objects( converted ) else: rendered = RasaYAMLWriter._render_training_examples_as_text(converted) - next_item[key_examples] = rendered + intent[key_examples] = rendered - result.append(next_item) + intents.append(intent) - return result + return intents @staticmethod def _convert_training_examples( - training_examples: List[Dict], example_extraction_predicate=lambda x: x + training_examples: List[Dict], + example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, ) -> Tuple[List[Dict], Optional[Dict]]: """Returns converted training examples and potential intent metadata.""" - result = [] + converted_examples = [] intent_metadata = None for example in training_examples: @@ -521,16 +522,27 @@ def _convert_training_examples( if intent_metadata is None and METADATA_INTENT in metadata: intent_metadata = metadata[METADATA_INTENT] - result.append(converted) + converted_examples.append(converted) - return result, intent_metadata + return converted_examples, intent_metadata @staticmethod def _render_training_examples_as_objects(examples: List[Dict]) -> List[Dict]: + """Renders training examples as objects with its `text` item as a literal scalar string. + + Given the input of a single example: + {'text': 'how much CO2 will that use?'} + Its return value is a dictionary that will be rendered in YAML as: + ``` + text: | + how much CO2 will that use? + ``` + """ + def render(example: Dict) -> Dict: - value = example[KEY_INTENT_TEXT] + text = example[KEY_INTENT_TEXT] example[KEY_INTENT_TEXT] = LiteralScalarString( - TrainingDataWriter.generate_string_item(value) + TrainingDataWriter.generate_string_item(text) ) return example diff --git a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py index 9386a7ec5394..4e85cd77d0f2 100644 --- a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py +++ b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py @@ -28,6 +28,8 @@ - what's the carbon footprint of a flight from london to new york? - how much co2 to new york? - how much co2 is produced on a return flight from london to new york? + - what's the co2 usage of a return flight to new york? + - can you calculate the co2 footprint of a flight to london? """ MULTILINE_INTENT_EXAMPLE_WITH_SYNONYM = """ @@ -65,6 +67,14 @@ examples: | - Hi - Hello +- intent: goodbye + examples: + - text: | + bye + metadata: positive-sentiment + - text: | + goodbye + metadata: positive-sentiment """ @@ -152,7 +162,7 @@ def test_multiline_intent_is_parsed(example: Text): assert not len(record) - assert len(training_data.training_examples) == 5 + assert len(training_data.training_examples) == 7 assert training_data.training_examples[0].get( INTENT ) == training_data.training_examples[1].get(INTENT) @@ -167,7 +177,7 @@ def test_intent_with_metadata_is_parsed(): assert not len(record) - assert len(training_data.training_examples) == 5 + assert len(training_data.training_examples) == 7 example_1, example_2, *other_examples = training_data.training_examples assert example_1.get(METADATA) == { METADATA_INTENT: ["johnny"],