diff --git a/changelog/7731.improvement.md b/changelog/7731.improvement.md new file mode 100644 index 000000000000..b0f2bca89db2 --- /dev/null +++ b/changelog/7731.improvement.md @@ -0,0 +1,2 @@ +Add support in `RasaYAMLWriter` for writing intent and example metadata back +into NLU YAML files. diff --git a/rasa/shared/nlu/training_data/formats/rasa_yaml.py b/rasa/shared/nlu/training_data/formats/rasa_yaml.py index ce254b109b01..d8d6f480e5a2 100644 --- a/rasa/shared/nlu/training_data/formats/rasa_yaml.py +++ b/rasa/shared/nlu/training_data/formats/rasa_yaml.py @@ -1,18 +1,20 @@ import logging from collections import OrderedDict from pathlib import Path -from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional +from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional, Callable import rasa.shared.data from rasa.shared.core.domain import Domain from rasa.shared.exceptions import YamlException from rasa.shared.utils import validation from ruamel.yaml import StringIO +from ruamel.yaml.scalarstring import LiteralScalarString from rasa.shared.constants import ( DOCS_URL_TRAINING_DATA, LATEST_TRAINING_DATA_FORMAT_VERSION, ) +from rasa.shared.nlu.constants import METADATA_INTENT, METADATA_EXAMPLE from rasa.shared.nlu.training_data.formats.readerwriter import ( TrainingDataReader, TrainingDataWriter, @@ -468,23 +470,87 @@ def process_training_examples_by_key( training_examples: Dict, key_name: Text, key_examples: Text, - example_extraction_predicate=lambda x: x, + example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, ) -> List[OrderedDict]: - from ruamel.yaml.scalarstring import LiteralScalarString + intents = [] - result = [] - for entity_key, examples in training_examples.items(): + for intent_name, examples in training_examples.items(): + converted, intent_metadata = RasaYAMLWriter._convert_training_examples( + examples, example_extraction_predicate + ) + + intent = OrderedDict() + intent[key_name] = 
intent_name + if intent_metadata: + intent[KEY_METADATA] = intent_metadata - converted_examples = [ - TrainingDataWriter.generate_list_item( - example_extraction_predicate(example).strip(STRIP_SYMBOLS) + render_as_objects = any(KEY_METADATA in ex for ex in converted) + if render_as_objects: + rendered = RasaYAMLWriter._render_training_examples_as_objects( + converted ) - for example in examples - ] + else: + rendered = RasaYAMLWriter._render_training_examples_as_text(converted) + intent[key_examples] = rendered - next_item = OrderedDict() - next_item[key_name] = entity_key - next_item[key_examples] = LiteralScalarString("".join(converted_examples)) - result.append(next_item) + intents.append(intent) - return result + return intents + + @staticmethod + def _convert_training_examples( + training_examples: List[Dict], + example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x, + ) -> Tuple[List[Dict], Optional[Dict]]: + """Returns converted training examples and potential intent metadata.""" + converted_examples = [] + intent_metadata = None + + for example in training_examples: + converted = { + KEY_INTENT_TEXT: example_extraction_predicate(example).strip( + STRIP_SYMBOLS + ) + } + + if isinstance(example, dict) and KEY_METADATA in example: + metadata = example[KEY_METADATA] + + if METADATA_EXAMPLE in metadata: + converted[KEY_METADATA] = metadata[METADATA_EXAMPLE] + + if intent_metadata is None and METADATA_INTENT in metadata: + intent_metadata = metadata[METADATA_INTENT] + + converted_examples.append(converted) + + return converted_examples, intent_metadata + + @staticmethod + def _render_training_examples_as_objects(examples: List[Dict]) -> List[Dict]: + """Renders training examples as objects with its `text` item as a literal scalar string. + + Given the input of a single example: + {'text': 'how much CO2 will that use?'} + Its return value is a dictionary that will be rendered in YAML as: + ``` + text: | + how much CO2 will that use? 
+ ``` + """ + + def render(example: Dict) -> Dict: + text = example[KEY_INTENT_TEXT] + example[KEY_INTENT_TEXT] = LiteralScalarString( + TrainingDataWriter.generate_string_item(text) + ) + return example + + return [render(ex) for ex in examples] + + @staticmethod + def _render_training_examples_as_text(examples: List[Dict]) -> List[Text]: + def render(example: Dict) -> Text: + return TrainingDataWriter.generate_list_item(example[KEY_INTENT_TEXT]) + + return LiteralScalarString("".join([render(example) for example in examples])) diff --git a/rasa/shared/nlu/training_data/formats/readerwriter.py b/rasa/shared/nlu/training_data/formats/readerwriter.py index 2a538833b929..5e2bba93fe89 100644 --- a/rasa/shared/nlu/training_data/formats/readerwriter.py +++ b/rasa/shared/nlu/training_data/formats/readerwriter.py @@ -69,8 +69,12 @@ def prepare_training_examples(training_data: "TrainingData") -> OrderedDict: @staticmethod def generate_list_item(text: Text) -> Text: """Generates text for a list item.""" + return f"- {TrainingDataWriter.generate_string_item(text)}" - return f"- {rasa.shared.nlu.training_data.util.encode_string(text)}\n" + @staticmethod + def generate_string_item(text: Text) -> Text: + """Generates text for a string item.""" + return f"{rasa.shared.nlu.training_data.util.encode_string(text)}\n" @staticmethod def generate_message(message: Dict[Text, Any]) -> Text: diff --git a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py index 7369d86c58fc..6749f82a4a88 100644 --- a/tests/shared/nlu/training_data/formats/test_rasa_yaml.py +++ b/tests/shared/nlu/training_data/formats/test_rasa_yaml.py @@ -19,13 +19,17 @@ ) -MULTILINE_INTENT_EXAMPLES = f""" -version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" +MULTILINE_INTENT_EXAMPLES = f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" nlu: - intent: intent_name examples: | - how much CO2 will that use? 
- how much carbon will a one way flight from [new york]{{"entity": "city", "role": "from"}} to california produce? + - what's the carbon footprint of a flight from london to new york? + - how much co2 to new york? + - how much co2 is produced on a return flight from london to new york? + - what's the co2 usage of a return flight to new york? + - can you calculate the co2 footprint of a flight to london? """ MULTILINE_INTENT_EXAMPLE_WITH_SYNONYM = """ @@ -43,7 +47,7 @@ - how much carbon will a one way flight from [new york]{"entity": "city", "role": "from"} to california produce? """ -INTENT_EXAMPLES_WITH_METADATA = """ +INTENT_EXAMPLES_WITH_METADATA = f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}" nlu: - intent: intent_name metadata: @@ -54,9 +58,26 @@ metadata: sentiment: positive - text: | - how much carbon will a one way flight from [new york]{"entity": "city", "role": "from"} to california produce? + how much carbon will a one way flight from [new york]{{"entity": "city", "role": "from"}} to california produce? + metadata: co2-trip-calculation + - text: | + how much CO2 to [new york]{{"entity": "city", "role": "to"}}? 
+- intent: greet + metadata: initiate-conversation + examples: | + - Hi + - Hello +- intent: goodbye + examples: + - text: | + bye + metadata: positive-sentiment + - text: | + goodbye + metadata: positive-sentiment """ + MINIMAL_VALID_EXAMPLE = """ nlu:\n stories: @@ -141,7 +162,7 @@ def test_multiline_intent_is_parsed(example: Text): assert not len(record) - assert len(training_data.training_examples) == 2 + assert len(training_data.training_examples) == 7 assert training_data.training_examples[0].get( INTENT ) == training_data.training_examples[1].get(INTENT) @@ -156,13 +177,40 @@ def test_intent_with_metadata_is_parsed(): assert not len(record) - assert len(training_data.training_examples) == 2 - example_1, example_2 = training_data.training_examples + assert len(training_data.training_examples) == 7 + example_1, example_2, *other_examples = training_data.training_examples assert example_1.get(METADATA) == { METADATA_INTENT: ["johnny"], METADATA_EXAMPLE: {"sentiment": "positive"}, } - assert example_2.get(METADATA) == {METADATA_INTENT: ["johnny"]} + assert example_2.get(METADATA) == { + METADATA_INTENT: ["johnny"], + METADATA_EXAMPLE: "co2-trip-calculation", + } + + +def test_metadata_roundtrip(): + reader = RasaYAMLReader() + result = reader.reads(INTENT_EXAMPLES_WITH_METADATA) + + dumped = RasaYAMLWriter().dumps(result) + assert dumped == INTENT_EXAMPLES_WITH_METADATA + + validation_reader = RasaYAMLReader() + dumped_result = validation_reader.reads(dumped) + + assert dumped_result.training_examples == result.training_examples + + +def test_write_metadata_stripped(): + reader = RasaYAMLReader() + result = reader.reads(INTENT_EXAMPLES_WITH_METADATA) + + # Add strippable characters to first example text + result.training_examples[0].data["text"] += " \r\n " + + dumped = RasaYAMLWriter().dumps(result) + assert dumped == INTENT_EXAMPLES_WITH_METADATA # This test would work only with examples that have a `version` key specified