Skip to content

Commit

Permalink
Merge pull request #7761 from RasaHQ/7731/dump-nlu-metadata
Browse files Browse the repository at this point in the history
Support for writing NLU intent/example metadata to YAML
  • Loading branch information
rasabot authored Jan 25, 2021
2 parents 149c19f + ca52ad8 commit fb668ca
Show file tree
Hide file tree
Showing 4 changed files with 144 additions and 24 deletions.
2 changes: 2 additions & 0 deletions changelog/7731.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add support in `RasaYAMLWriter` for writing intent and example metadata back
into NLU YAML files.
96 changes: 81 additions & 15 deletions rasa/shared/nlu/training_data/formats/rasa_yaml.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,20 @@
import logging
from collections import OrderedDict
from pathlib import Path
from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional
from typing import Text, Any, List, Dict, Tuple, Union, Iterator, Optional, Callable

import rasa.shared.data
from rasa.shared.core.domain import Domain
from rasa.shared.exceptions import YamlException
from rasa.shared.utils import validation
from ruamel.yaml import StringIO
from ruamel.yaml.scalarstring import LiteralScalarString

from rasa.shared.constants import (
DOCS_URL_TRAINING_DATA,
LATEST_TRAINING_DATA_FORMAT_VERSION,
)
from rasa.shared.nlu.constants import METADATA_INTENT, METADATA_EXAMPLE
from rasa.shared.nlu.training_data.formats.readerwriter import (
TrainingDataReader,
TrainingDataWriter,
Expand Down Expand Up @@ -468,23 +470,87 @@ def process_training_examples_by_key(
training_examples: Dict,
key_name: Text,
key_examples: Text,
example_extraction_predicate=lambda x: x,
example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x,
) -> List[OrderedDict]:
from ruamel.yaml.scalarstring import LiteralScalarString
intents = []

result = []
for entity_key, examples in training_examples.items():
for intent_name, examples in training_examples.items():
converted, intent_metadata = RasaYAMLWriter._convert_training_examples(
examples, example_extraction_predicate
)

intent = OrderedDict()
intent[key_name] = intent_name
if intent_metadata:
intent[KEY_METADATA] = intent_metadata

converted_examples = [
TrainingDataWriter.generate_list_item(
example_extraction_predicate(example).strip(STRIP_SYMBOLS)
render_as_objects = any(KEY_METADATA in ex for ex in converted)
if render_as_objects:
rendered = RasaYAMLWriter._render_training_examples_as_objects(
converted
)
for example in examples
]
else:
rendered = RasaYAMLWriter._render_training_examples_as_text(converted)
intent[key_examples] = rendered

next_item = OrderedDict()
next_item[key_name] = entity_key
next_item[key_examples] = LiteralScalarString("".join(converted_examples))
result.append(next_item)
intents.append(intent)

return result
return intents

@staticmethod
def _convert_training_examples(
    training_examples: List[Dict],
    example_extraction_predicate: Callable[[Dict[Text, Any]], Text] = lambda x: x,
) -> Tuple[List[Dict], Optional[Dict]]:
    """Converts raw training examples and collects the intent metadata.

    Args:
        training_examples: The raw examples to convert.
        example_extraction_predicate: Callable that extracts the text from
            a raw example; the result is stripped of `STRIP_SYMBOLS`.

    Returns:
        A tuple of the converted examples and the intent metadata. Every
        converted example holds its text under `KEY_INTENT_TEXT` and, when the
        raw example carries example-level metadata, that metadata under
        `KEY_METADATA`. The intent metadata is taken from the first raw
        example that provides it and is `None` if no example does.
    """
    intent_metadata = None
    converted_examples = []

    for raw_example in training_examples:
        text = example_extraction_predicate(raw_example).strip(STRIP_SYMBOLS)
        entry = {KEY_INTENT_TEXT: text}
        converted_examples.append(entry)

        # Only dictionary examples can carry metadata.
        if not isinstance(raw_example, dict) or KEY_METADATA not in raw_example:
            continue

        raw_metadata = raw_example[KEY_METADATA]

        if METADATA_EXAMPLE in raw_metadata:
            entry[KEY_METADATA] = raw_metadata[METADATA_EXAMPLE]

        # The first example that defines intent metadata wins.
        if intent_metadata is None and METADATA_INTENT in raw_metadata:
            intent_metadata = raw_metadata[METADATA_INTENT]

    return converted_examples, intent_metadata

@staticmethod
def _render_training_examples_as_objects(examples: List[Dict]) -> List[Dict]:
    """Renders training examples as objects.

    The `text` item of every example is turned into a literal scalar string.
    Given the input of a single example:
        {'text': 'how much CO2 will that use?'}
    Its return value is a dictionary that will be rendered in YAML as:
    ```
    text: |
      how much CO2 will that use?
    ```

    Args:
        examples: Example dictionaries holding their text under
            `KEY_INTENT_TEXT`; any other items are carried over unchanged.

    Returns:
        One new dictionary per input example, in the same order. The input
        dictionaries are not modified.
    """

    def render(example: Dict) -> Dict:
        # Work on a shallow copy so the caller's dictionaries stay untouched;
        # mutating them in place would wrap (and newline-terminate) the text
        # again if the same examples were ever rendered a second time.
        rendered = example.copy()
        rendered[KEY_INTENT_TEXT] = LiteralScalarString(
            TrainingDataWriter.generate_string_item(example[KEY_INTENT_TEXT])
        )
        return rendered

    return [render(example) for example in examples]

@staticmethod
def _render_training_examples_as_text(examples: List[Dict]) -> LiteralScalarString:
    """Renders training examples as one literal scalar string of list items.

    Args:
        examples: Example dictionaries holding their text under
            `KEY_INTENT_TEXT`.

    Returns:
        The list items produced by `TrainingDataWriter.generate_list_item`
        for every example text, concatenated in input order and wrapped in a
        `LiteralScalarString`.
    """
    return LiteralScalarString(
        "".join(
            TrainingDataWriter.generate_list_item(example[KEY_INTENT_TEXT])
            for example in examples
        )
    )
6 changes: 5 additions & 1 deletion rasa/shared/nlu/training_data/formats/readerwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,12 @@ def prepare_training_examples(training_data: "TrainingData") -> OrderedDict:
@staticmethod
def generate_list_item(text: Text) -> Text:
    """Generates text for a list item.

    Args:
        text: The text of the list item.

    Returns:
        The string `"- "` followed by the result of
        `TrainingDataWriter.generate_string_item` for `text`.
    """
    # Encoding and the trailing newline are handled by `generate_string_item`,
    # so both item kinds share a single implementation.
    return f"- {TrainingDataWriter.generate_string_item(text)}"

@staticmethod
def generate_string_item(text: Text) -> Text:
    """Generates text for a string item.

    Args:
        text: The text of the string item.

    Returns:
        The result of `encode_string` for `text`, followed by a newline.
    """
    encoded = rasa.shared.nlu.training_data.util.encode_string(text)
    return f"{encoded}\n"

@staticmethod
def generate_message(message: Dict[Text, Any]) -> Text:
Expand Down
64 changes: 56 additions & 8 deletions tests/shared/nlu/training_data/formats/test_rasa_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,17 @@
)


MULTILINE_INTENT_EXAMPLES = f"""
version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
MULTILINE_INTENT_EXAMPLES = f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
nlu:
- intent: intent_name
examples: |
- how much CO2 will that use?
- how much carbon will a one way flight from [new york]{{"entity": "city", "role": "from"}} to california produce?
- what's the carbon footprint of a flight from london to new york?
- how much co2 to new york?
- how much co2 is produced on a return flight from london to new york?
- what's the co2 usage of a return flight to new york?
- can you calculate the co2 footprint of a flight to london?
"""

MULTILINE_INTENT_EXAMPLE_WITH_SYNONYM = """
Expand All @@ -43,7 +47,7 @@
- how much carbon will a one way flight from [new york]{"entity": "city", "role": "from"} to california produce?
"""

INTENT_EXAMPLES_WITH_METADATA = """
INTENT_EXAMPLES_WITH_METADATA = f"""version: "{LATEST_TRAINING_DATA_FORMAT_VERSION}"
nlu:
- intent: intent_name
metadata:
Expand All @@ -54,9 +58,26 @@
metadata:
sentiment: positive
- text: |
how much carbon will a one way flight from [new york]{"entity": "city", "role": "from"} to california produce?
how much carbon will a one way flight from [new york]{{"entity": "city", "role": "from"}} to california produce?
metadata: co2-trip-calculation
- text: |
how much CO2 to [new york]{{"entity": "city", "role": "to"}}?
- intent: greet
metadata: initiate-conversation
examples: |
- Hi
- Hello
- intent: goodbye
examples:
- text: |
bye
metadata: positive-sentiment
- text: |
goodbye
metadata: positive-sentiment
"""


MINIMAL_VALID_EXAMPLE = """
nlu:\n
stories:
Expand Down Expand Up @@ -141,7 +162,7 @@ def test_multiline_intent_is_parsed(example: Text):

assert not len(record)

assert len(training_data.training_examples) == 2
assert len(training_data.training_examples) == 7
assert training_data.training_examples[0].get(
INTENT
) == training_data.training_examples[1].get(INTENT)
Expand All @@ -156,13 +177,40 @@ def test_intent_with_metadata_is_parsed():

assert not len(record)

assert len(training_data.training_examples) == 2
example_1, example_2 = training_data.training_examples
assert len(training_data.training_examples) == 7
example_1, example_2, *other_examples = training_data.training_examples
assert example_1.get(METADATA) == {
METADATA_INTENT: ["johnny"],
METADATA_EXAMPLE: {"sentiment": "positive"},
}
assert example_2.get(METADATA) == {METADATA_INTENT: ["johnny"]}
assert example_2.get(METADATA) == {
METADATA_INTENT: ["johnny"],
METADATA_EXAMPLE: "co2-trip-calculation",
}


def test_metadata_roundtrip():
    """Checks that examples with metadata survive a read/write round trip.

    The dumped YAML must equal the original input, and reading it again must
    yield the same training examples.
    """
    parsed = RasaYAMLReader().reads(INTENT_EXAMPLES_WITH_METADATA)

    yaml_text = RasaYAMLWriter().dumps(parsed)
    assert yaml_text == INTENT_EXAMPLES_WITH_METADATA

    # Use a fresh reader to validate the dumped output.
    reparsed = RasaYAMLReader().reads(yaml_text)

    assert reparsed.training_examples == parsed.training_examples


def test_write_metadata_stripped():
    """Checks that strippable characters in an example text are not written.

    After appending whitespace and line breaks to the first example's text,
    the dumped YAML must still equal the original input.
    """
    parsed = RasaYAMLReader().reads(INTENT_EXAMPLES_WITH_METADATA)

    # Add strippable characters to first example text
    first_example = parsed.training_examples[0]
    first_example.data["text"] = first_example.data["text"] + " \r\n "

    assert RasaYAMLWriter().dumps(parsed) == INTENT_EXAMPLES_WITH_METADATA


# This test would work only with examples that have a `version` key specified
Expand Down

0 comments on commit fb668ca

Please sign in to comment.