From b1fc73f3f238905ddd55f50e75bb494b8e9188d9 Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Tue, 16 Feb 2021 20:42:19 +0100 Subject: [PATCH 1/2] load schema files for `pykwalify` to avoid global `yaml` usage --- rasa/shared/utils/validation.py | 8 +++- tests/shared/utils/test_validation.py | 55 +++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 1 deletion(-) diff --git a/rasa/shared/utils/validation.py b/rasa/shared/utils/validation.py index b5e52b4fbb9d..459bab1156f9 100644 --- a/rasa/shared/utils/validation.py +++ b/rasa/shared/utils/validation.py @@ -154,9 +154,15 @@ def validate_yaml_schema(yaml_file_content: Text, schema_path: Text) -> None: PACKAGE_NAME, SCHEMA_EXTENSIONS_FILE ) + # Load schema content using our YAML loader as `pykwalify` uses a global instance + # which can fail when used concurrently + schema_content = rasa.shared.utils.io.read_yaml_file(schema_file) + schema_utils_content = rasa.shared.utils.io.read_yaml_file(schema_utils_file) + schema_content = dict(schema_content, **schema_utils_content) + c = Core( source_data=source_data, - schema_files=[schema_file, schema_utils_file], + schema_data=schema_content, extensions=[schema_extensions], ) diff --git a/tests/shared/utils/test_validation.py b/tests/shared/utils/test_validation.py index 89cf95996af7..420d70997b18 100644 --- a/tests/shared/utils/test_validation.py +++ b/tests/shared/utils/test_validation.py @@ -1,3 +1,5 @@ +from threading import Thread + import pytest from pep440_version_utils import Version @@ -12,6 +14,7 @@ DOMAIN_SCHEMA_FILE, LATEST_TRAINING_DATA_FORMAT_VERSION, ) +from rasa.shared.nlu.training_data.formats.rasa_yaml import NLU_SCHEMA_FILE from rasa.shared.utils.validation import KEY_TRAINING_DATA_FORMAT_VERSION @@ -181,3 +184,55 @@ async def test_invalid_training_data_format_version_warns(): for version in [invalid_version_1, invalid_version_2]: with pytest.warns(UserWarning): assert validation_utils.validate_training_data_format_version(version, "") + + +def test_concurrent_schema_validation(): + successful_results = [] + + def validate() -> None: + payload = """ +version: "2.0" +nlu: +- intent: greet + examples: | + - hey + - hello + - hi + - hello there + - good morning + - good evening + - moin + - hey there + - let's go + - hey dude + - goodmorning + - goodevening + - good afternoon +- intent: goodbye + examples: | + - good afternoon + - cu + - good by + - cee you later + - good night + - bye + - goodbye + - have a nice day + - see you around + - bye bye + - see you later + """ + rasa.shared.utils.validation.validate_yaml_schema(payload, NLU_SCHEMA_FILE) + successful_results.append(True) + + threads = [] + for i in range(10): + threads.append(Thread(target=validate)) + + for thread in threads: + thread.start() + + for thread in threads: + thread.join() + + assert len(successful_results) == len(threads) From b7175d349ee719953ffe06e0191a9e51175c44fa Mon Sep 17 00:00:00 2001 From: Tobias Wochinger Date: Wed, 17 Feb 2021 09:40:33 +0100 Subject: [PATCH 2/2] add changelog --- changelog/7970.bugfix.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog/7970.bugfix.md diff --git a/changelog/7970.bugfix.md b/changelog/7970.bugfix.md new file mode 100644 index 000000000000..c4ddc69087cc --- /dev/null +++ b/changelog/7970.bugfix.md @@ -0,0 +1,3 @@ +Fixed a YAML validation error which happened when executing multiple validations +concurrently. This could e.g. happen when sending concurrent requests to server +endpoints which process YAML training data.