Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace os.path with pathlib in NLU module #7118

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fc90d5a
Change nlu model and extractors to use pathlib
RomuloSouza Oct 15, 2020
0fb959b
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Oct 21, 2020
df212e1
Change nlu tokenizers, utils and some files to use pathlib
RomuloSouza Oct 22, 2020
842bf01
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Oct 22, 2020
a0ed4d4
Change nlu classifiers to use pathlib
silvasara Oct 22, 2020
c7f3516
Change nlu featurizers to use pathlib
silvasara Oct 22, 2020
8b90d4b
Merge branch 'master' of https://github.com/FGA-GCES/rasa into replac…
RomuloSouza Oct 23, 2020
5b578ed
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Oct 27, 2020
9df5e66
Merge branch 'master' into replace-os.path-pathlib
silvasara Nov 1, 2020
82e8d9b
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 8, 2020
5370197
Make changes suggested by the reviewer
silvasara Nov 8, 2020
4fdd821
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Nov 10, 2020
8c7bebb
Remove unnecessary casting
RomuloSouza Nov 10, 2020
47704ec
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Nov 10, 2020
8e70983
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 11, 2020
2b1d8c1
Make changes suggested by the reviewer
silvasara Nov 11, 2020
a150ec8
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Nov 11, 2020
7e59da0
Merge branch 'master' into replace-os.path-pathlib
RomuloSouza Nov 16, 2020
03582d7
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 17, 2020
f9675a1
Merge branch 'master' into replace-os.path-pathlib
RomuloSouza Nov 23, 2020
beabb3f
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 23, 2020
933fcb6
Merge branch 'replace-os.path-pathlib' of https://github.com/FGA-GCES…
silvasara Nov 23, 2020
97ed7c3
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 26, 2020
7b436fb
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Dec 6, 2020
b425e6c
Fix type of param
RomuloSouza Dec 6, 2020
fdac786
Fix lint problems
RomuloSouza Dec 7, 2020
db6116d
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Dec 7, 2020
e703ddc
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Dec 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/3153.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce usage of `os.path` in favor of `pathlib` throughout the code base.
6 changes: 3 additions & 3 deletions rasa/nlu/classifiers/diet_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path

import numpy as np
import os
import scipy.sparse
import tensorflow as tf
import tensorflow_addons as tfa
Expand Down Expand Up @@ -964,7 +963,7 @@ def load(
logger.debug(
f"Failed to load model for '{cls.__name__}'. "
f"Maybe you did not provide enough training data and no model was "
f"trained or the path '{os.path.abspath(model_dir)}' doesn't exist?"
f"trained or the path '{Path(model_dir).resolve()}' doesn't exist?"
)
return cls(component_config=meta)

Expand Down Expand Up @@ -1041,7 +1040,8 @@ def _load_model(
model_dir: Text,
) -> "RasaModel":
file_name = meta.get("file")
tf_model_file = os.path.join(model_dir, file_name + ".tf_model")
tf_model_file = f"{file_name}.tf_model"
tf_model_file = Path(model_dir) / tf_model_file

label_key = LABEL_KEY if meta[INTENT_CLASSIFICATION] else None
label_sub_key = LABEL_SUB_KEY if meta[INTENT_CLASSIFICATION] else None
Expand Down
10 changes: 5 additions & 5 deletions rasa/nlu/classifiers/keyword_intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import logging
import re
from typing import Any, Dict, Optional, Text
Expand Down Expand Up @@ -128,7 +128,7 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
"""

file_name = file_name + ".json"
keyword_file = os.path.join(model_dir, file_name)
keyword_file = Path(model_dir) / file_name
utils.write_json_to_file(keyword_file, self.intent_keyword_map)

return {"file": file_name}
Expand All @@ -145,8 +145,8 @@ def load(

if model_dir and meta.get("file"):
file_name = meta.get("file")
keyword_file = os.path.join(model_dir, file_name)
if os.path.exists(keyword_file):
keyword_file = Path(model_dir) / file_name
if keyword_file.exists():
intent_keyword_map = rasa.shared.utils.io.read_json_file(keyword_file)
else:
rasa.shared.utils.io.raise_warning(
Expand All @@ -158,5 +158,5 @@ def load(
else:
raise Exception(
f"Failed to load keyword intent classifier model. "
f"Path {os.path.abspath(meta.get('file'))} doesn't exist."
            f"Path {Path(meta.get('file')).resolve()} doesn't exist."
)
17 changes: 10 additions & 7 deletions rasa/nlu/classifiers/mitie_intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import typing
from typing import Any, Dict, List, Optional, Text, Type

Expand Down Expand Up @@ -103,19 +103,22 @@ def load(

if not file_name:
return cls(meta)
classifier_file = os.path.join(model_dir, file_name)
if os.path.exists(classifier_file):
classifier = mitie.text_categorizer(classifier_file)
classifier_file = Path(model_dir) / file_name
if classifier_file.exists():
classifier = mitie.text_categorizer(str(classifier_file))
return cls(meta, classifier)
else:
return cls(meta)

def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
"""Persist this model into the passed directory.

Return the metadata necessary to load the model again.
"""
if self.clf:
file_name = file_name + ".dat"
classifier_file = os.path.join(model_dir, file_name)
self.clf.save_to_disk(classifier_file, pure_model=True)
file_name = f"{file_name}.dat"
classifier_file = Path(model_dir) / file_name
RomuloSouza marked this conversation as resolved.
Show resolved Hide resolved
self.clf.save_to_disk(str(classifier_file), pure_model=True)
return {"file": file_name}
else:
return {"file": None}
17 changes: 9 additions & 8 deletions rasa/nlu/classifiers/sklearn_intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import os
from pathlib import Path
import typing
import warnings
from typing import Any, Dict, List, Optional, Text, Tuple, Type
Expand Down Expand Up @@ -231,12 +231,11 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]

classifier_file_name = file_name + "_classifier.pkl"
encoder_file_name = file_name + "_encoder.pkl"
model_dir = Path(model_dir)
if self.clf and self.le:
io_utils.json_pickle(model_dir / encoder_file_name, self.le.classes_)
io_utils.json_pickle(
os.path.join(model_dir, encoder_file_name), self.le.classes_
)
io_utils.json_pickle(
os.path.join(model_dir, classifier_file_name), self.clf.best_estimator_
model_dir / classifier_file_name, self.clf.best_estimator_
)
return {"classifier": classifier_file_name, "encoder": encoder_file_name}

Expand All @@ -251,10 +250,12 @@ def load(
) -> "SklearnIntentClassifier":
from sklearn.preprocessing import LabelEncoder

classifier_file = os.path.join(model_dir, meta.get("classifier"))
encoder_file = os.path.join(model_dir, meta.get("encoder"))
model_dir = Path(model_dir)

classifier_file = model_dir / meta.get("classifier")
encoder_file = model_dir / meta.get("encoder")

if os.path.exists(classifier_file):
if classifier_file.exists():
classifier = io_utils.json_unpickle(classifier_file)
classes = io_utils.json_unpickle(encoder_file)
encoder = LabelEncoder()
Expand Down
4 changes: 2 additions & 2 deletions rasa/nlu/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import logging
import os
import ruamel.yaml as yaml
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Union

from rasa.shared.exceptions import InvalidConfigException, RasaException
Expand Down Expand Up @@ -38,7 +38,7 @@ def load(
return _load_from_dict(config, **kwargs)

file_config = {}
if config is None and os.path.isfile(DEFAULT_CONFIG_PATH):
if config is None and Path(DEFAULT_CONFIG_PATH).is_file():
config = DEFAULT_CONFIG_PATH

if config is not None:
Expand Down
5 changes: 3 additions & 2 deletions rasa/nlu/convert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse
import os
from typing import Text
from pathlib import Path

from rasa.shared.utils.cli import print_error
import rasa.shared.nlu.training_data.loading
Expand All @@ -10,7 +10,8 @@
def convert_training_data(
data_file: Text, out_file: Text, output_format: Text, language: Text
):
if not os.path.exists(data_file):
"""Convert the training data to the specified language and format."""
if not Path(data_file).exists():
print_error(
"Data file '{}' does not exist. Provide a valid NLU data file using "
"the '--data' argument.".format(data_file)
Expand Down
18 changes: 9 additions & 9 deletions rasa/nlu/extractors/crf_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import logging
import os
import typing

import numpy as np
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Tuple, Type, Callable

import rasa.nlu.utils.bilou_utils as bilou_utils
Expand Down Expand Up @@ -312,24 +311,25 @@ def load(

file_names = meta.get("files")
entity_taggers = {}
model_dir = Path(model_dir)

if not file_names:
logger.debug(
f"Failed to load model for 'CRFEntityExtractor'. "
f"Maybe you did not provide enough training data and no model was "
f"trained or the path '{os.path.abspath(model_dir)}' doesn't exist?"
f"trained or the path '{model_dir.resolve()}' doesn't exist?"
)
return cls(component_config=meta)

for name, file_name in file_names.items():
model_file = os.path.join(model_dir, file_name)
if os.path.exists(model_file):
entity_taggers[name] = joblib.load(model_file)
model_file = model_dir / file_name
if model_file.exists():
entity_taggers[name] = joblib.load(str(model_file))
else:
logger.debug(
f"Failed to load model for tag '{name}' for 'CRFEntityExtractor'. "
f"Maybe you did not provide enough training data and no model was "
f"trained or the path '{os.path.abspath(model_file)}' doesn't "
f"trained or the path '{model_file.resolve()}' doesn't "
f"exist?"
)

Expand All @@ -347,8 +347,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]
if self.entity_taggers:
for name, entity_tagger in self.entity_taggers.items():
file_name = f"{file_name}.{name}.pkl"
model_file_name = os.path.join(model_dir, file_name)
joblib.dump(entity_tagger, model_file_name)
model_file_name = Path(model_dir) / file_name
joblib.dump(entity_tagger, str(model_file_name))
file_names[name] = file_name

return {"files": file_names}
Expand Down
14 changes: 9 additions & 5 deletions rasa/nlu/extractors/entity_synonyms.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Type

from rasa.nlu.components import Component
Expand Down Expand Up @@ -51,10 +51,14 @@ def process(self, message: Message, **kwargs: Any) -> None:
message.set(ENTITIES, updated_entities, add_to_output=True)

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
"""Persist this model into the passed directory.

Return the metadata necessary to load the model again.
"""
model_dir = Path(model_dir)
if self.synonyms:
file_name = file_name + ".json"
entity_synonyms_file = os.path.join(model_dir, file_name)
entity_synonyms_file = model_dir / file_name
write_json_to_file(
entity_synonyms_file, self.synonyms, separators=(",", ": ")
)
Expand All @@ -77,13 +81,13 @@ def load(
synonyms = None
return cls(meta, synonyms)

entity_synonyms_file = os.path.join(model_dir, file_name)
if os.path.isfile(entity_synonyms_file):
entity_synonyms_file = Path(model_dir) / file_name
if entity_synonyms_file.is_file():
synonyms = rasa.shared.utils.io.read_json_file(entity_synonyms_file)
else:
synonyms = None
rasa.shared.utils.io.raise_warning(
f"Failed to load synonyms file from '{entity_synonyms_file}'.",
f"Failed to load synonyms file from '{str(entity_synonyms_file)}'.",
docs=DOCS_URL_TRAINING_DATA + "#synonyms",
)
return cls(meta, synonyms)
Expand Down
12 changes: 6 additions & 6 deletions rasa/nlu/extractors/mitie_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
import typing
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Type

from rasa.nlu.constants import TOKENS_NAMES
Expand Down Expand Up @@ -158,9 +158,9 @@ def load(
if not file_name:
return cls(meta)

classifier_file = os.path.join(model_dir, file_name)
if os.path.exists(classifier_file):
extractor = mitie.named_entity_extractor(classifier_file)
classifier_file = Path(model_dir) / file_name
if classifier_file.exists():
extractor = mitie.named_entity_extractor(str(classifier_file))
return cls(meta, extractor)
else:
return cls(meta)
Expand All @@ -169,8 +169,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]

if self.ner:
file_name = file_name + ".dat"
entity_extractor_file = os.path.join(model_dir, file_name)
self.ner.save_to_disk(entity_extractor_file, pure_model=True)
entity_extractor_file = Path(model_dir) / file_name
self.ner.save_to_disk(str(entity_extractor_file), pure_model=True)
return {"file": file_name}
else:
return {"file": None}
12 changes: 7 additions & 5 deletions rasa/nlu/extractors/regex_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Text

import rasa.shared.utils.io
Expand Down Expand Up @@ -120,19 +120,21 @@ def load(
) -> "RegexEntityExtractor":

file_name = meta.get("file")
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name

if os.path.exists(regex_file):
if regex_file.exists():
patterns = rasa.shared.utils.io.read_json_file(regex_file)
return RegexEntityExtractor(meta, patterns=patterns)

return RegexEntityExtractor(meta)

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
"""Persist this model into the passed directory.
Return the metadata necessary to load the model again."""

Return the metadata necessary to load the model again.
"""
file_name = f"{file_name}.json"
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name
rasa.shared.utils.io.dump_obj_as_json_to_file(regex_file, self.patterns)

return {"file": file_name}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import os
from pathlib import Path
import re
import scipy.sparse
from typing import Any, Dict, List, Optional, Text, Type, Tuple
Expand Down Expand Up @@ -595,7 +595,7 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]
attribute_vocabularies = self._collect_vectorizer_vocabularies()
if self._is_any_model_trained(attribute_vocabularies):
# Definitely need to persist some vocabularies
featurizer_file = os.path.join(model_dir, file_name)
featurizer_file = Path(model_dir) / file_name

if self.use_shared_vocab:
# Only persist vocabulary from one attribute. Can be loaded and
Expand Down Expand Up @@ -675,9 +675,9 @@ def load(
) -> "CountVectorsFeaturizer":

file_name = meta.get("file")
featurizer_file = os.path.join(model_dir, file_name)
featurizer_file = Path(model_dir) / file_name

if not os.path.exists(featurizer_file):
if not featurizer_file.exists():
return cls(meta)

vocabulary = io_utils.json_unpickle(featurizer_file)
Expand Down
12 changes: 7 additions & 5 deletions rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import os
from pathlib import Path
import re
from typing import Any, Dict, List, Optional, Text, Type, Tuple

Expand Down Expand Up @@ -164,19 +164,21 @@ def load(
) -> "RegexFeaturizer":

file_name = meta.get("file")
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name

if os.path.exists(regex_file):
if regex_file.exists():
known_patterns = rasa.shared.utils.io.read_json_file(regex_file)
return RegexFeaturizer(meta, known_patterns=known_patterns)
else:
return RegexFeaturizer(meta)

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
"""Persist this model into the passed directory.
Return the metadata necessary to load the model again."""

Return the metadata necessary to load the model again.
"""
file_name = file_name + ".pkl"
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name
utils.write_json_to_file(regex_file, self.known_patterns, indent=4)

return {"file": file_name}
Loading