Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace os.path with pathlib in NLU module #7118

Closed
wants to merge 28 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
fc90d5a
Change nlu model and extractors to use pathlib
RomuloSouza Oct 15, 2020
0fb959b
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Oct 21, 2020
df212e1
Change nlu tokenizers, utils and some files to use pathlib
RomuloSouza Oct 22, 2020
842bf01
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Oct 22, 2020
a0ed4d4
Change nlu classifiers to use pathlib
silvasara Oct 22, 2020
c7f3516
Change nlu featurizers to use pathlib
silvasara Oct 22, 2020
8b90d4b
Merge branch 'master' of https://github.com/FGA-GCES/rasa into replac…
RomuloSouza Oct 23, 2020
5b578ed
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Oct 27, 2020
9df5e66
Merge branch 'master' into replace-os.path-pathlib
silvasara Nov 1, 2020
82e8d9b
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 8, 2020
5370197
Make changes suggested by the reviewer
silvasara Nov 8, 2020
4fdd821
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Nov 10, 2020
8c7bebb
Remove unnecessary casting
RomuloSouza Nov 10, 2020
47704ec
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Nov 10, 2020
8e70983
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 11, 2020
2b1d8c1
Make changes suggested by the reviewer
silvasara Nov 11, 2020
a150ec8
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Nov 11, 2020
7e59da0
Merge branch 'master' into replace-os.path-pathlib
RomuloSouza Nov 16, 2020
03582d7
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 17, 2020
f9675a1
Merge branch 'master' into replace-os.path-pathlib
RomuloSouza Nov 23, 2020
beabb3f
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 23, 2020
933fcb6
Merge branch 'replace-os.path-pathlib' of https://github.com/FGA-GCES…
silvasara Nov 23, 2020
97ed7c3
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
silvasara Nov 26, 2020
7b436fb
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Dec 6, 2020
b425e6c
Fix type of param
RomuloSouza Dec 6, 2020
fdac786
Fix lint problems
RomuloSouza Dec 7, 2020
db6116d
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Dec 7, 2020
e703ddc
Merge branch 'master' of https://github.com/RasaHQ/rasa into replace-…
RomuloSouza Dec 8, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/3153.improvement.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Reduce usage of `os.path` in favor of `pathlib` throughout the code base.
6 changes: 3 additions & 3 deletions rasa/nlu/classifiers/diet_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path

import numpy as np
import os
import scipy.sparse
import tensorflow as tf
import tensorflow_addons as tfa
Expand Down Expand Up @@ -964,7 +963,7 @@ def load(
logger.debug(
f"Failed to load model for '{cls.__name__}'. "
f"Maybe you did not provide enough training data and no model was "
f"trained or the path '{os.path.abspath(model_dir)}' doesn't exist?"
f"trained or the path '{Path(model_dir).resolve()}' doesn't exist?"
)
return cls(component_config=meta)

Expand Down Expand Up @@ -1041,7 +1040,8 @@ def _load_model(
model_dir: Text,
) -> "RasaModel":
file_name = meta.get("file")
tf_model_file = os.path.join(model_dir, file_name + ".tf_model")
tf_model_file = f"{file_name}.tf_model"
tf_model_file = Path(model_dir) / tf_model_file

label_key = LABEL_KEY if meta[INTENT_CLASSIFICATION] else None
label_sub_key = LABEL_SUB_KEY if meta[INTENT_CLASSIFICATION] else None
Expand Down
10 changes: 5 additions & 5 deletions rasa/nlu/classifiers/keyword_intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import logging
import re
from typing import Any, Dict, Optional, Text
Expand Down Expand Up @@ -128,7 +128,7 @@ def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
"""

file_name = file_name + ".json"
keyword_file = os.path.join(model_dir, file_name)
keyword_file = Path(model_dir) / file_name
utils.write_json_to_file(keyword_file, self.intent_keyword_map)

return {"file": file_name}
Expand All @@ -145,8 +145,8 @@ def load(

if model_dir and meta.get("file"):
file_name = meta.get("file")
keyword_file = os.path.join(model_dir, file_name)
if os.path.exists(keyword_file):
keyword_file = Path(model_dir) / file_name
if keyword_file.exists():
intent_keyword_map = rasa.shared.utils.io.read_json_file(keyword_file)
else:
rasa.shared.utils.io.raise_warning(
Expand All @@ -158,5 +158,5 @@ def load(
else:
raise Exception(
f"Failed to load keyword intent classifier model. "
f"Path {os.path.abspath(meta.get('file'))} doesn't exist."
            f"Path {Path(meta.get('file')).resolve()} doesn't exist."
)
17 changes: 10 additions & 7 deletions rasa/nlu/classifiers/mitie_intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
import typing
from typing import Any, Dict, List, Optional, Text, Type

Expand Down Expand Up @@ -103,19 +103,22 @@ def load(

if not file_name:
return cls(meta)
classifier_file = os.path.join(model_dir, file_name)
if os.path.exists(classifier_file):
classifier = mitie.text_categorizer(classifier_file)
classifier_file = Path(model_dir) / file_name
if classifier_file.exists():
classifier = mitie.text_categorizer(str(classifier_file))
return cls(meta, classifier)
else:
return cls(meta)

def persist(self, file_name: Text, model_dir: Text) -> Dict[Text, Any]:
"""Persist this model into the passed directory.

Return the metadata necessary to load the model again.
"""
if self.clf:
file_name = file_name + ".dat"
classifier_file = os.path.join(model_dir, file_name)
self.clf.save_to_disk(classifier_file, pure_model=True)
file_name = f"{file_name}.dat"
classifier_file = Path(model_dir) / file_name
RomuloSouza marked this conversation as resolved.
Show resolved Hide resolved
self.clf.save_to_disk(str(classifier_file), pure_model=True)
return {"file": file_name}
else:
return {"file": None}
17 changes: 9 additions & 8 deletions rasa/nlu/classifiers/sklearn_intent_classifier.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import os
from pathlib import Path
import typing
import warnings
from typing import Any, Dict, List, Optional, Text, Tuple, Type
Expand Down Expand Up @@ -231,12 +231,11 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]

classifier_file_name = file_name + "_classifier.pkl"
encoder_file_name = file_name + "_encoder.pkl"
model_dir = Path(model_dir)
if self.clf and self.le:
io_utils.json_pickle(model_dir / encoder_file_name, self.le.classes_)
io_utils.json_pickle(
os.path.join(model_dir, encoder_file_name), self.le.classes_
)
io_utils.json_pickle(
os.path.join(model_dir, classifier_file_name), self.clf.best_estimator_
model_dir / classifier_file_name, self.clf.best_estimator_
)
return {"classifier": classifier_file_name, "encoder": encoder_file_name}

Expand All @@ -251,10 +250,12 @@ def load(
) -> "SklearnIntentClassifier":
from sklearn.preprocessing import LabelEncoder

classifier_file = os.path.join(model_dir, meta.get("classifier"))
encoder_file = os.path.join(model_dir, meta.get("encoder"))
model_dir = Path(model_dir)

classifier_file = model_dir / meta.get("classifier")
encoder_file = model_dir / meta.get("encoder")

if os.path.exists(classifier_file):
if classifier_file.exists():
classifier = io_utils.json_unpickle(classifier_file)
classes = io_utils.json_unpickle(encoder_file)
encoder = LabelEncoder()
Expand Down
4 changes: 2 additions & 2 deletions rasa/nlu/config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import copy
import logging
import os
import ruamel.yaml as yaml
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Union

from rasa.shared.exceptions import InvalidConfigException, RasaException
Expand Down Expand Up @@ -38,7 +38,7 @@ def load(
return _load_from_dict(config, **kwargs)

file_config = {}
if config is None and os.path.isfile(DEFAULT_CONFIG_PATH):
if config is None and Path(DEFAULT_CONFIG_PATH).is_file():
config = DEFAULT_CONFIG_PATH

if config is not None:
Expand Down
5 changes: 3 additions & 2 deletions rasa/nlu/convert.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import argparse
import os
from typing import Text
from pathlib import Path

from rasa.shared.utils.cli import print_error
import rasa.shared.nlu.training_data.loading
Expand All @@ -10,7 +10,8 @@
def convert_training_data(
data_file: Text, out_file: Text, output_format: Text, language: Text
):
if not os.path.exists(data_file):
"""Convert the training data to the specified language and format."""
if not Path(data_file).exists():
print_error(
"Data file '{}' does not exist. Provide a valid NLU data file using "
"the '--data' argument.".format(data_file)
Expand Down
18 changes: 9 additions & 9 deletions rasa/nlu/extractors/crf_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
import logging
import os
import typing

import numpy as np
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Tuple, Type, Callable

import rasa.nlu.utils.bilou_utils as bilou_utils
Expand Down Expand Up @@ -312,24 +311,25 @@ def load(

file_names = meta.get("files")
entity_taggers = {}
model_dir = Path(model_dir)

if not file_names:
logger.debug(
f"Failed to load model for 'CRFEntityExtractor'. "
f"Maybe you did not provide enough training data and no model was "
f"trained or the path '{os.path.abspath(model_dir)}' doesn't exist?"
f"trained or the path '{model_dir.resolve()}' doesn't exist?"
)
return cls(component_config=meta)

for name, file_name in file_names.items():
model_file = os.path.join(model_dir, file_name)
if os.path.exists(model_file):
entity_taggers[name] = joblib.load(model_file)
model_file = model_dir / file_name
if model_file.exists():
entity_taggers[name] = joblib.load(str(model_file))
else:
logger.debug(
f"Failed to load model for tag '{name}' for 'CRFEntityExtractor'. "
f"Maybe you did not provide enough training data and no model was "
f"trained or the path '{os.path.abspath(model_file)}' doesn't "
f"trained or the path '{model_file.resolve()}' doesn't "
f"exist?"
)

Expand All @@ -347,8 +347,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]
if self.entity_taggers:
for name, entity_tagger in self.entity_taggers.items():
file_name = f"{file_name}.{name}.pkl"
model_file_name = os.path.join(model_dir, file_name)
joblib.dump(entity_tagger, model_file_name)
model_file_name = Path(model_dir) / file_name
joblib.dump(entity_tagger, str(model_file_name))
file_names[name] = file_name

return {"files": file_names}
Expand Down
14 changes: 9 additions & 5 deletions rasa/nlu/extractors/entity_synonyms.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Type

from rasa.nlu.components import Component
Expand Down Expand Up @@ -51,10 +51,14 @@ def process(self, message: Message, **kwargs: Any) -> None:
message.set(ENTITIES, updated_entities, add_to_output=True)

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
"""Persist this model into the passed directory.

Return the metadata necessary to load the model again.
"""
model_dir = Path(model_dir)
if self.synonyms:
file_name = file_name + ".json"
entity_synonyms_file = os.path.join(model_dir, file_name)
entity_synonyms_file = model_dir / file_name
write_json_to_file(
entity_synonyms_file, self.synonyms, separators=(",", ": ")
)
Expand All @@ -77,13 +81,13 @@ def load(
synonyms = None
return cls(meta, synonyms)

entity_synonyms_file = os.path.join(model_dir, file_name)
if os.path.isfile(entity_synonyms_file):
entity_synonyms_file = Path(model_dir) / file_name
if entity_synonyms_file.is_file():
synonyms = rasa.shared.utils.io.read_json_file(entity_synonyms_file)
else:
synonyms = None
rasa.shared.utils.io.raise_warning(
f"Failed to load synonyms file from '{entity_synonyms_file}'.",
f"Failed to load synonyms file from '{str(entity_synonyms_file)}'.",
docs=DOCS_URL_TRAINING_DATA + "#synonyms",
)
return cls(meta, synonyms)
Expand Down
12 changes: 6 additions & 6 deletions rasa/nlu/extractors/mitie_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
import typing
from pathlib import Path
from typing import Any, Dict, List, Optional, Text, Type

from rasa.nlu.constants import TOKENS_NAMES
Expand Down Expand Up @@ -158,9 +158,9 @@ def load(
if not file_name:
return cls(meta)

classifier_file = os.path.join(model_dir, file_name)
if os.path.exists(classifier_file):
extractor = mitie.named_entity_extractor(classifier_file)
classifier_file = Path(model_dir) / file_name
if classifier_file.exists():
extractor = mitie.named_entity_extractor(str(classifier_file))
return cls(meta, extractor)
else:
return cls(meta)
Expand All @@ -169,8 +169,8 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]

if self.ner:
file_name = file_name + ".dat"
entity_extractor_file = os.path.join(model_dir, file_name)
self.ner.save_to_disk(entity_extractor_file, pure_model=True)
entity_extractor_file = Path(model_dir) / file_name
self.ner.save_to_disk(str(entity_extractor_file), pure_model=True)
return {"file": file_name}
else:
return {"file": None}
12 changes: 7 additions & 5 deletions rasa/nlu/extractors/regex_entity_extractor.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import logging
import os
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Text

import rasa.shared.utils.io
Expand Down Expand Up @@ -120,19 +120,21 @@ def load(
) -> "RegexEntityExtractor":

file_name = meta.get("file")
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name

if os.path.exists(regex_file):
if regex_file.exists():
patterns = rasa.shared.utils.io.read_json_file(regex_file)
return RegexEntityExtractor(meta, patterns=patterns)

return RegexEntityExtractor(meta)

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
"""Persist this model into the passed directory.
Return the metadata necessary to load the model again."""

Return the metadata necessary to load the model again.
"""
file_name = f"{file_name}.json"
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name
rasa.shared.utils.io.dump_obj_as_json_to_file(regex_file, self.patterns)

return {"file": file_name}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import os
from pathlib import Path
import re
import scipy.sparse
from typing import Any, Dict, List, Optional, Text, Type, Tuple
Expand Down Expand Up @@ -595,7 +595,7 @@ def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]
attribute_vocabularies = self._collect_vectorizer_vocabularies()
if self._is_any_model_trained(attribute_vocabularies):
# Definitely need to persist some vocabularies
featurizer_file = os.path.join(model_dir, file_name)
featurizer_file = Path(model_dir) / file_name

if self.use_shared_vocab:
# Only persist vocabulary from one attribute. Can be loaded and
Expand Down Expand Up @@ -675,9 +675,9 @@ def load(
) -> "CountVectorsFeaturizer":

file_name = meta.get("file")
featurizer_file = os.path.join(model_dir, file_name)
featurizer_file = Path(model_dir) / file_name

if not os.path.exists(featurizer_file):
if not featurizer_file.exists():
return cls(meta)

vocabulary = io_utils.json_unpickle(featurizer_file)
Expand Down
12 changes: 7 additions & 5 deletions rasa/nlu/featurizers/sparse_featurizer/regex_featurizer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
import os
from pathlib import Path
import re
from typing import Any, Dict, List, Optional, Text, Type, Tuple

Expand Down Expand Up @@ -164,19 +164,21 @@ def load(
) -> "RegexFeaturizer":

file_name = meta.get("file")
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name

if os.path.exists(regex_file):
if regex_file.exists():
known_patterns = rasa.shared.utils.io.read_json_file(regex_file)
return RegexFeaturizer(meta, known_patterns=known_patterns)
else:
return RegexFeaturizer(meta)

def persist(self, file_name: Text, model_dir: Text) -> Optional[Dict[Text, Any]]:
"""Persist this model into the passed directory.
Return the metadata necessary to load the model again."""

Return the metadata necessary to load the model again.
"""
file_name = file_name + ".pkl"
regex_file = os.path.join(model_dir, file_name)
regex_file = Path(model_dir) / file_name
utils.write_json_to_file(regex_file, self.known_patterns, indent=4)

return {"file": file_name}
Loading