Type cast strings to Path objects

deepset-ai · Jan 16, 2020
commit a380d99 (1 parent: a2e32fc)

Showing 5 changed files with 20 additions and 15 deletions.
diff --git a/examples/doc_classification_cola.py b/examples/doc_classification_cola.py
@@ -1,5 +1,6 @@
 # fmt: off
 import logging
+from pathlib import Path
 
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import TextClassificationProcessor
@@ -44,8 +45,8 @@ def doc_classification_cola():
 
     processor = TextClassificationProcessor(tokenizer=tokenizer,
                                             max_seq_len=64,
-                                            data_dir="../data/cola",
-                                            dev_filename="dev.tsv",
+                                            data_dir=Path("../data/cola"),
+                                            dev_filename=Path("dev.tsv"),
                                             dev_split=None,
                                             test_filename=None,
                                             label_list=label_list,
@@ -97,7 +98,7 @@ def doc_classification_cola():
     model = trainer.train(model)
 
     # 8. Hooray! You have a model. Store it:
-    save_dir = "saved_models/bert-doc-tutorial"
+    save_dir = Path("saved_models/bert-doc-tutorial")
     model.save(save_dir)
     processor.save(save_dir)
 

diff --git a/examples/doc_classification_crossvalidation.py b/examples/doc_classification_crossvalidation.py
@@ -1,6 +1,7 @@
 # fmt: off
 import logging
 import json
+from pathlib import Path
 
 from farm.data_handler.data_silo import DataSilo, DataSiloForCrossVal
 from farm.data_handler.processor import TextClassificationProcessor
@@ -83,7 +84,7 @@ def mymetrics(preds, labels):
     label_list = ["OTHER", "OFFENSE"]
     processor = TextClassificationProcessor(tokenizer=tokenizer,
                                             max_seq_len=64,
-                                            data_dir="../data/germeval18",
+                                            data_dir=Path("../data/germeval18"),
                                             label_list=label_list,
                                             metric=metric,
                                             label_column_name="coarse_label"
@@ -163,7 +164,7 @@ def train_on_split(silo_to_use, n_fold, save_dir):
     all_labels = []
     bestfold = None
     bestf1_offense = -1
-    save_dir = "saved_models/bert-german-doc-tutorial-es"
+    save_dir = Path("saved_models/bert-german-doc-tutorial-es")
     for num_fold, silo in enumerate(silos):
         model = train_on_split(silo, num_fold, save_dir)
 
@@ -215,7 +216,7 @@ def train_on_split(silo_to_use, n_fold, save_dir):
     )
     # restore model from the best fold
     lm_name = model.language_model.name
-    save_dir = "saved_models/bert-german-doc-tutorial-es-{}".format(bestfold)
+    save_dir = Path(f"saved_models/bert-german-doc-tutorial-es-{bestfold}")
     model = AdaptiveModel.load(save_dir, device, lm_name=lm_name)
     model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)
 

diff --git a/examples/doc_classification_multilabel_roberta.py b/examples/doc_classification_multilabel_roberta.py
@@ -1,5 +1,6 @@
 # fmt: off
 import logging
+from pathlib import Path
 
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import TextClassificationProcessor
@@ -45,14 +46,14 @@ def doc_classification_multilabel_roberta():
 
     processor = TextClassificationProcessor(tokenizer=tokenizer,
                                             max_seq_len=128,
-                                            data_dir="../data/toxic-comments",
+                                            data_dir=Path("../data/toxic-comments"),
                                             label_list=label_list,
                                             label_column_name="label",
                                             metric=metric,
                                             quote_char='"',
                                             multilabel=True,
-                                            train_filename="train.tsv",
-                                            dev_filename="val.tsv",
+                                            train_filename=Path("train.tsv"),
+                                            dev_filename=Path("val.tsv"),
                                             test_filename=None,
                                             dev_split=0
                                             )
@@ -97,7 +98,7 @@ def doc_classification_multilabel_roberta():
     model = trainer.train(model)
 
     # 8. Hooray! You have a model. Store it:
-    save_dir = "saved_models/bert-multi-doc-roberta"
+    save_dir = Path("saved_models/bert-multi-doc-roberta")
     model.save(save_dir)
     processor.save(save_dir)
 

diff --git a/examples/doc_classification_with_earlystopping.py b/examples/doc_classification_with_earlystopping.py
@@ -1,5 +1,6 @@
 # fmt: off
 import logging
+from pathlib import Path
 
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import TextClassificationProcessor
@@ -65,7 +66,7 @@ def mymetrics(preds, labels):
 
     processor = TextClassificationProcessor(tokenizer=tokenizer,
                                             max_seq_len=64,
-                                            data_dir="../data/germeval18",
+                                            data_dir=Path("../data/germeval18"),
                                             label_list=label_list,
                                             metric=metric,
                                             label_column_name="coarse_label"
@@ -110,7 +111,7 @@ def mymetrics(preds, labels):
         metric="f1_offense", mode="max",   # use the metric from our own metrics function instead of loss
         # metric="f1_macro", mode="max",  # use f1_macro from the dev evaluator of the trainer
         # metric="loss", mode="min",   # use loss from the dev evaluator of the trainer
-        save_dir="saved_models/bert-german-doc-tutorial-es",  # where to save the best model
+        save_dir=Path("saved_models/bert-german-doc-tutorial-es"),  # where to save the best model
         patience=5    # number of evaluations to wait for improvement before terminating the training
     )
 
@@ -132,7 +133,7 @@ def mymetrics(preds, labels):
     # defined with the EarlyStopping instance
     # The model we have at this moment is the model from the last training epoch that was carried
     # out before early stopping terminated the training
-    save_dir = "saved_models/bert-german-doc-tutorial"
+    save_dir = Path("saved_models/bert-german-doc-tutorial")
     model.save(save_dir)
     processor.save(save_dir)
 

diff --git a/examples/doc_regression.py b/examples/doc_regression.py
@@ -1,5 +1,6 @@
 # fmt: off
 import logging
+from pathlib import Path
 
 from farm.data_handler.data_silo import DataSilo
 from farm.data_handler.processor import RegressionProcessor
@@ -41,7 +42,7 @@ def doc_regression():
     #    We do not have a sample dataset for regression yet, add your own dataset to run the example
     processor = RegressionProcessor(tokenizer=tokenizer,
                                     max_seq_len=128,
-                                    data_dir="../data/<YOUR-DATASET>",
+                                    data_dir=Path("../data/<YOUR-DATASET>"),
                                     label_column_name="label"
                                     )
 
@@ -85,7 +86,7 @@ def doc_regression():
     model = trainer.train(model)
 
     # 8. Hooray! You have a model. Store it:
-    save_dir = "saved_models/bert-doc-regression-tutorial"
+    save_dir = Path("saved_models/bert-doc-regression-tutorial")
     model.save(save_dir)
     processor.save(save_dir)