Skip to content

Commit

Permalink
Type cast strings to Path objects
Browse files: browse the repository at this point in the history
  • Loading branch information
tanaysoni committed Jan 16, 2020
1 parent a2e32fc commit a380d99
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 15 deletions.
7 changes: 4 additions & 3 deletions examples/doc_classification_cola.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# fmt: off
import logging
from pathlib import Path

from farm.data_handler.data_silo import DataSilo
from farm.data_handler.processor import TextClassificationProcessor
Expand Down Expand Up @@ -44,8 +45,8 @@ def doc_classification_cola():

processor = TextClassificationProcessor(tokenizer=tokenizer,
max_seq_len=64,
data_dir="../data/cola",
dev_filename="dev.tsv",
data_dir=Path("../data/cola"),
dev_filename=Path("dev.tsv"),
dev_split=None,
test_filename=None,
label_list=label_list,
Expand Down Expand Up @@ -97,7 +98,7 @@ def doc_classification_cola():
model = trainer.train(model)

# 8. Hooray! You have a model. Store it:
save_dir = "saved_models/bert-doc-tutorial"
save_dir = Path("saved_models/bert-doc-tutorial")
model.save(save_dir)
processor.save(save_dir)

Expand Down
7 changes: 4 additions & 3 deletions examples/doc_classification_crossvalidation.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# fmt: off
import logging
import json
from pathlib import Path

from farm.data_handler.data_silo import DataSilo, DataSiloForCrossVal
from farm.data_handler.processor import TextClassificationProcessor
Expand Down Expand Up @@ -83,7 +84,7 @@ def mymetrics(preds, labels):
label_list = ["OTHER", "OFFENSE"]
processor = TextClassificationProcessor(tokenizer=tokenizer,
max_seq_len=64,
data_dir="../data/germeval18",
data_dir=Path("../data/germeval18"),
label_list=label_list,
metric=metric,
label_column_name="coarse_label"
Expand Down Expand Up @@ -163,7 +164,7 @@ def train_on_split(silo_to_use, n_fold, save_dir):
all_labels = []
bestfold = None
bestf1_offense = -1
save_dir = "saved_models/bert-german-doc-tutorial-es"
save_dir = Path("saved_models/bert-german-doc-tutorial-es")
for num_fold, silo in enumerate(silos):
model = train_on_split(silo, num_fold, save_dir)

Expand Down Expand Up @@ -215,7 +216,7 @@ def train_on_split(silo_to_use, n_fold, save_dir):
)
# restore model from the best fold
lm_name = model.language_model.name
save_dir = "saved_models/bert-german-doc-tutorial-es-{}".format(bestfold)
save_dir = Path(f"saved_models/bert-german-doc-tutorial-es-{bestfold}")
model = AdaptiveModel.load(save_dir, device, lm_name=lm_name)
model.connect_heads_with_processor(data_silo.processor.tasks, require_labels=True)

Expand Down
9 changes: 5 additions & 4 deletions examples/doc_classification_multilabel_roberta.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# fmt: off
import logging
from pathlib import Path

from farm.data_handler.data_silo import DataSilo
from farm.data_handler.processor import TextClassificationProcessor
Expand Down Expand Up @@ -45,14 +46,14 @@ def doc_classification_multilabel_roberta():

processor = TextClassificationProcessor(tokenizer=tokenizer,
max_seq_len=128,
data_dir="../data/toxic-comments",
data_dir=Path("../data/toxic-comments"),
label_list=label_list,
label_column_name="label",
metric=metric,
quote_char='"',
multilabel=True,
train_filename="train.tsv",
dev_filename="val.tsv",
train_filename=Path("train.tsv"),
dev_filename=Path("val.tsv"),
test_filename=None,
dev_split=0
)
Expand Down Expand Up @@ -97,7 +98,7 @@ def doc_classification_multilabel_roberta():
model = trainer.train(model)

# 8. Hooray! You have a model. Store it:
save_dir = "saved_models/bert-multi-doc-roberta"
save_dir = Path("saved_models/bert-multi-doc-roberta")
model.save(save_dir)
processor.save(save_dir)

Expand Down
7 changes: 4 additions & 3 deletions examples/doc_classification_with_earlystopping.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# fmt: off
import logging
from pathlib import Path

from farm.data_handler.data_silo import DataSilo
from farm.data_handler.processor import TextClassificationProcessor
Expand Down Expand Up @@ -65,7 +66,7 @@ def mymetrics(preds, labels):

processor = TextClassificationProcessor(tokenizer=tokenizer,
max_seq_len=64,
data_dir="../data/germeval18",
data_dir=Path("../data/germeval18"),
label_list=label_list,
metric=metric,
label_column_name="coarse_label"
Expand Down Expand Up @@ -110,7 +111,7 @@ def mymetrics(preds, labels):
metric="f1_offense", mode="max", # use the metric from our own metrics function instead of loss
# metric="f1_macro", mode="max", # use f1_macro from the dev evaluator of the trainer
# metric="loss", mode="min", # use loss from the dev evaluator of the trainer
save_dir="saved_models/bert-german-doc-tutorial-es", # where to save the best model
save_dir=Path("saved_models/bert-german-doc-tutorial-es"), # where to save the best model
patience=5 # number of evaluations to wait for improvement before terminating the training
)

Expand All @@ -132,7 +133,7 @@ def mymetrics(preds, labels):
# defined with the EarlyStopping instance
# The model we have at this moment is the model from the last training epoch that was carried
# out before early stopping terminated the training
save_dir = "saved_models/bert-german-doc-tutorial"
save_dir = Path("saved_models/bert-german-doc-tutorial")
model.save(save_dir)
processor.save(save_dir)

Expand Down
5 changes: 3 additions & 2 deletions examples/doc_regression.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# fmt: off
import logging
from pathlib import Path

from farm.data_handler.data_silo import DataSilo
from farm.data_handler.processor import RegressionProcessor
Expand Down Expand Up @@ -41,7 +42,7 @@ def doc_regression():
# We do not have a sample dataset for regression yet; add your own dataset to run the example.
processor = RegressionProcessor(tokenizer=tokenizer,
max_seq_len=128,
data_dir="../data/<YOUR-DATASET>",
data_dir=Path("../data/<YOUR-DATASET>"),
label_column_name="label"
)

Expand Down Expand Up @@ -85,7 +86,7 @@ def doc_regression():
model = trainer.train(model)

# 8. Hooray! You have a model. Store it:
save_dir = "saved_models/bert-doc-regression-tutorial"
save_dir = Path("saved_models/bert-doc-regression-tutorial")
model.save(save_dir)
processor.save(save_dir)

Expand Down

0 comments on commit a380d99

Please sign in to comment.