Fixes for dependency parsing example script (#430)
calpt authored Oct 6, 2022
1 parent ca32936 commit f3be497
Showing 4 changed files with 104 additions and 17 deletions.
37 changes: 23 additions & 14 deletions examples/pytorch/dependency-parsing/run_udp.py
@@ -86,6 +86,8 @@ class DataTrainingArguments:
default=False,
metadata={"help": "Overwrite the cached training and evaluation sets."},
)
use_mock_data: bool = field(default=False)
evaluate_on: str = field(default="validation")


def main():
@@ -241,7 +243,16 @@ def main():
)

# Load and preprocess dataset
dataset = load_dataset("universal_dependencies", data_args.task_name)
if data_args.use_mock_data:
from datasets import Version, load_dataset_builder
from datasets.commands.dummy_data import MockDownloadManager

dataset_builder = load_dataset_builder("universal_dependencies", data_args.task_name)
mock_dl_manager = MockDownloadManager("universal_dependencies", dataset_builder.config, Version("2.7.0"))
dataset_builder.download_and_prepare(dl_manager=mock_dl_manager, ignore_verifications=True)
dataset = dataset_builder.as_dataset()
else:
dataset = load_dataset("universal_dependencies", data_args.task_name)
dataset = preprocess_dataset(dataset, tokenizer, labels, data_args, pad_token_id=-1)

# Initialize our Trainer
@@ -252,19 +263,21 @@
model=model,
args=training_args,
train_dataset=dataset["train"],
eval_dataset=dataset["validation"],
eval_dataset=dataset[data_args.evaluate_on],
)

# Training
if training_args.do_train:
trainer.train(
train_result = trainer.train(
model_path=model_args.model_name_or_path if os.path.isdir(model_args.model_name_or_path) else None
)
metrics = train_result.metrics

trainer.save_model()
# For convenience, we also re-save the tokenizer to the same directory,
# so that you can share your model easily on huggingface.co/models =)
if trainer.is_world_process_zero():
tokenizer.save_pretrained(training_args.output_dir)

trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

# Evaluation
results = {}
@@ -273,16 +286,12 @@ def main():

result = trainer.evaluate()

output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
if trainer.is_world_process_zero():
with open(output_eval_file, "w") as writer:
logger.info("***** Eval results *****")
for key, value in result.items():
logger.info(" %s = %s", key, value)
writer.write("%s = %s\n" % (key, value))

results.update(result)

trainer.log_metrics("eval", result)
trainer.save_metrics("eval", result)

# Predict
if training_args.do_predict:
logging.info("*** Test ***")
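For reference, the mock-data branch added above can be exercised on its own. A minimal sketch, assuming a datasets release that still ships MockDownloadManager (the en_ewt config mirrors the tests added further down):

    from datasets import Version, load_dataset_builder
    from datasets.commands.dummy_data import MockDownloadManager

    # Prepare universal_dependencies/en_ewt from dummy data instead of the full corpus.
    builder = load_dataset_builder("universal_dependencies", "en_ewt")
    mock_dl_manager = MockDownloadManager("universal_dependencies", builder.config, Version("2.7.0"))
    builder.download_and_prepare(dl_manager=mock_dl_manager, ignore_verifications=True)
    dataset = builder.as_dataset()  # tiny splits, intended for smoke tests only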
21 changes: 18 additions & 3 deletions examples/pytorch/dependency-parsing/utils_udp.py
@@ -211,6 +211,7 @@ def evaluate(
self,
eval_dataset: Optional[Dataset] = None,
prediction_loss_only: Optional[bool] = None,
metric_key_prefix: str = "eval",
) -> Dict[str, float]:
"""
Run evaluation and return metrics.
@@ -228,7 +229,12 @@ def evaluate(
"""
eval_dataloader = self.get_eval_dataloader(eval_dataset)

output = self._prediction_loop(eval_dataloader, description="Evaluation")
output = self._prediction_loop(
eval_dataloader,
description="Evaluation",
prediction_loss_only=prediction_loss_only,
metric_key_prefix=metric_key_prefix,
)

if self.args.store_best_model:
self.store_best_model(output)
@@ -297,7 +303,11 @@ def store_best_model(self, output):
f.write(str(output.metrics))

def _prediction_loop(
self, dataloader: DataLoader, description: str, prediction_loss_only: Optional[bool] = None
self,
dataloader: DataLoader,
description: str,
prediction_loss_only: Optional[bool] = None,
metric_key_prefix: str = "eval",
) -> PredictionOutput:
"""
Prediction/evaluation loop, shared by :obj:`Trainer.evaluate()` and :obj:`Trainer.predict()`.
@@ -351,7 +361,12 @@ def _prediction_loop(
metric.add(labels_arcs, labels_rels, predictions_arcs, predictions_rels)

results = metric.get_metric()
results[f"{description}_loss"] = np.mean(eval_losses)
results[f"{metric_key_prefix}_loss"] = np.mean(eval_losses)

# Prefix all keys with metric_key_prefix + '_'
for key in list(results.keys()):
if not key.startswith(f"{metric_key_prefix}_"):
results[f"{metric_key_prefix}_{key}"] = results.pop(key)

# Add predictions_rels to output, even though we are only interested in the metrics
return PredictionOutput(predictions=predictions_rels, label_ids=None, metrics=results)
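The key-prefixing loop added to _prediction_loop above only namespaces the parser metrics; a small illustration with made-up values:

    # Illustration only (values are invented): keys without the prefix are renamed,
    # keys that already carry it are left untouched.
    results = {"uas": 97.5, "las": 95.0, "eval_loss": 0.31}
    metric_key_prefix = "eval"
    for key in list(results.keys()):
        if not key.startswith(f"{metric_key_prefix}_"):
            results[f"{metric_key_prefix}_{key}"] = results.pop(key)
    # results == {"eval_loss": 0.31, "eval_uas": 97.5, "eval_las": 95.0}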
32 changes: 32 additions & 0 deletions examples/pytorch/test_adapter_examples.py
@@ -23,6 +23,7 @@
"question-answering",
"summarization",
"translation",
"dependency-parsing",
]
]
sys.path.extend(SRC_DIRS)
@@ -37,6 +38,7 @@
import run_summarization
import run_swag
import run_translation
import run_udp

logging.basicConfig(level=logging.DEBUG)

@@ -368,3 +370,33 @@ def test_run_ner_adapter(self):
self.assertGreaterEqual(result["eval_accuracy"], 0.75)
self.assertGreaterEqual(result["eval_precision"], 0.75)
self.assertLess(result["eval_loss"], 0.5)

def test_run_udp_adapter(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)

tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
run_udp.py
--model_name_or_path bert-base-uncased
--do_train
--do_eval
--task_name en_ewt
--use_mock_data
--evaluate_on train
--per_device_train_batch_size=2
--per_device_eval_batch_size=1
--learning_rate=5e-4
--max_steps=10
--output_dir {tmp_dir}
--overwrite_output_dir
--train_adapter
""".split()

if torch_device != "cuda":
testargs.append("--no_cuda")

with patch.object(sys, "argv", testargs):
run_udp.main()
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_uas"], 100.0)
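The --use_mock_data and --evaluate_on train flags in the test arguments above correspond to the two fields added to DataTrainingArguments in run_udp.py. A minimal sketch of that mapping, assuming HfArgumentParser handles the dataclass the same way as in the example scripts (MockDataArguments is a trimmed stand-in, not the real class):

    from dataclasses import dataclass, field

    from transformers import HfArgumentParser

    @dataclass
    class MockDataArguments:  # trimmed stand-in for the new DataTrainingArguments fields
        use_mock_data: bool = field(default=False)
        evaluate_on: str = field(default="validation")

    (args,) = HfArgumentParser(MockDataArguments).parse_args_into_dataclasses(
        ["--use_mock_data", "--evaluate_on", "train"]
    )
    assert args.use_mock_data and args.evaluate_on == "train"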
31 changes: 31 additions & 0 deletions examples/pytorch/test_pytorch_examples.py
@@ -45,6 +45,7 @@
"speech-pretraining",
"image-pretraining",
"semantic-segmentation",
"dependency-parsing",
]
]
sys.path.extend(SRC_DIRS)
@@ -67,6 +68,7 @@
import run_summarization
import run_swag
import run_translation
import run_udp
import run_wav2vec2_pretraining_no_trainer


@@ -586,3 +588,32 @@ def test_run_semantic_segmentation(self):
run_semantic_segmentation.main()
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_overall_accuracy"], 0.1)

def test_run_udp(self):
stream_handler = logging.StreamHandler(sys.stdout)
logger.addHandler(stream_handler)

tmp_dir = self.get_auto_remove_tmp_dir()
testargs = f"""
run_udp.py
--model_name_or_path bert-base-uncased
--do_train
--do_eval
--task_name en_ewt
--use_mock_data
--evaluate_on train
--per_device_train_batch_size=2
--per_device_eval_batch_size=1
--learning_rate=5e-4
--max_steps=10
--output_dir {tmp_dir}
--overwrite_output_dir
""".split()

if torch_device != "cuda":
testargs.append("--no_cuda")

with patch.object(sys, "argv", testargs):
run_udp.main()
result = get_results(tmp_dir)
self.assertGreaterEqual(result["eval_uas"], 100.0)
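Both new tests read their metrics back through the get_results helper defined in these test files; a hypothetical sketch of that lookup, assuming Trainer.save_metrics left a combined all_results.json in the output directory:

    import json
    import os

    def get_results(output_dir: str) -> dict:
        # Roughly what the test helper does: load the combined metrics file that
        # Trainer.save_metrics() wrote during training/evaluation.
        path = os.path.join(output_dir, "all_results.json")
        with open(path) as f:
            return json.load(f)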
