From 69c68901a2e7d251be5c43fa785cb44b24f1c875 Mon Sep 17 00:00:00 2001
From: timoeller
Date: Tue, 18 Feb 2020 14:56:29 +0100
Subject: [PATCH 1/2] Bugfix parameter loading through config, adjust configs, add bert2.0 eval configs

---
 .../germEval14_config.json                    | 66 +++++++++++++++++
 .../germEval18Coarse_config.json              | 61 ++++++++++++++++
 .../germEval18Fine_config.json                | 62 ++++++++++++++++
 experiments/ner/conll2003_de_config.json      |  2 +-
 experiments/ner/conll2003_en_config.json      |  2 +-
 experiments/ner/germEval14_config.json        |  6 +-
 .../text_classification/cola_config.json      |  2 +-
 .../germEval18Coarse_config.json              |  2 +-
 .../germEval18Fine_config.json                |  2 +-
 .../text_classification/gnad_config.json      |  2 +-
 .../xlm_roberta_eval/conll2003_de_config.json |  2 +-
 .../xlm_roberta_eval/germEval14_config.json   |  2 +-
 .../germEval18Coarse_config.json              |  2 +-
 farm/experiment.py                            | 11 ++-
 farm/file_utils.py                            | 72 ++++++------------
 15 files changed, 228 insertions(+), 68 deletions(-)
 create mode 100644 experiments/german-bert2.0-eval/germEval14_config.json
 create mode 100644 experiments/german-bert2.0-eval/germEval18Coarse_config.json
 create mode 100644 experiments/german-bert2.0-eval/germEval18Fine_config.json

diff --git a/experiments/german-bert2.0-eval/germEval14_config.json b/experiments/german-bert2.0-eval/germEval14_config.json
new file mode 100644
index 000000000..6aa05c750
--- /dev/null
+++ b/experiments/german-bert2.0-eval/germEval14_config.json
@@ -0,0 +1,66 @@
+{
+  "general": {
+    "cache_dir": {"value": null, "default": "", "desc": "Path for storing pre-trained models downloaded from s3."},
+    "data_dir": {"value": null, "default": "data/germeval14", "desc": "Input directory for downstream task. Should contain train + test (+ dev) files."},
+    "output_dir": {"value": null, "default": "saved_models", "desc": "Output directory where model predictions and checkpoints will be saved."},
+
+    "cuda": {"value": null, "default": true, "desc": "CUDA flag, uses CUDA if available."},
+    "local_rank": {"value": null, "default": -1, "desc": "If local_rank == -1 -> multiGPU mode on one machine, other values signal distributed computation across several nodes (apex install required)."},
+    "use_amp": {"value": null, "default": false, "desc": "Automatic mixed precision with APEX. Must be set to null to disable or to any optimisation level (see apex documentation). 'O1' is recommended."},
+    "seed": {"value": null, "default": 42, "desc": "Random seed for initializations."}
+  },
+
+  "task": {
+    "name": {"value": null, "default": "GermEval14", "desc": "Name of task."},
+    "type": {"value": null, "default": "ner"},
+    "language": {"value": null, "default": "de"},
+    "do_eval": {"value": null, "default": true, "desc": "Whether to run eval on the dev set."},
+    "do_train": {"value": null, "default": true, "desc": "Whether to run training. Can be used to only evaluate on an already trained model."},
+
+    "processor_name": {"value": null, "default": "NERProcessor", "desc": "A Dataprocessor that is suited for tabular data. Needs special data parameters defined."},
+    "dev_split": {"value": null, "default": 0.1, "desc": "Split a dev set from the training set using dev_split as proportion."},
+    "train_filename": {"value": null, "default": "train.txt", "desc": "Filename for training."},
+    "dev_filename": {"value": null, "default": null, "desc": "Filename for development. Missing in case of GermEval2018."},
+    "test_filename": {"value": null, "default": "test.txt", "desc": "Filename for testing. It is the submission file from the competition."},
+    "delimiter": {"value": null, "default": " ", "desc": "Delimiter used to separate columns in input data."},
+    "label_list": {"value": null, "default": ["[PAD]", "X", "O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-OTH", "I-OTH"], "desc": ""},
+    "metric": {"value": null, "default": "seq_f1", "desc": "Metric used. An F1 score tailored to sequences of labels."}
+  },
+
+  "parameter": {
+    "model": {"value": "bert-base-german-cased", "default": null, "desc": "Bert pre-trained model selected in the list: bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese."},
+    "prediction_head": {"value": null, "default": "TokenClassificationHead", "desc": "Kind of prediction head we use on top of Language Model"},
+    "lm_output_type": {"value": null, "default": "per_token", "desc": "Language Model output."},
+    "lower_case": {"value": null, "default": false, "desc": "Set to true if you are using an uncased model."},
+    "max_seq_len": {"value": null, "default": 128, "desc": "The maximum total input sequence length after WordPiece tokenization."},
+    "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
+
+    "epochs": {"value": null, "default": 4, "desc": "Total number of training epochs to perform."},
+    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on a single V100 GPU."},
+    "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
+    "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
+    "layer_dims": {"value": null, "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
+  },
+
+  "optimizer": {
+    "learning_rate": {"value": null, "default": 5e-5, "desc": "The learning rate for the optimizer."},
+    "optimizer_opts": {"value": null, "default": null, "desc": "Additional optimizer config."},
+    "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.4}, "desc": "opts for lr schedule"}
+  },
+
+  "logging": {
+    "eval_every": {"value": null, "default": 60, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
+    "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
+
+    "mlflow_experiment": {"value": "Public_FARM", "default": null, "desc": "Experiment name used for mlflow"},
+    "mlflow_run_name": {"value": "germeval14 by config", "default": null, "desc": "Name of the particular run for mlflow"}
+  }
+}
+
+
+
+
+
+
+
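Note on the schema above: every entry carries a value/default/desc triple, and at load time a parameter resolves to "value" when that is non-null, falling back to "default" otherwise — the rule that the read_config change further down implements. A minimal standalone sketch of that resolution, assuming only the two-level section/parameter layout shown above (resolve_config is a hypothetical helper, not part of FARM):

    import json

    def resolve_config(path):
        # Resolve each {"value": ..., "default": ..., "desc": ...} triple to a
        # single setting: take "value" if it is set, else fall back to "default".
        with open(path) as f:
            conf = json.load(f)
        for section, params in conf.items():
            for name, spec in params.items():
                conf[section][name] = spec["value"] if spec["value"] is not None else spec["default"]
        return conf

    # resolve_config("experiments/german-bert2.0-eval/germEval14_config.json")["task"]["name"]
    # -> "GermEval14" (taken from "default", since "value" is null)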
diff --git a/experiments/german-bert2.0-eval/germEval18Coarse_config.json b/experiments/german-bert2.0-eval/germEval18Coarse_config.json
new file mode 100644
index 000000000..daa0e465b
--- /dev/null
+++ b/experiments/german-bert2.0-eval/germEval18Coarse_config.json
@@ -0,0 +1,61 @@
+{
+  "general": {
+    "cache_dir": {"value": null, "default": "", "desc": "Path for storing pre-trained models downloaded from s3."},
+    "data_dir": {"value": null, "default": "data/germeval18", "desc": "Input directory for downstream task. Should contain train + test (+ dev) files."},
+    "output_dir": {"value": null, "default": "saved_models", "desc": "Output directory where model predictions and checkpoints will be saved."},
+
+    "cuda": {"value": null, "default": true, "desc": "CUDA flag, uses CUDA if available."},
+    "local_rank": {"value": null, "default": -1, "desc": "If local_rank == -1 -> multiGPU mode on one machine, other values signal distributed computation across several nodes (apex install required)."},
+    "use_amp": {"value": null, "default": null, "desc": "Automatic mixed precision with APEX. Must be set to null to disable or to any optimisation level (see apex documentation). 'O1' is recommended."},
+    "seed": {"value": null, "default": 42, "desc": "Random seed for initializations."}
+  },
+
+  "task": {
+    "type": {"value": null, "default": "text_classification"},
+    "language": {"value": null, "default": "de"},
+    "name": {"value": null, "default": "GermEval18Coarse", "desc": "GermEval18Coarse: binary offensive language detection."},
+    "do_eval": {"value": null, "default": true, "desc": "Whether to run eval on the dev set."},
+    "do_train": {"value": null, "default": true, "desc": "Whether to run training. Can be used to only evaluate on an already trained model."},
+
+    "processor_name": {"value": null, "default": "TextClassificationProcessor", "desc": "A Dataprocessor that is suited for tabular data. Needs special data parameters defined."},
+    "dev_split": {"value": null, "default": 0.1, "desc": "Split a dev set from the training set using dev_split as proportion."},
+    "train_filename": {"value": null, "default": "train.tsv", "desc": "Filename for training."},
+    "dev_filename": {"value": null, "default": null, "desc": "Filename for development. Missing in case of GermEval2018."},
+    "test_filename": {"value": null, "default": "test.tsv", "desc": "Filename for testing. It is the submission file from the competition."},
+    "delimiter": {"value": null, "default": "\t", "desc": "Delimiter used to separate columns in input data."},
+    "columns": {"value": null, "default": ["text", "label", "unused"], "desc": "Columns specifying position of text and labels in data files."},
+    "label_list": {"value": null, "default": ["OTHER", "OFFENSE"], "desc": "List of possible labels."},
+    "metric": {"value": null, "default": "f1_macro", "desc": "Metric used. The competition uses a macro-averaged F1 score."},
+    "label_column_name": {"value": null, "default": "coarse_label", "desc": "Name of the field that the label comes from in the datasource"},
+    "skiprows": {"value": null, "default": null, "desc": ""}
+  },
+  "parameter": {
+    "model": {"value": "bert-base-german-cased", "default": null, "desc": "Bert pre-trained model selected in the list: bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese."},
+    "prediction_head": {"value": null, "default": "TextClassificationHead", "desc": "Kind of prediction head we use on top of Language Model"},
+    "lm_output_type": {"value": null, "default": "per_sequence", "desc": "Language Model output."},
+    "lower_case": {"value": null, "default": false, "desc": "Set to true if you are using an uncased model."},
+    "max_seq_len": {"value": null, "default": 150, "desc": "The maximum total input sequence length after WordPiece tokenization. 128 was too short for some texts."},
+    "balance_classes": {"value": null, "default": true, "desc": "Balance classes using weighted CrossEntropyLoss. Original train set from GermEval18 is skewed and the final evaluation is macro averaged, so we need to balance for optimal performance."},
+
+    "epochs": {"value": null, "default": 2.0, "desc": "Total number of training epochs to perform."},
+    "batch_size": {"value": null, "default": 48, "desc": "Total batch size for training on a single V100 GPU."},
+    "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
+    "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
+    "layer_dims": {"value": null, "default": [768, 2], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
+  },
+  "optimizer": {
+    "learning_rate": {"value": null, "default": 2e-5, "desc": "The initial learning rate for AdamW."},
+    "optimizer_opts": {"value": null, "default": null, "desc": "Additional optimizer config."},
+    "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.2}, "desc": "opts for lr schedule"}
+  },
+  "logging": {
+    "eval_every": {"value": null, "default": 30, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
+    "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
+
+    "mlflow_experiment": {"value": "Public_FARM", "default": null, "desc": "Experiment name used for mlflow"},
+    "mlflow_run_name": {"value": "germeval coarse by config", "default": null, "desc": "Name of the particular run for mlflow"}
+  }
+}
+
+
diff --git a/experiments/german-bert2.0-eval/germEval18Fine_config.json b/experiments/german-bert2.0-eval/germEval18Fine_config.json
new file mode 100644
index 000000000..213b61cc9
--- /dev/null
+++ b/experiments/german-bert2.0-eval/germEval18Fine_config.json
@@ -0,0 +1,62 @@
+{
+  "general": {
+    "cache_dir": {"value": null, "default": "", "desc": "Path for storing pre-trained models downloaded from s3."},
+    "data_dir": {"value": null, "default": "data/germeval18", "desc": "Input directory for downstream task. Should contain train + test (+ dev) files."},
+    "output_dir": {"value": null, "default": "saved_models", "desc": "Output directory where model predictions and checkpoints will be saved."},
+
+    "cuda": {"value": null, "default": true, "desc": "CUDA flag, uses CUDA if available."},
+    "local_rank": {"value": null, "default": -1, "desc": "If local_rank == -1 -> multiGPU mode on one machine, other values signal distributed computation across several nodes (apex install required)."},
+    "use_amp": {"value": null, "default": null, "desc": "Automatic mixed precision with APEX. Must be set to null to disable or to any optimisation level (see apex documentation). 'O1' is recommended."},
+    "seed": {"value": null, "default": 42, "desc": "Random seed for initializations."}
+  },
+
+  "task": {
+    "type": {"value": null, "default": "text_classification"},
+    "language": {"value": null, "default": "de"},
+    "name": {"value": null, "default": "GermEval18Fine", "desc": "GermEval18Fine: Fine-grained multiclass offensive language detection, 4 classes."},
+    "do_eval": {"value": null, "default": true, "desc": "Whether to run eval on the dev set."},
+    "do_train": {"value": null, "default": true, "desc": "Whether to run training. Can be used to only evaluate on an already trained model."},
+
+    "processor_name": {"value": null, "default": "TextClassificationProcessor", "desc": "A Dataprocessor that is suited for tabular data. Needs special data parameters defined."},
+    "dev_split": {"value": null, "default": 0.1, "desc": "Split a dev set from the training set using dev_split as proportion."},
+    "train_filename": {"value": null, "default": "train.tsv", "desc": "Filename for training."},
+    "dev_filename": {"value": null, "default": null, "desc": "Filename for development. Missing in case of GermEval2018."},
+    "test_filename": {"value": null, "default": "test.tsv", "desc": "Filename for testing. It is the submission file from the competition."},
+    "delimiter": {"value": null, "default": "\t", "desc": "Delimiter used to separate columns in input data."},
+    "columns": {"value": null, "default": ["text", "unused", "label"], "desc": "Columns specifying position of text and labels in data files."},
+    "label_list": {"value": null, "default": ["OTHER", "INSULT", "ABUSE", "PROFANITY"], "desc": "List of possible labels."},
+    "metric": {"value": null, "default": "f1_macro", "desc": "Metric used. The competition uses a macro-averaged F1 score."},
+    "label_column_name": {"value": null, "default": "fine_label", "desc": "Name of the field that the label comes from in the datasource"},
+    "skiprows": {"value": null, "default": null, "desc": ""}
+  },
+
+  "parameter": {
+    "model": {"value": "bert-base-german-cased", "default": null, "desc": "Bert pre-trained model selected in the list: bert-base-uncased, bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, bert-base-multilingual-cased, bert-base-chinese."},
+    "prediction_head": {"value": null, "default": "TextClassificationHead", "desc": "Kind of prediction head we use on top of Language Model"},
+    "lm_output_type": {"value": null, "default": "per_sequence", "desc": "Language Model output."},
+    "lower_case": {"value": null, "default": false, "desc": "Set to true if you are using an uncased model."},
+    "max_seq_len": {"value": null, "default": 150, "desc": "The maximum total input sequence length after WordPiece tokenization. 128 was too short for some texts."},
+    "balance_classes": {"value": null, "default": true, "desc": "Balance classes using weighted CrossEntropyLoss. Original train set from GermEval18 is skewed and the final evaluation is macro averaged, so we need to balance for optimal performance."},
+
+    "epochs": {"value": null, "default": 3.0, "desc": "Total number of training epochs to perform."},
+    "batch_size": {"value": null, "default": 48, "desc": "Total batch size for training on a single V100 GPU."},
+    "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
+    "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
+    "layer_dims": {"value": null, "default": [768, 4], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
+  },
+  "optimizer": {
+    "learning_rate": {"value": null, "default": 5e-5, "desc": "The initial learning rate for AdamW."},
+    "optimizer_opts": {"value": null, "default": null, "desc": "Additional optimizer config."},
+    "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.2}, "desc": "opts for lr schedule"}
+  },
+  "logging": {
+    "eval_every": {"value": null, "default": 30, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
+    "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
+
+    "mlflow_experiment": {"value": "Public_FARM", "default": null, "desc": "Experiment name used for mlflow"},
+    "mlflow_run_name": {"value": "germeval fine by config", "default": null, "desc": "Name of the particular run for mlflow"}
+  }
+}
+
+
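The three new eval configs above are consumed through FARM's experiment runner. With the loader fix in this patch (see farm/experiment.py below), running one of them boils down to something like the following sketch, assuming the load_experiments/run_experiment functions as touched by this diff:

    from farm.experiment import load_experiments, run_experiment

    # One config file may expand into several experiments if any parameter
    # holds a list of values (see unnestConfig in farm/file_utils.py below).
    experiments = load_experiments("experiments/german-bert2.0-eval/germEval18Coarse_config.json")
    for args in experiments:
        run_experiment(args)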
diff --git a/experiments/ner/conll2003_de_config.json b/experiments/ner/conll2003_de_config.json
index 3d1e538c5..0c563b906 100644
--- a/experiments/ner/conll2003_de_config.json
+++ b/experiments/ner/conll2003_de_config.json
@@ -34,7 +34,7 @@
     "max_seq_len": {"value": null, "default": 128, "desc": "The maximum total input sequence length after WordPiece tokenization. Some GNAD texts even extend beyond 512 tokens."},
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
     "epochs": {"value": null, "default": 2, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on one V100 GPU. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on a single V100 GPU."},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/ner/conll2003_en_config.json b/experiments/ner/conll2003_en_config.json
index 7dd4c0364..1f5282a33 100644
--- a/experiments/ner/conll2003_en_config.json
+++ b/experiments/ner/conll2003_en_config.json
@@ -36,7 +36,7 @@
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
 
     "epochs": {"value": null, "default": 2, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on one V100 GPU. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on a single V100 GPU."},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/ner/germEval14_config.json b/experiments/ner/germEval14_config.json
index 625f01c91..6aa05c750 100644
--- a/experiments/ner/germEval14_config.json
+++ b/experiments/ner/germEval14_config.json
@@ -18,9 +18,9 @@
     "do_train": {"value": null, "default": true, "desc": "Whether to run training. Can be used to only evaluate on an already trained model."},
 
     "processor_name": {"value": null, "default": "NERProcessor", "desc": "A Dataprocessor that is suited for tabular data. Needs special data parameters defined."},
-    "dev_split": {"value": null, "default": 0.0, "desc": "Split a dev set from the training set using dev_split as proportion."},
+    "dev_split": {"value": null, "default": 0.1, "desc": "Split a dev set from the training set using dev_split as proportion."},
     "train_filename": {"value": null, "default": "train.txt", "desc": "Filename for training."},
-    "dev_filename": {"value": null, "default": "dev.txt", "desc": "Filename for development. Missing in case of GermEval2018."},
+    "dev_filename": {"value": null, "default": null, "desc": "Filename for development. Missing in case of GermEval2018."},
     "test_filename": {"value": null, "default": "test.txt", "desc": "Filename for testing. It is the submission file from the competition."},
     "delimiter": {"value": null, "default": " ", "desc": "Delimiter used to separate columns in input data."},
     "label_list": {"value": null, "default": ["[PAD]", "X", "O", "B-MISC", "I-MISC", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC", "B-OTH", "I-OTH"], "desc": ""},
@@ -36,7 +36,7 @@
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
 
     "epochs": {"value": null, "default": 4, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on one V100 GPU. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 64, "desc": "Total batch size for training on a single V100 GPU."},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/text_classification/cola_config.json b/experiments/text_classification/cola_config.json
index 140908991..72aca0bc7 100644
--- a/experiments/text_classification/cola_config.json
+++ b/experiments/text_classification/cola_config.json
@@ -37,7 +37,7 @@
     "max_seq_len": {"value": null, "default": 64, "desc": "The maximum total input sequence length after WordPiece tokenization. Some GNAD texts even extend beyond 512 tokens."},
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
     "epochs": {"value": null, "default": 2, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 100, "desc": "Total batch size for training for single GPU v100. Only low values possible because of large sequence length. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 100, "desc": ""},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 2], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/text_classification/germEval18Coarse_config.json b/experiments/text_classification/germEval18Coarse_config.json
index 8a80f0a67..daa0e465b 100644
--- a/experiments/text_classification/germEval18Coarse_config.json
+++ b/experiments/text_classification/germEval18Coarse_config.json
@@ -38,7 +38,7 @@
     "balance_classes": {"value": null, "default": true, "desc": "Balance classes using weighted CrossEntropyLoss. Original train set from GermEval18 is skewed and the final evaluation is macro averaged, so we need to balance for optimal performance."},
 
     "epochs": {"value": null, "default": 2.0, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 48, "desc": "Total batch size for training for single GPU v100. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 48, "desc": "Total batch size for training on a single V100 GPU."},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 2], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/text_classification/germEval18Fine_config.json b/experiments/text_classification/germEval18Fine_config.json
index 703ae1400..213b61cc9 100644
--- a/experiments/text_classification/germEval18Fine_config.json
+++ b/experiments/text_classification/germEval18Fine_config.json
@@ -39,7 +39,7 @@
     "balance_classes": {"value": null, "default": true, "desc": "Balance classes using weighted CrossEntropyLoss. Original train set from GermEval18 is skewed and the final evaluation is macro averaged, so we need to balance for optimal performance."},
 
     "epochs": {"value": null, "default": 3.0, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 48, "desc": "Total batch size for training for single GPU v100. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 48, "desc": "Total batch size for training on a single V100 GPU."},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 4], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/text_classification/gnad_config.json b/experiments/text_classification/gnad_config.json
index 065eb7089..e3564cb4e 100644
--- a/experiments/text_classification/gnad_config.json
+++ b/experiments/text_classification/gnad_config.json
@@ -38,7 +38,7 @@
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
 
     "epochs": {"value": null, "default": 2, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": null, "default": 8, "desc": "Total batch size for training for single GPU v100. Only low values possible because of large sequence length. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": null, "default": 8, "desc": "Total batch size for training on a single V100 GPU. Only low values possible because of large sequence length."},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": null, "default": [768, 9], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/xlm_roberta_eval/conll2003_de_config.json b/experiments/xlm_roberta_eval/conll2003_de_config.json
index c1340a1f5..adfb22a4f 100644
--- a/experiments/xlm_roberta_eval/conll2003_de_config.json
+++ b/experiments/xlm_roberta_eval/conll2003_de_config.json
@@ -34,7 +34,7 @@
     "max_seq_len": {"value": null, "default": 128, "desc": "The maximum total input sequence length after WordPiece tokenization. Some GNAD texts even extend beyond 512 tokens."},
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
     "epochs": {"value": 4, "default": 5, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": 32, "default": 32, "desc": "Total batch size for training on one V100 GPU. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": 32, "default": 32, "desc": ""},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": [1024, 15], "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/xlm_roberta_eval/germEval14_config.json b/experiments/xlm_roberta_eval/germEval14_config.json
index 4989d93cc..bdb7af1bc 100644
--- a/experiments/xlm_roberta_eval/germEval14_config.json
+++ b/experiments/xlm_roberta_eval/germEval14_config.json
@@ -36,7 +36,7 @@
     "balance_classes": {"value": null, "default": false, "desc": "Balance classes using weighted CrossEntropyLoss."},
 
     "epochs": {"value": 5, "default": 4, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": 8, "default": 64, "desc": "Total batch size for training on one V100 GPU. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": 8, "default": 64, "desc": ""},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": [1024, 15], "default": [768, 15], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/experiments/xlm_roberta_eval/germEval18Coarse_config.json b/experiments/xlm_roberta_eval/germEval18Coarse_config.json
index 56891de36..d64a9663a 100644
--- a/experiments/xlm_roberta_eval/germEval18Coarse_config.json
+++ b/experiments/xlm_roberta_eval/germEval18Coarse_config.json
@@ -38,7 +38,7 @@
     "balance_classes": {"value": null, "default": true, "desc": "Balance classes using weighted CrossEntropyLoss. Original train set from GermEval18 is skewed and the final evaluation is macro averaged, so we need to balance for optimal performance."},
 
     "epochs": {"value": 10, "default": 2.0, "desc": "Total number of training epochs to perform."},
-    "batch_size": {"value": 8, "default": 48, "desc": "Total batch size for training for single GPU v100. If using multiGPU, the total batch size will be automatically adjusted."},
+    "batch_size": {"value": 8, "default": 48, "desc": ""},
     "gradient_accumulation_steps": {"value": null, "default": 1, "desc": "Number of update steps (batches) to accumulate before performing a backward/update pass."},
     "embeds_dropout_prob": {"value": null, "default": 0.1, "desc": "Strength of dropout to be applied to the word embeddings generated by the language model."},
     "layer_dims": {"value": [1024,2], "default": [768, 2], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
diff --git a/farm/experiment.py b/farm/experiment.py
index af2d86ff5..7f699939f 100644
--- a/farm/experiment.py
+++ b/farm/experiment.py
@@ -23,8 +23,8 @@
 
 
 def load_experiments(file):
-    args = read_config(file, flattend=False)
-    experiments = unnestConfig(args, flattened=False)
+    args = read_config(file)
+    experiments = unnestConfig(args)
     return experiments
 
 
@@ -54,8 +54,7 @@ def run_experiment(args):
     args.parameter.batch_size = int(
         args.parameter.batch_size // args.parameter.gradient_accumulation_steps
     )
-    # if n_gpu > 1:
-    #     args.parameter.batch_size = args.parameter.batch_size * n_gpu
+
     set_all_seeds(args.general.seed)
 
     # Prepare Data
@@ -94,8 +93,8 @@
     )
 
     # Init optimizer
-    optimizer_opts = dict(args.optimizer.optimizer_opts) if args.optimizer.optimizer_opts else None
-    schedule_opts = dict(args.optimizer.schedule_opts) if args.optimizer.schedule_opts else None
+    optimizer_opts = args.optimizer.optimizer_opts.toDict() if args.optimizer.optimizer_opts else None
+    schedule_opts = args.optimizer.schedule_opts.toDict() if args.optimizer.schedule_opts else None
     model, optimizer, lr_schedule = initialize_optimizer(
         model=model,
         learning_rate=args.optimizer.learning_rate,
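The dict(...) -> .toDict() switch above appears to be the parameter-loading bugfix named in the commit message: args comes out of read_config as a nested DotMap, and a plain dict(...) call only converts the outermost mapping, so any nested options would still arrive downstream as DotMap instances, whereas DotMap.toDict() converts recursively. A rough illustration of the difference, assuming the dotmap package (the nested "extra" value is made up for the demo):

    from dotmap import DotMap

    opts = DotMap({"name": "LinearWarmup", "extra": {"warmup_proportion": 0.2}}, _dynamic=False)

    shallow = dict(opts)   # top level becomes a dict, but shallow["extra"] is still a DotMap
    deep = opts.toDict()   # plain dicts all the way down
    print(type(shallow["extra"]).__name__, type(deep["extra"]).__name__)  # DotMap dict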
diff --git a/farm/file_utils.py b/farm/file_utils.py
index 03841c52e..11b22ac85 100644
--- a/farm/file_utils.py
+++ b/farm/file_utils.py
@@ -282,82 +282,54 @@ def get_file_extension(path, dot=True, lower=True):
     return ext.lower() if lower else ext
 
 
-def read_config(path, flattend=False):
+def read_config(path):
     if path:
         with open(path) as json_data_file:
             conf_args = json.load(json_data_file)
     else:
         raise ValueError("No config provided for classifier")
 
-    def getArgValue(arg):
-        if "value" not in arg:
-            logger.error(
-                "Only depth 2 config files supported. Failed to convert: %s" % str(arg)
-            )
-        return arg["value"] if (arg["value"] is not None) else arg["default"]
-
     # flatten last part of config, take either value or default as value
     for gk, gv in conf_args.items():
         for k, v in gv.items():
-            if isinstance(getArgValue(v), dict):
-                logger.error("Config is too deeply nested, at %s" % str(v))
-            conf_args[gk][k] = getArgValue(v)
+            conf_args[gk][k] = v["value"] if (v["value"] is not None) else v["default"]
 
     # DotMap for making nested dictionary accessible through dot notation
-    flat_args = dict(
-        conf_args["general"],
-        **conf_args["task"],
-        **conf_args["parameter"],
-        **conf_args["logging"],
-    )
-    if flattend:
-        args = DotMap(flat_args, _dynamic=False)
-    else:
-        args = DotMap(conf_args, _dynamic=False)
+    args = DotMap(conf_args, _dynamic=False)
 
     return args
 
 
-def unnestConfig(config, flattened=False):
+def unnestConfig(config):
     """
     This function creates a list of config files for evaluating parameters with different values. If a config
     parameter is of type list this list is iterated over and a config object without lists is returned.
     Can handle lists inside any number of parameters.
-    Can handle shallow or nested (one level) configs
+    Can handle nested (one level) configs
     """
     nestedKeys = []
     nestedVals = []
-    if flattened:
-        for k, v in config.items():
-            if isinstance(v, list):
-                if k != "layer_dims":  # exclude layer dims, since it is already a list
-                    nestedKeys.append(k)
-                    nestedVals.append(v)
-    else:
-        for gk, gv in config.items():
-            if(gk != "task"):
-                for k, v in gv.items():
-                    if isinstance(v, list):
-                        if isinstance(v, list):
-                            if (
-                                k != "layer_dims"
-                            ):  # exclude layer dims, since it is already a list
-                                nestedKeys.append([gk, k])
-                                nestedVals.append(v)
-                    elif isinstance(v, dict):
-                        logger.error("Config too deep!")
+
+    for gk, gv in config.items():
+        if(gk != "task"):
+            for k, v in gv.items():
+                if isinstance(v, list):
+                    if (
+                        k != "layer_dims"
+                    ):  # exclude layer dims, since it is already a list
+                        nestedKeys.append([gk, k])
+                        nestedVals.append(v)
+                elif isinstance(v, dict):
+                    logger.warning("Config too deep! Working on %s" %(str(v)))
 
     if len(nestedKeys) == 0:
         unnestedConfig = [config]
     else:
-        if flattened:
-            logger.info("Nested config at parameters: %s" % (", ".join(nestedKeys)))
-        else:
-            logger.info(
-                "Nested config at parameters: %s"
-                % (", ".join(".".join(x) for x in nestedKeys))
-            )
+        logger.info(
+            "Nested config at parameters: %s"
+            % (", ".join(".".join(x) for x in nestedKeys))
+        )
         unnestedConfig = []
         mesh = np.meshgrid(
             *nestedVals
@@ -376,7 +348,7 @@
         elif len(k) == 2:
             tempconfig[k[0]][k[1]] = mesh[j][i]  # set nested dictionary keys
         else:
-            logger.error("Config too deep!")
+            logger.warning("Config too deep! Working on %s" %(str(k)))
         unnestedConfig.append(tempconfig)
 
     return unnestedConfig
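To make unnestConfig's grid expansion concrete: every list-valued parameter (except layer_dims) contributes one axis to an np.meshgrid cross-product, and one flat config is emitted per combination. A small sketch of just that expansion step, with hypothetical values (not a test from the repo):

    import numpy as np

    # e.g. optimizer.learning_rate = [2e-5, 5e-5] and parameter.epochs = [2, 3, 4]
    nestedVals = [[2e-5, 5e-5], [2, 3, 4]]
    mesh = [m.flatten() for m in np.meshgrid(*nestedVals)]
    combos = list(zip(*mesh))
    # combos -> [(2e-05, 2), (5e-05, 2), (2e-05, 3), (5e-05, 3), (2e-05, 4), (5e-05, 4)]
    # unnestConfig writes each combination back into a copy of the config,
    # yielding six runnable experiment configs from one file.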
From 03ceab0bb00864a0f181ca11d26ac7fbee9761e8 Mon Sep 17 00:00:00 2001
From: timoeller
Date: Tue, 18 Feb 2020 15:03:56 +0100
Subject: [PATCH 2/2] Adjust configs

---
 experiments/german-bert2.0-eval/germEval18Coarse_config.json | 2 +-
 experiments/german-bert2.0-eval/germEval18Fine_config.json   | 4 ++--
 experiments/text_classification/germEval18Coarse_config.json | 2 +-
 experiments/text_classification/germEval18Fine_config.json   | 4 ++--
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/experiments/german-bert2.0-eval/germEval18Coarse_config.json b/experiments/german-bert2.0-eval/germEval18Coarse_config.json
index daa0e465b..8be6befac 100644
--- a/experiments/german-bert2.0-eval/germEval18Coarse_config.json
+++ b/experiments/german-bert2.0-eval/germEval18Coarse_config.json
@@ -49,7 +49,7 @@
     "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.2}, "desc": "opts for lr schedule"}
   },
   "logging": {
-    "eval_every": {"value": null, "default": 30, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "eval_every": {"value": null, "default": 50, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
     "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
     "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
 
diff --git a/experiments/german-bert2.0-eval/germEval18Fine_config.json b/experiments/german-bert2.0-eval/germEval18Fine_config.json
index 213b61cc9..719da244b 100644
--- a/experiments/german-bert2.0-eval/germEval18Fine_config.json
+++ b/experiments/german-bert2.0-eval/germEval18Fine_config.json
@@ -45,12 +45,12 @@
     "layer_dims": {"value": null, "default": [768, 4], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
   },
   "optimizer": {
-    "learning_rate": {"value": null, "default": 5e-5, "desc": "The initial learning rate for AdamW."},
+    "learning_rate": {"value": null, "default": 2e-5, "desc": "The initial learning rate for AdamW."},
     "optimizer_opts": {"value": null, "default": null, "desc": "Additional optimizer config."},
     "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.2}, "desc": "opts for lr schedule"}
   },
   "logging": {
-    "eval_every": {"value": null, "default": 30, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "eval_every": {"value": null, "default": 50, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
     "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
     "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
 
diff --git a/experiments/text_classification/germEval18Coarse_config.json b/experiments/text_classification/germEval18Coarse_config.json
index daa0e465b..8be6befac 100644
--- a/experiments/text_classification/germEval18Coarse_config.json
+++ b/experiments/text_classification/germEval18Coarse_config.json
@@ -49,7 +49,7 @@
     "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.2}, "desc": "opts for lr schedule"}
   },
   "logging": {
-    "eval_every": {"value": null, "default": 30, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "eval_every": {"value": null, "default": 50, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
     "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
     "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},
 
diff --git a/experiments/text_classification/germEval18Fine_config.json b/experiments/text_classification/germEval18Fine_config.json
index 213b61cc9..719da244b 100644
--- a/experiments/text_classification/germEval18Fine_config.json
+++ b/experiments/text_classification/germEval18Fine_config.json
@@ -45,12 +45,12 @@
     "layer_dims": {"value": null, "default": [768, 4], "desc": "Cannot do experiments on this value, since it is already a list. Dimensions of the prediction head. Needs to be of type String, otherwise it gets iterated over."}
   },
   "optimizer": {
-    "learning_rate": {"value": null, "default": 5e-5, "desc": "The initial learning rate for AdamW."},
+    "learning_rate": {"value": null, "default": 2e-5, "desc": "The initial learning rate for AdamW."},
     "optimizer_opts": {"value": null, "default": null, "desc": "Additional optimizer config."},
     "schedule_opts": {"value": null, "default": {"name": "LinearWarmup", "warmup_proportion": 0.2}, "desc": "opts for lr schedule"}
   },
   "logging": {
-    "eval_every": {"value": null, "default": 30, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
+    "eval_every": {"value": null, "default": 50, "desc": "Steps per training loop (batches) required for evaluation on dev set. Set to 0 when you do not want to do evaluation on dev set during training."},
     "mlflow_url": {"value": "https://public-mlflow.deepset.ai/", "default": null, "desc": "Mlflow server for tracking experiments (e.g. http://80.123.45.167:5000/)"},
     "mlflow_nested": {"value": null, "default": true, "desc": "Nesting mlflow experiments. For doing multiple runs across a set of hyperparameters."},