Fix electra (#1291)

* update Dockerfile
* fix num_out_files
* fix run_electra
* Revert "update Dockerfile"

  This reverts commit 80593a2.
dmlc · Aug 8, 2020 · 9e268c0 · 9e268c0
1 parent c33e62e
commit 9e268c0
Show file tree

Hide file tree

Showing 2 changed files with 6 additions and 6 deletions.
diff --git a/scripts/pretraining/data_preprocessing.py b/scripts/pretraining/data_preprocessing.py
@@ -53,9 +53,7 @@ def main(args):
         random.shuffle(fnames)
     num_files = len(fnames)
     num_out_files = min(args.num_out_files, num_files)
-    file_volume = math.ceil(num_files / num_out_files)
-    splited_files = np.array_split(fnames, file_volume)
-    num_out_files = len(splited_files)
+    splited_files = np.array_split(fnames, num_out_files)
     output_files = [os.path.join(
         args.output, "owt-pretrain-record-{}.npz".format(str(i).zfill(4))) for i in range(num_out_files)]
     print("All preprocessed features will be saved in {} npz files".format(num_out_files))

diff --git a/scripts/pretraining/run_electra.py b/scripts/pretraining/run_electra.py
@@ -472,9 +472,11 @@ def train(args):
         train_end_time - train_start_time))
     if writer is not None:
         writer.close()
-    model_name = args.model_name.replace('google', 'gluon')
-    save_dir = os.path.join(args.output_dir, model_name)
-    final_save(model, save_dir, tokenizer)
+
+    if local_rank == 0:
+        model_name = args.model_name.replace('google', 'gluon')
+        save_dir = os.path.join(args.output_dir, model_name)
+        final_save(model, save_dir, tokenizer)
 
 # TODO(zheyuye), Directly implement a metric for weighted accuracy