From 471fbbca598661c2b4ebcc06ba623c384e1aa6a3 Mon Sep 17 00:00:00 2001
From: Emmanuel Benazera
Date: Tue, 7 May 2024 12:40:12 +0000
Subject: [PATCH] fix: multi-gpu ddp collective mismatch upon resume

---
 train.py | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/train.py b/train.py
index e284392d9..4ac07c3c2 100644
--- a/train.py
+++ b/train.py
@@ -127,14 +127,13 @@ def train_gpu(rank, world_size, opt, trainset, trainset_temporal):
     if opt.output_display_env == "":
         opt.output_display_env = opt.name
 
-    if rank_0:
-        visualizer = Visualizer(
-            opt
-        )  # create a visualizer that display/save images and plots
+    visualizer = Visualizer(
+        opt
+    )  # create a visualizer that display/save images and plots
 
-        if opt.train_continue:
-            opt.train_epoch_count = visualizer.load_data()
-            opt.total_iters = opt.train_epoch_count * trainset_size
+    if opt.train_continue:
+        opt.train_epoch_count = visualizer.load_data()
+        opt.total_iters = opt.train_epoch_count * trainset_size
 
     opt.optim = optim  # set optimizer
     model = create_model(opt, rank)  # create a model given opt.model and other options
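
Reviewer note: the mismatch appears to come from opt.train_epoch_count and
opt.total_iters being restored only on rank 0 when resuming, so the other ranks
looped over a different number of epochs and eventually issued a different
number of DDP collectives. The sketch below is a minimal, self-contained
illustration of that constraint; it is not this project's code, and
load_resume_epoch() is a hypothetical stand-in for visualizer.load_data().

    # Minimal sketch: resume state must be derived identically on every rank
    # so that DDP collective calls stay in lockstep. Hypothetical example,
    # not taken from train.py.
    import os

    import torch
    import torch.distributed as dist
    import torch.multiprocessing as mp


    def load_resume_epoch(path="latest_epoch.txt"):
        # Hypothetical helper: every rank reads the same file, so every rank
        # resumes from the same epoch (the role visualizer.load_data() plays
        # in the patch above).
        if os.path.exists(path):
            with open(path) as f:
                return int(f.read().strip())
        return 0


    def worker(rank, world_size):
        dist.init_process_group("gloo", rank=rank, world_size=world_size)

        # Before the patch, only rank 0 restored the epoch counter; the other
        # ranks kept the default and ran a different number of epochs, so the
        # all_reduce below was eventually called a different number of times
        # per rank, i.e. the "collective mismatch" from the commit subject.
        start_epoch = load_resume_epoch()
        total_epochs = 5

        for epoch in range(start_epoch, total_epochs):
            grad = torch.ones(1)
            dist.all_reduce(grad)  # must be reached the same number of times on every rank

        dist.destroy_process_group()


    if __name__ == "__main__":
        os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
        os.environ.setdefault("MASTER_PORT", "29500")
        mp.spawn(worker, args=(2,), nprocs=2)

Moving the Visualizer construction and the train_continue block out of the
if rank_0: guard has the same effect in train.py: every rank now computes the
same train_epoch_count and total_iters before entering the training loop.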