diff --git a/docs/source/usage_guides/deepspeed.md b/docs/source/usage_guides/deepspeed.md index e6b0abd7d5f..a3f00e57870 100644 --- a/docs/source/usage_guides/deepspeed.md +++ b/docs/source/usage_guides/deepspeed.md @@ -433,7 +433,7 @@ Only the `auto` fields specified in above examples are handled by `prepare` meth The `auto` values are calculated as: - `reduce_bucket_size`: `hidden_size * hidden_size` -- `stage3_prefetch_bucket_size`: `0.9 * hidden_size * hidden_size` +- `stage3_prefetch_bucket_size`: `int(0.9 * hidden_size * hidden_size)` - `stage3_param_persistence_threshold`: `10 * hidden_size` For the `auto` feature to work for these 3 config entries - Accelerate will use `model.config.hidden_size` or `max(model.config.hidden_sizes)` as `hidden_size`. If neither of these is available, the launching will fail and you will have to set these 3 config entries manually. Remember the first 2 config entries are the communication buffers - the larger they are the more efficient the comms will be, and the larger they are the more GPU memory they will consume, so it's a tunable performance trade-off. diff --git a/src/accelerate/accelerator.py b/src/accelerate/accelerator.py index ef95c5ec049..5bd90acecfc 100755 --- a/src/accelerate/accelerator.py +++ b/src/accelerate/accelerator.py @@ -1706,7 +1706,7 @@ def _prepare_deepspeed(self, *args): config_kwargs.update( { "zero_optimization.reduce_bucket_size": hidden_size * hidden_size, - "zero_optimization.stage3_prefetch_bucket_size": 0.9 * hidden_size * hidden_size, + "zero_optimization.stage3_prefetch_bucket_size": int(0.9 * hidden_size * hidden_size), "zero_optimization.stage3_param_persistence_threshold": 10 * hidden_size, } ) diff --git a/tests/deepspeed/test_deepspeed.py b/tests/deepspeed/test_deepspeed.py index 8d593a6df6f..858ff154474 100644 --- a/tests/deepspeed/test_deepspeed.py +++ b/tests/deepspeed/test_deepspeed.py @@ -638,7 +638,7 @@ def test_autofill_dsconfig(self): assert config["gradient_clipping"] == 1.0 assert config["zero_optimization"]["reduce_bucket_size"] == (hidden_size * hidden_size) - assert config["zero_optimization"]["stage3_prefetch_bucket_size"] == ((0.9 * hidden_size) * hidden_size) + assert config["zero_optimization"]["stage3_prefetch_bucket_size"] == int((0.9 * hidden_size) * hidden_size) assert config["zero_optimization"]["stage3_param_persistence_threshold"] == (10 * hidden_size) assert not config["zero_optimization"]["stage3_gather_16bit_weights_on_model_save"] @@ -695,7 +695,7 @@ def test_autofill_comm_buffers_dsconfig(self, model_type): ) zero_opt = accelerator.deepspeed_config["zero_optimization"] assert zero_opt["reduce_bucket_size"] == (hidden_size * hidden_size) - assert zero_opt["stage3_prefetch_bucket_size"] == (0.9 * hidden_size) * hidden_size + assert zero_opt["stage3_prefetch_bucket_size"] == int((0.9 * hidden_size) * hidden_size) assert zero_opt["stage3_param_persistence_threshold"] == (10 * hidden_size) @parameterized.expand([FP16, BF16], name_func=parameterized_custom_name_func)