From 7e7ec273e23c406d4d520d1b0e004d62e062f45e Mon Sep 17 00:00:00 2001 From: Max Kovalenko Date: Wed, 20 Nov 2024 18:37:22 +0200 Subject: [PATCH 1/3] Enable torch compile on _allgather_params * Previously ZeRO3 was crashing when trying to compile _allgather_params * Disabling grad solves the issue --- deepspeed/runtime/zero/partition_parameters.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py index 22a6746bb57c..cb0cd7c8017d 100755 --- a/deepspeed/runtime/zero/partition_parameters.py +++ b/deepspeed/runtime/zero/partition_parameters.py @@ -1882,6 +1882,7 @@ def _allgather_params_coalesced(self, param_list, hierarchy=0, quantize=False): return None + @torch.no_grad() def _allgather_params(self, param_list, hierarchy=0): if len(param_list) == 0: return From 612056c0d58641375e5166667546c5eac36a17b1 Mon Sep 17 00:00:00 2001 From: Max Kovalenko Date: Thu, 21 Nov 2024 16:11:05 +0200 Subject: [PATCH 2/3] Dummy commit --- deepspeed/runtime/zero/partition_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py index cb0cd7c8017d..e382860ac805 100755 --- a/deepspeed/runtime/zero/partition_parameters.py +++ b/deepspeed/runtime/zero/partition_parameters.py @@ -1882,7 +1882,7 @@ def _allgather_params_coalesced(self, param_list, hierarchy=0, quantize=False): return None - @torch.no_grad() + @torch.no_grad() # Enable torch compile def _allgather_params(self, param_list, hierarchy=0): if len(param_list) == 0: return From 4953eb94c9f335a80301d626d10393e635fcf359 Mon Sep 17 00:00:00 2001 From: Max Kovalenko Date: Thu, 21 Nov 2024 16:13:01 +0200 Subject: [PATCH 3/3] Dummy commit --- deepspeed/runtime/zero/partition_parameters.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deepspeed/runtime/zero/partition_parameters.py 
b/deepspeed/runtime/zero/partition_parameters.py index e382860ac805..cb0cd7c8017d 100755 --- a/deepspeed/runtime/zero/partition_parameters.py +++ b/deepspeed/runtime/zero/partition_parameters.py @@ -1882,7 +1882,7 @@ def _allgather_params_coalesced(self, param_list, hierarchy=0, quantize=False): return None - @torch.no_grad() # Enable torch compile + @torch.no_grad() def _allgather_params(self, param_list, hierarchy=0): if len(param_list) == 0: return