diff --git a/invokeai/backend/ip_adapter/ip_adapter.py b/invokeai/backend/ip_adapter/ip_adapter.py
index 75286f4733b..87ce029a875 100644
--- a/invokeai/backend/ip_adapter/ip_adapter.py
+++ b/invokeai/backend/ip_adapter/ip_adapter.py
@@ -124,16 +124,14 @@ def __init__(
             self.device, dtype=self.dtype
         )
 
-    def to(
-        self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None, non_blocking: bool = False
-    ):
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
         if device is not None:
             self.device = device
         if dtype is not None:
             self.dtype = dtype
 
-        self._image_proj_model.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
-        self.attn_weights.to(device=self.device, dtype=self.dtype, non_blocking=non_blocking)
+        self._image_proj_model.to(device=self.device, dtype=self.dtype)
+        self.attn_weights.to(device=self.device, dtype=self.dtype)
 
     def calc_size(self) -> int:
         # HACK(ryand): Fix this issue with circular imports.
diff --git a/invokeai/backend/lora.py b/invokeai/backend/lora.py
index 9c669a4c789..8ef81915f14 100644
--- a/invokeai/backend/lora.py
+++ b/invokeai/backend/lora.py
@@ -11,7 +11,6 @@
 
 from invokeai.backend.model_manager import BaseModelType
 from invokeai.backend.raw_model import RawModel
-from invokeai.backend.util.devices import TorchDevice
 
 
 class LoRALayerBase:
@@ -57,14 +56,9 @@ def calc_size(self) -> int:
                 model_size += val.nelement() * val.element_size()
         return model_size
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         if self.bias is not None:
-            self.bias = self.bias.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.bias = self.bias.to(device=device, dtype=dtype)
 
 
 # TODO: find and debug lora/locon with bias
@@ -106,19 +100,14 @@ def calc_size(self) -> int:
                 model_size += val.nelement() * val.element_size()
         return model_size
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
-        super().to(device=device, dtype=dtype, non_blocking=non_blocking)
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
+        super().to(device=device, dtype=dtype)
 
-        self.up = self.up.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.down = self.down.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.up = self.up.to(device=device, dtype=dtype)
+        self.down = self.down.to(device=device, dtype=dtype)
 
         if self.mid is not None:
-            self.mid = self.mid.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.mid = self.mid.to(device=device, dtype=dtype)
 
 
 class LoHALayer(LoRALayerBase):
@@ -167,23 +156,18 @@ def calc_size(self) -> int:
                 model_size += val.nelement() * val.element_size()
         return model_size
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         super().to(device=device, dtype=dtype)
 
-        self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+        self.w1_b = self.w1_b.to(device=device, dtype=dtype)
         if self.t1 is not None:
-            self.t1 = self.t1.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t1 = self.t1.to(device=device, dtype=dtype)
 
-        self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+        self.w2_b = self.w2_b.to(device=device, dtype=dtype)
         if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t2 = self.t2.to(device=device, dtype=dtype)
 
 
 class LoKRLayer(LoRALayerBase):
@@ -264,12 +248,7 @@ def calc_size(self) -> int:
                 model_size += val.nelement() * val.element_size()
         return model_size
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         super().to(device=device, dtype=dtype)
 
         if self.w1 is not None:
@@ -277,19 +256,19 @@ def to(
         else:
             assert self.w1_a is not None
             assert self.w1_b is not None
-            self.w1_a = self.w1_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-            self.w1_b = self.w1_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w1_a = self.w1_a.to(device=device, dtype=dtype)
+            self.w1_b = self.w1_b.to(device=device, dtype=dtype)
 
         if self.w2 is not None:
-            self.w2 = self.w2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2 = self.w2.to(device=device, dtype=dtype)
         else:
             assert self.w2_a is not None
             assert self.w2_b is not None
-            self.w2_a = self.w2_a.to(device=device, dtype=dtype, non_blocking=non_blocking)
-            self.w2_b = self.w2_b.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.w2_a = self.w2_a.to(device=device, dtype=dtype)
+            self.w2_b = self.w2_b.to(device=device, dtype=dtype)
 
         if self.t2 is not None:
-            self.t2 = self.t2.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            self.t2 = self.t2.to(device=device, dtype=dtype)
 
 
 class FullLayer(LoRALayerBase):
@@ -319,15 +298,10 @@ def calc_size(self) -> int:
         model_size += self.weight.nelement() * self.weight.element_size()
         return model_size
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         super().to(device=device, dtype=dtype)
 
-        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.weight = self.weight.to(device=device, dtype=dtype)
 
 
 class IA3Layer(LoRALayerBase):
@@ -359,16 +333,11 @@ def calc_size(self) -> int:
         model_size += self.on_input.nelement() * self.on_input.element_size()
         return model_size
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ):
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None):
         super().to(device=device, dtype=dtype)
 
-        self.weight = self.weight.to(device=device, dtype=dtype, non_blocking=non_blocking)
-        self.on_input = self.on_input.to(device=device, dtype=dtype, non_blocking=non_blocking)
+        self.weight = self.weight.to(device=device, dtype=dtype)
+        self.on_input = self.on_input.to(device=device, dtype=dtype)
 
 
 AnyLoRALayer = Union[LoRALayer, LoHALayer, LoKRLayer, FullLayer, IA3Layer]
@@ -390,15 +359,10 @@ def __init__(
     def name(self) -> str:
         return self._name
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         # TODO: try revert if exception?
         for _key, layer in self.layers.items():
-            layer.to(device=device, dtype=dtype, non_blocking=non_blocking)
+            layer.to(device=device, dtype=dtype)
 
     def calc_size(self) -> int:
         model_size = 0
@@ -521,7 +485,7 @@ def from_checkpoint(
             # lower memory consumption by removing already parsed layer values
             state_dict[layer_key].clear()
 
-            layer.to(device=device, dtype=dtype, non_blocking=TorchDevice.get_non_blocking(device))
+            layer.to(device=device, dtype=dtype)
             model.layers[layer_key] = layer
 
         return model
diff --git a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
index 9027b7b5b76..e69201e7391 100644
--- a/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
+++ b/invokeai/backend/model_manager/load/model_cache/model_cache_default.py
@@ -289,11 +289,9 @@ def move_model_to_device(self, cache_entry: CacheRecord[AnyModel], target_device
                 else:
                     new_dict: Dict[str, torch.Tensor] = {}
                     for k, v in cache_entry.state_dict.items():
-                        new_dict[k] = v.to(
-                            target_device, copy=True, non_blocking=TorchDevice.get_non_blocking(target_device)
-                        )
+                        new_dict[k] = v.to(target_device, copy=True)
                     cache_entry.model.load_state_dict(new_dict, assign=True)
-            cache_entry.model.to(target_device, non_blocking=TorchDevice.get_non_blocking(target_device))
+            cache_entry.model.to(target_device)
             cache_entry.device = target_device
         except Exception as e:  # blow away cache entry
             self._delete_cache_entry(cache_entry)
diff --git a/invokeai/backend/model_patcher.py b/invokeai/backend/model_patcher.py
index 8c7a62c3719..8b8aa6d5a58 100644
--- a/invokeai/backend/model_patcher.py
+++ b/invokeai/backend/model_patcher.py
@@ -139,15 +139,12 @@ def apply_lora(
                         # We intentionally move to the target device first, then cast. Experimentally, this was found to
                         # be significantly faster for 16-bit CPU tensors being moved to a CUDA device than doing the
                         # same thing in a single call to '.to(...)'.
-                        layer.to(device=device, non_blocking=TorchDevice.get_non_blocking(device))
-                        layer.to(dtype=torch.float32, non_blocking=TorchDevice.get_non_blocking(device))
+                        layer.to(device=device)
+                        layer.to(dtype=torch.float32)
                         # TODO(ryand): Using torch.autocast(...) over explicit casting may offer a speed benefit on CUDA
                         # devices here. Experimentally, it was found to be very slow on CPU. More investigation needed.
                         layer_weight = layer.get_weight(module.weight) * (lora_weight * layer_scale)
-                        layer.to(
-                            device=TorchDevice.CPU_DEVICE,
-                            non_blocking=TorchDevice.get_non_blocking(TorchDevice.CPU_DEVICE),
-                        )
+                        layer.to(device=TorchDevice.CPU_DEVICE)
 
                         assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
                         if module.weight.shape != layer_weight.shape:
@@ -156,7 +153,7 @@ def apply_lora(
                             layer_weight = layer_weight.reshape(module.weight.shape)
 
                         assert isinstance(layer_weight, torch.Tensor)  # mypy thinks layer_weight is a float|Any ??!
-                        module.weight += layer_weight.to(dtype=dtype, non_blocking=TorchDevice.get_non_blocking(device))
+                        module.weight += layer_weight.to(dtype=dtype)
 
             yield  # wait for context manager exit
 
@@ -164,9 +161,7 @@ def apply_lora(
             assert hasattr(model, "get_submodule")  # mypy not picking up fact that torch.nn.Module has get_submodule()
             with torch.no_grad():
                 for module_key, weight in original_weights.items():
-                    model.get_submodule(module_key).weight.copy_(
-                        weight, non_blocking=TorchDevice.get_non_blocking(weight.device)
-                    )
+                    model.get_submodule(module_key).weight.copy_(weight)
 
     @classmethod
     @contextmanager
diff --git a/invokeai/backend/onnx/onnx_runtime.py b/invokeai/backend/onnx/onnx_runtime.py
index d562a46dffa..a8132d4b233 100644
--- a/invokeai/backend/onnx/onnx_runtime.py
+++ b/invokeai/backend/onnx/onnx_runtime.py
@@ -190,12 +190,7 @@ def __call__(self, **kwargs):
         return self.session.run(None, inputs)
 
     # compatability with RawModel ABC
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         pass
 
     # compatability with diffusers load code
diff --git a/invokeai/backend/raw_model.py b/invokeai/backend/raw_model.py
index 7bca6945d98..931804c985d 100644
--- a/invokeai/backend/raw_model.py
+++ b/invokeai/backend/raw_model.py
@@ -20,10 +20,5 @@ class RawModel(ABC):
     """Abstract base class for 'Raw' model wrappers."""
 
     @abstractmethod
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         pass
diff --git a/invokeai/backend/textual_inversion.py b/invokeai/backend/textual_inversion.py
index 483f2da88c3..0345478b975 100644
--- a/invokeai/backend/textual_inversion.py
+++ b/invokeai/backend/textual_inversion.py
@@ -65,17 +65,12 @@ def from_checkpoint(
 
         return result
 
-    def to(
-        self,
-        device: Optional[torch.device] = None,
-        dtype: Optional[torch.dtype] = None,
-        non_blocking: bool = False,
-    ) -> None:
+    def to(self, device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None) -> None:
         if not torch.cuda.is_available():
             return
         for emb in [self.embedding, self.embedding_2]:
             if emb is not None:
-                emb.to(device=device, dtype=dtype, non_blocking=non_blocking)
+                emb.to(device=device, dtype=dtype)
 
     def calc_size(self) -> int:
         """Get the size of this model in bytes."""
diff --git a/invokeai/backend/util/devices.py b/invokeai/backend/util/devices.py
index 1cba70c6626..83ce055024f 100644
--- a/invokeai/backend/util/devices.py
+++ b/invokeai/backend/util/devices.py
@@ -112,15 +112,3 @@ def empty_cache(cls) -> None:
     @classmethod
     def _to_dtype(cls, precision_name: TorchPrecisionNames) -> torch.dtype:
         return NAME_TO_PRECISION[precision_name]
-
-    @staticmethod
-    def get_non_blocking(to_device: torch.device) -> bool:
-        """Return the non_blocking flag to be used when moving a tensor to a given device.
-        MPS may have unexpected errors with non-blocking operations - we should not use non-blocking when moving _to_ MPS.
-        When moving _from_ MPS, we can use non-blocking operations.
-
-        See:
-        - https://github.com/pytorch/pytorch/issues/107455
-        - https://discuss.pytorch.org/t/should-we-set-non-blocking-to-true/38234/28
-        """
-        return False if to_device.type == "mps" else True
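
Note (illustration only, not part of the patch): a minimal sketch of the transfer pattern this diff standardizes on. The weight tensor below is a hypothetical stand-in for any tensor held by a RawModel subclass; the device-then-dtype order mirrors the comment kept in model_patcher.py, and dropping non_blocking falls back to PyTorch's default blocking copy, which is safe on MPS as well as CUDA.

    import torch

    # Hypothetical stand-in for a 16-bit weight tensor owned by a RawModel subclass.
    weight = torch.randn(16, 16, dtype=torch.float16)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Move to the target device first, then cast (the two-step pattern kept in
    # model_patcher.py). No non_blocking flag is passed, so this is the default
    # synchronous transfer, which is also coherent when the target is MPS.
    weight = weight.to(device=device)
    weight = weight.to(dtype=torch.float32)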