From 16603b7dec1feead4768f3eb4219a06e74d85b41 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Lucas=20de=20Sousa=20Almeida?=
Date: Tue, 6 Feb 2024 11:32:40 -0300
Subject: [PATCH 1/2] It is better to guarantee that each activation is an
 independent instance
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: João Lucas de Sousa Almeida
---
 simulai/templates/_pytorch_network.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/simulai/templates/_pytorch_network.py b/simulai/templates/_pytorch_network.py
index 6e0e0a3..b5d93e8 100644
--- a/simulai/templates/_pytorch_network.py
+++ b/simulai/templates/_pytorch_network.py
@@ -172,7 +172,7 @@ def _setup_activations(
             activation_op = self._get_operation(operation=activation)
 
             return (
-                (n_layers - 1) * [activation_op]
+                [self._get_operation(operation=activation) for _ in range(n_layers - 1)]
                 + [self._get_operation(operation=self.default_last_activation)],
                 (n_layers - 1) * [activation] + [self.default_last_activation],
             )
@@ -212,7 +212,6 @@ def _setup_activations(
 
             return activations_list, activation
 
-
         else:
             raise Exception(
                 "The activation format,"

From 94bede5306485630d14afbd1f44745ca74f24810 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jo=C3=A3o=20Lucas=20de=20Sousa=20Almeida?=
Date: Tue, 6 Feb 2024 11:33:11 -0300
Subject: [PATCH 2/2] Sending transformer stages to a dedicated device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: João Lucas de Sousa Almeida
---
 simulai/models/_pytorch_models/_transformer.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/simulai/models/_pytorch_models/_transformer.py b/simulai/models/_pytorch_models/_transformer.py
index 9fe8fe6..b43106c 100644
--- a/simulai/models/_pytorch_models/_transformer.py
+++ b/simulai/models/_pytorch_models/_transformer.py
@@ -178,6 +178,7 @@ def __init__(
         decoder_mlp_layer_config: dict = None,
         number_of_encoders: int = 1,
         number_of_decoders: int = 1,
+        devices: Union[str, list] = "cpu",
     ) -> None:
         r"""A classical encoder-decoder transformer:
 
@@ -229,6 +230,9 @@ def __init__(
         self.encoder_mlp_layers_list = list()
         self.decoder_mlp_layers_list = list()
 
+        # Determining the kind of device on which the model will be executed
+        self.device = self._set_device(devices=devices)
+
         # Creating independent copies for the MLP layers which will be used
         # by the multiple encoders/decoders.
         for e in range(self.number_of_encoders):
@@ -281,6 +285,11 @@ def __init__(
         self.final_layer = Linear(input_size=self.embed_dim_decoder, output_size=self.output_dim)
         self.add_module("final_linear_layer", self.final_layer)
 
+        # Sending everything to the proper device
+        self.EncoderStage = self.EncoderStage.to(self.device)
+        self.DecoderStage = self.DecoderStage.to(self.device)
+        self.final_layer = self.final_layer.to(self.device)
+
     @as_tensor
     def forward(
         self, input_data: Union[torch.Tensor, np.ndarray] = None
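
Background for PATCH 1/2: in Python, `n * [obj]` builds a list of n references
to one object, so `(n_layers - 1) * [activation_op]` made every hidden layer
share a single activation instance; for activations with learnable state
(e.g. torch.nn.PReLU, whose slope is a trainable parameter) all layers would
then update the same weights. The list comprehension calls
`self._get_operation` once per layer instead. A minimal sketch of the
difference (torch.nn.PReLU is only an illustrative choice, not taken from the
patch):

    import torch

    # List multiplication repeats the same reference; the comprehension
    # constructs a fresh instance per layer.
    shared = 3 * [torch.nn.PReLU()]
    independent = [torch.nn.PReLU() for _ in range(3)]

    print(shared[0] is shared[1])            # True  -> one shared parameter set
    print(independent[0] is independent[1])  # False -> per-layer parameters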
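
Background for PATCH 2/2: the encoder stage, decoder stage, and final linear
layer must live on the same device as the input tensors, so each is moved
explicitly with `.to(self.device)` after construction. A minimal sketch of the
placement pattern, with `resolve_device` as a hypothetical stand-in for the
`_set_device` helper the patch calls (the real method in simulai may differ):

    import torch

    # Hypothetical stand-in for the `_set_device` helper used in the patch:
    # maps a device spec to a torch.device, falling back to CPU without CUDA.
    def resolve_device(devices="cpu"):
        if devices == "gpu" and torch.cuda.is_available():
            return torch.device("cuda")
        return torch.device("cpu")

    device = resolve_device("gpu")
    stage = torch.nn.Linear(8, 8).to(device)  # move each stage explicitly
    x = torch.randn(4, 8, device=device)      # inputs on the same device
    y = stage(x)                              # avoids cross-device errors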