From 77c12021253b572db7aacdfbeaf002546bf4f26f Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 5 Mar 2025 11:27:05 +0100
Subject: [PATCH 01/11] Update model.py

---
 ctlearn/core/model.py | 518 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 517 insertions(+), 1 deletion(-)

diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py
index 80ff0668..966fb2ab 100644
--- a/ctlearn/core/model.py
+++ b/ctlearn/core/model.py
@@ -4,7 +4,8 @@
 
 from abc import abstractmethod
 import keras
-
+from tensorflow.keras.layers import Layer, Concatenate, Input
+import tensorflow as tf
 from ctapipe.core import Component
 from ctapipe.core.traits import Bool, Int, CaselessStrEnum, List, Dict, Unicode, Path
 from ctlearn.core.attention import (
@@ -20,6 +21,7 @@
     "SingleCNN",
     "ResNet",
     "LoadedModel",
+    "SpatialAttention_ResNet"
 ]
 
   
@@ -857,3 +859,517 @@ def _build_backbone(self, input_shape):
             network_input, network_output, name=self.backbone_name
         )
         return backbone_model, network_input
+
+
+        
+class ChannelAttention(Layer):
+    def __init__(self, reduction=16, name=None):
+        super(ChannelAttention, self).__init__(name=name)
+        self.reduction = reduction
+
+    def build(self, input_shape):
+        channels = input_shape[-1]
+        self.global_avg_pool = keras.layers.GlobalAveragePooling2D()
+        self.fc1 = keras.layers.Dense(channels // self.reduction, activation='relu', use_bias=False)
+        self.fc2 = keras.layers.Dense(channels, activation='sigmoid', use_bias=False)
+        super(ChannelAttention, self).build(input_shape)
+
+    def call(self, x):
+        avg_out = self.global_avg_pool(x)
+        avg_out = self.fc1(avg_out)
+        avg_out = self.fc2(avg_out)
+        attention = keras.layers.Reshape((1, 1, -1))(avg_out)
+        return x * attention
+
+
+class SpatialAttention(Layer):  # Gates its input with a sigmoid map produced by one learned conv filter.
+    def __init__(self, se_kernel_size=1, name=None):
+        super(SpatialAttention, self).__init__(name=name)
+        self.se_kernel_size = se_kernel_size 
+
+    def build(self, input_shape):
+        # Create the trainable conv kernel of shape
+        # (se_kernel_size, se_kernel_size, input_shape[-1], 1): a single output filter.
+        self.kernel = self.add_weight(name='kernel',
+                                      shape=(self.se_kernel_size,self.se_kernel_size,input_shape[-1],1),
+                                      initializer='uniform',
+                                      trainable=True)
+        super(SpatialAttention, self).build(input_shape,)
+
+    def call(self, x):
+        # Convolve the input with the kernel (stride 1, 'SAME' padding) and squash with a sigmoid
+        attention = tf.nn.sigmoid(tf.nn.conv2d(x, self.kernel, strides=[1,1], padding='SAME'))
+        # Multiply the original input by the attention mask
+        return x * attention
+
+
+class CBAM(Layer):
+    def __init__(self, reduction=16, se_kernel_size=7, name=None):
+        super(CBAM, self).__init__(name=name)
+        self.channel_attention = ChannelAttention(reduction)
+        self.spatial_attention = SpatialAttention(se_kernel_size)
+
+    def call(self, x):
+        x = self.channel_attention(x)
+        x = self.spatial_attention(x)
+        return x
+
+
+class SpatialAttention_ResNet(CTLearnModel):
+    """
+    ``SpatialAttention_ResNet`` is a residual neural network preceded by spatial attention.
+
+    This class extends the functionality of ``CTLearnModel`` by implementing methods to
+    build a residual neural network whose input first passes through ``SpatialAttention``.
+    """
+
+    name = Unicode(
+        "SE_ThinResNet",
+        help="Name of the model backbone.",
+    ).tag(config=True)
+
+    init_layer = Dict(
+        default_value=None,
+        allow_none=True,
+        help=(
+            "Parameters for the first convolutional layer. "
+            "E.g. ``{'filters': 64, 'kernel_size': 7, 'strides': 2}``."
+        ),
+    ).tag(config=True)
+
+    init_max_pool = Dict(
+        default_value=None,
+        allow_none=True,
+        help=(
+            "Parameters for the first max pooling layer. "
+            "E.g. ``{'size': 3, 'strides': 2}``."
+        ),
+    ).tag(config=True)
+
+    residual_block_type = CaselessStrEnum(
+        ["basic", "bottleneck"],
+        default_value="bottleneck",
+        allow_none=False,
+        help="Type of residual block to use.",
+    ).tag(config=True)
+    
+    architecture = List(
+        trait=Dict(),
+        default_value=[{'filters': 48, 'blocks': 2}, {'filters': 96, 'blocks': 3}, {'filters': 128, 'blocks': 3}, {'filters': 256, 'blocks': 3}],
+        allow_none=False,
+        help=(
+            "List of dicts containing the number of filters and residual blocks. "
+            "E.g. ``[{'filters': 12, 'blocks': 2}, ...]``."
+        ),
+    ).tag(config=True)
+
+    def __init__(
+        self,
+        input_shape,
+        tasks,
+        se_kernel_size,
+        config=None,
+        parent=None,
+        **kwargs,
+    ):
+        """
+        Build the spatial-attention ResNet backbone and the task heads.
+
+        ``input_shape`` and ``se_kernel_size`` (kernel size of the
+        ``SpatialAttention`` layer) are passed to ``_build_backbone``;
+        ``tasks`` is checked against ``self.head`` and used to build the heads.
+        Any other keyword is forwarded unchanged to the parent constructor.
+        """
+        super().__init__(config=config, parent=parent, **kwargs)
+        # Validate the architecture trait
+        for layer in self.architecture:
+            validate_trait_dict(layer, ["filters", "blocks"])
+        # Validate the initial layers trait
+        if self.init_layer is not None:
+            validate_trait_dict(self.init_layer, ["filters", "kernel_size", "strides"])
+        if self.init_max_pool is not None:
+            validate_trait_dict(self.init_max_pool, ["size", "strides"])
+        # Construct the name of the backbone model by appending "_block" to the model name
+        self.backbone_name = self.name + "_block"
+
+        # Build the ResNet model backbone
+        self.backbone_model, self.input_layer = self._build_backbone(input_shape, se_kernel_size)
+        backbone_output = self.backbone_model(self.input_layer)
+        # Validate the head trait with the provided tasks
+        validate_trait_dict(self.head, tasks)
+        # Build the fully connected head depending on the tasks
+        self.logits = build_fully_connect_head(backbone_output, self.head, tasks)
+
+        self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")
+        self.model.summary(expand_nested=True)  # summary() prints by itself and returns None
+
+    def _build_backbone(self, input_shape, se_kernel_size):
+        """
+        Build the ResNet model backbone.
+
+        Function to build the backbone of the ResNet model using the specified parameters.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of the input data, as passed to ``Input(shape=...)``.
+        se_kernel_size : int
+            Kernel size of the ``SpatialAttention`` layer applied before the residual blocks.
+
+        Returns
+        -------
+        backbone_model : keras.Model
+            Keras model object representing the ResNet backbone.
+        network_input : keras tensor
+            Tensor the backbone model starts from (see the note in the body).
+        """
+        # Define the input layer from the input shape
+        network_input = Input(shape=input_shape, name="input")
+
+        # NOTE(review): ``network_input`` is rebound to the output of each optional
+        # initial layer below, so the backbone model and the tensor returned to the
+        # caller start after those layers rather than at the raw ``Input``.
+        # Confirm this is intended.
+
+        # Apply initial padding if specified
+        if self.init_padding > 0:
+            # ZeroPadding2D is configured by ``padding`` alone; it has no
+            # ``kernel_size`` or ``strides`` arguments.
+            network_input = keras.layers.ZeroPadding2D(
+                padding=self.init_padding,
+                name=self.backbone_name + "_padding",
+            )(network_input)
+        # Apply initial convolutional layer if specified
+        if self.init_layer is not None:
+            network_input = keras.layers.Conv2D(
+                filters=self.init_layer["filters"],
+                kernel_size=self.init_layer["kernel_size"],
+                strides=self.init_layer["strides"],
+                name=self.backbone_name + "_conv1_conv",
+            )(network_input)
+        # Apply max pooling if specified
+        if self.init_max_pool is not None:
+            network_input = keras.layers.MaxPool2D(
+                pool_size=self.init_max_pool["size"],
+                strides=self.init_max_pool["strides"],
+                name=self.backbone_name + "_pool1_pool",
+            )(network_input)
+
+        # Build the residual blocks
+        engine_output = self._stacked_res_blocks(
+            network_input,
+            architecture=self.architecture,
+            residual_block_type=self.residual_block_type,
+            attention=self.attention,
+            se_kernel_size=se_kernel_size,
+            name=self.backbone_name,
+        )
+
+        # Apply global average pooling as the final layer of the backbone
+        network_output = keras.layers.GlobalAveragePooling2D(
+            name=self.backbone_name + "_global_avgpool"
+        )(engine_output)
+
+        # Create the backbone model
+        backbone_model = keras.Model(
+            network_input, network_output, name=self.backbone_name
+        )
+        return backbone_model, network_input
+
+        
+    def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attention, se_kernel_size, name=None):
+        """
+        Build a stack of residual blocks for the CTLearn model.
+
+        This function applies spatial attention to the input and then constructs the stack of residual blocks
+        that forms the backbone of the CTLearn model. Each residual block consists of convolutional layers with skip connections.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input Keras layer to the residual blocks.
+        architecture : list of dict
+            List of dictionaries containing the architecture of the ResNet model, which includes:
+            - Number of filters for the convolutional layers in the residual blocks.
+            - Number of residual blocks to stack.
+        residual_block_type : str
+            Type of residual block to use. Options are 'basic' or 'bottleneck'.
+        attention : dict
+            Dictionary containing the configuration parameters for the attention mechanism.
+        se_kernel_size : int
+            Kernel size of the ``SpatialAttention`` layer applied to ``inputs``.
+        name : str, optional
+            Label for the model.
+
+        Returns
+        -------
+        x : keras.layers.Layer
+            Output Keras layer after passing through the stack of residual blocks.
+        """
+
+        # Get hyperparameters for the model architecture
+        filters_list = [
+            layer["filters"]
+            for layer in architecture
+        ]
+        blocks_list = [
+            layer["blocks"]
+            for layer in architecture
+        ]
+
+        # Gate the input with spatial attention before the first residual stack
+        attended = SpatialAttention(se_kernel_size, "SPATIAL")(inputs)
+
+        # Build the ResNet model
+        x = self._stack_fn(
+            attended,
+            filters_list[0],
+            blocks_list[0],
+            residual_block_type,
+            stride=1,
+            attention=attention,
+            name=name + "_conv2",
+        )
+        for i, (filters, blocks) in enumerate(zip(filters_list[1:], blocks_list[1:])):
+            x = self._stack_fn(
+                x,
+                filters,
+                blocks,
+                residual_block_type,
+                attention=attention,
+                name=name + "_conv" + str(i + 3),
+            )
+        return x
+
+
+    def _stack_fn(
+        self,
+        inputs,
+        filters,
+        blocks,
+        residual_block_type,
+        stride=2,
+        attention=None,
+        name=None,
+    ):
+        """
+        Stack residual blocks for the CTLearn model.
+
+        This function constructs a stack of residual blocks, which are used to build the backbone of the CTLearn model.
+        Each residual block can be of different types (e.g., basic or bottleneck) and can include attention mechanisms.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input tensor to the residual blocks.
+        filters : int
+            Number of filters for the bottleneck layer in a block.
+        blocks : int
+            Number of residual blocks to stack.
+        residual_block_type : str
+            Type of residual block ('basic' or 'bottleneck').
+        stride : int, optional
+            Stride for the first layer in the first block. Default is 2.
+        attention : dict, optional
+            Configuration parameters for the attention mechanism. Default is None.
+        name : str, optional
+            Label for the stack. Default is None.
+
+        Returns
+        -------
+        keras.layers.Layer
+            Output tensor for the stacked blocks.
+        """
+
+        res_blocks = {
+            "basic": self._basic_residual_block,
+            "bottleneck": self._bottleneck_residual_block,
+        }
+
+        x = res_blocks[residual_block_type](
+            inputs,
+            filters,
+            stride=stride,
+            attention=attention,
+            name=name + "_block1",
+        )
+        for i in range(2, blocks + 1):
+            x = res_blocks[residual_block_type](
+                x,
+                filters,
+                conv_shortcut=False,
+                attention=attention,
+                name=name + "_block" + str(i),
+            )
+
+        return x
+
+
+    def _basic_residual_block(
+        self,
+        inputs,
+        filters,
+        kernel_size=3,
+        stride=1,
+        conv_shortcut=True,
+        attention=None,
+        name=None,
+    ):
+        """
+        Build a basic residual block for the CTLearn model.
+
+        This function constructs a basic residual block, which is a fundamental building block
+        of ResNet architectures. The block consists of two convolutional layers with an optional
+        convolutional shortcut, and can include attention mechanisms.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input tensor to the residual block.
+        filters : int
+            Number of filters for the convolutional layers.
+        kernel_size : int, optional
+            Size of the convolutional kernel. Default is 3.
+        stride : int, optional
+            Stride for the convolutional layers. Default is 1.
+        conv_shortcut : bool, optional
+            Whether to use a convolutional layer for the shortcut connection. Default is True.
+        attention : dict, optional
+            Configuration parameters for the attention mechanism. Default is None.
+        name : str, optional
+            Name for the residual block. Default is None.
+
+        Returns
+        -------
+        keras.layers.Layer
+            Output tensor after applying the residual block.
+        """
+
+        if conv_shortcut:
+            shortcut = keras.layers.Conv2D(
+                filters=filters, kernel_size=1, strides=stride, name=name + "_0_conv"
+            )(inputs)
+        else:
+            shortcut = inputs
+        
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            strides=stride,
+            padding="same",
+            activation="relu",
+            name=name + "_1_conv",
+        )(inputs)
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            padding="same",
+            activation="relu",
+            name=name + "_2_conv",
+        )(x)
+
+        # Attention mechanism
+        if attention is not None:
+            if attention["mechanism"] == "Dual-SE":
+                x = dual_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_dse"
+                )
+            elif attention["mechanism"] == "Channel-SE":
+                x = channel_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_cse"
+                )
+            elif attention["mechanism"] == "Spatial-SE":
+                x = spatial_squeeze_excite_block(x, name=name + "_sse")
+
+        x = keras.layers.Add(name=name + "_add")([shortcut, x])
+        x = keras.layers.ReLU(name=name + "_out")(x)
+
+        return x
+
+
+    def _bottleneck_residual_block(
+        self,
+        inputs,
+        filters,
+        kernel_size=3,
+        stride=1,
+        conv_shortcut=True,
+        attention=None,
+        name=None,
+    ):
+        """
+        Build a bottleneck residual block for the CTLearn model.
+
+        This function constructs a bottleneck residual block, which is a fundamental building block of
+        ResNet architectures. The block consists of three convolutional layers: a 1x1 convolution to reduce
+        dimensionality, a 3x3 convolution for main computation, and another 1x1 convolution to restore dimensionality.
+        It also includes an optional shortcut connection and can include attention mechanisms.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input tensor to the residual block.
+        filters : int
+            Number of filters for the convolutional layers.
+        kernel_size : int, optional
+            Size of the convolutional kernel. Default is 3.
+        stride : int, optional
+            Stride for the convolutional layers. Default is 1.
+        conv_shortcut : bool, optional
+            Whether to use a convolutional layer for the shortcut connection. Default is True.
+        attention : dict, optional
+            Configuration parameters for the attention mechanism. Default is None.
+        name : str, optional
+            Name for the residual block. Default is None.
+
+        Returns
+        -------
+        output : keras.layers.Layer
+            Output layer of the residual block.
+        """
+
+        if conv_shortcut:
+            shortcut = keras.layers.Conv2D(
+                filters=4 * filters,
+                kernel_size=1,
+                strides=stride,
+                name=name + "_0_conv",
+            )(inputs)
+        else:
+            shortcut = inputs
+
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=1,
+            strides=stride,
+            activation="relu",
+            name=name + "_1_conv",
+        )(inputs)
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            padding="same",
+            activation="relu",
+            name=name + "_2_conv",
+        )(x)
+        x = keras.layers.Conv2D(filters=4 * filters, kernel_size=1, name=name + "_3_conv")(
+            x
+        )
+
+        # Attention mechanism
+        if attention is not None:
+            if attention["mechanism"] == "Dual-SE":
+                x = dual_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_dse"
+                )
+            elif attention["mechanism"] == "Channel-SE":
+                x = channel_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_cse"
+                )
+            elif attention["mechanism"] == "Spatial-SE":
+                x = spatial_squeeze_excite_block(x, name=name + "_sse")
+
+        x = keras.layers.Add(name=name + "_add")([shortcut, x])
+        x = keras.layers.ReLU(name=name + "_out")(x)
+
+        return x

From 47f97f1c992e4f03988f6fec95e459df240c6de0 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 5 Mar 2025 11:33:20 +0100
Subject: [PATCH 02/11] Update train_model.py

---
 ctlearn/tools/train_model.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index c4e17b52..2982d571 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -155,6 +155,19 @@ class TrainCTLearnModel(Tool):
         allow_none=False,
         help="Number of epochs to train the neural network.",
     ).tag(config=True)
+    
+    se_kernel_size = Int(
+        default_value=1,
+        allow_none=False,
+        help="Kernel size of the Spatial Attention layer",
+    ).tag(config=True)
+
+    channel_attention_reduction = Int(
+        default_value=16,
+        allow_none=False,
+        help="Reduction size of the Channel Attention layer",
+    ).tag(config=True)
+
 
     batch_size = Int(
         default_value=64,
@@ -377,6 +390,8 @@ def start(self):
                 input_shape=self.training_loader.input_shape,
                 tasks=self.reco_tasks,
                 parent=self,
+                se_kernel_size = self.se_kernel_size,
+                channel_attention_reduction = self.channel_attention_reduction
             ).model
             # Validate the optimizer parameters
             validate_trait_dict(self.optimizer, ["name", "base_learning_rate"])

From cd24601635bd75ffe35d35d2d7157b3567ad7f87 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 5 Mar 2025 11:35:11 +0100
Subject: [PATCH 03/11] Update model.py

---
 ctlearn/core/model.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py
index 966fb2ab..ca91b9fd 100644
--- a/ctlearn/core/model.py
+++ b/ctlearn/core/model.py
@@ -863,14 +863,14 @@ def _build_backbone(self, input_shape):
 
         
 class ChannelAttention(Layer):
-    def __init__(self, reduction=16, name=None):
+    def __init__(self, channel_attention_reduction=16, name=None):
         super(ChannelAttention, self).__init__(name=name)
-        self.reduction = reduction
+        self.channel_attention_reduction = channel_attention_reduction
 
     def build(self, input_shape):
         channels = input_shape[-1]
         self.global_avg_pool = keras.layers.GlobalAveragePooling2D()
-        self.fc1 = keras.layers.Dense(channels // self.reduction, activation='relu', use_bias=False)
+        self.fc1 = keras.layers.Dense(max(1, channels // self.channel_attention_reduction), activation='relu', use_bias=False)  # keep >= 1 unit when channels < reduction
         self.fc2 = keras.layers.Dense(channels, activation='sigmoid', use_bias=False)
         super(ChannelAttention, self).build(input_shape)
 
@@ -904,9 +904,9 @@ def call(self, x):
 
 
 class CBAM(Layer):
-    def __init__(self, reduction=16, se_kernel_size=7, name=None):
+    def __init__(self, channel_attention_reduction=16, se_kernel_size=7, name=None):
         super(CBAM, self).__init__(name=name)
-        self.channel_attention = ChannelAttention(reduction)
+        self.channel_attention = ChannelAttention(channel_attention_reduction)
         self.spatial_attention = SpatialAttention(se_kernel_size)
 
     def call(self, x):

From 72a974760bdbdb538d947e65dbc0170670e8dea3 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Tue, 11 Mar 2025 10:45:05 +0100
Subject: [PATCH 04/11] Update model.py

---
 ctlearn/core/model.py | 463 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 462 insertions(+), 1 deletion(-)

diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py
index ca91b9fd..0e3bc0c4 100644
--- a/ctlearn/core/model.py
+++ b/ctlearn/core/model.py
@@ -21,7 +21,8 @@
     "SingleCNN",
     "ResNet",
     "LoadedModel",
-    "SpatialAttention_ResNet"
+    "SpatialAttention_ResNet",
+    "ChannelAttention_ResNet"
 ]
 
   
@@ -1373,3 +1374,463 @@ def _bottleneck_residual_block(
         x = keras.layers.ReLU(name=name + "_out")(x)
 
         return x
+
+
+class ChannelAttention_ResNet(CTLearnModel):
+    """
+    ``ChannelAttention_ResNet`` is a residual neural network preceded by channel attention.
+
+    This class extends the functionality of ``CTLearnModel`` by implementing methods to
+    build a residual neural network whose input first passes through ``ChannelAttention``.
+    """
+
+    name = Unicode(
+        "CH_ThinResNet",
+        help="Name of the model backbone.",
+    ).tag(config=True)
+
+    init_layer = Dict(
+        default_value=None,
+        allow_none=True,
+        help=(
+            "Parameters for the first convolutional layer. "
+            "E.g. ``{'filters': 64, 'kernel_size': 7, 'strides': 2}``."
+        ),
+    ).tag(config=True)
+
+    init_max_pool = Dict(
+        default_value=None,
+        allow_none=True,
+        help=(
+            "Parameters for the first max pooling layer. "
+            "E.g. ``{'size': 3, 'strides': 2}``."
+        ),
+    ).tag(config=True)
+
+    residual_block_type = CaselessStrEnum(
+        ["basic", "bottleneck"],
+        default_value="bottleneck",
+        allow_none=False,
+        help="Type of residual block to use.",
+    ).tag(config=True)
+    
+    architecture = List(
+        trait=Dict(),
+        default_value=[{'filters': 48, 'blocks': 2}, {'filters': 96, 'blocks': 3}, {'filters': 128, 'blocks': 3}, {'filters': 256, 'blocks': 3}],
+        allow_none=False,
+        help=(
+            "List of dicts containing the number of filters and residual blocks. "
+            "E.g. ``[{'filters': 12, 'blocks': 2}, ...]``."
+        ),
+    ).tag(config=True)
+
+    def __init__(
+        self,
+        input_shape,
+        tasks,
+        channel_attention_reduction,
+        config=None,
+        parent=None,
+        **kwargs,
+    ):
+        """
+        Build the channel-attention ResNet backbone and the task heads.
+
+        ``input_shape`` and ``channel_attention_reduction`` (reduction factor of
+        the ``ChannelAttention`` layer) are passed to ``_build_backbone``;
+        ``tasks`` is checked against ``self.head`` and used to build the heads.
+        Any other keyword is forwarded unchanged to the parent constructor.
+        """
+        super().__init__(config=config, parent=parent, **kwargs)
+        # Validate the architecture trait
+        for layer in self.architecture:
+            validate_trait_dict(layer, ["filters", "blocks"])
+        # Validate the initial layers trait
+        if self.init_layer is not None:
+            validate_trait_dict(self.init_layer, ["filters", "kernel_size", "strides"])
+        if self.init_max_pool is not None:
+            validate_trait_dict(self.init_max_pool, ["size", "strides"])
+        # Construct the name of the backbone model by appending "_block" to the model name
+        self.backbone_name = self.name + "_block"
+
+        # Build the ResNet model backbone
+        self.backbone_model, self.input_layer = self._build_backbone(input_shape, channel_attention_reduction)
+        backbone_output = self.backbone_model(self.input_layer)
+        # Validate the head trait with the provided tasks
+        validate_trait_dict(self.head, tasks)
+        # Build the fully connected head depending on the tasks
+        self.logits = build_fully_connect_head(backbone_output, self.head, tasks)
+
+        self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")
+        self.model.summary(expand_nested=True)  # summary() prints by itself and returns None
+
+    def _build_backbone(self, input_shape, channel_attention_reduction):
+        """
+        Build the ResNet model backbone.
+
+        Function to build the backbone of the ResNet model using the specified parameters.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            Shape of the input data, as passed to ``Input(shape=...)``.
+        channel_attention_reduction : int
+            Reduction factor of the ``ChannelAttention`` layer applied before the residual blocks.
+
+        Returns
+        -------
+        backbone_model : keras.Model
+            Keras model object representing the ResNet backbone.
+        network_input : keras tensor
+            Tensor the backbone model starts from (see the note in the body).
+        """
+        # Define the input layer from the input shape
+        network_input = Input(shape=input_shape, name="input")
+
+        # NOTE(review): ``network_input`` is rebound to the output of each optional
+        # initial layer below, so the backbone model and the tensor returned to the
+        # caller start after those layers rather than at the raw ``Input``.
+        # Confirm this is intended.
+
+        # Apply initial padding if specified
+        if self.init_padding > 0:
+            # ZeroPadding2D is configured by ``padding`` alone; it has no
+            # ``kernel_size`` or ``strides`` arguments.
+            network_input = keras.layers.ZeroPadding2D(
+                padding=self.init_padding,
+                name=self.backbone_name + "_padding",
+            )(network_input)
+        # Apply initial convolutional layer if specified
+        if self.init_layer is not None:
+            network_input = keras.layers.Conv2D(
+                filters=self.init_layer["filters"],
+                kernel_size=self.init_layer["kernel_size"],
+                strides=self.init_layer["strides"],
+                name=self.backbone_name + "_conv1_conv",
+            )(network_input)
+        # Apply max pooling if specified
+        if self.init_max_pool is not None:
+            network_input = keras.layers.MaxPool2D(
+                pool_size=self.init_max_pool["size"],
+                strides=self.init_max_pool["strides"],
+                name=self.backbone_name + "_pool1_pool",
+            )(network_input)
+
+        # Build the residual blocks
+        engine_output = self._stacked_res_blocks(
+            network_input,
+            architecture=self.architecture,
+            residual_block_type=self.residual_block_type,
+            attention=self.attention,
+            channel_attention_reduction=channel_attention_reduction,
+            name=self.backbone_name,
+        )
+
+        # Apply global average pooling as the final layer of the backbone
+        network_output = keras.layers.GlobalAveragePooling2D(
+            name=self.backbone_name + "_global_avgpool"
+        )(engine_output)
+
+        # Create the backbone model
+        backbone_model = keras.Model(
+            network_input, network_output, name=self.backbone_name
+        )
+        return backbone_model, network_input
+
+        
+    def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attention, channel_attention_reduction, name=None):
+        """
+        Build a stack of residual blocks for the CTLearn model.
+
+        This function applies channel attention to the input and then constructs the stack of residual blocks
+        that forms the backbone of the CTLearn model. Each residual block consists of convolutional layers with skip connections.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input Keras layer to the residual blocks.
+        architecture : list of dict
+            List of dictionaries containing the architecture of the ResNet model, which includes:
+            - Number of filters for the convolutional layers in the residual blocks.
+            - Number of residual blocks to stack.
+        residual_block_type : str
+            Type of residual block to use. Options are 'basic' or 'bottleneck'.
+        attention : dict
+            Dictionary containing the configuration parameters for the attention mechanism.
+        channel_attention_reduction : int
+            Reduction factor of the ``ChannelAttention`` layer applied to ``inputs``.
+        name : str, optional
+            Label for the model.
+
+        Returns
+        -------
+        x : keras.layers.Layer
+            Output Keras layer after passing through the stack of residual blocks.
+        """
+
+        # Get hyperparameters for the model architecture
+        filters_list = [
+            layer["filters"]
+            for layer in architecture
+        ]
+        blocks_list = [
+            layer["blocks"]
+            for layer in architecture
+        ]
+
+        # Gate the input with channel attention before the first residual stack
+        attended = ChannelAttention(channel_attention_reduction, "CHANNEL")(inputs)
+
+        # Build the ResNet model
+        x = self._stack_fn(
+            attended,
+            filters_list[0],
+            blocks_list[0],
+            residual_block_type,
+            stride=1,
+            attention=attention,
+            name=name + "_conv2",
+        )
+        for i, (filters, blocks) in enumerate(zip(filters_list[1:], blocks_list[1:])):
+            x = self._stack_fn(
+                x,
+                filters,
+                blocks,
+                residual_block_type,
+                attention=attention,
+                name=name + "_conv" + str(i + 3),
+            )
+        return x
+
+
+    def _stack_fn(
+        self,
+        inputs,
+        filters,
+        blocks,
+        residual_block_type,
+        stride=2,
+        attention=None,
+        name=None,
+    ):
+        """
+        Stack ``blocks`` residual blocks of a single type to form one stage of the backbone.
+
+        The first block is built with ``stride`` and the block builder's default shortcut setting;
+        blocks 2..``blocks`` are built with ``conv_shortcut=False`` and the builder's default stride.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input tensor to the first residual block.
+        filters : int
+            Number of filters, forwarded unchanged to every block builder.
+        blocks : int
+            Number of residual blocks to stack; one block is always built, even if this is below 1.
+        residual_block_type : str
+            Key selecting the block builder: 'basic' or 'bottleneck' (any other key raises KeyError).
+        stride : int, optional
+            Stride passed to the first block only. Default is 2.
+        attention : dict, optional
+            Attention configuration forwarded unchanged to every block. Default is None.
+        name : str, optional
+            Prefix for the block names ("_block<i>" is appended); despite the default, None is not usable.
+
+        Returns
+        -------
+        keras.layers.Layer
+            Output tensor of the last block in the stack.
+        """
+
+        res_blocks = {
+            "basic": self._basic_residual_block,
+            "bottleneck": self._bottleneck_residual_block,
+        }
+
+        x = res_blocks[residual_block_type](
+            inputs,
+            filters,
+            stride=stride,
+            attention=attention,
+            name=name + "_block1",
+        )
+        for i in range(2, blocks + 1):
+            x = res_blocks[residual_block_type](
+                x,
+                filters,
+                conv_shortcut=False,
+                attention=attention,
+                name=name + "_block" + str(i),
+            )
+
+        return x
+
+
+    def _basic_residual_block(
+        self,
+        inputs,
+        filters,
+        kernel_size=3,
+        stride=1,
+        conv_shortcut=True,
+        attention=None,
+        name=None,
+    ):
+        """
+        Build a basic (two-convolution) residual block.
+
+        The residual branch is two ``kernel_size`` convolutions with "same" padding and ReLU
+        activation, optionally followed by an attention block. It is added to the shortcut
+        (a 1x1 convolution or the unchanged input) and the sum is passed through a final ReLU.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input tensor to the residual block.
+        filters : int
+            Number of filters of both convolutions and of the convolutional shortcut.
+        kernel_size : int, optional
+            Kernel size of the two residual-branch convolutions. Default is 3.
+        stride : int, optional
+            Stride of the first convolution and of the convolutional shortcut. Default is 1.
+        conv_shortcut : bool, optional
+            If True, the shortcut is a 1x1 convolution; if False, ``inputs`` is added as-is. Default is True.
+        attention : dict, optional
+            Dict with "mechanism" ("Dual-SE", "Channel-SE" or "Spatial-SE") and, for the first two, "reduction_ratio"; None disables it.
+        name : str, optional
+            Prefix for all layer names in the block; it is concatenated with suffixes, so None is not usable.
+
+        Returns
+        -------
+        keras.layers.Layer
+            Output tensor after applying the residual block.
+        """
+
+        if conv_shortcut:
+            shortcut = keras.layers.Conv2D(
+                filters=filters, kernel_size=1, strides=stride, name=name + "_0_conv"
+            )(inputs)
+        else:
+            shortcut = inputs
+        
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            strides=stride,
+            padding="same",
+            activation="relu",
+            name=name + "_1_conv",
+        )(inputs)
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            padding="same",
+            activation="relu",
+            name=name + "_2_conv",
+        )(x)
+
+        # Optional attention on the residual branch only; an unrecognised mechanism leaves x unchanged
+        if attention is not None:
+            if attention["mechanism"] == "Dual-SE":
+                x = dual_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_dse"
+                )
+            elif attention["mechanism"] == "Channel-SE":
+                x = channel_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_cse"
+                )
+            elif attention["mechanism"] == "Spatial-SE":
+                x = spatial_squeeze_excite_block(x, name=name + "_sse")
+
+        x = keras.layers.Add(name=name + "_add")([shortcut, x])
+        x = keras.layers.ReLU(name=name + "_out")(x)
+
+        return x
+
+
+    def _bottleneck_residual_block(
+        self,
+        inputs,
+        filters,
+        kernel_size=3,
+        stride=1,
+        conv_shortcut=True,
+        attention=None,
+        name=None,
+    ):
+        """
+        Build a bottleneck (three-convolution) residual block.
+
+        The residual branch is a 1x1 convolution with ``filters`` outputs, a ``kernel_size`` convolution
+        with ``filters`` outputs (both ReLU-activated) and a 1x1 convolution with ``4 * filters`` outputs
+        and no ``activation`` set, optionally followed by an attention block. It is added to the shortcut
+        (a 1x1 convolution with ``4 * filters`` outputs, or the unchanged input) and passed through a ReLU.
+
+        Parameters
+        ----------
+        inputs : keras.layers.Layer
+            Input tensor to the residual block.
+        filters : int
+            Number of filters of the first two convolutions; the last one and the conv shortcut use 4x as many.
+        kernel_size : int, optional
+            Kernel size of the middle convolution. Default is 3.
+        stride : int, optional
+            Stride of the first 1x1 convolution and of the convolutional shortcut. Default is 1.
+        conv_shortcut : bool, optional
+            If True, the shortcut is a 1x1 convolution; if False, ``inputs`` is added as-is. Default is True.
+        attention : dict, optional
+            Dict with "mechanism" ("Dual-SE", "Channel-SE" or "Spatial-SE") and, for the first two, "reduction_ratio"; None disables it.
+        name : str, optional
+            Prefix for all layer names in the block; it is concatenated with suffixes, so None is not usable.
+
+        Returns
+        -------
+        output : keras.layers.Layer
+            Output tensor of the residual block.
+        """
+
+        if conv_shortcut:
+            shortcut = keras.layers.Conv2D(
+                filters=4 * filters,
+                kernel_size=1,
+                strides=stride,
+                name=name + "_0_conv",
+            )(inputs)
+        else:
+            shortcut = inputs
+
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=1,
+            strides=stride,
+            activation="relu",
+            name=name + "_1_conv",
+        )(inputs)
+        x = keras.layers.Conv2D(
+            filters=filters,
+            kernel_size=kernel_size,
+            padding="same",
+            activation="relu",
+            name=name + "_2_conv",
+        )(x)
+        x = keras.layers.Conv2D(filters=4 * filters, kernel_size=1, name=name + "_3_conv")(
+            x
+        )
+
+        # Optional attention on the residual branch only; an unrecognised mechanism leaves x unchanged
+        if attention is not None:
+            if attention["mechanism"] == "Dual-SE":
+                x = dual_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_dse"
+                )
+            elif attention["mechanism"] == "Channel-SE":
+                x = channel_squeeze_excite_block(
+                    x, attention["reduction_ratio"], name=name + "_cse"
+                )
+            elif attention["mechanism"] == "Spatial-SE":
+                x = spatial_squeeze_excite_block(x, name=name + "_sse")
+
+        x = keras.layers.Add(name=name + "_add")([shortcut, x])
+        x = keras.layers.ReLU(name=name + "_out")(x)
+
+        return x

From d81f79870324921c470b5b515379747a7eb13f53 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Tue, 11 Mar 2025 11:42:44 +0100
Subject: [PATCH 05/11] Update train_model.py

---
 ctlearn/tools/train_model.py | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index 2982d571..74fb6453 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -385,14 +385,32 @@ def start(self):
         with self.strategy.scope():
             # Construct the model
             self.log.info("Setting up the model.")
-            self.model = CTLearnModel.from_name(
+
+            if self.model_type == "ChannelAttention_ResNet":
+               self.model = CTLearnModel.from_name(
                 self.model_type,
                 input_shape=self.training_loader.input_shape,
                 tasks=self.reco_tasks,
                 parent=self,
-                se_kernel_size = self.se_kernel_size,
                 channel_attention_reduction = self.channel_attention_reduction
-            ).model
+                ).model
+            elif self.model_type == "SpatialAttention_ResNet":
+                self.model = CTLearnModel.from_name(
+                    self.model_type,
+                    input_shape=self.training_loader.input_shape,
+                    tasks=self.reco_tasks,
+                    parent=self,
+                    se_kernel_size = self.se_kernel_size,
+                    channel_attention_reduction = self.channel_attention_reduction
+                ).model
+            else:
+                self.model = CTLearnModel.from_name(
+                    self.model_type,
+                    input_shape=self.training_loader.input_shape,
+                    tasks=self.reco_tasks,
+                    parent=self
+                ).model
+                
             # Validate the optimizer parameters
             validate_trait_dict(self.optimizer, ["name", "base_learning_rate"])
             # Set the learning rate for the optimizer

From e23a908a1680713b94909af4c26059542963f987 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 12 Mar 2025 10:23:49 +0100
Subject: [PATCH 06/11] Update train_model.py

---
 ctlearn/tools/train_model.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index 74fb6453..1ffb0468 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -400,8 +400,7 @@ def start(self):
                     input_shape=self.training_loader.input_shape,
                     tasks=self.reco_tasks,
                     parent=self,
-                    se_kernel_size = self.se_kernel_size,
-                    channel_attention_reduction = self.channel_attention_reduction
+                    se_kernel_size = self.se_kernel_size
                 ).model
             else:
                 self.model = CTLearnModel.from_name(

From 07f21351bd90a9cb0167879488c34c63f903f5a4 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 12 Mar 2025 11:27:53 +0100
Subject: [PATCH 07/11] Update train_model.py

---
 ctlearn/tools/train_model.py | 36 +++---------------------------------
 1 file changed, 3 insertions(+), 33 deletions(-)

diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index 1ffb0468..7e86a72e 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -156,18 +156,6 @@ class TrainCTLearnModel(Tool):
         help="Number of epochs to train the neural network.",
     ).tag(config=True)
     
-    se_kernel_size = Int(
-        default_value=1,
-        allow_none=False,
-        help="Kernel size of the Spatial Attention layer",
-    ).tag(config=True)
-
-    channel_attention_reduction = Int(
-        default_value=16,
-        allow_none=False,
-        help="Reduction size of the Channel Attention layer",
-    ).tag(config=True)
-
 
     batch_size = Int(
         default_value=64,
@@ -385,30 +373,12 @@ def start(self):
         with self.strategy.scope():
             # Construct the model
             self.log.info("Setting up the model.")
-
-            if self.model_type == "ChannelAttention_ResNet":
-               self.model = CTLearnModel.from_name(
+            self.model = CTLearnModel.from_name(
                 self.model_type,
                 input_shape=self.training_loader.input_shape,
                 tasks=self.reco_tasks,
-                parent=self,
-                channel_attention_reduction = self.channel_attention_reduction
-                ).model
-            elif self.model_type == "SpatialAttention_ResNet":
-                self.model = CTLearnModel.from_name(
-                    self.model_type,
-                    input_shape=self.training_loader.input_shape,
-                    tasks=self.reco_tasks,
-                    parent=self,
-                    se_kernel_size = self.se_kernel_size
-                ).model
-            else:
-                self.model = CTLearnModel.from_name(
-                    self.model_type,
-                    input_shape=self.training_loader.input_shape,
-                    tasks=self.reco_tasks,
-                    parent=self
-                ).model
+                parent=self
+            ).model
                 
             # Validate the optimizer parameters
             validate_trait_dict(self.optimizer, ["name", "base_learning_rate"])

From 5661c853d3b8c4761e457e86a70ddc21959c6d25 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 12 Mar 2025 11:38:19 +0100
Subject: [PATCH 08/11] Update model.py

---
 ctlearn/core/model.py | 508 +++---------------------------------------
 1 file changed, 36 insertions(+), 472 deletions(-)

diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py
index 0e3bc0c4..4e22b72f 100644
--- a/ctlearn/core/model.py
+++ b/ctlearn/core/model.py
@@ -21,8 +21,7 @@
     "SingleCNN",
     "ResNet",
     "LoadedModel",
-    "SpatialAttention_ResNet",
-    "ChannelAttention_ResNet"
+    "Attention_ResNet"
 ]
 
   
@@ -916,7 +915,7 @@ def call(self, x):
         return x
 
 
-class SpatialAttention_ResNet(CTLearnModel):
+class Attention_ResNet(CTLearnModel):
     """
     ``ResNet`` is a residual neural network model.
 
@@ -925,7 +924,7 @@ class SpatialAttention_ResNet(CTLearnModel):
     """
 
     name = Unicode(
-        "SE_ThinResNet",
+        "Attention_ThinResNet",
         help="Name of the model backbone.",
     ).tag(config=True)
 
@@ -964,471 +963,39 @@ class SpatialAttention_ResNet(CTLearnModel):
         ),
     ).tag(config=True)
 
-    def __init__(
-        self,
-        input_shape,
-        tasks,
-        se_kernel_size,
-        config=None,
-        parent=None,
-        **kwargs,
-    ):
-        super().__init__(
-            config=config,
-            parent=parent,
-            **kwargs,
-        )
-	
+    se_kernel_size = Int(
+        default_value=1,
+        allow_none=False,
+        help="Kernel size of the Spatial Attention layer",
+    ).tag(config=True)
 
 	
-        # Validate the architecture trait
-        for layer in self.architecture:
-            validate_trait_dict(layer, ["filters", "blocks"])
-        # Validate the initial layers trait
-        if self.init_layer is not None:
-            validate_trait_dict(self.init_layer, ["filters", "kernel_size", "strides"])
-        if self.init_max_pool is not None:
-            validate_trait_dict(self.init_max_pool, ["size", "strides"])
-        print("AQI", input_shape)
-        # Construct the name of the backbone model by appending "_block" to the model name
-        self.backbone_name = self.name + "_block"
-
-        # Build the ResNet model backbone
-        self.backbone_model, self.input_layer = self._build_backbone(input_shape,se_kernel_size)
-        backbone_output = self.backbone_model(self.input_layer)
-        # Validate the head trait with the provided tasks
-        validate_trait_dict(self.head, tasks)
-        # Build the fully connected head depending on the tasks
-        self.logits = build_fully_connect_head(backbone_output, self.head, tasks)
-
-        self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")
-        print(self.model.summary(expand_nested=True))
-
-    def _build_backbone(self, input_shape, se_kernel_size):
-        """
-        Build the ResNet model backbone.
-
-        Function to build the backbone of the ResNet model using the specified parameters.
-
-        Parameters
-        ----------
-        input_shape : tuple
-            Shape of the input data (batch_size, height, width, channels).  
-        
-        Returns
-        -------
-        backbone_model : keras.Model
-            Keras model object representing the ResNet backbone.
-        network_input : keras.Input
-            Keras input layer object for the backbone model.
-        """
-        # Define the input layer from the input shape
-        network_input = Input(shape=input_shape, name="input")
-        
-        print('-HOLA---------------------------------',network_input.shape)
-        
-
-        
-
-        # Apply initial padding if specified
-        if self.init_padding > 0:
-            network_input = keras.layers.ZeroPadding2D(
-                padding=self.init_padding,
-                kernel_size=self.init_layer["kernel_size"],
-                strides=self.init_layer["strides"],
-                name=self.backbone_name + "_padding",
-            )(network_input)
-        # Apply initial convolutional layer if specified
-        if self.init_layer is not None:
-            network_input = keras.layers.Conv2D(
-                filters=self.init_layer["filters"],
-                kernel_size=self.init_layer["kernel_size"],
-                strides=self.init_layer["strides"],
-                name=self.backbone_name + "_conv1_conv",
-            )(network_input)
-        # Apply max pooling if specified
-        if self.init_max_pool is not None:
-            network_input = keras.layers.MaxPool2D(
-                pool_size=self.init_max_pool["size"],
-                strides=self.init_max_pool["strides"],
-                name=self.backbone_name + "_pool1_pool",
-            )(network_input)
-        # Build the residual blocks
-        
-        engine_output = self._stacked_res_blocks(
-            network_input,
-            architecture=self.architecture,
-            residual_block_type=self.residual_block_type,
-            attention=self.attention,
-            se_kernel_size = se_kernel_size,
-            name=self.backbone_name
-
-        )
-
-        # Apply global average pooling as the final layer of the backbone
-        network_output = keras.layers.GlobalAveragePooling2D(
-            name=self.backbone_name + "_global_avgpool"
-        )(engine_output)
-
-        # Create the backbone model
-        backbone_model = keras.Model(
-            network_input, network_output, name=self.backbone_name
-        )
-        #print(backbone_model.summary())
-        return backbone_model, network_input
-
-        
-    def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attention,se_kernel_size, name=None):
-        """
-        Build a stack of residual blocks for the CTLearn model.
-
-        This function constructs a stack of residual blocks, which are used to build the backbone of the CTLearn model.
-        Each residual block consists of a series of convolutional layers with skip connections.
-
-        Parameters
-        ----------
-        inputs : keras.layers.Layer
-            Input Keras layer to the residual blocks.
-        architecture : list of dict
-            List of dictionaries containing the architecture of the ResNet model, which includes:
-            - Number of filters for the convolutional layers in the residual blocks.
-            - Number of residual blocks to stack.
-        residual_block_type : str
-            Type of residual block to use. Options are 'basic' or 'bottleneck'.
-        attention : dict
-            Dictionary containing the configuration parameters for the attention mechanism.
-        name : str, optional
-            Label for the model.
-
-        Returns
-        -------
-        x : keras.layers.Layer
-            Output Keras layer after passing through the stack of residual blocks.
-        """
-
-        # Get hyperparameters for the model architecture
-        filters_list = [
-            layer["filters"]
-            for layer in architecture
-        ]
-        blocks_list = [
-            layer["blocks"]
-            for layer in architecture
-        ]
-        
-        spatt = SpatialAttention(se_kernel_size,"SPATIAL")(inputs)
-
-
-        print('----------------------------------',spatt.shape)
-        
-        # Build the ResNet model
-        x = self._stack_fn(
-            spatt,
-            filters_list[0],
-            blocks_list[0],
-            residual_block_type,
-            stride=1,
-            attention=attention,
-            name=name + "_conv2",
-        )
-        for i, (filters, blocks) in enumerate(zip(filters_list[1:], blocks_list[1:])):
-            x = self._stack_fn(
-                x,
-                filters,
-                blocks,
-                residual_block_type,
-                attention=attention,
-                name=name + "_conv" + str(i + 3),
-            )
-        return x
-
-
-    def _stack_fn(
-        self,
-        inputs,
-        filters,
-        blocks,
-        residual_block_type,
-        stride=2,
-        attention=None,
-        name=None,
-    ):
-        """
-        Stack residual blocks for the CTLearn model.
-
-        This function constructs a stack of residual blocks, which are used to build the backbone of the CTLearn model.
-        Each residual block can be of different types (e.g., basic or bottleneck) and can include attention mechanisms.
-
-        Parameters
-        ----------
-        inputs : keras.layers.Layer
-            Input tensor to the residual blocks.
-        filters : int
-            Number of filters for the bottleneck layer in a block.
-        blocks : int
-            Number of residual blocks to stack.
-        residual_block_type : str
-            Type of residual block ('basic' or 'bottleneck').
-        stride : int, optional
-            Stride for the first layer in the first block. Default is 2.
-        attention : dict, optional
-            Configuration parameters for the attention mechanism. Default is None.
-        name : str, optional
-            Label for the stack. Default is None.
-
-        Returns
-        -------
-        keras.layers.Layer
-            Output tensor for the stacked blocks.
-        """
-
-        res_blocks = {
-            "basic": self._basic_residual_block,
-            "bottleneck": self._bottleneck_residual_block,
-        }
-
-        x = res_blocks[residual_block_type](
-            inputs,
-            filters,
-            stride=stride,
-            attention=attention,
-            name=name + "_block1",
-        )
-        for i in range(2, blocks + 1):
-            x = res_blocks[residual_block_type](
-                x,
-                filters,
-                conv_shortcut=False,
-                attention=attention,
-                name=name + "_block" + str(i),
-            )
-
-        return x
-
-
-    def _basic_residual_block(
-        self,
-        inputs,
-        filters,
-        kernel_size=3,
-        stride=1,
-        conv_shortcut=True,
-        attention=None,
-        name=None,
-    ):
-        """
-        Build a basic residual block for the CTLearn model.
-
-        This function constructs a basic residual block, which is a fundamental building block
-        of ResNet architectures. The block consists of two convolutional layers with an optional
-        convolutional shortcut, and can include attention mechanisms.
-
-        Parameters
-        ----------
-        inputs : keras.layers.Layer
-            Input tensor to the residual block.
-        filters : int
-            Number of filters for the convolutional layers.
-        kernel_size : int, optional
-            Size of the convolutional kernel. Default is 3.
-        stride : int, optional
-            Stride for the convolutional layers. Default is 1.
-        conv_shortcut : bool, optional
-            Whether to use a convolutional layer for the shortcut connection. Default is True.
-        attention : dict, optional
-            Configuration parameters for the attention mechanism. Default is None.
-        name : str, optional
-            Name for the residual block. Default is None.
-
-        Returns
-        -------
-        keras.layers.Layer
-            Output tensor after applying the residual block.
-        """
-
-        if conv_shortcut:
-            shortcut = keras.layers.Conv2D(
-                filters=filters, kernel_size=1, strides=stride, name=name + "_0_conv"
-            )(inputs)
-        else:
-            shortcut = inputs
-        
-        x = keras.layers.Conv2D(
-            filters=filters,
-            kernel_size=kernel_size,
-            strides=stride,
-            padding="same",
-            activation="relu",
-            name=name + "_1_conv",
-        )(inputs)
-        x = keras.layers.Conv2D(
-            filters=filters,
-            kernel_size=kernel_size,
-            padding="same",
-            activation="relu",
-            name=name + "_2_conv",
-        )(x)
-
-        # Attention mechanism
-        if attention is not None:
-            if attention["mechanism"] == "Dual-SE":
-                x = dual_squeeze_excite_block(
-                    x, attention["reduction_ratio"], name=name + "_dse"
-                )
-            elif attention["mechanism"] == "Channel-SE":
-                x = channel_squeeze_excite_block(
-                    x, attention["reduction_ratio"], name=name + "_cse"
-                )
-            elif attention["mechanism"] == "Spatial-SE":
-                x = spatial_squeeze_excite_block(x, name=name + "_sse")
-
-        x = keras.layers.Add(name=name + "_add")([shortcut, x])
-        x = keras.layers.ReLU(name=name + "_out")(x)
-
-        return x
-
-
-    def _bottleneck_residual_block(
-        self,
-        inputs,
-        filters,
-        kernel_size=3,
-        stride=1,
-        conv_shortcut=True,
-        attention=None,
-        name=None,
-    ):
-        """
-        Build a bottleneck residual block for the CTLearn model.
-
-        This function constructs a bottleneck residual block, which is a fundamental building block of
-        ResNet architectures. The block consists of three convolutional layers: a 1x1 convolution to reduce
-        dimensionality, a 3x3 convolution for main computation, and another 1x1 convolution to restore dimensionality.
-        It also includes an optional shortcut connection and can include attention mechanisms.
-
-        Parameters
-        ----------
-        inputs : keras.layers.Layer
-            Input tensor to the residual block.
-        filters : int
-            Number of filters for the convolutional layers.
-        kernel_size : int, optional
-            Size of the convolutional kernel. Default is 3.
-        stride : int, optional
-            Stride for the convolutional layers. Default is 1.
-        conv_shortcut : bool, optional
-            Whether to use a convolutional layer for the shortcut connection. Default is True.
-        attention : dict, optional
-            Configuration parameters for the attention mechanism. Default is None.
-        name : str, optional
-            Name for the residual block. Default is None.
-
-        Returns
-        -------
-        output : keras.layers.Layer
-            Output layer of the residual block.
-        """
-
-        if conv_shortcut:
-            shortcut = keras.layers.Conv2D(
-                filters=4 * filters,
-                kernel_size=1,
-                strides=stride,
-                name=name + "_0_conv",
-            )(inputs)
-        else:
-            shortcut = inputs
-
-        x = keras.layers.Conv2D(
-            filters=filters,
-            kernel_size=1,
-            strides=stride,
-            activation="relu",
-            name=name + "_1_conv",
-        )(inputs)
-        x = keras.layers.Conv2D(
-            filters=filters,
-            kernel_size=kernel_size,
-            padding="same",
-            activation="relu",
-            name=name + "_2_conv",
-        )(x)
-        x = keras.layers.Conv2D(filters=4 * filters, kernel_size=1, name=name + "_3_conv")(
-            x
-        )
-
-        # Attention mechanism
-        if attention is not None:
-            if attention["mechanism"] == "Dual-SE":
-                x = dual_squeeze_excite_block(
-                    x, attention["reduction_ratio"], name=name + "_dse"
-                )
-            elif attention["mechanism"] == "Channel-SE":
-                x = channel_squeeze_excite_block(
-                    x, attention["reduction_ratio"], name=name + "_cse"
-                )
-            elif attention["mechanism"] == "Spatial-SE":
-                x = spatial_squeeze_excite_block(x, name=name + "_sse")
-
-        x = keras.layers.Add(name=name + "_add")([shortcut, x])
-        x = keras.layers.ReLU(name=name + "_out")(x)
-
-        return x
-
-
-class ChannelAttention_ResNet(CTLearnModel):
-    """
-    ``ResNet`` is a residual neural network model.
-
-    This class extends the functionality of ``CTLearnModel`` by implementing
-    methods to build a residual neural network model.
-    """
-
-    name = Unicode(
-        "CH_ThinResNet",
-        help="Name of the model backbone.",
+    channel_attention_reduction = Int(
+        default_value=16,
+        allow_none=False,
+        help="Reduction size of the Channel Attention layer",
     ).tag(config=True)
 
-    init_layer = Dict(
-        default_value=None,
-        allow_none=True,
-        help=(
-            "Parameters for the first convolutional layer. "
-            "E.g. ``{'filters': 64, 'kernel_size': 7, 'strides': 2}``."
-        ),
+    attention_type = CaselessStrEnum(
+        ["spatial", "channel", "both"],
+        default_value="spatial",
+        allow_none=False,
+        help="Type of attention layer(s) to apply",
     ).tag(config=True)
 
-    init_max_pool = Dict(
-        default_value=None,
-        allow_none=True,
-        help=(
-            "Parameters for the first max pooling layer. "
-            "E.g. ``{'size': 3, 'strides': 2}``."
-        ),
-    ).tag(config=True)
 
-    residual_block_type = CaselessStrEnum(
-        ["basic", "bottleneck"],
-        default_value="bottleneck",
-        allow_none=False,
-        help="Type of residual block to use.",
-    ).tag(config=True)
-    
-    architecture = List(
-        trait=Dict(),
-        default_value=[{'filters': 48, 'blocks': 2}, {'filters': 96, 'blocks': 3}, {'filters': 128, 'blocks': 3}, {'filters': 256, 'blocks': 3}],
+    attention_location = CaselessStrEnum(
+        ["initial", "after_conv"],
+        default_value="initial",
         allow_none=False,
-        help=(
-            "List of dicts containing the number of filters and residual blocks. "
-            "E.g. ``[{'filters': 12, 'blocks': 2}, ...]``."
-        ),
+        help="Location of the attention layer(s) to apply",
     ).tag(config=True)
 
+
     def __init__(
         self,
         input_shape,
         tasks,
-        channel_attention_reduction,
         config=None,
         parent=None,
         **kwargs,
@@ -1449,12 +1016,12 @@ def __init__(
             validate_trait_dict(self.init_layer, ["filters", "kernel_size", "strides"])
         if self.init_max_pool is not None:
             validate_trait_dict(self.init_max_pool, ["size", "strides"])
-        print("AQI", input_shape)
-        # Construct the name of the backbone model by appending "_block" to the model name
+
+        # Construct the name of the backbone model by appending "_block" to the model name
         self.backbone_name = self.name + "_block"
 
         # Build the ResNet model backbone
-        self.backbone_model, self.input_layer = self._build_backbone(input_shape,channel_attention_reduction)
+        self.backbone_model, self.input_layer = self._build_backbone(input_shape,se_kernel_size)
         backbone_output = self.backbone_model(self.input_layer)
         # Validate the head trait with the provided tasks
         validate_trait_dict(self.head, tasks)
@@ -1464,7 +1031,7 @@ def __init__(
         self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")
         print(self.model.summary(expand_nested=True))
 
-    def _build_backbone(self, input_shape, channel_attention_reduction):
+    def _build_backbone(self, input_shape, se_kernel_size):
         """
         Build the ResNet model backbone.
 
@@ -1484,11 +1051,6 @@ def _build_backbone(self, input_shape, channel_attention_reduction):
         """
         # Define the input layer from the input shape
         network_input = Input(shape=input_shape, name="input")
-        
-        print('-HOLA---------------------------------',network_input.shape)
-        
-
-        
 
         # Apply initial padding if specified
         if self.init_padding > 0:
@@ -1519,8 +1081,7 @@ def _build_backbone(self, input_shape, channel_attention_reduction):
             network_input,
             architecture=self.architecture,
             residual_block_type=self.residual_block_type,
-            attention=self.attention,
-            channel_attention_reduction = channel_attention_reduction,
+            attention=self.attention
             name=self.backbone_name
 
         )
@@ -1534,11 +1095,10 @@ def _build_backbone(self, input_shape, channel_attention_reduction):
         backbone_model = keras.Model(
             network_input, network_output, name=self.backbone_name
         )
-        #print(backbone_model.summary())
         return backbone_model, network_input
 
         
-    def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attention,channel_attention_reduction, name=None):
+    def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attention, name=None):
         """
         Build a stack of residual blocks for the CTLearn model.
 
@@ -1575,15 +1135,19 @@ def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attenti
             layer["blocks"]
             for layer in architecture
         ]
-        
-        spatt = ChannelAttention(channel_attention_reduction,"CHANNEL")(inputs)
+
+	if attention_type == "spatial":
+	    attention_layer = SpatialAttention(self.se_kernel_size,"SPATIAL")(inputs)
+	elif attention_type == "channel":
+	    attention_layer = ChannelAttention(self.channel_attention_reduction,"CHANNEL")(inputs)
+	else:
+	    attention_layer = CBAM(self.channel_attention_reduction,self.se_kernel_size,"CBAM")(inputs)
 
 
-        print('----------------------------------',spatt.shape)
         
         # Build the ResNet model
         x = self._stack_fn(
-            spatt,
+            attention_layer,
             filters_list[0],
             blocks_list[0],
             residual_block_type,

From 222d032bff2b521890258ed31e381a0495607a9d Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 12 Mar 2025 12:35:12 +0100
Subject: [PATCH 09/11] Update Attention_ResNet structure

---
 ctlearn/core/model.py | 39 ++++++++++++++++++++++++++++++---------
 1 file changed, 30 insertions(+), 9 deletions(-)

diff --git a/ctlearn/core/model.py b/ctlearn/core/model.py
index 4e22b72f..62875ce9 100644
--- a/ctlearn/core/model.py
+++ b/ctlearn/core/model.py
@@ -1021,7 +1021,7 @@ def __init__(
         self.backbone_name = self.name + "_block"
 
         # Build the ResNet model backbone
-        self.backbone_model, self.input_layer = self._build_backbone(input_shape,se_kernel_size)
+        self.backbone_model, self.input_layer = self._build_backbone(input_shape)
         backbone_output = self.backbone_model(self.input_layer)
         # Validate the head trait with the provided tasks
         validate_trait_dict(self.head, tasks)
@@ -1031,7 +1031,7 @@ def __init__(
         self.model = keras.Model(self.input_layer, self.logits, name="CTLearn_model")
         print(self.model.summary(expand_nested=True))
 
-    def _build_backbone(self, input_shape, se_kernel_size):
+    def _build_backbone(self, input_shape):
         """
         Build the ResNet model backbone.
 
@@ -1081,7 +1081,7 @@ def _build_backbone(self, input_shape, se_kernel_size):
             network_input,
             architecture=self.architecture,
             residual_block_type=self.residual_block_type,
-            attention=self.attention
+            attention=self.attention,
             name=self.backbone_name
 
         )
@@ -1136,12 +1136,13 @@ def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attenti
             for layer in architecture
         ]
 
-	if attention_type == "spatial":
-	    attention_layer = SpatialAttention(self.se_kernel_size,"SPATIAL")(inputs)
-	elif attention_type == "channel":
-	    attention_layer = ChannelAttention(self.channel_attention_reduction,"CHANNEL")(inputs)
-	else:
-	    attention_layer = CBAM(self.channel_attention_reduction,self.se_kernel_size,"CBAM")(inputs)
+
+        if self.attention_type == "spatial":
+            attention_layer = SpatialAttention(self.se_kernel_size,"SPATIAL")(inputs)
+        elif self.attention_type == "channel":
+            attention_layer = ChannelAttention(self.channel_attention_reduction,"CHANNEL")(inputs)
+        else:
+            attention_layer = CBAM(self.channel_attention_reduction,self.se_kernel_size,"CBAM")(inputs)
 
 
         
@@ -1155,6 +1156,17 @@ def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attenti
             attention=attention,
             name=name + "_conv2",
         )
+
+        if self.attention_location != 'initial':
+       
+            if self.attention_type == "spatial":
+                x = SpatialAttention(self.se_kernel_size,name + "_SPATIAL_conv2")(x)
+            elif self.attention_type == "channel":
+                x = ChannelAttention(self.channel_attention_reduction,name + "_CHANNEL_conv2")(x)
+            else:
+                x = CBAM(self.channel_attention_reduction,self.se_kernel_size,name + "_CBAM_conv2")(x)
+            
+            
         for i, (filters, blocks) in enumerate(zip(filters_list[1:], blocks_list[1:])):
             x = self._stack_fn(
                 x,
@@ -1164,6 +1176,15 @@ def _stacked_res_blocks(self, inputs, architecture, residual_block_type, attenti
                 attention=attention,
                 name=name + "_conv" + str(i + 3),
             )
+            
+            if self.attention_location == 'after_conv':
+                if self.attention_type == "spatial":
+                    x = SpatialAttention(self.se_kernel_size,name + "_SPATIAL_conv" + str(i + 3))(x)
+                elif self.attention_type == "channel":
+                    x = ChannelAttention(self.channel_attention_reduction,name + "_CHANNEL_conv" + str(i + 3))(x)
+                else:
+                    x = CBAM(self.channel_attention_reduction,self.se_kernel_size,name + "_CBAM_conv" + str(i + 3))(x)
+
         return x
 
 

From 86b58c045132449c13c8e3fc67cf3a64e4e1286a Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 12 Mar 2025 12:40:35 +0100
Subject: [PATCH 10/11] Add trailing comma and tidy whitespace in train_model.py

---
 ctlearn/tools/train_model.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index 7e86a72e..765b81ef 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -156,7 +156,6 @@ class TrainCTLearnModel(Tool):
         help="Number of epochs to train the neural network.",
     ).tag(config=True)
     
-
     batch_size = Int(
         default_value=64,
         allow_none=False,
@@ -377,9 +376,8 @@ def start(self):
                 self.model_type,
                 input_shape=self.training_loader.input_shape,
                 tasks=self.reco_tasks,
-                parent=self
+                parent=self,
             ).model
-                
             # Validate the optimizer parameters
             validate_trait_dict(self.optimizer, ["name", "base_learning_rate"])
             # Set the learning rate for the optimizer

From a137c42d9f87fea1e792fedf578056b394470ed2 Mon Sep 17 00:00:00 2001
From: rcervinoucm <rcervino@ucm.es>
Date: Wed, 12 Mar 2025 12:41:51 +0100
Subject: [PATCH 11/11] Strip trailing whitespace in train_model.py

---
 ctlearn/tools/train_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ctlearn/tools/train_model.py b/ctlearn/tools/train_model.py
index 765b81ef..c4e17b52 100644
--- a/ctlearn/tools/train_model.py
+++ b/ctlearn/tools/train_model.py
@@ -155,7 +155,7 @@ class TrainCTLearnModel(Tool):
         allow_none=False,
         help="Number of epochs to train the neural network.",
     ).tag(config=True)
-    
+
     batch_size = Int(
         default_value=64,
         allow_none=False,