From 562800d245a967bf4014a7bc20a9850176b2d940 Mon Sep 17 00:00:00 2001
From: Johanmkr
Date: Tue, 4 Feb 2025 17:00:11 +0100
Subject: [PATCH 01/15] added precision test to test_metrics.py

---
 tests/test_metrics.py | 54 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 53 insertions(+), 1 deletion(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 1650e01..974bb93 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,4 +1,4 @@
-from utils.metrics import Recall, F1Score
+from utils.metrics import F1Score, Precision, Recall
 
 
 def test_recall():
@@ -30,3 +30,55 @@ def test_f1score():
     assert f1_metric.tp.sum().item() > 0, "Expected some true positives."
     assert f1_metric.fp.sum().item() > 0, "Expected some false positives."
     assert f1_metric.fn.sum().item() > 0, "Expected some false negatives."
+
+
+def test_precision_case1():
+    import torch
+
+    for boolean, true_precision in zip([True, False], [25.0 / 36, 7.0 / 10]):
+        true1 = torch.tensor([0, 1, 2, 1, 0, 2, 1, 0, 2, 1])
+        pred1 = torch.tensor([0, 2, 1, 1, 0, 2, 0, 0, 2, 1])
+        P = Precision(3, use_mean=boolean)
+        precision1 = P(true1, pred1)
+        assert precision1.allclose(torch.tensor(true_precision), atol=1e-5), (
+            f"Precision Score: {precision1.item()}"
+        )
+
+
+def test_precision_case2():
+    import torch
+
+    for boolean, true_precision in zip([True, False], [8.0 / 15, 6.0 / 15]):
+        true2 = torch.tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
+        pred2 = torch.tensor([0, 0, 4, 3, 4, 0, 4, 4, 2, 3, 4, 1, 2, 4, 0])
+        P = Precision(5, use_mean=boolean)
+        precision2 = P(true2, pred2)
+        assert precision2.allclose(torch.tensor(true_precision), atol=1e-5), (
+            f"Precision Score: {precision2.item()}"
+        )
+
+
+def test_precision_case3():
+    import torch
+
+    for boolean, true_precision in zip([True, False], [3.0 / 4, 4.0 / 5]):
+        true3 = torch.tensor([0, 0, 0, 1, 0])
+        pred3 = torch.tensor([1, 0, 0, 1, 0])
+        P = Precision(2, use_mean=boolean)
+        precision3 = P(true3, pred3)
+        assert precision3.allclose(torch.tensor(true_precision), atol=1e-5), (
+            f"Precision Score: {precision3.item()}"
+        )
+
+
+def test_for_zero_denominator():
+    import torch
+
+    for boolean in [True, False]:
+        true4 = torch.tensor([1, 1, 1, 1, 1])
+        pred4 = torch.tensor([0, 0, 0, 0, 0])
+        P = Precision(2, use_mean=boolean)
+        precision4 = P(true4, pred4)
+        assert precision4.allclose(torch.tensor(0.0), atol=1e-5), (
+            f"Precision Score: {precision4.item()}"
+        )

From c1de9cc82cc1bb9f86362fb319bc81efda22ea28 Mon Sep 17 00:00:00 2001
From: Johanmkr
Date: Tue, 4 Feb 2025 17:00:35 +0100
Subject: [PATCH 02/15] formatted utils folder

---
 utils/dataloaders/__init__.py |  2 +-
 utils/load_data.py            |  2 +-
 utils/metrics/F1.py           |  1 -
 utils/metrics/__init__.py     |  1 +
 utils/metrics/precision.py    | 58 +++++------------------------------
 utils/models/solveig_model.py | 49 +++++++++++++++--------------
 6 files changed, 34 insertions(+), 79 deletions(-)

diff --git a/utils/dataloaders/__init__.py b/utils/dataloaders/__init__.py
index bb97adc..1bd80f5 100644
--- a/utils/dataloaders/__init__.py
+++ b/utils/dataloaders/__init__.py
@@ -1,4 +1,4 @@
 __all__ = ["USPSDataset0_6", "USPSH5_Digit_7_9_Dataset"]
 
 from .usps_0_6 import USPSDataset0_6
-from .uspsh5_7_9 import USPSH5_Digit_7_9_Dataset
\ No newline at end of file
+from .uspsh5_7_9 import USPSH5_Digit_7_9_Dataset
diff --git a/utils/load_data.py b/utils/load_data.py
index f54e94a..d1868dd 100644
--- a/utils/load_data.py
+++ b/utils/load_data.py
@@ -8,6 +8,6 @@ def load_data(dataset: str, *args, **kwargs) -> Dataset:
         case "usps_0-6":
             return USPSDataset0_6(*args, **kwargs)
         case "usps_7-9":
-            return USPSH5_Digit_7_9_Dataset(*args, **kwargs)
+            return USPSH5_Digit_7_9_Dataset(*args, **kwargs)
         case _:
             raise ValueError(f"Dataset: {dataset} not implemented.")
diff --git a/utils/metrics/F1.py b/utils/metrics/F1.py
index 36e5e34..1e0e795 100644
--- a/utils/metrics/F1.py
+++ b/utils/metrics/F1.py
@@ -84,4 +84,3 @@ def compute(self):
         )
 
         return f1_score
-
diff --git a/utils/metrics/__init__.py b/utils/metrics/__init__.py
index f623943..4ac1ece 100644
--- a/utils/metrics/__init__.py
+++ b/utils/metrics/__init__.py
@@ -3,3 +3,4 @@
 from .EntropyPred import EntropyPrediction
 from .F1 import F1Score
 from .recall import Recall
+from .precision import Precision
diff --git a/utils/metrics/precision.py b/utils/metrics/precision.py
index be3f91b..61ba1eb 100644
--- a/utils/metrics/precision.py
+++ b/utils/metrics/precision.py
@@ -7,20 +7,23 @@
 
 
 class Precision(nn.Module):
-    """Metric module for precision. Can calculate precision both as a mean of precisions or as brute function of true positives and false positives. This is for now controller with the USE_MEAN macro.
+    """Metric module for precision. Can calculate precision either as a mean of per-class precisions or as a brute function of true positives and false positives.
 
     Parameters
    ----------
     num_classes : int
         Number of classes in the dataset.
+    use_mean : bool
+        Whether to calculate precision as a mean of per-class precisions or as a brute function of true positives and false positives.
     """
 
-    def __init__(self, num_classes):
+    def __init__(self, num_classes: int, use_mean: bool = True):
         super().__init__()
         self.num_classes = num_classes
+        self.use_mean = use_mean
 
-    def forward(self, y_true, y_pred):
+    def forward(self, y_true: torch.tensor, y_pred: torch.tensor) -> torch.tensor:
         """Calculates the precision score given number of classes and the true and predicted labels.
 
         Parameters
@@ -43,7 +46,7 @@
             1, y_pred.unsqueeze(1), 1
         )
 
-        if USE_MEAN:
+        if self.use_mean:
             tp = torch.sum(true_oh * pred_oh, 0)
             fp = torch.sum(~true_oh.bool() * pred_oh, 0)
 
@@ -54,52 +57,5 @@
 
         return torch.nanmean(tp / (tp + fp))
 
 
-def test_precision_case1():
-    true_precision = 25.0 / 36 if USE_MEAN else 7.0 / 10
-
-    true1 = torch.tensor([0, 1, 2, 1, 0, 2, 1, 0, 2, 1])
-    pred1 = torch.tensor([0, 2, 1, 1, 0, 2, 0, 0, 2, 1])
-    P = Precision(3)
-    precision1 = P(true1, pred1)
-    assert precision1.allclose(torch.tensor(true_precision), atol=1e-5), (
-        f"Precision Score: {precision1.item()}"
-    )
-
-
-def test_precision_case2():
-    true_precision = 8.0 / 15 if USE_MEAN else 6.0 / 15
-
-    true2 = torch.tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
-    pred2 = torch.tensor([0, 0, 4, 3, 4, 0, 4, 4, 2, 3, 4, 1, 2, 4, 0])
-    P = Precision(5)
-    precision2 = P(true2, pred2)
-    assert precision2.allclose(torch.tensor(true_precision), atol=1e-5), (
-        f"Precision Score: {precision2.item()}"
-    )
-
-
-def test_precision_case3():
-    true_precision = 3.0 / 4 if USE_MEAN else 4.0 / 5
-
-    true3 = torch.tensor([0, 0, 0, 1, 0])
-    pred3 = torch.tensor([1, 0, 0, 1, 0])
-    P = Precision(2)
-    precision3 = P(true3, pred3)
-    assert precision3.allclose(torch.tensor(true_precision), atol=1e-5), (
-        f"Precision Score: {precision3.item()}"
-    )
-
-
-def test_for_zero_denominator():
-    true_precision = 0.0
-    true4 = torch.tensor([1, 1, 1, 1, 1])
-    pred4 = torch.tensor([0, 0, 0, 0, 0])
-    P = Precision(2)
-    precision4 = P(true4, pred4)
-    assert precision4.allclose(torch.tensor(true_precision), atol=1e-5), (
-        f"Precision Score: {precision4.item()}"
-    )
-
-
 if __name__ == "__main__":
     pass
diff --git a/utils/models/solveig_model.py b/utils/models/solveig_model.py
index c16dbaf..d04094b 100644
--- a/utils/models/solveig_model.py
+++ b/utils/models/solveig_model.py
@@ -4,26 +4,26 @@
 
 class SolveigModel(nn.Module):
     """
-        A Convolutional Neural Network model for classification.
-
-        Args
-        ----
-        image_shape : tuple(int, int, int)
-            Shape of the input image (C, H, W).
-        num_classes : int
-            Number of classes in the dataset.
-
-        Attributes:
-        -----------
-        conv_block1 : nn.Sequential
-            First convolutional block containing a convolutional layer, ReLU activation, and max-pooling.
-        conv_block2 : nn.Sequential
-            Second convolutional block containing a convolutional layer and ReLU activation.
-        conv_block3 : nn.Sequential
-            Third convolutional block containing a convolutional layer and ReLU activation.
-        fc1 : nn.Linear
-            Fully connected layer that outputs the final classification scores.
-        """
+    A Convolutional Neural Network model for classification.
+
+    Args
+    ----
+    image_shape : tuple(int, int, int)
+        Shape of the input image (C, H, W).
+    num_classes : int
+        Number of classes in the dataset.
+
+    Attributes:
+    -----------
+    conv_block1 : nn.Sequential
+        First convolutional block containing a convolutional layer, ReLU activation, and max-pooling.
+    conv_block2 : nn.Sequential
+        Second convolutional block containing a convolutional layer and ReLU activation.
+    conv_block3 : nn.Sequential
+        Third convolutional block containing a convolutional layer and ReLU activation.
+    fc1 : nn.Linear
+        Fully connected layer that outputs the final classification scores.
+    """
 
     def __init__(self, image_shape, num_classes):
         super().__init__()
@@ -34,19 +34,19 @@ def __init__(self, image_shape, num_classes):
         self.conv_block1 = nn.Sequential(
             nn.Conv2d(in_channels=C, out_channels=25, kernel_size=3, padding=1),
             nn.ReLU(),
-            nn.MaxPool2d(kernel_size=2, stride=2)
+            nn.MaxPool2d(kernel_size=2, stride=2),
         )
 
         # Define the second convolutional block (conv + relu)
         self.conv_block2 = nn.Sequential(
             nn.Conv2d(in_channels=25, out_channels=50, kernel_size=3, padding=1),
-            nn.ReLU()
+            nn.ReLU(),
         )
 
         # Define the third convolutional block (conv + relu)
         self.conv_block3 = nn.Sequential(
             nn.Conv2d(in_channels=50, out_channels=100, kernel_size=3, padding=1),
-            nn.ReLU()
+            nn.ReLU(),
         )
 
         self.fc1 = nn.Linear(100 * 8 * 8, num_classes)
@@ -64,8 +64,7 @@ def forward(self, x):
 
 
 if __name__ == "__main__":
-
-    x = torch.randn(1,3, 16, 16)
+    x = torch.randn(1, 3, 16, 16)
 
     model = SolveigModel(x.shape[1:], 3)

From f9bd192bf279c7331a789a72c6985a48d554741f Mon Sep 17 00:00:00 2001
From: Johanmkr
Date: Tue, 4 Feb 2025 17:06:09 +0100
Subject: [PATCH 03/15] added precision to __all__

---
 utils/metrics/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utils/metrics/__init__.py b/utils/metrics/__init__.py
index 4ac1ece..6007beb 100644
--- a/utils/metrics/__init__.py
+++ b/utils/metrics/__init__.py
@@ -1,6 +1,6 @@
-__all__ = ["EntropyPrediction", "Recall", "F1Score"]
+__all__ = ["EntropyPrediction", "Recall", "F1Score", "Precision"]
 
 from .EntropyPred import EntropyPrediction
 from .F1 import F1Score
-from .recall import Recall
 from .precision import Precision
+from .recall import Recall

From d7677547425703ab11e7e11aaff3887972b2f700 Mon Sep 17 00:00:00 2001
From: Jan Zavadil <79144013+hzavadil98@users.noreply.github.com>
Date: Wed, 5 Feb 2025 10:36:39 +0100
Subject: [PATCH 04/15] Removed softmax from christian_model.py

---
 utils/models/christian_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/utils/models/christian_model.py b/utils/models/christian_model.py
index a277b33..2bec3d5 100644
--- a/utils/models/christian_model.py
+++ b/utils/models/christian_model.py
@@ -58,7 +58,6 @@ def __init__(self, image_shape, num_classes):
         self.cnn2 = CNNBlock(50, 100)
 
         self.fc1 = nn.Linear(100 * 4 * 4, num_classes)
-        self.softmax = nn.Softmax(dim=1)
 
     def forward(self, x):
         x = self.cnn1(x)
@@ -66,7 +65,6 @@ def forward(self, x):
 
         x = x.view(x.size(0), -1)
         x = self.fc1(x)
-        x = self.softmax(x)
 
         return x

From 2c4db889d37518a79e0896c5e62f63618dfdd43f Mon Sep 17 00:00:00 2001
From: Jan Zavadil <79144013+hzavadil98@users.noreply.github.com>
Date: Wed, 5 Feb 2025 10:37:22 +0100
Subject: [PATCH 05/15] Removed softmax from johan_model.py

---
 utils/models/johan_model.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/utils/models/johan_model.py b/utils/models/johan_model.py
index 55c6251..8500ea9 100644
--- a/utils/models/johan_model.py
+++ b/utils/models/johan_model.py
@@ -43,14 +43,12 @@ def __init__(self, image_shape, num_classes):
         self.fc2 = nn.Linear(77, 77)
         self.fc3 = nn.Linear(77, 77)
         self.fc4 = nn.Linear(77, num_classes)
-        self.softmax = nn.Softmax(dim=1)
         self.relu = nn.ReLU()
 
     def forward(self, x):
         for layer in [self.fc1, self.fc2, self.fc3, self.fc4]:
            x = layer(x)
             x = self.relu(x)
-        x = self.softmax(x)
 
         return x
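[Note on patches 04-06: nn.CrossEntropyLoss applies log-softmax internally, so the models should emit raw logits. A minimal sketch of the intended pattern — the batch size and class count here are illustrative, not from the patches:

    import torch
    import torch.nn as nn

    logits = torch.randn(8, 10)           # raw model output: batch of 8, 10 classes, no softmax
    targets = torch.randint(0, 10, (8,))  # integer class labels
    loss = nn.CrossEntropyLoss()(logits, targets)  # log-softmax + NLL happen inside the loss
    probs = logits.softmax(dim=1)         # apply softmax only when probabilities are actually needed

Applying softmax before this loss would squash the logits and silently degrade training, which is why the three model forward passes drop it.]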
From fae506d319db9769ffe95883bbeeb01f14719263 Mon Sep 17 00:00:00 2001
From: Jan Zavadil <79144013+hzavadil98@users.noreply.github.com>
Date: Wed, 5 Feb 2025 10:37:58 +0100
Subject: [PATCH 06/15] Removed softmax from solveig_model.py

---
 utils/models/solveig_model.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/utils/models/solveig_model.py b/utils/models/solveig_model.py
index d04094b..21dec4e 100644
--- a/utils/models/solveig_model.py
+++ b/utils/models/solveig_model.py
@@ -58,7 +58,6 @@ def forward(self, x):
         x = torch.flatten(x, 1)
 
         x = self.fc1(x)
-        x = nn.Softmax(x)
 
         return x

From 2d21b034593ecd1b6303080f128856afb88aaa58 Mon Sep 17 00:00:00 2001
From: Jan Zavadil <79144013+hzavadil98@users.noreply.github.com>
Date: Wed, 5 Feb 2025 10:45:36 +0100
Subject: [PATCH 07/15] Removed the test verifying softmax normalization of the output

The model should output raw logits for nn.CrossEntropyLoss() (#37)
---
 tests/test_models.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/tests/test_models.py b/tests/test_models.py
index 15a7504..be6bd3f 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -17,6 +17,4 @@ def test_christian_model(image_shape, num_classes):
     y = model(x)
 
     assert y.shape == (n, num_classes), f"Shape: {y.shape}"
-    assert y.sum(dim=1).allclose(torch.ones(n), atol=1e-5), (
-        f"Softmax output should sum to 1, but got: {y.sum()}"
-    )
+

From 9f400f59360640982e4f0884266ac74fe8622ce9 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 11:35:30 +0100
Subject: [PATCH 08/15] Added accuracy metric and tests for it and the Jan model

---
 tests/test_metrics.py     | 16 +++++++++++++++-
 tests/test_models.py      | 17 ++++++++++++++++-
 utils/load_metric.py      |  6 +++---
 utils/metrics/__init__.py |  3 ++-
 utils/metrics/accuracy.py | 32 ++++++++++++++++++++++++++++++++
 5 files changed, 68 insertions(+), 6 deletions(-)
 create mode 100644 utils/metrics/accuracy.py

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 1074d26..ccd665e 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,5 +1,5 @@
-from utils.metrics import F1Score, Precision, Recall
+from utils.metrics import F1Score, Precision, Recall, Accuracy
 
 
 
@@ -84,3 +84,17 @@ def test_for_zero_denominator():
     assert precision4.allclose(torch.tensor(0.0), atol=1e-5), (
         f"Precision Score: {precision4.item()}"
     )
+
+def test_accuracy():
+    import torch
+
+    accuracy = Accuracy()
+
+    y_true = torch.tensor([0, 3, 2, 3, 4])
+    y_pred = torch.tensor([0, 1, 2, 3, 4])
+
+    accuracy_score = accuracy(y_true, y_pred)
+
+    assert accuracy_score.allclose(torch.tensor(0.8), atol=1e-5), (
+        f"Accuracy Score: {accuracy_score.item()}"
+    )
diff --git a/tests/test_models.py b/tests/test_models.py
index 15a7504..5652b6b 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -1,7 +1,7 @@
 import pytest
 import torch
 
-from utils.models import ChristianModel
+from utils.models import ChristianModel, JanModel
 
 
 @pytest.mark.parametrize(
@@ -20,3 +20,18 @@ def test_christian_model(image_shape, num_classes):
     assert y.sum(dim=1).allclose(torch.ones(n), atol=1e-5), (
         f"Softmax output should sum to 1, but got: {y.sum()}"
     )
+
+@pytest.mark.parametrize(
+    "image_shape, num_classes",
+    [((1, 28, 28), 4), ((3, 16, 16), 10)],
+)
+def test_jan_model(image_shape, num_classes):
+    n, c, h, w = 5, *image_shape
+
+    model = JanModel(image_shape, num_classes)
+
+    x = torch.randn(n, c, h, w)
+    y = model(x)
+
+    assert y.shape == (n, num_classes), f"Shape: {y.shape}"
+
diff --git a/utils/load_metric.py b/utils/load_metric.py
index 9c942d1..f4c766b 100644
--- a/utils/load_metric.py
+++ b/utils/load_metric.py
@@ -3,7 +3,7 @@
 import numpy as np
 import torch.nn as nn
 
-from .metrics import EntropyPrediction, F1Score, precision
+from .metrics import EntropyPrediction, F1Score, Precision, Accuracy
 
 
 class MetricWrapper(nn.Module):
@@ -39,9 +39,9 @@ def _get_metric(self, key):
             case "recall":
                 raise NotImplementedError("Recall score not implemented yet")
             case "precision":
-                return precision()
+                return Precision()
             case "accuracy":
-                raise NotImplementedError("Accuracy score not implemented yet")
+                return Accuracy()
             case _:
                 raise ValueError(f"Metric {key} not supported")
 
diff --git a/utils/metrics/__init__.py b/utils/metrics/__init__.py
index 6007beb..486e490 100644
--- a/utils/metrics/__init__.py
+++ b/utils/metrics/__init__.py
@@ -1,6 +1,7 @@
-__all__ = ["EntropyPrediction", "Recall", "F1Score", "Precision"]
+__all__ = ["EntropyPrediction", "Recall", "F1Score", "Precision", "Accuracy"]
 
 from .EntropyPred import EntropyPrediction
 from .F1 import F1Score
 from .precision import Precision
 from .recall import Recall
+from .accuracy import Accuracy
diff --git a/utils/metrics/accuracy.py b/utils/metrics/accuracy.py
new file mode 100644
index 0000000..9ae1287
--- /dev/null
+++ b/utils/metrics/accuracy.py
@@ -0,0 +1,32 @@
+import torch
+from torch import nn
+
+
+class Accuracy(nn.Module):
+    def __init__(self):
+        super().__init__()
+
+    def forward(self, y_true, y_pred):
+        """
+        Compute the accuracy of the model.
+
+        Parameters
+        ----------
+        y_true : torch.Tensor
+            True labels.
+        y_pred : torch.Tensor
+            Predicted labels.
+
+        Returns
+        -------
+        float
+            Accuracy score.
+        """
+        return (y_true == y_pred).float().mean().item()
+
+if __name__ == "__main__":
+    y_true = torch.tensor([0, 3, 2, 3, 4])
+    y_pred = torch.tensor([0, 1, 2, 3, 4])
+
+    accuracy = Accuracy()
+    print(accuracy(y_true, y_pred))
\ No newline at end of file

From 0ebacedab1af12923caec707549a23a2ca25e401 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 11:43:32 +0100
Subject: [PATCH 09/15] formatted to pass tests

---
 tests/test_metrics.py     | 7 +++----
 tests/test_models.py      | 2 +-
 utils/load_metric.py      | 2 +-
 utils/metrics/__init__.py | 2 +-
 utils/metrics/accuracy.py | 5 +++--
 5 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index ccd665e..d11b76d 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -1,6 +1,4 @@
-
-from utils.metrics import F1Score, Precision, Recall, Accuracy
-
+from utils.metrics import Accuracy, F1Score, Precision, Recall
 
 
 def test_recall():
@@ -84,7 +82,8 @@ def test_for_zero_denominator():
     assert precision4.allclose(torch.tensor(0.0), atol=1e-5), (
         f"Precision Score: {precision4.item()}"
     )
-
+
+
 def test_accuracy():
     import torch
 
diff --git a/tests/test_models.py b/tests/test_models.py
index 5652b6b..9f256ca 100644
--- a/tests/test_models.py
+++ b/tests/test_models.py
@@ -21,6 +21,7 @@ def test_christian_model(image_shape, num_classes):
         f"Softmax output should sum to 1, but got: {y.sum()}"
     )
 
+
 @pytest.mark.parametrize(
     "image_shape, num_classes",
     [((1, 28, 28), 4), ((3, 16, 16), 10)],
@@ -34,4 +35,3 @@ def test_jan_model(image_shape, num_classes):
     y = model(x)
 
     assert y.shape == (n, num_classes), f"Shape: {y.shape}"
-
diff --git a/utils/load_metric.py b/utils/load_metric.py
index f4c766b..8d56d12 100644
--- a/utils/load_metric.py
+++ b/utils/load_metric.py
@@ -3,7 +3,7 @@
 import numpy as np
 import torch.nn as nn
 
-from .metrics import EntropyPrediction, F1Score, Precision, Accuracy
+from .metrics import Accuracy, EntropyPrediction, F1Score, Precision
 
 
 class MetricWrapper(nn.Module):
diff --git a/utils/metrics/__init__.py b/utils/metrics/__init__.py
index 486e490..b9e07ec 100644
--- a/utils/metrics/__init__.py
+++ b/utils/metrics/__init__.py
@@ -1,7 +1,7 @@
 __all__ = ["EntropyPrediction", "Recall", "F1Score", "Precision", "Accuracy"]
 
+from .accuracy import Accuracy
 from .EntropyPred import EntropyPrediction
 from .F1 import F1Score
 from .precision import Precision
 from .recall import Recall
-from .accuracy import Accuracy
diff --git a/utils/metrics/accuracy.py b/utils/metrics/accuracy.py
index 9ae1287..f95bc3e 100644
--- a/utils/metrics/accuracy.py
+++ b/utils/metrics/accuracy.py
@@ -23,10 +23,11 @@ def forward(self, y_true, y_pred):
             Accuracy score.
         """
         return (y_true == y_pred).float().mean().item()
-
+
+
 if __name__ == "__main__":
     y_true = torch.tensor([0, 3, 2, 3, 4])
     y_pred = torch.tensor([0, 1, 2, 3, 4])
 
     accuracy = Accuracy()
-    print(accuracy(y_true, y_pred))
\ No newline at end of file
+    print(accuracy(y_true, y_pred))

From 46798d24533cc1665879819031324c41f03a3374 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 11:47:58 +0100
Subject: [PATCH 10/15] fixed metric test

---
 tests/test_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index d11b76d..63f36a6 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -94,6 +94,6 @@ def test_accuracy():
 
     accuracy_score = accuracy(y_true, y_pred)
 
-    assert accuracy_score.allclose(torch.tensor(0.8), atol=1e-5), (
+    assert (torch.abs(torch.tensor(accuracy_score - 0.8)) < 1e-5), (
         f"Accuracy Score: {accuracy_score.item()}"
     )
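[Note on patches 08-10: the new Accuracy metric behaves exactly as its own __main__ guard demonstrates; a minimal sketch, using the same toy batch (4 of 5 labels correct):

    import torch

    from utils.metrics import Accuracy

    y_true = torch.tensor([0, 3, 2, 3, 4])
    y_pred = torch.tensor([0, 1, 2, 3, 4])

    accuracy = Accuracy()            # the num_classes parameter only arrives later, in patch 12
    print(accuracy(y_true, y_pred))  # 0.8 — a plain Python float, because forward() calls .item()

Because forward() returns a float rather than a tensor, the original allclose() assertion could not work, which is what patch 10 fixes by comparing with torch.abs instead.]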
From c25a2c84682f6b320c687f8ad4d8fdd120f16c44 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 13:14:42 +0100
Subject: [PATCH 11/15] moved argument parsing and handling from main.py to a separate file to clean up

---
 main.py             | 112 +++++---------------------------------------
 utils/__init__.py   |   3 +-
 utils/arg_parser.py |  98 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 102 deletions(-)
 create mode 100644 utils/arg_parser.py

diff --git a/main.py b/main.py
index 8f389d5..2aa6dcf 100644
--- a/main.py
+++ b/main.py
@@ -9,7 +9,7 @@
 from torchvision import transforms
 from tqdm import tqdm
 
-from utils import MetricWrapper, createfolders, load_data, load_model
+from utils import MetricWrapper, createfolders, load_data, load_model, get_args
 
 
 def main():
@@ -25,100 +25,7 @@ def main():
     ------
 
     """
-    parser = argparse.ArgumentParser(
-        prog="",
-        description="",
-        epilog="",
-    )
-    # Structuture related values
-    parser.add_argument(
-        "--datafolder",
-        type=Path,
-        default="Data",
-        help="Path to where data will be saved during training.",
-    )
-    parser.add_argument(
-        "--resultfolder",
-        type=Path,
-        default="Results",
-        help="Path to where results will be saved during evaluation.",
-    )
-    parser.add_argument(
-        "--modelfolder",
-        type=Path,
-        default="Experiments",
-        help="Path to where model weights will be saved at the end of training.",
-    )
-    parser.add_argument(
-        "--savemodel",
-        action="store_true",
-        help="Whether model should be saved or not.",
-    )
-
-    parser.add_argument(
-        "--download-data",
-        action="store_true",
-        help="Whether the data should be downloaded or not. Might cause code to start a bit slowly.",
-    )
-
-    # Data/Model specific values
-    parser.add_argument(
-        "--modelname",
-        type=str,
-        default="MagnusModel",
-        choices=["MagnusModel", "ChristianModel", "SolveigModel"],
-        help="Model which to be trained on",
-    )
-    parser.add_argument(
-        "--dataset",
-        type=str,
-        default="svhn",
-        choices=["svhn", "usps_0-6", "uspsh5_7_9", "mnist_0-3"],
-        help="Which dataset to train the model on.",
-    )
-
-    parser.add_argument(
-        "--metric",
-        type=str,
-        default=["entropy"],
-        choices=["entropy", "f1", "recall", "precision", "accuracy"],
-        nargs="+",
-        help="Which metric to use for evaluation",
-    )
-
-    # Training specific values
-    parser.add_argument(
-        "--epoch",
-        type=int,
-        default=20,
-        help="Amount of training epochs the model will do.",
-    )
-    parser.add_argument(
-        "--learning_rate",
-        type=float,
-        default=0.001,
-        help="Learning rate parameter for model training.",
-    )
-    parser.add_argument(
-        "--batchsize",
-        type=int,
-        default=64,
-        help="Amount of training images loaded in one go",
-    )
-    parser.add_argument(
-        "--device",
-        type=str,
-        default="cpu",
-        choices=["cuda", "cpu", "mps"],
-        help="Which device to run the training on.",
-    )
-    parser.add_argument(
-        "--dry_run",
-        action="store_true",
-        help="If true, the code will not run the training loop.",
-    )
-
-    args = parser.parse_args()
+    args = get_args()
 
     createfolders(args.datafolder, args.resultfolder, args.modelfolder)
 
@@ -126,12 +33,15 @@ def main():
 
     metrics = MetricWrapper(*args.metric)
 
-    augmentations = transforms.Compose(
-        [
-            transforms.Resize((16, 16)),  # At least for USPS
-            transforms.ToTensor(),
-        ]
-    )
+    if args.dataset.lower() == "usps_0-6" or args.dataset.lower() == "uspsh5_7_9":
+        augmentations = transforms.Compose(
+            [
+                transforms.Resize((16, 16)),
+                transforms.ToTensor(),
+            ]
+        )
+    else:
+        augmentations = transforms.Compose([transforms.ToTensor()])
 
     # Dataset
     traindata = load_data(
diff --git a/utils/__init__.py b/utils/__init__.py
index 6ea6cde..d6418ad 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -1,5 +1,6 @@
-__all__ = ["createfolders", "load_data", "load_model", "MetricWrapper"]
+__all__ = ["createfolders", "load_data", "load_model", "MetricWrapper", "get_args"]
 
+from .arg_parser import get_args
 from .createfolders import createfolders
 from .load_data import load_data
 from .load_metric import MetricWrapper
diff --git a/utils/arg_parser.py b/utils/arg_parser.py
new file mode 100644
index 0000000..2620b98
--- /dev/null
+++ b/utils/arg_parser.py
@@ -0,0 +1,98 @@
+import argparse
+from pathlib import Path
+
+
+def get_args():
+    parser = argparse.ArgumentParser(
+        prog="",
+        description="",
+        epilog="",
+    )
+    # Structure related values
+    parser.add_argument(
+        "--datafolder",
+        type=Path,
+        default="Data",
+        help="Path to where data will be saved during training.",
+    )
+    parser.add_argument(
+        "--resultfolder",
+        type=Path,
+        default="Results",
+        help="Path to where results will be saved during evaluation.",
+    )
+    parser.add_argument(
+        "--modelfolder",
+        type=Path,
+        default="Experiments",
+        help="Path to where model weights will be saved at the end of training.",
+    )
+    parser.add_argument(
+        "--savemodel",
+        action="store_true",
+        help="Whether model should be saved or not.",
+    )
+
+    parser.add_argument(
+        "--download-data",
+        action="store_true",
+        help="Whether the data should be downloaded or not. Might cause code to start a bit slowly.",
+    )
+
+    # Data/Model specific values
+    parser.add_argument(
+        "--modelname",
+        type=str,
+        default="MagnusModel",
+        choices=["MagnusModel", "ChristianModel", "SolveigModel", "JanModel"],
+        help="Model which to be trained on",
+    )
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        default="svhn",
+        choices=["svhn", "usps_0-6", "uspsh5_7_9", "mnist_0-3"],
+        help="Which dataset to train the model on.",
+    )
+
+    parser.add_argument(
+        "--metric",
+        type=str,
+        default=["entropy"],
+        choices=["entropy", "f1", "recall", "precision", "accuracy"],
+        nargs="+",
+        help="Which metric to use for evaluation",
+    )
+
+    # Training specific values
+    parser.add_argument(
+        "--epoch",
+        type=int,
+        default=20,
+        help="Amount of training epochs the model will do.",
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=0.001,
+        help="Learning rate parameter for model training.",
+    )
+    parser.add_argument(
+        "--batchsize",
+        type=int,
+        default=64,
+        help="Amount of training images loaded in one go",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="cpu",
+        choices=["cuda", "cpu", "mps"],
+        help="Which device to run the training on.",
+    )
+    parser.add_argument(
+        "--dry_run",
+        action="store_true",
+        help="If true, the code will not run the training loop.",
+    )
+    return parser.parse_args()

From a6b436cccd2ee9abb3e8bb7efec70722f9580020 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 14:30:36 +0100
Subject: [PATCH 12/15] added num_classes parameter to MetricWrapper and metric calculation steps to the train and val loops

---
 main.py                        | 35 +++++++++++++++++++++++---------
 utils/dataloaders/mnist_0_3.py |  6 ++++--
 utils/load_metric.py           | 22 ++++++++++++---------
 utils/metrics/accuracy.py      |  5 +++--
 4 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/main.py b/main.py
index 2aa6dcf..98bb515 100644
--- a/main.py
+++ b/main.py
@@ -31,7 +31,6 @@ def main():
     device = args.device
 
-    metrics = MetricWrapper(*args.metric)
 
     if args.dataset.lower() == "usps_0-6" or args.dataset.lower() == "uspsh5_7_9":
         augmentations = transforms.Compose(
             [
@@ -59,6 +58,8 @@ def main():
         transform=augmentations,
     )
 
+    metrics = MetricWrapper(*args.metric, num_classes = traindata.num_classes)
+
     # Find the shape of the data, if is 2D, add a channel dimension
     data_shape = traindata[0][0].shape
     if len(data_shape) == 2:
@@ -90,7 +91,7 @@ def main():
     if args.dry_run:
         dry_run_loader = DataLoader(
             traindata,
-            batch_size=1,
+            batch_size=20,
             shuffle=True,
             pin_memory=True,
             drop_last=True,
@@ -98,16 +99,20 @@ def main():
 
         for x, y in tqdm(dry_run_loader, desc="Dry run", total=1):
             x, y = x.to(device), y.to(device)
-            pred = model.forward(x)
+            logits = model.forward(x)
 
-            loss = criterion(y, pred)
+            loss = criterion(logits, y)
             loss.backward()
 
             optimizer.step()
             optimizer.zero_grad(set_to_none=True)
+
+            preds = th.argmax(logits, dim=1)
+            metrics(y, preds)
 
-            break
+            break
 
+        print(metrics.__getmetrics__())
         print("Dry run completed successfully.")
         exit(0)
 
@@ -120,14 +125,20 @@ def main():
         model.train()
         for x, y in tqdm(trainloader, desc="Training"):
             x, y = x.to(device), y.to(device)
-            pred = model.forward(x)
+            logits = model.forward(x)
 
-            loss = criterion(y, pred)
+            loss = criterion(logits, y)
             loss.backward()
 
             optimizer.step()
             optimizer.zero_grad(set_to_none=True)
             trainingloss.append(loss.item())
+
+            preds = th.argmax(logits, dim=1)
+            metrics(y, preds)
+
+        wandb.log(metrics.__getmetrics__(str_prefix="Train "))
+        metrics.__resetvalues__()
 
         evalloss = []
         # Eval loop start
@@ -135,9 +146,15 @@ def main():
         with th.no_grad():
             for x, y in tqdm(valiloader, desc="Validation"):
                 x, y = x.to(device), y.to(device)
-                pred = model.forward(x)
-                loss = criterion(y, pred)
+                logits = model.forward(x)
+                loss = criterion(y, logits)
                 evalloss.append(loss.item())
+
+                preds = th.argmax(logits, dim=1)
+                metrics(y, preds)
+
+        wandb.log(metrics.__getmetrics__(str_prefix="Evaluation "))
+        metrics.__resetvalues__()
 
         wandb.log(
             {
diff --git a/utils/dataloaders/mnist_0_3.py b/utils/dataloaders/mnist_0_3.py
index 5e5a935..4e40ad1 100644
--- a/utils/dataloaders/mnist_0_3.py
+++ b/utils/dataloaders/mnist_0_3.py
@@ -134,11 +134,11 @@ def __len__(self):
 
     def __getitem__(self, index):
         with open(self.labels_path, "rb") as f:
-            f.seek(8 + index)  # Jump to the label position
+            f.seek(8 + self.idx[index])  # Jump to the label position
             label = int.from_bytes(f.read(1), byteorder="big")  # Read 1 byte for label
 
         with open(self.images_path, "rb") as f:
-            f.seek(16 + index * 28 * 28)  # Jump to image position
+            f.seek(16 + self.idx[index] * 28 * 28)  # Jump to image position
             image = np.frombuffer(f.read(28 * 28), dtype=np.uint8).reshape(
                 28, 28
             )  # Read image data
@@ -149,3 +149,5 @@ def __getitem__(self, index):
             image = self.transform(image)
 
         return image, label
+
+
diff --git a/utils/load_metric.py b/utils/load_metric.py
index 8d56d12..29ca08e 100644
--- a/utils/load_metric.py
+++ b/utils/load_metric.py
@@ -3,13 +3,14 @@
 import numpy as np
 import torch.nn as nn
 
-from .metrics import Accuracy, EntropyPrediction, F1Score, Precision
+from .metrics import Accuracy, EntropyPrediction, F1Score, Precision, Recall
 
 
 class MetricWrapper(nn.Module):
-    def __init__(self, *metrics):
+    def __init__(self, *metrics, num_classes):
         super().__init__()
         self.metrics = {}
+        self.num_classes = num_classes
         for metric in metrics:
             self.metrics[metric] = self._get_metric(metric)
 
@@ -33,15 +34,15 @@ def _get_metric(self, key):
 
         match key.lower():
             case "entropy":
-                return EntropyPrediction()
+                return EntropyPrediction(num_classes=self.num_classes)
             case "f1":
-                raise F1Score()
+                return F1Score(num_classes=self.num_classes)
             case "recall":
-                raise NotImplementedError("Recall score not implemented yet")
+                return Recall(num_classes=self.num_classes)
             case "precision":
-                return Precision()
+                return Precision(num_classes=self.num_classes)
             case "accuracy":
-                return Accuracy()
+                return Accuracy(num_classes=self.num_classes)
             case _:
                 raise ValueError(f"Metric {key} not supported")
 
@@ -49,10 +50,13 @@ def __call__(self, y_true, y_pred):
         for key in self.metrics:
             self.tmp_scores[key].append(self.metrics[key](y_true, y_pred))
 
-    def __getmetrics__(self):
+    def __getmetrics__(self, str_prefix: str = None):
         return_metrics = {}
         for key in self.metrics:
-            return_metrics[key] = np.mean(self.tmp_scores[key])
+            if str_prefix is not None:
+                return_metrics[str_prefix + key] = np.mean(self.tmp_scores[key])
+            else:
+                return_metrics[key] = np.mean(self.tmp_scores[key])
 
         return return_metrics
diff --git a/utils/metrics/accuracy.py b/utils/metrics/accuracy.py
index f95bc3e..0d2fe40 100644
--- a/utils/metrics/accuracy.py
+++ b/utils/metrics/accuracy.py
@@ -3,9 +3,10 @@
 
 
 class Accuracy(nn.Module):
-    def __init__(self):
+    def __init__(self, num_classes):
         super().__init__()
-
+        self.num_classes = num_classes
+
     def forward(self, y_true, y_pred):
         """
         Compute the accuracy of the model.

From 38d2499e9ff5bd19e968a4f90d65b44e4e534c53 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 14:53:11 +0100
Subject: [PATCH 13/15] Set up logging to wandb, must add WANDB_API through secrets

---
 .gitignore | 2 ++
 main.py    | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 29fa5e6..b564848 100644
--- a/.gitignore
+++ b/.gitignore
@@ -5,6 +5,8 @@ Results/
 Experiments/
 _build/
 bin/
+wandb/
+wandb_api.py
 
 # Byte-compiled / optimized / DLL files
 __pycache__/
diff --git a/main.py b/main.py
index 98bb515..ded472c 100644
--- a/main.py
+++ b/main.py
@@ -116,7 +116,8 @@ def main():
         print("Dry run completed successfully.")
         exit(0)
 
-    wandb.init(project="", tags=[])
+    wandb.login(key=WANDB_API)
+    wandb.init(entity="ColabCode",project="Jan", tags=[args.modelname, args.dataset])
     wandb.watch(model)
 
     for epoch in range(args.epoch):
@@ -147,7 +148,7 @@ def main():
             for x, y in tqdm(valiloader, desc="Validation"):
                 x, y = x.to(device), y.to(device)
                 logits = model.forward(x)
-                loss = criterion(y, logits)
+                loss = criterion(logits, y)
                 evalloss.append(loss.item())
 
                 preds = th.argmax(logits, dim=1)

From 2933536538af9939942f82c6721b57cd3e198a01 Mon Sep 17 00:00:00 2001
From: Jan Zavadil
Date: Wed, 5 Feb 2025 15:08:56 +0100
Subject: [PATCH 14/15] fixed test

---
 main.py               | 18 ++++++++----------
 tests/test_metrics.py |  2 +-
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/main.py b/main.py
index ded472c..2052cdf 100644
--- a/main.py
+++ b/main.py
@@ -31,7 +31,6 @@ def main():
 
     device = args.device
 
-
     if args.dataset.lower() == "usps_0-6" or args.dataset.lower() == "uspsh5_7_9":
         augmentations = transforms.Compose(
             [
@@ -58,8 +57,8 @@ def main():
         transform=augmentations,
     )
 
-    metrics = MetricWrapper(*args.metric, num_classes = traindata.num_classes)
-
+    metrics = MetricWrapper(*args.metric, num_classes=traindata.num_classes)
+
     # Find the shape of the data, if is 2D, add a channel dimension
     data_shape = traindata[0][0].shape
     if len(data_shape) == 2:
@@ -106,18 +105,17 @@ def main():
 
             optimizer.step()
             optimizer.zero_grad(set_to_none=True)
-
+
             preds = th.argmax(logits, dim=1)
             metrics(y, preds)
-
             break
 
         print(metrics.__getmetrics__())
         print("Dry run completed successfully.")
         exit(0)
 
     wandb.login(key=WANDB_API)
-    wandb.init(entity="ColabCode",project="Jan", tags=[args.modelname, args.dataset])
+    wandb.init(entity="ColabCode", project="Jan", tags=[args.modelname, args.dataset])
     wandb.watch(model)
 
     for epoch in range(args.epoch):
@@ -134,10 +132,10 @@ def main():
             optimizer.step()
             optimizer.zero_grad(set_to_none=True)
             trainingloss.append(loss.item())
-
+
             preds = th.argmax(logits, dim=1)
             metrics(y, preds)
-
+
         wandb.log(metrics.__getmetrics__(str_prefix="Train "))
         metrics.__resetvalues__()
 
@@ -150,10 +148,10 @@ def main():
                 logits = model.forward(x)
                 loss = criterion(logits, y)
                 evalloss.append(loss.item())
-
+
                 preds = th.argmax(logits, dim=1)
                 metrics(y, preds)
-
+
         wandb.log(metrics.__getmetrics__(str_prefix="Evaluation "))
         metrics.__resetvalues__()
 
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index 63f36a6..a4a18a1 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -87,7 +87,7 @@ def test_for_zero_denominator():
 def test_accuracy():
     import torch
 
-    accuracy = Accuracy()
+    accuracy = Accuracy(num_classes=5)
 
     y_true = torch.tensor([0, 3, 2, 3, 4])
     y_pred = torch.tensor([0, 1, 2, 3, 4])
From 2341c69f53c71c557ed81e1896f5cb559c86cb90 Mon Sep 17 00:00:00 2001
From: salomaestro
Date: Thu, 6 Feb 2025 11:05:47 +0100
Subject: [PATCH 15/15] Formatting

---
 main.py                        | 5 ++---
 tests/test_metrics.py          | 2 +-
 utils/dataloaders/mnist_0_3.py | 2 --
 utils/metrics/accuracy.py      | 2 +-
 4 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/main.py b/main.py
index 2052cdf..9e7ac51 100644
--- a/main.py
+++ b/main.py
@@ -1,4 +1,3 @@
-import argparse
 from pathlib import Path
 
 import numpy as np
@@ -9,7 +8,7 @@
 from torchvision import transforms
 from tqdm import tqdm
 
-from utils import MetricWrapper, createfolders, load_data, load_model, get_args
+from utils import MetricWrapper, createfolders, get_args, load_data, load_model
 
 
 def main():
@@ -31,7 +30,7 @@ def main():
 
     device = args.device
 
-    if args.dataset.lower() == "usps_0-6" or args.dataset.lower() == "uspsh5_7_9":
+    if args.dataset.lower() in ["usps_0-6", "uspsh5_7_9"]:
         augmentations = transforms.Compose(
             [
                 transforms.Resize((16, 16)),
diff --git a/tests/test_metrics.py b/tests/test_metrics.py
index a4a18a1..d6da0ab 100644
--- a/tests/test_metrics.py
+++ b/tests/test_metrics.py
@@ -94,6 +94,6 @@ def test_accuracy():
 
     accuracy_score = accuracy(y_true, y_pred)
 
-    assert (torch.abs(torch.tensor(accuracy_score - 0.8)) < 1e-5), (
+    assert torch.abs(torch.tensor(accuracy_score - 0.8)) < 1e-5, (
         f"Accuracy Score: {accuracy_score.item()}"
     )
diff --git a/utils/dataloaders/mnist_0_3.py b/utils/dataloaders/mnist_0_3.py
index 4e40ad1..0245add 100644
--- a/utils/dataloaders/mnist_0_3.py
+++ b/utils/dataloaders/mnist_0_3.py
@@ -149,5 +149,3 @@ def __getitem__(self, index):
             image = self.transform(image)
 
         return image, label
-
-
diff --git a/utils/metrics/accuracy.py b/utils/metrics/accuracy.py
index 0d2fe40..4d1cdd1 100644
--- a/utils/metrics/accuracy.py
+++ b/utils/metrics/accuracy.py
@@ -6,7 +6,7 @@ class Accuracy(nn.Module):
     def __init__(self, num_classes):
         super().__init__()
         self.num_classes = num_classes
-
+
     def forward(self, y_true, y_pred):
         """
         Compute the accuracy of the model.
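[With the full series applied, a quick smoke test of the refactored entry point might look like the following — flag names are taken from utils/arg_parser.py, while the specific model, dataset, and metric values are just examples from its choices lists:

    python main.py --modelname ChristianModel --dataset usps_0-6 \
        --metric accuracy precision --device cpu --dry_run

The --dry_run flag runs a single batch through the forward/backward pass and the metric pipeline, prints the accumulated metrics, and exits without touching wandb, so it exercises everything in this series except the logging set up in patch 13.]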