From 12b774ac4d6a96ff844f5f88c810e72cd049ec26 Mon Sep 17 00:00:00 2001
From: Kalle Westerling <kalle.westerling@gmail.com>
Date: Fri, 2 Aug 2024 12:37:06 +0100
Subject: [PATCH 1/5] Replace most `assert` statements with more
 descriptive`raise` statements

---
 mapreader/annotate/utils.py                 |  5 +-
 mapreader/classify/load_annotations.py      | 17 ++++--
 mapreader/download/data_structures.py       | 20 ++++--
 mapreader/download/downloader.py            | 11 ++--
 mapreader/download/downloader_utils.py      |  9 ++-
 mapreader/download/tile_merging.py          |  7 ++-
 mapreader/spot_text/deepsolo_runner.py      | 10 ++-
 mapreader/spot_text/runner_base.py          |  3 +-
 tests/test_download/test_data_structures.py | 68 +++++++++++++++++++++
 9 files changed, 125 insertions(+), 25 deletions(-)
 create mode 100644 tests/test_download/test_data_structures.py

diff --git a/mapreader/annotate/utils.py b/mapreader/annotate/utils.py
index a0c216d1..148dc820 100644
--- a/mapreader/annotate/utils.py
+++ b/mapreader/annotate/utils.py
@@ -310,7 +310,10 @@ def display_record(record: tuple[str, str, str, int, int]) -> None:
                     # stream=True so we don't download the whole page, only check if
                     # the page exists
                     response = requests.get(url, stream=True)
-                    assert response.status_code < 400
+                    if not response.status_code < 400:
+                        raise RuntimeError(
+                            f"URL could not get a response: {response.status_code}"
+                        )
                     print()
                     print(f"URL: {url}")
                 except:
diff --git a/mapreader/classify/load_annotations.py b/mapreader/classify/load_annotations.py
index 6292b211..e9228516 100644
--- a/mapreader/classify/load_annotations.py
+++ b/mapreader/classify/load_annotations.py
@@ -439,10 +439,13 @@ def review_labels(
                     self.reviewed.loc[input_id, "label_index"] = self._get_label_index(
                         input_label
                     )
-                    assert (
+                    if not (
                         self.annotations[self.label_col].value_counts().tolist()
                         == self.annotations["label_index"].value_counts().tolist()
-                    )
+                    ):
+                        raise RuntimeError(
+                            f"[ERROR] Label indices do not match label counts. Please check the label indices for label '{input_label}'."
+                        )
                     print(
                         f'[INFO] Image {input_id} has been relabelled as "{input_label}"'
                     )
@@ -595,12 +598,18 @@ def create_datasets(
                 test_size=float(relative_frac_test),
                 random_state=random_state,
             )
-            assert len(self.annotations) == len(df_train) + len(df_val) + len(df_test)
+            if not len(self.annotations) == len(df_train) + len(df_val) + len(df_test):
+                raise ValueError(
+                    "[ERROR] Number of annotations in the split dataframes does not match the number of annotations in the original dataframe."
+                )
 
         else:
             df_val = df_temp
             df_test = None
-            assert len(self.annotations) == len(df_train) + len(df_val)
+            if not len(self.annotations) == len(df_train) + len(df_val):
+                raise ValueError(
+                    "[ERROR] Number of annotations in the split dataframes does not match the number of annotations in the original dataframe."
+                )
 
         if context_datasets:
             datasets = self.create_patch_context_datasets(
diff --git a/mapreader/download/data_structures.py b/mapreader/download/data_structures.py
index 9115057b..23d95355 100644
--- a/mapreader/download/data_structures.py
+++ b/mapreader/download/data_structures.py
@@ -14,8 +14,11 @@ def __init__(self, lat: float, lon: float):
         lon : float
             longitude value (in range [-180°, 180°] )
         """
-        assert -90 <= lat <= 90
-        assert -180 <= lon <= 180
+        if not -90 <= lat <= 90:
+            raise ValueError("Latitude must be in range [-90, 90]")
+        if not -180 <= lon <= 180:
+            raise ValueError("Longitude must be in range [-180, 180]")
+
         self.lat = lat
         self.lon = lon
 
@@ -37,9 +40,12 @@ def __init__(self, x: int, y: int, z: int):
         z : int
             Zoom level
         """
-        assert z >= 0
-        assert 0 <= x < 2**z
-        assert 0 <= y < 2**z
+        if not z >= 0:
+            raise ValueError("Zoom level must be greater than or equal to 0")
+        if not 0 <= x < 2**z:
+            raise ValueError(f"X value must be in range [0, {2**z}]")
+        if not 0 <= y < 2**z:
+            raise ValueError(f"Y value must be in range [0, {2**z}]")
         self.x = x
         self.y = y
         self.z = z
@@ -61,7 +67,9 @@ def __init__(self, cell1: GridIndex, cell2: GridIndex):
         cell1 : GridIndex
         cell2 : GridIndex
         """
-        assert cell1.z == cell2.z, "Can't calculate a grid on different scales yet"
+        if cell1.z != cell2.z:
+            raise NotImplementedError("Can't calculate a grid on different scales yet")
+
         start_x = min(cell1.x, cell2.x)
         end_x = max(cell1.x, cell2.x)
         start_y = min(cell1.y, cell2.y)
diff --git a/mapreader/download/downloader.py b/mapreader/download/downloader.py
index 0005fb71..dedb625c 100644
--- a/mapreader/download/downloader.py
+++ b/mapreader/download/downloader.py
@@ -180,11 +180,12 @@ def download_map_by_polygon(
             Additional keyword arguments to pass to the `_download_map` method
         """
 
-        assert isinstance(
-            polygon, Polygon
-        ), "[ERROR] \
-Please pass polygon as shapely.geometry.Polygon object.\n\
-[HINT] Use ``create_polygon_from_latlons()`` to create polygon."
+        if type(polygon) is not Polygon:
+            raise ValueError(
+                "[ERROR] \
+    Please pass polygon as shapely.geometry.Polygon object.\n\
+    [HINT] Use ``create_polygon_from_latlons()`` to create polygon."
+            )
 
         min_x, min_y, max_x, max_y = polygon.bounds
 
diff --git a/mapreader/download/downloader_utils.py b/mapreader/download/downloader_utils.py
index f86bdbb2..22c84331 100644
--- a/mapreader/download/downloader_utils.py
+++ b/mapreader/download/downloader_utils.py
@@ -155,6 +155,11 @@ def get_coordinate_from_index(grid_index: GridIndex) -> Coordinate:
     return Coordinate(lat, lon)
 
 
+def _check_z(z):
+    if not z >= 0:
+        raise ValueError("Zoom level must be positive")
+
+
 def _get_index_from_coordinate(lon: float, lat: float, z: int) -> tuple[(int, int)]:
     """Generate (x,y) tuple from Coordinate latitudes and longitudes.
 
@@ -163,7 +168,7 @@ def _get_index_from_coordinate(lon: float, lat: float, z: int) -> tuple[(int, in
     Tuple
         (x,y) tuple.
     """
-    assert z >= 0, "Zoom level must be positive"
+    _check_z(z)
     n = 2**z
     x = int((lon + 180) / 360 * n)
     lat_rad = math.radians(lat)
@@ -179,7 +184,7 @@ def _get_coordinate_from_index(x: int, y: int, z: int) -> tuple[(float, float)]:
     Tuple
         (lon, lat) tuple representing the upper left corner of the tile.
     """
-    assert z >= 0, "Zoom level must be positive"
+    _check_z(z)
     n = 2**z
     lon = (x / n) * 360 - 180
     lat_rad = math.atan(math.sinh(math.pi * (1 - 2 * y / n)))
diff --git a/mapreader/download/tile_merging.py b/mapreader/download/tile_merging.py
index 461e5b78..b26db605 100644
--- a/mapreader/download/tile_merging.py
+++ b/mapreader/download/tile_merging.py
@@ -136,9 +136,10 @@ def _load_tile_size(self, grid_bb: GridBoundingBox):
                 )
 
         img_size = start_image.size
-        assert (
-            img_size[0] == img_size[1]
-        ), f"Tiles must be quadratic. This tile, however, is rectangular: {img_size}"
+        if not (img_size[0] == img_size[1]):
+            raise ValueError(
+                f"[ERROR] Tiles must be square: {img_size[0]}x{img_size[1]}."
+            )
         tile_size = img_size[0]
         return tile_size
 
diff --git a/mapreader/spot_text/deepsolo_runner.py b/mapreader/spot_text/deepsolo_runner.py
index 5fdb8944..32befadf 100644
--- a/mapreader/spot_text/deepsolo_runner.py
+++ b/mapreader/spot_text/deepsolo_runner.py
@@ -199,9 +199,13 @@ def __init__(
             with open(self.use_customer_dictionary, "rb") as fp:
                 self.CTLABELS = pickle.load(fp)
         # voc_size includes the unknown class, which is not in self.CTABLES
-        assert int(self.voc_size - 1) == len(
-            self.CTLABELS
-        ), f"voc_size is not matched dictionary size, got {int(self.voc_size - 1)} and {len(self.CTLABELS)}."
+        voc_size_len = int(self.voc_size - 1)
+        CTLABELS_len = len(self.CTLABELS)
+        if not voc_size_len == CTLABELS_len:
+            raise ValueError(
+                f"Vocabulary size and CTABLES do not match up, \
+                             got {voc_size_len} and {CTLABELS_len}."
+            )
 
         # setup the predictor
         self.predictor = DefaultPredictor(cfg)
diff --git a/mapreader/spot_text/runner_base.py b/mapreader/spot_text/runner_base.py
index 616954c0..a24b3829 100644
--- a/mapreader/spot_text/runner_base.py
+++ b/mapreader/spot_text/runner_base.py
@@ -370,7 +370,8 @@ def save_to_geojson(
         geo_df = self._dict_to_dataframe(self.geo_predictions, geo=True, parent=True)
 
         # get the crs (should be the same for all polygons)
-        assert geo_df["crs"].nunique() == 1
+        if not geo_df["crs"].nunique() == 1:
+            raise ValueError("[ERROR] Multiple crs found in the predictions.")
         crs = geo_df["crs"].unique()[0]
 
         geo_df = geopd.GeoDataFrame(geo_df, geometry="polygon", crs=crs)
diff --git a/tests/test_download/test_data_structures.py b/tests/test_download/test_data_structures.py
new file mode 100644
index 00000000..ca10a5f0
--- /dev/null
+++ b/tests/test_download/test_data_structures.py
@@ -0,0 +1,68 @@
+from __future__ import annotations
+
+import pytest
+
+from mapreader.download.data_structures import Coordinate, GridBoundingBox, GridIndex
+
+
+@pytest.fixture
+def init_coordinate():
+    coordinate = Coordinate(0, 90)
+    assert coordinate.lat == 0
+    assert coordinate.lon == 90
+    assert str(coordinate) == "(0, 90)"
+
+    coordinate = Coordinate(0, -180)
+    assert coordinate.lat == 0
+    assert coordinate.lon == -180
+    assert str(coordinate) == "(0, -180)"
+
+    with pytest.raises(ValueError, match="must be in range"):
+        coordinate = Coordinate(-180, 90)
+
+    with pytest.raises(ValueError, match="must be in range"):
+        coordinate = Coordinate(-180, 90)
+
+    with pytest.raises(ValueError, match="must be in range"):
+        coordinate = Coordinate(0, -181)
+
+
+@pytest.fixture
+def init_grid_index():
+    grid_index = GridIndex(1, 2, 3)
+    assert grid_index.x == 1
+    assert grid_index.y == 2
+    assert grid_index.z == 3
+
+    with pytest.raises(
+        ValueError, match="Zoom level must be greater than or equal to 0"
+    ):
+        grid_index = GridIndex(1, 2, -1)
+
+    with pytest.raises(ValueError, match=r"X value must be in range \[0, 8\]"):
+        grid_index = GridIndex(9, 2, 3)
+
+    with pytest.raises(ValueError, match=r"Y value must be in range \[0, 8\]"):
+        grid_index = GridIndex(1, 9, 3)
+
+
+@pytest.fixture
+def init_grid_bounding_box():
+    cell1 = GridIndex(1, 2, 3)
+    cell2 = GridIndex(2, 3, 4)
+
+    with pytest.raises(
+        NotImplementedError, match="Can't calculate a grid on different scales yet"
+    ):
+        bb = GridBoundingBox(cell1, cell2)
+
+    cell2 = GridIndex(2, 3, 3)
+
+    bb = GridBoundingBox(cell1, cell2)
+
+    assert bb.z == 3
+    assert str(bb.lower_corner) == "(3, 1, 2)"
+    assert str(bb.upper_corner) == "(3, 2, 3)"
+    assert bb.x_range == range(1, 3)
+    assert bb.y_range == range(2, 4)
+    assert bb.covered_cells == 4

From 7aae8dce490497db9de85e7be99b3de816a127d5 Mon Sep 17 00:00:00 2001
From: Kalle Westerling <kalle.westerling@gmail.com>
Date: Wed, 7 Aug 2024 15:17:00 +0100
Subject: [PATCH 2/5] Moving test files

---
 tests/{ => test_annotate}/test_annotator.py        |  2 +-
 tests/{ => test_download}/test_sheet_downloader.py | 10 +++++++---
 2 files changed, 8 insertions(+), 4 deletions(-)
 rename tests/{ => test_annotate}/test_annotator.py (99%)
 rename tests/{ => test_download}/test_sheet_downloader.py (98%)

diff --git a/tests/test_annotator.py b/tests/test_annotate/test_annotator.py
similarity index 99%
rename from tests/test_annotator.py
rename to tests/test_annotate/test_annotator.py
index 54cffa8d..d49bc902 100644
--- a/tests/test_annotator.py
+++ b/tests/test_annotate/test_annotator.py
@@ -9,7 +9,7 @@
 
 @pytest.fixture
 def sample_dir():
-    return Path(__file__).resolve().parent / "sample_files"
+    return Path(__file__).resolve().parent.parent / "sample_files"
 
 
 @pytest.fixture
diff --git a/tests/test_sheet_downloader.py b/tests/test_download/test_sheet_downloader.py
similarity index 98%
rename from tests/test_sheet_downloader.py
rename to tests/test_download/test_sheet_downloader.py
index c2b19e97..9a9a05b9 100644
--- a/tests/test_sheet_downloader.py
+++ b/tests/test_download/test_sheet_downloader.py
@@ -18,7 +18,7 @@
 
 @pytest.fixture
 def sample_dir():
-    return Path(__file__).resolve().parent / "sample_files"
+    return Path(__file__).resolve().parent.parent / "sample_files"
 
 
 @pytest.fixture
@@ -38,7 +38,9 @@ def test_init(sheet_downloader):
 
 def test_init_errors(sample_dir):
     test_json = f"{sample_dir}/test_json.json"  # crs changed to EPSG:3857 (note: coordinates are wrong in file)
-    download_url = "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png"
+    download_url = (
+        "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png"
+    )
     with pytest.raises(ValueError, match="file not found"):
         SheetDownloader("fake_file.json", download_url)
     with pytest.raises(ValueError, match="string or list of strings"):
@@ -67,7 +69,9 @@ def test_get_grid_bb(sheet_downloader):
 
 def test_get_grid_bb_errors(sample_dir):
     test_json = f"{sample_dir}/test_json_epsg3857.json"  # crs changed to EPSG:3857 (note: coordinates are wrong in file)
-    download_url = "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png"
+    download_url = (
+        "https://mapseries-tilesets.s3.amazonaws.com/1inch_2nd_ed/{z}/{x}/{y}.png"
+    )
     sd = SheetDownloader(test_json, download_url)
     with pytest.raises(NotImplementedError, match="EPSG:4326"):
         sd.get_grid_bb()

From 0968c578db951c2c2dc2eca7559accaa83d212ec Mon Sep 17 00:00:00 2001
From: Kalle Westerling <kalle.westerling@gmail.com>
Date: Wed, 7 Aug 2024 15:17:47 +0100
Subject: [PATCH 3/5] chore: Add unit tests for downloader_utils

---
 tests/test_download/test_downloader_utils.py | 51 ++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 tests/test_download/test_downloader_utils.py

diff --git a/tests/test_download/test_downloader_utils.py b/tests/test_download/test_downloader_utils.py
new file mode 100644
index 00000000..9f8ee066
--- /dev/null
+++ b/tests/test_download/test_downloader_utils.py
@@ -0,0 +1,51 @@
+from __future__ import annotations
+
+import math
+
+import pytest
+
+from mapreader.download.downloader_utils import (
+    _check_z,
+    _get_coordinate_from_index,
+    _get_index_from_coordinate,
+)
+
+
+def test_check_z():
+    assert _check_z(14) is True
+    with pytest.raises(ValueError, match="Zoom level must be positive"):
+        _check_z(-1)
+
+
+def test_get_index_from_coordinate():
+    lon, lat, z = 0, 0, 1
+    expected = (1, 1)
+    assert _get_index_from_coordinate(lon, lat, z) == expected
+
+    lon, lat, z = 180, 85.05112878, 1
+    expected = (2, 0)
+    assert _get_index_from_coordinate(lon, lat, z) == expected
+
+    lon, lat, z = -180, -85.05112878, 1
+    expected = (0, 2)
+    assert _get_index_from_coordinate(lon, lat, z) == expected
+
+
+def test_get_coordinate_from_index():
+    x, y, z = 1, 1, 1
+    expected = (0.0, 0.0)
+    result = _get_coordinate_from_index(x, y, z)
+    assert math.isclose(result[0], expected[0], rel_tol=1e-9)
+    assert math.isclose(result[1], expected[1], rel_tol=1e-9)
+
+    x, y, z = 2, 0, 1
+    expected = (180.0, 85.05112878)
+    result = _get_coordinate_from_index(x, y, z)
+    assert math.isclose(result[0], expected[0], rel_tol=1e-9)
+    assert math.isclose(result[1], expected[1], rel_tol=1e-9)
+
+    x, y, z = 0, 2, 1
+    expected = (-180.0, -85.05112878)
+    result = _get_coordinate_from_index(x, y, z)
+    assert math.isclose(result[0], expected[0], rel_tol=1e-9)
+    assert math.isclose(result[1], expected[1], rel_tol=1e-9)

From 1782d1d1c0aa05bc21d59975e6205e9dfd66a05a Mon Sep 17 00:00:00 2001
From: Kalle Westerling <kalle.westerling@gmail.com>
Date: Wed, 7 Aug 2024 15:18:49 +0100
Subject: [PATCH 4/5] Renaming unrecognised test functions for
 download.data_structures

---
 tests/test_download/test_data_structures.py | 18 ++++++++++--------
 1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/tests/test_download/test_data_structures.py b/tests/test_download/test_data_structures.py
index ca10a5f0..d0dcc856 100644
--- a/tests/test_download/test_data_structures.py
+++ b/tests/test_download/test_data_structures.py
@@ -2,11 +2,14 @@
 
 import pytest
 
-from mapreader.download.data_structures import Coordinate, GridBoundingBox, GridIndex
+from mapreader.download.data_structures import (
+    Coordinate,
+    GridBoundingBox,
+    GridIndex,
+)
 
 
-@pytest.fixture
-def init_coordinate():
+def test_init_coordinate():
     coordinate = Coordinate(0, 90)
     assert coordinate.lat == 0
     assert coordinate.lon == 90
@@ -27,8 +30,7 @@ def init_coordinate():
         coordinate = Coordinate(0, -181)
 
 
-@pytest.fixture
-def init_grid_index():
+def test_init_grid_index():
     grid_index = GridIndex(1, 2, 3)
     assert grid_index.x == 1
     assert grid_index.y == 2
@@ -46,13 +48,13 @@ def init_grid_index():
         grid_index = GridIndex(1, 9, 3)
 
 
-@pytest.fixture
-def init_grid_bounding_box():
+def test_init_grid_bounding_box():
     cell1 = GridIndex(1, 2, 3)
     cell2 = GridIndex(2, 3, 4)
 
     with pytest.raises(
-        NotImplementedError, match="Can't calculate a grid on different scales yet"
+        NotImplementedError,
+        match="Can't calculate a grid on different scales yet",
     ):
         bb = GridBoundingBox(cell1, cell2)
 

From 6670442f428c904f6ae2ff6361e1fa412fbe1fc9 Mon Sep 17 00:00:00 2001
From: Kalle Westerling <kalle.westerling@gmail.com>
Date: Wed, 7 Aug 2024 15:19:08 +0100
Subject: [PATCH 5/5] Adding output to `_check_z` function

---
 mapreader/download/downloader_utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/mapreader/download/downloader_utils.py b/mapreader/download/downloader_utils.py
index 22c84331..7db683a4 100644
--- a/mapreader/download/downloader_utils.py
+++ b/mapreader/download/downloader_utils.py
@@ -158,6 +158,7 @@ def get_coordinate_from_index(grid_index: GridIndex) -> Coordinate:
 def _check_z(z):
     if not z >= 0:
         raise ValueError("Zoom level must be positive")
+    return True
 
 
 def _get_index_from_coordinate(lon: float, lat: float, z: int) -> tuple[(int, int)]: