From 742a6f5a3bc881191727d434306cb2dff27c0458 Mon Sep 17 00:00:00 2001 From: Rosie Date: Thu, 30 Nov 2023 16:05:35 +0000 Subject: [PATCH 01/13] fix save to geojson --- mapreader/load/images.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/mapreader/load/images.py b/mapreader/load/images.py index 9e15b17d..eeb7296e 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -21,7 +21,8 @@ from PIL import Image, ImageStat from pyproj import Transformer from rasterio.plot import reshape_as_raster -from shapely.geometry import box +from shapely.geometry import box, Polygon +from shapely import wkt from tqdm.auto import tqdm os.environ[ @@ -2212,6 +2213,8 @@ def save_patches_to_geojson( if "polygon" not in patch_df.columns: self.add_patch_polygons() _, patch_df = self.convert_images() + + patch_df["polygon"]=patch_df["polygon"].apply(lambda x: x if isinstance(x, Polygon) else wkt.loads(x)) if not crs: if "crs" in patch_df.columns: @@ -2220,14 +2223,16 @@ def save_patches_to_geojson( else: crs = "EPSG:4326" + if "image_id" in patch_df.columns: + patch_df.drop(columns=["image_id"], inplace=True) patch_df.reset_index(names="image_id", inplace=True) # drop pixel stats columns patch_df.drop(columns=patch_df.filter(like="pixel", axis=1), inplace=True) - # drop tuple columns - cause errors + # change tuple columns to strings for col in patch_df.columns: if isinstance(patch_df[col][0], tuple): - patch_df.drop(columns=col, inplace=True) + patch_df[col]=patch_df[col].apply(str) geo_patch_df = geopd.GeoDataFrame(patch_df, geometry="polygon", crs=crs) geo_patch_df.to_file(geojson_fname, driver="GeoJSON") From 393089a72ba13d93c61fae8b022b6b2d4b7724b2 Mon Sep 17 00:00:00 2001 From: Rosie Date: Fri, 1 Dec 2023 11:08:48 +0000 Subject: [PATCH 02/13] add literal eval for list/tuple columns --- mapreader/load/images.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/mapreader/load/images.py 
b/mapreader/load/images.py index eeb7296e..1975b0dc 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -5,6 +5,7 @@ except ImportError: pass +from ast import literal_eval import os import random import warnings @@ -17,7 +18,7 @@ import numpy as np import pandas as pd import PIL -import rasterio +import rasterio from PIL import Image, ImageStat from pyproj import Transformer from rasterio.plot import reshape_as_raster @@ -420,7 +421,7 @@ def add_metadata( data_series = metadata_df[metadata_df[image_id_col] == key].squeeze() for column, item in data_series.items(): try: - self.images[tree_level][key][column] = eval(item) + self.images[tree_level][key][column] = literal_eval(item) except: self.images[tree_level][key][column] = item @@ -715,9 +716,13 @@ def _add_coord_increments_id( verbose, ) return + if isinstance(self.parents[image_id]["coordinates"], str): + self.parents[image_id]["coordinates"] = literal_eval(self.parents[image_id]["coordinates"]) if "shape" not in self.parents[image_id].keys(): self._add_shape_id(image_id) + if isinstance(self.parents[image_id]["shape"], str): + self.parents[image_id]["shape"] = literal_eval(self.parents[image_id]["shape"]) image_height, image_width, _ = self.parents[image_id]["shape"] @@ -754,6 +759,8 @@ def _add_patch_coords_id(self, image_id: str, verbose: bool = False) -> None: verbose, ) return + if isinstance(self.parents[parent_id]["coordinates"], str): + self.parents[parent_id]["coordinates"] = literal_eval(self.parents[parent_id]["coordinates"]) else: if not all([k in self.parents[parent_id].keys() for k in ["dlat", "dlon"]]): @@ -766,6 +773,9 @@ def _add_patch_coords_id(self, image_id: str, verbose: bool = False) -> None: dlat = self.parents[parent_id]["dlat"] # get patch bounds + if isinstance(self.patches[image_id]["pixel_bounds"], str): + self.patches[image_id]["pixel_bounds"] = literal_eval(self.patches[image_id]["pixel_bounds"]) + pixel_bounds = self.patches[image_id]["pixel_bounds"] # get 
patch coords @@ -797,6 +807,9 @@ def _add_patch_polygons_id(self, image_id: str, verbose: bool = False) -> None: self._add_patch_coords_id(image_id, verbose) if "coordinates" in self.patches[image_id].keys(): + if isinstance(self.patches[image_id]["coordinates"], str): + self.patches[image_id]["coordinates"] = literal_eval(self.patches[image_id]["coordinates"]) + coords = self.patches[image_id]["coordinates"] self.patches[image_id]["polygon"] = box(*coords) @@ -835,9 +848,13 @@ def _add_center_coord_id( self._add_patch_coords_id(image_id, verbose) if "coordinates" in self.images[tree_level][image_id].keys(): + if isinstance(self.images[tree_level][image_id]["coordinates"], str): + self.images[tree_level][image_id]["coordinates"] = literal_eval(self.images[tree_level][image_id]["coordinates"]) + self._print_if_verbose( f"[INFO] Reading 'coordinates' from {image_id}.", verbose ) + min_x, min_y, max_x, max_y = self.images[tree_level][image_id][ "coordinates" ] @@ -888,9 +905,13 @@ def _calc_pixel_height_width( f"[WARNING] 'coordinates' could not be found in {parent_id}. Suggestion: run add_metadata or add_geo_info." 
# noqa ) return + if isinstance(self.parents[parent_id]["coordinates"], str): + self.parents[parent_id]["coordinates"] = literal_eval(self.parents[parent_id]["coordinates"]) if "shape" not in self.parents[parent_id].keys(): self._add_shape_id(parent_id) + if isinstance(self.parents[parent_id]["shape"], str): + self.parents[parent_id]["shape"] = literal_eval(self.parents[parent_id]["shape"]) height, width, _ = self.parents[parent_id]["shape"] xmin, ymin, xmax, ymax = self.parents[parent_id]["coordinates"] @@ -2153,12 +2174,18 @@ def _save_patch_as_geotiff( verbose, ) + # get shape if "shape" not in self.patches[patch_id].keys(): self._add_shape_id(patch_id) + if isinstance(self.patches[patch_id]["shape"], str): + self.patches[patch_id]["shape"] = literal_eval(self.patches[patch_id]["shape"]) height, width, channels = self.patches[patch_id]["shape"] + # get coords if "coordinates" not in self.patches[patch_id].keys(): self._add_patch_coords_id(patch_id) + if isinstance(self.patches[patch_id]["coordinates"], str): + self.patches[patch_id]["coordinates"] = literal_eval(self.patches[patch_id]["coordinates"]) coords = self.patches[patch_id]["coordinates"] if not crs: @@ -2213,7 +2240,7 @@ def save_patches_to_geojson( if "polygon" not in patch_df.columns: self.add_patch_polygons() _, patch_df = self.convert_images() - + patch_df["polygon"]=patch_df["polygon"].apply(lambda x: x if isinstance(x, Polygon) else wkt.loads(x)) if not crs: From ebf98ae86742f7072ce3229c4f32cdbc46ec8293 Mon Sep 17 00:00:00 2001 From: Rosie Date: Mon, 4 Dec 2023 11:54:20 +0000 Subject: [PATCH 03/13] add method to save parents as geotiffs --- mapreader/load/images.py | 110 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) diff --git a/mapreader/load/images.py b/mapreader/load/images.py index 1975b0dc..116f0a90 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -2098,6 +2098,116 @@ def _get_tree_level(self, image_id: str) -> str: tree_level = "parent" if 
bool(self.parents.get(image_id)) else "patch" return tree_level + def save_parents_as_geotiffs( + self, + rewrite: bool = False, + verbose: bool = False, + crs: str | None = None, + ) -> None: + """Save all parents in MapImages instance as geotiffs. + + Parameters + ---------- + rewrite : bool, optional + Whether to rewrite files if they already exist, by default False + verbose : bool, optional + Whether to print verbose outputs, by default False + crs : str, optional + The CRS of the coordinates. + If None, the method will first look for ``crs`` in the parents dictionary and use those. If ``crs`` cannot be found in the dictionary, the method will use "EPSG:4326". + By default None. + """ + + parents_list = self.list_parents() + + for parent_id in tqdm(parents_list): + self._save_parent_as_geotiff(parent_id, rewrite, verbose, crs) + + def _save_parent_as_geotiff( + self, + parent_id: str, + rewrite: bool = False, + verbose: bool = False, + crs: str | None = None, + ) -> None: + """Save a parent image as a geotiff. + + Parameters + ---------- + parent_id : str + The ID of the parent to write. + rewrite : bool, optional + Whether to rewrite files if they already exist, by default False + verbose : bool, optional + Whether to print verbose outputs, by default False + crs : Optional[str], optional + The CRS of the coordinates. + If None, the method will first look for ``crs`` in the parents dictionary and use those. If ``crs`` cannot be found in the dictionary, the method will use "EPSG:4326". + By default None. + + Raises + ------ + ValueError + If parent directory does not exist. 
+ """ + + parent_path = self.parents[parent_id]["image_path"] + parent_dir = os.path.dirname(parent_path) + + if not os.path.exists(parent_dir): + raise ValueError(f'[ERROR] Parent directory "{parent_dir}" does not exist.') + + parent_id_no_ext = os.path.splitext(parent_id)[0] + geotiff_path = f"{parent_dir}/{parent_id_no_ext}.tif" + + self.parents[parent_id]["geotiff_path"] = geotiff_path + + if os.path.isfile(f"{geotiff_path}"): + if not rewrite: + self._print_if_verbose( + f'[INFO] File already exists: {geotiff_path}.', verbose + ) + return + + self._print_if_verbose( + f"[INFO] Creating: {geotiff_path}.", + verbose, + ) + + if "shape" not in self.parents[parent_id].keys(): + self._add_shape_id(parent_id) + if isinstance(self.parents[parent_id]["shape"], str): + self.parents[parent_id]["shape"] = literal_eval(self.parents[parent_id]["shape"]) + height, width, channels = self.parents[parent_id]["shape"] + + if "coordinates" not in self.parents[parent_id].keys(): + print(self.parents[parent_id].keys()) + raise ValueError(f"[ERROR] Cannot locate coordinates for {parent_id}") + if isinstance(self.parents[parent_id]["coordinates"], str): + self.parents[parent_id]["coordinates"] = literal_eval(self.parents[parent_id]["coordinates"]) + coords = self.parents[parent_id]["coordinates"] + + if not crs: + crs = self.parents[parent_id].get("crs", "EPSG:4326") + + parent_affine = rasterio.transform.from_bounds(*coords, width, height) + parent = Image.open(parent_path) + parent_array = reshape_as_raster(parent) + + with rasterio.open( + f"{geotiff_path}", + 'w', + driver="GTiff", + height=parent.height, + width=parent.width, + count=channels, + transform=parent_affine, + dtype='uint8', + nodata=0, + crs=crs, + ) as dst: + dst.write(parent_array) + def save_patches_as_geotiffs( self, rewrite: bool | None = False, From b877a9684944134a632cdb4b74fb28ceb0039f27 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Mon, 4 Dec 2023 14:52:13 +0000 Subject: [PATCH 04/13] fix patch coords 
--- mapreader/load/images.py | 95 ++++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 34 deletions(-) diff --git a/mapreader/load/images.py b/mapreader/load/images.py index 116f0a90..d2c13784 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -5,10 +5,10 @@ except ImportError: pass -from ast import literal_eval import os import random import warnings +from ast import literal_eval from glob import glob from typing import Literal @@ -18,12 +18,12 @@ import numpy as np import pandas as pd import PIL -import rasterio +import rasterio from PIL import Image, ImageStat from pyproj import Transformer from rasterio.plot import reshape_as_raster -from shapely.geometry import box, Polygon from shapely import wkt +from shapely.geometry import Polygon, box from tqdm.auto import tqdm os.environ[ @@ -717,12 +717,16 @@ def _add_coord_increments_id( ) return if isinstance(self.parents[image_id]["coordinates"], str): - self.parents[image_id]["coordinates"] = literal_eval(self.parents[image_id]["coordinates"]) + self.parents[image_id]["coordinates"] = literal_eval( + self.parents[image_id]["coordinates"] + ) if "shape" not in self.parents[image_id].keys(): self._add_shape_id(image_id) if isinstance(self.parents[image_id]["shape"], str): - self.parents[image_id]["shape"] = literal_eval(self.parents[image_id]["shape"]) + self.parents[image_id]["shape"] = literal_eval( + self.parents[image_id]["shape"] + ) image_height, image_width, _ = self.parents[image_id]["shape"] @@ -760,29 +764,34 @@ def _add_patch_coords_id(self, image_id: str, verbose: bool = False) -> None: ) return if isinstance(self.parents[parent_id]["coordinates"], str): - self.parents[parent_id]["coordinates"] = literal_eval(self.parents[parent_id]["coordinates"]) + self.parents[parent_id]["coordinates"] = literal_eval( + self.parents[parent_id]["coordinates"] + ) else: if not all([k in self.parents[parent_id].keys() for k in ["dlat", "dlon"]]): 
self._add_coord_increments_id(parent_id) # get min_x and min_y and pixel-wise dlon and dlat for parent image - parent_min_x = self.parents[parent_id]["coordinates"][0] - parent_min_y = self.parents[parent_id]["coordinates"][1] + parent_min_x, parent_min_y, parent_max_x, parent_max_y = self.parents[ + parent_id + ]["coordinates"] dlon = self.parents[parent_id]["dlon"] dlat = self.parents[parent_id]["dlat"] # get patch bounds if isinstance(self.patches[image_id]["pixel_bounds"], str): - self.patches[image_id]["pixel_bounds"] = literal_eval(self.patches[image_id]["pixel_bounds"]) - + self.patches[image_id]["pixel_bounds"] = literal_eval( + self.patches[image_id]["pixel_bounds"] + ) + pixel_bounds = self.patches[image_id]["pixel_bounds"] # get patch coords min_x = (pixel_bounds[0] * dlon) + parent_min_x - min_y = (pixel_bounds[1] * dlat) + parent_min_y + min_y = parent_max_y - (pixel_bounds[3] * dlat) max_x = (pixel_bounds[2] * dlon) + parent_min_x - max_y = (pixel_bounds[3] * dlat) + parent_min_y + max_y = parent_max_y - (pixel_bounds[1] * dlat) self.patches[image_id]["coordinates"] = (min_x, min_y, max_x, max_y) self.patches[image_id]["crs"] = self.parents[parent_id]["crs"] @@ -808,8 +817,10 @@ def _add_patch_polygons_id(self, image_id: str, verbose: bool = False) -> None: if "coordinates" in self.patches[image_id].keys(): if isinstance(self.patches[image_id]["coordinates"], str): - self.patches[image_id]["coordinates"] = literal_eval(self.patches[image_id]["coordinates"]) - + self.patches[image_id]["coordinates"] = literal_eval( + self.patches[image_id]["coordinates"] + ) + coords = self.patches[image_id]["coordinates"] self.patches[image_id]["polygon"] = box(*coords) @@ -849,12 +860,14 @@ def _add_center_coord_id( if "coordinates" in self.images[tree_level][image_id].keys(): if isinstance(self.images[tree_level][image_id]["coordinates"], str): - self.images[tree_level][image_id]["coordinates"] = literal_eval(self.images[tree_level][image_id]["coordinates"]) - + 
self.images[tree_level][image_id]["coordinates"] = literal_eval( + self.images[tree_level][image_id]["coordinates"] + ) + self._print_if_verbose( f"[INFO] Reading 'coordinates' from {image_id}.", verbose ) - + min_x, min_y, max_x, max_y = self.images[tree_level][image_id][ "coordinates" ] @@ -906,12 +919,16 @@ def _calc_pixel_height_width( ) return if isinstance(self.parents[parent_id]["coordinates"], str): - self.parents[parent_id]["coordinates"] = literal_eval(self.parents[parent_id]["coordinates"]) + self.parents[parent_id]["coordinates"] = literal_eval( + self.parents[parent_id]["coordinates"] + ) if "shape" not in self.parents[parent_id].keys(): self._add_shape_id(parent_id) if isinstance(self.parents[parent_id]["shape"], str): - self.parents[parent_id]["shape"] = literal_eval(self.parents[parent_id]["shape"]) + self.parents[parent_id]["shape"] = literal_eval( + self.parents[parent_id]["shape"] + ) height, width, _ = self.parents[parent_id]["shape"] xmin, ymin, xmax, ymax = self.parents[parent_id]["coordinates"] @@ -2100,7 +2117,7 @@ def _get_tree_level(self, image_id: str) -> str: def save_parents_as_geotiffs( self, - rewrite: bool = False, + rewrite: bool = False, verbose: bool = False, crs: str | None = None, ) -> None: @@ -2119,14 +2136,14 @@ def save_parents_as_geotiffs( """ parents_list = self.list_parents() - + for parent_id in tqdm(parents_list): self._save_parent_as_geotiff(parent_id, rewrite, verbose, crs) def _save_parent_as_geotiff( self, - parent_id: str, - rewrite: bool = False, + parent_id: str, + rewrite: bool = False, verbose: bool = False, crs: str | None = None, ) -> None: @@ -2165,8 +2182,8 @@ def _save_parent_as_geotiff( if os.path.isfile(f"{geotiff_path}"): if not rewrite: self._print_if_verbose( - f'[INFO] File already exists: {geotiff_path}.', verbose - ) + f"[INFO] File already exists: {geotiff_path}.", verbose + ) return self._print_if_verbose( @@ -2177,14 +2194,18 @@ def _save_parent_as_geotiff( if "shape" not in 
self.parents[parent_id].keys(): self._add_shape_id(parent_id) if isinstance(self.parents[parent_id]["shape"], str): - self.parents[parent_id]["shape"] = literal_eval(self.parents[parent_id]["shape"]) + self.parents[parent_id]["shape"] = literal_eval( + self.parents[parent_id]["shape"] + ) height, width, channels = self.parents[parent_id]["shape"] if "coordinates" not in self.parents[parent_id].keys(): print(self.parents[parent_id].keys()) raise ValueError(f"[ERROR] Cannot locate coordinates for {parent_id}") if isinstance(self.parents[parent_id]["coordinates"], str): - self.parents[parent_id]["coordinates"] = literal_eval(self.parents[parent_id]["coordinates"]) + self.parents[parent_id]["coordinates"] = literal_eval( + self.parents[parent_id]["coordinates"] + ) coords = self.parents[parent_id]["coordinates"] if not crs: @@ -2196,17 +2217,17 @@ def _save_parent_as_geotiff( with rasterio.open( f"{geotiff_path}", - 'w', + "w", driver="GTiff", height=parent.height, width=parent.width, count=channels, transform=parent_affine, - dtype='uint8', + dtype="uint8", nodata=0, crs=crs, ) as dst: - dst.write(parent_array) + dst.write(parent_array) def save_patches_as_geotiffs( self, @@ -2288,14 +2309,18 @@ def _save_patch_as_geotiff( if "shape" not in self.patches[patch_id].keys(): self._add_shape_id(patch_id) if isinstance(self.patches[patch_id]["shape"], str): - self.patches[patch_id]["shape"] = literal_eval(self.patches[patch_id]["shape"]) + self.patches[patch_id]["shape"] = literal_eval( + self.patches[patch_id]["shape"] + ) height, width, channels = self.patches[patch_id]["shape"] # get coords if "coordinates" not in self.patches[patch_id].keys(): self._add_patch_coords_id(patch_id) if isinstance(self.patches[patch_id]["coordinates"], str): - self.patches[patch_id]["coordinates"] = literal_eval(self.patches[patch_id]["coordinates"]) + self.patches[patch_id]["coordinates"] = literal_eval( + self.patches[patch_id]["coordinates"] + ) coords = 
self.patches[patch_id]["coordinates"] if not crs: @@ -2351,7 +2376,9 @@ def save_patches_to_geojson( self.add_patch_polygons() _, patch_df = self.convert_images() - patch_df["polygon"]=patch_df["polygon"].apply(lambda x: x if isinstance(x, Polygon) else wkt.loads(x)) + patch_df["polygon"] = patch_df["polygon"].apply( + lambda x: x if isinstance(x, Polygon) else wkt.loads(x) + ) if not crs: if "crs" in patch_df.columns: @@ -2369,7 +2396,7 @@ def save_patches_to_geojson( # change tuple columns to strings for col in patch_df.columns: if isinstance(patch_df[col][0], tuple): - patch_df[col]=patch_df[col].apply(str) + patch_df[col] = patch_df[col].apply(str) geo_patch_df = geopd.GeoDataFrame(patch_df, geometry="polygon", crs=crs) geo_patch_df.to_file(geojson_fname, driver="GeoJSON") From 14b8d29b74a257f1f0c91285342c7b2eff89f022 Mon Sep 17 00:00:00 2001 From: Rosie Date: Mon, 4 Dec 2023 15:53:10 +0000 Subject: [PATCH 05/13] add saving coords from grid_bb --- mapreader/load/images.py | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/mapreader/load/images.py b/mapreader/load/images.py index d2c13784..8d8c84fb 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -11,6 +11,7 @@ from ast import literal_eval from glob import glob from typing import Literal +import re import matplotlib.image as mpimg import matplotlib.patches as patches @@ -26,6 +27,9 @@ from shapely.geometry import Polygon, box from tqdm.auto import tqdm +from mapreader.download.data_structures import GridIndex, GridBoundingBox +from mapreader.download.downloader_utils import get_polygon_from_grid_bb + os.environ[ "USE_PYGEOS" ] = "0" # see here https://github.com/geopandas/geopandas/issues/2691 @@ -511,6 +515,21 @@ def add_shape(self, tree_level: str | None = "parent") -> None: for image_id in image_ids: self._add_shape_id(image_id=image_id) + def add_coords_from_grid_bb(self, verbose: bool = False) -> None: + + print("[INFO] Adding coordinates, tree 
level: parent") + + parent_list = self.list_parents() + + for parent_id in parent_list: + if "grid_bb" not in self.parents[parent_id].keys(): + print( + f"[WARNING] No grid bounding box found for {parent_id}. Suggestion: run add_metadata or add_geo_info." # noqa + ) + continue + + self._add_coords_from_grid_bb_id(image_id=parent_id, verbose=verbose) + def add_coord_increments(self, verbose: bool | None = False) -> None: """ Adds coordinate increments to each image at the parent level. @@ -662,6 +681,31 @@ def _add_shape_id( raise ValueError( f'[ERROR] Problem with "{image_id}". Please either redownload or remove from list of images to load.' ) + + def _add_coords_from_grid_bb_id( + self, image_id: int | str, verbose: bool = False + ) -> None: + + grid_bb = self.parents[image_id]["grid_bb"] + + if isinstance(grid_bb, str): + cell1, cell2 = re.findall("\(.*?\)", grid_bb) + + z1, x1, y1 = literal_eval(cell1) + z2, x2, y2 = literal_eval(cell2) + + cell1 = GridIndex(x1, y1, z1) + cell2 = GridIndex(x2, y2, z2) + + grid_bb = GridBoundingBox(cell1, cell2) + + if isinstance(grid_bb, GridBoundingBox): + polygon = get_polygon_from_grid_bb(grid_bb) + coordinates = polygon.bounds + self.parents[image_id]["coordinates"] = coordinates + + else: + raise ValueError(f"[ERROR] Unexpected grid_bb format for {image_id}.") def _add_coord_increments_id( self, image_id: int | str, verbose: bool | None = False From cb4fd4a2a8f66941801ad65c9eedec50660b3743 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Tue, 2 Jan 2024 17:28:21 +0000 Subject: [PATCH 06/13] update metadata files --- tests/sample_files/ts_downloaded_maps.csv | 4 ++-- tests/sample_files/ts_downloaded_maps.tsv | 4 ++-- tests/sample_files/ts_downloaded_maps.xlsx | Bin 10645 -> 10141 bytes 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/sample_files/ts_downloaded_maps.csv b/tests/sample_files/ts_downloaded_maps.csv index 620f26f5..533d3cbb 100644 --- a/tests/sample_files/ts_downloaded_maps.csv +++ 
b/tests/sample_files/ts_downloaded_maps.csv @@ -1,2 +1,2 @@ -,name,url,coordinates,crs,pub_date,region,polygon -0,cropped_74488689.png,https://maps.nls.uk/view/74488689,"(-4.833984375, 55.80128097, -4.21875, 56.05976947910657)",EPSG:4326,1898,Glasgow (30),"[[-4.20999993, 55.79000003], [-4.82999994, 55.79000003], [-4.82999993, 56.05000003], [-4.20999993, 56.05000003], [-4.20999993, 55.79000003]]" +,name,url,coordinates,crs,published_date,grid_bb,polygon +0,cropped_74488689.png,https://maps.nls.uk/view/74488689,"(-4.833984375, 55.78892895389263, -4.19677734375, 56.05976947910656)",EPSG:4326,1898,"[(14, 7972, 5097)x(14, 8000, 5118)]","POLYGON ((-4.20999993 55.79000003, -4.82999994 55.79000003, -4.82999993 56.05000003, -4.20999993 56.05000003, -4.20999993 55.79000003))" diff --git a/tests/sample_files/ts_downloaded_maps.tsv b/tests/sample_files/ts_downloaded_maps.tsv index 22ae4ba5..a93cb540 100644 --- a/tests/sample_files/ts_downloaded_maps.tsv +++ b/tests/sample_files/ts_downloaded_maps.tsv @@ -1,2 +1,2 @@ - name url coordinates crs pub_date region polygon -0 cropped_74488689.png https://maps.nls.uk/view/74488689 "(-4.833984375,55.80128097,-4.21875,56.05976947910657)" EPSG:4326 1898 Glasgow (30) "[[-4.20999993,55.79000003],[-4.82999994,55.79000003],[-4.82999993,56.05000003],[-4.20999993,56.05000003],[-4.20999993,55.79000003]]" + name url coordinates crs published_date grid_bb polygon +0 cropped_74488689.png https://maps.nls.uk/view/74488689 "(-4.833984375, 55.78892895389263, -4.19677734375, 56.05976947910656)" EPSG:4326 1898 "[(14, 7972, 5097)x(14, 8000, 5118)]","POLYGON ((-4.20999993 55.79000003, -4.82999994 55.79000003, -4.82999993 56.05000003, -4.20999993 56.05000003, -4.20999993 55.79000003))" diff --git a/tests/sample_files/ts_downloaded_maps.xlsx b/tests/sample_files/ts_downloaded_maps.xlsx index fd80430de9c02baa0c9697af245ff7fd8b0389d9..20051690fed861d696b5f54f55d6bda7b68bacc7 100644 GIT binary patch delta 5554 
zcmZu#cQl;c_8x8Ujn3#j$`CDj5H0%XqL)OCK3XDLhLPxff~XNh?*>5-z4sD~-cpp& ziF)O`>vz9(@A|#(T4$a2yw5&=Jm=YapS_a}vUF&5p*Xly03d(>006K7Z1%Dot*`)q z1l(#`L2S6jLXA9u*e;@%f>%%WYZtPo#MV69$>}yb@{&yPBiLD@nqqVzK=@$TvBur4 z9ud*gtWOjj()ZQSia|oKyLX4PjfO7C%ghv$L&YHffbQwjSceS2d(VbWeNm-V1&uO# zM=-T)@OsvKuMzojPa{!@S<%Bjbo+|7(Gt1jKrwuhY<-+^%Grr0!0Kq7UmZ7^Jz#y4(@lbE?#o3QZjZe zGeGT{vf~r%v}xJh_*OpkW-IADl;B0YF6`_7EcsPu*OoERyP!BaHMel3Xy%6dUVHh6L#$}kOvE{uLbQA zB5MKNgbVp*yabca`3SP_7;cji^rLvtTx=q*kCz#En=DOcpn^ECFh9(2@5?hWk~JL4 zllJ>XQ53lTl&%?>O3O!LfF}RjLB_4%XDcFwGn*r#$cx$fof=AP3Ljgyls4pJ_UY2__2&1 z(GjZ45c>hfe@wC6KW|q)B{=+Y<)Gy^;?kaXLA1$O3S}-4dlVSv!y4YF$^z#}^4`o% zYc|BJ`Y2uw@U)>1`38?M4jcWw)5`#4H4YV=@uR;&EH(g8jRydb-ZAmYRnQmaJQ#Khu4Y}4a&&$?jhj+jDp*|S7V#;f&4=S)CH*iw%HV|@vDkv_;tE)G5 zF)8Mfw(AaZj?OCCzv$23l^3O)QoJZwN&Y!LutWAa=~rRXp|GJ5pn3&?PNk;G6NWn6 zFBY{V$Y~+b9y9A?=%kTPG@BvhgJa(E_!T7UK?88B@}aNd16Gi%=koBD(OaSX2*=YQ z%iDU>pXYM5Q4J;sBz*gAF+eaw7JuV2(yR%x@{*@0pSNIILgyXP??Xj2AUfMlZrX{9G&bnpErm+@Hc@-ZDf zaDG&x7Y1aa8+ppz8>%1)Pt$IRMy?yc$ds#n=`DM#M42r#^PqC{;h_@GKCW{Zpz6C- za7jeE$$bJxUub=6;oe84e{VjoyxBTfIRE+Wa*5HvSNf61%=;{h%*dH|i>(r;X8=B1iWWoZtZ~di-Zg5Lw>l*hP!ue zPF+|oV#oM&Sm!?X8_PW*qXX0R%@e!Z(v-Bhslu`T*6XIH?X>P)a3oLw0EqvsS8pFb zS9|Y2YVERcgRQBM-B=ahw%1I>cVU9>tx%~gWcz68G(Fihwk7ghr?C?>o+!C(sdZV; z(qiR{w0`T=9(R^Dmp1qO{A?I$zyVO`@^^!Guyo^tl&Iog(yl9=_-xlE3trd2$K*FUDVs)={TlLT%`ZBI?{ zna^0IPGH?bZ(POHdA49qqDH9ygnm^j+D^WvDsa9mKrgZUancY{lx>s<`mN^2#SyRI zfJZi>#C(wWo}0@FG=Mu~(Vvq8np;x@kK$~^*3B~!Ktf)Zm%`3fXgxWCgIJO_=?W}; z1BK0~9~PsEW;bw$NP8T3t>iYlm;K~#fxagf)%`^ms+Y_K9=$WM{Se-X+$jPR1}T?u zX}|Z~CzQ29T3hytFo8}!^$*+VGA3``Ba4*a#vcYiL#4FL)zTZufXlofb)$Ir#;eMX zw$o#0l0{ekAL<%`g?MDNfnaL=4t9L%4o4 zlv~6CGa}p`2!+B&ls4oKRBbhQ#|WE5H_1cVhgVK*wsx#$KQ4(W1s0Rb<6ATsX$r}; z#vv8t zJB{~rDhKxGYD_PLf2qcc7Db7g>hc?jh+8{`U6;XWhW6fDK zu1(L4`&OeqVcrRI1w22B}!CxV_Kn?NJe+hFh7L7lAV~!FWFF4 zim_DJUO)5yAn=Xp5v~u!IF|!0e+&%s+rwK1s1>WSGiWxkf33U4&b+C65(E?0Smz8e z^j0rT+RvbJIrp_9!+x>R@0j?el%3ST%xbYN!IALfC8Rw5#o>D;9ahbvt!eKav#v%C 
z)XvI;X@j)KaLD%DwZHXb;txJ6j*Oib7_+&6YkY@%c>SX#PrJ@9;EA$8Keo=C8hq=$Vs?`*7_ntG`H`OWmn)o7RRXr3$ylVPrqFq5qRI6S}>Y>#s zPOIB+o|_f=$PxL7`q6wr^yjqs<4%XfBwL)iglL8-d0}_m=(FzOr4Vi^{y4Pxt1Esn z`jYS=IC;0zMXcBF4};=C@|!lo^8$5&NIbgVM9QF9&rboc=1Sclm)4o$+}2Hpr#l# zllEOv0C&+Cz=%Pbm{rEm5g@Xj1tiO{z|s94TXfzhSp}&U>%x8TRT**R9f=z^cmFV^ zMMkM@Y%`k37j(dnD_9t#buOHv|Gq8#3NKXIcxi2P)))q&%YG z0AdR+Uh?kj&&0->Mi43=k_=7ngqQAgno7?lj?KycPyQR(laImS1Y2ufM=H$Wx9 zp$dJ*&O@vlATi#e@2citx!h;LyROqi&q~?VP>TAa@%6l~yV`TIS=|hABM}OZ_V}7&S;d4o`U-SsmQYz&Ef-0D5)uq$Ni^lyiOwiE3#cjfcBVgzn&x zq>O`XJ>@1cBdc--3vE2*^aqqp#x@PAKePOdP_D;k`?EIjCZdXULA%@e`iH*krb%}G z?q$HWS|s@#RM_YN^;sTmX`Ee|4d0&X10$zY`<=IU#>&|qoPOAMJ4eZ^Y8>MBti7gk z!>G^SaOOpk!=NgGu^sRr?jETxeuDNG`nLgEF|11?m1f)83P`_C)%J{Kw(d2*V5 zQd!O9G5eN`+7q?dF)=2hHJ{+{zP`F4}tS zH%`3>M`p8NY2}QULJLtQp{f1keegGau5{ZZaN(TX9>a$WmuY07IT&8N>;(ERLI&2=4H!$QPqY zTax3zU8#yt*^*a9y0NFj0KVB;rn-`QVU%^b^U!w zJGA$Pnf5u%8MTcigkiS|%XlY)f-`PkT~_W~ixCuT6<7A3A%r5TN7`!F2ZW+lPl$6e z1i@cifo*kKX)z~oOaadAsX$f0V{fUQM(v%GK$t)JXaPQVd@8=Lh0eSV*RFuCEJ#V) zlMZqZwp8d#E?S9axxKsX!Nk7s(JLKY0%F%y@3k&Uz+?t@@Zy|K7G6v45MRc91IY}w z?lwkH9eezclJTfuVPo=R8~_oPrr1lc*EjAw<@V0bMdF?ui>xn?g(Z z?&mhx-)2wR%>$8|>T4y-uM5G}?a>WGnp?dAuz~4*(^C)|{W181!^7t*VCQvo6gXybBfOg zQL`hTO;T)Smw|By%Nd-e@L;@T`N=d%hBbI4`B`T#Q{ADp;lhr4&-pTTsr?&cl$=Uo z9KgqcI96u%IIfrfeLZw43)$ZO{Nt!gL=-lchj`J&-}K&H=5B9eCdNq+tW?3P#?~_O z5vCs>*AGp7d)76REN*QCV;L#1 z;aZw__pYV+b5{&BeMi3}Kdr&Y8un_v!(Ttr4w*C*xQiWYTl(?SY{l5=EWIOLJ@q`6 zIsWcvdJtu$ZQ*TvP&UpLk;Pkv8QMY!P?WqoI7RhA&Own!Z6J`w&&M`eTX|RdjrkQP z5PcK=r(C+d{d=h4QPb2xhJnc?B@Fc;1Dd13-WrD>*JPq(s-#1x#-VrnP|Q@hg$6n@k&|S@ND(T^GsU#Ta4ejb3udUAaGsJRoI&`y}b;Ec}sE@;OP0 zPV)qXwC9=}gn=Z&tGG$+Esi4i(0##ms_Ug~ct^wY8r`Uxj?_NPFl@_y!V7coE6gR?))m!67vM!bSimnw=r95mPG?&M)qZ8 zds^x?hDkqfN@5>Nq`n*>PB2rYj4NAJJ6x}KVbi(R#d(0A{;`~;9^JTOdA1-Ku(0Gu znx&Kt?f<0l#cfM$G75XA=$kA@J*fXYL#BR9@HhT&NVgL_Tdn8aP#VOj2tvX3X&|4M zHc-->4Wb67QLJj9$R1N9tnm7%KLBdYJp1zH#`8r=ZOmxwrE+>@f-ZZg`jg1bNri-W z`eYMN1mGiy+rtgbrNh3c_Xvk4TH#DKyg85{Vn}UhBHRJ+;M0x#73`!KtMpYy_0@hf 
zF0P=4F2=aS1@-f}jCnE?!I*?93j>sX3snSm~jJ4XDyrQ>f z^SbBfLOkUN!qQ3~jMk9kDhpSIF`XB&+C|!7Q&y;K(=v5RMu<5wM@zTd^%DgnQ`OQ)Rrq;B2|G=Hlo_Vf~a1MC0Ut9YX*qaSHId3*B)mJ3r7|Y?Kun zE;NZwDx78Vcgr%)XXJ$0e0!lR@k=a8uDh}}{QK)Ei?qcG+#H974=Lf2&{7NDga-6h zKM-%O!_0Jb*@;sb)B6UWS;66_vOxWrG*|Ta;&otDdeS@NhaVsEd`H>&4D}70zlATr zfH|^|tBhB;qAh10!voqTXrf%HzE+j0* z8F7No_m7FrtY`@dGWn-`xA$Z79R1-^Ho7*hs~K0CefteX;;@nzzRTwDth~a;X9n=y z_?NVce$JSOCw`AV$rSa|GhkMdciKQEgQ#Htp+c(0h(X&alP0*oS=lki62bRG#>{j|+$na86E*~Q9s`of@sHQJH7 zquJLpgL(;}2CL76LNXWrf)YzF*}h-d0LD_cK0 znrhn$Y`96=TNhsdtZy^xL_1F^c1Ts;eEx2$u($>0l6Y^?u;Odd=DZ{kb}6U%!9q|_ zfO@k~n8nA);BZ%ZTkh}J^(U=iVe|NGyLcIg6!qu zXZ+tr(m($(1V}P&Hpc(0-2VX-gOFO>EO%*@9-D&-X$|3I{3o#j0JQ%A?`C-nNCpTm z<9{OMoug9y7l4Bq8NtoP_#el;1Cak0;2R5a1#(a6Kf1h2K4kv|kY@(~>|nOKUN8@D cL2D0>Kb-|#C?4<+=&!ps_-;#l&OcxO0`RRblmGw# delta 6042 zcmZu#by$>L(_fZmVO{ACDd`T$MM4?`Y3UMJBo+h|Sh{-wDM7kHx?37SLK>tLkQ5PF ziT%*$yB^=?dEayYbLKkdcim@Z&dmJIywXb1rPNl##3BXY0Pp|+02AQj&PQE;3;-Y> zyM~evh;Ur4h2n`E1Pu~%K9uQ>WslVx6s2zxMohf*VM`VVEIeZu6|w40I(6>N96agZL;5eYaG7iqn6{vLN8pq<|7>SV)g|xqRJ8Q;*DHY}Ei4{;>M_F(+s5n#sGa z3Ui(c)52{<nbQ(aLUw}N%+hosY_XQuxWlIy*M$Dft2<_$>fTTu9V3%E98B>6n<@g z<{-HwsxARj0u!P{*2xaMvA;Yf06-1E!$4NEQrDg0;A0??-Z}R1Lqd*1JA>z6@JL?e zh77eN->-813Ak@&!q#Y0zr@hx7y8z(K!l0hE@L1%JH`neFMa!mar zTC1;5fElqbdB45kB#bxRN3}2|pPIL4>2+OV0|01bGUjbWGTxYJ*NtVA@d1EaH}};W z#^>eY?qucS;>7Fi46D#{cG(bzT>DR5g+HFDEZROpTZOzLx%N7WvzvTv~!DQIkD z9gvneqA%hwb3N`=HXrKE#2pN8+S_HPpUOIq_Ilgh|J+WUwndzYQ?8T!>BDCCWvg;U zw{St8o)Q9NRuhQoEkHS(Z!=sGQE1)DlBEJ@4+pmBm&HMnb~92QnVn?lxz=vv6%eFu z+#WXO&lnc5F5^y!t%+C7F!&5+BHp$*{=liV5K^o$%-Mi>yW%y$&Zwu^D7aF6B9N=p z+tHEhlPwYCmQP~&OY#}v5#iS770s`2VK;ZMX4fFdj=!zvSWxaCvWZKyl#S{3u$q^u z1t>{Qv0szK;Q3Gl9XJq<%eyC9Otg%z2c{g$4volKpj<4l(4y2B4h_bUjNS|i-@wZ- z_`LZi?6xJ9x!0aop{WwOp$?>v${bR;Qi;asxlv&qOArk@F&~U#tTu7s$z#TC8cQ3P zVkY7~4|Ir5it?xblNXD`Ap)GFBxHK9F)nP0<%{oa8BR5WPGADD&-*!S6odqzQ|!)O z<>?5SA)2yTCFZEW$WB#P`beT}s@B!9_c62Z5`MM>Ac8fiz5>)ZW0|z0Tyww)snj8UA4JW;~O%&umeOF*QV{3LV$3fNq5c24=Yrs{Ftsr4lqNQ>#^ 
zufMcet{adRdt)^bS!ot=^?F;Km>s_>Sg1E%N0X*ul?y6=OPylM&s{iw=`duiWVlX@ zy4o9Uzl$o)mAt5;o9#H{wWmQ9k#z_+{_sipsd=( z$6Gb}3>+K#ZvC*ci&u$)rS;s`@&{t_HstQs73;A(L1eF6SeF}8!pihq}Hak($Z|gGuX~9gW zeIc4(7RZ)+A>S5vnzrwCE3mg*`D=8~^A7$%3!QDCh zu}iIT33S$MJH`~CE!tA zl49hxLCG>EWo$R%tMbr%InMwco=KA{)jnQ!Nf!0s32>y)coNzA#L;Rd-8yOY;!gZl z;iUfDA%SaTm6KWROOI_E%_lxez^yqZMST3t=taqIss;E0u)z*raV@9HEXBg7DVPF* zB?yBIYbnRwqu>h0qPFl))l490L;1cT3}I|EM%*qj4)H?MDZ2zW5` z5AOhWNj~oyXA-boEXU__Xl649Ft9TWoV* z>gO(6-`v7WM5mIwpsiYv@W+tY`jh)ixbSWoQF=s-s6`hQx#xFQ8OFFL<_R)|56q4T z5odFH-CMMb?^{3hx_!s(4KVWDCJ!;g*seDFn59d;qtNfTNbtksXWDw*Ris{f%2y7= z7tlwCIeD9Nn5=hUUg<=S#19M0b_>PIb*gKhrb~YF6*4z|(91dt_jgGa+1v(lf-R<1 z`u$QdeM{!~xNAgnU5M%vQr_8K0&kYF+u)kVlpraCOlhr?VfIgtqplgDe`C&RsN!&n9nD&q)ZI~a*!XdSG3 zoO0s`9F)uNdy6k}yI{uTrgE1dCFyApzDcH9eT<}GM;e#6Ldw%&(5~I7XB0|aPI+eO zwmsx+1LTmM&(|Fu1EtQjaIe+yAGG4(uZR_ocq)&V7lan3r#=JGS1 zGyF)mL23*+ju)1u9|1vT!VJ<~(jY~J%1cGn}r23ptJiA ztI5oNTN-iMPjcOLQ~iZ+tj+v~1;C%!+IqnG%-wBaaK7Jyz%Nq3`%4aqe@D>!i#-J$ zLC&5|J%=O~bwsS`eZj2QJ;PmMvCE%t5*s84YFeN>%Y2phTr1!E;aZOoQOg)KL8OWb zR=jSUBl-eDJ1HtIq2^3&XH_!ty{!3tgaDnJtT%b2m61F{zUz%ZxY5Q3f(LkqV?r1#xD!STWHu^V{mT1&_pEc@^&u076ClADUp~= z{=viGTahQ*CnA zWGIhD=@(l$gl~?gyJ{ANCkLmlyj=X+&h+_a_~hz*g9(OrP{<;g$EPbh%0bO4D2O7R zXy$yh*N_Jx<=-$m@5~(`NIRuBhGdCu&=!%t9dPZ9b>2&UB8TbP}mC=D~$+Mlw*gT_c};;A z{cDRlAhwg5%0i!hD-V_vXg9Z95^k}v2G~PkUIjqi^B^jgCp01P9$@BlFO&04AGI@Pgp9S@3NWgW0!69e(08 z^qOKT7#~p+;tY2Y@Tah`C-u~aT7r*~bvGD_epu?Ms+viOHMV!sY*+3LM%mF=$Tif$ z9q1hUB5z%&*hgs?3tZT;4|$JJ4=Id{tAMdh@R}Un1aoUGVAq=3aL3SYlcOj6vWPC# zABTe{2j~|9_5)y0wnU;Kah(smb8EHGQg9JL!JXY^nZ=EuO7uD=a*&Qe+(75`J4Mt9 zk|J>NWxo0_HQwZiiaLWk3>b(CT@9^een^MG7lg8({i{MeJ+8qLV`I0w2T`VL5k=!R z*6Z`B(lD=!y}h@rAv$g)&okqTx5ZorHeesqt-_fc?YTsAQYxV3k1G!vt>!W(9xxc* z#)lCN7*)(;FiRp`K1S%d(qlTrDCWJXvEoTIccs^hrh9ATxHRMNcqag%L*2M6607fh zI3pjPdhTYF?Y>O1NWI4_OI$nFJU`i$G^_dj=f=CwYe)+YnddI`KvmrlwaTFAs@x6Y zKH;UF!`JG+nbpW|hKYe{`Gq8fm~qo*OTZUxfIQbu0L^9%jg^2t^YqUku@ zb0dD*3m%7%ePpuYtFV$p;8An421;|Y%foyN7+p-8<9Y5V_K+}Agu6z)8u+O&jteC) 
z!?vrces5{|IB;bH;hi^^X0PEQ>t&fgY;ieT!T8=>{a$v4pz>&{P0HSCkkqUTNJ+?j zkFLLM@#@WhTx&ra_;S=CjE=<=ao4DIJXpG5fYNXBOvdaYRuZ>YXz)sgFUk{yvFAa& zc|91^tXxi1#BS8sQTLtsvu%FYXyzqq>(WENlW#g#!!bzKPY4D2MF{Z&g4>0ZZ`^2G zcgVnqfe1-@sMg0%8u}3%EkFDsvwmqNv=K8!`W~cG-4`^xLJmVFDYRX^n0{7HN8&ps z&2B#4!eY8#E_jY(U8qkL#T*3GeheyYqVXgW`him9P2DkCnoB>BQ#~e;} zY*mjqzUVQFEl0IbJ-n)j(xfvuYGFt+nL=#w5n>FLzp#<6{igOylw910Qyz1lKrZ zl%#tI_+C?XRcD*EmE}G?BAx?5gTPC;Bw1=zOF&u2#|5!@6dM%-J{!$#Y z+057^%;q7%mJRTU;*(Lj#{6Yh1b9F1BKyX!vo~a$_^&c*?eg@gt+j`P%TxHT(piwC z=0qa^2{;P9#?8GjHk;EF9cb%`v%V_~QAL&~78;q@u<~Sljc!Cs*FBRb@#Q!b%^wuY zu@i2QK3Cdj%#F@X_NnH>#v-W%?dix|8QO%BDoYryyag0W0Y?x^U%nMbMluC3b%q5| zVW_n*IS#7kE(FZYNAqBAY)Tp%Rf4+{sIf$uKLmKZ(s2l^~>j*7Pt8a5dAKF~^PK0Z28*{?1}b;yn)pQH}D)kkh=X?GPvg!T%EOw;2e=OlGnFS8D#0{bXPM_`~>AU`UCVo$kszJ zv7`BtSSbkp^h}XVtPdrcZiw1nB=k4+)Koj#Og@ z|5qaX1M9l;Tj77D#ebjq-(mO%AItI^|0fXtD=4+qut{)!0f=rs(;KDlu>F$$4+BQG AxBvhE From 73ed8899f2875374c7398d96d043b059dcff276d Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Tue, 2 Jan 2024 17:29:08 +0000 Subject: [PATCH 07/13] add and update tests --- tests/test_load/test_geo_utils.py | 19 +- tests/test_load/test_images.py | 526 ++++++++++++++++++------------ 2 files changed, 335 insertions(+), 210 deletions(-) diff --git a/tests/test_load/test_geo_utils.py b/tests/test_load/test_geo_utils.py index 92787c3a..88d89525 100644 --- a/tests/test_load/test_geo_utils.py +++ b/tests/test_load/test_geo_utils.py @@ -14,17 +14,24 @@ def sample_dir(): def test_extractGeoInfo(sample_dir): - image_ID = "cropped_geo.tif" - image_path = f"{sample_dir}/{image_ID}" + image_id = "cropped_geo.tif" + image_path = f"{sample_dir}/{image_id}" shape, crs, coord = geo_utils.extractGeoInfo(image_path) assert shape == (9, 9, 3) assert crs == "EPSG:27700" assert coord == approx((534348, 192378, 534349, 192379), rel=1e-0) +def test_extractGeoInfo_errors(sample_dir): + image_id = "cropped_74488689.png" + image_path = f"{sample_dir}/{image_id}" + with pytest.raises(ValueError, match="No 
coordinates found"): + geo_utils.extractGeoInfo(image_path) + + def test_reproject(sample_dir): - image_ID = "cropped_geo.tif" - image_path = f"{sample_dir}/{image_ID}" + image_id = "cropped_geo.tif" + image_path = f"{sample_dir}/{image_id}" _, _, new_crs, reprojected_coord, size_in_m = geo_utils.reproject_geo_info( image_path, calc_size_in_m="gc" ) @@ -35,8 +42,8 @@ def test_reproject(sample_dir): def test_versus_loader(sample_dir): - image_ID = "cropped_geo.tif" - image_path = f"{sample_dir}/{image_ID}" + image_id = "cropped_geo.tif" + image_path = f"{sample_dir}/{image_id}" shape, _, _, reprojected_coords, size_in_m = geo_utils.reproject_geo_info( image_path, calc_size_in_m="great-circle" ) diff --git a/tests/test_load/test_images.py b/tests/test_load/test_images.py index de90c191..08b299c8 100644 --- a/tests/test_load/test_images.py +++ b/tests/test_load/test_images.py @@ -12,7 +12,6 @@ from pytest import approx from shapely.geometry import Polygon -from mapreader import loader from mapreader.load.images import MapImages @@ -22,27 +21,31 @@ def sample_dir(): @pytest.fixture -def init_ts_maps(sample_dir, tmp_path): +def image_id(): + return "cropped_74488689.png" + + +@pytest.fixture +def init_maps(sample_dir, image_id, tmp_path): """Initializes MapImages object (with metadata from csv and patches). Returns ------- list - image_ID (of parent png image), ts_map (MapImages object), parent_list (== image_ID) and patch_list (list of patches). + maps (MapImages object), parent_list (== image_id) and patch_list (list of patches). 
""" - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - ts_map.add_metadata(f"{sample_dir}/ts_downloaded_maps.csv") - ts_map.patchify_all(patch_size=3, path_save=tmp_path) # gives 9 patches - parent_list = ts_map.list_parents() - patch_list = ts_map.list_patches() + maps = MapImages(f"{sample_dir}/{image_id}") + maps.add_metadata(f"{sample_dir}/ts_downloaded_maps.csv") + maps.patchify_all(patch_size=3, path_save=tmp_path) # gives 9 patches + parent_list = maps.list_parents() + patch_list = maps.list_patches() - return image_ID, ts_map, parent_list, patch_list + return maps, parent_list, patch_list @pytest.fixture -def keys(): - return ["parent_id", "image_path", "shape", "name", "coord", "other"] +def ts_metadata_keys(): + return ["name", "url", "coordinates", "crs", "published_date", "grid_bb", "polygon"] @pytest.fixture @@ -56,6 +59,11 @@ def metadata_df(): ) +@pytest.fixture +def metadata_keys(): + return ["parent_id", "image_path", "shape", "name", "coord", "other"] + + @pytest.fixture def matching_metadata_dir(tmp_path, metadata_df): test_path = tmp_path / "test_dir" @@ -94,144 +102,185 @@ def missing_metadata_dir(tmp_path, metadata_df): return test_path -# ---- tests ---- +# --- test init --- + + +def test_init_png(sample_dir, image_id): + maps = MapImages(f"{sample_dir}/{image_id}") + assert len(maps.list_parents()) == 1 + assert len(maps.list_patches()) == 0 + assert isinstance(maps, MapImages) + str(maps) + len(maps) + + +def test_init_tiff(sample_dir): + image_id = "cropped_non_geo.tif" + tiffs = MapImages(f"{sample_dir}/{image_id}") + assert len(tiffs) == 1 + assert isinstance(tiffs, MapImages) + + +def test_init_geotiff(sample_dir): + image_id = "cropped_geo.tif" + geotiffs = MapImages(f"{sample_dir}/{image_id}") + assert len(geotiffs) == 1 + assert isinstance(geotiffs, MapImages) + + +def test_init_tiff_32bit(sample_dir): + image_id = "cropped_32bit.tif" + with pytest.raises(NotImplementedError, match="Image mode"): + 
MapImages(f"{sample_dir}/{image_id}") + + +def test_init_non_image(sample_dir): + file_name = "ts_downloaded_maps.csv" + with pytest.raises(PIL.UnidentifiedImageError, match="not an image"): + MapImages(f"{sample_dir}/{file_name}") -# ---- png tests (separate geo info) --- +def test_init_fake_tree_level_error(sample_dir, image_id): + with pytest.raises(ValueError, match="parent or patch"): + MapImages(f"{sample_dir}/{image_id}", tree_level="fake") -def test_loader_png(sample_dir): - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - assert len(ts_map) == 1 - assert isinstance(ts_map, MapImages) +# --- test ``add_metadata`` --- -# add_metadata tests w/ png files -def test_loader_add_metadata(sample_dir): +# first test ``add_metadata`` works for png files + + +def test_add_metadata(sample_dir, image_id, ts_metadata_keys): # metadata csv - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - ts_map.add_metadata(f"{sample_dir}/ts_downloaded_maps.csv") - assert "coordinates" in ts_map.images["parent"][image_ID].keys() - assert ts_map.images["parent"][image_ID]["coordinates"] == approx( - (-4.83, 55.80, -4.21, 56.059), rel=1e-2 - ) + maps_csv = MapImages(f"{sample_dir}/{image_id}") + maps_csv.add_metadata(f"{sample_dir}/ts_downloaded_maps.csv") + # metadata tsv - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - ts_map.add_metadata(f"{sample_dir}/ts_downloaded_maps.tsv", delimiter="\t") - assert "coordinates" in ts_map.images["parent"][image_ID].keys() - assert ts_map.images["parent"][image_ID]["coordinates"] == approx( - (-4.83, 55.80, -4.21, 56.059), rel=1e-2 - ) + maps_tsv = MapImages(f"{sample_dir}/{image_id}") + maps_tsv.add_metadata(f"{sample_dir}/ts_downloaded_maps.tsv", delimiter="\t") + # metadata xlsx - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - ts_map.add_metadata(f"{sample_dir}/ts_downloaded_maps.xlsx") - assert "coordinates" in 
ts_map.images["parent"][image_ID].keys() - assert ts_map.images["parent"][image_ID]["coordinates"] == approx( - (-4.83, 55.80, -4.21, 56.059), rel=1e-2 - ) + maps_xlsx = MapImages(f"{sample_dir}/{image_id}") + maps_xlsx.add_metadata(f"{sample_dir}/ts_downloaded_maps.xlsx") + for maps in [maps_csv, maps_tsv, maps_xlsx]: + assert all([k in maps.parents[image_id].keys() for k in ts_metadata_keys]) + assert isinstance(maps.parents[image_id]["coordinates"], tuple) + assert maps.parents[image_id]["coordinates"] == approx( + (-4.83, 55.80, -4.21, 56.059), rel=1e-2 + ) -# if metadata info matches -def test_matching_metadata_csv(matching_metadata_dir, keys): - my_files = loader(f"{matching_metadata_dir}/*png") - assert len(my_files) == 3 - my_files.add_metadata(f"{matching_metadata_dir}/metadata_df.csv") - for parent_id in my_files.list_parents(): - assert list(my_files.images["parent"][parent_id].keys()) == keys +# check for mismatched metadata -def test_matching_metadata_xlsx(matching_metadata_dir, keys): - my_files = loader(f"{matching_metadata_dir}/*png") - assert len(my_files) == 3 - my_files.add_metadata(f"{matching_metadata_dir}/metadata_df.xlsx") - for parent_id in my_files.list_parents(): - assert list(my_files.images["parent"][parent_id].keys()) == keys +def test_matching_metadata(matching_metadata_dir, metadata_df, metadata_keys): + my_files_csv = MapImages(f"{matching_metadata_dir}/*png") + my_files_csv.add_metadata(f"{matching_metadata_dir}/metadata_df.csv") -def test_matching_metadata_df(matching_metadata_dir, metadata_df, keys): - my_files = loader(f"{matching_metadata_dir}/*png") - assert len(my_files) == 3 - my_files.add_metadata(metadata_df) - for parent_id in my_files.list_parents(): - assert list(my_files.images["parent"][parent_id].keys()) == keys + my_files_xlsx = MapImages(f"{matching_metadata_dir}/*png") + my_files_xlsx.add_metadata(f"{matching_metadata_dir}/metadata_df.xlsx") + my_files_df = MapImages(f"{matching_metadata_dir}/*png") + 
my_files_df.add_metadata(metadata_df) -# if you pass index col - this should pick up if index.name is 'name' or 'image_id' -def test_matching_metadata_csv_w_index_col(matching_metadata_dir): - my_files = loader(f"{matching_metadata_dir}/*png") - assert len(my_files) == 3 - my_files.add_metadata(f"{matching_metadata_dir}/metadata_df.csv", index_col="name") - keys = ["parent_id", "image_path", "shape", "Unnamed: 0", "coord", "other", "name"] - for parent_id in my_files.list_parents(): - assert list(my_files.images["parent"][parent_id].keys()) == keys + for my_files in [my_files_csv, my_files_xlsx, my_files_df]: + for parent_id in my_files.list_parents(): + assert list(my_files.parents[parent_id].keys()) == metadata_keys -# if you pass columns -def test_matching_metadata_csv_w_usecols(matching_metadata_dir): - my_files = loader(f"{matching_metadata_dir}/*png") - assert len(my_files) == 3 +def test_missing_metadata_csv_ignore_mismatch(missing_metadata_dir, metadata_keys): + my_files = MapImages(f"{missing_metadata_dir}/*png") + assert len(my_files) == 4 my_files.add_metadata( - f"{matching_metadata_dir}/metadata_df.csv", columns=["name", "coord"] + f"{missing_metadata_dir}/metadata_df.csv", ignore_mismatch=True ) - keys = ["parent_id", "image_path", "shape", "name", "coord"] - for parent_id in my_files.list_parents(): - assert list(my_files.images["parent"][parent_id].keys()) == keys - assert isinstance(my_files.images["parent"][parent_id]["coord"], tuple) + for parent_id in ["file1.png", "file2.png", "file3.png"]: + assert list(my_files.parents[parent_id].keys()) == metadata_keys + assert list(my_files.parents["file4.png"].keys()) == [ + "parent_id", + "image_path", + "shape", + ] -# if there is extra info in the metadata -def test_extra_metadata_csv_ignore_mismatch(extra_metadata_dir, keys): - my_files = loader(f"{extra_metadata_dir}/*png") +def test_missing_metadata_csv_errors(missing_metadata_dir): + my_files = MapImages(f"{missing_metadata_dir}/*png") + assert 
len(my_files) == 4 + with pytest.raises(ValueError, match="missing information"): + my_files.add_metadata(f"{missing_metadata_dir}/metadata_df.csv") + + +def test_extra_metadata_csv_ignore_mismatch(extra_metadata_dir, metadata_keys): + my_files = MapImages(f"{extra_metadata_dir}/*png") assert len(my_files) == 2 my_files.add_metadata(f"{extra_metadata_dir}/metadata_df.csv", ignore_mismatch=True) for parent_id in my_files.list_parents(): - assert list(my_files.images["parent"][parent_id].keys()) == keys + assert list(my_files.parents[parent_id].keys()) == metadata_keys def test_extra_metadata_csv_errors(extra_metadata_dir): - my_files = loader(f"{extra_metadata_dir}/*png") + my_files = MapImages(f"{extra_metadata_dir}/*png") assert len(my_files) == 2 with pytest.raises(ValueError, match="information about non-existent images"): my_files.add_metadata(f"{extra_metadata_dir}/metadata_df.csv") -# if there is missing info in metadata -def test_missing_metadata_csv_ignore_mismatch(missing_metadata_dir, keys): - my_files = loader(f"{missing_metadata_dir}/*png") - assert len(my_files) == 4 +# test other ``add_metadata`` args + + +def test_add_metadata_index_col(matching_metadata_dir): + my_files = MapImages(f"{matching_metadata_dir}/*png") + assert len(my_files) == 3 my_files.add_metadata( - f"{missing_metadata_dir}/metadata_df.csv", ignore_mismatch=True - ) - for parent_id in ["file1.png", "file2.png", "file3.png"]: - assert list(my_files.images["parent"][parent_id].keys()) == keys - assert list(my_files.images["parent"]["file4.png"].keys()) == [ + f"{matching_metadata_dir}/metadata_df.csv", index_col="name" + ) # pass index col arg + metadata_keys = [ "parent_id", "image_path", "shape", + "Unnamed: 0", + "coord", + "other", + "name", ] + for parent_id in my_files.list_parents(): + assert list(my_files.parents[parent_id].keys()) == metadata_keys -def test_missing_metadata_csv_errors(missing_metadata_dir): - my_files = loader(f"{missing_metadata_dir}/*png") - assert 
len(my_files) == 4 - with pytest.raises(ValueError, match="missing information"): - my_files.add_metadata(f"{missing_metadata_dir}/metadata_df.csv") +def test_add_metadata_columns(matching_metadata_dir, metadata_df): + my_files_csv = MapImages(f"{matching_metadata_dir}/*png") + my_files_csv.add_metadata( + f"{matching_metadata_dir}/metadata_df.csv", columns=["name", "coord"] + ) + + my_files_xlsx = MapImages(f"{matching_metadata_dir}/*png") + my_files_xlsx.add_metadata( + f"{matching_metadata_dir}/metadata_df.xlsx", columns=["name", "coord"] + ) + + my_files_df = MapImages(f"{matching_metadata_dir}/*png") + my_files_df.add_metadata(metadata_df, columns=["name", "coord"]) # pass columns arg + + metadata_keys = ["parent_id", "image_path", "shape", "name", "coord"] + for my_files in [my_files_csv, my_files_xlsx, my_files_df]: + for parent_id in my_files.list_parents(): + assert list(my_files.parents[parent_id].keys()) == metadata_keys + assert isinstance(my_files.parents[parent_id]["coord"], tuple) + + +# other ``add_metadata`` errors -# other errors -# if csv file doesn't exist def test_metadata_not_found(matching_metadata_dir): - my_files = loader(f"{matching_metadata_dir}/*png") + my_files = MapImages(f"{matching_metadata_dir}/*png") assert len(my_files) == 3 with pytest.raises(ValueError): my_files.add_metadata(f"{matching_metadata_dir}/fakefile.csv") def test_metadata_missing_name_or_image_id(matching_metadata_dir): - my_files = loader(f"{matching_metadata_dir}/*png") + my_files = MapImages(f"{matching_metadata_dir}/*png") assert len(my_files) == 3 incomplete_metadata_df = pd.DataFrame( {"coord": [(1.1, 1.5), (2.1, 1.0), (3.1, 4.5)], "other": [1, 2, 3]} @@ -249,108 +298,189 @@ def test_metadata_missing_name_or_image_id(matching_metadata_dir): my_files.add_metadata(f"{matching_metadata_dir}/incomplete_metadata_df.csv") -# --- tiff tests (no geo info) --- - - -def test_loader_tiff(sample_dir): - image_ID = "cropped_non_geo.tif" - tiff = 
loader(f"{sample_dir}/{image_ID}") - assert len(tiff) == 1 - assert isinstance(tiff, MapImages) - - -# --- geotiff tests (contains geo info) --- - - -def test_loader_geotiff(sample_dir): - image_ID = "cropped_geo.tif" - geotiff = loader(f"{sample_dir}/{image_ID}") - assert len(geotiff) == 1 - assert isinstance(geotiff, MapImages) +# --- test ``add_geo_info`` --- def test_loader_add_geo_info(sample_dir): # check it works for geotiff - image_ID = "cropped_geo.tif" - geotiff = loader(f"{sample_dir}/{image_ID}") - geotiff.add_geo_info() - assert "shape" in geotiff.images["parent"][image_ID].keys() - assert "coordinates" in geotiff.images["parent"][image_ID].keys() - assert geotiff.images["parent"][image_ID]["coordinates"] == approx( + image_id = "cropped_geo.tif" + geotiffs = MapImages(f"{sample_dir}/{image_id}") + geotiffs.add_geo_info() + assert all( + [k in geotiffs.parents[image_id].keys() for k in ["shape", "coordinates"]] + ) + assert geotiffs.parents[image_id]["coordinates"] == approx( (-0.061, 51.6142, -0.0610, 51.614), rel=1e-2 ) # check nothing happens for png/tiff (no metadata) - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - keys = list(ts_map.images["parent"][image_ID].keys()) + image_id = "cropped_74488689.png" + ts_map = MapImages(f"{sample_dir}/{image_id}") + keys = list(ts_map.parents[image_id].keys()) ts_map.add_geo_info() - assert list(ts_map.images["parent"][image_ID].keys()) == keys + assert list(ts_map.parents[image_id].keys()) == keys - image_ID = "cropped_non_geo.tif" - tiff = loader(f"{sample_dir}/{image_ID}") - keys = list(tiff.images["parent"][image_ID].keys()) + image_id = "cropped_non_geo.tif" + tiff = MapImages(f"{sample_dir}/{image_id}") + keys = list(tiff.parents[image_id].keys()) tiff.add_geo_info() - assert list(tiff.images["parent"][image_ID].keys()) == keys + assert list(tiff.parents[image_id].keys()) == keys -# -- could add jpeg, IIIF, etc. 
here too --- +# --- test patchify --- -def test_loader_tiff_32bit(sample_dir): - image_ID = "cropped_32bit.tif" - with pytest.raises(NotImplementedError, match="Image mode"): - loader(f"{sample_dir}/{image_ID}") +def test_patchify_pixels(sample_dir, image_id, tmp_path): + maps = MapImages(f"{sample_dir}/{image_id}") + maps.patchify_all(patch_size=3, path_save=tmp_path) + parent_list = maps.list_parents() + patch_list = maps.list_patches() + assert len(parent_list) == 1 + assert len(patch_list) == 9 + assert os.path.isfile(f"{tmp_path}/patch-0-0-3-3-#{image_id}#.png") -def test_loader_non_image(sample_dir): - file_ID = "ts_downloaded_maps.csv" - with pytest.raises(PIL.UnidentifiedImageError, match="not an image"): - loader(f"{sample_dir}/{file_ID}") +def test_patchify_meters(sample_dir, image_id, tmp_path): + maps = MapImages(f"{sample_dir}/{image_id}") + maps.add_metadata(f"{sample_dir}/ts_downloaded_maps.csv") + maps.patchify_all(patch_size=10000, method="meters", path_save=f"{tmp_path}_meters") + assert os.path.isfile(f"{tmp_path}_meters/patch-0-0-2-2-#{image_id}#.png") + assert len(maps.list_patches()) == 25 + + +def test_patchify_meters_errors(sample_dir, image_id, tmp_path): + maps = MapImages(f"{sample_dir}/{image_id}") + with pytest.raises(ValueError, match="add coordinate information"): + maps.patchify_all(patch_size=10000, method="meters", path_save=tmp_path) # --- test other functions --- -def test_loader_patchify_all(sample_dir, tmp_path): - image_ID = "cropped_74488689.png" - ts_map = loader(f"{sample_dir}/{image_ID}") - ts_map.patchify_all(patch_size=3, path_save=tmp_path) - parent_list = ts_map.list_parents() - patch_list = ts_map.list_patches() - assert len(parent_list) == 1 - assert len(patch_list) == 9 - assert os.path.isfile(f"{tmp_path}/patch-0-0-3-3-#{image_ID}#.png") +def test_load_patches(init_maps, sample_dir, tmp_path): + maps, _, _ = init_maps + + # create tiff patches + geotiff_path = f"{sample_dir}/cropped_geo.tif" + tiff_maps = 
MapImages(geotiff_path) + tiff_maps.add_geo_info() + tiff_maps.patchify_all(patch_size=3, path_save=f"{tmp_path}_tiffs") + + assert len(maps.list_parents()) == 1 + assert len(maps.list_patches()) == 9 + maps.load_patches(f"{tmp_path}_tiffs", parent_paths=geotiff_path, add_geo_info=True) + assert "coordinates" in maps.parents["cropped_geo.tif"].keys() + assert len(maps.list_parents()) == 2 + assert len(maps.list_patches()) == 18 # 9 for each + + maps.load_patches(f"{tmp_path}_tiffs", parent_paths=geotiff_path, clear_images=True) + assert len(maps.list_parents()) == 1 + assert len(maps.list_patches()) == 9 + + +def test_load_parents(init_maps, image_id, sample_dir): + maps, _, _ = init_maps + geotiff_path = f"{sample_dir}/cropped_geo.tif" + assert len(maps.list_parents()) == 1 + maps.load_parents(geotiff_path, overwrite=False, add_geo_info=True) + assert len(maps.list_parents()) == 2 + assert all(map in maps.list_parents() for map in [image_id, "cropped_geo.tif"]) + assert "coordinates" in maps.parents["cropped_geo.tif"].keys() -def test_loader_coord_functions(init_ts_maps, sample_dir): + tiff_path = f"{sample_dir}/cropped_non_geo.tif" + maps.load_parents(tiff_path, overwrite=True) + assert len(maps.list_parents()) == 1 + assert maps.list_parents() == ["cropped_non_geo.tif"] + + +def test_add_shape(init_maps, image_id): + maps, _, patch_list = init_maps + + maps.parents[image_id].pop("shape") + assert "shape" not in maps.parents[image_id].keys() + maps.add_shape(tree_level="parent") + assert "shape" in maps.parents[image_id].keys() + + maps.patches[patch_list[0]].pop("shape") + assert "shape" not in maps.patches[patch_list[0]].keys() + maps.add_shape(tree_level="patch") + assert "shape" in maps.patches[patch_list[0]].keys() + + +def test_calc_coords_from_grid_bb(sample_dir, image_id): + ts_map = MapImages(f"{sample_dir}/{image_id}") + ts_map.add_metadata( + f"{sample_dir}/ts_downloaded_maps.csv", columns=["name", "grid_bb", "crs"] + ) + assert "coordinates" not in 
ts_map.parents[image_id] + ts_map.add_coords_from_grid_bb() + assert "coordinates" in ts_map.parents[image_id] + assert ts_map.parents[image_id]["coordinates"] == approx( + (-4.83, 55.80, -4.21, 56.059), rel=1e-2 + ) + + +def test_coord_functions(init_maps, image_id, sample_dir): # test for png with added metadata - image_ID, ts_map, _, patch_list = init_ts_maps - ts_map.add_center_coord() - assert "dlon" in ts_map.images["parent"][image_ID].keys() - assert "center_lon" in ts_map.images["patch"][patch_list[0]].keys() + maps, _, patch_list = init_maps + maps.add_center_coord() + assert "dlon" in maps.parents[image_id].keys() + assert "center_lon" in maps.patches[patch_list[0]].keys() # test for geotiff with added geoinfo - image_ID = "cropped_geo.tif" - geotiff = loader(f"{sample_dir}/{image_ID}") - geotiff.add_geo_info() - geotiff.add_coord_increments() - geotiff.add_center_coord(tree_level="parent") - assert "dlon" in geotiff.images["parent"][image_ID].keys() - assert "center_lon" in geotiff.images["parent"][image_ID].keys() + image_id = "cropped_geo.tif" + geotiffs = MapImages(f"{sample_dir}/{image_id}") + geotiffs.add_geo_info() + geotiffs.add_coord_increments() + geotiffs.add_center_coord(tree_level="parent") + assert "dlon" in geotiffs.parents[image_id].keys() + assert "center_lon" in geotiffs.parents[image_id].keys() # test for tiff with no geo info (i.e. 
no coords so nothing should happen) - image_ID = "cropped_non_geo.tif" - tiff = loader(f"{sample_dir}/{image_ID}") - keys = list(tiff.images["parent"][image_ID].keys()) - tiff.add_coord_increments() - tiff.add_center_coord(tree_level="parent") - assert list(tiff.images["parent"][image_ID].keys()) == keys + image_id = "cropped_non_geo.tif" + tiffs = MapImages(f"{sample_dir}/{image_id}") + keys = list(tiffs.parents[image_id].keys()) + tiffs.add_coord_increments() + tiffs.add_center_coord(tree_level="parent") + assert list(tiffs.parents[image_id].keys()) == keys + + +def test_add_patch_coords(init_maps): + maps, _, patch_list = init_maps + maps.patches[patch_list[0]].pop("coordinates") + assert "coordinates" not in maps.patches[patch_list[0]].keys() + maps.add_patch_coords() + assert "coordinates" in maps.patches[patch_list[0]].keys() + + +def test_add_patch_polygons(init_maps): + maps, _, patch_list = init_maps + maps.patches[patch_list[0]].pop("polygon") + assert "polygon" not in maps.patches[patch_list[0]].keys() + maps.add_patch_polygons() + assert "polygon" in maps.patches[patch_list[0]].keys() + assert isinstance(maps.patches[patch_list[0]]["polygon"], Polygon) + +def test_save_patches_as_geotiffs(init_maps): + maps, _, _ = init_maps + maps.save_patches_as_geotiffs() -def test_loader_calc_pixel_stats(init_ts_maps, sample_dir, tmp_path): - image_ID, ts_map, _, patch_list = init_ts_maps - ts_map.calc_pixel_stats() + +def test_save_to_geojson(init_maps, tmp_path): + maps, _, _ = init_maps + maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") + assert os.path.exists(f"{tmp_path}/patches.geojson") + geo_df = geopd.read_file(f"{tmp_path}/patches.geojson") + assert "geometry" in geo_df.columns + assert str(geo_df.crs.to_string()) == "EPSG:4326" + assert isinstance(geo_df["geometry"][0], Polygon) + + +def test_calc_pixel_stats(init_maps, sample_dir, tmp_path): + maps, _, patch_list = init_maps + maps.calc_pixel_stats() expected_cols = [ 
"mean_pixel_R", "mean_pixel_G", @@ -361,15 +491,14 @@ def test_loader_calc_pixel_stats(init_ts_maps, sample_dir, tmp_path): "std_pixel_B", "std_pixel_A", ] - for col in expected_cols: - assert col in ts_map.images["patch"][patch_list[0]].keys() + assert all([col in maps.patches[patch_list[0]].keys() for col in expected_cols]) # geotiff/tiff will not have alpha channel, so only RGB returned - image_ID = "cropped_geo.tif" - geotiff = loader(f"{sample_dir}/{image_ID}") - geotiff.patchify_all(patch_size=3, path_save=tmp_path) - patch_list = geotiff.list_patches() - geotiff.calc_pixel_stats() + image_id = "cropped_geo.tif" + geotiffs = MapImages(f"{sample_dir}/{image_id}") + geotiffs.patchify_all(patch_size=3, path_save=tmp_path) + patch_list = geotiffs.list_patches() + geotiffs.calc_pixel_stats() expected_cols = [ "mean_pixel_R", "mean_pixel_G", @@ -378,45 +507,34 @@ def test_loader_calc_pixel_stats(init_ts_maps, sample_dir, tmp_path): "std_pixel_G", "std_pixel_B", ] - for col in expected_cols: - assert col in geotiff.images["patch"][patch_list[0]].keys() + assert all([col in geotiffs.patches[patch_list[0]].keys() for col in expected_cols]) -def test_loader_convert_images(init_ts_maps): - _, ts_map, _, _ = init_ts_maps - parent_df, patch_df = ts_map.convert_images() +def test_loader_convert_images(init_maps): + maps, _, _ = init_maps + parent_df, patch_df = maps.convert_images() assert parent_df.shape == (1, 13) assert patch_df.shape == (9, 7) - parent_df, patch_df = ts_map.convert_images(save=True) + parent_df, patch_df = maps.convert_images(save=True) assert os.path.isfile("./parent_df.csv") assert os.path.isfile("./patch_df.csv") os.remove("./parent_df.csv") os.remove("./patch_df.csv") - parent_df, patch_df = ts_map.convert_images(save=True, save_format="excel") + parent_df, patch_df = maps.convert_images(save=True, save_format="excel") assert os.path.isfile("./parent_df.xlsx") assert os.path.isfile("./patch_df.xlsx") os.remove("./parent_df.xlsx") 
os.remove("./patch_df.xlsx") -def test_loader_convert_images_errors(init_ts_maps): - _, ts_map, _, _ = init_ts_maps +def test_loader_convert_images_errors(init_maps): + maps, _, _ = init_maps with pytest.raises(ValueError, match="``save_format`` should be one of"): - ts_map.convert_images(save=True, save_format="json") + maps.convert_images(save=True, save_format="json") -def test_loader_add_patch_polygons(init_ts_maps): - _, ts_map, _, patch_list = init_ts_maps - ts_map.add_patch_polygons() - assert "polygon" in ts_map.patches[patch_list[0]].keys() - assert isinstance(ts_map.patches[patch_list[0]]["polygon"], Polygon) - - -def test_loader_save_to_geojson(init_ts_maps, tmp_path): - _, ts_map, _, _ = init_ts_maps - ts_map.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") - assert os.path.exists(f"{tmp_path}/patches.geojson") - geo_df = geopd.read_file(f"{tmp_path}/patches.geojson") - assert "geometry" in geo_df.columns - assert str(geo_df.crs.to_string()) == "EPSG:4326" - assert isinstance(geo_df["geometry"][0], Polygon) +def test_save_parents_as_geotiffs(init_maps, sample_dir, image_id): + maps, _, _ = init_maps + maps.save_parents_as_geotiffs() + image_id = image_id.split(".")[0] + assert os.path.isfile(f"{sample_dir}/{image_id}.tif") From 8bda0c27cca6c141a75102853b35d984d70650cb Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Wed, 3 Jan 2024 11:07:12 +0000 Subject: [PATCH 08/13] add more tests --- tests/test_load/test_images.py | 169 ++++++++++++++++++++++++++++++--- 1 file changed, 154 insertions(+), 15 deletions(-) diff --git a/tests/test_load/test_images.py b/tests/test_load/test_images.py index 08b299c8..b6e78b2c 100644 --- a/tests/test_load/test_images.py +++ b/tests/test_load/test_images.py @@ -31,7 +31,7 @@ def init_maps(sample_dir, image_id, tmp_path): Returns ------- - list + tuple maps (MapImages object), parent_list (== image_id) and patch_list (list of patches). 
""" maps = MapImages(f"{sample_dir}/{image_id}") @@ -43,6 +43,28 @@ def init_maps(sample_dir, image_id, tmp_path): return maps, parent_list, patch_list +@pytest.fixture +def init_dataframes(sample_dir, image_id, tmp_path): + """Initializes MapImages object (with metadata from csv and patches) and creates parent and patch dataframes. + + Returns + ------- + tuple + path to parent and patch dataframes + """ + maps = MapImages(f"{sample_dir}/{image_id}") + maps.add_metadata(f"{sample_dir}/ts_downloaded_maps.csv") + maps.patchify_all(patch_size=3, path_save=tmp_path) # gives 9 patches + maps.add_center_coord(tree_level="parent") + maps.add_patch_polygons() + maps.calc_pixel_stats() + _, _ = maps.convert_images(save=True) + assert os.path.isfile("./parent_df.csv") + assert os.path.isfile("./patch_df.csv") + + return "./parent_df.csv", "./patch_df.csv" + + @pytest.fixture def ts_metadata_keys(): return ["name", "url", "coordinates", "crs", "published_date", "grid_bb", "polygon"] @@ -128,13 +150,32 @@ def test_init_geotiff(sample_dir): assert isinstance(geotiffs, MapImages) -def test_init_tiff_32bit(sample_dir): +def test_init_parent_path(sample_dir, image_id, capfd): + maps = MapImages( + f"{sample_dir}/{image_id}", + tree_level="patch", + parent_path=f"{sample_dir}/{image_id}", + ) + assert len(maps.list_parents()) == 1 + assert len(maps.list_patches()) == 1 + + # without passing tree level should get warning + maps = MapImages(f"{sample_dir}/{image_id}", parent_path=f"{sample_dir}/{image_id}") + out, _ = capfd.readouterr() + assert ( + "[WARNING] Ignoring `parent_path` as `tree_level` is set to 'parent'." 
in out + ) + assert len(maps.list_parents()) == 1 + assert len(maps.list_patches()) == 0 + + +def test_init_tiff_32bit_error(sample_dir): image_id = "cropped_32bit.tif" with pytest.raises(NotImplementedError, match="Image mode"): MapImages(f"{sample_dir}/{image_id}") -def test_init_non_image(sample_dir): +def test_init_non_image_error(sample_dir): file_name = "ts_downloaded_maps.csv" with pytest.raises(PIL.UnidentifiedImageError, match="not an image"): MapImages(f"{sample_dir}/{file_name}") @@ -269,6 +310,46 @@ def test_add_metadata_columns(matching_metadata_dir, metadata_df): assert isinstance(my_files.parents[parent_id]["coord"], tuple) +def test_add_metadata_parent(sample_dir, image_id, init_dataframes, ts_metadata_keys): + parent_df, _ = init_dataframes + maps = MapImages(f"{sample_dir}/{image_id}") + maps.add_metadata(parent_df, tree_level="parent") + assert all( + [ + x in maps.parents[image_id].keys() + for x in [*ts_metadata_keys, "center_lat", "center_lon"] + ] + ) + assert isinstance(maps.parents[image_id]["shape"], tuple) + assert isinstance(maps.parents[image_id]["coordinates"], tuple) + + +def test_add_metadata_patch(sample_dir, image_id, init_dataframes, tmp_path): + parent_df, patch_df = init_dataframes + maps = MapImages(f"{sample_dir}/{image_id}") + maps.patchify_all(patch_size=3, path_save=tmp_path) + maps.add_metadata(parent_df, tree_level="parent") # add this too just in case + maps.add_metadata(patch_df, tree_level="patch") + patch_id = maps.list_patches()[0] + expected_cols = [ + "parent_id", + "shape", + "pixel_bounds", + "coordinates", + "polygon", + "mean_pixel_R", + "mean_pixel_A", + "std_pixel_R", + "std_pixel_A", + ] + assert all([x in maps.patches[patch_id].keys() for x in expected_cols]) + for k in ["shape", "pixel_bounds", "coordinates"]: + assert isinstance(maps.patches[patch_id][k], tuple) + assert isinstance( + "polygon", str + ) # expect this to be a string, reformed into polygon later + + # other ``add_metadata`` errors @@ 
-315,10 +396,10 @@ def test_loader_add_geo_info(sample_dir): # check nothing happens for png/tiff (no metadata) image_id = "cropped_74488689.png" - ts_map = MapImages(f"{sample_dir}/{image_id}") - keys = list(ts_map.parents[image_id].keys()) - ts_map.add_geo_info() - assert list(ts_map.parents[image_id].keys()) == keys + maps = MapImages(f"{sample_dir}/{image_id}") + keys = list(maps.parents[image_id].keys()) + maps.add_geo_info() + assert list(maps.parents[image_id].keys()) == keys image_id = "cropped_non_geo.tif" tiff = MapImages(f"{sample_dir}/{image_id}") @@ -409,19 +490,39 @@ def test_add_shape(init_maps, image_id): def test_calc_coords_from_grid_bb(sample_dir, image_id): - ts_map = MapImages(f"{sample_dir}/{image_id}") - ts_map.add_metadata( + maps = MapImages(f"{sample_dir}/{image_id}") + maps.add_metadata( f"{sample_dir}/ts_downloaded_maps.csv", columns=["name", "grid_bb", "crs"] ) - assert "coordinates" not in ts_map.parents[image_id] - ts_map.add_coords_from_grid_bb() - assert "coordinates" in ts_map.parents[image_id] - assert ts_map.parents[image_id]["coordinates"] == approx( + assert "coordinates" not in maps.parents[image_id] + maps.add_coords_from_grid_bb() + assert "coordinates" in maps.parents[image_id] + assert maps.parents[image_id]["coordinates"] == approx( (-4.83, 55.80, -4.21, 56.059), rel=1e-2 ) -def test_coord_functions(init_maps, image_id, sample_dir): +def test_calc_coords_from_grid_bb_warning(sample_dir, image_id, capfd): + maps = MapImages(f"{sample_dir}/{image_id}") + assert all([x not in maps.parents[image_id] for x in ["coordinates", "grid_bb"]]) + maps.add_coords_from_grid_bb() + out, _ = capfd.readouterr() + assert "[WARNING] No grid bounding box" in out + assert "coordinates" not in maps.parents[image_id] + + +def test_calc_coords_from_grid_bb_error(sample_dir, image_id, capfd): + maps = MapImages(f"{sample_dir}/{image_id}") + maps.add_metadata( + f"{sample_dir}/ts_downloaded_maps.csv", columns=["name", "grid_bb", "crs"] + ) + 
assert "coordinates" not in maps.parents[image_id] + maps.parents[image_id]["grid_bb"] = 123 + with pytest.raises(ValueError, match="Unexpected grid_bb"): + maps.add_coords_from_grid_bb() + + +def test_coord_functions(init_maps, image_id, sample_dir, capfd): # test for png with added metadata maps, _, patch_list = init_maps maps.add_center_coord() @@ -443,6 +544,8 @@ def test_coord_functions(init_maps, image_id, sample_dir): keys = list(tiffs.parents[image_id].keys()) tiffs.add_coord_increments() tiffs.add_center_coord(tree_level="parent") + out, _ = capfd.readouterr() + assert "[WARNING] 'coordinates' could not be found" in out assert list(tiffs.parents[image_id].keys()) == keys @@ -468,7 +571,7 @@ def test_save_patches_as_geotiffs(init_maps): maps.save_patches_as_geotiffs() -def test_save_to_geojson(init_maps, tmp_path): +def test_save_to_geojson(init_maps, tmp_path, capfd): maps, _, _ = init_maps maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") assert os.path.exists(f"{tmp_path}/patches.geojson") @@ -477,6 +580,42 @@ def test_save_to_geojson(init_maps, tmp_path): assert str(geo_df.crs.to_string()) == "EPSG:4326" assert isinstance(geo_df["geometry"][0], Polygon) + maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") + out, _ = capfd.readouterr() + assert "[WARNING] File already exists" in out + + +def test_save_to_geojson_missing_data(sample_dir, image_id, tmp_path): + maps = MapImages(f"{sample_dir}/{image_id}") + maps.patchify_all(patch_size=3, path_save=tmp_path) + maps.add_metadata( + f"{sample_dir}/ts_downloaded_maps.csv", columns=["name", "coordinates", "crs"] + ) + maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") + assert os.path.exists(f"{tmp_path}/patches.geojson") + geo_df = geopd.read_file(f"{tmp_path}/patches.geojson") + assert "geometry" in geo_df.columns + assert str(geo_df.crs.to_string()) == "EPSG:4326" + assert isinstance(geo_df["geometry"][0], Polygon) + + +def 
test_save_to_geojson_polygon_strings( + sample_dir, image_id, init_dataframes, tmp_path +): + parent_df, patch_df = init_dataframes + maps = MapImages(f"{sample_dir}/{image_id}") + maps.patchify_all(patch_size=3, path_save=tmp_path) + maps.add_metadata(parent_df, tree_level="parent") + maps.add_metadata(patch_df, tree_level="patch") + patch_id = maps.list_patches()[0] + assert isinstance(maps.patches[patch_id]["polygon"], str) + maps.save_patches_to_geojson(geojson_fname=f"{tmp_path}/patches.geojson") + assert os.path.exists(f"{tmp_path}/patches.geojson") + geo_df = geopd.read_file(f"{tmp_path}/patches.geojson") + assert "geometry" in geo_df.columns + assert str(geo_df.crs.to_string()) == "EPSG:4326" + assert isinstance(geo_df["geometry"][0], Polygon) + def test_calc_pixel_stats(init_maps, sample_dir, tmp_path): maps, _, patch_list = init_maps From 86a240b292189ce48ccd3e3e516d4af6db0df7fb Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Wed, 3 Jan 2024 12:11:28 +0000 Subject: [PATCH 09/13] remove unnecessary literal_evals --- mapreader/load/images.py | 66 ++++------------------------------------ 1 file changed, 6 insertions(+), 60 deletions(-) diff --git a/mapreader/load/images.py b/mapreader/load/images.py index 8d8c84fb..2985bc73 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -7,11 +7,11 @@ import os import random +import re import warnings from ast import literal_eval from glob import glob from typing import Literal -import re import matplotlib.image as mpimg import matplotlib.patches as patches @@ -27,7 +27,7 @@ from shapely.geometry import Polygon, box from tqdm.auto import tqdm -from mapreader.download.data_structures import GridIndex, GridBoundingBox +from mapreader.download.data_structures import GridBoundingBox, GridIndex from mapreader.download.downloader_utils import get_polygon_from_grid_bb os.environ[ @@ -516,7 +516,6 @@ def add_shape(self, tree_level: str | None = "parent") -> None: self._add_shape_id(image_id=image_id) def 
add_coords_from_grid_bb(self, verbose: bool = False) -> None: - print("[INFO] Adding coordinates, tree level: parent") parent_list = self.list_parents() @@ -681,16 +680,15 @@ def _add_shape_id( raise ValueError( f'[ERROR] Problem with "{image_id}". Please either redownload or remove from list of images to load.' ) - + def _add_coords_from_grid_bb_id( self, image_id: int | str, verbose: bool = False ) -> None: - grid_bb = self.parents[image_id]["grid_bb"] if isinstance(grid_bb, str): - cell1, cell2 = re.findall("\(.*?\)", grid_bb) - + cell1, cell2 = re.findall(r"\(.*?\)", grid_bb) + z1, x1, y1 = literal_eval(cell1) z2, x2, y2 = literal_eval(cell2) @@ -705,7 +703,7 @@ def _add_coords_from_grid_bb_id( self.parents[image_id]["coordinates"] = coordinates else: - raise ValueError(f"[ERROR] Unexpected grid_bb format for {image_id}.") + raise ValueError(f"[ERROR] Unexpected grid_bb format for {image_id}.") def _add_coord_increments_id( self, image_id: int | str, verbose: bool | None = False @@ -760,17 +758,9 @@ def _add_coord_increments_id( verbose, ) return - if isinstance(self.parents[image_id]["coordinates"], str): - self.parents[image_id]["coordinates"] = literal_eval( - self.parents[image_id]["coordinates"] - ) if "shape" not in self.parents[image_id].keys(): self._add_shape_id(image_id) - if isinstance(self.parents[image_id]["shape"], str): - self.parents[image_id]["shape"] = literal_eval( - self.parents[image_id]["shape"] - ) image_height, image_width, _ = self.parents[image_id]["shape"] @@ -807,10 +797,6 @@ def _add_patch_coords_id(self, image_id: str, verbose: bool = False) -> None: verbose, ) return - if isinstance(self.parents[parent_id]["coordinates"], str): - self.parents[parent_id]["coordinates"] = literal_eval( - self.parents[parent_id]["coordinates"] - ) else: if not all([k in self.parents[parent_id].keys() for k in ["dlat", "dlon"]]): @@ -823,12 +809,6 @@ def _add_patch_coords_id(self, image_id: str, verbose: bool = False) -> None: dlon = 
self.parents[parent_id]["dlon"] dlat = self.parents[parent_id]["dlat"] - # get patch bounds - if isinstance(self.patches[image_id]["pixel_bounds"], str): - self.patches[image_id]["pixel_bounds"] = literal_eval( - self.patches[image_id]["pixel_bounds"] - ) - pixel_bounds = self.patches[image_id]["pixel_bounds"] # get patch coords @@ -860,11 +840,6 @@ def _add_patch_polygons_id(self, image_id: str, verbose: bool = False) -> None: self._add_patch_coords_id(image_id, verbose) if "coordinates" in self.patches[image_id].keys(): - if isinstance(self.patches[image_id]["coordinates"], str): - self.patches[image_id]["coordinates"] = literal_eval( - self.patches[image_id]["coordinates"] - ) - coords = self.patches[image_id]["coordinates"] self.patches[image_id]["polygon"] = box(*coords) @@ -903,11 +878,6 @@ def _add_center_coord_id( self._add_patch_coords_id(image_id, verbose) if "coordinates" in self.images[tree_level][image_id].keys(): - if isinstance(self.images[tree_level][image_id]["coordinates"], str): - self.images[tree_level][image_id]["coordinates"] = literal_eval( - self.images[tree_level][image_id]["coordinates"] - ) - self._print_if_verbose( f"[INFO] Reading 'coordinates' from {image_id}.", verbose ) @@ -962,17 +932,9 @@ def _calc_pixel_height_width( f"[WARNING] 'coordinates' could not be found in {parent_id}. Suggestion: run add_metadata or add_geo_info." 
# noqa ) return - if isinstance(self.parents[parent_id]["coordinates"], str): - self.parents[parent_id]["coordinates"] = literal_eval( - self.parents[parent_id]["coordinates"] - ) if "shape" not in self.parents[parent_id].keys(): self._add_shape_id(parent_id) - if isinstance(self.parents[parent_id]["shape"], str): - self.parents[parent_id]["shape"] = literal_eval( - self.parents[parent_id]["shape"] - ) height, width, _ = self.parents[parent_id]["shape"] xmin, ymin, xmax, ymax = self.parents[parent_id]["coordinates"] @@ -2237,19 +2199,11 @@ def _save_parent_as_geotiff( if "shape" not in self.parents[parent_id].keys(): self._add_shape_id(parent_id) - if isinstance(self.parents[parent_id]["shape"], str): - self.parents[parent_id]["shape"] = literal_eval( - self.parents[parent_id]["shape"] - ) height, width, channels = self.parents[parent_id]["shape"] if "coordinates" not in self.parents[parent_id].keys(): print(self.parents[parent_id].keys()) raise ValueError(f"[ERROR] Cannot locate coordinates for {parent_id}") - if isinstance(self.parents[parent_id]["coordinates"], str): - self.parents[parent_id]["coordinates"] = literal_eval( - self.parents[parent_id]["coordinates"] - ) coords = self.parents[parent_id]["coordinates"] if not crs: @@ -2352,19 +2306,11 @@ def _save_patch_as_geotiff( # get shape if "shape" not in self.patches[patch_id].keys(): self._add_shape_id(patch_id) - if isinstance(self.patches[patch_id]["shape"], str): - self.patches[patch_id]["shape"] = literal_eval( - self.patches[patch_id]["shape"] - ) height, width, channels = self.patches[patch_id]["shape"] # get coords if "coordinates" not in self.patches[patch_id].keys(): self._add_patch_coords_id(patch_id) - if isinstance(self.patches[patch_id]["coordinates"], str): - self.patches[patch_id]["coordinates"] = literal_eval( - self.patches[patch_id]["coordinates"] - ) coords = self.patches[patch_id]["coordinates"] if not crs: From 6e49e90da741902265bcc6d8d80b921b4bf38b25 Mon Sep 17 00:00:00 2001 From: Rosie 
Wood Date: Wed, 3 Jan 2024 12:42:12 +0000 Subject: [PATCH 10/13] update docs --- docs/source/User-guide/Load.rst | 36 ++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/docs/source/User-guide/Load.rst b/docs/source/User-guide/Load.rst index 7cf6640c..44c915ab 100644 --- a/docs/source/User-guide/Load.rst +++ b/docs/source/User-guide/Load.rst @@ -80,11 +80,13 @@ For example, if you have downloaded your maps using the default settings of our Other arguments you may want to specify when adding metadata to your images include: - ``index_col`` - By default, this is set to ``0`` so the first column of your csv/excel spreadsheet will be used as the index column when creating a pandas dataframe. If you would like to use a different column you can specify ``index_col``. - - ``columns`` - By default, the ``.add_metadata()`` method will add all the columns in your metadata to your ``MapImages`` object. If you would like to add only specific columns, you can pass a list of these as the ``columns``\s argument (e.g. ``columns=[`name`, `coordinates`, `region`]``) to add only these columns to your ``MapImages`` object. + - ``columns`` - By default, the ``add_metadata()`` method will add all the columns in your metadata to your ``MapImages`` object. If you would like to add only specific columns, you can pass a list of these as the ``columns``\s argument (e.g. ``columns=[`name`, `coordinates`, `region`]``) to add only these columns to your ``MapImages`` object. - ``ignore_mismatch``- By default, this is set to ``False`` so that an error is given if the images in your ``MapImages`` object are mismatched to your metadata. Setting ``ignore_mismatch`` to ``True`` (by specifying ``ignore_mismatch=True``) will allow you to bypass this error and add mismatched metadata. Only metadata corresponding to images in your ``MapImages`` object will be added. - ``delimiter`` - By default, this is set to ``|``. 
If your csv file is delimited using a different delimiter you should specify the delimiter argument. +.. note:: In MapReader versions < 1.0.7, coordinates were miscalculated. To correct this, use the ``add_coords_from_grid_bb()`` method to calculate new, correct coordinates. + Patchify ---------- @@ -184,7 +186,7 @@ As above, you can use the ``path_save`` argument to change where these patches a Other arguments you may want to specify when patchifying your images include: - ``square_cuts`` - By default, this is set to ``False``. Thus, if your ``patch_size`` is not a factor of your image size (e.g. if you are trying to slice a 100x100 pixel image into 8x8 pixel patches), you will end up with some rectangular patches at the edges of your image. If you set ``square_cuts=True``, then all your patches will be square, however there will be some overlap between edge patches. Using ``square_cuts=True`` is useful if you need square images for model training, and don't want to warp your rectangular images by resizing them at a later stage. - - ``add_to_parent`` - By default, this is set to ``True`` so that each time you run ``.patchify_all()`` your patches are added to your ``MapImages`` object. Setting it to ``False`` (by specifying ``add_to_parent=False``) will mean your patches are created, but not added to your ``MapImages`` object. This can be useful for testing out different patch sizes. + - ``add_to_parent`` - By default, this is set to ``True`` so that each time you run ``patchify_all()`` your patches are added to your ``MapImages`` object. Setting it to ``False`` (by specifying ``add_to_parent=False``) will mean your patches are created, but not added to your ``MapImages`` object. This can be useful for testing out different patch sizes. - ``rewrite`` - By default, this is set to ``False`` so that if your patches already exist they are not overwritten. 
Setting it to ``True`` (by specifying ``rewrite=True``) will mean already existing patches are recreated and overwritten. If you would like to save your patches as geo-referenced tiffs (i.e. geotiffs), use: @@ -193,10 +195,12 @@ If you would like to save your patches as geo-referenced tiffs (i.e. geotiffs), my_files.save_patches_as_geotiffs() -This will save each patch in your ``MapImages`` object as a ``.geotiff`` file in your patches directory. +This will save each patch in your ``MapImages`` object as a georeferenced ``.tif`` file in your patches directory. + +.. note:: MapReader also has a ``save_parents_as_geotiff()`` method for saving parent images as geotiffs. -After running the ``.patchify_all()`` method, you'll see that ``print(my_files)`` shows you have both 'parents' and 'patches'. -To view an iterable list of these, you can use the ``.list_parents()`` and ``.list_patches()`` methods: +After running the ``patchify_all()`` method, you'll see that ``print(my_files)`` shows you have both 'parents' and 'patches'. +To view an iterable list of these, you can use the ``list_parents()`` and ``list_patches()`` methods: .. code-block:: python @@ -229,7 +233,7 @@ or .. note:: These parent and patch dataframes **will not** automatically update so you will want to run this command again if you add new information into your ``MapImages`` object. -At any point, you can also save these dataframes by passing the ``save`` argument to the ``.convert_images()`` method: +At any point, you can also save these dataframes by passing the ``save`` argument to the ``convert_images()`` method: .. code-block:: python @@ -280,7 +284,7 @@ If, however, you want to see a random sample of your patches use the ``tree_leve It can also be helpful to see your patches in the context of their parent image. -To do this use the ``.show()`` method. +To do this use the ``show()`` method. e.g. : @@ -312,7 +316,7 @@ This will show you your chosen patches, by default highlighted with red borders, .. 
admonition:: Advanced usage :class: dropdown - Further usage of the ``.show()`` method is detailed in :ref:`Further_analysis`. + Further usage of the ``show()`` method is detailed in :ref:`Further_analysis`. Please head there for guidance on advanced usage. You may also want to see all the patches created from one of your parent images. @@ -330,7 +334,7 @@ This can be done using: .. admonition:: Advanced usage :class: dropdown - Further usage of the ``.show_parent()`` method is detailed in :ref:`Further_analysis`. + Further usage of the ``show_parent()`` method is detailed in :ref:`Further_analysis`. Please head there for guidance on advanced usage. .. todo:: Move 'Further analysis/visualization' to a different page (e.g. as an appendix) @@ -341,13 +345,13 @@ Further analysis/visualization (optional) ------------------------------------------- If you have loaded geographic coordinates into your ``MapImages`` object, you may want to calculate the central coordinates of your patches. -The ``.add_center_coord()`` method can used to do this: +The ``add_center_coord()`` method can used to do this: .. code-block:: python my_files.add_center_coord() -You can then rerun the ``.convert_images()`` method to see your results. +You can then rerun the ``convert_images()`` method to see your results. i.e.: @@ -358,15 +362,15 @@ i.e.: You will see that center coordinates of each patch have been added to your patch dataframe. -The ``.calc_pixel_stats()`` method can be used to calculate means and standard deviations of pixel intensities of each of your patches: +The ``calc_pixel_stats()`` method can be used to calculate means and standard deviations of pixel intensities of each of your patches: .. code-block:: python my_files.calc_pixel_stats() -After rerunning the ``.convert_images()`` method (as above), you will see that mean and standard pixel intensities have been added to your patch dataframe. 
+After rerunning the ``convert_images()`` method (as above), you will see that mean and standard deviation pixel intensities have been added to your patch dataframe.
From 314bdd694ed37169e6a99a91d9467e517bc78027 Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Fri, 5 Jan 2024 12:41:38 +0000 Subject: [PATCH 11/13] fix links --- docs/source/User-guide/Classify/Classify.rst | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/source/User-guide/Classify/Classify.rst b/docs/source/User-guide/Classify/Classify.rst index 0867e4a5..715d3c99 100644 --- a/docs/source/User-guide/Classify/Classify.rst +++ b/docs/source/User-guide/Classify/Classify.rst @@ -20,13 +20,12 @@ This is all done within MapReader's ``ClassifierContainer()`` class, which is us - Predict labels of unannotated images (model inference). - Visualize datasets and predictions. -If you already have a fine-tuned model, you can skip to the `Infer labels using a fine-tuned model `_ page. +If you already have a fine-tuned model, you can skip to the `Infer labels using a fine-tuned model `_ page. -If not, you should proceed to the `Train/fine-tune a classifier `_ page. +If not, you should proceed to the `Train/fine-tune a classifier `_ page. .. toctree:: :maxdepth: 1 Train Infer - From 7e90e2b7389d9fda089ec0228bbccbe4eec5a1ac Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Fri, 5 Jan 2024 13:49:26 +0000 Subject: [PATCH 12/13] add ClassifierContainer imports to docs --- docs/source/User-guide/Classify/Train.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/source/User-guide/Classify/Train.rst b/docs/source/User-guide/Classify/Train.rst index e9262091..cd35a86c 100644 --- a/docs/source/User-guide/Classify/Train.rst +++ b/docs/source/User-guide/Classify/Train.rst @@ -231,6 +231,8 @@ There are a number of options for the ``model`` argument: .. 
code-block:: python #EXAMPLE + from mapreader import ClassifierContainer + my_classifier = ClassifierContainer("resnet18", annotated_images.labels_map, dataloaders) By default, this will load a pretrained form of the model and reshape the last layer to output the same number of nodes as labels in your dataset. @@ -242,10 +244,13 @@ There are a number of options for the ``model`` argument: .. code-block:: python + #EXAMPLE from torchvision import models from torch import nn + from mapreader import ClassifierContainer + my_model = models.resnet18(pretrained=True) # reshape the final layer (FC layer) of the neural network to output the same number of nodes as label in your dataset @@ -270,6 +275,8 @@ There are a number of options for the ``model`` argument: #EXAMPLE import torch + from mapreader import ClassifierContainer + my_model = torch.load("./models/model_checkpoint_6.pkl") my_classifier = ClassifierContainer(my_model, annotated_images.labels_map, dataloaders) @@ -285,6 +292,8 @@ There are a number of options for the ``model`` argument: .. code-block:: python #EXAMPLE + from mapreader import ClassifierContainer + my_classifier = ClassifierContainer(None, None, None, load_path="./models/checkpoint_6.pkl") This will also load the corresponding model file (in this case "./models/model_checkpoint_6.pkl"). 
@@ -301,6 +310,8 @@ There are a number of options for the ``model`` argument: #EXAMPLE from transformers import AutoFeatureExtractor, AutoModelForImageClassification + from mapreader import ClassifierContainer + extractor = AutoFeatureExtractor.from_pretrained("davanstrien/autotrain-mapreader-5000-40830105612") my_model = AutoModelForImageClassification.from_pretrained("davanstrien/autotrain-mapreader-5000-40830105612") @@ -316,6 +327,8 @@ There are a number of options for the ``model`` argument: #EXAMPLE import timm + from mapreader import ClassifierContainer + my_model = timm.create_model("hf_hub:timm/resnest101e.in1k", pretrained=True, num_classes=len(annotated_images.labels_map)) my_classifier = ClassifierContainer(my_model, annotated_images.labels_map, dataloaders) From af7df2dc659e46a1ff81563620ecf1bc3799c49b Mon Sep 17 00:00:00 2001 From: Rosie Wood Date: Fri, 5 Jan 2024 15:16:25 +0000 Subject: [PATCH 13/13] add saving of one band geotiffs --- mapreader/load/images.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/mapreader/load/images.py b/mapreader/load/images.py index 2985bc73..c78ca366 100644 --- a/mapreader/load/images.py +++ b/mapreader/load/images.py @@ -2211,7 +2211,6 @@ def _save_parent_as_geotiff( parent_affine = rasterio.transform.from_bounds(*coords, width, height) parent = Image.open(parent_path) - parent_array = reshape_as_raster(parent) with rasterio.open( f"{geotiff_path}", @@ -2225,7 +2224,12 @@ def _save_parent_as_geotiff( nodata=0, crs=crs, ) as dst: - dst.write(parent_array) + if len(parent.getbands()) == 1: + parent_array = np.array(parent) + dst.write(parent_array, indexes=1) + else: + parent_array = reshape_as_raster(parent) + dst.write(parent_array) def save_patches_as_geotiffs( self,