Skip to content

Commit

Permalink
Fix unit test to work with s3fs
Browse files Browse the repository at this point in the history
  • Loading branch information
chuckwondo committed May 10, 2024
1 parent b2e564d commit 1ad38df
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 12 deletions.
9 changes: 5 additions & 4 deletions src/gedi_subset/subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ class SubsetGranuleProps:
single argument.
"""

fs: s3fs.S3FileSystem
granule: Granule
maap: MAAP
aoi_gdf: gpd.GeoDataFrame
Expand Down Expand Up @@ -163,10 +164,8 @@ def subset_granule(props: SubsetGranuleProps) -> IOResultE[Maybe[str]]:
logger.debug(f"Subsetting {inpath}")

try:
fs = s3fs.S3FileSystem()

with (
fs.open(inpath, block_size=4 * 1024 * 1024, cache_type="all") as f,
props.fs.open(inpath, block_size=4 * 1024 * 1024, cache_type="all") as f,
h5py.File(f) as hdf5,
):
gdf = subset_hdf5(
Expand All @@ -181,6 +180,7 @@ def subset_granule(props: SubsetGranuleProps) -> IOResultE[Maybe[str]]:
except Exception as e:
granule_ur = props.granule["Granule"]["GranuleUR"]
logger.warning(f"Skipping granule {granule_ur} [failed to read {inpath}: {e}]")
logger.exception(e)
return IOSuccess(Nothing)

if gdf.empty:
Expand Down Expand Up @@ -249,9 +249,10 @@ def append_subset(src: str) -> IOResultE[str]:
logger.info(f"Found {len(found_granules)} in the CMR")
logger.info(f"Total downloadable granules: {len(downloadable_granules)}")

fs = s3fs.S3FileSystem()
payloads = (
SubsetGranuleProps(
granule, maap, aoi_gdf, lat, lon, beams, columns, query, output_dir
fs, granule, maap, aoi_gdf, lat, lon, beams, columns, query, output_dir
)
for granule in downloadable_granules
)
Expand Down
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def __init__(self):
)


@pytest.fixture(scope="function")
@pytest.fixture(scope="module")
def aws_credentials() -> None:
"""Mocked AWS Credentials for moto."""
os.environ["AWS_ACCESS_KEY_ID"] = "testing"
Expand Down
54 changes: 47 additions & 7 deletions tests/test_subset.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,66 @@
import os
from pathlib import Path
from typing import cast

import geopandas as gpd
import pytest
import requests
from botocore.session import Session
from maap.maap import MAAP
from maap.Result import Granule
from moto.moto_server.threaded_moto_server import ThreadedMotoServer
from mypy_boto3_s3.client import S3Client
from returns.io import IOSuccess
from returns.maybe import Some
from s3fs import S3FileSystem
from typer import BadParameter

from gedi_subset.subset import SubsetGranuleProps, check_beams_option, subset_granule

# The following fixtures are simplifications of those found in the tests for s3fs at
# https://github.com/fsspec/s3fs/blob/main/s3fs/tests/test_s3fs.py.
# They are used to work around this issue: https://github.com/getmoto/moto/issues/6836

def test_subset_granule(maap: MAAP, h5_path: str, aoi_gdf: gpd.GeoDataFrame):
output_dir = os.path.dirname(h5_path)
filename = os.path.basename(h5_path)
# Host and port that the local moto server (see the moto_server fixture) binds to.
ip_address = "127.0.0.1"
port = 5555
# Base URL of that server. It is used as the S3 endpoint for the test client and
# the S3FileSystem, and as the prefix of the moto reset request.
endpoint_uri = f"http://{ip_address}:{port}/"


@pytest.fixture(scope="module")
def moto_server(aws_credentials):
    """Run a threaded moto server for the lifetime of the test module.

    The ``aws_credentials`` fixture is requested so that it is set up before
    the server is started.  Nothing is yielded to the test; the server is
    stopped once the module's tests have finished.
    """
    threaded_server = ThreadedMotoServer(ip_address=ip_address, port=port)
    threaded_server.start()
    yield
    threaded_server.stop()


@pytest.fixture(autouse=True)
def reset_s3_fixture(moto_server):
    """Reset the moto server's state before every test in this module.

    Requesting ``moto_server`` guarantees that the server is running before
    the reset request is sent.  Without that dependency, a test that does not
    use the ``fs`` fixture (or that is run in isolation) would send the
    request to a server that has not been started and fail with a connection
    error.
    """
    # endpoint_uri ends with "/", so strip it to avoid a "//moto-api/reset" URL.
    requests.post(f"{endpoint_uri.rstrip('/')}/moto-api/reset")


@pytest.fixture()
def fs(moto_server, h5_path: str):
    """Yield an S3FileSystem pointed at the moto server, seeded with the h5 file.

    A bucket named ``mybucket`` is created on the moto server and the contents
    of the file at ``h5_path`` are uploaded to it under the key ``temp.h5``.
    The s3fs instance cache is cleared before the filesystem is created, and
    the new filesystem's own cache is invalidated before it is yielded.
    """
    s3_client = cast(
        S3Client, Session().create_client("s3", endpoint_url=endpoint_uri)
    )
    s3_client.create_bucket(Bucket="mybucket")
    h5_bytes = Path(h5_path).read_bytes()
    s3_client.put_object(Bucket="mybucket", Key="temp.h5", Body=h5_bytes)

    S3FileSystem.clear_instance_cache()
    filesystem = S3FileSystem(client_kwargs={"endpoint_url": endpoint_uri})
    filesystem.invalidate_cache()

    yield filesystem


def test_subset_granule(
fs: S3FileSystem, maap: MAAP, aoi_gdf: gpd.GeoDataFrame, tmp_path: Path
):
granule = Granule(
{
"Granule": {
"GranuleUR": "foo",
"OnlineAccessURLs": {
"OnlineAccessURL": {"URL": f"s3://mybucket/{filename}"}
"OnlineAccessURL": {"URL": "s3://mybucket/temp.h5"}
},
}
},
Expand All @@ -37,10 +77,10 @@ def test_subset_granule(maap: MAAP, h5_path: str, aoi_gdf: gpd.GeoDataFrame):
# we get should be located in the output directory (tmp_path) and be named after
# the S3 object (temp.h5), except with a .gpq extension, rather than an .h5
# extension.

root, _ = os.path.splitext(h5_path)
expected_path = f"{root}.gpq"
expected_path = os.path.join(tmp_path, "temp.gpq")
io_result = subset_granule(
SubsetGranuleProps(
fs,
granule,
maap,
aoi_gdf,
Expand All @@ -49,7 +89,7 @@ def test_subset_granule(maap: MAAP, h5_path: str, aoi_gdf: gpd.GeoDataFrame):
"all",
["agbd"],
"l2_quality_flag == 1",
Path(output_dir),
tmp_path,
)
)

Expand Down

0 comments on commit 1ad38df

Please sign in to comment.