From a311d16bb281ce1a8ca9c6d57d2f933c9808d596 Mon Sep 17 00:00:00 2001 From: 12rambau Date: Fri, 8 Jul 2022 08:10:29 +0000 Subject: [PATCH 1/6] feat: first draft of a gdrive modules --- sepal_ui/scripts/gdrive.py | 147 +++++++++++++++++++++++++++++++++++++ 1 file changed, 147 insertions(+) create mode 100644 sepal_ui/scripts/gdrive.py diff --git a/sepal_ui/scripts/gdrive.py b/sepal_ui/scripts/gdrive.py new file mode 100644 index 00000000..c0f79b8a --- /dev/null +++ b/sepal_ui/scripts/gdrive.py @@ -0,0 +1,147 @@ +import io +from pathlib import Path + +import ee +from apiclient import discovery +from googleapiclient.http import MediaIoBaseDownload +from osgeo import gdal + +from sepal_ui.scripts import utils as su + +################################################################################ +# attributes of the singleton +# + +# call need_ee for the whole file +su.init_ee() + +SERVICE = discovery.build( + serviceName="drive", + version="v3", + cache_discovery=False, + credentials=ee.credentials(), +) +"the gdrive service used to access the content of the user folder" + +################################################################################ +# functions +# + + +def get_all_items(mime_type="image/tiff"): + """ + get all the items in the Gdrive, items will have 2 columns, 'name' and 'id'. + It excludes files that are contained in the trashbin. + + Args: + mime_type (str, optional): the mime type to look for by default Tif images + + Return: + (list): the found items with 2 columns ('id' and 'name') + """ + + # get list of files + return ( + SERVICE.files() + .list( + q=f"mimeType='{mime_type}' and trashed = false", + pageSize=1000, + fields="nextPageToken, files(id, name)", + ) + .execute() + .get("files", []) + ) + + +def get_items(file_name, mime_type="image/tiff"): + """ + look for the file_name patern in user Gdrive files and retreive a list of Ids. 
+ + usually gee export your files using a tiling system so the file name provided + need to be the one from the export description. + + Args: + file_name (str): the file name used during the exportation step + mime_type (str, optional): the mime type to look for by default Tif images + + Return: + (list): the list of file id corresponding to the requested filename in your gdrive account + """ + + return [i for i in get_items(mime_type) if i["name"].startswith(file_name)] + + +def delete_items(items): + """ + Delete the items from Gdrive + + Args: + items (list): the list of item to delete as described in get_imes functions + """ + + for i in items: + SERVICE.files().delete(fileId=i["id"]).execute() + + return + + +def download_items(file_name, local_path, mime_type="image/tiff", delete=False): + """ + Download from Gdrive all the file corresponding to an equivalent get_items request. + + if the mime_type is "image/tiff" a vrt file will be created. The delete option will automatically delete files once they are dowloaded. + + Args: + file_name (str): the file name used during the exportation step + local_path (pathlike object): the destination of the files + mime_type (str, optional): the mime type to look for by default Tif images + delete (bool, optional): either or not the file need to be deleted once the download is finished. 
default to :code:`False` + + Return: + (pathlib.Path): the path to the download folder or the path to the vrt + """ + + # cast as path + local_path = Path(local_path) + + # get the items + items = get_items(file_name, mime_type) + + # load them to the use workspace + local_files = [] + for i in items: + request = SERVICE.files().get_media(fileId=i["id"]) + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request) + + # download in chunks + done = False + while done is False: + status, done = downloader.next_chunk() + + # write to files + local_file = local_path / i["name"] + with local_file.open("wb") as fo: + fo.write(fh.getvalue()) + + local_files.append(local_file) + + # delete the items ? + delete is False or delete_items(items) + + # create a vrt ? + if mime_type == "image/tiff": + vrt_file = local_path / f"{file_name}.vrt" + ds = gdal.BuildVRT(str(vrt_file), [str(f) for f in local_files]) + + # if there is no cache to empty it means that one of the dataset was empty + try: + ds.FlushCache() + except AttributeError: + raise Exception("one of the dataset was empty") + + # check that the file was effectively created (gdal doesn't raise errors) + if not vrt_file.is_file(): + raise Exception(f"the vrt {vrt_file} was not created") + + return vrt_file if mime_type == "image/tiff" else local_path From bc104a970c985d84212a8f362fc481d8e3f19bed Mon Sep 17 00:00:00 2001 From: 12rambau Date: Fri, 8 Jul 2022 14:40:57 +0000 Subject: [PATCH 2/6] test: gdrive methods --- sepal_ui/scripts/gdrive.py | 5 +- tests/test_gdrive.py | 160 +++++++++++++++++++++++++++++++++++++ 2 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 tests/test_gdrive.py diff --git a/sepal_ui/scripts/gdrive.py b/sepal_ui/scripts/gdrive.py index c0f79b8a..dbd400c4 100644 --- a/sepal_ui/scripts/gdrive.py +++ b/sepal_ui/scripts/gdrive.py @@ -19,7 +19,7 @@ serviceName="drive", version="v3", cache_discovery=False, - credentials=ee.credentials(), + credentials=ee.Credentials(), ) "the 
gdrive service used to access the content of the user folder" @@ -35,6 +35,7 @@ def get_all_items(mime_type="image/tiff"): Args: mime_type (str, optional): the mime type to look for by default Tif images + folder (str): the id of the folder we want to look into Return: (list): the found items with 2 columns ('id' and 'name') @@ -68,7 +69,7 @@ def get_items(file_name, mime_type="image/tiff"): (list): the list of file id corresponding to the requested filename in your gdrive account """ - return [i for i in get_items(mime_type) if i["name"].startswith(file_name)] + return [i for i in get_all_items(mime_type) if i["name"].startswith(file_name)] def delete_items(items): diff --git a/tests/test_gdrive.py b/tests/test_gdrive.py new file mode 100644 index 00000000..af997086 --- /dev/null +++ b/tests/test_gdrive.py @@ -0,0 +1,160 @@ +import tempfile +from itertools import product +from pathlib import Path + +import pytest +import rasterio as rio +from google_drive_downloader import GoogleDriveDownloader as gdd +from googleapiclient.http import MediaFileUpload +from rasterio import windows + +from sepal_ui.scripts import gdrive +from sepal_ui.scripts import utils as su + + +class TestGdrive: + def test_get_all_items(self, tmp_dem, gdrive_folder): + + # extract name and folder + tmp_dir, test_file = tmp_dem + + list_items = gdrive.get_all_items() + + # at least the one I added manually + assert len(list_items) >= 9 + + return + + def test_get_items(self, tmp_dem, gdrive_folder): + + # extract name and folder + tmp_dir, test_file = tmp_dem + + list_items = gdrive.get_items(test_file.stem) + + assert len(list_items) == 9 + + return + + def test_download_items(self, tmp_dem, gdrive_folder): + + # extract name and folder + tmp_dir, test_file = tmp_dem + + # extract all the files from the folder + with tempfile.TemporaryDirectory() as loc_tmp_dir: + + gdrive.download_items(test_file.stem, loc_tmp_dir) + + loc_tmp_dir = Path(loc_tmp_dir) + assert len([f for f in 
loc_tmp_dir.glob("*.tif")]) == 9 + assert len([f for f in loc_tmp_dir.glob("*.vrt")]) == 1 + + return + + def test_delete_items(self, tmp_dem, gdrive_folder): + + # extract name and folder + tmp_dir, test_file = tmp_dem + + gdrive.delete_items(gdrive.get_items(test_file.stem)) + + # assert + assert gdrive.get_items(test_file.stem) == [] + + return + + @pytest.fixture(scope="class") + def gdrive_folder(self, tmp_dem): + """create a fake folder in my gdrive and run the test over it""" + + # extract name and folder + tmp_dir, test_file = tmp_dem + + # create a gdrive folder + body = { + "name": "test_sepal_ui", + "mimeType": "application/vnd.google-apps.folder", + } + gdrive_folder = gdrive.SERVICE.files().create(body=body).execute() + + # send all the tile files to the gdrive folder + files = [f for f in tmp_dir.glob("*.tif") if not f.name.endswith("dem.tif")] + for f in files: + file_metadata = {"name": f.name, "parents": [gdrive_folder["id"]]} + media = MediaFileUpload(f, mimetype="image/tiff") + ( + gdrive.SERVICE.files() + .create(body=file_metadata, media_body=media) + .execute() + ) + + yield gdrive_folder + + # delete the folder + gdrive.SERVICE.files().delete(fileId=gdrive_folder["id"]).execute() + + return + + @pytest.fixture(scope="class") + def tmp_dem(self): + """the tmp dir containing the dem""" + + # create a tmp directory and save the DEM file inside + with tempfile.TemporaryDirectory() as tmp_dir: + + tmp_dir = Path(tmp_dir) + + # save the file + test_file = tmp_dir / f"{su.random_string(8)}_dem.tif" + test_id = "1vRkAWQYsLWCi6vcTMk8vLxoXMFbdMFn8" + gdd.download_file_from_google_drive(test_id, test_file, True, True) + + # cut the image in pieces + with rio.open(test_file) as src: + + tile_width = int(src.meta["width"] / 2) + tile_height = int(src.meta["height"] / 2) + meta = src.meta.copy() + + for window, transform in self.get_tiles(src, tile_width, tile_height): + + meta["transform"] = transform + meta["width"], meta["height"] = window.width, 
window.height + outpath = ( + tmp_dir + / f"{test_file.stem}_{window.col_off}_{window.row_off}.tif" + ) + with rio.open(outpath, "w", **meta) as dst: + dst.write(src.read(window=window)) + + yield tmp_dir, test_file + + # add this empty line before return to make sure that the file is destroyed + return + + @staticmethod + def get_tiles(ds, width, height): + """ + Cut an image in pieces according to the specified width and height + + Args: + ds: dataset + width: the width of the tile + height; the height of the tile + + Yield: + (window, transform): the tuple of the window characteristics corresponding to each tile + """ + ncols, nrows = ds.meta["width"], ds.meta["height"] + + offsets = product(range(0, ncols, width), range(0, nrows, height)) + big_window = windows.Window(col_off=0, row_off=0, width=ncols, height=nrows) + for col_off, row_off in offsets: + window = windows.Window( + col_off=col_off, row_off=row_off, width=width, height=height + ).intersection(big_window) + transform = windows.transform(window, ds.transform) + yield window, transform + + return From 76350b294f17404d59b134b3085afb0c9db8e132 Mon Sep 17 00:00:00 2001 From: 12rambau Date: Fri, 8 Jul 2022 14:48:00 +0000 Subject: [PATCH 3/6] build: add google_drive_downloader in test build --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 75d062bc..567eaf34 100644 --- a/setup.py +++ b/setup.py @@ -62,6 +62,7 @@ def run(self): "test": [ "coverage", "pytest", + "google_drive_downloader", ], "doc": [ "jupyter-sphinx", From a78849fa3c58570cfa5dc5ab77a3ad8de2686b6b Mon Sep 17 00:00:00 2001 From: 12rambau Date: Fri, 8 Jul 2022 14:53:05 +0000 Subject: [PATCH 4/6] build: add google dependencies --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 567eaf34..b84f238f 100644 --- a/setup.py +++ b/setup.py @@ -54,6 +54,7 @@ def run(self): "pyyaml", "dask", "tqdm", + "google-api-python-client", ], "extras_require": { "dev": [ 
@@ -62,7 +63,7 @@ def run(self): "test": [ "coverage", "pytest", - "google_drive_downloader", + "googledrivedownloader", ], "doc": [ "jupyter-sphinx", From c0ca641c66d5115c2e3d9464a617392e691dda04 Mon Sep 17 00:00:00 2001 From: 12rambau Date: Fri, 8 Jul 2022 15:46:56 +0000 Subject: [PATCH 5/6] build: install GDAL in the Github action manually to ensure version --- .github/workflows/unit.yml | 4 ++++ docs/source/start/installation.rst | 30 ++++++++++++++++++++++++++++++ setup.py | 1 + 3 files changed, 35 insertions(+) diff --git a/.github/workflows/unit.yml b/.github/workflows/unit.yml index facc2d47..35469f01 100644 --- a/.github/workflows/unit.yml +++ b/.github/workflows/unit.yml @@ -25,6 +25,10 @@ jobs: with: python-version: ${{ matrix.python-version }} + - name: Manually install GDAL + run: | + pip install --find-links=https://girder.github.io/large_image_wheels --no-cache GDAL + - name: Install dependencies run: pip install .[test] diff --git a/docs/source/start/installation.rst b/docs/source/start/installation.rst index 7720e493..606f0c17 100644 --- a/docs/source/start/installation.rst +++ b/docs/source/start/installation.rst @@ -5,6 +5,36 @@ Installation The Sepal environment is up to date with the latest stable version of :code:`sepal_ui`. No installation is required + +Install GDAL +------------ + +:code:`sepal-ui` requires GDAL to build the vrt from downloaded images. Until we +find a way to only rely on :code:`rasterio`, users will be forced to install GDAL +in their environment. + +.. note:: + + The following comes from the + `localTileServer documentation <https://localtileserver.banesullivan.com/installation/index.html>`__ + which provides a nice insight on installing GDAL. + +GDAL can be a pain in the 🍑 to install, so you may want to handle GDAL +before installing ``localtileserver`` when using ``pip``. + +If on Linux, I highly recommend using the `large_image_wheels <https://github.com/girder/large_image_wheels>`_ from Kitware. + +.. 
code:: bash + + pip install --find-links=https://girder.github.io/large_image_wheels --no-cache GDAL + + +Otherwise, *one does not simply pip install GDAL*. You will want to either use +conda or install GDAL using your system package manager (e.g.: apt, Homebrew, etc.) + +.. image:: https://raw.githubusercontent.com/banesullivan/localtileserver/main/imgs/pip-gdal.jpg + :alt: One does not simply pip install GDAL + :align: center Stable release -------------- diff --git a/setup.py b/setup.py index b84f238f..087a8ea5 100644 --- a/setup.py +++ b/setup.py @@ -55,6 +55,7 @@ def run(self): "dask", "tqdm", "google-api-python-client", + "GDAL", ], "extras_require": { "dev": [ From 592d6600dd49ce93e833587e6f95a48e5d834bcf Mon Sep 17 00:00:00 2001 From: dfguerrerom Date: Sun, 5 Mar 2023 17:12:30 +0100 Subject: [PATCH 6/6] fix: add new gdrive service compatible with sepal --- sepal_ui/scripts/gdrive.py | 225 ++++++++++++++++++++----------------- 1 file changed, 119 insertions(+), 106 deletions(-) diff --git a/sepal_ui/scripts/gdrive.py b/sepal_ui/scripts/gdrive.py index dbd400c4..8abeec1f 100644 --- a/sepal_ui/scripts/gdrive.py +++ b/sepal_ui/scripts/gdrive.py @@ -1,148 +1,161 @@ +""" +Google Drive object providing a simple interface to interact with files from Gdrive. 
+""" + +from typing import Optional, Union +import json import io from pathlib import Path import ee from apiclient import discovery +from google.oauth2.credentials import Credentials from googleapiclient.http import MediaIoBaseDownload from osgeo import gdal -from sepal_ui.scripts import utils as su - -################################################################################ -# attributes of the singleton -# - -# call need_ee for the whole file -su.init_ee() +import sepal_ui.scripts.decorator as sd -SERVICE = discovery.build( - serviceName="drive", - version="v3", - cache_discovery=False, - credentials=ee.Credentials(), -) -"the gdrive service used to access the content of the user folder" +sd.init_ee() +class GDrive: -################################################################################ -# functions -# + def __init__(self) -> None: + self.initialize = ee.Initialize() -def get_all_items(mime_type="image/tiff"): - """ - get all the items in the Gdrive, items will have 2 columns, 'name' and 'id'. - It excludes files that are contained in the trashbin. + # Access to sepal access token + self.access_token = json.loads( + (Path.home() / ".config/earthengine/credentials").read_text() + ).get("access_token") - Args: - mime_type (str, optional): the mime type to look for by default Tif images - folder (str): the id of the folder we want to look into - - Return: - (list): the found items with 2 columns ('id' and 'name') - """ + self.service = discovery.build( + serviceName="drive", + version="v3", + cache_discovery=False, + credentials=Credentials(self.access_token), + ) - # get list of files - return ( - SERVICE.files() - .list( - q=f"mimeType='{mime_type}' and trashed = false", - pageSize=1000, - fields="nextPageToken, files(id, name)", + def get_all_items(self, mime_type: Optional[str]="image/tiff") -> list: + """Get all the items in the Gdrive. + + items will have 2 columns, 'name' and 'id'. + It excludes files that are contained in the trashbin. 
+ + Args: + mime_type (str, optional): the mime type to look for by default Tif images + folder (str): the id of the folder we want to look into + + Return: + (list): the found items with 2 columns ('id' and 'name') + """ + + # get list of files + return ( + self.service.files() + .list( + q=f"mimeType='{mime_type}' and trashed = false", + pageSize=1000, + fields="nextPageToken, files(id, name)", + ) + .execute() + .get("files", []) ) - .execute() - .get("files", []) - ) -def get_items(file_name, mime_type="image/tiff"): - """ - look for the file_name patern in user Gdrive files and retreive a list of Ids. + def get_items(self, file_name:Union[str, Path], mime_type: str = "image/tiff") -> list: + """Look for the file_name patern in user Gdrive files and retreive a list of Ids. + + usually gee export your files using a tiling system so the file name provided + need to be the one from the export description. - usually gee export your files using a tiling system so the file name provided - need to be the one from the export description. 
+ Args: + file_name (str): the file name used during the exportation step + mime_type (str, optional): the mime type to look for by default Tif images - Args: - file_name (str): the file name used during the exportation step - mime_type (str, optional): the mime type to look for by default Tif images + Return: + (list): the list of file id corresponding to the requested filename in your gdrive account + """ - Return: - (list): the list of file id corresponding to the requested filename in your gdrive account - """ + return [i for i in self.get_all_items(mime_type) if i["name"].startswith(file_name)] - return [i for i in get_all_items(mime_type) if i["name"].startswith(file_name)] + def delete_items(self, items: list) -> None: + """ + Delete the items from Gdrive -def delete_items(items): - """ - Delete the items from Gdrive + Args: + items (list): the list of item to delete as described in get_imes functions + """ - Args: - items (list): the list of item to delete as described in get_imes functions - """ + for i in items: + self.service.files().delete(fileId=i["id"]).execute() - for i in items: - SERVICE.files().delete(fileId=i["id"]).execute() + return - return + def download_items( + self, + file_name: Union[str, Path], + local_path: Union[str, Path], + mime_type: str ="image/tiff", + delete: Optional[bool]=False + ) -> Union[Path, None]: -def download_items(file_name, local_path, mime_type="image/tiff", delete=False): - """ - Download from Gdrive all the file corresponding to an equivalent get_items request. + """Download from Gdrive all the file corresponding to an equivalent get_items request. - if the mime_type is "image/tiff" a vrt file will be created. The delete option will automatically delete files once they are dowloaded. + if the mime_type is "image/tiff" a vrt file will be created. The delete option will automatically delete files once they are dowloaded. 
- Args: - file_name (str): the file name used during the exportation step - local_path (pathlike object): the destination of the files - mime_type (str, optional): the mime type to look for by default Tif images - delete (bool, optional): either or not the file need to be deleted once the download is finished. default to :code:`False` + Args: + file_name (str): the file name used during the exportation step + local_path (pathlike object): the destination of the files + mime_type (str, optional): the mime type to look for by default Tif images + delete (bool, optional): either or not the file need to be deleted once the download is finished. default to :code:`False` - Return: - (pathlib.Path): the path to the download folder or the path to the vrt - """ + Return: + (pathlib.Path): the path to the download folder or the path to the vrt + """ - # cast as path - local_path = Path(local_path) + # cast as path + local_path = Path(local_path) - # get the items - items = get_items(file_name, mime_type) + # get the items + items = self.get_items(file_name, mime_type) - # load them to the use workspace - local_files = [] - for i in items: - request = SERVICE.files().get_media(fileId=i["id"]) - fh = io.BytesIO() - downloader = MediaIoBaseDownload(fh, request) + # load them to the use workspace + local_files = [] + for i in items: + request = self.service.files().get_media(fileId=i["id"]) + fh = io.BytesIO() + downloader = MediaIoBaseDownload(fh, request) - # download in chunks - done = False - while done is False: - status, done = downloader.next_chunk() + # download in chunks + done = False + while done is False: + status, done = downloader.next_chunk() - # write to files - local_file = local_path / i["name"] - with local_file.open("wb") as fo: - fo.write(fh.getvalue()) + # write to files + local_file = local_path / i["name"] + with local_file.open("wb") as fo: + fo.write(fh.getvalue()) - local_files.append(local_file) + local_files.append(local_file) - # delete the items ? 
- delete is False or delete_items(items) + # delete the items ? + if delete: + self.delete_items(items) - # create a vrt ? - if mime_type == "image/tiff": - vrt_file = local_path / f"{file_name}.vrt" - ds = gdal.BuildVRT(str(vrt_file), [str(f) for f in local_files]) + # create a vrt ? + if mime_type == "image/tiff": + vrt_file = local_path / f"{file_name}.vrt" + ds = gdal.BuildVRT(str(vrt_file), [str(f) for f in local_files]) - # if there is no cache to empty it means that one of the dataset was empty - try: - ds.FlushCache() - except AttributeError: - raise Exception("one of the dataset was empty") + # if there is no cache to empty it means that one of the dataset was empty + try: + ds.FlushCache() + except AttributeError: + raise Exception("one of the dataset was empty") - # check that the file was effectively created (gdal doesn't raise errors) - if not vrt_file.is_file(): - raise Exception(f"the vrt {vrt_file} was not created") + # check that the file was effectively created (gdal doesn't raise errors) + if not vrt_file.is_file(): + raise Exception(f"the vrt {vrt_file} was not created") - return vrt_file if mime_type == "image/tiff" else local_path + return vrt_file if mime_type == "image/tiff" else local_path