From 2254ba6ccb0a79ae38b6081b484550c05408a7bc Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Tue, 14 May 2024 15:09:08 -0700 Subject: [PATCH 01/25] Update AWSHandler.py and __init__.py Add download_file_from_s3 and parse_s3_uri functions Refactor download_file function to handle S3 URLs --- cellpack/autopack/AWSHandler.py | 2 +- cellpack/autopack/__init__.py | 49 +++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index b4c6397e..4613ddb4 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -19,7 +19,7 @@ def __init__( self, bucket_name, sub_folder_name=None, - region_name=None, + region_name="us-west-2", ): self.bucket_name = bucket_name self.folder_name = sub_folder_name diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 143544f0..7ec8476a 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -51,7 +51,8 @@ from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS from cellpack.autopack.loaders.utils import read_json_file, write_json_file - +import boto3 +import botocore packageContainsVFCommands = 1 ssl._create_default_https_context = ssl._create_unverified_context @@ -261,8 +262,39 @@ def updateReplacePath(newPaths): REPLACE_PATH[w[0]] = w[1] +def download_file_from_s3(s3_uri, local_file_path): + s3_client = boto3.client("s3") + bucket_name, key = parse_s3_uri(s3_uri) + + try: + s3_client.download_file(bucket_name, key, local_file_path) + print("File downloaded successfully.") + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + + +def parse_s3_uri(s3_uri): + # Remove the "s3://" prefix and split the remaining string into bucket name and key + s3_uri = s3_uri.replace("s3://", "") + parts = s3_uri.split("/") + bucket_name = parts[0] + folder = "/".join(parts[1:-1]) + key = parts[-1] + + return bucket_name, folder, key + + def download_file(url, local_file_path, reporthook): - if url_exists(url): + if is_s3_url(url): + # download from s3 + bucket_name, folder, key = parse_s3_uri(url) + s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + s3_handler = s3_handler(bucket_name, folder) + + elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) except Exception as e: @@ -271,8 +303,19 @@ def download_file(url, local_file_path, reporthook): raise Exception(f"Url does not exist {url}") +# def is_full_url(file_path): +# return file_path.find("http") != -1 or file_path.find("ftp") != -1 + + def is_full_url(file_path): - return file_path.find("http") != -1 or file_path.find("ftp") != -1 + url_regex = re.compile( + r"^(?:http|https|ftp|s3)://", re.IGNORECASE + ) # check http, https, ftp, s3 + return re.match(url_regex, file_path) is not None + + +def is_s3_url(file_path): + return file_path.find("s3://") != -1 def is_remote_path(file_path): From 7abaaf1216c3c608913774f04829326143401194 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 15 May 2024 10:43:56 -0700 Subject: [PATCH 02/25] Add S3 file download functionality --- cellpack/autopack/__init__.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 7ec8476a..9511b83f 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -290,9 +290,19 @@ def parse_s3_uri(s3_uri): def download_file(url, local_file_path, reporthook): if is_s3_url(url): # download from s3 + # bucket_name, folder, key = parse_s3_uri(url) + # s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) + # s3_handler = s3_handler(bucket_name, folder) + s3_client = boto3.client("s3") bucket_name, folder, key = parse_s3_uri(url) - s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - s3_handler = s3_handler(bucket_name, folder) + try: + s3_client.download_file(bucket_name, f"{folder}/{key}", local_file_path) + print("File downloaded successfully.") + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") elif url_exists(url): try: From 36d0ee182d2ce4ddc79a17aa642d78d31a62ec1b Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 14:54:00 -0700 Subject: [PATCH 03/25] Add grid cache directory --- cellpack/autopack/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 9511b83f..476c05a1 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -115,6 +115,7 @@ def url_exists(url): cache_geoms = appdata / "cache_geometries" cache_sphere = appdata / "cache_collisionTrees" cache_recipes = appdata / "cache_recipes" +cache_grids = appdata / "cache_grids" preferences = appdata / "preferences" # we can now use some json/xml file for storing preferences and options. # need others ? @@ -123,6 +124,7 @@ def url_exists(url): "results": cache_results, "collisionTrees": cache_sphere, "recipes": cache_recipes, + "grids": cache_grids, "prefs": preferences, } From 9af4a196c8c67905eea038abfd2b3a9f7c9ed43c Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 14:54:38 -0700 Subject: [PATCH 04/25] simplify grid loading logic and allow loading from URL --- cellpack/autopack/Environment.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cellpack/autopack/Environment.py b/cellpack/autopack/Environment.py index b9dcd713..a69d9397 100644 --- a/cellpack/autopack/Environment.py +++ b/cellpack/autopack/Environment.py @@ -76,7 +76,7 @@ from .Compartment import CompartmentList, Compartment from .Recipe import Recipe from .ingredient import GrowIngredient, ActinIngredient -from cellpack.autopack import IOutils +from cellpack.autopack import IOutils, get_local_file_location from .octree import Octree from .Gradient import Gradient from .transformation import signed_angle_between_vectors @@ -149,13 +149,14 @@ def __init__(self, config=None, recipe=None): self.grid_file_out = ( f"{self.out_folder}/{self.name}_{config['name']}_{self.version}_grid.dat" ) - if recipe.get("grid_file_path") is not None: - self.grid_file_out = recipe["grid_file_path"] - - should_load_grid_file = ( - os.path.isfile(self.grid_file_out) and self.load_from_grid_file - ) - self.previous_grid_file = self.grid_file_out if should_load_grid_file else None + self.previous_grid_file = None + if self.load_from_grid_file: + # first check if grid file path is specified in recipe + if recipe.get("grid_file_path") is not None: + self.grid_file_out = get_local_file_location(recipe["grid_file_path"], cache="grids") + # check if grid file is already present in the output folder + if os.path.isfile(self.grid_file_out): + self.previous_grid_file = self.grid_file_out self.setupfile = "" self.current_path = None # the path of the recipe file self.custom_paths = None From 5071e067acf2f339ec6ff2fca5270b3d817d7474 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 14:55:20 -0700 Subject: [PATCH 05/25] add test recipe and config for URL loading --- .../packing-configs/test_url_load_config.json | 10 +++ cellpack/tests/recipes/v2/test_url_load.json | 89 +++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 cellpack/tests/packing-configs/test_url_load_config.json create mode 100644 cellpack/tests/recipes/v2/test_url_load.json diff --git a/cellpack/tests/packing-configs/test_url_load_config.json b/cellpack/tests/packing-configs/test_url_load_config.json new file mode 100644 index 00000000..e3bf1b2a --- /dev/null +++ b/cellpack/tests/packing-configs/test_url_load_config.json @@ -0,0 +1,10 @@ +{ + "name": "test_s3_mesh_config", + "out": "out/test", + "save_analyze_result": true, + "save_plot_figures": true, + "load_from_grid_file": true, + "show_progress_bar": true, + "upload_results": false, + "open_in_browser": false +} \ No newline at end of file diff --git a/cellpack/tests/recipes/v2/test_url_load.json b/cellpack/tests/recipes/v2/test_url_load.json new file mode 100644 index 00000000..5b0ae878 --- /dev/null +++ b/cellpack/tests/recipes/v2/test_url_load.json @@ -0,0 +1,89 @@ +{ + "version": "1.0.0", + "format_version": "2.1", + "name": "test_s3_mesh", + "bounding_box": [ + [ + -5, + -5, + -5 + ], + [ + 5, + 5, + 5 + ] + ], + "objects": { + "membrane_mesh": { + "type": "mesh", + "color": [ + 1, + 0, + 1 + ], + "representations": { + "mesh": { + "path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/meshes", + "name": "sphere_4.obj", + "format": "obj" + } + } + }, + "nucleus_mesh": { + "type": "mesh", + "color": [ + 0, + 1, + 1 + ], + "representations": { + "mesh": { + "path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/meshes", + "name": "sphere_2.obj", + "format": "obj" + } + } + }, + "primitive_sphere": { + "type": "single_sphere", + "color": [ + 0.2, + 0.7, + 0.1 + ], + "radius": 0.5, + "packing_mode": "random" + } + }, + "composition": { + "bounding_area": { + "regions": { + "interior": [ + "membrane" + ] + } + }, + "membrane": { + "object": "membrane_mesh", + "count": 1, + "regions": { + "interior": [ + "nucleus", + { + "object": "primitive_sphere", + "count": 100 + } + ] + } + }, + "nucleus": { + "object": "nucleus_mesh", + "count": 1, + "regions": { + "interior": [] + } + } + }, + "grid_file_path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/grids/test_s3_mesh_test_s3_mesh_config_1.0.0_grid.dat" +} \ No newline at end of file From 9e7ec13c231e5c751571f118f976a9992f9be1ea Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:29:35 -0700 Subject: [PATCH 06/25] Update cache directory to be created in local repo --- cellpack/autopack/__init__.py | 50 ++++++++++++----------------------- 1 file changed, 17 insertions(+), 33 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 476c05a1..bf42b184 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -77,24 +77,9 @@ def make_directory_if_needed(directory): # #Setup autopack data directory. # ============================================================================== # the dir will have all the recipe + cache. - -APPNAME = "autoPACK" - - -if sys.platform == "darwin": - # from AppKit import NSSearchPathForDirectoriesInDomains - # http://developer.apple.com/DOCUMENTATION/Cocoa/Reference/Foundation/Miscellaneous/Foundation_Functions/Reference/reference.html#//apple_ref/c/func/NSSearchPathForDirectoriesInDomains - # NSApplicationSupportDirectory = 14 - # NSUserDomainMask = 1 - # True for expanding the tilde into a fully qualified path - # appdata = path.join(NSSearchPathForDirectoriesInDomains(14, 1, True)[0], APPNAME) - appdata = os.path.expanduser("~") + "/Library/Application Support/autoPACK" -elif sys.platform == "win32": - appdata = path.join(environ["APPDATA"], APPNAME) -else: - appdata = path.expanduser(path.join("~", "." + APPNAME)) +appdata = Path(__file__).parents[2] / ".cache" make_directory_if_needed(appdata) -log.info(f"autoPACK data dir created {appdata}") +log.info(f"cellPACK data dir created {appdata}") appdata = Path(appdata) @@ -110,16 +95,15 @@ def url_exists(url): # setup the cache directory inside the app data folder # ============================================================================== - -cache_results = appdata / "cache_results" -cache_geoms = appdata / "cache_geometries" -cache_sphere = appdata / "cache_collisionTrees" -cache_recipes = appdata / "cache_recipes" -cache_grids = appdata / "cache_grids" +cache_results = appdata / "results" +cache_geoms = appdata / "geometries" +cache_sphere = appdata / "collisionTrees" +cache_recipes = appdata / "recipes" +cache_grids = appdata / "grids" preferences = appdata / "preferences" # we can now use some json/xml file for storing preferences and options. # need others ? -cache_dir = { +CACHE_DIR = { "geometries": cache_geoms, "results": cache_results, "collisionTrees": cache_sphere, @@ -128,7 +112,7 @@ def url_exists(url): "prefs": preferences, } -for _, dir in cache_dir.items(): +for _, dir in CACHE_DIR.items(): make_directory_if_needed(dir) usePP = False @@ -355,7 +339,7 @@ def get_cache_location(name, cache, destination): name: str destination: str """ - local_file_directory = cache_dir[cache] / destination + local_file_directory = CACHE_DIR[cache] / destination local_file_path = local_file_directory / name make_directory_if_needed(local_file_directory) return local_file_path @@ -395,8 +379,8 @@ def get_local_file_location( # not url, use pathlib input_file_location = Path(input_file_location) - if os.path.isfile(cache_dir[cache] / input_file_location): - return cache_dir[cache] / input_file_location + if os.path.isfile(CACHE_DIR[cache] / input_file_location): + return CACHE_DIR[cache] / input_file_location if os.path.isfile(CURRENT_RECIPE_PATH / input_file_location): # if no folder provided, use the current_recipe_folder return CURRENT_RECIPE_PATH / input_file_location @@ -408,7 +392,7 @@ def get_local_file_location( if helper is not None: reporthook = helper.reporthook name = input_file_location - local_file_path = cache_dir[cache] / destination / name + local_file_path = CACHE_DIR[cache] / destination / name download_file(url, local_file_path, reporthook) return local_file_path return input_file_location @@ -585,12 +569,12 @@ def saveRecipeAvailableJSON(recipe_dictionary, filename): def clearCaches(*args): # can't work if file are open! - for k in cache_dir: + for k in CACHE_DIR: try: - shutil.rmtree(cache_dir[k]) - os.makedirs(cache_dir[k]) + shutil.rmtree(CACHE_DIR[k]) + os.makedirs(CACHE_DIR[k]) except: # noqa: E722 - print("problem cleaning ", cache_dir[k]) + print("problem cleaning ", CACHE_DIR[k]) def write_username_to_creds(): From 70bef4bae444b18fff28ab018193e5ebd35cc3bf Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:29:57 -0700 Subject: [PATCH 07/25] Add kwargs parameter to pack_grid method and clean_grid_cache option --- cellpack/autopack/Analysis.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/cellpack/autopack/Analysis.py b/cellpack/autopack/Analysis.py index 03bfec63..38f15217 100644 --- a/cellpack/autopack/Analysis.py +++ b/cellpack/autopack/Analysis.py @@ -1746,12 +1746,13 @@ def pack( self, seed=20, show_plotly_plot=True, + **kwargs, ): if show_plotly_plot: self.plotly.update_title(self.env.place_method) t1 = time() - results = self.env.pack_grid(seedNum=seed) + results = self.env.pack_grid(seedNum=seed, **kwargs) self.seed_to_results[seed] = results t2 = time() run_time = t2 - t1 @@ -2170,6 +2171,7 @@ def pack_one_seed( show_grid=False, plot_figures=False, save_gradient_data_as_image=False, + clean_grid_cache=False, ): """ Packs one seed of a recipe and returns the recipe object @@ -2190,6 +2192,7 @@ def pack_one_seed( seed=seed, # TODO: fix this to disable plotly if using simularium show_plotly_plot=(show_grid and two_d) and not use_simularium, + clean_grid_cache=clean_grid_cache, ) self.center = self.env.grid.getCenter() @@ -2385,6 +2388,7 @@ def doloop( save_gradient_data_as_image = packing_config_data.get( "save_gradient_data_as_image", False ) + clean_grid_cache = packing_config_data.get("clean_grid_cache", False) seed_list = get_seed_list(packing_config_data, recipe_data) if seed_list is None: @@ -2447,6 +2451,7 @@ def doloop( get_distance_distribution=get_distance_distribution, image_export_options=image_export_options, save_gradient_data_as_image=save_gradient_data_as_image, + clean_grid_cache=clean_grid_cache, ) ) for future in concurrent.futures.as_completed(futures): @@ -2489,6 +2494,7 @@ def doloop( show_grid=show_grid, plot_figures=plot_figures, save_gradient_data_as_image=save_gradient_data_as_image, + clean_grid_cache=clean_grid_cache, ) self.writeJSON(center_distance_file, center_distance_dict) From 03a172545c6b8d730d374a8271dd29f307cce98d Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:30:18 -0700 Subject: [PATCH 08/25] Update Environment.py with grid cache cleaning functionality --- cellpack/autopack/Environment.py | 26 ++++++++++++++++++++++---- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/cellpack/autopack/Environment.py b/cellpack/autopack/Environment.py index a69d9397..160251b4 100644 --- a/cellpack/autopack/Environment.py +++ b/cellpack/autopack/Environment.py @@ -76,7 +76,7 @@ from .Compartment import CompartmentList, Compartment from .Recipe import Recipe from .ingredient import GrowIngredient, ActinIngredient -from cellpack.autopack import IOutils, get_local_file_location +from cellpack.autopack import IOutils, get_cache_location, get_local_file_location from .octree import Octree from .Gradient import Gradient from .transformation import signed_angle_between_vectors @@ -153,10 +153,12 @@ def __init__(self, config=None, recipe=None): if self.load_from_grid_file: # first check if grid file path is specified in recipe if recipe.get("grid_file_path") is not None: - self.grid_file_out = get_local_file_location(recipe["grid_file_path"], cache="grids") + self.grid_file_out = get_local_file_location( + recipe["grid_file_path"], cache="grids" + ) # check if grid file is already present in the output folder if os.path.isfile(self.grid_file_out): - self.previous_grid_file = self.grid_file_out + self.previous_grid_file = self.grid_file_out self.setupfile = "" self.current_path = None # the path of the recipe file self.custom_paths = None @@ -279,6 +281,17 @@ def _setup(self): for gradient_data in self.recipe_data["gradients"]: self.set_gradient(gradient_data) + def clean_grid_cache(self, grid_file_name): + """ + Clean the grid cache + """ + local_file_path = get_cache_location( + name=grid_file_name, cache="grids", destination="" + ) + if os.path.exists(local_file_path): + print(f"Removing grid cache file: {local_file_path}") # TODO: change to log + os.remove(local_file_path) + def get_compartment_object_by_name(self, compartment_name): """ Returns compartment object by name @@ -499,7 +512,7 @@ def save_result( if not os.path.isfile(self.grid_file_out) and self.load_from_grid_file: # do not overwrite if grid was loaded from file self.grid.result_filename = self.grid_file_out - self.saveGridToFile(self.grid_file_out) + self.save_grids_to_pickle(self.grid_file_out) if save_grid_logs: self.saveGridLogsAsJson(self.result_file + "_grid-data.json") self.collectResultPerIngredient() @@ -2182,6 +2195,11 @@ def pack_grid( distances=distances, all_objects=all_objects, ) + + if kw.get("clean_grid_cache", False): + grid_file_name = str(self.previous_grid_file).split(os.path.sep)[-1] + self.clean_grid_cache(grid_file_name=grid_file_name) + return all_objects def restore_molecules_array(self, ingr): From e085763d38ec8ad3a7ca840ce1c296e6910438b9 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:30:25 -0700 Subject: [PATCH 09/25] Add clean_grid_cache option to default_values --- cellpack/autopack/loaders/config_loader.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellpack/autopack/loaders/config_loader.py b/cellpack/autopack/loaders/config_loader.py index 647cb248..d77b5bfd 100644 --- a/cellpack/autopack/loaders/config_loader.py +++ b/cellpack/autopack/loaders/config_loader.py @@ -23,6 +23,7 @@ class Inner_Grid_Methods(MetaEnum): class ConfigLoader(object): default_values = { + "clean_grid_cache": False, "format": "simularium", "load_from_grid_file": False, "inner_grid_method": "trimesh", From 2e572410e9e943667bc1cd870da127817cba1f81 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:30:34 -0700 Subject: [PATCH 10/25] Add clean.py script to clean local cache directory --- cellpack/bin/clean.py | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 cellpack/bin/clean.py diff --git a/cellpack/bin/clean.py b/cellpack/bin/clean.py new file mode 100644 index 00000000..6a6327d6 --- /dev/null +++ b/cellpack/bin/clean.py @@ -0,0 +1,32 @@ +# cleans the local cache directory +import shutil +from cellpack.autopack import CACHE_DIR +import fire +import os + + +def clean(): + """ + Cleans the local cache directory + :return: void + """ + for _, folder in CACHE_DIR.items(): + for filename in os.listdir(folder): + file_path = os.path.join(folder, filename) + try: + if os.path.isfile(file_path) or os.path.islink(file_path): + os.unlink(file_path) + elif os.path.isdir(file_path): + shutil.rmtree(file_path) + except Exception as e: + print(f"Failed to delete {file_path}. Exception: {e}") + print("Cache cleaned") + + +# Run directly from command line +def main(): + fire.Fire(clean) + + +if __name__ == "__main__": + main() From a5b39d8297ffc535556f4da1879ae0df0add5586 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:30:46 -0700 Subject: [PATCH 11/25] Add clean_grid_cache option to test_url_load_config.json --- cellpack/tests/packing-configs/test_url_load_config.json | 1 + 1 file changed, 1 insertion(+) diff --git a/cellpack/tests/packing-configs/test_url_load_config.json b/cellpack/tests/packing-configs/test_url_load_config.json index e3bf1b2a..84cf31b0 100644 --- a/cellpack/tests/packing-configs/test_url_load_config.json +++ b/cellpack/tests/packing-configs/test_url_load_config.json @@ -2,6 +2,7 @@ "name": "test_s3_mesh_config", "out": "out/test", "save_analyze_result": true, + "clean_grid_cache": true, "save_plot_figures": true, "load_from_grid_file": true, "show_progress_bar": true, From c299197a572ae3b651ad26f6085ea5c0bb6802d0 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:33:50 -0700 Subject: [PATCH 12/25] Update clean_grid_cache flag to false --- cellpack/tests/packing-configs/test_url_load_config.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cellpack/tests/packing-configs/test_url_load_config.json b/cellpack/tests/packing-configs/test_url_load_config.json index 84cf31b0..6f94ecda 100644 --- a/cellpack/tests/packing-configs/test_url_load_config.json +++ b/cellpack/tests/packing-configs/test_url_load_config.json @@ -2,7 +2,7 @@ "name": "test_s3_mesh_config", "out": "out/test", "save_analyze_result": true, - "clean_grid_cache": true, + "clean_grid_cache": false, "save_plot_figures": true, "load_from_grid_file": true, "show_progress_bar": true, From 705a8112952c282ce856b3d4aed05b84ae64eb38 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 29 May 2024 16:58:41 -0700 Subject: [PATCH 13/25] Linting: remove unused imports --- cellpack/autopack/__init__.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index bf42b184..964f91ca 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -36,11 +36,9 @@ """ import logging import logging.config -import sys import os import re import shutil -from os import path, environ import getpass from pathlib import Path import urllib.request as urllib @@ -61,7 +59,9 @@ os.environ["NUMEXPR_MAX_THREADS"] = "32" ############################################################################### -log_file_path = path.join(path.dirname(path.abspath(__file__)), "../logging.conf") +log_file_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "../logging.conf" +) logging.config.fileConfig(log_file_path, disable_existing_loggers=False) log = logging.getLogger("autopack") log.propagate = False From f5677c3de2c1b4aaeddf2adf7fe63c7bd9dfd5cc Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Mon, 3 Jun 2024 13:08:14 -0700 Subject: [PATCH 14/25] add back sys import --- cellpack/autopack/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index b86a2194..4c560209 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -37,6 +37,7 @@ import logging import logging.config import os +import sys import re import shutil import getpass From db7d7471e1ddb7457318a259fb27120cbd41d5d3 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Tue, 11 Jun 2024 14:03:57 -0700 Subject: [PATCH 15/25] Sort imports --- cellpack/autopack/__init__.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 4c560209..185d7100 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -34,25 +34,26 @@ AF @author: Ludovic Autin with editing by Graham Johnson """ +import getpass +import json import logging import logging.config import os -import sys import re import shutil -import getpass -from pathlib import Path +import ssl +import sys import urllib.request as urllib from collections import OrderedDict -import ssl -import json -from cellpack.autopack.DBRecipeHandler import DBRecipeLoader -from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS +from pathlib import Path -from cellpack.autopack.loaders.utils import read_json_file, write_json_file import boto3 import botocore +from cellpack.autopack.DBRecipeHandler import DBRecipeLoader +from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS +from cellpack.autopack.loaders.utils import read_json_file, write_json_file + packageContainsVFCommands = 1 ssl._create_default_https_context = ssl._create_unverified_context use_json_hook = True From 33c439aa638b690cf03213e81d264167aff3cb28 Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Tue, 11 Jun 2024 14:13:17 -0700 Subject: [PATCH 16/25] remove unused function --- cellpack/autopack/__init__.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 185d7100..3878458a 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -301,10 +301,6 @@ def download_file(url, local_file_path, reporthook): raise Exception(f"Url does not exist {url}") -# def is_full_url(file_path): -# return file_path.find("http") != -1 or file_path.find("ftp") != -1 - - def is_full_url(file_path): url_regex = re.compile( r"^(?:http|https|ftp|s3)://", re.IGNORECASE From ecb96ef2d8c828c21460947f84b5093f425da5d3 Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Tue, 11 Jun 2024 16:53:19 -0700 Subject: [PATCH 17/25] move aws methods to AWSHandler --- cellpack/autopack/AWSHandler.py | 15 +++++++++++- cellpack/autopack/__init__.py | 42 +++++++-------------------------- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index 0bbecf0b..f9f5d323 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -19,7 +19,7 @@ def __init__( self, bucket_name, sub_folder_name=None, - region_name="us-west-2", + region_name=None, ): self.bucket_name = bucket_name self.folder_name = sub_folder_name @@ -125,3 +125,16 @@ def save_file_and_get_url(self, file_path): print(f"AWS credentials are not configured, details:{e}") return None, None return None, None + + def download_file_from_s3(self, key, local_file_path): + try: + self.s3_client.download_file(self.bucket_name, key, local_file_path) + print("File downloaded successfully.") + except ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + + def is_s3_url(self, file_path): + return file_path.find("s3://") != -1 diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 3878458a..7805b0cf 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -250,20 +250,6 @@ def updateReplacePath(newPaths): REPLACE_PATH[w[0]] = w[1] -def download_file_from_s3(s3_uri, local_file_path): - s3_client = boto3.client("s3") - bucket_name, key = parse_s3_uri(s3_uri) - - try: - s3_client.download_file(bucket_name, key, local_file_path) - print("File downloaded successfully.") - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - print("An error occurred while downloading the file.") - - def parse_s3_uri(s3_uri): # Remove the "s3://" prefix and split the remaining string into bucket name and key s3_uri = s3_uri.replace("s3://", "") @@ -275,23 +261,15 @@ def parse_s3_uri(s3_uri): return bucket_name, folder, key -def download_file(url, local_file_path, reporthook): - if is_s3_url(url): - # download from s3 - # bucket_name, folder, key = parse_s3_uri(url) - # s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - # s3_handler = s3_handler(bucket_name, folder) - s3_client = boto3.client("s3") +def download_file(url, local_file_path, reporthook, database_name="aws"): + if database_name == "aws": + db = DATABASE_IDS.handlers().get(database_name) bucket_name, folder, key = parse_s3_uri(url) - try: - s3_client.download_file(bucket_name, f"{folder}/{key}", local_file_path) - print("File downloaded successfully.") - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - print("An error occurred while downloading the file.") - + initialize_db = db( + bucket_name=bucket_name, sub_folder_name=folder, region_name="us-west-2" + ) + if initialize_db.is_s3_url(url): + initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) @@ -308,10 +286,6 @@ def is_full_url(file_path): return re.match(url_regex, file_path) is not None -def is_s3_url(file_path): - return file_path.find("s3://") != -1 - - def is_remote_path(file_path): """ @param file_path: str From 6500445be87925848c5efc67535d9e3bc3be48f7 Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Tue, 11 Jun 2024 17:28:23 -0700 Subject: [PATCH 18/25] move s3 url check back to autopack --- cellpack/autopack/AWSHandler.py | 3 --- cellpack/autopack/__init__.py | 9 ++++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index f9f5d323..eaddd782 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -135,6 +135,3 @@ def download_file_from_s3(self, key, local_file_path): print("The object does not exist.") else: print("An error occurred while downloading the file.") - - def is_s3_url(self, file_path): - return file_path.find("s3://") != -1 diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 7805b0cf..51996af2 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -261,15 +261,18 @@ def parse_s3_uri(s3_uri): return bucket_name, folder, key +def is_s3_url(file_path): + return file_path.find("s3://") != -1 + + def download_file(url, local_file_path, reporthook, database_name="aws"): - if database_name == "aws": + if is_s3_url(url): db = DATABASE_IDS.handlers().get(database_name) bucket_name, folder, key = parse_s3_uri(url) initialize_db = db( bucket_name=bucket_name, sub_folder_name=folder, region_name="us-west-2" ) - if initialize_db.is_s3_url(url): - initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) + initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) From 1156f61c72ead6e97a49278ca5ebf078d0941a2c Mon Sep 17 00:00:00 2001 From: Saurabh Mogre Date: Wed, 12 Jun 2024 10:12:02 -0700 Subject: [PATCH 19/25] Update recipe and config --- .../tests/packing-configs/test_url_load_config.json | 10 +++------- cellpack/tests/recipes/v2/test_url_load.json | 4 ++-- 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/cellpack/tests/packing-configs/test_url_load_config.json b/cellpack/tests/packing-configs/test_url_load_config.json index 6f94ecda..0039a028 100644 --- a/cellpack/tests/packing-configs/test_url_load_config.json +++ b/cellpack/tests/packing-configs/test_url_load_config.json @@ -1,11 +1,7 @@ { - "name": "test_s3_mesh_config", - "out": "out/test", - "save_analyze_result": true, + "name": "test_url_load_config", "clean_grid_cache": false, - "save_plot_figures": true, "load_from_grid_file": true, - "show_progress_bar": true, - "upload_results": false, - "open_in_browser": false + "out": "cellpack/tests/outputs", + "save_analyze_result": true } \ No newline at end of file diff --git a/cellpack/tests/recipes/v2/test_url_load.json b/cellpack/tests/recipes/v2/test_url_load.json index 5b0ae878..b1abb816 100644 --- a/cellpack/tests/recipes/v2/test_url_load.json +++ b/cellpack/tests/recipes/v2/test_url_load.json @@ -1,7 +1,7 @@ { "version": "1.0.0", "format_version": "2.1", - "name": "test_s3_mesh", + "name": "test_url_loading", "bounding_box": [ [ -5, @@ -85,5 +85,5 @@ } } }, - "grid_file_path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/grids/test_s3_mesh_test_s3_mesh_config_1.0.0_grid.dat" + "grid_file_path": "https://cellpack-results.s3.us-west-2.amazonaws.com/data/grids/nested_mesh_grid.dat" } \ No newline at end of file From ffd573599a81833bd636777efde39031cbe0695c Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Wed, 12 Jun 2024 12:56:19 -0700 Subject: [PATCH 20/25] rename function and add docstring --- cellpack/autopack/AWSHandler.py | 26 ++++++++++++++++---------- cellpack/autopack/__init__.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index eaddd782..48f7fd62 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -69,6 +69,22 @@ def upload_file(self, file_path): return False return file_name + def download_file(self, key, local_file_path): + """ + Download a file from S3 + :param key: S3 object key + :param local_file_path: Local file path to save the downloaded file + """ + + try: + self.s3_client.download_file(self.bucket_name, key, local_file_path) + print("File downloaded successfully.") + except ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + def create_presigned_url(self, object_name, expiration=3600): """Generate a presigned URL to share an S3 object :param object_name: string @@ -125,13 +141,3 @@ def save_file_and_get_url(self, file_path): print(f"AWS credentials are not configured, details:{e}") return None, None return None, None - - def download_file_from_s3(self, key, local_file_path): - try: - self.s3_client.download_file(self.bucket_name, key, local_file_path) - print("File downloaded successfully.") - except ClientError as e: - if e.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - print("An error occurred while downloading the file.") diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 51996af2..c8ff6b75 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -272,7 +272,7 @@ def download_file(url, local_file_path, reporthook, database_name="aws"): initialize_db = db( bucket_name=bucket_name, sub_folder_name=folder, region_name="us-west-2" ) - initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) + initialize_db.download_file(f"{folder}/{key}", local_file_path) elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) From 65f0bbf6c3e0b902a4a25d68efe805ca0514b18a Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Wed, 12 Jun 2024 12:59:59 -0700 Subject: [PATCH 21/25] remove unused imports --- cellpack/autopack/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index c8ff6b75..3ff238da 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -47,8 +47,6 @@ from collections import OrderedDict from pathlib import Path -import boto3 -import botocore from cellpack.autopack.DBRecipeHandler import DBRecipeLoader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS From f1d4619f7489555dd11bdbc13e769f03d0670baa Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Tue, 11 Jun 2024 16:53:19 -0700 Subject: [PATCH 22/25] move aws methods to AWSHandler --- cellpack/autopack/AWSHandler.py | 15 +++++++++++- cellpack/autopack/__init__.py | 42 +++++++-------------------------- 2 files changed, 22 insertions(+), 35 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index 0bbecf0b..f9f5d323 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -19,7 +19,7 @@ def __init__( self, bucket_name, sub_folder_name=None, - region_name="us-west-2", + region_name=None, ): self.bucket_name = bucket_name self.folder_name = sub_folder_name @@ -125,3 +125,16 @@ def save_file_and_get_url(self, file_path): print(f"AWS credentials are not configured, details:{e}") return None, None return None, None + + def download_file_from_s3(self, key, local_file_path): + try: + self.s3_client.download_file(self.bucket_name, key, local_file_path) + print("File downloaded successfully.") + except ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + + def is_s3_url(self, file_path): + return file_path.find("s3://") != -1 diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 3878458a..7805b0cf 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -250,20 +250,6 @@ def updateReplacePath(newPaths): REPLACE_PATH[w[0]] = w[1] -def download_file_from_s3(s3_uri, local_file_path): - s3_client = boto3.client("s3") - bucket_name, key = parse_s3_uri(s3_uri) - - try: - s3_client.download_file(bucket_name, key, local_file_path) - print("File downloaded successfully.") - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - print("An error occurred while downloading the file.") - - def parse_s3_uri(s3_uri): # Remove the "s3://" prefix and split the remaining string into bucket name and key s3_uri = s3_uri.replace("s3://", "") @@ -275,23 +261,15 @@ def parse_s3_uri(s3_uri): return bucket_name, folder, key -def download_file(url, local_file_path, reporthook): - if is_s3_url(url): - # download from s3 - # bucket_name, folder, key = parse_s3_uri(url) - # s3_handler = DATABASE_IDS.handlers().get(DATABASE_IDS.AWS) - # s3_handler = s3_handler(bucket_name, folder) - s3_client = boto3.client("s3") +def download_file(url, local_file_path, reporthook, database_name="aws"): + if database_name == "aws": + db = DATABASE_IDS.handlers().get(database_name) bucket_name, folder, key = parse_s3_uri(url) - try: - s3_client.download_file(bucket_name, f"{folder}/{key}", local_file_path) - print("File downloaded successfully.") - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - print("An error occurred while downloading the file.") - + initialize_db = db( + bucket_name=bucket_name, sub_folder_name=folder, region_name="us-west-2" + ) + if initialize_db.is_s3_url(url): + initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) @@ -308,10 +286,6 @@ def is_full_url(file_path): return re.match(url_regex, file_path) is not None -def is_s3_url(file_path): - return file_path.find("s3://") != -1 - - def is_remote_path(file_path): """ @param file_path: str From 43becb902b2144cbac4dfc77cc6cccb95ed67ad1 Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Tue, 11 Jun 2024 17:28:23 -0700 Subject: [PATCH 23/25] move s3 url check back to autopack --- cellpack/autopack/AWSHandler.py | 3 --- cellpack/autopack/__init__.py | 9 ++++++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index f9f5d323..eaddd782 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -135,6 +135,3 @@ def download_file_from_s3(self, key, local_file_path): print("The object does not exist.") else: print("An error occurred while downloading the file.") - - def is_s3_url(self, file_path): - return file_path.find("s3://") != -1 diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 7805b0cf..51996af2 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -261,15 +261,18 @@ def parse_s3_uri(s3_uri): return bucket_name, folder, key +def is_s3_url(file_path): + return file_path.find("s3://") != -1 + + def download_file(url, local_file_path, reporthook, database_name="aws"): - if database_name == "aws": + if is_s3_url(url): db = DATABASE_IDS.handlers().get(database_name) bucket_name, folder, key = parse_s3_uri(url) initialize_db = db( bucket_name=bucket_name, sub_folder_name=folder, region_name="us-west-2" ) - if initialize_db.is_s3_url(url): - initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) + initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) From f826346e53b8f59f5b42623ce417bedbe5326e8f Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Wed, 12 Jun 2024 12:56:19 -0700 Subject: [PATCH 24/25] rename function and add docstring --- cellpack/autopack/AWSHandler.py | 26 ++++++++++++++++---------- cellpack/autopack/__init__.py | 2 +- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/cellpack/autopack/AWSHandler.py b/cellpack/autopack/AWSHandler.py index eaddd782..48f7fd62 100644 --- a/cellpack/autopack/AWSHandler.py +++ b/cellpack/autopack/AWSHandler.py @@ -69,6 +69,22 @@ def upload_file(self, file_path): return False return file_name + def download_file(self, key, local_file_path): + """ + Download a file from S3 + :param key: S3 object key + :param local_file_path: Local file path to save the downloaded file + """ + + try: + self.s3_client.download_file(self.bucket_name, key, local_file_path) + print("File downloaded successfully.") + except ClientError as e: + if e.response["Error"]["Code"] == "404": + print("The object does not exist.") + else: + print("An error occurred while downloading the file.") + def create_presigned_url(self, object_name, expiration=3600): """Generate a presigned URL to share an S3 object :param object_name: string @@ -125,13 +141,3 @@ def save_file_and_get_url(self, file_path): print(f"AWS credentials are not configured, details:{e}") return None, None return None, None - - def download_file_from_s3(self, key, local_file_path): - try: - self.s3_client.download_file(self.bucket_name, key, local_file_path) - print("File downloaded successfully.") - except ClientError as e: - if e.response["Error"]["Code"] == "404": - print("The object does not exist.") - else: - print("An error occurred while downloading the file.") diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index 51996af2..c8ff6b75 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -272,7 +272,7 @@ def download_file(url, local_file_path, reporthook, database_name="aws"): initialize_db = db( bucket_name=bucket_name, sub_folder_name=folder, region_name="us-west-2" ) - initialize_db.download_file_from_s3(f"{folder}/{key}", local_file_path) + initialize_db.download_file(f"{folder}/{key}", local_file_path) elif url_exists(url): try: urllib.urlretrieve(url, local_file_path, reporthook=reporthook) From 9cc8362476380cef271f7309fcc1aad62a12009e Mon Sep 17 00:00:00 2001 From: Ruge Li Date: Wed, 12 Jun 2024 12:59:59 -0700 Subject: [PATCH 25/25] remove unused imports --- cellpack/autopack/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cellpack/autopack/__init__.py b/cellpack/autopack/__init__.py index c8ff6b75..3ff238da 100755 --- a/cellpack/autopack/__init__.py +++ b/cellpack/autopack/__init__.py @@ -47,8 +47,6 @@ from collections import OrderedDict from pathlib import Path -import boto3 -import botocore from cellpack.autopack.DBRecipeHandler import DBRecipeLoader from cellpack.autopack.interface_objects.database_ids import DATABASE_IDS