From 845a9e62594e84a0f51d0c7f026c51862e3d475e Mon Sep 17 00:00:00 2001 From: Robin Andersson Date: Wed, 18 Oct 2023 10:16:19 +0000 Subject: [PATCH 1/6] [HWORKS-800] Implement dataset.copy and dataset.move API --- python/hopsworks/core/dataset_api.py | 60 ++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index c18f71f9e..34c02576d 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -388,3 +388,63 @@ def mkdir(self, path: str): return _client._send_request( "POST", path_params, headers=headers, query_params=query_params )["attributes"]["path"] + + def copy(self, source_path: str, destination_path: str): + """Copy a file or directory in the Hopsworks Filesystem. + + ```python + + import hopsworks + + project = hopsworks.login() + + dataset_api = project.get_dataset_api() + + directory_path = dataset_api.copy("Resources/myfile.txt", "Logs/myfile.txt") + + ``` + # Arguments + source_path: the source path to copy + destination_path: the destination path + # Raises + `RestAPIError`: If unable to perform the copy + """ + _client = client.get_instance() + path_params = ["project", self._project_id, "dataset", path] + query_params = { + "action": "copy", + "destination_path": destination_path, + } + _client._send_request( + "POST", path_params, query_params=query_params + )["attributes"]["path"] + + def move(self, source_path: str, destination_path: str): + """Move a file or directory in the Hopsworks Filesystem. + + ```python + + import hopsworks + + project = hopsworks.login() + + dataset_api = project.get_dataset_api() + + directory_path = dataset_api.move("Resources/myfile.txt", "Logs/myfile.txt") + + ``` + # Arguments + source_path: the source path to move + destination_path: the destination path + # Raises + `RestAPIError`: If unable to perform the move + """ + _client = client.get_instance() + path_params = ["project", self._project_id, "dataset", path] + query_params = { + "action": "move", + "destination_path": destination_path, + } + _client._send_request( + "POST", path_params, query_params=query_params + )["attributes"]["path"] \ No newline at end of file From 84f78f12378626bcc29658426c0ed8d8034dc5fe Mon Sep 17 00:00:00 2001 From: Robin Andersson Date: Wed, 18 Oct 2023 10:19:38 +0000 Subject: [PATCH 2/6] remove wrong code --- python/hopsworks/core/dataset_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index 34c02576d..5cc2fa308 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -417,7 +417,7 @@ def copy(self, source_path: str, destination_path: str): } _client._send_request( "POST", path_params, query_params=query_params - )["attributes"]["path"] + ) def move(self, source_path: str, destination_path: str): """Move a file or directory in the Hopsworks Filesystem. @@ -447,4 +447,4 @@ def move(self, source_path: str, destination_path: str): } _client._send_request( "POST", path_params, query_params=query_params - )["attributes"]["path"] \ No newline at end of file + ) \ No newline at end of file From dcdb1809974b75ae03972c5dddda637e0b881bb9 Mon Sep 17 00:00:00 2001 From: Robin Andersson Date: Wed, 18 Oct 2023 11:08:41 +0000 Subject: [PATCH 3/6] fix --- python/hopsworks/core/dataset_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index 5cc2fa308..9fb29e73c 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -400,7 +400,7 @@ def copy(self, source_path: str, destination_path: str): dataset_api = project.get_dataset_api() - directory_path = dataset_api.copy("Resources/myfile.txt", "Logs/myfile.txt") + directory_path = dataset_api.copy("Resources/myfile.txt", "Logs") ``` # Arguments @@ -410,7 +410,7 @@ def copy(self, source_path: str, destination_path: str): `RestAPIError`: If unable to perform the copy """ _client = client.get_instance() - path_params = ["project", self._project_id, "dataset", path] + path_params = ["project", self._project_id, "dataset", source_path] query_params = { "action": "copy", "destination_path": destination_path, @@ -430,7 +430,7 @@ def move(self, source_path: str, destination_path: str): dataset_api = project.get_dataset_api() - directory_path = dataset_api.move("Resources/myfile.txt", "Logs/myfile.txt") + directory_path = dataset_api.move("Resources/myfile.txt", "Logs") ``` # Arguments @@ -440,7 +440,7 @@ def move(self, source_path: str, destination_path: str): `RestAPIError`: If unable to perform the move """ _client = client.get_instance() - path_params = ["project", self._project_id, "dataset", path] + path_params = ["project", self._project_id, "dataset", source_path] query_params = { "action": "move", "destination_path": destination_path, From b8639eeb50fb39a673a95c2c9d691685e337bddc Mon Sep 17 00:00:00 2001 From: Robin Andersson Date: Wed, 18 Oct 2023 12:01:26 +0000 Subject: [PATCH 4/6] fix --- python/hopsworks/core/dataset_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index 9fb29e73c..eb7fcae56 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -400,7 +400,7 @@ def copy(self, source_path: str, destination_path: str): dataset_api = project.get_dataset_api() - directory_path = dataset_api.copy("Resources/myfile.txt", "Logs") + directory_path = dataset_api.copy("Resources/myfile.txt", "Logs/myfile.txt") ``` # Arguments @@ -430,7 +430,7 @@ def move(self, source_path: str, destination_path: str): dataset_api = project.get_dataset_api() - directory_path = dataset_api.move("Resources/myfile.txt", "Logs") + directory_path = dataset_api.move("Resources/myfile.txt", "Logs/myfile.txt") ``` # Arguments From 8d79ce1760fb9dfbf74a0e81483e3bc4f944fc42 Mon Sep 17 00:00:00 2001 From: Robin Andersson Date: Wed, 18 Oct 2023 13:08:05 +0000 Subject: [PATCH 5/6] fix --- python/hopsworks/core/dataset_api.py | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index eb7fcae56..9ea083d29 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -389,7 +389,7 @@ def mkdir(self, path: str): "POST", path_params, headers=headers, query_params=query_params )["attributes"]["path"] - def copy(self, source_path: str, destination_path: str): + def copy(self, source_path: str, destination_path: str, overwrite: bool = False): """Copy a file or directory in the Hopsworks Filesystem. ```python @@ -406,9 +406,20 @@ def copy(self, source_path: str, destination_path: str): # Arguments source_path: the source path to copy destination_path: the destination path + overwrite: overwrite destination if exists # Raises `RestAPIError`: If unable to perform the copy """ + if self.exists(destination_path): + if overwrite: + self.remove(destination_path) + else: + raise DatasetException( + "{} already exists, set overwrite=True to overwrite it".format( + destination_path + ) + ) + _client = client.get_instance() path_params = ["project", self._project_id, "dataset", source_path] query_params = { @@ -419,7 +430,7 @@ def copy(self, source_path: str, destination_path: str): "POST", path_params, query_params=query_params ) - def move(self, source_path: str, destination_path: str): + def move(self, source_path: str, destination_path: str, overwrite: bool = False): """Move a file or directory in the Hopsworks Filesystem. ```python @@ -436,9 +447,21 @@ def move(self, source_path: str, destination_path: str): # Arguments source_path: the source path to move destination_path: the destination path + overwrite: overwrite destination if exists # Raises `RestAPIError`: If unable to perform the move """ + + if self.exists(destination_path): + if overwrite: + self.remove(destination_path) + else: + raise DatasetException( + "{} already exists, set overwrite=True to overwrite it".format( + destination_path + ) + ) + _client = client.get_instance() path_params = ["project", self._project_id, "dataset", source_path] query_params = { From 60ce46bef6591c75cbd55b7ab58f89703c637882 Mon Sep 17 00:00:00 2001 From: Robin Andersson Date: Wed, 18 Oct 2023 15:07:50 +0000 Subject: [PATCH 6/6] fix formatting --- python/hopsworks/core/dataset_api.py | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/python/hopsworks/core/dataset_api.py b/python/hopsworks/core/dataset_api.py index 9ea083d29..285083cac 100644 --- a/python/hopsworks/core/dataset_api.py +++ b/python/hopsworks/core/dataset_api.py @@ -426,9 +426,7 @@ def copy(self, source_path: str, destination_path: str, overwrite: bool = False) "action": "copy", "destination_path": destination_path, } - _client._send_request( - "POST", path_params, query_params=query_params - ) + _client._send_request("POST", path_params, query_params=query_params) def move(self, source_path: str, destination_path: str, overwrite: bool = False): """Move a file or directory in the Hopsworks Filesystem. @@ -468,6 +466,4 @@ def move(self, source_path: str, destination_path: str, overwrite: bool = False) "action": "move", "destination_path": destination_path, } - _client._send_request( - "POST", path_params, query_params=query_params - ) \ No newline at end of file + _client._send_request("POST", path_params, query_params=query_params)