Skip to content

Commit

Permalink
LocalDataset: Added support for smart push and pull
Browse files Browse the repository at this point in the history
  • Loading branch information
girgink committed Mar 22, 2023
1 parent c117c71 commit 586fbd6
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/fairly/dataset/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def get_files(self, refresh: bool=False) -> Dict[str, File]:
Returns:
Dictionary of files of the dataset (key = path, value = File object)
"""
if self._files is None or refresh:
if self._files is None or refresh or self.auto_refresh:
files = {}
for file in self._get_files():
files[file.path] = file
Expand Down
83 changes: 81 additions & 2 deletions src/fairly/dataset/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ class LocalDataset(Dataset):

_regexps: Dict = {}

def __init__(self, path: str, auto_refresh: bool=False):
def __init__(self, path: str, auto_refresh: bool=True):
"""Initializes LocalDataset object.
Args:
Expand Down Expand Up @@ -551,4 +551,83 @@ def synchronize(self, source, notify: Callable=None) -> None:

def reproduce(self) -> LocalDataset:
    """Reproduces an actual copy of the dataset.

    Returns:
        New LocalDataset object created from the path of this dataset.
    """
    # Defined once only: a second identical definition would just shadow
    # this one without changing the behavior.
    return LocalDataset(self.path)


def get_remote_dataset(self, remote=None) -> RemoteDataset:
    """Resolves the remote dataset that corresponds to the local dataset.

    Args:
        remote: RemoteDataset object, key of an entry in
            ``self.remote_datasets``, or None.

    Returns:
        - ``remote`` itself if it is a RemoteDataset object,
        - otherwise, if ``remote`` is empty and the metadata has a "doi"
          value, the result of ``fairly.dataset()`` for that DOI,
        - otherwise the entry of ``self.remote_datasets`` stored under
          ``remote``, or its first entry if there is no such key,
        - None if none of the above applies.
    """
    # A remote dataset object is returned as it is
    if isinstance(remote, RemoteDataset):
        return remote

    # Without an explicit remote, the DOI of the dataset takes precedence
    if not remote and self.metadata.get("doi"):
        return fairly.dataset(self.metadata["doi"])

    candidates = self.remote_datasets
    if remote in candidates:
        return candidates[remote]

    # Fall back to the first known remote dataset, if there is any
    if candidates:
        return next(iter(candidates.values()))

    return None


def push(self, target=None, notify: Callable=None) -> RemoteDataset:
    """Pushes the changes of the local dataset to a remote dataset.

    Metadata is set and saved on the remote dataset if
    ``self.diff_metadata(remote)`` reports a difference. Files are then
    synchronized according to ``self.diff_files(remote)``: added files are
    uploaded, removed files are deleted from the remote dataset, and
    modified files are deleted and uploaded again.

    Args:
        target: Target dataset, resolved by ``get_remote_dataset()``.
        notify: Notification callback forwarded to every file upload.

    Returns:
        Remote dataset that received the changes.

    Raises:
        ValueError: If no target dataset can be resolved.
    """
    remote = self.get_remote_dataset(target)
    if not remote:
        raise ValueError("No target dataset")

    if self.diff_metadata(remote):
        remote.set_metadata(**self.metadata)
        remote.save_metadata()

    diff = self.diff_files(remote)
    if diff:
        client = remote.client

        for file in diff.added.values():
            client.upload_file(remote, file, notify=notify)

        for file in diff.removed.values():
            client.delete_file(remote, file)

        for file, remote_file in diff.modified.values():
            # Replace the outdated remote copy with the local version
            client.delete_file(remote, remote_file)
            # The callback is forwarded here as well, so that re-uploads
            # of modified files are reported like uploads of new files
            client.upload_file(remote, file, notify=notify)

        # Renew the cached file list of the remote dataset
        remote.get_files(refresh=True)

    return remote


def pull(self, source=None, notify: Callable=None) -> RemoteDataset:
    """Pulls the changes of a remote dataset to the local dataset.

    Metadata of the remote dataset is set and saved on the local dataset
    if ``remote.diff_metadata(self)`` reports a difference. Files are then
    synchronized according to ``remote.diff_files(self)``: added files are
    downloaded to the path of the local dataset, removed files are deleted
    from disk, and modified files are deleted and downloaded again.

    Args:
        source: Source dataset, resolved by ``get_remote_dataset()``.
        notify: Notification callback forwarded to every file download.

    Returns:
        Remote dataset that the changes are pulled from.

    Raises:
        ValueError: If no source dataset can be resolved.
    """
    remote = self.get_remote_dataset(source)
    if not remote:
        raise ValueError("No source dataset")

    if remote.diff_metadata(self):
        self.set_metadata(**remote.metadata)
        self.save_metadata()

    diff = remote.diff_files(self)
    if diff:
        client = remote.client

        for file in diff.added.values():
            client.download_file(file, path=self.path, notify=notify)

        # NOTE(review): the diff is computed from the remote side here;
        # confirm that the `removed` entries and the first item of each
        # `modified` pair are local files that provide `fullpath`.
        for file in diff.removed.values():
            os.remove(file.fullpath)

        for file, remote_file in diff.modified.values():
            os.remove(file.fullpath)
            client.download_file(remote_file, path=self.path, notify=notify)

        # Renew the cached file list of the local dataset
        self.get_files(refresh=True)

    return remote
2 changes: 1 addition & 1 deletion src/fairly/dataset/remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class RemoteDataset(Dataset):
"""

def __init__(self, client, id=None, auto_refresh: bool=False, details: Dict=None, **kwargs):
def __init__(self, client, id=None, auto_refresh: bool=True, details: Dict=None, **kwargs):
"""Initializes RemoteDataset object.
Args:
Expand Down

0 comments on commit 586fbd6

Please sign in to comment.