Skip to content

Commit

Permalink
Merge in main
Browse files Browse the repository at this point in the history
  • Loading branch information
golharam committed Feb 13, 2025
2 parents 7351e85 + 4bd6e11 commit 48d37b2
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 9 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

# Ignore virtual environment directories
venv/
env/
*env/

# Ignore __pycache__ directories
**/__pycache__/
Expand Down
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

## [v0.2.3](https://github.com/NGS360/PAML.git/releases/tag/v0.2.3) - 2025-02-12

- Fix breakage caused by the removal of collection.all_files() from the Arvados Python SDK.

## [v0.2.2](https://github.com/NGS360/PAML.git/releases/tag/v0.2.2) - 2025-01-27

- Downgraded support for arvados-python to 2.7.4

## [v0.3-rc3](https://github.com/NGS360/PAML.git/releases/tag/v0.3-rc3) - 2024-12-16

### Changed
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "cwl_platform"
version = "0.2.2"
version = "0.2.3"
authors = [
{ name="Ryan Golhar", email="[email protected]" }
]
Expand Down
26 changes: 20 additions & 6 deletions src/cwl_platform/arvados_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,21 @@ def default(self, o):
return o.to_dict()
return super().default(o)


class StreamFileReader(arvados.arvfile.ArvadosFileReader):
    """File reader that exposes ``name`` both as an attribute and as a callable.

    Wraps an Arvados file object and adds a ``stream_name()`` that returns the
    stream path without its leading ``./`` marker.
    """

    class _NameAttribute(str):
        # The Python file API provides a plain .name attribute.
        # Older SDK provided a name() method.
        # This class provides both, for maximum compatibility.
        def __call__(self):
            return self

    def __init__(self, arvadosfile):
        """Build a reader for *arvadosfile* and install the dual-purpose name."""
        super().__init__(arvadosfile)
        self.name = self._NameAttribute(arvadosfile.name)

    def stream_name(self):
        """Return the parent stream name with the leading ``./`` removed.

        Only the literal ``./`` prefix is dropped.  ``str.lstrip("./")`` would
        treat its argument as a *set* of characters and also eat the leading
        dot of a directory such as ``./.hidden``.
        """
        name = super().stream_name()
        if name == ".":
            # A bare "." becomes the empty string, as lstrip("./") produced.
            return ""
        if name.startswith("./"):
            return name[2:]
        return name

# custom JSON decoder
def arvados_task_decoder(obj):
"""Arvados Task Decoder class"""
Expand Down Expand Up @@ -131,11 +145,11 @@ def _all_files(self, root_collection):
subcollection = root_collection.find(str(stream_path))
for name, item in subcollection.items():
if isinstance(item, arvados.arvfile.ArvadosFile):
yield (stream_path / name, item)
yield StreamFileReader(item)
else:
stream_queue.append(stream_path / name)

def _get_files_list_in_collection(
def _get_files_in_collection(
self, collection_uuid, subdirectory_path=None, filters=None
):
"""
Expand Down Expand Up @@ -318,13 +332,13 @@ def copy_folder(self, source_project, source_folder, destination_project):
self.logger.debug(
"Get list of files in source collection, %s", source_collection["uuid"]
)
source_files = self._get_files_list_in_collection(source_collection["uuid"])
source_files = self._get_files_in_collection(source_collection["uuid"])
self.logger.debug(
"Getting list of files in destination collection, %s",
destination_collection["uuid"],
)
destination_files = list(
self._get_files_list_in_collection(destination_collection["uuid"])
self._get_files_in_collection(destination_collection["uuid"])
)

source_collection = arvados.collection.Collection(source_collection["uuid"])
Expand Down Expand Up @@ -609,7 +623,7 @@ def get_files(self, project, filters=None):
len(matching_collections),
collection["uuid"],
)
files += self._get_files_list_in_collection(
files += self._get_files_in_collection(
collection["uuid"], filters=filters
)
self.logger.debug("Return list of %d files", len(files))
Expand Down
150 changes: 149 additions & 1 deletion tests/test_arvados_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import mock
from mock import MagicMock

from cwl_platform.arvados_platform import ArvadosPlatform, ArvadosTask
from cwl_platform.arvados_platform import ArvadosPlatform, ArvadosTask, StreamFileReader

class TestArvadosPlaform(unittest.TestCase):
'''
Expand Down Expand Up @@ -119,5 +119,153 @@ def test_detect_platform(self):
os.environ['ARVADOS_API_HOST'] = 'some host'
self.assertTrue(ArvadosPlatform.detect())

# mock.patch decorators are applied bottom-up: the decorator closest to the
# function supplies the first mock argument after ``self``.
@mock.patch("arvados.collection.Collection")
@mock.patch("cwl_platform.arvados_platform.ArvadosPlatform._get_files_in_collection")
def test_copy_folder_success(self, mock_get_files_list, mock_collection):
    ''' Test copy_folder method with file streaming when both collections exist '''
    mock_collection.return_value = MagicMock()

    # API response for the source collection lookup
    source_collection = {
        'items': [
            {
                'uuid': 'source-uuid',
                'name': 'source-folder',
                'description': 'source collection'
            }
        ]
    }

    # API response for the destination collection lookup (already exists)
    destination_collection = {
        'items': [
            {
                'uuid': 'destination-uuid',
                'name': 'source-folder',
                'description': 'destination collection',
                "preserve_version": True
            }
        ]
    }

    # Successive list().execute() calls return the source lookup first and
    # the destination lookup second.
    self.platform.api.collections().list.return_value.execute.side_effect = [
        source_collection, destination_collection]

    # Simulate the files being streamed using StreamFileReader
    file1_stream = MagicMock()
    file1_stream.name = "file1.txt"
    file1_stream.stream_name.return_value = "stream1"
    file1_stream.read.return_value = b'file1 contents'

    file2_stream = MagicMock()
    file2_stream.name = "file2.txt"
    file2_stream.stream_name.return_value = "stream2"
    file2_stream.read.return_value = b'file2 contents'

    # Wrap the mock streams with StreamFileReader
    file1_reader = StreamFileReader(file1_stream)
    file2_reader = StreamFileReader(file2_stream)

    # Mock _get_files_in_collection to return the file readers (file-like objects)
    mock_get_files_list.return_value = [file1_reader, file2_reader]

    # Call the copy_folder method
    source_project = {"uuid": "source-uuid"}
    destination_project = {"uuid": "destination-uuid"}
    source_folder = "source-folder"

    result = self.platform.copy_folder(source_project, source_folder, destination_project)

    # Assertions
    self.assertIsNotNone(result)  # Ensure the result is not None
    self.assertEqual(result['uuid'], 'destination-uuid')  # Ensure we got the correct destination UUID
    # Ensure the collection listing function was called twice
    self.assertEqual(self.platform.api.collections().list.call_count, 2)
    self.assertEqual(mock_get_files_list.call_count, 2)  # Ensure the file listing function was called twice

def test_copy_folder_source_collection_notfound(self):
    ''' copy_folder returns None when the source collection lookup finds nothing '''
    # Every collection listing comes back empty, so no source collection is found.
    empty_listing = {'items': []}
    self.platform.api.collections().list.return_value.execute.return_value = empty_listing

    outcome = self.platform.copy_folder(
        {"uuid": "source-uuid"},
        "source-folder",
        {"uuid": "destination-uuid"},
    )

    self.assertIsNone(outcome)

# mock.patch decorators are applied bottom-up: the decorator closest to the
# function supplies the first mock argument after ``self``.
@mock.patch("arvados.collection.Collection")
@mock.patch("cwl_platform.arvados_platform.ArvadosPlatform._get_files_in_collection")
def test_copy_folder_create_destination_collection(self, mock_get_files_list, mock_collection):
    ''' Test copy_folder method with file streaming to CREATE the destination collection'''
    mock_collection.return_value = MagicMock()

    # API response for the source collection lookup
    source_collection = {
        'items': [
            {
                'uuid': 'source-uuid',
                'name': 'source-folder',
                'description': 'source collection'
            }
        ]
    }

    # API response for the destination collection lookup: nothing found
    destination_collection = {
        'items': []
    }

    # Successive list().execute() calls return the source lookup first and
    # the (empty) destination lookup second.
    self.platform.api.collections().list.return_value.execute.side_effect = [
        source_collection, destination_collection]

    # API response for creating the destination collection
    self.platform.api.collections().create.return_value.execute.return_value = {
        "uuid": "destination-uuid",
        "name": "source-folder",
        "description": "destination collection",
        "preserve_version": True,
    }

    # Simulate the files being streamed using StreamFileReader
    file1_stream = MagicMock()
    file1_stream.name = "file1.txt"
    file1_stream.stream_name.return_value = "stream1"
    file1_stream.read.return_value = b'file1 contents'

    file2_stream = MagicMock()
    file2_stream.name = "file2.txt"
    file2_stream.stream_name.return_value = "stream2"
    file2_stream.read.return_value = b'file2 contents'

    # Wrap the mock streams with StreamFileReader
    file1_reader = StreamFileReader(file1_stream)
    file2_reader = StreamFileReader(file2_stream)

    # Mock _get_files_in_collection to return the file readers (file-like objects)
    mock_get_files_list.return_value = [file1_reader, file2_reader]

    # Call the copy_folder method
    source_project = {"uuid": "source-uuid"}
    destination_project = {"uuid": "destination-uuid"}
    source_folder = "source-folder"

    result = self.platform.copy_folder(source_project, source_folder, destination_project)

    # Assertions
    self.assertIsNotNone(result)  # Ensure the result is not None
    self.assertEqual(result['uuid'], 'destination-uuid')  # Ensure we got the correct destination UUID
    # Ensure the collection listing function was called twice
    self.assertEqual(self.platform.api.collections().list.call_count, 2)
    # Ensure the collection creating function was called once
    self.assertEqual(self.platform.api.collections().create.call_count, 1)
    self.assertEqual(mock_get_files_list.call_count, 2)  # Ensure the file listing function was called twice

if __name__ == '__main__':
unittest.main()

0 comments on commit 48d37b2

Please sign in to comment.