Skip to content

Commit

Permalink
Merge pull request #12500 from nuwang/papercut_file_sources
Browse files Browse the repository at this point in the history
File sources for gdrive, gcs, onedata, basespace
  • Loading branch information
mvdbeek authored Dec 21, 2021
2 parents 9824889 + 0924628 commit 9c92c1b
Show file tree
Hide file tree
Showing 20 changed files with 315 additions and 81 deletions.
12 changes: 12 additions & 0 deletions lib/galaxy/dependencies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,18 @@ def check_s3fs(self):
# use s3fs directly (skipping pyfilesystem) for direct access to more options
return 's3fs' in self.file_sources

def check_fs_googledrivefs(self):
    """Report whether ``'googledrive'`` appears in ``self.file_sources``."""
    configured_types = self.file_sources
    return 'googledrive' in configured_types

def check_fs_gcsfs(self):
    """Report whether ``'googlecloudstorage'`` appears in ``self.file_sources``."""
    configured_types = self.file_sources
    return 'googlecloudstorage' in configured_types

def check_fs_onedatafs(self):
    """Report whether ``'onedata'`` appears in ``self.file_sources``."""
    configured_types = self.file_sources
    return 'onedata' in configured_types

def check_fs_basespace(self):
    """Report whether ``'basespace'`` appears in ``self.file_sources``."""
    configured_types = self.file_sources
    return 'basespace' in configured_types

def check_watchdog(self):
install_set = {'auto', 'True', 'true', 'polling', True}
return (self.config['watch_tools'] in install_set
Expand Down
4 changes: 4 additions & 0 deletions lib/galaxy/dependencies/conditional-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,10 @@ fs.sshfs # type: ssh
fs-s3fs # type: s3
s3fs # type: s3fs
fs.anvilfs # type: anvil
fs.googledrivefs # type: googledrive
fs-gcsfs # type: googlecloudstorage
fs-onedatafs # type: onedata
fs-basespace # type: basespace

# Chronos client
chronos-python==1.2.1
Expand Down
20 changes: 20 additions & 0 deletions lib/galaxy/files/sources/basespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
try:
from fs_basespace import BASESPACEFS
except ImportError:
BASESPACEFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class BaseSpaceFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``basespace``, opened through ``BASESPACEFS``."""

    plugin_type = 'basespace'
    # ``BASESPACEFS`` is None when the optional ``fs_basespace`` import failed.
    required_module = BASESPACEFS
    required_package = "fs-basespace"

    def _open_fs(self, user_context):
        """Return a ``BASESPACEFS`` built from this source's serialized properties.

        Every property returned by ``_serialization_props`` is forwarded
        unchanged as a keyword argument.
        """
        fs_kwargs = self._serialization_props(user_context)
        return BASESPACEFS(**fs_kwargs)


__all__ = ('BaseSpaceFilesSource',)
30 changes: 30 additions & 0 deletions lib/galaxy/files/sources/googlecloudstorage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
try:
from fs_gcsfs import GCSFS
from google.cloud.storage import Client
from google.oauth2.credentials import Credentials
except ImportError:
GCSFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class GoogleCloudStorageFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``googlecloudstorage``, opened through ``GCSFS``."""

    plugin_type = 'googlecloudstorage'
    # ``GCSFS`` is None when any of the optional Google imports failed.
    required_module = GCSFS
    required_package = "fs-gcsfs"

    def _open_fs(self, user_context):
        """Return a ``GCSFS`` handle for the configured bucket.

        ``bucket_name``, ``root_path``, ``project`` and ``anonymous`` are
        consumed here. When ``anonymous`` is truthy an anonymous client is
        used; otherwise, if a ``token`` is present, all remaining properties
        are passed to ``Credentials``. With neither, ``GCSFS`` is created
        without an explicit client.
        """
        props = self._serialization_props(user_context)
        bucket_name = props.pop('bucket_name', None)
        root_path = props.pop('root_path', None)
        project = props.pop('project', None)
        # Pop rather than get: ``anonymous`` is not a Credentials argument, so
        # leaving it in ``props`` (e.g. ``anonymous: false`` alongside a token)
        # would make ``Credentials(**props)`` raise a TypeError.
        anonymous = props.pop('anonymous', False)
        args = {}
        if anonymous:
            args['client'] = Client.create_anonymous_client()
        elif props.get('token'):
            args['client'] = Client(project=project, credentials=Credentials(**props))
        return GCSFS(bucket_name, root_path=root_path, retry=0, **args)


__all__ = ('GoogleCloudStorageFilesSource',)
22 changes: 22 additions & 0 deletions lib/galaxy/files/sources/googledrive.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
try:
from fs.googledrivefs import GoogleDriveFS
from google.oauth2.credentials import Credentials
except ImportError:
GoogleDriveFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class GoogleDriveFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``googledrive``, opened through ``GoogleDriveFS``."""

    plugin_type = 'googledrive'
    # ``GoogleDriveFS`` is None when the optional imports failed.
    required_module = GoogleDriveFS
    required_package = "fs.googledrivefs"

    def _open_fs(self, user_context):
        """Return a ``GoogleDriveFS`` authenticated with this source's properties.

        Every property returned by ``_serialization_props`` is passed to
        ``Credentials`` as a keyword argument.
        """
        credential_kwargs = self._serialization_props(user_context)
        return GoogleDriveFS(Credentials(**credential_kwargs))


__all__ = ('GoogleDriveFilesSource',)
20 changes: 20 additions & 0 deletions lib/galaxy/files/sources/onedata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
try:
from fs.onedatafs import OnedataFS
except ImportError:
OnedataFS = None

from ._pyfilesystem2 import PyFilesystem2FilesSource


class OneDataFilesSource(PyFilesystem2FilesSource):
    """File source plugin of type ``onedata``, opened through ``OnedataFS``."""

    plugin_type = 'onedata'
    # ``OnedataFS`` is None when the optional ``fs.onedatafs`` import failed.
    required_module = OnedataFS
    required_package = "fs-onedatafs"

    def _open_fs(self, user_context):
        """Return an ``OnedataFS`` built from this source's serialized properties.

        Every property returned by ``_serialization_props`` is forwarded
        unchanged as a keyword argument.
        """
        fs_kwargs = self._serialization_props(user_context)
        return OnedataFS(**fs_kwargs)


__all__ = ('OneDataFilesSource',)
1 change: 1 addition & 0 deletions packages/files/test-requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
pytest
fs-gcsfs
88 changes: 63 additions & 25 deletions test/unit/files/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from galaxy.files import (
ConfiguredFileSources,
ConfiguredFileSourcesConfig,
DictFileSourcesUserContext,
)

Expand Down Expand Up @@ -55,6 +56,21 @@ def user_context_fixture(user_ftp_dir=None, role_names=None, group_names=None, i
preferences={
'webdav|password': 'secret1234',
'dropbox|access_token': os.environ.get('GALAXY_TEST_DROPBOX_ACCESS_TOKEN'),
'googledrive|client_id': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_CLIENT_ID'),
'googledrive|client_secret': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_CLIENT_SECRET'),
'googledrive|access_token': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_ACCESS_TOKEN'),
'googledrive|refresh_token': os.environ.get('GALAXY_TEST_GOOGLE_DRIVE_REFRESH_TOKEN'),
'googlecloudstorage|project': os.environ.get('GALAXY_TEST_GCS_PROJECT'),
'googlecloudstorage|bucket_name': os.environ.get('GALAXY_TEST_GCS_BUCKET'),
'googlecloudstorage|client_id': os.environ.get('GALAXY_TEST_GCS_CLIENT_ID'),
'googlecloudstorage|client_secret': os.environ.get('GALAXY_TEST_GCS_CLIENT_SECRET'),
'googlecloudstorage|access_token': os.environ.get('GALAXY_TEST_GCS_ACCESS_TOKEN'),
'googlecloudstorage|refresh_token': os.environ.get('GALAXY_TEST_GCS_REFRESH_TOKEN'),
'onedata|provider_host': os.environ.get('GALAXY_TEST_ONEDATA_PROVIDER_HOST'),
'onedata|access_token': os.environ.get('GALAXY_TEST_ONEDATA_ACCESS_TOKEN'),
'basespace|client_id': os.environ.get('GALAXY_TEST_ONEDATA_CLIENT_ID'),
'basespace|client_secret': os.environ.get('GALAXY_TEST_ONEDATA_CLIENT_SECRET'),
'basespace|access_token': os.environ.get('GALAXY_TEST_ONEDATA_ACCESS_TOKEN'),
},
role_names=role_names or set(),
group_names=group_names or set(),
Expand All @@ -63,43 +79,43 @@ def user_context_fixture(user_ftp_dir=None, role_names=None, group_names=None, i
return user_context


def realize_to_temp_file(file_sources, uri, user_context=None):
    """Realize ``uri`` into a temporary file and return the text it contains.

    The file source and path are resolved with
    ``file_sources.get_file_source_path(uri)``. The temporary file is removed
    when the function returns.
    """
    file_source_path = file_sources.get_file_source_path(uri)
    with tempfile.NamedTemporaryFile(mode='r') as temp:
        file_source_path.file_source.realize_to(file_source_path.path, temp.name, user_context=user_context)
        # Read through a fresh handle opened by name, not through ``temp``.
        with open(temp.name) as f:
            return f.read()


def assert_realizes_as(file_sources, uri, expected, user_context=None):
    """Assert that realizing ``uri`` yields exactly ``expected``.

    Raises:
        AssertionError: if the realized contents differ from ``expected``.
    """
    realized_contents = realize_to_temp_file(file_sources, uri, user_context=user_context)
    if realized_contents != expected:
        message = "Expected to realize contents at [{}] as [{}], instead found [{}]".format(
            uri,
            expected,
            realized_contents,
        )
        raise AssertionError(message)


def assert_realizes_contains(file_sources, uri, expected, user_context=None):
    """Assert that the contents realized from ``uri`` contain ``expected``.

    The contents are obtained once, through ``realize_to_temp_file``.

    Raises:
        AssertionError: if ``expected`` is not found in the realized contents.
    """
    realized_contents = realize_to_temp_file(file_sources, uri, user_context=user_context)
    if expected not in realized_contents:
        message = "Expected to realize contents at [{}] to contain [{}], instead found [{}]".format(
            uri,
            expected,
            realized_contents,
        )
        raise AssertionError(message)


def assert_realizes_throws_exception(file_sources, uri, user_context=None) -> Exception:
    """Assert that realizing ``uri`` raises, and return the raised exception.

    Realization is attempted once, through ``realize_to_temp_file``.

    Raises:
        AssertionError: if realization completes without raising.
    """
    try:
        realize_to_temp_file(file_sources, uri, user_context=user_context)
    except Exception as e:
        return e
    # Raised explicitly rather than via ``assert`` so the check still runs
    # when Python is started with -O.
    raise AssertionError(f"Expected realizing [{uri}] to raise an exception, but it succeeded")

Expand All @@ -110,3 +126,25 @@ def write_from(file_sources, uri, content, user_context=None):
f.write(content)
f.flush()
file_source_path.file_source.write_from(file_source_path.path, f.name, user_context=user_context)


def configured_file_sources(conf_file):
    """Build a ``ConfiguredFileSources`` from ``conf_file`` using a default config object."""
    return ConfiguredFileSources(ConfiguredFileSourcesConfig(), conf_file=conf_file)


def assert_simple_file_realize(conf_file, recursive=False, filename="a", contents="a\n", contains=False):
    """List the root of file source ``test1`` and check one file's contents.

    Asserts that ``gxfiles://test1`` resolves to path ``/``, that a ``File``
    entry named ``filename`` is in the listing of ``/``, and that realizing
    that file yields ``contents`` -- as a substring when ``contains`` is
    true, as an exact match otherwise.
    """
    user_context = user_context_fixture()
    file_sources = configured_file_sources(conf_file)
    root = file_sources.get_file_source_path("gxfiles://test1")
    assert root.path == "/"

    listing = root.file_source.list("/", recursive=recursive, user_context=user_context)
    assert find(listing, class_="File", name=filename)

    check_contents = assert_realizes_contains if contains else assert_realizes_as
    check_contents(file_sources, f"gxfiles://test1/{filename}", contents, user_context=user_context)
7 changes: 7 additions & 0 deletions test/unit/files/basespace_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
- type: basespace
id: test1
doc: Test access to Illumina BaseSpace
basespace_server: https://api.basespace.illumina.com
client_id: ${user.preferences['basespace|client_id']}
client_secret: ${user.preferences['basespace|client_secret']}
access_token: ${user.preferences['basespace|access_token']}
2 changes: 1 addition & 1 deletion test/unit/files/dropbox_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
- type: dropbox
id: test1
doc: Test WebDAV server.
doc: Test access to a dropbox account.
accessToken: ${user.preferences['dropbox|access_token']}
11 changes: 11 additions & 0 deletions test/unit/files/gcsfs_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
- type: googlecloudstorage
id: test1
doc: Test access to Google Cloud Storage.
project: ${user.preferences['googlecloudstorage|project']}
bucket_name: 'genomics-public-data'
token_uri: "https://www.googleapis.com/oauth2/v4/token"
client_id: ${user.preferences['googlecloudstorage|client_id']}
client_secret: ${user.preferences['googlecloudstorage|client_secret']}
token: ${user.preferences['googlecloudstorage|access_token']}
refresh_token: ${user.preferences['googlecloudstorage|refresh_token']}
anonymous: true
8 changes: 8 additions & 0 deletions test/unit/files/googledrive_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
- type: googledrive
id: test1
doc: Test access to a Google drive.
token: ${user.preferences['googledrive|access_token']}
refresh_token: ${user.preferences['googledrive|refresh_token']}
token_uri: "https://www.googleapis.com/oauth2/v4/token"
client_id: ${user.preferences['googledrive|client_id']}
client_secret: ${user.preferences['googledrive|client_secret']}
5 changes: 5 additions & 0 deletions test/unit/files/onedata_file_sources_conf.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
- type: onedata
id: test1
doc: Test access to a OneData host
provider_host: ${user.preferences['onedata|provider_host']}
access_token: ${user.preferences['onedata|access_token']}
36 changes: 36 additions & 0 deletions test/unit/files/test_basespace.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import os

import pytest

from ._util import (
assert_realizes_as,
configured_file_sources,
find,
user_context_fixture,
)
SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "basespace_file_sources_conf.yml")

# Skip the BaseSpace test unless every environment variable it needs is set
# to a non-empty value.
skip_if_no_basespace_access_token = pytest.mark.skipif(
    not all(
        os.environ.get(variable)
        for variable in (
            'GALAXY_TEST_BASESPACE_CLIENT_ID',
            'GALAXY_TEST_BASESPACE_CLIENT_SECRET',
            'GALAXY_TEST_BASESPACE_ACCESS_TOKEN',
            'GALAXY_TEST_BASESPACE_TEST_FILE_PATH',
        )
    ),
    reason="GALAXY_TEST_BASESPACE_CLIENT_ID and related vars not set"
)


@skip_if_no_basespace_access_token
def test_file_source():
    """Find the file named by GALAXY_TEST_BASESPACE_TEST_FILE_PATH and realize it.

    Lists the file's parent directory (non-recursively), asserts the file is
    present, then asserts that realizing its ``uri`` yields ``"a\\n"``.
    """
    user_context = user_context_fixture()
    file_sources = configured_file_sources(FILE_SOURCES_CONF)
    root = file_sources.get_file_source_path("gxfiles://test1")
    assert root.path == "/"

    test_file = os.environ.get('GALAXY_TEST_BASESPACE_TEST_FILE_PATH', "")
    parent_dir, base_name = os.path.split(test_file)
    listing = root.file_source.list(parent_dir, recursive=False, user_context=user_context)
    a_file = find(listing, class_="File", name=base_name)
    assert a_file

    assert_realizes_as(file_sources, a_file['uri'], "a\n", user_context=user_context)
25 changes: 3 additions & 22 deletions test/unit/files/test_dropbox.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,8 @@

import pytest

from galaxy.files import ConfiguredFileSources, ConfiguredFileSourcesConfig
from ._util import (
assert_realizes_as,
find_file_a,
user_context_fixture,
)
from ._util import assert_simple_file_realize

SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "dropbox_file_sources_conf.yml")

Expand All @@ -19,19 +15,4 @@

@skip_if_no_dropbox_access_token
def test_file_source():
    """Check that file ``a`` can be listed and realized from the Dropbox source.

    Delegates to ``assert_simple_file_realize`` with a recursive listing; the
    helper's defaults look for a file named ``a`` with contents ``"a\\n"``.
    """
    assert_simple_file_realize(FILE_SOURCES_CONF, recursive=True)
25 changes: 25 additions & 0 deletions test/unit/files/test_gcsfs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import os

import pytest

from ._util import assert_simple_file_realize

try:
from fs_gcsfs import GCSFS
except ImportError:
GCSFS = None

SCRIPT_DIRECTORY = os.path.abspath(os.path.dirname(__file__))
FILE_SOURCES_CONF = os.path.join(SCRIPT_DIRECTORY, "gcsfs_file_sources_conf.yml")


# Skip the GCS file source test when the optional fs_gcsfs import above failed
# and left GCSFS set to None.
skip_if_no_gcsfs_libs = pytest.mark.skipif(
not GCSFS,
reason="Required lib to run gcs file source test: fs_gcsfs is not available"
)


@skip_if_no_gcsfs_libs
def test_file_source():
    """Check that ``README`` in the configured bucket contains ``1000genomes``.

    Uses a non-recursive listing of the root and a substring match on the
    realized contents.
    """
    assert_simple_file_realize(
        FILE_SOURCES_CONF,
        recursive=False,
        filename="README",
        contents="1000genomes",
        contains=True,
    )
Loading

0 comments on commit 9c92c1b

Please sign in to comment.