Skip to content

Commit

Permalink
Merge pull request #18909 from jmchilton/file_source_property_global_…
Browse files Browse the repository at this point in the history
…defaults

Allow setting a few global defaults for file source plugin types.
  • Loading branch information
mvdbeek authored Oct 7, 2024
2 parents 97a5e5d + ab70b5b commit 4c75803
Show file tree
Hide file tree
Showing 13 changed files with 233 additions and 19 deletions.
59 changes: 59 additions & 0 deletions doc/source/admin/galaxy_options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5115,6 +5115,26 @@
:Type: str


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``workflow_scheduling_separate_materialization_iteration``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Workflows launched with URI/URL inputs that are not marked as
'deferred' are "materialized" (or undeferred) by the workflow
scheduler. This might be a lengthy process. Setting this to 'True'
will place the invocation back in the queue after materialization
before scheduling the workflow so it is less likely to starve
other workflow scheduling. Ideally, Galaxy would allow more
fine-grained control of handlers but until then, this provides a way to
tip the balance between "doing more work" and "being more fair".
The default here is pretty arbitrary - it has been set to False to
optimize Galaxy for automated, single user applications where
"fairness" is mostly irrelevant.
:Default: ``false``
:Type: bool


~~~~~~~~~~~~~~~~~~~~~~~~
``cache_user_job_count``
~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down Expand Up @@ -5602,3 +5622,42 @@
This requires the help_forum_api_url to be set.
:Default: ``false``
:Type: bool


~~~~~~~~~~~~~~~~~~~~~~~~
``file_source_temp_dir``
~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Directory to store temporary files for file sources. This defaults
to new_file_path if not set.
:Default: ``None``
:Type: str


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``file_source_webdav_use_temp_files``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Default value for use_temp_files for webdav plugins that don't
explicitly declare this.
:Default: ``true``
:Type: bool


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``file_source_listings_expiry_time``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Number of seconds before file source content listings are
refreshed. Shorter times will result in more queries while
browsing file sources. Longer times will result in fewer
requests to file sources but outdated contents might be displayed
to the user. Currently only affects s3fs file sources.
:Default: ``60``
:Type: int



5 changes: 5 additions & 0 deletions lib/galaxy/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -704,6 +704,7 @@ class GalaxyAppConfiguration(BaseAppConfiguration, CommonConfigurationMixin):
drmaa_external_runjob_script: str
email_from: Optional[str]
enable_tool_shed_check: bool
file_source_temp_dir: str
galaxy_data_manager_data_path: str
galaxy_infrastructure_url: str
hours_between_check: int
Expand Down Expand Up @@ -1236,6 +1237,9 @@ def _load_theme(path: str, theme_dict: dict):
else:
_load_theme(self.themes_config_file, self.themes)

if self.file_source_temp_dir:
self.file_source_temp_dir = os.path.abspath(self.file_source_temp_dir)

def _process_celery_config(self):
if self.celery_conf and self.celery_conf.get("result_backend") is None:
# If the result_backend is not set, use a SQLite database in the data directory
Expand Down Expand Up @@ -1348,6 +1352,7 @@ def check(self):
self.template_cache_path,
self.tool_data_path,
self.user_library_import_dir,
self.file_source_temp_dir,
]
for path in paths_to_check:
self._ensure_directory(path)
Expand Down
29 changes: 29 additions & 0 deletions lib/galaxy/config/sample/galaxy.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -2748,6 +2748,19 @@ galaxy:
# <config_dir>.
#workflow_schedulers_config_file: workflow_schedulers_conf.xml

# Workflows launched with URI/URL inputs that are not marked as
# 'deferred' are "materialized" (or undeferred) by the workflow
# scheduler. This might be a lengthy process. Setting this to 'True'
# will place the invocation back in the queue after materialization
# before scheduling the workflow so it is less likely to starve other
# workflow scheduling. Ideally, Galaxy would allow more fine-grained
# control of handlers but until then, this provides a way to tip the
# balance between "doing more work" and "being more fair". The default
# here is pretty arbitrary - it has been set to False to optimize Galaxy
# for automated, single user applications where "fairness" is mostly
# irrelevant.
#workflow_scheduling_separate_materialization_iteration: false

# If using job concurrency limits (configured in job_config_file),
# several extra database queries must be performed to determine the
# number of jobs a user has dispatched to a given destination. By
Expand Down Expand Up @@ -2978,3 +2991,19 @@ galaxy:
# Enable the integration of the Galaxy Help Forum in the tool panel.
# This requires the help_forum_api_url to be set.
#enable_help_forum_tool_panel_integration: false

# Directory to store temporary files for file sources. This defaults
# to new_file_path if not set.
#file_source_temp_dir: null

# Default value for use_temp_files for webdav plugins that don't
# explicitly declare this.
#file_source_webdav_use_temp_files: true

# Number of seconds before file source content listings are refreshed.
# Shorter times will result in more queries while browsing file
# sources. Longer times will result in fewer requests to file sources
# but outdated contents might be displayed to the user. Currently only
# affects s3fs file sources.
#file_source_listings_expiry_time: 60

20 changes: 20 additions & 0 deletions lib/galaxy/config/schemas/config_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4104,3 +4104,23 @@ mapping:
per_host: true
desc: |
Enable the integration of the Galaxy Help Forum in the tool panel. This requires the help_forum_api_url to be set.
file_source_temp_dir:
type: str
required: false
desc: |
Directory to store temporary files for file sources. This defaults to new_file_path if not set.
file_source_webdav_use_temp_files:
type: bool
default: true
desc: |
Default value for use_temp_files for webdav plugins that don't explicitly declare this.
file_source_listings_expiry_time:
type: int
default: 60
desc: |
Number of seconds before file source content listings are refreshed. Shorter times will result in more
queries while browsing file sources. Longer times will result in fewer requests to file sources but
outdated contents might be displayed to the user. Currently only affects s3fs file sources.
20 changes: 20 additions & 0 deletions lib/galaxy/files/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ class FileSourcePluginsConfig:
user_library_import_dir: Optional[str]
ftp_upload_dir: Optional[str]
ftp_upload_purge: bool
tmp_dir: Optional[str]
webdav_use_temp_files: Optional[bool]
listings_expiry_time: Optional[int]

def __init__(
self,
Expand All @@ -33,6 +36,9 @@ def __init__(
user_library_import_dir=None,
ftp_upload_dir=None,
ftp_upload_purge=True,
tmp_dir=None,
webdav_use_temp_files=None,
listings_expiry_time=None,
):
symlink_allowlist = symlink_allowlist or []
fetch_url_allowlist = fetch_url_allowlist or []
Expand All @@ -42,6 +48,9 @@ def __init__(
self.user_library_import_dir = user_library_import_dir
self.ftp_upload_dir = ftp_upload_dir
self.ftp_upload_purge = ftp_upload_purge
self.tmp_dir = tmp_dir
self.webdav_use_temp_files = webdav_use_temp_files
self.listings_expiry_time = listings_expiry_time

@staticmethod
def from_app_config(config):
Expand All @@ -54,6 +63,10 @@ def from_app_config(config):
kwds["user_library_import_dir"] = config.user_library_import_dir
kwds["ftp_upload_dir"] = config.ftp_upload_dir
kwds["ftp_upload_purge"] = config.ftp_upload_purge
kwds["tmp_dir"] = config.file_source_temp_dir
kwds["webdav_use_temp_files"] = config.file_source_webdav_use_temp_files
kwds["listings_expiry_time"] = config.file_source_listings_expiry_time

return FileSourcePluginsConfig(**kwds)

def to_dict(self):
Expand All @@ -64,6 +77,9 @@ def to_dict(self):
"user_library_import_dir": self.user_library_import_dir,
"ftp_upload_dir": self.ftp_upload_dir,
"ftp_upload_purge": self.ftp_upload_purge,
"tmp_dir": self.tmp_dir,
"webdav_use_temp_files": self.webdav_use_temp_files,
"listings_expiry_time": self.listings_expiry_time,
}

@staticmethod
Expand All @@ -75,6 +91,10 @@ def from_dict(as_dict):
user_library_import_dir=as_dict["user_library_import_dir"],
ftp_upload_dir=as_dict["ftp_upload_dir"],
ftp_upload_purge=as_dict["ftp_upload_purge"],
# Always provided for new jobs; the .get() fallbacks below can be removed in 25.0
tmp_dir=as_dict.get("tmp_dir"),
webdav_use_temp_files=as_dict.get("webdav_use_temp_files"),
listings_expiry_time=as_dict.get("listings_expiry_time"),
)


Expand Down
13 changes: 9 additions & 4 deletions lib/galaxy/files/sources/_pyfilesystem2.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class PyFilesystem2FilesSource(BaseFilesSource):
required_package: ClassVar[str]
supports_pagination = True
supports_search = True
allow_key_error_on_empty_directories = False # work around a bug in webdav

def __init__(self, **kwd: Unpack[FilesSourceProperties]):
if self.required_module is None:
Expand Down Expand Up @@ -65,10 +66,14 @@ def _list(
with self._open_fs(user_context=user_context, opts=opts) as h:
if recursive:
recursive_result: List[AnyRemoteEntry] = []
for p, dirs, files in h.walk(path, namespaces=["details"]):
to_dict = functools.partial(self._resource_info_to_dict, p)
recursive_result.extend(map(to_dict, dirs))
recursive_result.extend(map(to_dict, files))
try:
for p, dirs, files in h.walk(path, namespaces=["details"]):
to_dict = functools.partial(self._resource_info_to_dict, p)
recursive_result.extend(map(to_dict, dirs))
recursive_result.extend(map(to_dict, files))
except KeyError:
if not self.allow_key_error_on_empty_directories:
raise
return recursive_result, len(recursive_result)
else:
page = self._to_page(limit, offset)
Expand Down
14 changes: 13 additions & 1 deletion lib/galaxy/files/sources/s3fs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@
Tuple,
)

from typing_extensions import Unpack
from typing_extensions import (
NotRequired,
Unpack,
)

from galaxy.files import OptionalUserContext
from . import (
Expand All @@ -35,6 +38,7 @@ class S3FsFilesSourceProperties(FilesSourceProperties, total=False):
endpoint_url: int
user: str
passwd: str
listings_expiry_time: NotRequired[Optional[int]]
client_kwargs: dict # internally computed. Should not be specified in config file


Expand All @@ -45,6 +49,14 @@ def __init__(self, **kwd: Unpack[S3FsFilesSourceProperties]):
if s3fs is None:
raise Exception("Package s3fs unavailable but required for this file source plugin.")
props: S3FsFilesSourceProperties = cast(S3FsFilesSourceProperties, self._parse_common_config_opts(kwd))
file_sources_config = self._file_sources_config
if (
props.get("listings_expiry_time") is None
and file_sources_config
and file_sources_config.listings_expiry_time
):
if file_sources_config.listings_expiry_time:
props["listings_expiry_time"] = file_sources_config.listings_expiry_time
# There is a possibility that the bucket name could be parameterized: e.g.
# bucket: ${user.preferences['generic_s3|bucket']}
# that's ok, because we evaluate the bucket name again later. The bucket property here will only
Expand Down
26 changes: 23 additions & 3 deletions lib/galaxy/files/sources/webdav.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,29 +5,49 @@

import tempfile
from typing import (
cast,
Optional,
Union,
)

from typing_extensions import NotRequired

from . import (
FilesSourceOptions,
FilesSourceProperties,
)
from ._pyfilesystem2 import PyFilesystem2FilesSource


class WebDavFilesSourceProperties(FilesSourceProperties, total=False):
use_temp_files: NotRequired[Optional[bool]]
temp_path: NotRequired[Optional[str]]


class WebDavFilesSource(PyFilesystem2FilesSource):
plugin_type = "webdav"
required_module = WebDAVFS
required_package = "fs.webdavfs"
allow_key_error_on_empty_directories = True

def _open_fs(self, user_context=None, opts: Optional[FilesSourceOptions] = None):
props = self._serialization_props(user_context)
props = cast(WebDavFilesSourceProperties, self._serialization_props(user_context))
file_sources_config = self._file_sources_config
use_temp_files = props.pop("use_temp_files", None)
if use_temp_files is None and file_sources_config and file_sources_config.webdav_use_temp_files is not None:
use_temp_files = file_sources_config.webdav_use_temp_files
if use_temp_files is None:
# Default to True to avoid memory issues with large files.
props["use_temp_files"] = True
props["temp_path"] = props.get("temp_path", tempfile.TemporaryDirectory(prefix="webdav_"))
use_temp_files = True

if use_temp_files:
temp_path = props.get("temp_path")
if temp_path is None and file_sources_config and file_sources_config.tmp_dir:
temp_path = file_sources_config.tmp_dir
if temp_path is None:
temp_path = tempfile.mkdtemp(prefix="webdav_")
props["temp_path"] = temp_path
props["use_temp_files"] = use_temp_files
extra_props: Union[FilesSourceProperties, dict] = opts.extra_props or {} if opts else {}
handle = WebDAVFS(**{**props, **extra_props})
return handle
Expand Down
4 changes: 4 additions & 0 deletions lib/galaxy/model/unittest_utils/data_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ def __init__(self, root=None, **kwd):
self.ftp_upload_dir = None
self.ftp_upload_purge = False

self.file_source_temp_dir = None
self.file_source_webdav_use_temp_files = False
self.file_source_listings_expiry_time = 60

def __del__(self):
if self._remove_root:
shutil.rmtree(self.root)
Expand Down
6 changes: 4 additions & 2 deletions test/unit/files/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import os
import tempfile
from typing import Optional

from galaxy.files import (
ConfiguredFileSources,
Expand Down Expand Up @@ -156,8 +157,9 @@ def write_from(
file_source_path.file_source.write_from(file_source_path.path, f.name, user_context=user_context)


def configured_file_sources(conf_file):
file_sources_config = FileSourcePluginsConfig()
def configured_file_sources(conf_file, file_sources_config: Optional[FileSourcePluginsConfig] = None):
file_sources_config = file_sources_config or FileSourcePluginsConfig()
assert file_sources_config
if isinstance(conf_file, str):
conf = ConfiguredFileSourcesConf(conf_file=conf_file)
else:
Expand Down
Loading

0 comments on commit 4c75803

Please sign in to comment.