From 7d4d0749e9f5ffcc2a4507e13e940e46400a020d Mon Sep 17 00:00:00 2001 From: stxue1 <122345910+stxue1@users.noreply.github.com> Date: Thu, 8 Aug 2024 08:35:54 -0700 Subject: [PATCH] Add concept of files that a job will use in the future (#5011) Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- src/toil/job.py | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/toil/job.py b/src/toil/job.py index 280f504dd6..4517911864 100644 --- a/src/toil/job.py +++ b/src/toil/job.py @@ -761,7 +761,8 @@ def __init__( jobName: str, unitName: Optional[str] = "", displayName: Optional[str] = "", - local: Optional[bool] = None + local: Optional[bool] = None, + files: Optional[Set[FileID]] = None ) -> None: """ Create a new JobDescription. @@ -784,6 +785,7 @@ def __init__( :param local: If True, the job is meant to use minimal resources but is sensitive to execution latency, and so should be executed by the leader. + :param files: Set of FileID objects that the job plans to use. """ # Set requirements super().__init__(requirements) @@ -893,6 +895,11 @@ def makeString(x: Union[str, bytes, None]) -> str: # And we log who made the version (by PID) self._job_version_writer = 0 + # Store FileIDs that the Job will want to use + # This currently does not serve much of a purpose except for debugging + # In the future, this can be used to improve job scheduling, see https://github.com/DataBiosphere/toil/issues/3071 + self.files_to_use = files or set() + def get_names(self) -> Names: """ Get the names and ID of this job as a named tuple. @@ -1536,6 +1543,7 @@ def __init__( displayName: Optional[str] = "", descriptionClass: Optional[type] = None, local: Optional[bool] = None, + files: Optional[Set[FileID]] = None ) -> None: """ Job initializer. @@ -1556,6 +1564,7 @@ def __init__( :param displayName: Human-readable job type display name. 
:param descriptionClass: Override for the JobDescription class used to describe the job. :param local: if the job can be run on the leader. + :param files: Set of FileID objects that the job will want to use. :type memory: int or string convertible by toil.lib.conversions.human2bytes to an int :type cores: float, int, or string convertible by toil.lib.conversions.human2bytes to an int @@ -1594,7 +1603,8 @@ def __init__( jobName, unitName=unitName, displayName=displayName, - local=local + local=local, + files=files ) # Private class variables needed to actually execute a job, in the worker. @@ -1718,6 +1728,20 @@ def checkpoint(self) -> bool: """Determine if the job is a checkpoint job or not.""" return isinstance(self._description, CheckpointJobDescription) + @property + def files_to_use(self) -> Set[FileID]: + return self.description.files_to_use + + @files_to_use.setter + def files_to_use(self, val: Set[FileID]): + self.description.files_to_use = val + + def add_to_files_to_use(self, val: FileID): + self.description.files_to_use.add(val) + + def remove_from_files_to_use(self, val: FileID): + self.description.files_to_use.remove(val) + def assignConfig(self, config: Config) -> None: """ Assign the given config object.