Skip to content

Commit

Permalink
simplify report fetching and file object
Browse files Browse the repository at this point in the history
  • Loading branch information
b97pla committed Jan 23, 2024
1 parent d94875c commit 8246380
Show file tree
Hide file tree
Showing 7 changed files with 133 additions and 133 deletions.
17 changes: 14 additions & 3 deletions delivery/models/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,29 @@ def __hash__(self):

class RunfolderProject(BaseProject):
"""
Model a project directory in a runfolder on disk. Note that this means that this project model only extends
to the idea of projects as subdirectories in a demultiplexed Illumina runfolder.
Model a project directory in a runfolder on disk. Note that this means that this project model
only extends to the idea of projects as subdirectories in a demultiplexed Illumina runfolder.
"""

def __init__(self, name, path, runfolder_path, runfolder_name, samples=None, project_files=None):
def __init__(
self,
name,
path,
runfolder_path,
runfolder_name,
samples=None,
project_files=None
):
"""
Instantiate a new `RunfolderProject` object
:param name: of the project
:param path: path to the project
:param runfolder_path: path the runfolder in which this project is stored.
:param runfolder_name: name of the runfolder in which this project is stored
:param samples: list of instances of Sample, representing samples in the project
:param project_files: list of instances of RunfolderFile, representing files belonging to
the project. These do not include files already accounted for in the `samples` list.
"""
self.name = name
self.path = os.path.abspath(path)
Expand Down
25 changes: 15 additions & 10 deletions delivery/models/runfolder.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ def __init__(
self,
file_path,
base_path=None,
file_checksum=None,
file_operation="symlink"
file_checksum=None
):
"""
A `RunfolderFile` object representing a file in the runfolder
Expand All @@ -55,15 +54,11 @@ def __init__(
:param file_path: the path to the file
:param base_path: a path relative to which the file will be considered
:param file_checksum: a computed checksum for the file
:param file_operation: the file operation to perform, symlink (default) or copy
"""
self.file_path = os.path.abspath(file_path)
self.file_name = os.path.basename(file_path)
self.base_path = base_path or os.path.dirname(self.file_path)
self.checksum = file_checksum
self.file_operation = file_operation \
if file_operation and file_operation in ["symlink", "copy"] \
else "symlink"

@classmethod
def create_object_from_path(
Expand All @@ -73,9 +68,20 @@ def create_object_from_path(
filesystem_service,
metadata_service,
base_path=None,
checksums=None,
file_operation="symlink"
checksums=None
):
"""
Factory method that creates and returns a class instance.
:param file_path: the path to the file
:param runfolder_path: the path to the runfolder containing the file
:param filesystem_service: a service which can access the file system
:param metadata_service: a MetadataService for reading and writing metadata files
:param base_path: a path relative to which the file will be considered
:param checksums: a list of pre-computed checksums that may or may not contain an entry for
the `file_path` relative to the `runfolder_path`
:return: a class object representing a file in the runfolder
"""
checksums = checksums or {}
relative_file_path = filesystem_service.relpath(
file_path,
Expand All @@ -89,6 +95,5 @@ def create_object_from_path(
return cls(
file_path,
base_path=base_path,
file_checksum=checksum,
file_operation=file_operation
file_checksum=checksum
)
10 changes: 3 additions & 7 deletions delivery/models/sample.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,7 @@ def __init__(
read_no=None,
is_index=None,
base_path=None,
checksum=None,
file_operation="symlink"
checksum=None
):
"""
A `SampleFile` object
Expand All @@ -66,13 +65,11 @@ def __init__(
:param is_index: if True, the sequence file contains index sequences
:param base_path: a path relative to which the file will be considered
:param checksum: the MD5 checksum for this SampleFile
:param file_operation: the file operation to perform, symlink (default) or copy
"""
super(SampleFile, self).__init__(
sample_path,
base_path=base_path,
file_checksum=checksum,
file_operation=file_operation
file_checksum=checksum
)
self.sample_name = sample_name
self.sample_index = sample_index
Expand All @@ -92,6 +89,5 @@ def __hash__(self):
self.lane_no,
self.read_no,
self.is_index,
self.checksum,
self.file_operation
self.checksum
))
123 changes: 52 additions & 71 deletions delivery/repositories/project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -214,81 +214,63 @@ def get_report_files(self, project_path, project_name, runfolder, checksums=None
:raises ProjectReportNotFoundException: if no MultiQC report was found for the project
"""

report_files = self.project_multiqc_report_files(project_path, project_name)
if self.filesystem_service.exists(report_files[0]):
log.info(
f"MultiQC reports found in Unaligned/{project_name}, overriding organisation of "
f"seqreports"
)
else:
log.info(f"Organising seqreports for {project_name}")
report_files = self.runfolder_project_multiqc_report_files(
project_name,
runfolder
)
try:
report_path = self.filesystem_service.dirname(report_files[0])
return [
RunfolderFile.create_object_from_path(
file_path=report_file,
runfolder_path=runfolder.path,
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=report_path,
checksums=checksums
)
for report_file in report_files
]
except FileNotFoundError:
raise ProjectReportNotFoundException(
f"No project report found for {project_name}"
)

@staticmethod
def project_multiqc_report_files(project_path, project_name):
"""
Return a list of MultiQC report files found under the project directory.
:param project_path: the path to the project folder
:param project_name: the name of the project
:return: a list of paths to MultiQC-report-related files
"""
return [
os.path.join(
project_path,
f"{project_name}_multiqc_report.html"
),
# look for the MultiQC report in these locations (in order of precedence)
report_paths = [
project_path,
os.path.join(
project_path,
f"{project_name}_multiqc_report_data.zip"
runfolder.path,
"seqreports",
"projects",
project_name
)
]
# depending on the location of the MultiQC report, use these different prefixes
report_prefixes = [
project_name,
f"{runfolder.name}_{project_name}"
]
report_names = [
"multiqc_report.html",
"multiqc_report_data.zip"
]
for report_path, report_prefix in zip(report_paths, report_prefixes):
# creating the RunfolderFile for a non-existing file should raise an exception
try:
report_files = []
for report_name in report_names:
report_file = os.path.join(
report_path,
f"{report_prefix}_{report_name}"
)
report_files.append(
RunfolderFile.create_object_from_path(
file_path=report_file,
runfolder_path=runfolder.path,
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=report_path,
checksums=checksums
)
)
except FileNotFoundError:
report_files = []
# if no exception was raised, return the report files and log a message depending on
# where they were found
else:
if report_path == report_paths[0]:
log.info(
f"MultiQC reports found in Unaligned/{project_name}, overriding "
f"organisation of seqreports"
)
else:
log.info(f"Organising seqreports for {project_name}")

@staticmethod
def runfolder_project_multiqc_report_files(project_name, runfolder):
"""
Return a list of MultiQC report files for a project found under the seqreports folder.
return report_files

:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:return: a list of paths to MultiQC-report-related files
"""
report_dir = os.path.join(
runfolder.path,
"seqreports",
"projects",
project_name
# raise an exception if no report files at all were found
raise ProjectReportNotFoundException(
f"No project report found for {project_name}"
)
return [
os.path.join(
report_dir,
f"{runfolder.name}_{project_name}_multiqc_report.html",
),
os.path.join(
report_dir,
f"{runfolder.name}_{project_name}_multiqc_report_data.zip"
)
]

def get_project_readme(
self,
Expand Down Expand Up @@ -324,8 +306,7 @@ def get_project_readme(
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=self.filesystem_service.dirname(readme_file),
checksums=checksums,
file_operation="copy"
checksums=checksums
)
]
except FileNotFoundError:
Expand Down
78 changes: 43 additions & 35 deletions delivery/services/organise_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,14 @@ def check_previously_organised_project(self, project, organised_projects_path, f

def organise_project(self, runfolder, project, organised_projects_path, lanes):
"""
Organise a project on a runfolder into its own directory and into a standard structure. If the project has
already been organised, a ProjectAlreadyOrganisedException will be raised, unless force is True. If force is
True, the existing project path will be renamed with a unique suffix.
Organise a project on a runfolder into its own directory and into a standard structure. If
the project has already been organised, a ProjectAlreadyOrganisedException will be raised.
:param runfolder: a Runfolder instance representing the runfolder on which the project belongs
:param runfolder: a Runfolder instance representing the runfolder on which the project
belongs
:param project: a Project instance representing the project to be organised
:param lanes: if not None, only samples on any of the specified lanes will be organised
:param force: if True, a previously organised project will be renamed with a unique suffix
:raises ProjectAlreadyOrganisedException: if project has already been organised and force is False
:param lanes: if not None, only samples on any of the specified lanes will be organised fix
:raises ProjectAlreadyOrganisedException: if project has already been organised
:return: a Project instance representing the project after organisation
"""
# symlink the samples
Expand Down Expand Up @@ -141,23 +140,18 @@ def organise_project_file(self, project_file, organised_project_path):
project_file.base_path
)

# the full path to the symlink
link_name = os.path.join(
# the full path to the organised file
destination = os.path.join(
organised_project_path,
relpath
)
if project_file.file_operation == "copy":
self.file_system_service.copy(project_file.file_path, link_name)
else:
# the relative path from the symlink to the original file
link_path = self.file_system_service.relpath(
project_file.file_path,
self.file_system_service.dirname(link_name)
)
self.file_system_service.symlink(link_path, link_name)

# copy the source file to the organised file destination
self.file_system_service.copy(project_file.file_path, destination)

# return a new RunFolder file object representing the organised file at its new location
return RunfolderFile(
link_name,
destination,
base_path=organised_project_path,
file_checksum=project_file.checksum
)
Expand All @@ -181,7 +175,13 @@ def organise_sample(self, sample, organised_project_path, lanes):
# symlink the sample files using relative paths
organised_sample_files = []
for sample_file in sample.sample_files:
organised_sample_files.append(self.organise_sample_file(sample_file, organised_sample_path, lanes))
organised_sample_files.append(
self.organise_sample_file(
sample_file,
organised_sample_path,
lanes
)
)

# clean up the list of sample files by removing None elements
organised_sample_files = list(filter(None, organised_sample_files))
Expand All @@ -194,32 +194,40 @@ def organise_sample(self, sample, organised_project_path, lanes):

def organise_sample_file(self, sample_file, organised_sample_path, lanes):
"""
Organise a sample file by creating a relative symlink in the supplied directory, pointing back to the supplied
SampleFile's original file path. The Sample file can be excluded from organisation based on the lane it was
derived from. The supplied directory will be created if it doesn't exist.
Organise a sample file by creating a relative symlink in the supplied directory,
pointing back to the supplied SampleFile's original file path. The Sample file can be
excluded from organisation based on the lane it was derived from. The supplied directory
will be created if it doesn't exist.
:param sample_file: a SampleFile instance representing the sample file to be organised
:param organised_sample_path: the path to the organised sample directory under which to place the symlink
:param lanes: if not None, only sample files derived from any of the specified lanes will be organised
:param organised_sample_path: the path to the organised sample directory under which to
place the symlink
:param lanes: if not None, only sample files derived from any of the specified lanes will
be organised
:return: a new SampleFile instance representing the sample file after organisation
"""
# skip if the sample file data is derived from a lane that shouldn't be included
if lanes and sample_file.lane_no not in lanes:
return None

# create the symlink in the supplied directory and relative to the file's original location
link_name = os.path.join(organised_sample_path, sample_file.file_name)
if sample_file.file_operation == "copy":
self.file_system_service.copy(sample_file.file_path, link_name)
else:
relative_path = self.file_system_service.relpath(
sample_file.file_path,
organised_sample_path
)
self.file_system_service.symlink(relative_path, link_name)
destination = os.path.join(
organised_sample_path,
sample_file.file_name
)

# get the relative path between the source file and the organised directory
relative_path = self.file_system_service.relpath(
sample_file.file_path,
organised_sample_path
)

# create the symlink using the relative path
self.file_system_service.symlink(relative_path, destination)

# return a new SampleFile instance representing the sample file after organisation
return SampleFile(
sample_path=link_name,
sample_path=destination,
sample_name=sample_file.sample_name,
sample_index=sample_file.sample_index,
lane_no=sample_file.lane_no,
Expand Down
Loading

0 comments on commit 8246380

Please sign in to comment.