Skip to content

Commit

Permalink
organize project metadata files (#54)
Browse files Browse the repository at this point in the history
* organize project metadata files

* ensure text encoding of file
  • Loading branch information
b97pla authored Feb 8, 2024
1 parent 7983064 commit 7acfe1a
Show file tree
Hide file tree
Showing 8 changed files with 149 additions and 22 deletions.
68 changes: 58 additions & 10 deletions delivery/repositories/project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,18 +140,19 @@ def project_from_dir(d):
project_name = os.path.basename(d)
project_files = []

# gather report files for the project from the runfolder
try:
project_files.extend(
self.get_report_files(
project_path,
project_name,
runfolder,
checksums=runfolder.checksums
)
)
except ProjectReportNotFoundException as ex:
log.warning(ex)

# gather the README to include with the project
try:
project_files.extend(
self.get_project_readme(
Expand All @@ -163,6 +164,17 @@ def project_from_dir(d):
except ProjectReportNotFoundException as ex:
log.warning(ex)

# gather metadata files for the project from the runfolder
try:
project_files.extend(
self.get_metadata_files(
project_name=project_name,
runfolder=runfolder
)
)
except ProjectReportNotFoundException as ex:
log.warning(ex)

samples = self.sample_repository.get_samples(
project_path,
project_name,
Expand Down Expand Up @@ -198,7 +210,12 @@ def project_from_dir(d):
f"Did not find {self.PROJECTS_DIR} folder for: {runfolder.name}"
)

def get_report_files(self, project_path, project_name, runfolder, checksums=None):
def get_report_files(
self,
project_path,
project_name,
runfolder
):
"""
Gets the paths to files associated with the supplied project's MultiQC report. This report
is fetched from seqreports unless there is a MultiQC report directly under the project's
Expand All @@ -208,8 +225,6 @@ def get_report_files(self, project_path, project_name, runfolder, checksums=None
:param project_path: the path to the project folder
:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:param checksums: a dict with pre-calculated checksums for files. paths are keys and the
corresponding checksum is the value
:return: a list of RunfolderFile objects representing project report files
:raises ProjectReportNotFoundException: if no MultiQC report was found for the project
"""
Expand Down Expand Up @@ -249,7 +264,7 @@ def get_report_files(self, project_path, project_name, runfolder, checksums=None
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=report_path,
checksums=checksums
checksums=runfolder.checksums
)
)
except FileNotFoundError:
Expand All @@ -276,19 +291,16 @@ def get_project_readme(
self,
project_name,
runfolder,
checksums=None,
with_undetermined=False
):
"""
Get the README to be included with the project data set.
:param project_name: the name of the project
:param runfolder: a Runfolder instance representing the runfolder containing the project
:param checksums: a dict with pre-calculated checksums for files. paths are keys and the
corresponding checksum is the value
:param with_undetermined: if True, the README should refer to data that includes
undetermined reads
:return: the path to the README file wrapped in a list
:return: a list containing a RunfolderFile object representing the README
:raises ProjectReportNotFoundException: if the README was not found
"""
log.info(f"Organising README for {project_name}")
Expand All @@ -306,7 +318,7 @@ def get_project_readme(
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service,
base_path=self.filesystem_service.dirname(readme_file),
checksums=checksums
checksums=runfolder.checksums
)
]
except FileNotFoundError:
Expand All @@ -315,6 +327,42 @@ def get_project_readme(
f"{project_name}"
)

def get_metadata_files(
    self,
    project_name,
    runfolder
):
    """
    Collect the metadata files that should accompany the project on delivery.

    The files below the "metadata" directory of the runfolder are listed through the
    filesystem service, and every file whose basename starts with the project name is
    turned into a RunfolderFile, using the runfolder path as base path.

    :param project_name: the name of the project
    :param runfolder: a Runfolder instance representing the runfolder containing the project
    :return: a list of RunfolderFile objects representing the gathered metadata files
    :raises ProjectReportNotFoundException: if no metadata file matched the project
    """
    log.info(f"Fetching metadata files for {project_name}")
    metadata_dir = os.path.join(runfolder.path, "metadata")

    gathered = []
    for candidate in self.filesystem_service.list_files_recursively(metadata_dir):
        # NOTE(review): this is a bare prefix match, so a file belonging to another project
        # whose name begins with this project's name would also be picked up - confirm that
        # the naming convention of the metadata files rules this out
        if not os.path.basename(candidate).startswith(project_name):
            continue
        gathered.append(
            RunfolderFile.create_object_from_path(
                file_path=candidate,
                runfolder_path=runfolder.path,
                filesystem_service=self.filesystem_service,
                metadata_service=self.metadata_service,
                base_path=runfolder.path,
                checksums=runfolder.checksums
            )
        )

    if gathered:
        return gathered

    # an empty result is reported with the same exception type as a missing report or README
    raise ProjectReportNotFoundException(
        f"metadata files could not be found for {project_name}"
    )

def is_sample_in_project(self, project, sample_project, sample_id, sample_lane):
"""
Checks if a matching sample is present in the project.
Expand Down
6 changes: 5 additions & 1 deletion delivery/repositories/runfolder_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,4 +249,8 @@ def get_project_report_files(self, runfolder, project):
:param project: an instance of Project
:return: a tuple with the path to the directory containing the report and a list of paths to the report files
"""
return self.project_repository.get_report_files(project, checksums=runfolder.checksums)
return self.project_repository.get_report_files(
project_path=project.path,
project_name=project.name,
runfolder=runfolder
)
8 changes: 8 additions & 0 deletions delivery/services/file_system_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,14 @@ def copy(source, dest):
:param dest:
:return: None
"""
try:
FileSystemService.makedirs(
FileSystemService.dirname(
dest
)
)
except FileExistsError:
pass
try:
return shutil.copyfile(source, dest)
except IsADirectoryError:
Expand Down
22 changes: 18 additions & 4 deletions delivery/services/organise_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,21 @@ def organise_runfolder(self, runfolder_id, lanes, projects, force):
# organise the projects and return a new Runfolder instance
organised_projects = []
for project in projects_on_runfolder:
organised_projects.append(self.organise_project(runfolder, project, organised_projects_path, lanes))
organised_projects.append(
self.organise_project(
runfolder,
project,
organised_projects_path,
lanes
)
)

return Runfolder(
runfolder.name,
runfolder.path,
projects=organised_projects,
checksums=runfolder.checksums)
checksums=runfolder.checksums
)

def check_previously_organised_project(self, project, organised_projects_path, force):
organised_project_path = os.path.join(organised_projects_path, project.name)
Expand All @@ -79,7 +87,13 @@ def check_previously_organised_project(self, project, organised_projects_path, f
self.file_system_service.mkdir(organised_projects_backup_path)
self.file_system_service.rename(organised_project_path, backup_path)

def organise_project(self, runfolder, project, organised_projects_path, lanes):
def organise_project(
self,
runfolder,
project,
organised_projects_path,
lanes
):
"""
Organise a project on a runfolder into its own directory and into a standard structure. If
the project has already been organised, a ProjectAlreadyOrganisedException will be raised.
Expand Down Expand Up @@ -127,7 +141,7 @@ def organise_project(self, runfolder, project, organised_projects_path, lanes):

def organise_project_file(self, project_file, organised_project_path):
"""
Find and symlink or copy the project-associated files to the organised project directory.
Find and copy the project-associated files to the organised project directory.
:param project_file: a RunfolderFile instance representing the project-associated file
before organisation
Expand Down
3 changes: 1 addition & 2 deletions tests/integration_tests/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,8 +90,7 @@ def _verify_checksum(file_path, expected_checksum):
MetadataService.hash_file(samplesheet_file))

for project_file in project.project_files:
project_file_base = os.path.dirname(project.project_files[0].file_path)
relative_path = os.path.relpath(project_file.file_path, project_file_base)
relative_path = os.path.relpath(project_file.file_path, project_file.base_path)
organised_project_file_path = os.path.join(organised_path, relative_path)
self.assertEqual(
os.path.basename(organised_project_file_path),
Expand Down
1 change: 1 addition & 0 deletions tests/resources/runfolders/readme/README.md
57 changes: 55 additions & 2 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,18 @@ def __init__(self):
def spawn_callback(self, f, **args):
    """
    Call the supplied callable right away, forwarding the keyword arguments to it.

    :param f: the callable to invoke
    :param args: keyword arguments passed on to f
    :return: None, the return value of f is discarded
    """
    f(**args)


class TestUtils:
    """
    Constants shared by the tests.
    """

    # the mapping that DummyConfig serves its values from
    DUMMY_CONFIG = {"monitored_directory": "/foo"}

    # placeholder path used as the README directory
    README_DIRECTORY = "/bar"


class DummyConfig:
    """
    Config stand-in that supports item access and looks up keys in TestUtils.DUMMY_CONFIG.
    """

    def __getitem__(self, key):
        """
        Return the value stored for key in TestUtils.DUMMY_CONFIG.

        :param key: the configuration key to look up
        :return: the value stored under key
        :raises KeyError: if key is not present in TestUtils.DUMMY_CONFIG
        """
        backing_config = TestUtils.DUMMY_CONFIG
        return backing_config[key]


fake_directories = ["160930_ST-E00216_0111_BH37CWALXX",
"160930_ST-E00216_0112_BH37CWALXX"]
fake_projects = ["ABC_123", "DEF_456", "GHI_789"]
Expand Down Expand Up @@ -120,7 +123,14 @@ def runfolder_project(
runfolder_path=runfolder.path,
runfolder_name=runfolder.name
)
project.project_files = project_report_files(project, next(report_type))
project_files = project_report_files(project, next(report_type))
project_files.append(
project_readme_file()
)
project_files.extend(
project_metadata_files(project)
)
project.project_files = project_files
sample_names = sample_name_generator()

# a straight-forward sample with files on one lane
Expand Down Expand Up @@ -272,6 +282,50 @@ def project_report_files(project, report_type):
]


def project_readme_file():
    """
    Create the RunfolderFile that represents the README in the test data.

    :return: a RunfolderFile pointing at tests/resources/readme/README.md, with the directory
    of the README as base path and a placeholder checksum derived from the path
    """
    readme_path = os.path.join("tests", "resources", "readme", "README.md")
    readme_dir = os.path.dirname(readme_path)
    return RunfolderFile(
        file_path=readme_path,
        base_path=readme_dir,
        file_checksum=f"checksum-for-{readme_path}"
    )


def project_metadata_files(project, file_types=None):
    """
    Create RunfolderFile objects for the metadata files expected for a project in the test
    data.

    One file is created per combination of file type and the suffixes "run" and "experiment".
    Each file is named "<project name>-<suffix>.<file type>", is located in the "metadata"
    directory below the project's runfolder path and uses the runfolder path as base path.

    :param project: the project to create metadata files for, its name and runfolder_path
    attributes are used
    :param file_types: the file extensions to create files for, defaults to "xml" and "json"
    if omitted or empty
    :return: a list of RunfolderFile objects, ordered by file type first and suffix second
    """
    if not file_types:
        file_types = ["xml", "json"]
    suffixes = ("run", "experiment")
    metadata_dir = os.path.join(project.runfolder_path, "metadata")

    # the outer loop runs over the file types so that all files of one type are adjacent
    expected_paths = [
        os.path.join(metadata_dir, f"{project.name}-{suffix}.{extension}")
        for extension in file_types
        for suffix in suffixes
    ]
    return [
        RunfolderFile(
            file_path=expected_path,
            base_path=project.runfolder_path,
            file_checksum=f"checksum-for-{expected_path}"
        )
        for expected_path in expected_paths
    ]


_runfolder1 = Runfolder(name="160930_ST-E00216_0111_BH37CWALXX",
path="/foo/160930_ST-E00216_0111_BH37CWALXX")

Expand Down Expand Up @@ -306,7 +360,6 @@ def project_report_files(project, report_type):

FAKE_RUNFOLDERS = [_runfolder1, _runfolder2]
UNORGANISED_RUNFOLDER = unorganised_runfolder()
README_DIRECTORY = "/bar"


def assert_eventually_equals(self, timeout, f, expected, delay=0.1):
Expand Down
6 changes: 3 additions & 3 deletions tests/unit_tests/repositories/test_project_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
from delivery.services.file_system_service import FileSystemService
from delivery.services.metadata_service import MetadataService

from tests.test_utils import README_DIRECTORY, UNORGANISED_RUNFOLDER
from tests.test_utils import UNORGANISED_RUNFOLDER


class TestGeneralProjectRepository(unittest.TestCase):
Expand Down Expand Up @@ -41,12 +41,12 @@ def setUp(self) -> None:
self.sample_repository = mock.create_autospec(RunfolderProjectBasedSampleRepository)
self.filesystem_service = mock.create_autospec(FileSystemService)
self.metadata_service = mock.create_autospec(MetadataService)
self.runfolder = UNORGANISED_RUNFOLDER
self.project_repository = UnorganisedRunfolderProjectRepository(
sample_repository=self.sample_repository,
readme_directory=README_DIRECTORY,
readme_directory=self.runfolder.path,
filesystem_service=self.filesystem_service,
metadata_service=self.metadata_service)
self.runfolder = UNORGANISED_RUNFOLDER

def test_get_report_files(self):

Expand Down

0 comments on commit 7acfe1a

Please sign in to comment.