Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature/image support #100

Merged
merged 2 commits into from
Dec 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/sbpack.egg-info/
/.idea/
/venv/
/.nextflow/
/.pytest_cache/
7 changes: 3 additions & 4 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
ruamel.yaml >= 0.16
sevenbridges-python >= 2.0
nf-core==2.1
wrabbit==0.2.4
cwlformat
packaging
nf-core == 2.1
wrabbit == 0.3.0
pillow >= 11.0.0
8 changes: 4 additions & 4 deletions sbpack/noncwl/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ Given the contents of this sample sheet is:

Remapped file will be:

| sample | fastq_1 | fastq_2 | strandedness |
|:--------|:------------------------------------------------------------------|:------------------------------------------------------------------|:-------------|
| SAMPLE1 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse |
| SAMPLE2 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse |
| sample | fastq_1 | fastq_2 | strandedness |
|:--------|:-----------------------------------------------------------------|:------------------------------------------------------------------|:-------------|
| SAMPLE1 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse |
| SAMPLE2 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse |
185 changes: 116 additions & 69 deletions sbpack/noncwl/manifest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from sevenbridges.models.project import Project
from sevenbridges import Api
from sevenbridges.errors import NotFound, Forbidden

import logging
import sbpack.lib as lib
import argparse
import os
import re


logger = logging.getLogger(__name__)
Expand All @@ -14,10 +16,11 @@
def paths_to_check(file_name: str) -> list:
"""
:param file_name: Contents of a single manifest file cell that contains
path(s) to files.
path(s) to files. Can be multiple files if separated with ";".
:return: Files that need to be checked
"""
chk = []
rtrn = []
to_check = []

if ";" in file_name:
# This should handle the case when there are multiple files in the
Expand All @@ -29,40 +32,41 @@ def paths_to_check(file_name: str) -> list:
chk.append(file_name)

for file_name in chk:
if ":" in file_name:
if "://" in file_name:
# If a file is in cloud storage, skip it
continue

file_name = file_name.strip('/')
rtrn.append(file_name)
to_check.append(file_name)
cur_path = file_name
while os.path.dirname(cur_path):
cur_path = os.path.dirname(cur_path)
rtrn.append(cur_path)
to_check.append(cur_path)

return rtrn
return to_check


def get_path_from_id(api: Api, file: str) -> str:
    """
    Extracts the full path of a file from ID.

    Walks up the folder hierarchy from the file to the root folder of its
    project, collecting the name of every folder on the way.

    :param api: Initialized SevenBridges API
    :param file: id of a file
    :return: Path to the File on vs://, in the form
        'vs://Projects/<project root name>/<folders...>/<file name>'
    """
    file_obj = api.files.get(file)

    project = api.projects.get(file_obj.project)
    # The root folder is fetched once; both its id (to stop the walk) and
    # its name (first path segment) are read from this single object.
    project_root = api.files.get(project.root_folder)

    # Names are collected leaf-first and reversed when the path is built.
    full_path = [file_obj.name]
    current = file_obj
    while current.parent != project_root.id:
        current = api.files.get(current.parent)
        full_path.append(current.name)

    full_path.append(project_root.name)
    return "vs://Projects/" + "/".join(full_path[::-1])


def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
Expand All @@ -71,7 +75,7 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
:param api: Initialized SevenBridges API
:param file_name: Name of the file
:param project: SevenBridges Project
:return:
:return: Path to the File on vs://
"""

file = api.files.query(project=project, names=[file_name])
Expand All @@ -83,25 +87,52 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
)


def remap_cell(project_root: str, path: str) -> str:
def try_to_get_file(api, id_):
    """
    Tries to get a file through the SevenBridges API.

    This is a best-effort lookup: every failure results in None instead of
    an exception, so callers can fall back to treating the value as a path.

    :param api: SevenBridges API
    :param id_: File ID on the SevenBridges Platform
    :return: File object if found, else None
    """
    try:
        return api.files.get(id_)
    except (NotFound, Forbidden):
        # Expected when the value is not the ID of an accessible file.
        return None
    except Exception:
        # Still best-effort, but leave a trace so that unexpected failures
        # do not silently turn an ID into a plain path.
        logger.warning(
            "Unexpected error while looking up file <%s>", id_, exc_info=True)
        return None


def remap_cell(api, project_root: str, path: str) -> str:
    """
    Remaps a file path to the 'vs:' file system.

    Supports multiple files separated with ';'.

    A cell may hold a path relative to the project root or a platform file
    ID (24 lowercase hexadecimal characters). Cells that are empty, consist
    only of slashes, or contain ':' are returned unchanged.

    :param api: SevenBridges API
    :param project_root: Name of the project root directory.
    :param path: File path.
    :return: File path(s) prefixed with 'vs://Projects/' and project_root.
    """
    if ";" in path:
        return ";".join(
            remap_cell(api, project_root, f) for f in path.split(";"))

    if not path or ":" in path:
        # Nothing to remap: empty cell, or a value that contains ':'.
        return path

    relative = path.lstrip('/')
    if not relative:
        # The cell held nothing but slashes; keep it as it was instead of
        # returning None, which would break joining the row back together.
        return path

    # file ids are MongoDB Object IDs; require the whole cell to be one so
    # that ordinary paths never trigger an API lookup
    if re.fullmatch(r'[a-f0-9]{24}', relative):
        if try_to_get_file(api, relative) is not None:
            # Return the path as a string, not the File object itself.
            return get_path_from_id(api, relative)

    return f"vs://Projects/{project_root}/{relative}"

Expand Down Expand Up @@ -181,19 +212,30 @@ def validate_sheet(
if os.path.dirname(path):
parent = checked[os.path.dirname(path)]

file = api.files.query(
names=[basename],
project=project if not parent else None,
parent=parent)
file = None
if re.match(r'[a-f0-9]{24}', path):
# file ids are MongoDB Object IDs
file = try_to_get_file(api, path)

if file is None:
file = api.files.query(
names=[basename],
project=project if not parent else None,
parent=parent
)
if file:
file = file[0]

if file:
checked[path] = file[0]
checked[path] = file
else:
raise FileExistsError(
f"File <{path}> does not exist within "
f"project <{project}>")


def remap(
api,
project_root: str,
path_to_file: str,
remap_columns: list,
Expand All @@ -209,6 +251,7 @@ def remap(

The function assumes that the first row is always the header.

:param api: SevenBridges API
:param project_root: Name of the project root directory.
:param path_to_file: Path to the manifest file.
:param remap_columns: Names of manifest file columns that contain paths to
Expand Down Expand Up @@ -245,67 +288,21 @@ def remap(
if line:
line = line.strip('\n').split(split_char)
for i in indices:
line[i] = remap_cell(project_root, line[i])
line[i] = remap_cell(api, project_root, line[i])
line = split_char.join(line)
sheet.append(line)

return "\n".join(sheet)


def main():
# CLI parameters
parser = argparse.ArgumentParser()
parser.add_argument(
"--profile", required=False,
default="default", type=str,
help="SB platform profile as set in the SB API credentials file.",
)
parser.add_argument(
"--projectid", required=True,
type=str,
help="Takes the form {user or division}/{project}.",
)
parser.add_argument(
"--sample-sheet", required=True,
type=str,
help="Path to the sample sheet."
)
parser.add_argument(
"--columns", required=True,
metavar='string', nargs='+', type=str,
help="Specify columns that contain paths to files on the platform"
"as a list of strings separated by spaces.",
)
parser.add_argument(
"--output", '-o', required=False,
type=str,
help="Name of the output file.",
)
parser.add_argument(
"--upload", action='store_true', required=False,
help="Upload the file to the project after making it.",
)
parser.add_argument(
"--tags", required=False,
metavar='string', nargs='+', type=str,
help="Specify tags that you want the sample sheet to have on the "
"platform, after it is uploaded.",
)
parser.add_argument(
"--validate", action='store_true', required=False,
help="Validate if each file exists on target project location.",
)

args = parser.parse_args()

def make_manifest(api, args):
project = args.projectid
api = lib.get_profile(args.profile)

project = api.projects.get(project)
project_root = api.files.get(project.root_folder).name

logger.info('Remapping manifest files.')
sheet = remap(
api,
project_root,
args.sample_sheet,
args.columns
Expand Down Expand Up @@ -366,5 +363,55 @@ def main():
file.save()


def main():
    """
    Command line entry point.

    Parses the CLI parameters, obtains an API object for the selected
    profile through ``lib.get_profile`` and passes both to
    ``make_manifest``.
    """
    # CLI parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--profile", required=False,
        default="default", type=str,
        help="SB platform profile as set in the SB API credentials file.",
    )
    parser.add_argument(
        "--projectid", required=True,
        type=str,
        help="Takes the form {user or division}/{project}.",
    )
    parser.add_argument(
        "--sample-sheet", required=True,
        type=str,
        help="Path to the sample sheet."
    )
    parser.add_argument(
        "--columns", required=True,
        metavar='string', nargs='+', type=str,
        # The trailing space keeps the two literals from running together.
        help="Specify columns that contain paths to files on the platform "
             "as a list of strings separated by spaces.",
    )
    parser.add_argument(
        "--output", '-o', required=False,
        type=str,
        help="Name of the output file.",
    )
    parser.add_argument(
        "--upload", action='store_true', required=False,
        help="Upload the file to the project after making it.",
    )
    parser.add_argument(
        "--tags", required=False,
        metavar='string', nargs='+', type=str,
        help="Specify tags that you want the sample sheet to have on the "
             "platform, after it is uploaded.",
    )
    parser.add_argument(
        "--validate", action='store_true', required=False,
        help="Validate if each file exists on target project location.",
    )

    args = parser.parse_args()

    api = lib.get_profile(args.profile)
    make_manifest(api, args)


# Run the command line interface only when this file is executed as a
# script; importing the module has no such side effect.
if __name__ == "__main__":
    main()
Loading
Loading