diff --git a/.github/workflows/linting.yml b/.github/workflows/linting.yml index 24c8638..518991b 100644 --- a/.github/workflows/linting.yml +++ b/.github/workflows/linting.yml @@ -65,4 +65,4 @@ jobs: Thanks again for your contribution! repo-token: ${{ secrets.GITHUB_TOKEN }} - allow-repeats: false \ No newline at end of file + allow-repeats: false diff --git a/.nf-core.yml b/.nf-core.yml index b1a7f0e..3805dc8 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1 +1 @@ -repository_type: pipeline \ No newline at end of file +repository_type: pipeline diff --git a/LICENSE.txt b/LICENSE.txt index 14e2f77..d7fc69b 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0 means any form of the work other than Source Code Form. 1.7. "Larger Work" - means a work that combines Covered Software with other material, in + means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. 1.8. "License" @@ -355,9 +355,9 @@ notice described in Exhibit B of this License must be attached. Exhibit A - Source Code Form License Notice ------------------------------------------- - This Source Code Form is subject to the terms of the Mozilla Public - License, v. 2.0. If a copy of the MPL was not distributed with this - file, You can obtain one at http://mozilla.org/MPL/2.0/. + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE @@ -369,5 +369,5 @@ You may add additional accurate notices of copyright ownership. Exhibit B - "Incompatible With Secondary Licenses" Notice --------------------------------------------------------- - This Source Code Form is "Incompatible With Secondary Licenses", as - defined by the Mozilla Public License, v. 2.0. + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/assets/schema_input.json b/assets/schema_input.json index 366d8ae..f0786ab 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -14,4 +14,4 @@ } } } -} \ No newline at end of file +} diff --git a/assets/test_run_ids.csv b/assets/test_run_ids.csv index 5d0cf4e..f1c3f1b 100644 --- a/assets/test_run_ids.csv +++ b/assets/test_run_ids.csv @@ -1,4 +1,4 @@ 4Bi5xBK6E2Nbhj 4LWT4uaXDaGcDY 38QXz4OfQDpwOV -2lXd1j7OwZVfxh \ No newline at end of file +2lXd1j7OwZVfxh diff --git a/bin/pipeline-gantt.py b/bin/pipeline-gantt.py index d80a0f3..e8cd2cb 100755 --- a/bin/pipeline-gantt.py +++ b/bin/pipeline-gantt.py @@ -1,61 +1,90 @@ #!/usr/bin/env python3 import json -import tarfile from datetime import datetime from typing import IO +from pathlib import Path import click import pandas as pd import plotly.express as px -def extract_instance(fusion_logs: str, lines: IO) -> str: - for i, line in enumerate(lines): - try: - log = json.loads(line) - if 'instance-id' in log: - return log['instance-id'] - except json.JSONDecodeError: - print(f"WARN: invalid JSON at '{fusion_logs}' line {i}") - return "" +def extract_instance(fusion_logs: Path) -> str: + with fusion_logs.open() as file: + for line_number, line in enumerate(file, start=1): + try: + log = json.loads(line) + if "instance-id" in log: + return log["instance-id"] + except json.JSONDecodeError: + print(f"WARN: invalid JSON at '{fusion_logs}' line {line_number}") + return "" @click.command() -@click.option('--title', default='Pipeline GANTT', help='Plot title.') -@click.option('--input-file', type=click.Path(), help='The pipeline dump tar.gz input file.') -@click.option('--output-file', type=click.Path(), help='The HTML output file') -def build_gantt(title: str, input_file: str, output_file: str): +@click.option("--title", default="Pipeline GANTT", help="Plot title.") +@click.option( + "--input-dir", type=click.Path(), help="The pipeline dump tar.gz input file." +) +@click.option("--output-file", type=click.Path(), help="The HTML output file") +def build_gantt(title: str, input_dir: str, output_file: str): tasks = [] instance_ids = {} - tar = tarfile.open(input_file, "r:gz") - for member in tar.getmembers(): - if member.name == "workflow-tasks.json": - tasks = json.load(tar.extractfile(member)) - if member.name.endswith(".fusion.log"): - _, task_id, _ = member.name.split('/') - instance_id = extract_instance(member.name, tar.extractfile(member)) - instance_ids[int(task_id)] = instance_id + for path in Path(input_dir).glob("workflow-tasks.json"): + with path.open() as json_file: + tasks = json.load(json_file) + for path in Path(input_dir).glob("**/.fusion.log"): + task_id = int(path.parent.name) + instance_id = extract_instance(path) + instance_ids[task_id] = instance_id for t in tasks: - t['instanceId'] = instance_ids.get(t['taskId'], "unknow") - - data = [{k: v for k, v in t.items() if k in ['taskId', 'name', 'start', 'complete', 'memory', 'cpus', 'machineType', 'instanceId']} for t in tasks] - df = pd.DataFrame({ - 'id': f"T{d['taskId']}", - 'name': d['name'], - 'size': f"{d['cpus']}c_{d['memory'] / 1024 ** 3:.0f}GB", - 'start': datetime.strptime(d['start'], '%Y-%m-%dT%H:%M:%SZ'), - 'complete': datetime.strptime(d['complete'], '%Y-%m-%dT%H:%M:%SZ'), - 'instance': f"{d['instanceId']} ({d['machineType']})" - } - for d in data - ) - - fig = px.timeline(df, title=title, x_start="start", x_end="complete", y="id", color="instance", text="name", pattern_shape="size") + t["instanceId"] = instance_ids.get(t["taskId"], "unknow") + + data = [ + { + k: v + for k, v in t.items() + if k + in [ + "taskId", + "name", + "start", + "complete", + "memory", + "cpus", + "machineType", + "instanceId", + ] + } + for t in tasks + ] + df = pd.DataFrame( + { + "id": f"T{d['taskId']}", + "name": d["name"], + "size": f"{d['cpus']}c_{d['memory'] / 1024 ** 3:.0f}GB", + "start": datetime.strptime(d["start"], "%Y-%m-%dT%H:%M:%SZ"), + "complete": datetime.strptime(d["complete"], "%Y-%m-%dT%H:%M:%SZ"), + "instance": f"{d['instanceId']} ({d['machineType']})", + } + for d in data + ) + + fig = px.timeline( + df, + title=title, + x_start="start", + x_end="complete", + y="id", + color="instance", + text="name", + pattern_shape="size", + ) fig.write_html(output_file) -if __name__ == '__main__': +if __name__ == "__main__": build_gantt() diff --git a/conf/modules.config b/conf/modules.config index 31408af..51b897c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -52,4 +52,4 @@ process { ] } -} \ No newline at end of file +} diff --git a/main.nf b/main.nf index 594d8b8..951b45c 100644 --- a/main.nf +++ b/main.nf @@ -62,4 +62,4 @@ workflow { ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ THE END ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -*/ \ No newline at end of file +*/ diff --git a/modules/local/pipeline_gantt/main.nf b/modules/local/pipeline_gantt/main.nf index cf24347..c95a961 100644 --- a/modules/local/pipeline_gantt/main.nf +++ b/modules/local/pipeline_gantt/main.nf @@ -17,7 +17,7 @@ process PIPELINE_GANTT { """ pipeline-gantt.py \\ --title "GANTT Plot for run: $run_id" \\ - --input-file $run_dump \\ + --input-dir $run_dump \\ --output-file ./${prefix}_gantt.html cat <<-END_VERSIONS > versions.yml @@ -28,4 +28,4 @@ process PIPELINE_GANTT { click: \$(python -c "import click; print(click.__version__)") END_VERSIONS """ -} \ No newline at end of file +} diff --git a/modules/local/seqera_runs_dump/main.nf b/modules/local/seqera_runs_dump/main.nf index 6fab1f5..dd510a9 100644 --- a/modules/local/seqera_runs_dump/main.nf +++ b/modules/local/seqera_runs_dump/main.nf @@ -34,7 +34,7 @@ process SEQERA_RUNS_DUMP { -xvf \\ ${prefix}_run_dump.tar.gz \\ -C ${prefix}_run_dump - + cp ${prefix}_run_dump/workflow.json . cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py index cecaa10..c1e49ec 100755 --- a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py +++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py @@ -8,6 +8,7 @@ import platform from textwrap import dedent + def main(): """Load all version files and generate merged output.""" versions_this_module = {} @@ -17,7 +18,9 @@ def main(): } with open("$versions") as f: - versions_by_process = yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + versions_by_process = ( + yaml.load(f, Loader=yaml.BaseLoader) | versions_this_module + ) # aggregate versions by the module name (derived from fully-qualified process name) versions_by_module = {} diff --git a/nextflow_schema.json b/nextflow_schema.json index 627d24d..42744af 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -27,7 +27,7 @@ "default": "community/showcase", "description": "Workspace on the Seqera Platform to fetch run information.", "fa_icon": "fas fa-folder-open" - }, + }, "outdir": { "type": "string", "format": "directory-path", diff --git a/tower.yml b/tower.yml index fe46247..d0d5df5 100644 --- a/tower.yml +++ b/tower.yml @@ -1,3 +1,3 @@ reports: multiqc_report.html: - display: "MultiQC HTML report" \ No newline at end of file + display: "MultiQC HTML report"