Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update pipeline-gantt.py #1

Merged
merged 2 commits into from
Nov 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ jobs:

Thanks again for your contribution!
repo-token: ${{ secrets.GITHUB_TOKEN }}
allow-repeats: false
allow-repeats: false
2 changes: 1 addition & 1 deletion .nf-core.yml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
repository_type: pipeline
repository_type: pipeline
12 changes: 6 additions & 6 deletions LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0
means any form of the work other than Source Code Form.

1.7. "Larger Work"
means a work that combines Covered Software with other material, in
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.

1.8. "License"
Expand Down Expand Up @@ -355,9 +355,9 @@ notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------

This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
Expand All @@ -369,5 +369,5 @@ You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------

This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.
2 changes: 1 addition & 1 deletion assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
}
}
}
}
}
2 changes: 1 addition & 1 deletion assets/test_run_ids.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
4Bi5xBK6E2Nbhj
4LWT4uaXDaGcDY
38QXz4OfQDpwOV
2lXd1j7OwZVfxh
2lXd1j7OwZVfxh
105 changes: 67 additions & 38 deletions bin/pipeline-gantt.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,90 @@
#!/usr/bin/env python3

import json
import tarfile
from datetime import datetime
from typing import IO
from pathlib import Path

import click
import pandas as pd
import plotly.express as px


def extract_instance(fusion_logs: str, lines: IO) -> str:
for i, line in enumerate(lines):
try:
log = json.loads(line)
if 'instance-id' in log:
return log['instance-id']
except json.JSONDecodeError:
print(f"WARN: invalid JSON at '{fusion_logs}' line {i}")
return ""
def extract_instance(fusion_logs: Path) -> str:
with fusion_logs.open() as file:
for line_number, line in enumerate(file, start=1):
try:
log = json.loads(line)
if "instance-id" in log:
return log["instance-id"]
except json.JSONDecodeError:
print(f"WARN: invalid JSON at '{fusion_logs}' line {line_number}")
return ""


@click.command()
@click.option('--title', default='Pipeline GANTT', help='Plot title.')
@click.option('--input-file', type=click.Path(), help='The pipeline dump tar.gz input file.')
@click.option('--output-file', type=click.Path(), help='The HTML output file')
def build_gantt(title: str, input_file: str, output_file: str):
@click.option("--title", default="Pipeline GANTT", help="Plot title.")
@click.option(
"--input-dir", type=click.Path(), help="The pipeline dump tar.gz input file."
)
@click.option("--output-file", type=click.Path(), help="The HTML output file")
def build_gantt(title: str, input_dir: str, output_file: str):
tasks = []
instance_ids = {}

tar = tarfile.open(input_file, "r:gz")
for member in tar.getmembers():
if member.name == "workflow-tasks.json":
tasks = json.load(tar.extractfile(member))
if member.name.endswith(".fusion.log"):
_, task_id, _ = member.name.split('/')
instance_id = extract_instance(member.name, tar.extractfile(member))
instance_ids[int(task_id)] = instance_id
for path in Path(input_dir).glob("workflow-tasks.json"):
with path.open() as json_file:
tasks = json.load(json_file)
for path in Path(input_dir).glob("**/.fusion.log"):
task_id = int(path.parent.name)
instance_id = extract_instance(path)
instance_ids[task_id] = instance_id

for t in tasks:
t['instanceId'] = instance_ids.get(t['taskId'], "unknow")

data = [{k: v for k, v in t.items() if k in ['taskId', 'name', 'start', 'complete', 'memory', 'cpus', 'machineType', 'instanceId']} for t in tasks]
df = pd.DataFrame({
'id': f"T{d['taskId']}",
'name': d['name'],
'size': f"{d['cpus']}c_{d['memory'] / 1024 ** 3:.0f}GB",
'start': datetime.strptime(d['start'], '%Y-%m-%dT%H:%M:%SZ'),
'complete': datetime.strptime(d['complete'], '%Y-%m-%dT%H:%M:%SZ'),
'instance': f"{d['instanceId']} ({d['machineType']})"
}
for d in data
)

fig = px.timeline(df, title=title, x_start="start", x_end="complete", y="id", color="instance", text="name", pattern_shape="size")
t["instanceId"] = instance_ids.get(t["taskId"], "unknow")

data = [
{
k: v
for k, v in t.items()
if k
in [
"taskId",
"name",
"start",
"complete",
"memory",
"cpus",
"machineType",
"instanceId",
]
}
for t in tasks
]
df = pd.DataFrame(
{
"id": f"T{d['taskId']}",
"name": d["name"],
"size": f"{d['cpus']}c_{d['memory'] / 1024 ** 3:.0f}GB",
"start": datetime.strptime(d["start"], "%Y-%m-%dT%H:%M:%SZ"),
"complete": datetime.strptime(d["complete"], "%Y-%m-%dT%H:%M:%SZ"),
"instance": f"{d['instanceId']} ({d['machineType']})",
}
for d in data
)

fig = px.timeline(
df,
title=title,
x_start="start",
x_end="complete",
y="id",
color="instance",
text="name",
pattern_shape="size",
)
fig.write_html(output_file)


if __name__ == '__main__':
if __name__ == "__main__":
build_gantt()
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ process {
]
}

}
}
2 changes: 1 addition & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ workflow {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
*/
4 changes: 2 additions & 2 deletions modules/local/pipeline_gantt/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ process PIPELINE_GANTT {
"""
pipeline-gantt.py \\
--title "GANTT Plot for run: $run_id" \\
--input-file $run_dump \\
--input-dir $run_dump \\
--output-file ./${prefix}_gantt.html

cat <<-END_VERSIONS > versions.yml
Expand All @@ -28,4 +28,4 @@ process PIPELINE_GANTT {
click: \$(python -c "import click; print(click.__version__)")
END_VERSIONS
"""
}
}
2 changes: 1 addition & 1 deletion modules/local/seqera_runs_dump/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ process SEQERA_RUNS_DUMP {
-xvf \\
${prefix}_run_dump.tar.gz \\
-C ${prefix}_run_dump

cp ${prefix}_run_dump/workflow.json .

cat <<-END_VERSIONS > versions.yml
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"default": "community/showcase",
"description": "Workspace on the Seqera Platform to fetch run information.",
"fa_icon": "fas fa-folder-open"
},
},
"outdir": {
"type": "string",
"format": "directory-path",
Expand Down
2 changes: 1 addition & 1 deletion tower.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
reports:
multiqc_report.html:
display: "MultiQC HTML report"
display: "MultiQC HTML report"