Skip to content

Commit

Permalink
Merge pull request #1 from robsyme/patch-1
Browse files Browse the repository at this point in the history
Update pipeline-gantt.py
  • Loading branch information
drpatelh authored Nov 7, 2023
2 parents 74e7968 + a65829c commit cea490d
Show file tree
Hide file tree
Showing 13 changed files with 88 additions and 56 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/linting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,4 +65,4 @@ jobs:
Thanks again for your contribution!
repo-token: ${{ secrets.GITHUB_TOKEN }}
allow-repeats: false
allow-repeats: false
2 changes: 1 addition & 1 deletion .nf-core.yml
Original file line number Diff line number Diff line change
@@ -1 +1 @@
repository_type: pipeline
repository_type: pipeline
12 changes: 6 additions & 6 deletions LICENSE.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ Mozilla Public License Version 2.0
means any form of the work other than Source Code Form.

1.7. "Larger Work"
means a work that combines Covered Software with other material, in
means a work that combines Covered Software with other material, in
a separate file or files, that is not Covered Software.

1.8. "License"
Expand Down Expand Up @@ -355,9 +355,9 @@ notice described in Exhibit B of this License must be attached.
Exhibit A - Source Code Form License Notice
-------------------------------------------

This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.
This Source Code Form is subject to the terms of the Mozilla Public
License, v. 2.0. If a copy of the MPL was not distributed with this
file, You can obtain one at http://mozilla.org/MPL/2.0/.

If it is not possible or desirable to put the notice in a particular
file, then You may include the notice in a location (such as a LICENSE
Expand All @@ -369,5 +369,5 @@ You may add additional accurate notices of copyright ownership.
Exhibit B - "Incompatible With Secondary Licenses" Notice
---------------------------------------------------------

This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.
This Source Code Form is "Incompatible With Secondary Licenses", as
defined by the Mozilla Public License, v. 2.0.
2 changes: 1 addition & 1 deletion assets/schema_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,4 @@
}
}
}
}
}
2 changes: 1 addition & 1 deletion assets/test_run_ids.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
4Bi5xBK6E2Nbhj
4LWT4uaXDaGcDY
38QXz4OfQDpwOV
2lXd1j7OwZVfxh
2lXd1j7OwZVfxh
105 changes: 67 additions & 38 deletions bin/pipeline-gantt.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,90 @@
#!/usr/bin/env python3

import json
import tarfile
from datetime import datetime
from typing import IO
from pathlib import Path

import click
import pandas as pd
import plotly.express as px


def extract_instance(fusion_logs: str, lines: IO) -> str:
for i, line in enumerate(lines):
try:
log = json.loads(line)
if 'instance-id' in log:
return log['instance-id']
except json.JSONDecodeError:
print(f"WARN: invalid JSON at '{fusion_logs}' line {i}")
return ""
def extract_instance(fusion_logs: Path) -> str:
with fusion_logs.open() as file:
for line_number, line in enumerate(file, start=1):
try:
log = json.loads(line)
if "instance-id" in log:
return log["instance-id"]
except json.JSONDecodeError:
print(f"WARN: invalid JSON at '{fusion_logs}' line {line_number}")
return ""


@click.command()
@click.option('--title', default='Pipeline GANTT', help='Plot title.')
@click.option('--input-file', type=click.Path(), help='The pipeline dump tar.gz input file.')
@click.option('--output-file', type=click.Path(), help='The HTML output file')
def build_gantt(title: str, input_file: str, output_file: str):
@click.option("--title", default="Pipeline GANTT", help="Plot title.")
@click.option(
"--input-dir", type=click.Path(), help="The pipeline dump tar.gz input file."
)
@click.option("--output-file", type=click.Path(), help="The HTML output file")
def build_gantt(title: str, input_dir: str, output_file: str):
tasks = []
instance_ids = {}

tar = tarfile.open(input_file, "r:gz")
for member in tar.getmembers():
if member.name == "workflow-tasks.json":
tasks = json.load(tar.extractfile(member))
if member.name.endswith(".fusion.log"):
_, task_id, _ = member.name.split('/')
instance_id = extract_instance(member.name, tar.extractfile(member))
instance_ids[int(task_id)] = instance_id
for path in Path(input_dir).glob("workflow-tasks.json"):
with path.open() as json_file:
tasks = json.load(json_file)
for path in Path(input_dir).glob("**/.fusion.log"):
task_id = int(path.parent.name)
instance_id = extract_instance(path)
instance_ids[task_id] = instance_id

for t in tasks:
t['instanceId'] = instance_ids.get(t['taskId'], "unknow")

data = [{k: v for k, v in t.items() if k in ['taskId', 'name', 'start', 'complete', 'memory', 'cpus', 'machineType', 'instanceId']} for t in tasks]
df = pd.DataFrame({
'id': f"T{d['taskId']}",
'name': d['name'],
'size': f"{d['cpus']}c_{d['memory'] / 1024 ** 3:.0f}GB",
'start': datetime.strptime(d['start'], '%Y-%m-%dT%H:%M:%SZ'),
'complete': datetime.strptime(d['complete'], '%Y-%m-%dT%H:%M:%SZ'),
'instance': f"{d['instanceId']} ({d['machineType']})"
}
for d in data
)

fig = px.timeline(df, title=title, x_start="start", x_end="complete", y="id", color="instance", text="name", pattern_shape="size")
t["instanceId"] = instance_ids.get(t["taskId"], "unknow")

data = [
{
k: v
for k, v in t.items()
if k
in [
"taskId",
"name",
"start",
"complete",
"memory",
"cpus",
"machineType",
"instanceId",
]
}
for t in tasks
]
df = pd.DataFrame(
{
"id": f"T{d['taskId']}",
"name": d["name"],
"size": f"{d['cpus']}c_{d['memory'] / 1024 ** 3:.0f}GB",
"start": datetime.strptime(d["start"], "%Y-%m-%dT%H:%M:%SZ"),
"complete": datetime.strptime(d["complete"], "%Y-%m-%dT%H:%M:%SZ"),
"instance": f"{d['instanceId']} ({d['machineType']})",
}
for d in data
)

fig = px.timeline(
df,
title=title,
x_start="start",
x_end="complete",
y="id",
color="instance",
text="name",
pattern_shape="size",
)
fig.write_html(output_file)


if __name__ == '__main__':
if __name__ == "__main__":
build_gantt()
2 changes: 1 addition & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -52,4 +52,4 @@ process {
]
}

}
}
2 changes: 1 addition & 1 deletion main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -62,4 +62,4 @@ workflow {
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
*/
4 changes: 2 additions & 2 deletions modules/local/pipeline_gantt/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ process PIPELINE_GANTT {
"""
pipeline-gantt.py \\
--title "GANTT Plot for run: $run_id" \\
--input-file $run_dump \\
--input-dir $run_dump \\
--output-file ./${prefix}_gantt.html
cat <<-END_VERSIONS > versions.yml
Expand All @@ -28,4 +28,4 @@ process PIPELINE_GANTT {
click: \$(python -c "import click; print(click.__version__)")
END_VERSIONS
"""
}
}
2 changes: 1 addition & 1 deletion modules/local/seqera_runs_dump/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ process SEQERA_RUNS_DUMP {
-xvf \\
${prefix}_run_dump.tar.gz \\
-C ${prefix}_run_dump
cp ${prefix}_run_dump/workflow.json .
cat <<-END_VERSIONS > versions.yml
Expand Down

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"default": "community/showcase",
"description": "Workspace on the Seqera Platform to fetch run information.",
"fa_icon": "fas fa-folder-open"
},
},
"outdir": {
"type": "string",
"format": "directory-path",
Expand Down
2 changes: 1 addition & 1 deletion tower.yml
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
reports:
multiqc_report.html:
display: "MultiQC HTML report"
display: "MultiQC HTML report"

0 comments on commit cea490d

Please sign in to comment.