Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add py script that generates master html #2

Merged
merged 10 commits into from
Jan 23, 2025
72 changes: 72 additions & 0 deletions assets/master_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>16S Samples Report</title>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
</head>
<body>
<div class="container my-5">
<div class="card">
<div class="card-header text-white bg-primary">
<h2 class="card-title mb-0">Sample Report</h2>
</div>
<div class="card-body">
<div class="table-responsive">
<table class="table table-bordered table-striped table-hover">
<thead class="table-success">
<tr>
<th rowspan="2">Sample ID</th>
<th colspan="1" class="text-center">Results</th>
<th colspan="1" class="text-center">QC</th>
<th colspan="8" class="text-center">NanoPlot</th>
<th colspan="3" class="text-center">Pipeline</th>
</tr>
<tr>
<th class="text-center">Krona</th>
<th class="text-center">MultiQC Report</th>
<th class="text-center">Report</th>
<th class="text-center">Length vs Quality Scatter (Dot)</th>
<th class="text-center">Length vs Quality Scatter (KDE)</th>
<th class="text-center">Non-weighted Histogram</th>
<th class="text-center">Non-weighted Log-transformed Histogram</th>
<th class="text-center">Weighted Histogram</th>
<th class="text-center">Weighted Log-transformed Histogram</th>
<th class="text-center">Yield by Length</th>
<th class="text-center">Execution Report</th>
<th class="text-center">Execution Timeline</th>
<th class="text-center">DAG</th>
</tr>
</thead>
<tbody>
{# One table row per sample. The cell order below must match the second header
   row: Krona | MultiQC | NanoPlot Report | Dot | KDE | Non-weighted |
   Non-weighted log | Weighted | Weighted log | Yield | Execution Report |
   Execution Timeline | DAG. #}
{% for sample_id in sample_ids %}
<tr>
    <td>{{ sample_id }}</td>
    <td><a href="./krona/{{ sample_id }}_T1_krona.html">Krona</a></td>
    <td><a href="./multiqc/multiqc_report.html">MultiQC</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNanoPlot-report.html">NanoPlot Report</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_dot.html">Dot Scatter Plot</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_kde.html">KDE Scatter Plot</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedHistogramReadlength.html">Non-weighted Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedLogTransformed_HistogramReadlength.html">Non-weighted Log-transformed Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedHistogramReadlength.html">Weighted Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedLogTransformed_HistogramReadlength.html">Weighted Log-transformed Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedYield_By_Length.html">Yield by Length</a></td>
    <td><a href="./pipeline_info/execution_report_{{ date_id }}.html">Execution Report</a></td>
    <td><a href="./pipeline_info/execution_timeline_{{ date_id }}.html">Execution Timeline</a></td>
    <td><a href="./pipeline_info/pipeline_dag_{{ date_id }}.html">Pipeline DAG</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div class="card-footer text-muted">
Sequenced on {{ seqrun_date }}
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
123 changes: 123 additions & 0 deletions bin/generate_master_html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env python

"""Generate a master html template."""

import os
import re
import argparse
import pandas as pd
from jinja2 import Template
from datetime import datetime

# Long-form help text printed above the option list by argparse (--help).
description = '''
------------------------
Title: generate_master_html.py
Date: 2024-12-16
Author(s): Ryan Kennedy
------------------------
Description:
This script creates master html file that points to all html files that were outputted from EMU.
List of functions:
get_date_id, find_date_in_string, get_sample_ids, generate_master_html, main.
List of standard modules:
os, re, argparse, datetime.
List of "non standard" modules:
pandas, jinja2.
Procedure:
1. Get sample IDs by parsing samplesheet csv.
2. Render html using template.
3. Write out master.html file.
-----------------------------------------------------------------------------------------------------------
'''

# Epilog printed below the option list; the example must use the options the
# parser actually defines (-c/--csv and -m/--html).
usage = '''
-----------------------------------------------------------------------------------------------------------
Generates master html file that points to all html files.
Executed using: python3 ./generate_master_html.py -c <Samplesheet_CSV_Filepath> -m <Master_HTML_Template_Filepath>
-----------------------------------------------------------------------------------------------------------
'''

# Command-line interface. The module-level description/usage strings are shown
# verbatim (RawDescriptionHelpFormatter keeps their line breaks).
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=description,
    epilog=usage,
)

# -v/--version prints the program name and version, then exits.
parser.add_argument("-v", "--version", action="version", version="%(prog)s 0.0.1")

# Samplesheet CSV; read later for its 'sample' column and for its location.
parser.add_argument(
    "-c", "--csv",
    dest="csv",
    metavar="SAMPLESHEET_CSV_FILEPATH",
    required=True,
    help="input samplesheet csv filepath",
)

# Jinja2 template that is rendered into master.html.
parser.add_argument(
    "-m", "--html",
    dest="html",
    metavar="MASTER_HTML_TEMPLATE_FILEPATH",
    required=True,
    help="input master html template filepath",
)

# Parsed at import time; main() reads the result through this module-level name.
args = parser.parse_args()

def get_date_id(samplesheet_csv_fpath):
    """Return the timestamp suffix of the most recent execution report.

    Looks in the ``pipeline_info`` directory that sits next to the samplesheet
    for files whose name starts with ``execution_report`` and extracts the
    ``YYYY-MM-DD...`` part of the name (everything from the date up to the
    first dot).

    Args:
        samplesheet_csv_fpath: Path to the samplesheet csv; only its parent
            directory is used.

    Returns:
        The timestamp string of the newest report, e.g. ``2025-01-23_17-30-00``.

    Raises:
        FileNotFoundError: If the ``pipeline_info`` directory does not exist
            (raised by ``os.listdir``) or contains no execution report with a
            timestamp in its name.
    """
    parent_dir = os.path.dirname(samplesheet_csv_fpath)
    pipeline_info_dir = os.path.join(parent_dir, 'pipeline_info')

    date_ids = []
    for filename in os.listdir(pipeline_info_dir):
        if not filename.startswith("execution_report"):
            continue
        # Search the file name only: matching against the full path would pick
        # up a date-like directory name that appears earlier in the path.
        match = re.search(r'(\d{4}-\d{2}-\d{2}[^.]+)', filename)
        if match:
            date_ids.append(match.group(1))

    if not date_ids:
        raise FileNotFoundError(
            f"No timestamped execution_report file found in '{pipeline_info_dir}'"
        )

    # The timestamp is zero-padded and ordered year-to-day, so the greatest
    # string is the newest report; this also separates reports from the same
    # day. Assumes the remainder is a zero-padded time — TODO confirm.
    return max(date_ids)

def find_date_in_string(input_string, date_pattern):
    """Search a string for a date and return it in display form.

    Args:
        input_string: Text to search (for example a file path).
        date_pattern: Regular expression whose first capture group is the date.

    Returns:
        - ``DD-MM-YYYY`` when the captured text is exactly 8 characters and is
          a valid ``YYYYMMDD`` date.
        - The captured text unchanged when it is longer than 8 characters.
        - ``"(No date found)"`` when the pattern does not match, the capture is
          shorter than 8 characters, or the 8 characters are not a real date.
    """
    no_date = "(No date found)"

    match = re.search(date_pattern, input_string)
    if not match:
        return no_date

    date_regex = match.group(1)
    if len(date_regex) > 8:
        # Already a formatted date / timestamp; pass it through untouched.
        return date_regex
    if len(date_regex) == 8:
        try:
            return datetime.strptime(date_regex, "%Y%m%d").strftime("%d-%m-%Y")
        except ValueError:
            # Eight characters that do not form a calendar date (e.g. 20250230).
            return no_date
    return no_date

def get_sample_ids(samplesheet_csv):
    """Return the values of the ``sample`` column of the samplesheet.

    Args:
        samplesheet_csv: Path (or file-like object) of the samplesheet csv.

    Returns:
        List of sample IDs as strings, in file order.

    Raises:
        KeyError: If the csv has no ``sample`` column.
    """
    # Read the IDs as text: with type inference an ID such as "001" would
    # become the integer 1 and no longer match the report file names.
    df = pd.read_csv(samplesheet_csv, dtype={'sample': str})
    return df['sample'].tolist()

def generate_master_html(template_html_fpath, sample_ids, seqrun_date, date_id):
    """Render the master HTML page from the Jinja2 template file.

    Args:
        template_html_fpath: Path to the Jinja2 HTML template.
        sample_ids: Sample IDs; the template emits one table row per ID.
        seqrun_date: Text shown in the "Sequenced on" footer.
        date_id: Timestamp suffix used in the pipeline_info report links.

    Returns:
        The rendered HTML as a string.
    """
    # The template declares <meta charset="UTF-8">, so read it as UTF-8 rather
    # than with the platform's default encoding.
    with open(template_html_fpath, "r", encoding="utf-8") as file:
        master_template = file.read()

    # Values come from the samplesheet and file names; escape them so that
    # characters such as '<' or '&' cannot break the generated markup.
    template = Template(master_template, autoescape=True)
    return template.render(sample_ids=sample_ids, seqrun_date=seqrun_date, date_id=date_id)

def main():
    """Build master.html in the current working directory.

    Reads the sample IDs from the samplesheet, derives the sequencing-run date
    from the samplesheet path (an 8-digit ``/YYYYMMDD_`` path component),
    looks up the newest execution-report timestamp, renders the template and
    writes the result to ``master.html``.
    """
    sample_ids = get_sample_ids(args.csv)
    seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_')
    date_id = get_date_id(args.csv)
    rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, date_id)
    # The page declares UTF-8, so write it as UTF-8 regardless of the locale.
    with open("master.html", "w", encoding="utf-8") as fout:
        fout.write(rendered_html)


if __name__ == "__main__":
    main()
27 changes: 27 additions & 0 deletions conf/cmd.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Nextflow config file for the 'cmd' profile
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    Site-specific settings: SLURM executor, database location and resource limits.

    Use as follows:
        nextflow run nf-core/gmsemu -profile cmd,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
    config_profile_name        = 'cmd profile'
    config_profile_description = 'CMD High performance profile'

    // Databases
    db                         = '/fs1/pipelines/gms_16S-dev/assets/databases/emu_database'

    // Upper resource limits for this profile
    max_cpus                   = 60
    max_memory                 = '300.GB'
    max_time                   = '48.h'
}

// Executor settings belong to the `process` scope. Written inside `params`
// they only define params.process.executor / params.process.queue and leave
// the executor that actually runs the tasks unchanged.
process {
    executor = 'slurm'
    queue    = 'low'
}
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ process {
]
}

// Publish only master.html, directly into the top level of the output
// directory (not a sub-folder), using the pipeline-wide publish mode.
withName: GENERATE_MASTER_HTML {
publishDir = [
path: { "${params.outdir}/" },
mode: params.publish_dir_mode,
pattern: 'master.html'
]
}

withName: NANOPLOT1 {
publishDir = [
path: { "${params.outdir}/nanoplot" },
Expand Down
19 changes: 19 additions & 0 deletions modules/local/generate_master_html/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Runs generate_master_html.py on a samplesheet csv and emits the master.html
// file that the script writes into the task directory.
process GENERATE_MASTER_HTML {
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "pyhdfd78af_1") must be EXCLUDED to support installation on different operating systems.
// NOTE(review): the script imports pandas and jinja2 — confirm that the
// nf-core 3.0.2 environment/container below provides both.
conda "conda-forge::nf-core=3.0.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/nf-core:3.0.2--pyhdfd78af_1':
'quay.io/biocontainers/nf-core:3.0.2' }"

input:
// Samplesheet csv passed to the script via --csv.
path csv

output:
// master.html is written by the script to the current working directory.
path 'master.html', emit: master_html

script:
// The template is referenced through params.master_template as a plain path
// string; it is not declared as a `path` input of this process.
"""
generate_master_html.py --csv $csv --html $params.master_template
"""
}
54 changes: 54 additions & 0 deletions modules/local/generate_master_html/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: "generate_master_html"
description: Render a master HTML page that links to the per-sample HTML reports of the pipeline.
keywords:
  - html
  - report
  - 16S

tools:
  - "generate_master_html.py":
      description: "Local script that reads sample IDs from the samplesheet csv and renders the master HTML template with jinja2."
      homepage: "None"
      documentation: "None"
      tool_dev_url: "None"
      doi: "None"
      licence: "None"

input:
  - csv:
      type: file
      description: Samplesheet csv; the script reads its `sample` column
      pattern: "*.csv"

output:
  - master_html:
      type: file
      description: Rendered master HTML page
      pattern: "master.html"

authors:
  - "@ryanjameskennedy"
16 changes: 8 additions & 8 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params {
input = null
db = null

// reads = null
// reads = null
seqtype = "map-ont"
min_abundance = 0.0001
minimap_max_alignments = 50
Expand All @@ -22,20 +22,20 @@ params {
keep_files = false
output_unclassified = true

// master html
master_template = "$projectDir/assets/master_template.html"

//
// porechop_abi
adapter_trimming = false
// porechop_abi
adapter_trimming = false

//
// filtlong filtering
// filtlong filtering
quality_filtering = true
longread_qc_qualityfilter_minlength = 1200
longread_qc_qualityfilter_maxlength = 1800
longread_qc_qualityfilter_min_mean_q = 94

//Save the trimmed reads
save_preprocessed_reads = false
save_preprocessed_reads = false

// krona
run_krona = true
Expand Down Expand Up @@ -173,7 +173,7 @@ profiles {
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
full { includeConfig 'conf/full.config' }

cmd { includeConfig 'conf/cmd.config' }
}


Expand Down
9 changes: 5 additions & 4 deletions workflows/gmsemu.nf
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf'
include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf'
include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf'
include { GENERATE_MASTER_HTML } from '../modules/local/generate_master_html/main.nf'
//include { FALCO } from '../modules/nf-core/falco/main.nf'
include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf'
include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf'
Expand Down Expand Up @@ -93,19 +94,19 @@ workflow GMSEMU {


if ( params.merge_fastq_pass && !params.barcodes_samplesheet) {
MERGE_BARCODES (params.merge_fastq_pass)
MERGE_BARCODES(params.merge_fastq_pass)
//GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged"))
GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged)
// ch_input = file(params.outdir + 'samplesheet_merged.csv')
ch_input = GENERATE_INPUT.out.sample_sheet_merged
} else if ( params.merge_fastq_pass && params.barcodes_samplesheet) {
MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass)
MERGE_BARCODES_SAMPLESHEET(params.barcodes_samplesheet, params.merge_fastq_pass)
// merged_files = (params.outdir + '/fastq_pass_merged')
GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged)
GENERATE_INPUT(MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged)
ch_input = GENERATE_INPUT.out.sample_sheet_merged
}


GENERATE_MASTER_HTML(GENERATE_INPUT.out.sample_sheet_merged)


//
Expand Down