Skip to content

Commit

Permalink
Merge pull request #2 from SMD-Bioinformatics-Lund/35-generate-master…
Browse files Browse the repository at this point in the history
…-html

Add py script that generates master html
  • Loading branch information
ryanjameskennedy authored Jan 23, 2025
2 parents eddb78d + a4bfa1c commit eba0557
Show file tree
Hide file tree
Showing 8 changed files with 316 additions and 12 deletions.
72 changes: 72 additions & 0 deletions assets/master_template.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>16S Samples Report</title>
<link href="https://cdn.jsdelivr.net/npm/[email protected]/dist/css/bootstrap.min.css" rel="stylesheet">
</head>
<body>
<div class="container my-5">
<div class="card">
<div class="card-header text-white bg-primary">
<h2 class="card-title mb-0">Sample Report</h2>
</div>
<div class="card-body">
<div class="table-responsive">
<table class="table table-bordered table-striped table-hover">
<thead class="table-success">
<tr>
<th rowspan="2">Sample ID</th>
<th colspan="1" class="text-center">Results</th>
<th colspan="1" class="text-center">QC</th>
<th colspan="8" class="text-center">NanoPlot</th>
<th colspan="3" class="text-center">Pipeline</th>
</tr>
<tr>
<th class="text-center">Krona</th>
<th class="text-center">MultiQC Report</th>
<th class="text-center">Report</th>
<th class="text-center">Length vs Quality Scatter (Dot)</th>
<th class="text-center">Length vs Quality Scatter (KDE)</th>
<th class="text-center">Non-weighted Histogram</th>
<th class="text-center">Non-weighted Log-transformed Histogram</th>
<th class="text-center">Weighted Histogram</th>
<th class="text-center">Weighted Log-transformed Histogram</th>
<th class="text-center">Yield by Length</th>
<th class="text-center">Execution Report</th>
<th class="text-center">Execution Timeline</th>
<th class="text-center">DAG</th>
</tr>
</thead>
<tbody>
{# One table row per sample. The cell order must mirror the second header row: Krona, MultiQC, the eight NanoPlot columns (report first), then the three pipeline columns. #}
{% for sample_id in sample_ids %}
<tr>
    <td>{{ sample_id }}</td>
    <td><a href="./krona/{{ sample_id }}_T1_krona.html">Krona</a></td>
    <td><a href="./multiqc/multiqc_report.html">MultiQC</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNanoPlot-report.html">NanoPlot Report</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_dot.html">Dot Scatter Plot</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedLengthvsQualityScatterPlot_kde.html">KDE Scatter Plot</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedHistogramReadlength.html">Non-weighted Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedNon_weightedLogTransformed_HistogramReadlength.html">Non-weighted Log-transformed Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedHistogramReadlength.html">Weighted Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedWeightedLogTransformed_HistogramReadlength.html">Weighted Log-transformed Histogram</a></td>
    <td><a href="./nanoplot/{{ sample_id }}_T1_nanoplot_unprocessedYield_By_Length.html">Yield by Length</a></td>
    <td><a href="./pipeline_info/execution_report_{{ date_id }}.html">Execution Report</a></td>
    <td><a href="./pipeline_info/execution_timeline_{{ date_id }}.html">Execution Timeline</a></td>
    <td><a href="./pipeline_info/pipeline_dag_{{ date_id }}.html">Pipeline DAG</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
</div>
<div class="card-footer text-muted">
Sequenced on {{ seqrun_date }}
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/js/bootstrap.bundle.min.js"></script>
</body>
</html>
123 changes: 123 additions & 0 deletions bin/generate_master_html.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#!/usr/bin/env python

"""Generate a master html template."""

import os
import re
import argparse
import pandas as pd
from jinja2 import Template
from datetime import datetime

# Help text shown by ``--help``; kept in sync with the functions and imports of this script.
description = '''
------------------------
Title: generate_master_html.py
Date: 2024-12-16
Author(s): Ryan Kennedy
------------------------
Description:
    This script creates a master html file that points to all html files that were outputted from EMU.
List of functions:
    get_date_id, find_date_in_string, get_sample_ids, generate_master_html, main.
List of standard modules:
    os, re, argparse, datetime.
List of "non standard" modules:
    pandas, jinja2.
Procedure:
    1. Get sample IDs by parsing samplesheet csv.
    2. Find the sequencing run date and the most recent pipeline_info date id.
    3. Render html using template.
    4. Write out master.html file.
-----------------------------------------------------------------------------------------------------------
'''

# Epilog of ``--help``; the options named here must match the add_argument calls below.
usage = '''
-----------------------------------------------------------------------------------------------------------
Generates master html file that points to all html files.
Executed using: python3 ./generate_master_html.py -c <Samplesheet_CSV_Filepath> -m <Master_HTML_Template_Filepath>
-----------------------------------------------------------------------------------------------------------
'''

parser = argparse.ArgumentParser(
    description=description,
    # Raw formatter keeps the hand-formatted banners above intact.
    formatter_class=argparse.RawDescriptionHelpFormatter,
    epilog=usage
)
parser.add_argument(
    '-v', '--version',
    action='version',
    version='%(prog)s 0.0.1'
)
parser.add_argument(
    '-c', '--csv',
    help='input samplesheet csv filepath',
    metavar='SAMPLESHEET_CSV_FILEPATH',
    dest='csv',
    required=True
)
parser.add_argument(
    '-m', '--html',
    help='input master html template filepath',
    metavar='MASTER_HTML_TEMPLATE_FILEPATH',
    dest='html',
    required=True
)

# Parsed at module level because main() reads the module-level ``args``.
args = parser.parse_args()

def get_date_id(samplesheet_csv_fpath):
    """Return the date id of the most recent execution report.

    The ``pipeline_info`` directory located next to the samplesheet is
    scanned for files whose name starts with ``execution_report``. The date
    id of a report is the part of its file name that starts at a
    ``YYYY-MM-DD`` date and runs up to the first dot.

    Args:
        samplesheet_csv_fpath: Path to the samplesheet csv; only its parent
            directory is used.

    Returns:
        The greatest date id found, as a string.

    Raises:
        FileNotFoundError: If the ``pipeline_info`` directory does not exist
            or holds no execution report with a date id in its name.
    """
    date_id_pattern = re.compile(r'(\d{4}-\d{2}-\d{2}[^.]+)')
    parent_dir = os.path.dirname(samplesheet_csv_fpath)
    pipeline_info_dir = os.path.join(parent_dir, 'pipeline_info')
    date_ids = []
    for filename in os.listdir(pipeline_info_dir):
        if not filename.startswith("execution_report"):
            continue
        # Search the file name only: a date-like directory name earlier in
        # the path must not be mistaken for the report's date id.
        match = date_id_pattern.search(filename)
        if match:
            date_ids.append(match.group(1))
    if not date_ids:
        raise FileNotFoundError(
            "No execution_report file with a date id found in '{}'".format(pipeline_info_dir)
        )
    # Every id starts with a fixed-width, zero-padded YYYY-MM-DD, so string
    # order is date order; the remainder of the id breaks same-day ties
    # deterministically instead of relying on os.listdir() order.
    return max(date_ids)

def find_date_in_string(input_string, date_pattern):
    """Extract a date from a string using the first group of a regex.

    Args:
        input_string: Text to search.
        date_pattern: Regular expression whose first capture group holds
            the date.

    Returns:
        ``"(No date found)"`` when the pattern does not match. Otherwise an
        8-character capture is parsed as ``YYYYMMDD`` and returned as
        ``DD-MM-YYYY``, a longer capture is returned unchanged, and a
        shorter capture yields an empty string.
    """
    found = re.search(date_pattern, input_string)
    if found is None:
        return "(No date found)"
    captured = found.group(1)
    captured_length = len(captured)
    if captured_length == 8:
        # Compact run date, e.g. 20250123 -> 23-01-2025.
        parsed = datetime.strptime(captured, "%Y%m%d")
        return parsed.strftime("%d-%m-%Y")
    if captured_length > 8:
        return captured
    return ""

def get_sample_ids(samplesheet_csv):
    """Return the values of the ``sample`` column of the samplesheet.

    Args:
        samplesheet_csv: Path to the samplesheet csv file.

    Returns:
        List of sample IDs in file order.

    Raises:
        KeyError: If the csv has no ``sample`` column.
    """
    # Sample IDs are used to build report file names, so read them as text:
    # letting pandas infer a numeric dtype would turn e.g. "001" into 1.
    df = pd.read_csv(samplesheet_csv, dtype={'sample': str})
    return df['sample'].tolist()

def generate_master_html(template_html_fpath, sample_ids, seqrun_date, date_id):
    """Render the master html page from a Jinja2 template file.

    Args:
        template_html_fpath: Path to the master html template.
        sample_ids: Sample IDs passed to the template as ``sample_ids``.
        seqrun_date: Sequencing run date passed to the template as
            ``seqrun_date``.
        date_id: Pipeline report date id passed to the template as
            ``date_id``.

    Returns:
        The rendered html as a string.
    """
    # The bundled template declares <meta charset="UTF-8">, so decode it as
    # UTF-8 instead of whatever the locale default happens to be.
    with open(template_html_fpath, "r", encoding="utf-8") as file:
        master_template = file.read()
    template = Template(master_template)
    return template.render(sample_ids=sample_ids, seqrun_date=seqrun_date, date_id=date_id)

def main():
    """Build ``master.html`` in the current working directory.

    Reads the samplesheet and template paths from the module-level ``args``,
    collects the sample IDs, the sequencing run date (an 8-digit date that
    follows a ``/`` and precedes a ``_`` in the samplesheet path) and the
    date id of the latest execution report, then renders and writes the page.
    """
    sample_ids = get_sample_ids(args.csv)
    seqrun_date = find_date_in_string(args.csv, r'/(\d{8})_')
    date_id = get_date_id(args.csv)
    rendered_html = generate_master_html(args.html, sample_ids, seqrun_date, date_id)
    # The page declares charset UTF-8, so write it as UTF-8 regardless of the
    # locale's default encoding.
    with open("master.html", "w", encoding="utf-8") as fout:
        fout.write(rendered_html)

if __name__ == "__main__":
main()
27 changes: 27 additions & 0 deletions conf/cmd.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for the CMD high-performance cluster profile
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines the executor, queue, database path and resource limits used when running on the CMD cluster.

Use as follows:
nextflow run nf-core/gmsemu -profile cmd,<docker/singularity> --outdir <OUTDIR>

----------------------------------------------------------------------------------------
*/

params {
    config_profile_name        = 'cmd profile'
    config_profile_description = 'CMD High performance profile'

    // Databases
    db = '/fs1/pipelines/gms_16S-dev/assets/databases/emu_database'

    // Resource ceilings for this profile
    max_cpus   = 60
    max_memory = '300.GB'
    max_time   = '48.h'

}

// Executor settings belong to the `process` scope. Assigned inside `params`
// they would only define params.process.* and leave the executor unchanged.
process {
    executor = 'slurm'
    queue    = 'low'
}
8 changes: 8 additions & 0 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,14 @@ process {
]
}

// Publish only master.html, directly into the top level of the output
// directory: the links in the master template are relative (./krona/, ./nanoplot/, ...).
withName: GENERATE_MASTER_HTML {
publishDir = [
path: { "${params.outdir}/" },
mode: params.publish_dir_mode,
pattern: 'master.html'
]
}

withName: NANOPLOT1 {
publishDir = [
path: { "${params.outdir}/nanoplot" },
Expand Down
19 changes: 19 additions & 0 deletions modules/local/generate_master_html/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Runs generate_master_html.py on a samplesheet csv and emits the resulting master.html.
process GENERATE_MASTER_HTML {
// Software MUST be pinned to channel (i.e. "bioconda"), version (i.e. "1.10").
// For Conda, the build (i.e. "pyhdfd78af_1") must be EXCLUDED to support installation on different operating systems.
// NOTE(review): generate_master_html.py imports pandas and jinja2 - confirm the nf-core image below provides both.
conda "conda-forge::nf-core=3.0.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/nf-core:3.0.2--pyhdfd78af_1':
'quay.io/biocontainers/nf-core:3.0.2' }"

input:
// Samplesheet csv passed to the script via --csv.
path csv

output:
// The script always writes its result to master.html in the task directory.
path 'master.html', emit: master_html

// NOTE(review): the template is referenced through params.master_template rather than
// declared as a path input, so it is not staged into the task directory - confirm the
// path is reachable from inside the container.
script:
"""
generate_master_html.py --csv $csv --html $params.master_template
"""
}
54 changes: 54 additions & 0 deletions modules/local/generate_master_html/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
name: "generate_master_html"
description: Render a master HTML page that links to the per-sample and pipeline-level HTML reports.
keywords:
  - html
  - report
  - 16S

tools:
  - "generate_master_html":
      description: "Local script (bin/generate_master_html.py) that fills the master HTML template with the sample IDs read from the samplesheet."
      homepage: "None"
      documentation: "None"
      tool_dev_url: "None"
      ## TODO: add the licence of this repository

input:
  - csv:
      type: file
      description: Samplesheet csv file containing a `sample` column
      pattern: "*.csv"

output:
  - master_html:
      type: file
      description: Master HTML page linking to the Krona, MultiQC, NanoPlot and pipeline execution reports
      pattern: "master.html"

authors:
  - "@ryanjameskennedy"
16 changes: 8 additions & 8 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ params {
input = null
db = null

// reads = null
// reads = null
seqtype = "map-ont"
min_abundance = 0.0001
minimap_max_alignments = 50
Expand All @@ -22,20 +22,20 @@ params {
keep_files = false
output_unclassified = true

// master html
master_template = "$projectDir/assets/master_template.html"

//
// porechop_abi
adapter_trimming = false
// porechop_abi
adapter_trimming = false

//
// filtlong filtering
// filtlong filtering
quality_filtering = true
longread_qc_qualityfilter_minlength = 1200
longread_qc_qualityfilter_maxlength = 1800
longread_qc_qualityfilter_min_mean_q = 94

//Save the trimmed reads
save_preprocessed_reads = false
save_preprocessed_reads = false

// krona
run_krona = true
Expand Down Expand Up @@ -173,7 +173,7 @@ profiles {
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
full { includeConfig 'conf/full.config' }

cmd { includeConfig 'conf/cmd.config' }
}


Expand Down
9 changes: 5 additions & 4 deletions workflows/gmsemu.nf
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ include { INPUT_CHECK } from '../subworkflows/local/input_check'
include { MERGE_BARCODES } from '../modules/local/merge_barcodes/main.nf'
include { MERGE_BARCODES_SAMPLESHEET } from '../modules/local/merge_barcodes_samplesheet/main.nf'
include { GENERATE_INPUT } from '../modules/local/generate_input/main.nf'
include { GENERATE_MASTER_HTML } from '../modules/local/generate_master_html/main.nf'
//include { FALCO } from '../modules/nf-core/falco/main.nf'
include { NANOPLOT as NANOPLOT1 } from '../modules/nf-core/nanoplot/main.nf'
include { NANOPLOT as NANOPLOT2 } from '../modules/nf-core/nanoplot/main.nf'
Expand Down Expand Up @@ -93,19 +94,19 @@ workflow GMSEMU {


if ( params.merge_fastq_pass && !params.barcodes_samplesheet) {
MERGE_BARCODES (params.merge_fastq_pass)
MERGE_BARCODES(params.merge_fastq_pass)
//GENERATE_INPUT(file("${params.outdir}/fastq_pass_merged"))
GENERATE_INPUT(MERGE_BARCODES.out.fastq_dir_merged)
// ch_input = file(params.outdir + 'samplesheet_merged.csv')
ch_input = GENERATE_INPUT.out.sample_sheet_merged
} else if ( params.merge_fastq_pass && params.barcodes_samplesheet) {
MERGE_BARCODES_SAMPLESHEET (params.barcodes_samplesheet, params.merge_fastq_pass)
MERGE_BARCODES_SAMPLESHEET(params.barcodes_samplesheet, params.merge_fastq_pass)
// merged_files = (params.outdir + '/fastq_pass_merged')
GENERATE_INPUT (MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged)
GENERATE_INPUT(MERGE_BARCODES_SAMPLESHEET.out.fastq_dir_merged)
ch_input = GENERATE_INPUT.out.sample_sheet_merged
}


GENERATE_MASTER_HTML(GENERATE_INPUT.out.sample_sheet_merged)


//
Expand Down

0 comments on commit eba0557

Please sign in to comment.