Skip to content

Commit

Permalink
test v0.1.1
Browse files Browse the repository at this point in the history
  • Loading branch information
andyjslee committed Aug 1, 2024
1 parent c555752 commit e2ba435
Show file tree
Hide file tree
Showing 10 changed files with 285 additions and 4 deletions.
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ Download the latest stable release [here](https://github.com/pirl-unc/nexus/rele
```
conda create -n nexus python=3.10
conda activate nexus
conda install nextflow==23.10.0
pip install nexus-<version>.tar.gz --verbose
```

Expand Down Expand Up @@ -82,7 +83,7 @@ For more on this particular workflow, check out [here](/src/nexuslib/pipelines/a

## 05. Documentation for Available Workflows

A list of links to documentation for all available workflows in `v0.1.0` is provided below:
A list of links to documentation for all available workflows in the latest version is provided below:

| Category | Workflow |
|:------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------|
Expand All @@ -104,6 +105,7 @@ A list of links to documentation for all available workflows in `v0.1.0` is prov
| Utilities | [**fastq_to_unaligned_bam.nf**](/src/nexuslib/pipelines/utilities/fastq_to_unaligned_bam/) |
| Utilities | [**fastqc.nf**](/src/nexuslib/pipelines/utilities/fastqc/) |
| Utilities | [**sequencing_coverage.nf**](/src/nexuslib/pipelines/utilities/sequencing_coverage/) |
| Variant calling (DNA) | [**long_read_dna_variant_calling_clairs.nf**](/src/nexuslib/pipelines/variant_calling/long_read_dna_variant_calling_clairs/) |
| Variant calling (DNA) | [**long_read_dna_variant_calling_cutesv.nf**](/src/nexuslib/pipelines/variant_calling/long_read_dna_variant_calling_cutesv/) |
| Variant calling (DNA) | [**long_read_dna_variant_calling_deepvariant.nf**](/src/nexuslib/pipelines/variant_calling/long_read_dna_variant_calling_deepvariant/) |
| Variant calling (DNA) | [**long_read_dna_variant_calling_pbsv.nf**](/src/nexuslib/pipelines/variant_calling/long_read_dna_variant_calling_pbsv/) |
Expand Down
8 changes: 7 additions & 1 deletion nextflow/nexus_v0.1.0_nextflow_slurm.config
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ process {
memory = '192.GB'
time = '7days'
}
withLabel: 'clairs' {
container = 'hkubal/clairs:v0.3.0'
cpus = 48
memory = '192.GB'
time = '7days'
}
withLabel: 'copy_bam_file' {
cpus = 2
memory = '16.GB'
Expand Down Expand Up @@ -320,7 +326,7 @@ process {
container = 'ajslee/svaba:1.2.0'
cpus = 48
memory = '192.GB'
time = '7days'
time = '14days'
}
withLabel: 'svim' {
container = 'ajslee/svim:2.0.0'
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ requires = [

[project]
name = "nexus"
version = "0.1.0"
version = "0.1.1"
description = "NEXflow's Ultimate Streamliner"
requires-python = ">=3.10"
readme = "README.md"
Expand Down
Empty file.
3 changes: 2 additions & 1 deletion scripts/unittests/unittest_variant_calling_dna_long_reads.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@ pytest \
--cov-report=term-missing \
--cov=nexuslib \
test/ \
-k "test_long_read_dna_variant_calling_cutesv or \
-k "test_long_read_dna_variant_calling_clairs or \
test_long_read_dna_variant_calling_cutesv or \
test_long_read_dna_variant_calling_deepvariant_github or \
test_long_read_dna_variant_calling_pbsv or \
test_long_read_dna_variant_calling_savana or \
Expand Down
35 changes: 35 additions & 0 deletions src/nexuslib/pipelines/modules/clairs.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/env nextflow

// Runs ClairS (`run_clairs`) on one tumor/normal BAM pair per task and emits
// the per-sample output directory.
process runClairs {

// Matches the `withLabel: 'clairs'` selector in the Nextflow config, which
// supplies the container, cpus, memory and time for this process.
label 'clairs'
tag "${sample_id}"
debug true

// Copy the declared outputs into the caller-supplied output directory.
publishDir(
path: "${output_dir}/",
mode: 'copy'
)

input:
// One tuple per sample: sample ID, tumor BAM + index, normal BAM + index.
tuple val(sample_id), path(tumor_bam_file), path(tumor_bam_bai_file), path(normal_bam_file), path(normal_bam_bai_file)
path(reference_genome_fasta_file)
// NOTE(review): the .fai is declared as an input but never referenced in the
// script below; presumably it is staged so run_clairs finds the index next to
// the FASTA — confirm.
path(reference_genome_fasta_fai_file)
// Extra run_clairs arguments; appended verbatim to the end of the command.
val(params_clairs)
val(output_dir)

output:
// Sample ID paired with the output directory created by the script.
tuple val(sample_id), path("${sample_id}_clairs_outputs/"), emit: f

// The command creates the output directory first, then calls run_clairs with
// --threads set to the number of cpus assigned to the task.
script:
"""
mkdir -p ${sample_id}_clairs_outputs/
run_clairs \
--tumor_bam $tumor_bam_file \
--normal_bam $normal_bam_file \
--ref_fn $reference_genome_fasta_file \
--output_dir ${sample_id}_clairs_outputs/ \
--threads ${task.cpus} \
$params_clairs
"""
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
## long_read_dna_variant_calling_clairs.nf

Identifies somatic small DNA variants in tumor and normal long-read DNA BAM files using [ClairS](https://github.com/HKU-BAL/ClairS).

### Inputs / Outputs

| I/O | Description |
|:-------|:----------------------------------------------|
| Input | Tumor and normal `bam` files for each sample. |
| Output | `vcf` file for each sample. |

### Dependencies

* `ClairS`

### Example

```
nexus run --nf-workflow long_read_dna_variant_calling_clairs.nf \
-c NEXTFLOW_CONFIG_FILE \
-w WORK_DIR \
--samples_tsv_file SAMPLES_TSV_FILE \
--output_dir OUTPUT_DIR \
--reference_genome_fasta_file REFERENCE_GENOME_FASTA_FILE \
--reference_genome_fasta_fai_file REFERENCE_GENOME_FASTA_FAI_FILE \
--params_clairs '"--platform hifi_revio"'
```

### Usage

```
workflow:
1. Run ClairS.
usage: nexus run --nf-workflow long_read_dna_variant_calling_clairs.nf [required] [optional] [--help]
required arguments:
-c : Nextflow .config file.
-w : Nextflow work directory path.
--samples_tsv_file : TSV file with the following columns: 'sample_id', 'tumor_bam_file', 'tumor_bam_bai_file', 'normal_bam_file', 'normal_bam_bai_file'.
--output_dir : Directory to which output files will be copied.
optional arguments:
--reference_genome_fasta_file : Reference genome FASTA file (default: /datastore/lbcfs/collaborations/pirl/seqdata/references/hg38.fa).
--reference_genome_fasta_fai_file : Reference genome FASTA.FAI file (default: /datastore/lbcfs/collaborations/pirl/seqdata/references/hg38.fa.fai).
--params_clairs : ClairS parameters (default: '"--platform hifi_revio"').
Note that the parameters need to be wrapped in quotes.
--delete_work_dir : Delete work directory (default: false).
```

### Parameters

`-c`
* Nextflow config file can be downloaded [here](https://github.com/pirl-unc/nexus/tree/main/nextflow)

`--samples_tsv_file`

| Header | Description |
|---------------------|------------------------------------|
| sample_id | Sample ID |
| tumor_bam_file | Full path to tumor `bam` file |
| tumor_bam_bai_file | Full path to tumor `bam.bai` file |
| normal_bam_file | Full path to normal `bam` file |
| normal_bam_bai_file | Full path to normal `bam.bai` file |

`--reference_genome_fasta_file`
* Reference genome FASTA files can be found in /datastore/lbcfs/collaborations/pirl/seqdata/references/ on LBG.

`--reference_genome_fasta_fai_file`
* Reference genome FASTA.FAI files can be found in /datastore/lbcfs/collaborations/pirl/seqdata/references/ on LBG.

`--params_clairs`
* Refer to the [ClairS documentation](https://github.com/HKU-BAL/ClairS).
* The following parameters for `run_clairs` are already included in `nexus` module for `clairs` and should not be specified:
* `--tumor_bam`
* `--normal_bam`
* `--ref_fn`
* `--output_dir`
* `--threads`
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
#!/usr/bin/env nextflow

nextflow.enable.dsl=2

// Step 1. Import Nextflow modules
include { runClairs } from '../../modules/clairs'

// Step 2. Input arguments
// Help flag: empty (falsy) by default, so the help text is only printed on request.
params.help = ''

// Required arguments (empty defaults; expected to be supplied on the command line)
params.samples_tsv_file = ''
params.output_dir = ''

// Optional arguments
params.reference_genome_fasta_file = '/datastore/lbcfs/collaborations/pirl/seqdata/references/hg38.fa'
params.reference_genome_fasta_fai_file = '/datastore/lbcfs/collaborations/pirl/seqdata/references/hg38.fa.fai'
params.params_clairs = '--platform hifi_revio'
params.delete_work_dir = false

// When params.params_clairs is the boolean true (presumably the flag was given
// without a value), pass no extra arguments to ClairS; otherwise use it as-is.
params_clairs = (params.params_clairs == true) ? '' : params.params_clairs

// Step 3. Print inputs and help
// Always print the workflow banner.
log.info """\
==================================================================================
Identify somatic small variants in long-read DNA sequencing BAM files using ClairS
==================================================================================
""".stripIndent()

// With --help: print usage and exit without running anything.
// Otherwise: echo the resolved inputs so they are recorded in the run log.
if (params.help) {
    log.info"""\
workflow:
1. Run ClairS.
usage: nexus run --nf-workflow long_read_dna_variant_calling_clairs.nf [required] [optional] [--help]
required arguments:
-c : Nextflow .config file.
-w : Nextflow work directory path.
--samples_tsv_file : TSV file with the following columns: 'sample_id', 'tumor_bam_file', 'tumor_bam_bai_file', 'normal_bam_file', 'normal_bam_bai_file'.
--output_dir : Directory to which output files will be copied.
optional arguments:
--reference_genome_fasta_file : Reference genome FASTA file (default: /datastore/lbcfs/collaborations/pirl/seqdata/references/hg38.fa).
--reference_genome_fasta_fai_file : Reference genome FASTA.FAI file (default: /datastore/lbcfs/collaborations/pirl/seqdata/references/hg38.fa.fai).
--params_clairs : ClairS parameters (default: '"--platform hifi_revio"').
Note that the parameters need to be wrapped in quotes.
--delete_work_dir : Delete work directory (default: false).
""".stripIndent()
    exit 0
} else {
    // params_clairs is the normalised value (not params.params_clairs), so the
    // log shows exactly what will be appended to the run_clairs command.
    log.info"""\
samples_tsv_file : ${params.samples_tsv_file}
output_dir : ${params.output_dir}
reference_genome_fasta_file : ${params.reference_genome_fasta_file}
reference_genome_fasta_fai_file : ${params.reference_genome_fasta_fai_file}
params_clairs : ${params_clairs}
delete_work_dir : ${params.delete_work_dir}
""".stripIndent()
}

// Step 4. Set channels
// Read the samples TSV (tab-separated, first line is the header) and turn each
// row into a 5-element tuple in the order expected by the workflow's first
// input: sample_id, tumor BAM, tumor BAI, normal BAM, normal BAI.
// Each field is interpolated into a string, so the tuple elements are strings.
Channel
.fromPath( params.samples_tsv_file )
.splitCsv( header: true, sep: '\t' )
.map { row -> tuple(
"${row.sample_id}",
"${row.tumor_bam_file}",
"${row.tumor_bam_bai_file}",
"${row.normal_bam_file}",
"${row.normal_bam_bai_file}") }
.set { input_bam_files_ch }

// Step 5. Workflow
// Named workflow wrapping the single ClairS step: it forwards its five inputs,
// unchanged and in the same order, to the runClairs process.
workflow LONG_READ_DNA_VARIANT_CALLING_CLAIRS {
take:
input_bam_files_ch // channel: [val(sample_id), path(tumor_bam_file), path(tumor_bam_bai_file), path(normal_bam_file), path(normal_bam_bai_file)]
reference_genome_fasta_file
reference_genome_fasta_fai_file
params_clairs
output_dir

main:
runClairs(
input_bam_files_ch,
reference_genome_fasta_file,
reference_genome_fasta_fai_file,
params_clairs,
output_dir
)
}

// Entry workflow: feeds the sample channel and the command-line parameters into
// the named workflow. params_clairs is the normalised script variable, not
// params.params_clairs.
workflow {
LONG_READ_DNA_VARIANT_CALLING_CLAIRS(
input_bam_files_ch,
params.reference_genome_fasta_file,
params.reference_genome_fasta_fai_file,
params_clairs,
params.output_dir
)
}

// Completion handler: delete the Nextflow work directory when delete_work_dir
// is set. Both boolean true and the integer 1 are accepted.
// NOTE(review): no check of whether the run succeeded is made here, so the
// work directory is presumably removed after failed runs too — confirm this is
// intended.
workflow.onComplete {
if ( params.delete_work_dir == true || params.delete_work_dir == 1 ) {
workflow.workDir.deleteDir()
}
}
6 changes: 6 additions & 0 deletions test/data/nextflow/nextflow_test_docker.config
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ process {
memory = '4.GB'
time = '1hour'
}
withLabel: 'clairs' {
container = 'hkubal/clairs:v0.3.0'
cpus = 2
memory = '4.GB'
time = '1hour'
}
withLabel: 'copy_bam_file' {
cpus = 2
memory = '4.GB'
Expand Down
43 changes: 43 additions & 0 deletions test/variant_calling/test_long_read_dna_variant_calling_clairs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import pandas as pd
import os
from nexuslib.main import run_workflow
from ..data import get_data_path


def test_long_read_dna_variant_calling_clairs():
    """Run the ClairS long-read variant calling workflow on the bundled test data.

    Writes a one-row samples TSV (tumor/normal BAM pair) under
    ``<cwd>/tmp/intermediate/...`` and invokes
    ``long_read_dna_variant_calling_clairs.nf`` through ``run_workflow`` with
    the Docker test config. The test makes no assertions of its own; it passes
    when ``run_workflow`` returns without raising.
    """
    test_name = 'test_long_read_dna_variant_calling_clairs'

    # Bundled test inputs.
    nextflow_config_file = get_data_path(name='nextflow/nextflow_test_docker.config')
    tumor_dna_bam_file = get_data_path(name='bam/hg38_tp53_tumor_long_read_dna.bam')
    tumor_dna_bam_bai_file = get_data_path(name='bam/hg38_tp53_tumor_long_read_dna.bam.bai')
    normal_dna_bam_file = get_data_path(name='bam/hg38_tp53_normal_long_read_dna.bam')
    normal_dna_bam_bai_file = get_data_path(name='bam/hg38_tp53_normal_long_read_dna.bam.bai')
    reference_genome_fasta_file = get_data_path(name='fasta/hg38_chr17_1-8000000.fa')
    reference_genome_fasta_fai_file = get_data_path(name='fasta/hg38_chr17_1-8000000.fa.fai')

    # Per-test scratch directories under <cwd>/tmp.
    temp_dir = os.path.join(os.getcwd(), 'tmp')
    intermediate_dir = os.path.join(temp_dir, 'intermediate', test_name)
    work_dir = os.path.join(temp_dir, 'work', test_name)
    output_dir = os.path.join(temp_dir, 'outputs', test_name)
    for directory in (intermediate_dir, work_dir, output_dir):
        # exist_ok avoids the check-then-create race and makes reruns a no-op.
        os.makedirs(directory, exist_ok=True)

    # Samples sheet with the column names the workflow expects.
    samples_tsv_file = os.path.join(intermediate_dir, 'samples.tsv')
    pd.DataFrame({
        'sample_id': ['tumor'],
        'tumor_bam_file': [tumor_dna_bam_file],
        'tumor_bam_bai_file': [tumor_dna_bam_bai_file],
        'normal_bam_file': [normal_dna_bam_file],
        'normal_bam_bai_file': [normal_dna_bam_bai_file],
    }).to_csv(samples_tsv_file, sep='\t', index=False)

    workflow_args = [
        '-c', nextflow_config_file,
        '-w', work_dir,
        '--samples_tsv_file', samples_tsv_file,
        '--reference_genome_fasta_file', reference_genome_fasta_file,
        '--reference_genome_fasta_fai_file', reference_genome_fasta_fai_file,
        # The inner double quotes are part of the value (see the workflow's
        # help text: parameters need to be wrapped in quotes).
        '--params_clairs', '"--platform hifi_revio"',
        '--output_dir', output_dir,
    ]
    run_workflow(workflow='long_read_dna_variant_calling_clairs.nf',
                 nextflow='nextflow',
                 workflow_args=workflow_args)

0 comments on commit e2ba435

Please sign in to comment.