Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add TAR module #6772

Merged
merged 6 commits into from
Oct 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions modules/nf-core/tar/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Conda environment for the nf-core TAR module (referenced from main.nf via
# `conda "${moduleDir}/environment.yml"`).
channels:
- conda-forge
- bioconda

# tar itself plus standalone compression tools; every package is pinned to an
# exact version and explicitly sourced from conda-forge.
# NOTE(review): presumably one tool per compress_type handled in main.nf
# (.bz2, .gz, .lz, .lzo, .xz/.lzma, .zst) - confirm when adding a new type.
dependencies:
- conda-forge::bzip2=1.0.8
- conda-forge::gzip=1.13
- conda-forge::lzip=1.21
- conda-forge::lzop=1.04
- conda-forge::tar=1.34
- conda-forge::xz=5.2.6
- conda-forge::zstd=1.5.6
74 changes: 74 additions & 0 deletions modules/nf-core/tar/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Create a tar archive of `input`, optionally compressed.
//
// Inputs:
//   meta          - Groovy map with sample information; `meta.id` is used for the
//                   task tag and as the default output prefix.
//   input         - path to archive.
//   compress_type - archive suffix selecting the compression: one of
//                   '.bz2', '.xz', '.lz', '.lzma', '.lzo', '.zst', '.gz',
//                   or '' (empty string) for a plain, uncompressed .tar.
// Outputs:
//   archive  - tuple of meta and the created `<prefix>.tar<compress_type>` file.
//   versions - versions.yml recording the tar version.
// Fails with an explicit error when compress_type is not one of the supported values.
process TAR {
    tag "${meta.id}"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/98/98946ea8217c35441352a94f3e0cd1dfa24137c323e8b0f5dfcb3123b465d0b1/data':
        'community.wave.seqera.io/library/bzip2_gzip_lzip_lzop_pruned:5a822ddcf829e7af' }"

    input:
    tuple val(meta), path(input)
    val compress_type

    output:
    tuple val(meta), path("*.tar${compress_type}"), emit: archive
    path "versions.yml", emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Single source of truth: supported suffix -> tar compression flag.
    // Insertion order is preserved, so the error message lists options in this order.
    def compress_flags = [
        '.bz2' : '--bzip2',
        '.xz'  : '--xz',
        '.lz'  : '--lzip',
        '.lzma': '--lzma',
        '.lzo' : '--lzop',
        '.zst' : '--zstd',
        '.gz'  : '--gzip',
        ''     : ''
    ]
    // Coerce to a plain String so a GString value still matches the String map keys.
    def compress_key = compress_type as String
    // The previous guard `!compress_type in list` parsed as `(!compress_type) in list`
    // and could never fire; an explicit key lookup avoids the precedence trap.
    if (!compress_flags.containsKey(compress_key)) {
        error("ERROR: Invalid compress_type: ${compress_type} for TAR. Set as empty string for no compression. Compression options: ${compress_flags.keySet().join(', ')}")
    }
    // `def` keeps the flag local to this task instead of leaking into the shared script binding.
    def compress_flag = compress_flags[compress_key]

    """
    tar \\
        -c \\
        ${compress_flag} \\
        ${args} \\
        -f ${prefix}.tar${compress_type} \\
        ${input}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tar: \$(tar --version | grep tar | sed 's/.*) //g')
    END_VERSIONS
    """

    stub:
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The placeholder must match the declared output glob `*.tar${compress_type}`.
    // The '.gz' case keeps the original gzip-of-empty-line command unchanged;
    // every other type gets an empty placeholder file.
    def archive = "${prefix}.tar${compress_type}"
    def create_cmd = compress_type == '.gz' ? "echo \"\" | gzip -c > ${archive}" : "touch ${archive}"
    """
    ${create_cmd}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        tar: \$(tar --version | grep tar | sed 's/.*) //g')
    END_VERSIONS
    """
}
67 changes: 67 additions & 0 deletions modules/nf-core/tar/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for nf-core/tar: describes the tool plus the input and output
# channels declared in main.nf.
name: "tar"
description: Compress directories into tarballs with various compression options
keywords:
  - untar
  - tar
  - tarball
  - compression
  - archive
  - gzip
  - targz
tools:
  - "tar":
      description: "GNU Tar provides the ability to create tar archives, as well as
        various other kinds of manipulation."
      homepage: "https://www.gnu.org/software/tar/"
      documentation: "https://www.gnu.org/software/tar/manual/"
      licence: ["GPLv3"]
      identifier: ""

input:
  - - meta:
        type: map
        description: |
          Groovy Map containing sample information
          e.g. `[ id:'sample1', single_end:false ]`
    - input:
        type: directory
        description: A file or directory to be archived
        pattern: "*/"
        ontologies:
          - edam: "http://edamontology.org/data_1049"
  - - compress_type:
        type: string
        description: |
          A string defining which type of (optional) compression to apply to the archive.
          Provide an empty string in quotes for no compression
        pattern: ".bz2|.xz|.lz|.lzma|.lzo|.zst|.gz"
output:
  - archive:
      - meta:
          type: map
          description: |
            Groovy Map containing sample information
            e.g. `[ id:'sample1', single_end:false ]`
      # The emitted archive file itself; its suffix follows the compress_type input.
      - "*.tar${compress_type}":
          type: file
          description: |
            Tar archive of the input, compressed according to `compress_type`
            (plain `.tar` when `compress_type` is an empty string)
          pattern: "*.tar{.bz2,.xz,.lz,.lzma,.lzo,.zst,.gz,}"
          # NOTE(review): these EDAM terms were carried over unchanged but do not
          # look like archive formats (format_25722 is not a 4-digit EDAM id).
          # Presumably the TAR format term was intended - confirm and replace.
          ontologies:
            - edam: "http://edamontology.org/format_25722"
            - edam: "http://edamontology.org/format_2573"
            - edam: "http://edamontology.org/format_3462"
  - versions:
      - versions.yml:
          type: file
          description: File containing software versions
          pattern: "versions.yml"
authors:
  - "@jfy133"
maintainers:
  - "@jfy133"
210 changes: 210 additions & 0 deletions modules/nf-core/tar/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// nf-core modules test tar
// nf-test suite for the TAR process: one test per supported compress_type value
// plus a stub run. Test names double as snapshot keys, so renaming a test
// requires regenerating the snapshot file.
nextflow_process {

name "Test Process TAR"
script "../main.nf"
process "TAR"

tag "modules"
tag "modules_nfcore"
tag "tar"
tag "untar"

// Shared fixture: run UNTAR on the kraken2 test tarball once so that every test
// can feed UNTAR.out.untar into TAR as input[0].
setup {
run("UNTAR") {
script "../../untar/main.nf"
process {
"""
input[0] = [
[ id:'test' ], // meta map
file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true)
]
"""
}
}
}

// No compression: empty-string compress_type.
// Like the other non-stub tests, only the archive file name and the versions
// channel are snapshotted, not the archive content.
// NOTE(review): presumably because archive bytes are not reproducible - confirm.
test("sarscov2 - genome - db - kraken2 - none") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = ''
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// gzip compression ('.gz').
test("sarscov2 - genome - db - kraken2 - .gz") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.gz'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// bzip2 compression ('.bz2').
test("sarscov2 - genome - db - kraken2 - .bz2") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.bz2'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// lzip compression. Note the test is labelled ".lzip" but the compress_type
// value actually passed to the process is '.lz'.
test("sarscov2 - genome - db - kraken2 - .lzip") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.lz'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// lzma compression ('.lzma').
test("sarscov2 - genome - db - kraken2 - .lzma") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.lzma'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// lzop compression ('.lzo').
test("sarscov2 - genome - db - kraken2 - .lzo") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.lzo'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// zstd compression ('.zst').
test("sarscov2 - genome - db - kraken2 - .zst") {

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.zst'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(
file(process.out.archive[0][1]).name,
process.out.versions
).match() }
)
}

}

// Stub run (-stub) with '.gz'; unlike the tests above, the whole process.out
// is snapshotted here.
test("sarscov2 - genome - db - kraken2 - stub") {

options "-stub"

when {
process {
"""
input[0] = UNTAR.out.untar
input[1] = '.gz'
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Loading
Loading