Skip to content

Commit

Permalink
progress on #236 NGSpeciesID
Browse files Browse the repository at this point in the history
  • Loading branch information
PetrKralCZ committed Jan 15, 2024
1 parent e9d6a52 commit a362a4c
Show file tree
Hide file tree
Showing 13 changed files with 526 additions and 0 deletions.
45 changes: 45 additions & 0 deletions 236_NGSpeciesID/Biopython-1.83-foss-2023a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
##
# Author: Robert Mijakovic <[email protected]>
##
easyblock = 'PythonPackage'

name = 'Biopython'
version = '1.83'

homepage = 'https://www.biopython.org'
description = """Biopython is a set of freely available tools for biological
computation written in Python by an international team of developers. It is
a distributed collaborative effort to develop Python libraries and
applications which address the needs of current and future work in
bioinformatics. """

toolchain = {'name': 'foss', 'version': '2023a'}

# Release tarball is taken from the project's own distribution directory.
source_urls = ['https://biopython.org/DIST']
sources = [SOURCELOWER_TAR_GZ]
checksums = ['78e6bfb78de63034037afd35fe77cb6e0a9e5b62706becf78a7d922b16ed83f7']

dependencies = [
    ('Python', '3.11.3'),
    ('SciPy-bundle', '2023.07'),
]

# pip-based installation settings
use_pip = True
download_dep_fail = True
sanity_pip_check = True

# Only the tests that work without an internet connection are run.
runtest = 'python setup.py test --offline'

# Both top-level packages are expected below the Python site-packages directory.
_site_packages = 'lib/python%(pyshortver)s/site-packages'
sanity_check_paths = {
    'files': [],
    'dirs': [_site_packages + '/Bio', _site_packages + '/BioSQL'],
}

# This import doubles as a check that the numpy dependency is available.
sanity_check_commands = ["python -c 'import Bio.MarkovModel'"]

options = {'modulename': 'Bio'}

moduleclass = 'bio'
32 changes: 32 additions & 0 deletions 236_NGSpeciesID/Pysam-0.22.0-GCC-12.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
easyblock = 'PythonPackage'

name = 'Pysam'
version = '0.22.0'

homepage = 'https://github.com/pysam-developers/pysam'
description = """Pysam is a python module for reading and manipulating Samfiles.
It's a lightweight wrapper of the samtools C-API. Pysam also includes an interface for tabix."""

toolchain = {'name': 'GCC', 'version': '12.3.0'}

# The tagged release archive is downloaded from GitHub.
source_urls = ['https://github.com/pysam-developers/pysam/archive/']
sources = ['v%(version)s.tar.gz']
checksums = ['61b3377c5f889ddc6f6979912c3bb960d7e08407dada9cb38f13955564ea036f']

builddependencies = [
    ('Python-bundle-PyPI', '2023.06'),
]

dependencies = [
    ('Python', '3.11.3'),
    ('ncurses', '6.4'),
    ('cURL', '8.0.1'),
    ('XZ', '5.4.2'),
]

# pip-based installation settings
use_pip = True
download_dep_fail = True
sanity_pip_check = True

# -fPIC is prepended to CFLAGS for the install command, see
# https://github.com/pysam-developers/pysam/issues/1146#issuecomment-1309421979
preinstallopts = 'export CFLAGS="-fPIC $CFLAGS" && '

moduleclass = 'bio'
29 changes: 29 additions & 0 deletions 236_NGSpeciesID/Racon-1.5.0-GCCcore-12.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
easyblock = 'CMakeMake'

name = 'Racon'
version = '1.5.0'

homepage = 'https://github.com/lbcb-sci/racon'
description = """Ultrafast consensus module for raw de novo genome assembly of long uncorrected reads."""

toolchain = {'name': 'GCCcore', 'version': '12.3.0'}

# The release archive is fetched from the lbcb-sci account on GitHub.
github_account = 'lbcb-sci'
source_urls = [GITHUB_SOURCE]
sources = ['%(version)s.tar.gz']
checksums = ['41e362f71cc03b934f17d6e2c0d626e1b2997258261b14551586de006666424a']

builddependencies = [
    ('CMake', '3.26.3'),
    ('binutils', '2.40'),
    # git build dependency is currently disabled:
    # ('git', '2.38.1', '-nodocs'),
]

# The installation is considered sane when the racon binary exists and can print its help text.
sanity_check_paths = {
    'files': ['bin/racon'],
    'dirs': [],
}
sanity_check_commands = ['racon --help']

moduleclass = 'bio'
24 changes: 24 additions & 0 deletions 236_NGSpeciesID/edlib-1.3.9-GCC-12.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
easyblock = 'PythonPackage'

name = 'edlib'
version = '1.3.9'

homepage = 'https://martinsos.github.io/edlib'
description = "Lightweight, super fast library for sequence alignment using edit (Levenshtein) distance."

toolchain = {'name': 'GCC', 'version': '12.3.0'}

sources = [SOURCE_TAR_GZ]
checksums = ['64c3dfab3ebe3e759565a0cc71eb4df23cf3ce1713fd558af3c473dddc2a3766']

dependencies = [
    ('Python', '3.11.3'),
]

# pip-based installation settings
use_pip = True
download_dep_fail = True
sanity_pip_check = True

moduleclass = 'bio'

# TODO: unresolved compile error seen with this easyconfig:
#   edlib.bycython.cpp:198:12: fatal error: longintrepr.h: No such file or directory
# NOTE(review): presumably the Cython-generated source shipped in the tarball is
# not compatible with Python 3.11 and has to be regenerated - confirm.
35 changes: 35 additions & 0 deletions 236_NGSpeciesID/intervaltree-python-3.1.0-GCCcore-12.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Author: Jasper Grimm (UoY)
# Update: Petr Král (INUITS)

easyblock = 'PythonPackage'

name = 'intervaltree-python'
# The Python package (and its source tarball) is called 'intervaltree';
# the module name carries a '-python' suffix.
_modname = 'intervaltree'
version = '3.1.0'

homepage = 'https://github.com/chaimleib/intervaltree'
description = """A mutable, self-balancing interval tree. Queries may be by
point, by range overlap, or by range containment.
"""

toolchain = {'name': 'GCCcore', 'version': '12.3.0'}

source_urls = ['https://pypi.python.org/packages/source/i/%s' % _modname]
sources = ['%s-%s.tar.gz' % (_modname, version)]
checksums = ['902b1b88936918f9b2a19e0e5eb7ccb430ae45cde4f39ea4b36932920d33952d']

builddependencies = [('binutils', '2.40')]

dependencies = [
    ('Python', '3.11.3'),
    # intervaltree 3.1.0 requires sortedcontainers; without it the pip check
    # fails ("requires sortedcontainers, which is not installed").
    # NOTE(review): sortedcontainers is expected to be part of this bundle - confirm.
    ('Python-bundle-PyPI', '2023.06'),
]

# pip-based installation settings
use_pip = True
download_dep_fail = True
sanity_pip_check = True

# The importable module name differs from the easyconfig name.
options = {'modulename': _modname}

moduleclass = 'bio'
71 changes: 71 additions & 0 deletions 236_NGSpeciesID/medaka-1.11.3-foss-2023a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# This is a contribution from HPCNow! (http://hpcnow.com)
# Copyright:: HPCNow!
# Authors:: Danilo Gonzalez <[email protected]>
# License:: GPL-v3.0
# Updated to foss-2020b to use with artic tool
# J. Sassmannshausen (GSTT/NHS UK)
# Updated to 1.5.0
# Jasper Grimm (UoY)
# Updated: Petr Král (INUITS)

easyblock = 'PythonBundle'

name = 'medaka'
version = '1.11.3'

homepage = 'https://github.com/nanoporetech/medaka'
description = "medaka is a tool to create a consensus sequence from nanopore sequencing data."

toolchain = {'name': 'foss', 'version': '2023a'}
toolchainopts = {'pic': True}

builddependencies = [('Autotools', '20220317')]

_minimap_ver = '2.26'
dependencies = [
('Python', '3.11.3'),
('Python-bundle-PyPI', '2023.06'), # includes cffi
# tensorflow~=2.10.0 required by medaka 1.9.1, see requirements.txt
('TensorFlow', '2.13.0'),
('Pysam', '0.21.0'), # TODO
('SAMtools', '1.18'),
('minimap2', _minimap_ver),
('HTSlib', '1.18'), # for tabix, bgzip
('Racon', '1.5.0'), # TODO
('edlib', '1.3.9'), # TODO
('pyspoa', '0.2.1'),
('python-parasail', '1.3.4'), # TODO
('ont-fast5-api', '4.1.1'), # TODO
('WhatsHap', '2.1'), # TODO
('intervaltree-python', '3.1.0'), # TODO
('BCFtools', '1.18'),
]

use_pip = True
sanity_pip_check = True

exts_list = [
('mappy', _minimap_ver, {
'checksums': ['e53fbe9a3ea8762a64b8103f4f779c9fb16d418eaa0a731f45cebc83867a9b71'],
}),
('wurlitzer', '3.0.3', {
'checksums': ['224f5fe70618be3872c05dfddc8c457191ec1870654596279fcc1edadebe3e5b'],
}),
(name, version, {
'checksums': ['4440762a17ddd66806ddbd7c3218140caa234b96a8c919ed54d7243d3e4a5dd1'],
# Some requirements are too strict.
'preinstallopts': "sed -i 's/tensorflow.*/tensorflow/g;s/cffi==/cffi>=/g' requirements.txt && ",
}),
]

sanity_check_paths = {
'files': ['bin/medaka', 'bin/medaka_consensus', 'bin/medaka_version_report'],
'dirs': ['lib/python%(pyshortver)s/site-packages'],
}

sanity_check_commands = [
"medaka --help",
"medaka_version_report",
]

moduleclass = 'bio'
54 changes: 54 additions & 0 deletions 236_NGSpeciesID/minimap2-2.26-GCCcore-12.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild
# Adam Huffman
# DeepThought, Flinders University
# Updated to 2.22
# R.QIAO

# Update Petr Král (INUITS)
easyblock = 'MakeCp'

name = 'minimap2'
version = '2.26'

homepage = 'https://github.com/lh3/minimap2'
description = """Minimap2 is a fast sequence mapping and alignment
program that can find overlaps between long noisy reads, or map long
reads or their assemblies to a reference genome optionally with detailed
alignment (i.e. CIGAR). At present, it works efficiently with query
sequences from a few kilobases to ~100 megabases in length at an error
rate ~15%. Minimap2 outputs in the PAF or the SAM format. On limited
test data sets, minimap2 is over 20 times faster than most other
long-read aligners. It will replace BWA-MEM for long reads and contig
alignment."""

toolchain = {'name': 'GCCcore', 'version': '12.3.0'}

# The release tarball is downloaded from the GitHub releases page.
source_urls = ['https://github.com/lh3/%(name)s/releases/download/v%(version)s/']
sources = ['%(name)s-%(version)s.tar.bz2']
checksums = ['6a588efbd273bff4f4808d5190957c50272833d2daeb4407ccf4c1b78143624c']

builddependencies = [
    ('binutils', '2.40'),
]

dependencies = [
    ('zlib', '1.2.13'),
]

# Hand the compiler and flags from the build environment over to make.
buildopts = 'CC="${CC}" CFLAGS="${CFLAGS}" INCLUDES="${CPPFLAGS}"'

# Binary, static library, headers, top-level documents and the man page.
files_to_copy = [
    (['%(name)s'], 'bin'),
    (['lib%(name)s.a'], 'lib'),
    (['*.h'], 'include'),
    'LICENSE.txt',
    'NEWS.md',
    'README.md',
    (['%(name)s.1'], 'share/man/man1'),
]

sanity_check_paths = {
    'files': ['bin/%(name)s', 'lib/lib%(name)s.a'],
    'dirs': ['include'],
}

# The second command aligns the test data in the build directory.
sanity_check_commands = [
    "minimap2 --help",
    "cd %(builddir)s/minimap2-%(version)s && minimap2 -a test/MT-human.fa test/MT-orang.fa > test.sam",
]

moduleclass = 'bio'
43 changes: 43 additions & 0 deletions 236_NGSpeciesID/ont-fast5-api-4.1.2-foss-2023a.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
easyblock = 'PythonBundle'

name = 'ont-fast5-api'
version = '4.1.2'

homepage = 'https://github.com/nanoporetech/ont_fast5_api'
description = "ont_fast5_api is a simple interface to HDF5 files of the Oxford Nanopore .fast5 file format."

toolchain = {'name': 'foss', 'version': '2023a'}

dependencies = [
    ('Python', '3.11.3'),
    ('SciPy-bundle', '2023.07'),
    ('h5py', '3.9.0'),
]

# pip-based installation settings
use_pip = True
sanity_pip_check = True

exts_list = [
    # progressbar33 is imported under the name 'progressbar'
    ('progressbar33', '2.4', {
        'modulename': 'progressbar',
        'checksums': ['51fe0d9b3b4023db2f983eeccdfc8c9846b84db8443b9bee002c7f58f4376eff'],
    }),
    (name, version, {
        'checksums': ['c7c59c6100e992ef8bc239cdf91f7a8ab46abf57ecd689f94b2b98e72a9e9472'],
    }),
]

sanity_check_paths = {
    'files': ['bin/compress_fast5', 'bin/fast5_subset', 'bin/multi_to_single_fast5', 'bin/single_to_multi_fast5'],
    # An empty path would only point at the installation prefix itself;
    # check the Python package directory instead.
    'dirs': ['lib/python%(pyshortver)s/site-packages'],
}

# Every installed command line tool must at least be able to print its help text.
sanity_check_commands = [
    "compress_fast5 --help",
    "fast5_subset --help",
    "multi_to_single_fast5 --help",
    "single_to_multi_fast5 --help",
]

moduleclass = 'bio'
27 changes: 27 additions & 0 deletions 236_NGSpeciesID/parasail-2.6.2-GCC-12.3.0.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
easyblock = 'CMakeMake'

name = 'parasail'
version = '2.6.2'

homepage = 'https://github.com/jeffdaily/parasail'
description = """parasail is a SIMD C (C99) library containing implementations
of the Smith-Waterman (local), Needleman-Wunsch (global), and semi-global
pairwise sequence alignment algorithms. """

toolchain = {'name': 'GCC', 'version': '12.3.0'}

# The tagged release archive comes from https://github.com/jeffdaily/parasail
github_account = 'jeffdaily'
source_urls = [GITHUB_SOURCE]
sources = ['v%(version)s.tar.gz']
checksums = ['9057041db8e1cde76678f649420b85054650414e5de9ea84ee268756c7ea4b4b']

builddependencies = [
    ('CMake', '3.26.3'),
]

# Expect both command line tools, the shared library and the public header.
sanity_check_paths = {
    'files': [
        'bin/parasail_aligner',
        'bin/parasail_stats',
        'lib/libparasail.%s' % SHLIB_EXT,
        'include/parasail.h',
    ],
    'dirs': [],
}

moduleclass = 'bio'
Loading

0 comments on commit a362a4c

Please sign in to comment.