Skip to content

Commit

Permalink
#235 smafa + cd-hit + polars
Browse files Browse the repository at this point in the history
  • Loading branch information
pavelToman committed Jul 17, 2024
1 parent 3a4ec86 commit 713c6e0
Show file tree
Hide file tree
Showing 8 changed files with 1,350 additions and 22 deletions.
84 changes: 62 additions & 22 deletions 235_singleM/SingleM-0.16.0-foss-2023a.eb
Original file line number Diff line number Diff line change
Expand Up @@ -11,60 +11,100 @@ finding bias in genome recovery, computing ecological diversity metrics, and lin

toolchain = {'name': 'foss', 'version': '2023a'}

# DEPS
# OK - python>=3.7 #, <3.10 # https://github.com/google-research/google-research/issues/779 means scann (at least 1.2.8) fails to install via pip for 3.10. Older python versions than 3.7 may also work, not tested.
# OK - diamond>=2.1.7 # Getting segmentation fault on test_insert_prefilter otherwise
# OK - biopython
# OK - hmmer
# OK - orfm
# OK - mfqe
# ok - extern
# ok - graftm>=0.15.0 # Version needed for --translation-table
# OK - krona
# OK - pplacer
# OK - sra-tools
# OK - ncbi-ngs-sdk - in SRA-Toolkit
# OK - sqlite - in python
# ok - squarify
# OK - mafft
# OK - seqmagick
# OK - expressbetadiversity
# OK - cd-hit -> created
# OK - fasttree
# ok - fastalite
# OK - jinja2 - pypi bundle
# OK - pip
# OK - sqlalchemy
# OK - pandas
# ok - bird_tool_utils_python>=0.4.1
# ok - zenodo_backpack
# OK - smafa>=0.7.0 -> created
# ok - pyranges
# OK - polars>=0.19.3 # supplement with taxon genome length requires this, for strip_chars
# OK - prodigal
# OK - tqdm
# OK - pyarrow
# # - galah >= 0.4.0 # 0.4.0 is the first version with checkm2 quality inputs, but not yet released
# - pytest # For testing only
# - ipython # For testing only
# - sqlparse # Don't understand why, but this isn't being installed before tests are run on GH actions. Required indirectly (by taxtastic).

# Modules SingleM needs at build time and at run time.
# Every software name is listed exactly once: listing two versions of the same
# package (or the same package twice) would make the generated module try to
# load both.
dependencies = [
    ('Python', '3.11.3'),  # with SQLite
    ('Python-bundle-PyPI', '2023.06'),
    ('SciPy-bundle', '2023.07'),  # for pandas
    # NOTE(review): kept active; confirm whether matplotlib is still wanted
    # before dropping it.
    ('matplotlib', '3.7.2'),
    ('Biopython', '1.83'),
    # NOTE(review): 0.20.2 chosen over 0.19.19; both satisfy the >= 0.19.3
    # requirement, confirm which module is actually available.
    ('polars', '0.20.2'),  # polars >= 0.19.3
    ('tqdm', '4.66.1'),
    ('Arrow', '14.0.1'),  # for pyarrow
    ('DIAMOND', '2.1.8'),
    ('HMMER', '3.4'),
    ('MAFFT', '7.520', '-with-extensions'),
    ('OrfM', '0.7.1'),
    ('mfqe', '0.5.0'),
    ('KronaTools', '2.8.1'),
    # SYSTEM marks a dependency provided with the system toolchain
    # (replaces the bare `True` flag in the fourth position).
    ('pplacer', '1.1.alpha19', '', SYSTEM),
    ('SRA-Toolkit', '3.0.10'),
    ('Seqmagick', '0.8.6'),
    ('ExpressBetaDiversity', '1.0.10'),
    ('FastTree', '2.1.11'),
    ('SQLAlchemy', '2.0.25'),
    ('CD-HIT', '4.8.1'),
    ('smafa', '0.8.0'),
    ('prodigal', '2.6.3'),
    # Disabled entries, presumably carried over from an earlier easyconfig
    # (TODO confirm they can be removed):
    # ('DendroPy', '4.4.0'),
    # ('PyYAML', '5.1'),
    # ('PostgreSQL', '11.3', versionsuffix),  # required for psycopg2
    # ('smafa', '0.4.0'),
    # ('VSEARCH', '2.13.4'),
    # ('fxtract', '2.3'),
]

use_pip = True
sanity_pip_check = True

# required because we're building Python packages using Intel compilers on top of Python built with GCC
# check_ldshared = True

# ('zenodo_backpack', '0.2.0', {
# 'checksums': ['5843a04b353ae613b474932c2a12d1fbfd529674074e73927ddc131eba7f1da8'],
# }),

# ('bird_tool_utils', '0.4.1', {
# 'checksums': ['6fe80f9608626427e8d382c5341c24088d61f17336fb6ce834d40aa4577499b5'],
# }),
exts_list = [
('Jinja2', '3.1.3'),
('fastalite', '0.4.1'),
('zenodo_backpack', '0.3.1', {
'checksums': ['dc91f3c427f976465789746e94736abfa536cf42dc2e49b6d6067382a9a39b26'],
}),
('bird_tool_utils', '0.4.1', {
'checksums': ['6fe80f9608626427e8d382c5341c24088d61f17336fb6ce834d40aa4577499b5'],
}),
('fastalite', '0.4.1', {
'checksums': ['e85413ee22bdb3fe0f73f5226771cf71eb33074ccdf8bbefff3a1bc6242de37c'],
}),
('squarify', '0.4.3', {
'checksums': ['54091f6ad175f7f201f8934574e647ce1b50dedc478c5fd968688eb7d7469f95'],
}),
('pyranges', '0.0.129', {
'checksums': ['bee83b4fad0062be9586668c6b0fc4270d5e761951975e018202993680071fb3'],
}),
('SQLAlchemy', '2.0.25', {
'checksums': ['a2c69a7664fb2d54b8682dd774c3b54f67f84fa123cf84dda2a5f40dcaa04e08'],
}),
('extern', '0.4.1', {
'checksums': ['0ff01adc2ad423f3d1e31641024b3974569fb0127b4d925bc6bed1cb86b6b1e4'],
}),
('graftm', '0.15.0', {
'checksums': ['794b2c6a8fff36c81db8b9a544ac021ab1397c569638bf7651ce5329db36de21'],
('graftm', '0.15.1', {
'checksums': ['80d828c311d2d6067977cfad5b6bac7cbc5d223ef8ab770d676b39bf2bc75163'],
}),
('singlem', version, {
'checksums': ['64e43a6a40795d68ff5aed7dfff9a94532b862f25a28c27de7d588d64a8c7f79'],
Expand Down
39 changes: 39 additions & 0 deletions 235_singleM/cdhit.eb
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# This file is an EasyBuild reciPY as per https://github.com/easybuilders/easybuild
# Toolchain and Perl version changed.
# Updated by: Thomas Eylenbosch(Gluo N.V.), Pavel Tománek (Inuits)

# Build with `make`, then copy the selected files into the install prefix.
easyblock = 'MakeCp'

name = 'CD-HIT'
version = '4.8.1'

homepage = 'http://weizhongli-lab.org/cd-hit/'
description = """ CD-HIT is a very widely used program for clustering and
comparing protein or nucleotide sequences."""

toolchain = {'name': 'GCC', 'version': '12.3.0'}
toolchainopts = {'openmp': True}

# The release tarball name carries a fixed date stamp in addition to the version.
source_urls = ['https://github.com/weizhongli/cdhit/releases/download/V%(version)s/']
sources = ['%(namelower)s-v%(version)s-2019-0228.tar.gz']
checksums = ['26172dba3040d1ae5c73ff0ac6c3be8c8e60cc49fc7379e434cdf9cb1e7415de']

dependencies = [
    ('Perl', '5.36.1'),
    ('zlib', '1.2.13'),
]

# Hand the toolchain's C++ compiler and flags to the Makefile's CC/CCFLAGS
# (presumably so the OpenMP flags requested via toolchainopts reach the compiler).
buildopts = ' CC="$CXX" CCFLAGS="$CPPFLAGS $CXXFLAGS"'

# Executables that are copied to bin/ and verified by the sanity check.
local_list_of_executables = [
    'cd-hit',
    'cd-hit-est',
    'cd-hit-2d',
    'cd-hit-est-2d',
    'cd-hit-div',
    'cd-hit-454',
]

# Binaries and Perl scripts go to bin/; docs and licence go to the prefix root.
files_to_copy = [
    (local_list_of_executables, 'bin'),
    (['*.pl'], 'bin'),
    'README',
    'doc',
    'license.txt',
]

fix_perl_shebang_for = ['bin/*.pl']

sanity_check_paths = {
    'files': ['bin/' + exe for exe in local_list_of_executables],
    'dirs': [],
}

moduleclass = 'bio'
105 changes: 105 additions & 0 deletions 235_singleM/crates_smafa.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
[('smafa', '0.8.0')]
crates = [
('addr2line', '0.22.0'),
('adler', '1.0.2'),
('aho-corasick', '1.1.3'),
('anstream', '0.6.14'),
('anstyle', '1.0.7'),
('anstyle-parse', '0.2.4'),
('anstyle-query', '1.1.0'),
('anstyle-wincon', '3.0.3'),
('assert_cli', '0.6.3'),
('atomic-polyfill', '1.0.3'),
('autocfg', '1.3.0'),
('backtrace', '0.3.73'),
('bird_tool_utils', '0.4.1'),
('bird_tool_utils-man', '0.4.0'),
('bitflags', '2.6.0'),
('buffer-redux', '1.0.1'),
('bytecount', '0.6.8'),
('byteorder', '1.5.0'),
('bzip2', '0.4.4'),
('bzip2-sys', '0.1.11+1.0.8'),
('cargo-husky', '1.5.0'),
('cc', '1.1.5'),
('cfg-if', '1.0.0'),
('clap', '4.5.9'),
('clap_builder', '4.5.9'),
('clap_lex', '0.7.1'),
('cobs', '0.2.3'),
('colorchoice', '1.0.1'),
('colored', '1.9.4'),
('crc32fast', '1.4.2'),
('critical-section', '1.1.2'),
('difference', '2.0.0'),
('embedded-io', '0.4.0'),
('env_logger', '0.10.2'),
('environment', '0.1.1'),
('errno', '0.3.9'),
('failure', '0.1.8'),
('failure_derive', '0.1.8'),
('fastrand', '2.1.0'),
('flate2', '1.0.30'),
('gimli', '0.29.0'),
('hash32', '0.2.1'),
('heapless', '0.7.17'),
('hermit-abi', '0.3.9'),
('humantime', '2.1.0'),
('is-terminal', '0.4.12'),
('is_terminal_polyfill', '1.70.0'),
('itoa', '1.0.11'),
('lazy_static', '1.5.0'),
('libc', '0.2.155'),
('linux-raw-sys', '0.4.14'),
('lock_api', '0.4.12'),
('log', '0.4.22'),
('lzma-sys', '0.1.20'),
('memchr', '2.7.4'),
('miniz_oxide', '0.7.4'),
('needletail', '0.5.1'),
('object', '0.36.1'),
('pkg-config', '0.3.30'),
('postcard', '1.0.8'),
('proc-macro2', '1.0.86'),
('quote', '1.0.36'),
('regex', '1.10.5'),
('regex-automata', '0.4.7'),
('regex-syntax', '0.8.4'),
('roff', '0.1.0'),
('rustc-demangle', '0.1.24'),
('rustc_version', '0.4.0'),
('rustix', '0.38.34'),
('ryu', '1.0.18'),
('scopeguard', '1.2.0'),
('semver', '1.0.23'),
('serde', '1.0.204'),
('serde_derive', '1.0.204'),
('serde_json', '1.0.120'),
('spin', '0.9.8'),
('stable_deref_trait', '1.2.0'),
('strsim', '0.11.1'),
('syn', '1.0.109'),
('syn', '2.0.71'),
('synstructure', '0.12.6'),
('tempfile', '3.10.1'),
('termcolor', '1.4.1'),
('unicode-ident', '1.0.12'),
('unicode-xid', '0.2.4'),
('utf8parse', '0.2.2'),
('version-compare', '0.1.1'),
('winapi', '0.3.9'),
('winapi-i686-pc-windows-gnu', '0.4.0'),
('winapi-util', '0.1.8'),
('winapi-x86_64-pc-windows-gnu', '0.4.0'),
('windows-sys', '0.52.0'),
('windows-targets', '0.52.6'),
('windows_aarch64_gnullvm', '0.52.6'),
('windows_aarch64_msvc', '0.52.6'),
('windows_i686_gnu', '0.52.6'),
('windows_i686_gnullvm', '0.52.6'),
('windows_i686_msvc', '0.52.6'),
('windows_x86_64_gnu', '0.52.6'),
('windows_x86_64_gnullvm', '0.52.6'),
('windows_x86_64_msvc', '0.52.6'),
('xz2', '0.1.7'),
]
File renamed without changes.
Loading

0 comments on commit 713c6e0

Please sign in to comment.