diff --git a/.gitignore b/.gitignore index cbe9f68..f6c3635 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ src/ -dfam-tetools.sif +*.sif Libraries/ tetools/ \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md index 70b29f0..38f234d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). +## 1.89 +### Updated +- RepeatMasker: `4.1.6` -> `4.1.7` +- NINJA: `0.99-cluster_only` -> `1.00-cluster_only` ## 1.88.5 ### Updated diff --git a/Dockerfile b/Dockerfile index 49fe523..ea9904e 100644 --- a/Dockerfile +++ b/Dockerfile @@ -100,11 +100,13 @@ RUN tar -x -f mafft-7.471-without-extensions-src.tgz \ # Compile NINJA RUN cd /opt \ - && mkdir NINJA \ - && tar --strip-components=1 -x -f src/NINJA-cluster.tar.gz -C NINJA \ + && tar --strip-components=1 -x -f src/NINJA-cluster.tar.gz \ && cd NINJA \ - && make clean && make build - + && make clean && make all \ + && cd .. \ + && mv LICENSE ./NINJA \ + && mv README.md ./NINJA + # Move UCSC tools RUN mkdir /opt/ucsc_tools \ && mv faToTwoBit twoBitInfo twoBitToFa /opt/ucsc_tools \ @@ -136,9 +138,9 @@ RUN cd /opt \ -libdir=/opt/RepeatMasker/Libraries \ -trf_prgm=/opt/trf \ -default_search_engine=rmblast \ - && gunzip -c src/Dfam-RepeatMasker.lib.gz > RepeatMasker/Libraries/RepeatMasker.lib \ - && /opt/rmblast/bin/makeblastdb -dbtype nucl -in RepeatMasker/Libraries/RepeatMasker.lib \ - && cd .. && rm src/RepeatMasker-4.1.7.tar.gz + && gunzip -c /opt/src/Dfam-RepeatMasker.lib.gz > Libraries/RepeatMasker.lib \ + && /opt/rmblast/bin/makeblastdb -dbtype nucl -in Libraries/RepeatMasker.lib \ + && cd .. 
&& rm /opt/src/RepeatMasker-4.1.7.tar.gz # With Dfam root partition #RUN cd /opt \ diff --git a/README.md b/README.md index f17b17e..4975704 100644 --- a/README.md +++ b/README.md @@ -47,8 +47,12 @@ The wrapper script does the following: * Runs the container as the current user, with the current working directory accessible from within the container. Depending on the environment and the software used, this directory appears inside the container at its original location and/or at the path `/work`. - -**NOTE:** When using the wrapper script with the `--docker` option, it will attempt to mount the host `$(pwd)/Libraries` folder to the container `/opt/RepeatMasker/Libraries` folder. This is intended to enable the modification of local FamDB files, but it will also overwrite the library files within the container. To avoid this, copy those files to the host system using the steps below under **Customizing the RepeatMasker libraries**. +* When the `--library` option is given, binds the specified host directory (ideally named + `Libraries`) into the container over `/opt/RepeatMasker/Libraries`. This is intended to enable the + modification of local FamDB files, as the container is packaged with a minimal FamDB file. + Note that the bound directory hides the library files packaged within the container. To keep them + available, first copy those files to the host system using the steps below under + **Customizing the RepeatMasker libraries**. 
``` curl -sSLO https://github.com/Dfam-consortium/TETools/raw/master/dfam-tetools.sh @@ -209,12 +213,12 @@ docker buildx build --platform=linux/amd64,linux/arm64 --output=type=registry -t ## Included software -The following software is included in the Dfam TE Tools container (version `1.88.5`): +The following software is included in the Dfam TE Tools container (version `1.89`): | | | | | -------------- | -------- | --- | | RepeatModeler | 2.0.5 | -| RepeatMasker | 4.1.6 | +| RepeatMasker | 4.1.7 | | coseg | 0.2.3 | | | | | | RMBlast | 2.14.1 | @@ -226,7 +230,7 @@ The following software is included in the Dfam TE Tools container (version `1.88 | genometools | 1.6.4 | | LTR\_retriever | 2.9.0 | | MAFFT | 7.471 | -| NINJA | 0.99-cluster\_only | +| NINJA | 1.00-cluster\_only | | UCSC utilities\* | v413 | > \* Selected tools only: `faToTwoBit`, `twoBitInfo`, `twoBitToFa` diff --git a/dfam-tetools.sh b/dfam-tetools.sh index 8397d8d..6953179 100755 --- a/dfam-tetools.sh +++ b/dfam-tetools.sh @@ -6,8 +6,6 @@ set -eu -workdir="$(pwd)" - die() { printf "%s\n" "$*" >&2 exit 1 @@ -17,12 +15,14 @@ usage() { printf "%s\n" "Usage: dfam-tetools.sh [-h|--help] [--container=/path/to/dfam-tetools.sif | --container=dfam/tetools:tag] [--docker | --singularity] + [--library /path/to/Libraries] [-- command [arg1 [arg2 [...]]]] --container Choose a specific container to use (a .sif file or a docker image ID or tag) ---docker Run the container via docker +--docker Run the container via docker --singularity Run the container via singularity -command A command to run in the container instead of an interactive shell +--library A host directory of RepeatMasker/Libraries files to bind into the container +command A command to run in the container instead of an interactive shell If neither --docker nor --singularity is specified and both programs are available, singularity is preferred." @@ -31,9 +31,12 @@ programs are available, singularity is preferred." 
## Parse command-line arguments ## -container="dfam/tetools:1.88.5" +container="dfam/tetools:1.89" use_docker=0 use_singularity=0 +use_lib=0 +workdir="" +bind_cmd="" while [ $# -ge 1 ]; do opt="$1" @@ -65,6 +68,11 @@ The --trf_prgm parameter was ignored." >&2 --singularity) use_singularity=1 ;; + --library) + use_lib=1 + workdir="$(CDPATH= cd -- "${1:?--library requires a directory argument}" && pwd)" # resolve to an absolute path; docker bind-mount sources must be absolute + shift + ;; *) die "Unrecognized argument: $opt A command to run in the container must be preceded by a --" @@ -99,19 +107,26 @@ fi ## Run the container ## if [ "$use_docker" = 1 ]; then + if [ "$use_lib" = 1 ]; then + bind_cmd="type=bind,source=$workdir,target=/opt/RepeatMasker/Libraries" # mount spec only; the flag is added at the call site so the spec stays one argument + fi docker run -it --rm \ --init \ - --mount type=bind,source="$workdir/Libraries",target=/opt/RepeatMasker/Libraries \ + ${bind_cmd:+--mount "$bind_cmd"} \ --user "$(id -u):$(id -g)" \ --workdir "/opt" \ "$container" \ "$@" elif [ "$use_singularity" = 1 ]; then + if [ "$use_lib" = 1 ]; then + bind_cmd="$workdir:/opt/RepeatMasker/Libraries" # bind spec only; -B is added at the call site so the spec stays one argument + fi if [ $# -eq 0 ]; then set -- "/bin/bash" fi export LANG=C singularity exec \ + ${bind_cmd:+-B "$bind_cmd"} \ "$container" \ "$@" fi diff --git a/getsrc.sh b/getsrc.sh index 31555bc..8be2898 100755 --- a/getsrc.sh +++ b/getsrc.sh @@ -32,12 +32,12 @@ download https://github.com/weizhongli/cdhit/releases/download/V4.8.1/cd-hit-v4. 
download https://github.com/genometools/genometools/archive/v1.6.4.tar.gz gt-1.6.4.tar.gz download https://github.com/oushujun/LTR_retriever/archive/v2.9.0.tar.gz LTR_retriever-2.9.0.tar.gz download https://mafft.cbrc.jp/alignment/software/mafft-7.471-without-extensions-src.tgz -download https://github.com/TravisWheelerLab/NINJA/archive/0.99-cluster_only.tar.gz NINJA-cluster.tar.gz +download https://github.com/TravisWheelerLab/NINJA/archive/1.00-cluster_only.tar.gz NINJA-cluster.tar.gz # download https://www.repeatmasker.org/coseg-0.2.3.tar.gz download https://github.com/rmhubley/coseg/archive/refs/tags/coseg-0.2.3.tar.gz -download https://www.dfam.org/releases/Dfam_3.8/families/FamDB/dfam38_full.0.h5.gz +# download https://www.dfam.org/releases/Dfam_3.8/families/FamDB/dfam38_full.0.h5.gz download https://www.dfam.org/releases/Dfam_3.8/families/Dfam-RepeatMasker.lib.gz -download http://www.repeatmasker.org/RepeatMasker/RepeatMasker-4.1.6.tar.gz +# download http://www.repeatmasker.org/RepeatMasker/RepeatMasker-4.1.7.tar.gz download https://github.com/Dfam-consortium/RepeatModeler/archive/2.0.5.tar.gz RepeatModeler-2.0.5.tar.gz # download https://github.com/zhangrengang/TEsorter/archive/v1.4.6.tar.gz TEsorter-1.4.6.tar.gz diff --git a/sha256sums.txt b/sha256sums.txt index 2e5d5a5..6a3c772 100644 --- a/sha256sums.txt +++ b/sha256sums.txt @@ -1,17 +1,18 @@ -e2d94f6179c33990a77fa9fdcefb842c8481b4c30833c9c12cbbe54cb3fdda73 *LTR_retriever-2.9.0.tar.gz -02efaa6a93ac859168dac9ad41567af64aba957caa130b6d2da73fa413fed0bd *NINJA-cluster.tar.gz -699765fa49d18dbfac9f7a82ecd054464b468cb7521abe9c2bd8caccf08ee7d8 *RECON-1.08.tar.gz -85c8bf61dd8411d140674cfa74e7642b41878fd63a7a779845e35162828f0d74 *RepeatMasker-4.1.6.tar.gz -4c005ada7e88ff944116832349580a0dc3db762dc1c91235fa574b32ea9a8452 *RepeatModeler-2.0.5.tar.gz -d1d2c32d7343efae3735bc7769b56bc23dd8009c566b7d04f3c796083a7bdace *RepeatScout-1.0.6.tar.gz -26172dba3040d1ae5c73ff0ac6c3be8c8e60cc49fc7379e434cdf9cb1e7415de 
*cd-hit-v4.8.1-2019-0228.tar.gz -0c48fba155264db8a9e544689678c9f691fee511b74e0b5d227b827657555235 *coseg-0.2.3.tar.gz -f523802129d6726ebf26b0555104fc15ac66717a99c34c12ff1739575d4b8dd1 *dfam38_full.0.h5.gz -2b976f14b67c1b9ae1b48bd65f3dd73c5f8a5a82474998b6ebe16a68d7def09f *faToTwoBit -eda9cf3a5d63e5017f3c315c5700b0e308c08793d5795bc889350a536369a449 *gt-1.6.4.tar.gz -ca70d94fd0cf271bd7063423aabb116d42de533117343a9b27a65c17ff06fbf3 *hmmer-3.4.tar.gz -60f8ec7bab80e00dbb24be65b8000312ffc98ff10eb72fc2219869d1dd382964 *mafft-7.471-without-extensions-src.tgz -a5e19700282a3423481d6198db37d2d0bdf2c530b4fb7cef7f1aa2c92364baae *rmblast-2.14.1+-x64-linux.tar.gz -516015b625473350c3d1c9b83cac86baea620c8418498ab64c0a67029c3fb28a *trf-4.09.1.tar.gz -160b8ae1921553d70e8549c15e43cac20d032e3754b31d93032dca775420c72d *twoBitInfo -3e8f0b4088690a657b920e45add4cda379c769a565e84f86f0f1c96749d2378b *twoBitToFa +26172dba3040d1ae5c73ff0ac6c3be8c8e60cc49fc7379e434cdf9cb1e7415de cd-hit-v4.8.1-2019-0228.tar.gz +0c48fba155264db8a9e544689678c9f691fee511b74e0b5d227b827657555235 coseg-0.2.3.tar.gz +b53e09a3db2d01943c4167d9b457addf9194988c298f4ce600236da7e3fc3dfd dfam38_full.0.h5.gz +31242db2d262305ebdf52c5c7a93743889bdb0f75253750964a0db9e8aa27711 Dfam-RepeatMasker.lib.gz +d9897ad14695b701e3818c494b2048aa13d089d9b0a7a8b20170d9f87b67e13f faToTwoBit +eda9cf3a5d63e5017f3c315c5700b0e308c08793d5795bc889350a536369a449 gt-1.6.4.tar.gz +ca70d94fd0cf271bd7063423aabb116d42de533117343a9b27a65c17ff06fbf3 hmmer-3.4.tar.gz +e2d94f6179c33990a77fa9fdcefb842c8481b4c30833c9c12cbbe54cb3fdda73 LTR_retriever-2.9.0.tar.gz +60f8ec7bab80e00dbb24be65b8000312ffc98ff10eb72fc2219869d1dd382964 mafft-7.471-without-extensions-src.tgz +3138d74522df9531e7573cf801bacc6a36768ae3c606a4b046cbd4c1acd9ca4c NINJA-cluster.tar.gz +699765fa49d18dbfac9f7a82ecd054464b468cb7521abe9c2bd8caccf08ee7d8 RECON-1.08.tar.gz +f39d9aec38383dc87e6ebeb27b5a229dc1c41049dc99ba0ca906e6017f8985f9 RepeatMasker-4.1.7.tar.gz 
+4c005ada7e88ff944116832349580a0dc3db762dc1c91235fa574b32ea9a8452 RepeatModeler-2.0.5.tar.gz +d1d2c32d7343efae3735bc7769b56bc23dd8009c566b7d04f3c796083a7bdace RepeatScout-1.0.6.tar.gz +a5e19700282a3423481d6198db37d2d0bdf2c530b4fb7cef7f1aa2c92364baae rmblast-2.14.1+-x64-linux.tar.gz +516015b625473350c3d1c9b83cac86baea620c8418498ab64c0a67029c3fb28a trf-4.09.1.tar.gz +7915d9c905bca42f6de00134ea052db898b098e34d001c3ea4ad2a3c528a1cf1 twoBitInfo +a54b36631a280ba1cb3fb88ce8ebbd520d918468648dfce5569a95ba133bfcce twoBitToFa