Skip to content

Commit ed32613

Browse files
committed
Update Vagrant & easy_install
- change easy_install scripts to install libs and docs one level up from INSTALLATION_PATH - It would be cleaner to re-define INSTALLATION_PATH to function like a standard PREFIX (e.g. /usr/local), but this would be a bigger change - Conform dev version to python standard (1.1.0.dev0) - Use additional resources in vagrant box (cpu, storage) - needed for compiling casper - Vagrantfile now installs all needed software - document using vagrant - All (non-skipped) tests pass from the vagrant box
1 parent bef995b commit ed32613

File tree

11 files changed

+216
-58
lines changed

11 files changed

+216
-58
lines changed

Vagrantfile

+159-42
Original file line numberDiff line numberDiff line change
@@ -14,52 +14,39 @@ Vagrant.configure("2") do |config|
1414
# boxes at https://vagrantcloud.com/search.
1515
config.vm.box = "ubuntu/bionic64"
1616

17-
# Disable automatic box update checking. If you disable this, then
18-
# boxes will only be checked for updates when the user runs
19-
# `vagrant box outdated`. This is not recommended.
20-
# config.vm.box_check_update = false
21-
22-
# Create a forwarded port mapping which allows access to a specific port
23-
# within the machine from a port on the host machine. In the example below,
24-
# accessing "localhost:8080" will access port 80 on the guest machine.
25-
# NOTE: This will enable public access to the opened port
26-
# config.vm.network "forwarded_port", guest: 80, host: 8080
27-
28-
# Create a forwarded port mapping which allows access to a specific port
29-
# within the machine from a port on the host machine and only allow access
30-
# via 127.0.0.1 to disable public access
31-
# config.vm.network "forwarded_port", guest: 80, host: 8080, host_ip: "127.0.0.1"
32-
33-
# Create a private network, which allows host-only access to the machine
34-
# using a specific IP.
35-
# config.vm.network "private_network", ip: "192.168.33.10"
36-
37-
# Create a public network, which generally matched to bridged network.
38-
# Bridged networks make the machine appear as another physical device on
39-
# your network.
40-
# config.vm.network "public_network"
17+
# Increase CPUs and memory for the box
18+
# https://stackoverflow.com/a/37335639/81658
19+
config.vm.provider "virtualbox" do |v|
20+
host = RbConfig::CONFIG['host_os']
21+
# Give the VM access to all CPU cores on the host (memory is not adjusted here)
22+
if host =~ /darwin/
23+
cpus = `sysctl -n hw.ncpu`.to_i
24+
elsif host =~ /linux/
25+
cpus = `nproc`.to_i
26+
else # Windows folks
27+
cpus = `wmic cpu get NumberOfCores`.split("\n")[2].to_i
28+
end
29+
30+
puts "Provisioning VM with #{cpus} CPU"
31+
v.customize ["modifyvm", :id, "--cpus", cpus]
32+
end
33+
34+
# Need extra disk size for casper compilation
35+
# Requires disksize plugin:
36+
# vagrant plugin install vagrant-disksize
37+
config.disksize.size = '15GB'
38+
39+
# Log in as root
40+
#config.ssh.username = 'root'
41+
#config.ssh.password = 'vagrant'
42+
#config.ssh.insert_key = 'true'
4143

4244
# Share an additional folder to the guest VM. The first argument is
4345
# the path on the host to the actual folder. The second argument is
4446
# the path on the guest to mount the folder. And the optional third
4547
# argument is a set of non-required options.
4648
# config.vm.synced_folder "../data", "/vagrant_data"
4749

48-
# Provider-specific configuration so you can fine-tune various
49-
# backing providers for Vagrant. These expose provider-specific options.
50-
# Example for VirtualBox:
51-
#
52-
# config.vm.provider "virtualbox" do |vb|
53-
# # Display the VirtualBox GUI when booting the machine
54-
# vb.gui = true
55-
#
56-
# # Customize the amount of memory on the VM:
57-
# vb.memory = "1024"
58-
# end
59-
#
60-
# View the documentation for the provider you are using for more
61-
# information on available options.
62-
6350
# Enable provisioning with a shell script. Additional provisioners such as
6451
# Puppet, Chef, Ansible, Salt, and Docker are also available. Please see the
6552
# documentation for more information about their specific syntax and use.
@@ -68,26 +55,156 @@ Vagrant.configure("2") do |config|
6855
# apt-get install -y apache2
6956
# SHELL
7057
config.vm.provision "shell", inline: <<-SHELL
58+
set -e
59+
7160
# Install dependencies
7261
apt-get update
7362
apt-get -y dist-upgrade
74-
apt-get -y install python3 python3-pip unzip
63+
apt-get -y install python3 python3-pip unzip openjdk-8-jre-headless cmake
7564
pip3 install virtualenv
7665
7766
# reduce wget output during provisioning
7867
echo 'verbose = off' >> ~/.wgetrc
7968
69+
# Assistive technologies cause Java problems
70+
rm -f /usr/lib/jvm/java-8-openjdk-amd64/jre/lib/accessibility.properties
71+
8072
cd /vagrant/easy_setup
8173
8274
# accept all licenses
83-
sed -i 's/ACCEPT_ALL=no/ACCEPT_ALL=yes/i' configTRAL_path.cfg
75+
if grep -q ACCEPT_ALL configTRAL_path.cfg; then
76+
sed -i 's/ACCEPT_ALL=no/ACCEPT_ALL=yes/i' configTRAL_path.cfg
77+
else
78+
echo ACCEPT_ALL=yes >> configTRAL_path.cfg
79+
fi
8480
8581
8682
# Install TRAL software
8783
./setupTRAL.sh setup
84+
85+
# Config file
86+
cat <<END > ~/.tral/config.ini
87+
###########################################
88+
### Configuration file for TRAL Vagrant ###
89+
###########################################
90+
91+
sequence_type = AA
92+
93+
[sequence]
94+
[[repeat_detection]]
95+
# AA includes all detectors used by default on protein sequence data.
96+
AA = HHrepID, T-REKS, TRUST, XSTREAM
97+
# DNA includes all detectors used by default on DNA sequence data.
98+
DNA = Phobos, TRED, T-REKS, TRF, XSTREAM
99+
[[repeat_detector_path]]
100+
# If the executable is in the system path, supply its name. Otherwise, supply the full path to the executable. Details are explained in TRAL's online docs.
101+
PHOBOS = phobos
102+
HHrepID = hhrepid_64
103+
HHrepID_dummyhmm = ~/.tral/data/hhrepid/dummyHMM.hmm
104+
T-REKS = T-REKS
105+
TRED = tred
106+
TRF = trf
107+
TRUST = TRUST
108+
TRUST_substitutionmatrix = ~/.tral/tral_external_software/TRUST_Align/Align/BLOSUM50
109+
XSTREAM = XSTREAM
110+
111+
[hmm]
112+
hmmbuild = hmmbuild
113+
l_effective_max = 50
114+
115+
[filter]
116+
[[basic]]
117+
tag = basic_filter
118+
[[[dict]]]
119+
[[[[pvalue]]]]
120+
func_name = pvalue
121+
score = phylo_gap01
122+
threshold = 0.1
123+
[[[[n_effective]]]]
124+
func_name = attribute
125+
attribute = n_effective
126+
type = min
127+
threshold = 1.9
128+
129+
130+
[repeat]
131+
scoreslist = phylo_gap01, # score (the comma at the end is needed for TRAL)
132+
calc_score = False # is the score calculated?
133+
calc_pvalue = False # is the pvalue calculated?
134+
precision = 10
135+
ginsi = ginsi # integrated in MAFFT
136+
Castor = Castor
137+
[[castor_parameter]]
138+
rate_distribution = constant # either constant or gamma
139+
alfsim = alfsim
140+
141+
[repeat_list]
142+
# Columns to include in repeat list TSV output
143+
# Allowed values:
144+
# - begin: position of the tandem repeats within the sequence,
145+
# - pvalue: statistical significance of the tandem repeats
146+
# - divergence: divergence of the tandem repeat units
147+
# - l_effective: length of the tandem repeat units
148+
# - n_effective: number of tandem repeat units
149+
# - msa_original: multiple sequence alignment
150+
# - score: score corresponding to the value of 'model'
151+
# - repeat_region_length: total length of repeat region
152+
output_characteristics = begin, msa_original, l_effective, n_effective, repeat_region_length, divergence, pvalue
153+
154+
# model for scoring repeats. Supported: entropy, parsimony, pSim, phylo, phylo_gap01, phylo_gap001
155+
model = phylo_gap01
156+
157+
[repeat_score]
158+
evolutionary_model = lg
159+
[[indel]]
160+
indel_rate_per_site = 0.01
161+
ignore_gaps = True
162+
gaps = row_wise
163+
zipf = 1.821
164+
[[optimisation]]
165+
start_min = 0.5
166+
start_max = 1.5
167+
n_iteration = 14
168+
[[K80]]
169+
kappa = 2.59
170+
[[TN93]]
171+
alpha_1 = 0.3
172+
alpha_2 = 0.4
173+
beta = 0.7
174+
[[score_calibration]]
175+
scoreslist=phylo_gap01, # score (the comma at the end is needed)
176+
save_calibration = False
177+
precision = 10
178+
179+
[AA]
180+
standard_chars = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
181+
all_chars = A, B, C, D, E, F, G, H, I, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z
182+
[[ambiguous_chars]]
183+
B = D,N
184+
O = K,
185+
U = C,
186+
Z = E,Q
187+
X = A, C, D, E, F, G, H, I, K, L, M, N, P, Q, R, S, T, V, W, Y
188+
[DNA]
189+
standard_chars = A, C, G, T
190+
all_chars = A, C, G, T, N, X
191+
[[ambiguous_chars]]
192+
N = A, C, G, T
193+
X = A, C, G, T
194+
195+
END
196+
197+
198+
# All external software
199+
./install_ext_software.sh
200+
201+
cd /vagrant
202+
# dev requirements are optional but useful for tests and docs
203+
pip3 install -r requirements_dev.txt
204+
88205
echo
89206
echo "THIS MACHINE CONTAINS PROPRIETARY SOFTWARE."
90207
echo "Please check the licenses before using (e.g. no commercial use permitted)"
91208
SHELL
92-
209+
93210
end

docs/index.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,4 +92,4 @@ Indices and tables
9292
overlap_filtering
9393
workflow
9494
search_hmm
95-
95+
vagrant

docs/vagrant.rst

+32
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
.. _vagrant:
2+
3+
Running TRAL using vagrant
4+
==========================
5+
6+
Building
7+
--------
8+
9+
To build a vagrant box, first clone tral from the git repo. Next, run
10+
::
11+
12+
vagrant up
13+
vagrant ssh
14+
15+
This will build and provision the box and then log you in. The current working directory will be mounted at /vagrant within the box.
16+
Tral should be run as root within the box (it uses /root/.tral as the data dir).
17+
::
18+
19+
sudo su
20+
cd /vagrant
21+
22+
23+
Testing TRAL
24+
------------
25+
26+
The vagrant box is useful for testing tral.
27+
::
28+
29+
vagrant ssh
30+
sudo su
31+
cd /vagrant
32+
pytest

easy_setup/configTRAL_path.cfg

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ INSTALLATION_PATH=/usr/local/bin
1111
## ATTENTION:
1212
# Please only change these filesystem paths if you know where else the changes need to be adapted
1313

14-
TRAL_PATH=$FILES/tral
14+
TRAL_PATH=$FILES/.tral
1515
TRAL_EXT_SOFTWARE=$TRAL_PATH/tral_external_software
1616

1717
TRAL=$TRAL_PATH/tral # TRAL library

easy_setup/install_ext_software/alf.sh

+1-4
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,7 @@ if [ ! -d "$TRAL_EXT_SOFTWARE/ALF_standalone" ]; then # test if not already in d
4141
fi
4242

4343
rm -rf "$TRAL_EXT_SOFTWARE/ALF_standalone.tar.gz"
44-
(cd "$TRAL_EXT_SOFTWARE/ALF_standalone" && "$TRAL_EXT_SOFTWARE/ALF_standalone/install.sh" "$INSTALLATION_PATH") # installation of ALF
45-
ln -sf "$INSTALLATION_PATH/bin/alfsim" "$INSTALLATION_PATH"
46-
ln -sf "$INSTALLATION_PATH/bin/alfdarwin.linux64" "$INSTALLATION_PATH"
47-
ln -sf "$INSTALLATION_PATH/bin/alfdarwin" "$INSTALLATION_PATH"
44+
(cd "$TRAL_EXT_SOFTWARE/ALF_standalone" && "$TRAL_EXT_SOFTWARE/ALF_standalone/install.sh" "$INSTALLATION_PATH/..") # installation of ALF
4845

4946
######################
5047
### Uninstall ALF (default paths!)

easy_setup/install_ext_software/hmmer.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -53,14 +53,13 @@ done
5353
{
5454
cd "$TRAL_EXT_SOFTWARE/hmmer-"*
5555
} && {
56-
./configure --prefix "$INSTALLATION_PATH"
56+
./configure --prefix "$INSTALLATION_PATH/.."
5757
make clean
5858
make
5959
# "$INSTALLATION_PATH"/bin make check # run a test suite
6060
make install
6161
} && {
6262
echo "Installation of HMMER done."
63-
ln -s "$INSTALLATION_PATH/bin/hmmbuild" "$INSTALLATION_PATH/hmmbuild"
6463
echo -e "\nhmmbuild is in your path $INSTALLATION_PATH\n"
6564
}
6665
)

easy_setup/install_ext_software/mafft.sh

+1-2
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,7 @@ if [ ! -d "$TRAL_EXT_SOFTWARE/$mafftVer" ]; then
4848
exit 1
4949
}
5050
tar -xvzf "$TRAL_EXT_SOFTWARE/$mafftVer" -C "$TRAL_EXT_SOFTWARE"
51-
sed -i "s#PREFIX = /usr/local#PREFIX = \"$INSTALLATION_PATH\"#" "$TRAL_EXT_SOFTWARE/$latestVer/core/Makefile" # change default installation path in Makefile
52-
sed -i "s#BINDIR = \$(PREFIX)/bin#BINDIR = \$(PREFIX)#" "$TRAL_EXT_SOFTWARE/$latestVer/core/Makefile"
51+
sed -i "s#PREFIX = /usr/local#PREFIX = \"$INSTALLATION_PATH/..\"#" "$TRAL_EXT_SOFTWARE/$latestVer/core/Makefile" # change default installation path in Makefile
5352

5453
( cd "$TRAL_EXT_SOFTWARE/$latestVer/core/" && make clean && make && make install ) # Installation
5554
rm -rf "$TRAL_EXT_SOFTWARE/"$latestVer""

tral/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.1.0-dev"
1+
__version__ = "1.1.0.dev0"

tral/repeat/test/repeat_align_test.py

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
'RASVLFANE----KYKSALQELE--ELK-QIVPKESLVYFL',
2424
'IGKVYKKLG----QTHLALMNFS--WAM-DLDPKGA----N']
2525

26+
2627
def test_repeat_alignment():
2728
''' Test a realignment using Mafft's ginsi.'''
2829

tral/sequence/repeat_detection_run.py

+17-4
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@
1717
import sys
1818
import tempfile
1919

20-
from tral import configuration
21-
from tral.sequence import repeat_detection_io
20+
from .. import configuration
21+
from . import repeat_detection_io
22+
from ..paths import CONFIG_DIR, config_file
2223

2324
LOG = logging.getLogger(__name__)
2425

@@ -196,10 +197,22 @@ def __init__(self):
196197
"-shuffle": False
197198
}
198199

200+
dummyhmm = os.path.expanduser(REPEAT_DETECTOR_PATH['HHrepID_dummyhmm'])
201+
if not os.path.exists(dummyhmm):
202+
# Special case for the data directory
203+
try:
204+
rel_config = os.path.relpath(dummyhmm, start=CONFIG_DIR)
205+
if not rel_config.startswith(".."):
206+
dummyhmm = config_file(rel_config)
207+
except FileNotFoundError:
208+
pass # not in data; error below
209+
if not os.path.exists(dummyhmm):
210+
raise FileNotFoundError("HHrepID_dummyhmm not found: %s" % dummyhmm)
211+
199212
self.valopts = {
200213
# <file> input query alignment (fasta/a2m/a3m) or HMM file (.hhm)
201214
"-i": None,
202-
"-d": os.path.expanduser(REPEAT_DETECTOR_PATH['HHrepID_dummyhmm']), # <path> dummy hmm database file
215+
"-d": dummyhmm, # <path> dummy hmm database file
203216
"-o": 'hhrepID.o', # <file> write results and multiple sequence alignment to file (default=none)
204217
"-v": 0, # -v: verbose mode (default: show only warnings) ; -v 0: suppress all screen outpu
205218
"-P": None, # <float> max p-value of suboptimal alignments in all search rounds but the last one (def=0.1)
@@ -654,7 +667,7 @@ def __init__(self):
654667
}
655668

656669
self.valopts = {
657-
"-matrix": REPEAT_DETECTOR_PATH['TRUST_substitutionmatrix'],
670+
"-matrix": os.path.expanduser(REPEAT_DETECTOR_PATH['TRUST_substitutionmatrix']),
658671
"-gapo": "8",
659672
"-gapx": "2",
660673
"-procTotal": "1",

tral/tral_configuration/config.ini

+1-1
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ sequence_type = AA
1515
TRED = tred
1616
TRF = trf
1717
TRUST = TRUST
18-
TRUST_substitutionmatrix = path/to/TRAL_PATH/tral_external_software/TRUST/Align/BLOSUM50
18+
TRUST_substitutionmatrix = ~/.tral/tral_external_software/TRUST_Align/Align/BLOSUM50
1919
XSTREAM = XSTREAM
2020

2121
[hmm]

0 commit comments

Comments
 (0)