diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 71b1695d..833c39da 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -19,7 +19,7 @@ jobs:
- name: Create conda environement
run: |
source `conda info --base`/etc/profile.d/conda.sh
- conda create -q -n build-IntaRNA -c conda-forge -c bioconda gcc_linux-64 gxx_linux-64 boost-cpp libboost viennarna>=2.4.14 pkgconfig
+ conda create -q -n build-IntaRNA -c conda-forge -c bioconda gcc_linux-64 gxx_linux-64 boost-cpp viennarna>=2.4.14 pkgconfig
conda activate build-IntaRNA
- name: Script
run: |
@@ -28,11 +28,19 @@ jobs:
# generate autotools's files
bash autotools-init.sh
# run configure (without boost checks)
- ./configure --prefix=$HOME/IntaRNA --with-vrna=`conda info --base`/envs/build-IntaRNA --with-boost=no --without-zlib
+ ENVPREFIX=`conda info --base`/envs/build-IntaRNA
+ export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ENVPREFIX/lib
+ ./configure \
+ --prefix=$HOME/IntaRNA \
+ --with-vrna=$ENVPREFIX \
+ --with-boost=$ENVPREFIX \
+ --with-boost-libdir=$ENVPREFIX/lib \
+ --with-zlib=$ENVPREFIX
# compile documentation
# - make doxygen-doc
# compile, test and install IntaRNA
- make -j 2 && make tests -j 2 && make install
+ make -j 2 && make install
+ make tests -j 2
##### check IntaRNA build #####
# run installed IntaRNA with help output
$HOME/IntaRNA/bin/IntaRNA -h
diff --git a/ChangeLog b/ChangeLog
index 72842672..bc61f4a3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -10,11 +10,60 @@
# changes in development version since last release
################################################################################
+
+################################################################################
+### version 3.2.2
+################################################################################
+
# IntaRNA
+- BUGFIX: maximal interaction length correction for precomputed accessibility
+ data was one to large (wrong dangling end computation for maximally long RRIs)
+ (thanks to Sabine Reisser)
+- BUGFIX: explicit seed encodings within last 7 nucleotides (seedBP) were
+ ignored (thanks to Sebastian Holler)
+- BUGFIX: outNoLP option was not correctly implemented (missing recursion cases)
+ and was thus missing interactions
+- BUGFIX: osx: configure adaptation to old grep version in osx
################################################################################
################################################################################
+220110 Martin Raden
+ * configure.ac :
+ * rewrite of grep personality call to be compatible with old grep version on osx
+
+201204 Simon Bray (thanks!) :
+ * replace travis with github action
+
+210211 Martin Raden
+ * IntaRNA/SeedConstraint :
+ * constructor: bp>2 check only if no explicit seed present
+ * IntaRNA/SeedHandlerExplicit :
+ - getSeedMaxBP() : obsolete, replaced by getSeedMinBP()
+ + getSeedMinBP() : minimal number of bps within encoded seeds; used for seed
+ constraint initialization
+ * traceBackSeed() :
+ * bugfix: check for minimal seed length sufficient on one side
+ * bin/CommandLineParsing :
+ * seedBP now set to getSeedMinBP() if explicit seeds present
+ * bin/IntaRNA :
+ * exception information now motivates to send input along with report
+ + docs/recursions : recursion depictions for sanity checks of implementations
+ + PredictorMfe2d (+outNoLP)
+ + PredictorMfe2dSeed (+outNoLP)
+ * IntaRNA/PredictorMfe2dSeed :
+ * IntaRNA/PredictorMfe2dHeuristic :
+ * IntaRNA/PredictorMfe2dHeuristicSeed :
+ * IntaRNA/PredictorMfeEns2dHeuristic :
+ * bugfix outNoLP : missing recursion cases
+ * IntaRNA/AccessibilityFromStream :
+ * bugfix read from stream
+ * test updated
+
+201210 Martin Raden
+ * IntaRNA/AccessibilityFromStream :
+ * bugfix: max length == 1 smaller than max-accessibility-data-length due to
+ dangling-end treatment (thanks to Sabine Reisser)
################################################################################
### version 3.2.1
@@ -35,6 +84,8 @@
* bin/CommandLineParsing :
* changing import and usage of boost::bind and boost::placeholders namespace
(thanks to Behra Phani Rama Krishna)
+ * R/IntaRNA_plotRegions.R :
+ * replace deprecated expand_scale() with expansion()
200615 Martin Raden
* IntaRNA/OutputConstraint :
diff --git a/R/IntaRNA_plotRegions.R b/R/IntaRNA_plotRegions.R
index 99048fd2..bf7306b5 100644
--- a/R/IntaRNA_plotRegions.R
+++ b/R/IntaRNA_plotRegions.R
@@ -123,7 +123,7 @@ coveragePlot =
geom_density() +
ylab("coverage") +
xlab( ifelse( is.null(title) , "position" , title ) ) +
- scale_y_continuous(position = "right", expand=expand_scale(mult = c(0, .02))) +
+ scale_y_continuous(position = "right", expand=expansion(mult = c(0, .02))) +
scale_x_continuous(expand = c(0, 0), limits=c(xmin,xmax));
if (!is.null(title)) {
diff --git a/README.md b/README.md
index e4ad36bd..66abf955 100644
--- a/README.md
+++ b/README.md
@@ -1182,10 +1182,28 @@ molecule. IntaRNA supports such data by interfacing the Vienna RNA package
capabilities for SHAPE reactivity data incorporation, see
Lorenz et al. ([2015](https://doi.org/10.1093/bioinformatics/btv523), [2016](https://dx.doi.org/10.1186%2Fs13015-016-0070-z)) or the
[RNAfold manpage](https://www.tbi.univie.ac.at/RNA/RNAfold.1.html).
+To use this feature, ViennaRNA-based accessibility computation is needed,
+i.e. `--t|qAcc=C`, which is the default mode (no action required).
The SHAPE reactivity data can be provided via file using `--qShape` or
-`--tShape` for query or target sequence, respectively.
-Independently for each, it is possible
+`--tShape` for query or target sequence, respectively. The input format is a
+space-separated 3-column format providing position, nucleotide and reactivity value as
+exmemplified below.
+
+```[bash]
+ 1 G 0.134
+ 2 C 0.044
+ 3 C 0.057
+ 4 G 0.114
+ 5 U 0.094
+ 6 C 0.035
+ 7 G 0.909
+ 8 C 0.224
+ 9 C 0.529
+ 10 A 1.475
+```
+
+Independently for each sequence, it is possible
to define the methods to be used to convert the data into pseudo energies and
pairing probabilities. The respective IntaRNA arguments are
`--qShapeMethod`|`--tShapeMethod`
@@ -1193,6 +1211,13 @@ and `--qShapeConversion`|`--tShapeConversion`, which mimics the according
tool arguments in the Vienna RNA package (see e.g. the
[RNAfold manpage](https://www.tbi.univie.ac.at/RNA/RNAfold.1.html)).
+An example call that shows the effect of SHAPE data incorporation is given below (assuming the SHAPE data from above is stored in
+the file `data-SHAPE.txt`).
+```[bash]
+IntaRNA -q GCCGUCGCCA -t GCCGUCGCCA --tShape=data-SHAPE.txt --out=qAcc:STDOUT --out=tAcc:STDERR --out=/dev/null
+```
+
+
For further details, please refer to our respective publication
- [Integration of accessibility data from structure probing into RNA-RNA interaction prediction.](https://doi.org/10.1093/bioinformatics/bty1029)
diff --git a/configure.ac b/configure.ac
index d0b2f54e..811bbd62 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1,7 +1,7 @@
AC_PREREQ([2.65])
# 5 argument version only available with aclocal >= 2.64
-AC_INIT([IntaRNA], [3.2.1], [], [intaRNA], [http://www.bioinf.uni-freiburg.de] )
+AC_INIT([IntaRNA], [3.2.2], [], [intaRNA], [http://www.bioinf.uni-freiburg.de] )
# minimal required version of the boost library
BOOST_REQUIRED_VERSION=1.50.0
@@ -411,7 +411,7 @@ AS_IF([test "$DEPENDENCYNOTFOUND" = "1"], [
##########################################################################
# get available personalities
-AC_SUBST([PERSONALITIES],[`grep -P "^\\s+case\\s+IntaRNA\\S+\\s*:\\s*return" ./src/bin/CommandLineParsing.h | sed "s/^\\s*case\\s\\+\\(IntaRNA\\S*\\)\\s\\+:\\s\\+return.*/\\1/g" | tr "\\n" " "`])
+AC_SUBST([PERSONALITIES],[`grep "^\\s*case\\s*IntaRNA\\S*\\s*:\\s*return" ./src/bin/CommandLineParsing.h | sed "s/^\\s*case\\s\\+\\(IntaRNA\\S*\\)\\s\\+:\\s\\+return.*/\\1/g" | tr "\\n" " "`])
##########################################################################
@@ -476,4 +476,4 @@ You can run 'make', 'make tests' and 'make install' now!
Run 'make install prefix=XYZ' for installation in an alternative directory.
======================================
-])
\ No newline at end of file
+])
diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2d.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2d.svg
new file mode 100644
index 00000000..974282cb
--- /dev/null
+++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2d.svg
@@ -0,0 +1,672 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristic.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristic.svg
new file mode 100644
index 00000000..aac1bcc1
--- /dev/null
+++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristic.svg
@@ -0,0 +1,558 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristicSeed.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristicSeed.svg
new file mode 100644
index 00000000..84f611ee
--- /dev/null
+++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristicSeed.svg
@@ -0,0 +1,1500 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dSeed.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dSeed.svg
new file mode 100644
index 00000000..f396ee0e
--- /dev/null
+++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dSeed.svg
@@ -0,0 +1,1726 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA.PredictorMfe2d.svg b/doc/recursions/IntaRNA.PredictorMfe2d.svg
new file mode 100644
index 00000000..a0b0f5e5
--- /dev/null
+++ b/doc/recursions/IntaRNA.PredictorMfe2d.svg
@@ -0,0 +1,437 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA.PredictorMfe2dHeuristic.svg b/doc/recursions/IntaRNA.PredictorMfe2dHeuristic.svg
new file mode 100644
index 00000000..400ccf6e
--- /dev/null
+++ b/doc/recursions/IntaRNA.PredictorMfe2dHeuristic.svg
@@ -0,0 +1,342 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA.PredictorMfe2dHeuristicSeed.svg b/doc/recursions/IntaRNA.PredictorMfe2dHeuristicSeed.svg
new file mode 100644
index 00000000..51e4d496
--- /dev/null
+++ b/doc/recursions/IntaRNA.PredictorMfe2dHeuristicSeed.svg
@@ -0,0 +1,570 @@
+
+
+
+
diff --git a/doc/recursions/IntaRNA.PredictorMfe2dSeed.svg b/doc/recursions/IntaRNA.PredictorMfe2dSeed.svg
new file mode 100644
index 00000000..b8e3e76e
--- /dev/null
+++ b/doc/recursions/IntaRNA.PredictorMfe2dSeed.svg
@@ -0,0 +1,641 @@
+
+
+
+
diff --git a/src/IntaRNA/AccessibilityFromStream.cpp b/src/IntaRNA/AccessibilityFromStream.cpp
index 86eff91f..d486c189 100644
--- a/src/IntaRNA/AccessibilityFromStream.cpp
+++ b/src/IntaRNA/AccessibilityFromStream.cpp
@@ -87,7 +87,8 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP
// check if maxLength <= max available length
size_t cutEnd = line.find_last_of("1234567890");
size_t cutStart = line.find_last_not_of("1234567890", cutEnd );
- size_t maxAvailLength = boost::lexical_cast( line.substr(cutStart+1,cutEnd-cutStart));
+ // max length == 1 smaller than max-accessibility-data-length due to dangling-end treatment
+ size_t maxAvailLength = (boost::lexical_cast( line.substr(cutStart+1,cutEnd-cutStart)) -1);
if (maxAvailLength < getMaxLength()) {
#if INTARNA_MULITHREADING
#pragma omp critical(intarna_omp_logOutput)
@@ -101,7 +102,7 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP
}
// resize data structure to fill
- edValues.resize( getSequence().size(), getSequence().size(), 0, getMaxLength() );
+ edValues.resize( getSequence().size(), getSequence().size(), 0, 1+getMaxLength() );
// TODO rewrite to support "nan" and "inf" parsing via boost::spirit::qi
// http://stackoverflow.com/questions/11420263/is-it-possible-to-read-infinity-or-nan-values-using-input-streams
@@ -135,7 +136,7 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP
// parse probabilities or EDs for this line and store
double curVal;
- size_t minI = j - std::min( j, getMaxLength() );
+ size_t minI = j - std::min( j, 1+getMaxLength() );
for ( size_t i = j; i>minI; i--) {
if ( inStream >>curVal ) {
// check if we parse probabilities
@@ -162,7 +163,7 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP
}
}
// check if full line was already parsed
- if (j < maxAvailLength || minI > 0) {
+ if (j < 1+maxAvailLength || minI > 0) {
// skip rest till end of line
inStream.ignore(std::numeric_limits::max(), '\n');
}
diff --git a/src/IntaRNA/PredictorMfe2d.cpp b/src/IntaRNA/PredictorMfe2d.cpp
index 90d066d8..21b59427 100644
--- a/src/IntaRNA/PredictorMfe2d.cpp
+++ b/src/IntaRNA/PredictorMfe2d.cpp
@@ -100,6 +100,10 @@ fillHybridE( const size_t j1, const size_t j2
throw std::runtime_error("PredictorMfe2d::fillHybridE() : i1init > j1 : "+toString(i1init)+" > "+toString(j1));
if (i2init > j2)
throw std::runtime_error("PredictorMfe2d::fillHybridE() : i2init > j2 : "+toString(i2init)+" > "+toString(j2));
+ if (!energy.isAccessible2(j1))
+ throw std::runtime_error("PredictorMfe2d::fillHybridE() : !energy.isAccessible2(j1) : "+toString(j1));
+ if (!energy.isAccessible2(j2))
+ throw std::runtime_error("PredictorMfe2d::fillHybridE() : !energy.isAccessible2(j2) : "+toString(j2));
#endif
// get minimal start indices heeding max interaction length
@@ -166,6 +170,7 @@ fillHybridE( const size_t j1, const size_t j2
iStackE = energy.getE_interLeft(i1,i1+noLpShift,i2,i2+noLpShift);
// init with stacking only
+ // or stacking with right extension
curMinE = iStackE + ((w1==2&&w2==2) ? energy.getE_init() : hybridE_pq(i1+noLpShift, i2+noLpShift) );
} else {
//
@@ -274,7 +279,7 @@ traceBack( Interaction & interaction )
// get stacking term
iStackE = energy.getE_interLeft(i1,i1+noLpShift, i2,i2+noLpShift);
- // check just stacking
+ // check just stacking with right extension
if ( E_equal( curE, iStackE + hybridE_pq(i1+noLpShift,i2+noLpShift) )) {
// store bp
interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift,i2+noLpShift) );
diff --git a/src/IntaRNA/PredictorMfe2dHeuristic.cpp b/src/IntaRNA/PredictorMfe2dHeuristic.cpp
index 91750cc8..d0b12253 100644
--- a/src/IntaRNA/PredictorMfe2dHeuristic.cpp
+++ b/src/IntaRNA/PredictorMfe2dHeuristic.cpp
@@ -124,7 +124,8 @@ fillHybridE()
// set to interaction initiation with according boundary
// if valid right boundary
- if (!outConstraint.noGUend || !energy.isGU(i1+noLpShift,i2+noLpShift))
+ if (E_isNotINF(iStackE)
+ && (!outConstraint.noGUend || !energy.isGU(i1+noLpShift,i2+noLpShift)))
{
*curCell = BestInteractionE(iStackE + energy.getE_init(), i1+noLpShift, i2+noLpShift);
// current best total energy value (covers to far E_init only)
@@ -133,6 +134,40 @@ fillHybridE()
updateZall( i1,curCell->j1,i2,curCell->j2, curCellEtotal, false );
}
+ /////////////////////////////////////////
+ // check direct extension to the right of the noLP stacking
+ /////////////////////////////////////////
+ if(outConstraint.noLP)
+ {
+
+ // direct cell access (const)
+ rightExt = &(hybridE(i1+noLpShift,i2+noLpShift));
+ // check if right side can pair
+ // check if interaction length is within boundary
+ if (E_isNotINF(rightExt->val)
+ && (rightExt->j1 +1 -i1) <= energy.getAccessibility1().getMaxLength()
+ && (rightExt->j2 +1 -i2) <= energy.getAccessibility2().getMaxLength() )
+ {
+ // compute energy direct extension with stacking
+ curE = iStackE + rightExt->val;
+ // check if this combination yields better energy
+ curEtotal = energy.getE(i1,rightExt->j1,i2,rightExt->j2,curE);
+ // update best extension
+ if ( curEtotal < curCellEtotal )
+ {
+ // update current best for this left boundary
+ // copy right boundary
+ *curCell = *rightExt;
+ // set new energy
+ curCell->val = curE;
+ // store total energy to avoid recomputation
+ curCellEtotal = curEtotal;
+ }
+ // update Zall
+ updateZall( i1,rightExt->j1,i2,rightExt->j2, curEtotal, false );
+ }
+ }
+
// iterate over all loop sizes w1 (seq1) and w2 (seq2) (minus 1)
for (w1=1; w1-1 <= energy.getMaxInternalLoopSize1() && i1+w1+noLpShift= i1+noLpShift) {
+ // get energy of seed only explicitly
+ curE = seedE + energy.getE_init();
+ // check if this combination yields better energy
+ curEseedtotal = energy.getE(i1,sj1,i2,sj2,curE);
+ // update Zall for seed only (if otherwise stacking enforced)
+ updateZall( i1,sj1,i2,sj2, curEseedtotal, false );
+ }
// for noLP : check for explicit interior loop after seed
// assumption: seed fulfills noLP
@@ -252,6 +255,73 @@ fillHybridE()
// if !noLP or stacking bp possible
if (E_isNotINF(iStackE)) {
+
+ if (outConstraint.noLP) {
+ /////////////////////////////////////////
+ // check direct extension to the right of the noLP stacking
+ /////////////////////////////////////////
+
+ //////////////////////////////////////////////////////////
+ // update hybridE without seed constraint
+ //////////////////////////////////////////////////////////
+
+ // direct cell access (const)
+ rightExt = &(hybridE(i1+noLpShift,i2+noLpShift));
+
+ // check if right side can pair
+ // check if interaction length is within boundary
+ if ( E_isNotINF(rightExt->val)
+ && (rightExt->j1 +1 -i1) <= energy.getAccessibility1().getMaxLength()
+ && (rightExt->j2 +1 -i2) <= energy.getAccessibility2().getMaxLength() )
+ {
+ // compute energy for direct stack extension
+ curE = iStackE + rightExt->val;
+ // check if this combination yields better energy
+ curEtotal = energy.getE(i1,rightExt->j1,i2,rightExt->j2,curE);
+ if ( curEtotal < curCellEtotal )
+ {
+ // update current best for this left boundary
+ // copy right boundary
+ *curCell = *rightExt;
+ // set new energy
+ curCell->val = curE;
+ // store total energy to avoid recomputation
+ curCellEtotal = curEtotal;
+ }
+ }
+
+ //////////////////////////////////////////////////////////
+ // update hybridE_seed including seed constraint
+ //////////////////////////////////////////////////////////
+
+ // direct cell access to right side end of loop (seed has to be to the right of it)
+ rightExt = &(hybridE_seed(i1+noLpShift,i2+noLpShift));
+ // check if right side of loop can pair
+ // check if interaction length is within boundary
+ if (E_isNotINF(rightExt->val)
+ && (rightExt->j1 +1 -i1) <= energy.getAccessibility1().getMaxLength()
+ && (rightExt->j2 +1 -i2) <= energy.getAccessibility2().getMaxLength() )
+ {
+ // compute energy for direct stack extension
+ curE = iStackE + rightExt->val;
+ // check if this combination yields better energy
+ curEseedtotal = energy.getE(i1,rightExt->j1,i2,rightExt->j2,curE);
+ if ( curEseedtotal < curCellSeedEtotal )
+ {
+ // update current best for this left boundary
+ // copy right boundary
+ *curCellSeed = *rightExt;
+ // set new energy
+ curCellSeed->val = curE;
+ // store overall energy
+ curCellSeedEtotal = curEseedtotal;
+ }
+ // avoid Z update; otherwise double-counting of interactions
+ // --> that way, underestimation of Z
+ }
+ }
+
+
// iterate over all loop sizes w1 (seq1) and w2 (seq2) (minus 1)
for (w1=1; w1-1 <= energy.getMaxInternalLoopSize1() && i1+noLpShift+w1j1 == j1 && curCell->j2 == j2 &&
+ // and energy is the source of curE
+ E_equal( curE, (iStackE + curCell->val ) ) )
+ {
+ // stop searching
+ traceNotFound = false;
+ // store bp
+ interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift,i2+noLpShift) );
+ // trace right part of stacking
+ i1+=noLpShift;
+ i2+=noLpShift;
+ curE = curCell->val;
+ }
+ }
+
// check all combinations of decompositions into (i1,i2)..(k1,k2)-(j1,j2)
for (k1=std::min(j1,i1+energy.getMaxInternalLoopSize1()+1+noLpShift); traceNotFound && k1>i1+noLpShift; k1--) {
for (k2=std::min(j2,i2+energy.getMaxInternalLoopSize2()+1+noLpShift); traceNotFound && k2>i2+noLpShift; k2--) {
@@ -454,7 +544,7 @@ traceBack( Interaction & interaction )
i2 = k2;
break;
}
- // trace remaining base pairs
+ // trace right bulge extension of seed in noLP mode
if (outConstraint.noLP) {
for (size_t l1=std::min(j1-1,k1+energy.getMaxInternalLoopSize1()+1); traceNotFound && l1>k1; l1--) {
for (size_t l2=std::min(j2-1,k2+energy.getMaxInternalLoopSize2()+1); traceNotFound && l2>k2; l2--) {
@@ -477,6 +567,7 @@ traceBack( Interaction & interaction )
}} // l1 l2
}
if (traceNotFound){
+ // as to be direct right extension of seed without bulge
curCell = &(hybridE(k1,k2));
// sanity check
assert( E_equal( curE, (seedE+(curCell->val)) )
diff --git a/src/IntaRNA/PredictorMfe2dSeed.cpp b/src/IntaRNA/PredictorMfe2dSeed.cpp
index fbb8fe4a..16ddee26 100644
--- a/src/IntaRNA/PredictorMfe2dSeed.cpp
+++ b/src/IntaRNA/PredictorMfe2dSeed.cpp
@@ -192,6 +192,7 @@ fillHybridE( const size_t j1, const size_t j2
iStackE = energy.getE_interLeft(i1,i1+noLpShift,i2,i2+noLpShift);
// init with stacking only
+ // or stacking with right extension
curMinE = iStackE + ((w1==2&&w2==2) ? energy.getE_init() : hybridE_pq(i1+noLpShift, i2+noLpShift) );
} else {
//
@@ -214,7 +215,8 @@ fillHybridE( const size_t j1, const size_t j2
curMinEseed = std::min( curMinEseed, seedHandler.getSeedE(i1,i2) + hybridE_pq(k1,k2) );
} else
// just the seed up to right boundary (explicit noLP handling)
- if (k1 == j1 && k2 == j2) {
+ // ensure minimal seed length in noLP mode
+ if (k1 == j1 && k2 == j2 && k1>=i1+noLpShift) {
curMinEseed = std::min( curMinEseed, seedHandler.getSeedE(i1,i2) + energy.getE_init() );
}
// handle interior loops after seeds in noLP-mode
@@ -238,6 +240,13 @@ fillHybridE( const size_t j1, const size_t j2
}
}
+ // handle direct left-stacking in noLP-mode
+ if ( outConstraint.noLP) {
+ if ( E_isNotINF( hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) ) {
+ curMinEseed = std::min( curMinEseed, (iStackE + hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) );
+ }
+ }
+
// check all combinations of decompositions into (i1,i2)..(k1,k2)-(j1,j2)
if (w1 > 2 && w2 > 2) {
for (k1=std::min(j1-1,i1+energy.getMaxInternalLoopSize1()+1+noLpShift); k1>i1+noLpShift; k1--) {
@@ -433,6 +442,26 @@ traceBack( Interaction & interaction )
}
}
+ // explicit check of direct left-end stacking (in noLP mode)
+ if (outConstraint.noLP) {
+ if ( E_isNotINF( hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) ) {
+ // check if correct split
+ if (E_equal ( curE,
+ (iStackE + hybridE_pq_seed(i1+noLpShift,i2+noLpShift) )
+ ) )
+ {
+ // store stacked base pair
+ interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift,i2+noLpShift) );
+ // update trace back boundary
+ i1+=noLpShift;
+ i2+=noLpShift;
+ curE= hybridE_pq_seed(i1,i2);
+ // start next iteration if trace was found
+ continue;
+ }
+ }
+ }
+
// check all interval splits
if ( (j1-i1) > 1 && (j2-i2) > 1) {
// check all combinations of decompositions into (i1,i2)..(k1,k2)-(j1,j2)
diff --git a/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp b/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp
index 1c43f418..8d33466b 100644
--- a/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp
+++ b/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp
@@ -133,6 +133,46 @@ fillHybridZ()
curCellEtotal = energy.getE(i1,i1+noLpShift, i2,i2+noLpShift ,energy.getE(curCell->val));
// update overall partition function information for initial bps only
updateZ( i1,curCell->j1, i2,curCell->j2, curCell->val, true );
+
+ if(outConstraint.noLP) {
+ /////////////////////////////////////////
+ // check direct extension to the right of the noLP stacking
+ /////////////////////////////////////////
+
+ // direct cell access (const)
+ rightExt = &(hybridZ(i1+noLpShift,i2+noLpShift));
+ // check if right side can pair
+ if (Z_equal(rightExt->val, 0.0)) {
+ continue;
+ }
+ // check if interaction length is within boundary
+ if ( (rightExt->j1 +1 -i1) > energy.getAccessibility1().getMaxLength()
+ || (rightExt->j2 +1 -i2) > energy.getAccessibility2().getMaxLength() )
+ {
+ continue;
+ }
+
+ // compute Z for direct extension with stacking
+ curZ = iStackZ * rightExt->val;
+
+ // update overall partition function information for current right extension
+ updateZ( i1,rightExt->j1, i2,rightExt->j2, curZ, true );
+
+ // check if this combination yields better energy
+ curEtotal = energy.getE(i1,rightExt->j1, i2,rightExt->j2, energy.getE(curZ));
+
+ // update best right extension for (i1,i2) in curCell
+ if ( curEtotal < curCellEtotal )
+ {
+ // update current best for this left boundary
+ // copy right boundary
+ *curCell = *rightExt;
+ // set new partition function
+ curCell->val = curZ;
+ // store total energy to avoid recomputation
+ curCellEtotal = curEtotal;
+ }
+ }
}
diff --git a/src/IntaRNA/SeedConstraint.h b/src/IntaRNA/SeedConstraint.h
index 1d7bd4da..d0214b34 100644
--- a/src/IntaRNA/SeedConstraint.h
+++ b/src/IntaRNA/SeedConstraint.h
@@ -290,7 +290,7 @@ SeedConstraint::SeedConstraint(
, bpGUendAllowed(!noGUendAllowed)
, lpAllowed(!noLP)
{
- if (bp < 2) throw std::runtime_error("SeedConstraint() : base pair number ("+toString(bp)+") < 2");
+ if (bp < 2 && explicitSeeds.empty()) throw std::runtime_error("SeedConstraint() : base pair number ("+toString(bp)+") < 2");
}
/////////////////////////////////////////////////////////////////////////////
diff --git a/src/IntaRNA/SeedHandlerExplicit.cpp b/src/IntaRNA/SeedHandlerExplicit.cpp
index eec4d52b..85830834 100644
--- a/src/IntaRNA/SeedHandlerExplicit.cpp
+++ b/src/IntaRNA/SeedHandlerExplicit.cpp
@@ -81,23 +81,23 @@ SeedHandlerExplicit::
size_t
SeedHandlerExplicit::
-getSeedMaxBP( const std::string & seedEncoding )
+getSeedMinBP( const std::string & seedEncoding )
{
- size_t maxBP = 0;
+ size_t minBP = 99999;
if (!seedEncoding.empty()) {
#if INTARNA_IN_DEBUG_MODE
- if (!checkSeedEncoding(seedEncoding).empty()) throw std::runtime_error("SeedHandlerExplicit::getSeedMaxBP() : no valid seed encoding : "+checkSeedEncoding(seedEncoding));
+ if (!checkSeedEncoding(seedEncoding).empty()) throw std::runtime_error("SeedHandlerExplicit::getSeedMinBP() : no valid seed encoding : "+checkSeedEncoding(seedEncoding));
#endif
size_t curBP = 0;
for( const char & c : seedEncoding ) {
switch(c){
case ',' :
- case '&' : maxBP = std::max( maxBP, curBP ); curBP = 0; break;
+ case '&' : minBP = std::min( minBP, curBP ); curBP = 0; break;
case '|' : curBP++;
}
}
}
- return maxBP;
+ return minBP;
}
/////////////////////////////////////////////////////////////////////////////
@@ -171,6 +171,7 @@ fillSeed( const size_t i1min, const size_t i1max, const size_t i2min, const size
withinIntervals++;
}
}
+
// return identified number
return withinIntervals;
}
@@ -194,7 +195,7 @@ traceBackSeed( Interaction & interaction
// fill data structure for current seed
const SeedData & s = seed->second;
// check if seed can not contain at least three base pairs
- if (s.dotBar1.size() < 3 && s.dotBar2.size() < 3) {
+ if (s.dotBar1.size() < 3 || s.dotBar2.size() < 3) {
return;
}
// get positions of second base pair within seed (if any)
diff --git a/src/IntaRNA/SeedHandlerExplicit.h b/src/IntaRNA/SeedHandlerExplicit.h
index 9d2743e5..6c624c81 100644
--- a/src/IntaRNA/SeedHandlerExplicit.h
+++ b/src/IntaRNA/SeedHandlerExplicit.h
@@ -120,14 +120,14 @@ class SeedHandlerExplicit : public SeedHandler
checkSeedEncoding( const std::string & seed );
/**
- * parses the given explicit seed encoding for the maximal number of
- * base pairs within a seed
+ * parses the given explicit seed encoding for the minimal number of
+ * base pairs within any of the encoded seed
* @param seedEncoding the explicit seed encoding to parse
- * @return the maximal number of base pairs among all encoded seeds
+ * @return the minimal number of base pairs among all encoded seeds
*/
static
size_t
- getSeedMaxBP( const std::string & seedEncoding );
+ getSeedMinBP( const std::string & seedEncoding );
/**
* Replace the input variables i1 and i2 to values to within the given range
diff --git a/src/bin/CommandLineParsing.cpp b/src/bin/CommandLineParsing.cpp
index f3a9cc76..e73cfee6 100644
--- a/src/bin/CommandLineParsing.cpp
+++ b/src/bin/CommandLineParsing.cpp
@@ -1267,8 +1267,10 @@ parse(int argc, char** argv)
if (target.size()>1 || query.size() > 1) {
throw error("explicit seed definition only for single query/target available");
}
- // input sanity check : maybe seed constraints defined -> warn
if (seedBP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedBP provided (will be ignored)";
+ // reset seedBP to minimal bps in any of the seeds
+ seedBP.val = SeedHandlerExplicit::getSeedMinBP(seedTQ);
+ // input sanity check : maybe seed constraints defined -> warn
if (seedMaxUP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedMaxUP provided (will be ignored)";
if (seedQMaxUP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedQMaxUP provided (will be ignored)";
if (seedTMaxUP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedTMaxUP provided (will be ignored)";
@@ -1533,9 +1535,9 @@ parse(int argc, char** argv)
// check compatibility of seed constraint with helix setup
if (!noSeedRequired) {
// check if helixMaxBP >= seedBP
- if (helixMaxBP.val < ( seedTQ.empty() ? seedBP.val : SeedHandlerExplicit::getSeedMaxBP(seedTQ) ) ) {
- throw error("maximum number of seed base pairs ("
- +toString(( seedTQ.empty() ? seedBP.val : SeedHandlerExplicit::getSeedMaxBP(seedTQ) ))
+ if (helixMaxBP.val < seedBP.val) {
+ throw error("minimum number of seed base pairs ("
+ +toString(seedBP.val)
+") exceeds the maximally allowed number of helix base pairs ("+toString(helixMaxBP.val)+")");
}
}
diff --git a/src/bin/IntaRNA.cpp b/src/bin/IntaRNA.cpp
index ca9f77b1..438e55e5 100644
--- a/src/bin/IntaRNA.cpp
+++ b/src/bin/IntaRNA.cpp
@@ -420,13 +420,13 @@ int main(int argc, char **argv){
////////////////////// exception handling ///////////////////////////
} catch (std::exception & e) {
LOG(WARNING) <<"Exception raised : " < Please report to the IntaRNA development team! Thanks!\n";
+ <<" ==> Please report (including input) to the IntaRNA development team! Thanks!\n";
el::Loggers::flushAll();
return -1;
} catch (...) {
std::exception_ptr eptr = std::current_exception();
LOG(WARNING) <<"Unknown exception raised \n\n"
- <<" ==> Please report to the IntaRNA development team! Thanks!\n";
+ <<" ==> Please report (including input) to the IntaRNA development team! Thanks!\n";
el::Loggers::flushAll();
return -1;
}
diff --git a/tests/AccessibilityFromStream_test.cpp b/tests/AccessibilityFromStream_test.cpp
index d6ff0d08..7dab2a9c 100644
--- a/tests/AccessibilityFromStream_test.cpp
+++ b/tests/AccessibilityFromStream_test.cpp
@@ -62,19 +62,19 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) {
std::istringstream accStream(accString);
// trigger parsing
- AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
+ AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
// std::cerr <<"orig data:\n" < 0.998 );
// REQUIRE( std::exp( - acc.getED(29, 29) ) < 0.999 );
-// REQUIRE( std::exp( - acc.getED(20, 29) ) > 0.0006 );
-// REQUIRE( std::exp( - acc.getED(20, 29) ) < 0.0007 );
+// REQUIRE( std::exp( - acc.getED(21, 29) ) > 0.0001 );
+// REQUIRE( std::exp( - acc.getED(21, 29) ) < 0.00011 );
}
SECTION("Pu_RNAplfold output reparsed") {
@@ -83,31 +83,21 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) {
std::istringstream accStream(accString);
// trigger parsing
- AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
+ AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
// check elements
REQUIRE( acc.getED(29, 29) == 0 );
- REQUIRE( acc.getED(20, 29) == 732 );
-// // old checks not working for integer-based ED type
-// REQUIRE( std::exp( - acc.getED(29, 29) ) > 0.998 );
-// REQUIRE( std::exp( - acc.getED(29, 29) ) < 0.999 );
-// REQUIRE( std::exp( - acc.getED(20, 29) ) > 0.0006 );
-// REQUIRE( std::exp( - acc.getED(20, 29) ) < 0.0007 );
+ REQUIRE( acc.getED(21, 29) == 690 );
std::stringstream accStream2;
acc.writeRNAplfold_Pu_text( accStream2, 1.0 );
// trigger parsing
- AccessibilityFromStream acc2( rna, 10, NULL, accStream2, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
+ AccessibilityFromStream acc2( rna, 9, NULL, accStream2, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
// check elements
REQUIRE( acc.getED(29, 29) == 0 );
- REQUIRE( acc.getED(20, 29) == 732 );
-// // old checks not working for integer-based ED type
-// REQUIRE( std::exp( - acc2.getED(29, 29) ) > 0.998 );
-// REQUIRE( std::exp( - acc2.getED(29, 29) ) < 0.999 );
-// REQUIRE( std::exp( - acc2.getED(20, 29) ) > 0.0006 );
-// REQUIRE( std::exp( - acc2.getED(20, 29) ) < 0.0007 );
+ REQUIRE( acc.getED(21, 29) == 690 );
}
@@ -117,31 +107,21 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) {
std::istringstream accStream(accString);
// trigger parsing
- AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
+ AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 );
// check elements
REQUIRE( acc.getED(29, 29) == 0 );
- REQUIRE( acc.getED(20, 29) == 732 );
-// // old checks not working for integer-based ED type
-// REQUIRE( std::exp( - acc.getED(29, 29) ) > 0.998 );
-// REQUIRE( std::exp( - acc.getED(29, 29) ) < 0.999 );
-// REQUIRE( std::exp( - acc.getED(20, 29) ) > 0.0006 );
-// REQUIRE( std::exp( - acc.getED(20, 29) ) < 0.0007 );
+ REQUIRE( acc.getED(21, 29) == 690 );
std::stringstream accStream2;
acc.writeRNAplfold_ED_text( accStream2 );
// trigger parsing
- AccessibilityFromStream acc2( rna, 10, NULL, accStream2, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 );
+ AccessibilityFromStream acc2( rna, 9, NULL, accStream2, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 );
// check elements
REQUIRE( acc.getED(29, 29) == 0 );
- REQUIRE( acc.getED(20, 29) == 732 );
-// // old checks not working for integer-based ED type
-// REQUIRE( std::exp( - acc2.getED(29, 29) ) > 0.998 );
-// REQUIRE( std::exp( - acc2.getED(29, 29) ) < 0.999 );
-// REQUIRE( std::exp( - acc2.getED(20, 29) ) > 0.0006 );
-// REQUIRE( std::exp( - acc2.getED(20, 29) ) < 0.0007 );
+ REQUIRE( acc.getED(21, 29) == 690 );
}
@@ -150,7 +130,7 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) {
std::istringstream accStream(accString);
RnaSequence rnaDouble("tooLong",seq+seq);
// trigger parsing exception
- REQUIRE_THROWS( AccessibilityFromStream( rnaDouble, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ) );
+ REQUIRE_THROWS( AccessibilityFromStream( rnaDouble, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ) );
}
SECTION("test decomposeByMaxED()") {
@@ -158,7 +138,7 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) {
std::istringstream accStream(accString);
RnaSequence rna("tooLong",seq);
// read PU values for fake ED values
- AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 );
+ AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 );
// new validation values for integer-based ED type just copied from failing tests
// ! not manually checked for sanity !