diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 71b1695d..833c39da 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -19,7 +19,7 @@ jobs: - name: Create conda environement run: | source `conda info --base`/etc/profile.d/conda.sh - conda create -q -n build-IntaRNA -c conda-forge -c bioconda gcc_linux-64 gxx_linux-64 boost-cpp libboost viennarna>=2.4.14 pkgconfig + conda create -q -n build-IntaRNA -c conda-forge -c bioconda gcc_linux-64 gxx_linux-64 boost-cpp viennarna>=2.4.14 pkgconfig conda activate build-IntaRNA - name: Script run: | @@ -28,11 +28,19 @@ jobs: # generate autotools's files bash autotools-init.sh # run configure (without boost checks) - ./configure --prefix=$HOME/IntaRNA --with-vrna=`conda info --base`/envs/build-IntaRNA --with-boost=no --without-zlib + ENVPREFIX=`conda info --base`/envs/build-IntaRNA + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ENVPREFIX/lib + ./configure \ + --prefix=$HOME/IntaRNA \ + --with-vrna=$ENVPREFIX \ + --with-boost=$ENVPREFIX \ + --with-boost-libdir=$ENVPREFIX/lib \ + --with-zlib=$ENVPREFIX # compile documentation # - make doxygen-doc # compile, test and install IntaRNA - make -j 2 && make tests -j 2 && make install + make -j 2 && make install + make tests -j 2 ##### check IntaRNA build ##### # run installed IntaRNA with help output $HOME/IntaRNA/bin/IntaRNA -h diff --git a/ChangeLog b/ChangeLog index 72842672..bc61f4a3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -10,11 +10,60 @@ # changes in development version since last release ################################################################################ + +################################################################################ +### version 3.2.2 +################################################################################ + # IntaRNA +- BUGFIX: maximal interaction length correction for precomputed accessibility + data was one to large (wrong dangling end computation for maximally long RRIs) + (thanks to Sabine Reisser) +- BUGFIX: explicit seed encodings within last 7 nucleotides (seedBP) were + ignored (thanks to Sebastian Holler) +- BUGFIX: outNoLP option was not correctly implemented (missing recursion cases) + and was thus missing interactions +- BUGFIX: osx: configure adaptation to old grep version in osx ################################################################################ ################################################################################ +220110 Martin Raden + * configure.ac : + * rewrite of grep personality call to be compatible with old grep version on osx + +201204 Simon Bray (thanks!) : + * replace travis with github action + +210211 Martin Raden + * IntaRNA/SeedConstraint : + * constructor: bp>2 check only if no explicit seed present + * IntaRNA/SeedHandlerExplicit : + - getSeedMaxBP() : obsolete, replaced by getSeedMinBP() + + getSeedMinBP() : minimal number of bps within encoded seeds; used for seed + constraint initialization + * traceBackSeed() : + * bugfix: check for minimal seed length sufficient on one side + * bin/CommandLineParsing : + * seedBP now set to getSeedMinBP() if explicit seeds present + * bin/IntaRNA : + * exception information now motivates to send input along with report + + docs/recursions : recursion depictions for sanity checks of implementations + + PredictorMfe2d (+outNoLP) + + PredictorMfe2dSeed (+outNoLP) + * IntaRNA/PredictorMfe2dSeed : + * IntaRNA/PredictorMfe2dHeuristic : + * IntaRNA/PredictorMfe2dHeuristicSeed : + * IntaRNA/PredictorMfeEns2dHeuristic : + * bugfix outNoLP : missing recursion cases + * IntaRNA/AccessibilityFromStream : + * bugfix read from stream + * test updated + +201210 Martin Raden + * IntaRNA/AccessibilityFromStream : + * bugfix: max length == 1 smaller than max-accessibility-data-length due to + dangling-end treatment (thanks to Sabine Reisser) ################################################################################ ### version 3.2.1 @@ -35,6 +84,8 @@ * bin/CommandLineParsing : * changing import and usage of boost::bind and boost::placeholders namespace (thanks to Behra Phani Rama Krishna) + * R/IntaRNA_plotRegions.R : + * replace deprecated expand_scale() with expansion() 200615 Martin Raden * IntaRNA/OutputConstraint : diff --git a/R/IntaRNA_plotRegions.R b/R/IntaRNA_plotRegions.R index 99048fd2..bf7306b5 100644 --- a/R/IntaRNA_plotRegions.R +++ b/R/IntaRNA_plotRegions.R @@ -123,7 +123,7 @@ coveragePlot = geom_density() + ylab("coverage") + xlab( ifelse( is.null(title) , "position" , title ) ) + - scale_y_continuous(position = "right", expand=expand_scale(mult = c(0, .02))) + + scale_y_continuous(position = "right", expand=expansion(mult = c(0, .02))) + scale_x_continuous(expand = c(0, 0), limits=c(xmin,xmax)); if (!is.null(title)) { diff --git a/README.md b/README.md index e4ad36bd..66abf955 100644 --- a/README.md +++ b/README.md @@ -1182,10 +1182,28 @@ molecule. IntaRNA supports such data by interfacing the Vienna RNA package capabilities for SHAPE reactivity data incorporation, see Lorenz et al. ([2015](https://doi.org/10.1093/bioinformatics/btv523), [2016](https://dx.doi.org/10.1186%2Fs13015-016-0070-z)) or the [RNAfold manpage](https://www.tbi.univie.ac.at/RNA/RNAfold.1.html). +To use this feature, ViennaRNA-based accessibility computation is needed, +i.e. `--t|qAcc=C`, which is the default mode (no action required). The SHAPE reactivity data can be provided via file using `--qShape` or -`--tShape` for query or target sequence, respectively. -Independently for each, it is possible +`--tShape` for query or target sequence, respectively. The input format is a +space-separated 3-column format providing position, nucleotide and reactivity value as +exmemplified below. + +```[bash] + 1 G 0.134 + 2 C 0.044 + 3 C 0.057 + 4 G 0.114 + 5 U 0.094 + 6 C 0.035 + 7 G 0.909 + 8 C 0.224 + 9 C 0.529 + 10 A 1.475 +``` + +Independently for each sequence, it is possible to define the methods to be used to convert the data into pseudo energies and pairing probabilities. The respective IntaRNA arguments are `--qShapeMethod`|`--tShapeMethod` @@ -1193,6 +1211,13 @@ and `--qShapeConversion`|`--tShapeConversion`, which mimics the according tool arguments in the Vienna RNA package (see e.g. the [RNAfold manpage](https://www.tbi.univie.ac.at/RNA/RNAfold.1.html)). +An example call that shows the effect of SHAPE data incorporation is given below (assuming the SHAPE data from above is stored in +the file `data-SHAPE.txt`). +```[bash] +IntaRNA -q GCCGUCGCCA -t GCCGUCGCCA --tShape=data-SHAPE.txt --out=qAcc:STDOUT --out=tAcc:STDERR --out=/dev/null +``` + + For further details, please refer to our respective publication - [Integration of accessibility data from structure probing into RNA-RNA interaction prediction.](https://doi.org/10.1093/bioinformatics/bty1029) diff --git a/configure.ac b/configure.ac index d0b2f54e..811bbd62 100644 --- a/configure.ac +++ b/configure.ac @@ -1,7 +1,7 @@ AC_PREREQ([2.65]) # 5 argument version only available with aclocal >= 2.64 -AC_INIT([IntaRNA], [3.2.1], [], [intaRNA], [http://www.bioinf.uni-freiburg.de] ) +AC_INIT([IntaRNA], [3.2.2], [], [intaRNA], [http://www.bioinf.uni-freiburg.de] ) # minimal required version of the boost library BOOST_REQUIRED_VERSION=1.50.0 @@ -411,7 +411,7 @@ AS_IF([test "$DEPENDENCYNOTFOUND" = "1"], [ ########################################################################## # get available personalities -AC_SUBST([PERSONALITIES],[`grep -P "^\\s+case\\s+IntaRNA\\S+\\s*:\\s*return" ./src/bin/CommandLineParsing.h | sed "s/^\\s*case\\s\\+\\(IntaRNA\\S*\\)\\s\\+:\\s\\+return.*/\\1/g" | tr "\\n" " "`]) +AC_SUBST([PERSONALITIES],[`grep "^\\s*case\\s*IntaRNA\\S*\\s*:\\s*return" ./src/bin/CommandLineParsing.h | sed "s/^\\s*case\\s\\+\\(IntaRNA\\S*\\)\\s\\+:\\s\\+return.*/\\1/g" | tr "\\n" " "`]) ########################################################################## @@ -476,4 +476,4 @@ You can run 'make', 'make tests' and 'make install' now! Run 'make install prefix=XYZ' for installation in an alternative directory. ====================================== -]) \ No newline at end of file +]) diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2d.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2d.svg new file mode 100644 index 00000000..974282cb --- /dev/null +++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2d.svg @@ -0,0 +1,672 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = + + + = + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristic.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristic.svg new file mode 100644 index 00000000..aac1bcc1 --- /dev/null +++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristic.svg @@ -0,0 +1,558 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristicSeed.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristicSeed.svg new file mode 100644 index 00000000..84f611ee --- /dev/null +++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dHeuristicSeed.svg @@ -0,0 +1,1500 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +   + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dSeed.svg b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dSeed.svg new file mode 100644 index 00000000..f396ee0e --- /dev/null +++ b/doc/recursions/IntaRNA-outNoLP.PredictorMfe2dSeed.svg @@ -0,0 +1,1726 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = + = + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +   + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = + + + = + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/recursions/IntaRNA.PredictorMfe2d.svg b/doc/recursions/IntaRNA.PredictorMfe2d.svg new file mode 100644 index 00000000..a0b0f5e5 --- /dev/null +++ b/doc/recursions/IntaRNA.PredictorMfe2d.svg @@ -0,0 +1,437 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + = + + + = + + + + + diff --git a/doc/recursions/IntaRNA.PredictorMfe2dHeuristic.svg b/doc/recursions/IntaRNA.PredictorMfe2dHeuristic.svg new file mode 100644 index 00000000..400ccf6e --- /dev/null +++ b/doc/recursions/IntaRNA.PredictorMfe2dHeuristic.svg @@ -0,0 +1,342 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/recursions/IntaRNA.PredictorMfe2dHeuristicSeed.svg b/doc/recursions/IntaRNA.PredictorMfe2dHeuristicSeed.svg new file mode 100644 index 00000000..51e4d496 --- /dev/null +++ b/doc/recursions/IntaRNA.PredictorMfe2dHeuristicSeed.svg @@ -0,0 +1,570 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/recursions/IntaRNA.PredictorMfe2dSeed.svg b/doc/recursions/IntaRNA.PredictorMfe2dSeed.svg new file mode 100644 index 00000000..b8e3e76e --- /dev/null +++ b/doc/recursions/IntaRNA.PredictorMfe2dSeed.svg @@ -0,0 +1,641 @@ + + + + + + + + + + image/svg+xml + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/IntaRNA/AccessibilityFromStream.cpp b/src/IntaRNA/AccessibilityFromStream.cpp index 86eff91f..d486c189 100644 --- a/src/IntaRNA/AccessibilityFromStream.cpp +++ b/src/IntaRNA/AccessibilityFromStream.cpp @@ -87,7 +87,8 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP // check if maxLength <= max available length size_t cutEnd = line.find_last_of("1234567890"); size_t cutStart = line.find_last_not_of("1234567890", cutEnd ); - size_t maxAvailLength = boost::lexical_cast( line.substr(cutStart+1,cutEnd-cutStart)); + // max length == 1 smaller than max-accessibility-data-length due to dangling-end treatment + size_t maxAvailLength = (boost::lexical_cast( line.substr(cutStart+1,cutEnd-cutStart)) -1); if (maxAvailLength < getMaxLength()) { #if INTARNA_MULITHREADING #pragma omp critical(intarna_omp_logOutput) @@ -101,7 +102,7 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP } // resize data structure to fill - edValues.resize( getSequence().size(), getSequence().size(), 0, getMaxLength() ); + edValues.resize( getSequence().size(), getSequence().size(), 0, 1+getMaxLength() ); // TODO rewrite to support "nan" and "inf" parsing via boost::spirit::qi // http://stackoverflow.com/questions/11420263/is-it-possible-to-read-infinity-or-nan-values-using-input-streams @@ -135,7 +136,7 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP // parse probabilities or EDs for this line and store double curVal; - size_t minI = j - std::min( j, getMaxLength() ); + size_t minI = j - std::min( j, 1+getMaxLength() ); for ( size_t i = j; i>minI; i--) { if ( inStream >>curVal ) { // check if we parse probabilities @@ -162,7 +163,7 @@ parseRNAplfold_text( std::istream & inStream, const Z_type RT, const bool parseP } } // check if full line was already parsed - if (j < maxAvailLength || minI > 0) { + if (j < 1+maxAvailLength || minI > 0) { // skip rest till end of line inStream.ignore(std::numeric_limits::max(), '\n'); } diff --git a/src/IntaRNA/PredictorMfe2d.cpp b/src/IntaRNA/PredictorMfe2d.cpp index 90d066d8..21b59427 100644 --- a/src/IntaRNA/PredictorMfe2d.cpp +++ b/src/IntaRNA/PredictorMfe2d.cpp @@ -100,6 +100,10 @@ fillHybridE( const size_t j1, const size_t j2 throw std::runtime_error("PredictorMfe2d::fillHybridE() : i1init > j1 : "+toString(i1init)+" > "+toString(j1)); if (i2init > j2) throw std::runtime_error("PredictorMfe2d::fillHybridE() : i2init > j2 : "+toString(i2init)+" > "+toString(j2)); + if (!energy.isAccessible2(j1)) + throw std::runtime_error("PredictorMfe2d::fillHybridE() : !energy.isAccessible2(j1) : "+toString(j1)); + if (!energy.isAccessible2(j2)) + throw std::runtime_error("PredictorMfe2d::fillHybridE() : !energy.isAccessible2(j2) : "+toString(j2)); #endif // get minimal start indices heeding max interaction length @@ -166,6 +170,7 @@ fillHybridE( const size_t j1, const size_t j2 iStackE = energy.getE_interLeft(i1,i1+noLpShift,i2,i2+noLpShift); // init with stacking only + // or stacking with right extension curMinE = iStackE + ((w1==2&&w2==2) ? energy.getE_init() : hybridE_pq(i1+noLpShift, i2+noLpShift) ); } else { // @@ -274,7 +279,7 @@ traceBack( Interaction & interaction ) // get stacking term iStackE = energy.getE_interLeft(i1,i1+noLpShift, i2,i2+noLpShift); - // check just stacking + // check just stacking with right extension if ( E_equal( curE, iStackE + hybridE_pq(i1+noLpShift,i2+noLpShift) )) { // store bp interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift,i2+noLpShift) ); diff --git a/src/IntaRNA/PredictorMfe2dHeuristic.cpp b/src/IntaRNA/PredictorMfe2dHeuristic.cpp index 91750cc8..d0b12253 100644 --- a/src/IntaRNA/PredictorMfe2dHeuristic.cpp +++ b/src/IntaRNA/PredictorMfe2dHeuristic.cpp @@ -124,7 +124,8 @@ fillHybridE() // set to interaction initiation with according boundary // if valid right boundary - if (!outConstraint.noGUend || !energy.isGU(i1+noLpShift,i2+noLpShift)) + if (E_isNotINF(iStackE) + && (!outConstraint.noGUend || !energy.isGU(i1+noLpShift,i2+noLpShift))) { *curCell = BestInteractionE(iStackE + energy.getE_init(), i1+noLpShift, i2+noLpShift); // current best total energy value (covers to far E_init only) @@ -133,6 +134,40 @@ fillHybridE() updateZall( i1,curCell->j1,i2,curCell->j2, curCellEtotal, false ); } + ///////////////////////////////////////// + // check direct extension to the right of the noLP stacking + ///////////////////////////////////////// + if(outConstraint.noLP) + { + + // direct cell access (const) + rightExt = &(hybridE(i1+noLpShift,i2+noLpShift)); + // check if right side can pair + // check if interaction length is within boundary + if (E_isNotINF(rightExt->val) + && (rightExt->j1 +1 -i1) <= energy.getAccessibility1().getMaxLength() + && (rightExt->j2 +1 -i2) <= energy.getAccessibility2().getMaxLength() ) + { + // compute energy direct extension with stacking + curE = iStackE + rightExt->val; + // check if this combination yields better energy + curEtotal = energy.getE(i1,rightExt->j1,i2,rightExt->j2,curE); + // update best extension + if ( curEtotal < curCellEtotal ) + { + // update current best for this left boundary + // copy right boundary + *curCell = *rightExt; + // set new energy + curCell->val = curE; + // store total energy to avoid recomputation + curCellEtotal = curEtotal; + } + // update Zall + updateZall( i1,rightExt->j1,i2,rightExt->j2, curEtotal, false ); + } + } + // iterate over all loop sizes w1 (seq1) and w2 (seq2) (minus 1) for (w1=1; w1-1 <= energy.getMaxInternalLoopSize1() && i1+w1+noLpShift= i1+noLpShift) { + // get energy of seed only explicitly + curE = seedE + energy.getE_init(); + // check if this combination yields better energy + curEseedtotal = energy.getE(i1,sj1,i2,sj2,curE); + // update Zall for seed only (if otherwise stacking enforced) + updateZall( i1,sj1,i2,sj2, curEseedtotal, false ); + } // for noLP : check for explicit interior loop after seed // assumption: seed fulfills noLP @@ -252,6 +255,73 @@ fillHybridE() // if !noLP or stacking bp possible if (E_isNotINF(iStackE)) { + + if (outConstraint.noLP) { + ///////////////////////////////////////// + // check direct extension to the right of the noLP stacking + ///////////////////////////////////////// + + ////////////////////////////////////////////////////////// + // update hybridE without seed constraint + ////////////////////////////////////////////////////////// + + // direct cell access (const) + rightExt = &(hybridE(i1+noLpShift,i2+noLpShift)); + + // check if right side can pair + // check if interaction length is within boundary + if ( E_isNotINF(rightExt->val) + && (rightExt->j1 +1 -i1) <= energy.getAccessibility1().getMaxLength() + && (rightExt->j2 +1 -i2) <= energy.getAccessibility2().getMaxLength() ) + { + // compute energy for direct stack extension + curE = iStackE + rightExt->val; + // check if this combination yields better energy + curEtotal = energy.getE(i1,rightExt->j1,i2,rightExt->j2,curE); + if ( curEtotal < curCellEtotal ) + { + // update current best for this left boundary + // copy right boundary + *curCell = *rightExt; + // set new energy + curCell->val = curE; + // store total energy to avoid recomputation + curCellEtotal = curEtotal; + } + } + + ////////////////////////////////////////////////////////// + // update hybridE_seed including seed constraint + ////////////////////////////////////////////////////////// + + // direct cell access to right side end of loop (seed has to be to the right of it) + rightExt = &(hybridE_seed(i1+noLpShift,i2+noLpShift)); + // check if right side of loop can pair + // check if interaction length is within boundary + if (E_isNotINF(rightExt->val) + && (rightExt->j1 +1 -i1) <= energy.getAccessibility1().getMaxLength() + && (rightExt->j2 +1 -i2) <= energy.getAccessibility2().getMaxLength() ) + { + // compute energy for direct stack extension + curE = iStackE + rightExt->val; + // check if this combination yields better energy + curEseedtotal = energy.getE(i1,rightExt->j1,i2,rightExt->j2,curE); + if ( curEseedtotal < curCellSeedEtotal ) + { + // update current best for this left boundary + // copy right boundary + *curCellSeed = *rightExt; + // set new energy + curCellSeed->val = curE; + // store overall energy + curCellSeedEtotal = curEseedtotal; + } + // avoid Z update; otherwise double-counting of interactions + // --> that way, underestimation of Z + } + } + + // iterate over all loop sizes w1 (seq1) and w2 (seq2) (minus 1) for (w1=1; w1-1 <= energy.getMaxInternalLoopSize1() && i1+noLpShift+w1j1 == j1 && curCell->j2 == j2 && + // and energy is the source of curE + E_equal( curE, (iStackE + curCell->val ) ) ) + { + // stop searching + traceNotFound = false; + // store bp + interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift,i2+noLpShift) ); + // trace right part of stacking + i1+=noLpShift; + i2+=noLpShift; + curE = curCell->val; + } + } + // check all combinations of decompositions into (i1,i2)..(k1,k2)-(j1,j2) for (k1=std::min(j1,i1+energy.getMaxInternalLoopSize1()+1+noLpShift); traceNotFound && k1>i1+noLpShift; k1--) { for (k2=std::min(j2,i2+energy.getMaxInternalLoopSize2()+1+noLpShift); traceNotFound && k2>i2+noLpShift; k2--) { @@ -454,7 +544,7 @@ traceBack( Interaction & interaction ) i2 = k2; break; } - // trace remaining base pairs + // trace right bulge extension of seed in noLP mode if (outConstraint.noLP) { for (size_t l1=std::min(j1-1,k1+energy.getMaxInternalLoopSize1()+1); traceNotFound && l1>k1; l1--) { for (size_t l2=std::min(j2-1,k2+energy.getMaxInternalLoopSize2()+1); traceNotFound && l2>k2; l2--) { @@ -477,6 +567,7 @@ traceBack( Interaction & interaction ) }} // l1 l2 } if (traceNotFound){ + // as to be direct right extension of seed without bulge curCell = &(hybridE(k1,k2)); // sanity check assert( E_equal( curE, (seedE+(curCell->val)) ) diff --git a/src/IntaRNA/PredictorMfe2dSeed.cpp b/src/IntaRNA/PredictorMfe2dSeed.cpp index fbb8fe4a..16ddee26 100644 --- a/src/IntaRNA/PredictorMfe2dSeed.cpp +++ b/src/IntaRNA/PredictorMfe2dSeed.cpp @@ -192,6 +192,7 @@ fillHybridE( const size_t j1, const size_t j2 iStackE = energy.getE_interLeft(i1,i1+noLpShift,i2,i2+noLpShift); // init with stacking only + // or stacking with right extension curMinE = iStackE + ((w1==2&&w2==2) ? energy.getE_init() : hybridE_pq(i1+noLpShift, i2+noLpShift) ); } else { // @@ -214,7 +215,8 @@ fillHybridE( const size_t j1, const size_t j2 curMinEseed = std::min( curMinEseed, seedHandler.getSeedE(i1,i2) + hybridE_pq(k1,k2) ); } else // just the seed up to right boundary (explicit noLP handling) - if (k1 == j1 && k2 == j2) { + // ensure minimal seed length in noLP mode + if (k1 == j1 && k2 == j2 && k1>=i1+noLpShift) { curMinEseed = std::min( curMinEseed, seedHandler.getSeedE(i1,i2) + energy.getE_init() ); } // handle interior loops after seeds in noLP-mode @@ -238,6 +240,13 @@ fillHybridE( const size_t j1, const size_t j2 } } + // handle direct left-stacking in noLP-mode + if ( outConstraint.noLP) { + if ( E_isNotINF( hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) ) { + curMinEseed = std::min( curMinEseed, (iStackE + hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) ); + } + } + // check all combinations of decompositions into (i1,i2)..(k1,k2)-(j1,j2) if (w1 > 2 && w2 > 2) { for (k1=std::min(j1-1,i1+energy.getMaxInternalLoopSize1()+1+noLpShift); k1>i1+noLpShift; k1--) { @@ -433,6 +442,26 @@ traceBack( Interaction & interaction ) } } + // explicit check of direct left-end stacking (in noLP mode) + if (outConstraint.noLP) { + if ( E_isNotINF( hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) ) { + // check if correct split + if (E_equal ( curE, + (iStackE + hybridE_pq_seed(i1+noLpShift,i2+noLpShift) ) + ) ) + { + // store stacked base pair + interaction.basePairs.push_back( energy.getBasePair(i1+noLpShift,i2+noLpShift) ); + // update trace back boundary + i1+=noLpShift; + i2+=noLpShift; + curE= hybridE_pq_seed(i1,i2); + // start next iteration if trace was found + continue; + } + } + } + // check all interval splits if ( (j1-i1) > 1 && (j2-i2) > 1) { // check all combinations of decompositions into (i1,i2)..(k1,k2)-(j1,j2) diff --git a/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp b/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp index 1c43f418..8d33466b 100644 --- a/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp +++ b/src/IntaRNA/PredictorMfeEns2dHeuristic.cpp @@ -133,6 +133,46 @@ fillHybridZ() curCellEtotal = energy.getE(i1,i1+noLpShift, i2,i2+noLpShift ,energy.getE(curCell->val)); // update overall partition function information for initial bps only updateZ( i1,curCell->j1, i2,curCell->j2, curCell->val, true ); + + if(outConstraint.noLP) { + ///////////////////////////////////////// + // check direct extension to the right of the noLP stacking + ///////////////////////////////////////// + + // direct cell access (const) + rightExt = &(hybridZ(i1+noLpShift,i2+noLpShift)); + // check if right side can pair + if (Z_equal(rightExt->val, 0.0)) { + continue; + } + // check if interaction length is within boundary + if ( (rightExt->j1 +1 -i1) > energy.getAccessibility1().getMaxLength() + || (rightExt->j2 +1 -i2) > energy.getAccessibility2().getMaxLength() ) + { + continue; + } + + // compute Z for direct extension with stacking + curZ = iStackZ * rightExt->val; + + // update overall partition function information for current right extension + updateZ( i1,rightExt->j1, i2,rightExt->j2, curZ, true ); + + // check if this combination yields better energy + curEtotal = energy.getE(i1,rightExt->j1, i2,rightExt->j2, energy.getE(curZ)); + + // update best right extension for (i1,i2) in curCell + if ( curEtotal < curCellEtotal ) + { + // update current best for this left boundary + // copy right boundary + *curCell = *rightExt; + // set new partition function + curCell->val = curZ; + // store total energy to avoid recomputation + curCellEtotal = curEtotal; + } + } } diff --git a/src/IntaRNA/SeedConstraint.h b/src/IntaRNA/SeedConstraint.h index 1d7bd4da..d0214b34 100644 --- a/src/IntaRNA/SeedConstraint.h +++ b/src/IntaRNA/SeedConstraint.h @@ -290,7 +290,7 @@ SeedConstraint::SeedConstraint( , bpGUendAllowed(!noGUendAllowed) , lpAllowed(!noLP) { - if (bp < 2) throw std::runtime_error("SeedConstraint() : base pair number ("+toString(bp)+") < 2"); + if (bp < 2 && explicitSeeds.empty()) throw std::runtime_error("SeedConstraint() : base pair number ("+toString(bp)+") < 2"); } ///////////////////////////////////////////////////////////////////////////// diff --git a/src/IntaRNA/SeedHandlerExplicit.cpp b/src/IntaRNA/SeedHandlerExplicit.cpp index eec4d52b..85830834 100644 --- a/src/IntaRNA/SeedHandlerExplicit.cpp +++ b/src/IntaRNA/SeedHandlerExplicit.cpp @@ -81,23 +81,23 @@ SeedHandlerExplicit:: size_t SeedHandlerExplicit:: -getSeedMaxBP( const std::string & seedEncoding ) +getSeedMinBP( const std::string & seedEncoding ) { - size_t maxBP = 0; + size_t minBP = 99999; if (!seedEncoding.empty()) { #if INTARNA_IN_DEBUG_MODE - if (!checkSeedEncoding(seedEncoding).empty()) throw std::runtime_error("SeedHandlerExplicit::getSeedMaxBP() : no valid seed encoding : "+checkSeedEncoding(seedEncoding)); + if (!checkSeedEncoding(seedEncoding).empty()) throw std::runtime_error("SeedHandlerExplicit::getSeedMinBP() : no valid seed encoding : "+checkSeedEncoding(seedEncoding)); #endif size_t curBP = 0; for( const char & c : seedEncoding ) { switch(c){ case ',' : - case '&' : maxBP = std::max( maxBP, curBP ); curBP = 0; break; + case '&' : minBP = std::min( minBP, curBP ); curBP = 0; break; case '|' : curBP++; } } } - return maxBP; + return minBP; } ///////////////////////////////////////////////////////////////////////////// @@ -171,6 +171,7 @@ fillSeed( const size_t i1min, const size_t i1max, const size_t i2min, const size withinIntervals++; } } + // return identified number return withinIntervals; } @@ -194,7 +195,7 @@ traceBackSeed( Interaction & interaction // fill data structure for current seed const SeedData & s = seed->second; // check if seed can not contain at least three base pairs - if (s.dotBar1.size() < 3 && s.dotBar2.size() < 3) { + if (s.dotBar1.size() < 3 || s.dotBar2.size() < 3) { return; } // get positions of second base pair within seed (if any) diff --git a/src/IntaRNA/SeedHandlerExplicit.h b/src/IntaRNA/SeedHandlerExplicit.h index 9d2743e5..6c624c81 100644 --- a/src/IntaRNA/SeedHandlerExplicit.h +++ b/src/IntaRNA/SeedHandlerExplicit.h @@ -120,14 +120,14 @@ class SeedHandlerExplicit : public SeedHandler checkSeedEncoding( const std::string & seed ); /** - * parses the given explicit seed encoding for the maximal number of - * base pairs within a seed + * parses the given explicit seed encoding for the minimal number of + * base pairs within any of the encoded seed * @param seedEncoding the explicit seed encoding to parse - * @return the maximal number of base pairs among all encoded seeds + * @return the minimal number of base pairs among all encoded seeds */ static size_t - getSeedMaxBP( const std::string & seedEncoding ); + getSeedMinBP( const std::string & seedEncoding ); /** * Replace the input variables i1 and i2 to values to within the given range diff --git a/src/bin/CommandLineParsing.cpp b/src/bin/CommandLineParsing.cpp index f3a9cc76..e73cfee6 100644 --- a/src/bin/CommandLineParsing.cpp +++ b/src/bin/CommandLineParsing.cpp @@ -1267,8 +1267,10 @@ parse(int argc, char** argv) if (target.size()>1 || query.size() > 1) { throw error("explicit seed definition only for single query/target available"); } - // input sanity check : maybe seed constraints defined -> warn if (seedBP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedBP provided (will be ignored)"; + // reset seedBP to minimal bps in any of the seeds + seedBP.val = SeedHandlerExplicit::getSeedMinBP(seedTQ); + // input sanity check : maybe seed constraints defined -> warn if (seedMaxUP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedMaxUP provided (will be ignored)"; if (seedQMaxUP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedQMaxUP provided (will be ignored)"; if (seedTMaxUP.isSet()) LOG(INFO) <<"explicit seeds defined, but seedTMaxUP provided (will be ignored)"; @@ -1533,9 +1535,9 @@ parse(int argc, char** argv) // check compatibility of seed constraint with helix setup if (!noSeedRequired) { // check if helixMaxBP >= seedBP - if (helixMaxBP.val < ( seedTQ.empty() ? seedBP.val : SeedHandlerExplicit::getSeedMaxBP(seedTQ) ) ) { - throw error("maximum number of seed base pairs (" - +toString(( seedTQ.empty() ? seedBP.val : SeedHandlerExplicit::getSeedMaxBP(seedTQ) )) + if (helixMaxBP.val < seedBP.val) { + throw error("minimum number of seed base pairs (" + +toString(seedBP.val) +") exceeds the maximally allowed number of helix base pairs ("+toString(helixMaxBP.val)+")"); } } diff --git a/src/bin/IntaRNA.cpp b/src/bin/IntaRNA.cpp index ca9f77b1..438e55e5 100644 --- a/src/bin/IntaRNA.cpp +++ b/src/bin/IntaRNA.cpp @@ -420,13 +420,13 @@ int main(int argc, char **argv){ ////////////////////// exception handling /////////////////////////// } catch (std::exception & e) { LOG(WARNING) <<"Exception raised : " < Please report to the IntaRNA development team! Thanks!\n"; + <<" ==> Please report (including input) to the IntaRNA development team! Thanks!\n"; el::Loggers::flushAll(); return -1; } catch (...) { std::exception_ptr eptr = std::current_exception(); LOG(WARNING) <<"Unknown exception raised \n\n" - <<" ==> Please report to the IntaRNA development team! Thanks!\n"; + <<" ==> Please report (including input) to the IntaRNA development team! Thanks!\n"; el::Loggers::flushAll(); return -1; } diff --git a/tests/AccessibilityFromStream_test.cpp b/tests/AccessibilityFromStream_test.cpp index d6ff0d08..7dab2a9c 100644 --- a/tests/AccessibilityFromStream_test.cpp +++ b/tests/AccessibilityFromStream_test.cpp @@ -62,19 +62,19 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) { std::istringstream accStream(accString); // trigger parsing - AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); + AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); // std::cerr <<"orig data:\n" < 0.998 ); // REQUIRE( std::exp( - acc.getED(29, 29) ) < 0.999 ); -// REQUIRE( std::exp( - acc.getED(20, 29) ) > 0.0006 ); -// REQUIRE( std::exp( - acc.getED(20, 29) ) < 0.0007 ); +// REQUIRE( std::exp( - acc.getED(21, 29) ) > 0.0001 ); +// REQUIRE( std::exp( - acc.getED(21, 29) ) < 0.00011 ); } SECTION("Pu_RNAplfold output reparsed") { @@ -83,31 +83,21 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) { std::istringstream accStream(accString); // trigger parsing - AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); + AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); // check elements REQUIRE( acc.getED(29, 29) == 0 ); - REQUIRE( acc.getED(20, 29) == 732 ); -// // old checks not working for integer-based ED type -// REQUIRE( std::exp( - acc.getED(29, 29) ) > 0.998 ); -// REQUIRE( std::exp( - acc.getED(29, 29) ) < 0.999 ); -// REQUIRE( std::exp( - acc.getED(20, 29) ) > 0.0006 ); -// REQUIRE( std::exp( - acc.getED(20, 29) ) < 0.0007 ); + REQUIRE( acc.getED(21, 29) == 690 ); std::stringstream accStream2; acc.writeRNAplfold_Pu_text( accStream2, 1.0 ); // trigger parsing - AccessibilityFromStream acc2( rna, 10, NULL, accStream2, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); + AccessibilityFromStream acc2( rna, 9, NULL, accStream2, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); // check elements REQUIRE( acc.getED(29, 29) == 0 ); - REQUIRE( acc.getED(20, 29) == 732 ); -// // old checks not working for integer-based ED type -// REQUIRE( std::exp( - acc2.getED(29, 29) ) > 0.998 ); -// REQUIRE( std::exp( - acc2.getED(29, 29) ) < 0.999 ); -// REQUIRE( std::exp( - acc2.getED(20, 29) ) > 0.0006 ); -// REQUIRE( std::exp( - acc2.getED(20, 29) ) < 0.0007 ); + REQUIRE( acc.getED(21, 29) == 690 ); } @@ -117,31 +107,21 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) { std::istringstream accStream(accString); // trigger parsing - AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); + AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ); // check elements REQUIRE( acc.getED(29, 29) == 0 ); - REQUIRE( acc.getED(20, 29) == 732 ); -// // old checks not working for integer-based ED type -// REQUIRE( std::exp( - acc.getED(29, 29) ) > 0.998 ); -// REQUIRE( std::exp( - acc.getED(29, 29) ) < 0.999 ); -// REQUIRE( std::exp( - acc.getED(20, 29) ) > 0.0006 ); -// REQUIRE( std::exp( - acc.getED(20, 29) ) < 0.0007 ); + REQUIRE( acc.getED(21, 29) == 690 ); std::stringstream accStream2; acc.writeRNAplfold_ED_text( accStream2 ); // trigger parsing - AccessibilityFromStream acc2( rna, 10, NULL, accStream2, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 ); + AccessibilityFromStream acc2( rna, 9, NULL, accStream2, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 ); // check elements REQUIRE( acc.getED(29, 29) == 0 ); - REQUIRE( acc.getED(20, 29) == 732 ); -// // old checks not working for integer-based ED type -// REQUIRE( std::exp( - acc2.getED(29, 29) ) > 0.998 ); -// REQUIRE( std::exp( - acc2.getED(29, 29) ) < 0.999 ); -// REQUIRE( std::exp( - acc2.getED(20, 29) ) > 0.0006 ); -// REQUIRE( std::exp( - acc2.getED(20, 29) ) < 0.0007 ); + REQUIRE( acc.getED(21, 29) == 690 ); } @@ -150,7 +130,7 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) { std::istringstream accStream(accString); RnaSequence rnaDouble("tooLong",seq+seq); // trigger parsing exception - REQUIRE_THROWS( AccessibilityFromStream( rnaDouble, 10, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ) ); + REQUIRE_THROWS( AccessibilityFromStream( rnaDouble, 9, NULL, accStream, AccessibilityFromStream::Pu_RNAplfold_Text, 1.0 ) ); } SECTION("test decomposeByMaxED()") { @@ -158,7 +138,7 @@ TEST_CASE( "AccessibilityFromStream", "[AccessibilityFromStream]" ) { std::istringstream accStream(accString); RnaSequence rna("tooLong",seq); // read PU values for fake ED values - AccessibilityFromStream acc( rna, 10, NULL, accStream, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 ); + AccessibilityFromStream acc( rna, 9, NULL, accStream, AccessibilityFromStream::ED_RNAplfold_Text, 1.0 ); // new validation values for integer-based ED type just copied from failing tests // ! not manually checked for sanity !