Skip to content

Commit

Permalink
add in additional test in extendPreRepeat
Browse files Browse the repository at this point in the history
Make sure that the final repeat cannot be greater than the length
of the read.

Change documentation to cut a new version
  • Loading branch information
ctSkennerton committed Sep 5, 2016
1 parent e0384a4 commit 19f73f5
Show file tree
Hide file tree
Showing 7 changed files with 64 additions and 26 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
crass -- CRisprASSembler -- version 1 subversion 0 revision 0 (1.0.0)
crass -- CRisprASSembler -- version 1 subversion 0 revision 0 (1.0.1)
=======================================================================

CITATION
Expand Down
4 changes: 2 additions & 2 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Process this file with autoconf to produce a configure script.

AC_PREREQ(2.61)
AC_INIT(crass, 1.0.0, [email protected])
AC_INIT(crass, 1.0.1, [email protected])

AC_CONFIG_AUX_DIR(build)
AC_CONFIG_MACRO_DIR([m4])
Expand All @@ -16,7 +16,7 @@ LT_LANG([C++])
AC_DEFINE([PACKAGE_FULL_NAME],["CRisprASSembler"],[Define the full name of the package])
AC_DEFINE([PACKAGE_MAJOR_VERSION],[1],[Define the major version of the package])
AC_DEFINE([PACKAGE_MINOR_VERSION],[0],[Define the minor version of the package])
AC_DEFINE([PACKAGE_REVISION],[0],[Define the revision of the package])
AC_DEFINE([PACKAGE_REVISION],[1],[Define the revision of the package])


# extra configure options
Expand Down
8 changes: 2 additions & 6 deletions doc/manual.tex
Original file line number Diff line number Diff line change
Expand Up @@ -185,9 +185,9 @@

%%% The "real" document content comes below...

\title{Crass: The CRISPR assembler (v0.3.12)}
\title{Crass: The CRISPR assembler (v1.0.1)}
\author{Connor Skennerton and Michael Imelfort}
\date{10th February 2015} % Activate to display a given date or no date (if empty),
\date{5th September 2016} % Activate to display a given date or no date (if empty),
% otherwise the current date is printed

\begin{document}
Expand Down Expand Up @@ -301,7 +301,6 @@ \subsubsection{User Flags}
\combinedoptionflag{g}{logToScreen} & Does not produce a log file but instead prints the contents to screen.\\ \\
\combinedoptionflag{G}{showSingletons} & Set this flag if you would like to see unconnected singleton spacers in the final graph.\\ \\
\combinedoptionflag{h}{help} & Print the basic usage and version information. \\ \\
\combinedoptionflag{H}{removeHomopolymers} & This is an experimental feature of Crass where the search algorithms attempt to correct for homopolymer errors in reads.\\ \\
\combinedoptionflagarg{k}{kmerCount}{INT} & Sets the number of kmers that need to be shared between putative direct repeats for them to be clustered together after the find stage. Clustered direct repeats are eventually concatenated to form a 'true' direct repeat for a CRISPR; putative repeats that cannot be clustered are removed from consideration. Change this variable if you feel that the clustering is too stringent and is breaking apart one CRISPR into multiple types. The default number of kmers is 6; however, the value should not be set below 6 as this would not be stringent enough; a higher value would split closely related direct repeats apart\\ \\
\combinedoptionflagarg{K}{graphNodeLen}{INT} & Crass makes a graph by cutting kmers on either side of the direct repeat and then joining these together. The length of the kmer will dictate how connected the graph will be. A smaller number will increase the chances of new connections being formed; however, it also increases the chances of false positives. The default value is 9.\\ \\
\combinedoptionflagarg{l}{logLevel}{INT} & Sets the verbosity of the log file. Under most circumstances the log level cannot go higher than 4, unless the enable-debug option is set during configuration, which will increase the maximum value to 10. Note that above a level of 4 a lot of the information will not be understandable to the user as most of these messages are specifically for us, the developers, to track down bugs. \\ \\
Expand All @@ -313,9 +312,6 @@ \subsubsection{User Flags}
\combinedoptionflagarg{S}{maxSpacer}{INT} & The upper bound considered acceptable for the size of a spacer sequence. Default is 50bp.\\ \\
\combinedoptionflag{V}{version} & Prints out program version information. \\ \\
\combinedoptionflagarg{w}{windowLength}{INT} & When using the long read search algorithm, changes the window length for finding seed sequences; can be set between 6 - 9bp. The default value is 8bp.\\ \\
\combinedoptionflagarg{x}{spacerScalling}{DECIMAL} & Overide the default scalling of the spacer bounds (\optionflag{sS}) set by \longoptionflag{removeHomopolymers}. The default is 0.7, i.e. the size of the spacer bounds is reduced by 30\% when removing homopolymers in sequences. The value must be a decimal. \\ \\
\combinedoptionflagarg{y}{repeatScalling}{DECIMAL} & Overide the default scalling of the direct repeat bounds (\optionflag{dD}) set by \longoptionflag{removeHomopolymers}. The default is 0.7, i.e. the size of the direct repeat bounds is reduced by 30\% when removing homopolymers in sequences. The value must be a decimal.\\
\combinedoptionflag{z}{noScalling} & This turns off the effects of (\optionflag{x} or \optionflag{y}) so that the bounds of the direct repeat and spacer (\optionflag{dDsS}) given on the command line are interpreted literally when the \longoptionflag{removeHomopolymers} option is set. \\ \\
\hline
\end{longtable}
\subsubsection{Output From Crass}
Expand Down
20 changes: 5 additions & 15 deletions man/crass.1
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,15 @@
.\"man mdoc.samples for a complete listing of options
.\"man mdoc for the short list of editing options
.\"/usr/share/misc/mdoc.template
.Dd 17/04/13
.Dd 5/09/16
.Dt crass 1
.Os Darwin
.Sh NAME
.Nm crass
.Nd the CRISPR Assembler.
.Sh SYNOPSIS
.Nm
.Op Fl abcdDefgGhHkKlLnorsSVwxyz
.Op Fl abcdDefgGhkKlLnorsSVw
.Ar

.Sh DESCRIPTION
Expand All @@ -35,7 +35,7 @@ then reads containing direct repeats are then outputed for further analysis.
.Bl -tag -width -indent
.It
.Nm
.Op Fl eghrzGHL
.Op Fl eghrGL
.Op Fl a Ar LAYOUT_TYPE
.Op Fl b Ar INT
.Op Fl c Ar COLOUR_TYPE
Expand All @@ -48,8 +48,6 @@ then reads containing direct repeats are then outputed for further analysis.
.Op Fl o Ar DIR
.Op Fl s Ar INT
.Op Fl w Ar INT
.Op Fl x Ar REAL
.Op Fl y Ar REAL
.Op Fl D Ar INT
.Op Fl K Ar INT
.Op Fl S Ar INT
Expand Down Expand Up @@ -78,15 +76,13 @@ The Maximum length of the direct repeat to search for [Default: 47]
.It Fl e Ar "" Fl "\^\-noDebugGraph"
Option available only when DEBUG preprocessor symbol is set. Will turn off generating debugging graphs
.It Fl f Ar INT Fl "\^\-covCutoff" Ar INT
Defines the minimim number of reads that a putative CRISPR must contain to be considered real. [Default: 10]
Defines the minimum number of spacers that a putative CRISPR must contain to be considered real. [Default: 3]
.It Fl g Ar "" Fl "\^\-logToScreen"
Print the logging info to stdout rather than to a file
.It Fl G Ar "" Fl "\^\-showSingletons" Ar ""
Set to show unattached spacers in the graph output
.It Fl h Ar "" Fl "\^\-help" Ar ""
Output basic usage information to screen
.It Fl H Ar "" Fl "\^\-removeHomopolymers"
Correct for homopolymer errors [default: no correction]
.It Fl l Ar INT Fl "\^\-logLevel" Ar INT
The level of verbosity to output in the
.Nm
Expand All @@ -96,7 +92,7 @@ The number of kmers at two direct repeats must share to be considered part of th
.It Fl K Ar INT Fl "\^\-graphNodeLen" Ar INT
The length of the kmer used to define a node in the graph. The lower the number the more connected the graph will be but also increases the chance of false positive edges [Default: 7]
.It Fl n Ar INT Fl "\^\-minNumRepeats" Ar INT
The minimim number of repeats that a candidate CRISPR locus must contain to be considered 'real' [Default: 3]
The minimum number of repeats that a candidate CRISPR locus must contain to be considered 'real' [Default: 2]
.It Fl o Ar LOCATION Fl "\^\-outDir" Ar LOCATION
The name of the output directory for the output files [Default: ./]
.It Fl r Ar "" Fl "\^\-noRendering" Ar ""
Expand All @@ -109,12 +105,6 @@ The maximim length of the spacer to search for [Default: 50]
Print version and copyright information
.It Fl w Ar INT Fl "\^\-windowLength" Ar INT
The length of the window size for searching a genome. Must be between 6 - 9 [Default: 8]
.It Fl x Ar REAL Fl "\^\-spacerScalling" Ar REAL
A decimal number that represents the reduction in size of the spacer when the --removeHomopolymers option is set [Default: 0.7]
.It Fl y Ar REAL Fl "\^\-repeatScalling" Ar REAL
A decimal number that represents the reduction in size of the direct repeat when the --removeHomopolymers option is set [Default: 0.7]
.It Fl z Ar "" Fl "\^\-noScalling" Ar ""
Use the given spacer and direct repeat ranges when --removeHomopolymers is set. The default is to use the scale these values based on the values of -x and -y.
.El

.\" .Sh ENVIRONMENT \" May not be needed
Expand Down
11 changes: 9 additions & 2 deletions src/crass/libcrispr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,12 +747,19 @@ unsigned int extendPreRepeat(ReadHolder& tmp_holder, int searchWindowLength, in
if(*repeat_iter < static_cast<unsigned int>(left_extension_length))
{
*repeat_iter = 0;
*(repeat_iter + 1) += right_extension_length;
}
else
{
*repeat_iter -= left_extension_length;
*(repeat_iter+1) += right_extension_length;
}

if(*(repeat_iter+1) + right_extension_length >= tmp_holder.getSeqLength())
{
*(repeat_iter + 1) = tmp_holder.getSeqLength() - 1;
}
else
{
*(repeat_iter + 1) += right_extension_length;
}
#ifdef DEBUG
logInfo("\t"<<*repeat_iter<<","<<*(repeat_iter+1), 9);
Expand Down
1 change: 1 addition & 0 deletions src/test/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ check_PROGRAMS = crass-test
AM_CXXFLAGS = -I$(top_builddir)/src/crass/
AM_LDFLAGS = @zlib_flags@
crass_test_SOURCES = \
test_readholder.cpp\
test_libcrispr.cpp\
test_main.cpp

Expand Down
44 changes: 44 additions & 0 deletions src/test/test_libcrispr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,50 @@
#include "libcrispr.h"
#include "ReadHolder.h"

// 0                                                                                                   1
// 0         1         2         3         4         5         6         7         8         9         0         1         2
// 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345
// CACCATGGAAGACCTTCCTAACACCATGGTAGACATTCCTTACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTTCTAA
// rrrrrrrr                                                       rrrrrrrr                                  rrrrrrrr

// Checks scanRight() on a 126bp read that is pre-seeded with two repeat
// positions: the test expects exactly one further start/stop pair to be
// recorded on the read after the scan.
TEST_CASE("searching for additional repeated kmer in a 126bp read", "[libcrispr]") {
    ReadHolder read("CACCATGGAAGACCTTCCTAACACCATGGTAGACATTCCTTACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTTCTAA","HWI-D00456:77:C70WLANXX:1:1101:10963:2182");
    SECTION("where there should be one additional match with a minimum spacer length of 26"){
        // Seed the read with two known occurrences of the repeat kmer.
        read.startStopsAdd(0, 7);
        read.startStopsAdd(63,70);

        // NOTE(review): 26 matches the minimum spacer length named in the
        // section title; the meaning of 24 is not visible here - confirm
        // against the scanRight() declaration.
        std::string seed_kmer("CACCATGG");
        scanRight(read, seed_kmer, 26, 24);

        // Flattened start/stop values expected once the scan has finished.
        const int expected[] = {0, 7, 63, 70, 105, 112};
        StartStopList found = read.getStartStopList();
        REQUIRE(found.size() == 6);
        for (unsigned int i = 0; i < 6; ++i) {
            REQUIRE(found[i] == expected[i]);
        }
    }
}

// Checks extendPreRepeat() on a 126bp read seeded with three 8bp repeat
// positions: the test pins both the returned repeat length and the adjusted
// start/stop values, the last of which must stay inside the read (index 125).
TEST_CASE("check extending repeat with 126bp read", "[libcrispr]") {
    ReadHolder read("CACCATGGAAGACCTTCCTAACACCATGGTAGACATTCCTTACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTCCTAACACCATGGTAGACCTTTCTAA","HWI-D00456:77:C70WLANXX:1:1101:10963:2182");

    SECTION("The search window length is 8 and the min spacer length is 26") {
        // Seed positions of the repeat kmer before extension.
        read.startStopsAdd(0, 7);
        read.startStopsAdd(63,70);
        read.startStopsAdd(105,112);

        int extended_length = extendPreRepeat(read, 8, 26);
        REQUIRE(extended_length == 23);

        // Flattened start/stop values expected after the repeats are extended.
        const int expected[] = {0, 21, 62, 84, 104, 125};
        StartStopList extended = read.getStartStopList();
        REQUIRE(extended.size() == 6);
        for (unsigned int i = 0; i < 6; ++i) {
            REQUIRE(extended[i] == expected[i]);
        }
    }
}

TEST_CASE("searching for additional repeated kmers in 100bp read", "[libcrispr]"){
// read
// 0 1
Expand Down

0 comments on commit 19f73f5

Please sign in to comment.