Skip to content

Commit

Permalink
add
Browse files Browse the repository at this point in the history
  • Loading branch information
brianjohnhaas committed Sep 26, 2018
1 parent 1f139d8 commit 0ff10dc
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 28 deletions.
68 changes: 47 additions & 21 deletions Trinity
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,8 @@ my $MAX_CHRYSALIS_CLUSTER_SIZE = 25; # might need to set higher for highly polym

my $JUST_NORMALIZE_READS_FLAG = 0;

my $NO_SEQTK = 0;

my $trinity_banner = qq^
______ ____ ____ ____ ____ ______ __ __
| || \\ | || \\ | || || | |
Expand Down Expand Up @@ -542,6 +544,9 @@ my $advanced_usage = <<_ADVANCEDUSAGE_;
# --stomp_snps : stomp snps out of kmers before inchworm assembly
#
# --NO_SUPERTRANS : disable supertranscripts
#
# --NO_SEQTK : disable seqtk for fq->fa conversions, instead use slower perl code
#
_ADVANCEDUSAGE_

Expand Down Expand Up @@ -759,6 +764,8 @@ my $STOMP_SNPS = 0;
"FORCE" => \$FORCE_FLAG,

"NO_SUPERTRANS" => \$NO_SUPERTRANS_FLAG,

"NO_SEQTK" => \$NO_SEQTK,

);

Expand Down Expand Up @@ -2573,30 +2580,45 @@ sub prep_seqs {

# make fasta
foreach my $f (@initial_files){
my $cmd = "cat $f | seqtk-trinity seq -A -";
my $linecount_cmd = "cat $f | wc -l";
if ($f=~/\.gz$/){
$cmd = "gunzip -c $f | seqtk-trinity seq -A -";
$linecount_cmd = "gunzip -c $f | wc -l";
} elsif ($f=~/\.bz2$/){
$cmd = "bunzip2 -dkc $f | seqtk-trinity seq -A -";
$linecount_cmd = "bunzip2 -dkc $f | wc -l";
} elsif ($f =~ /\.xz/) {
$cmd = "xz -dc ${f} | seqtk-trinity seq -A -";
$linecount_cmd = "xz -dc ${f} | wc -l";
## I would like to suggest that these if statements are not necessary if one just does
## qx"less ${f} |" because less has smart input filters in place and will automagically
## handle all the likely compression formats.
}

if ($SS_lib_type && $SS_lib_type eq "R") {
$cmd =~ s/trinity seq /trinity seq -r /;
if ($NO_SEQTK) {
my $perlcmd = "$UTILDIR/support_scripts/fastQ_to_fastA.pl -I $f ";

if ($SS_lib_type && $SS_lib_type eq "R") {
$perlcmd .= " --rev ";

}
$perlcmd .= " >> $file_prefix.fa 2> $f.readcount ";

&process_cmd($perlcmd);
}
else {
## using seqtk (trinity-mod'd version)

$cmd .= " >> $file_prefix.fa";

&process_cmd($cmd);

my $cmd = "cat $f | seqtk-trinity seq -A -";
my $linecount_cmd = "cat $f | wc -l";
if ($f=~/\.gz$/){
$cmd = "gunzip -c $f | seqtk-trinity seq -A -";
$linecount_cmd = "gunzip -c $f | wc -l";
} elsif ($f=~/\.bz2$/){
$cmd = "bunzip2 -dkc $f | seqtk-trinity seq -A -";
$linecount_cmd = "bunzip2 -dkc $f | wc -l";
} elsif ($f =~ /\.xz/) {
$cmd = "xz -dc ${f} | seqtk-trinity seq -A -";
$linecount_cmd = "xz -dc ${f} | wc -l";
## I would like to suggest that these if statements are not necessary if one just does
## qx"less ${f} |" because less has smart input filters in place and will automagically
## handle all the likely compression formats.
}

if ($SS_lib_type && $SS_lib_type eq "R") {
$cmd =~ s/trinity seq /trinity seq -r /;
}

$cmd .= " >> $file_prefix.fa";

&process_cmd($cmd);
}
}

}
Expand Down Expand Up @@ -3225,6 +3247,10 @@ sub normalize {
if ($NO_CLEANUP) {
$cmd .= " --no_cleanup ";
}
if ($NO_SEQTK) {
$cmd .= " --NO_SEQTK ";
}


#@read_files = &add_fifo_for_gzip(@read_files);

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash -ve
# Shell flags: -v echoes each script line as it is read, -e aborts on the
# first command that exits non-zero.

#######################################################
##  Run Trinity to Generate Transcriptome Assemblies ##
#######################################################
#
# Runs Trinity with --NO_SEQTK on the paired, gzipped FASTQ inputs
# reads.left.fq.gz / reads.right.fq.gz, which are given as relative paths
# and therefore resolved against the current working directory.
# Output is written to trinity_out_dir_noseqtk.
#
# Requires: the TRINITY_HOME environment variable, used to locate the
# Trinity executable.

# Quote the expansion so the emptiness test is well-formed whether the
# variable is unset, empty, or contains whitespace.
if [ -z "${TRINITY_HOME}" ]; then
    # Diagnostics belong on stderr so they are not mixed into captured stdout.
    echo "Must set env var TRINITY_HOME" >&2
    exit 1
fi


# Quote the executable path so an install location containing spaces is
# not word-split into several arguments.
"${TRINITY_HOME}/Trinity" --seqType fq --max_memory 2G \
    --left reads.left.fq.gz \
    --right reads.right.fq.gz \
    --SS_lib_type RF \
    --CPU 4 \
    --output trinity_out_dir_noseqtk --NO_SEQTK

2 changes: 1 addition & 1 deletion trinityrnaseq.wiki
31 changes: 25 additions & 6 deletions util/insilico_read_normalization.pl
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@
my $PARALLEL_STATS = 0;
my $JELLY_S;

my $NO_SEQTK = 0;

my $usage = <<_EOUSAGE_;
Expand Down Expand Up @@ -164,6 +166,8 @@
'jelly_s=i' => \$JELLY_S,

'tmp_dir_name=s' => \$TMP_DIR_NAME,

"NO_SEQTK" => \$NO_SEQTK,

);

Expand Down Expand Up @@ -641,13 +645,28 @@ sub prep_seqs {

if ($seqType eq "fq") {
# make fasta
my $cmd = "seqtk-trinity seq -A";
if ($SS_lib_type && $SS_lib_type eq "R") {
$cmd =~ s/trinity seq /trinity seq -r /;

if ($NO_SEQTK) {
my $perlcmd = "$UTILDIR/fastQ_to_fastA.pl -I $initial_file ";

if ($SS_lib_type && $SS_lib_type eq "R") {
$perlcmd .= " --rev ";

}
$perlcmd .= " >> $file_prefix.fa 2> $file_prefix.readcount ";

&process_cmd($perlcmd);
}
else {
# using seqtk
my $cmd = "seqtk-trinity seq -A";
if ($SS_lib_type && $SS_lib_type eq "R") {
$cmd =~ s/trinity seq /trinity seq -r /;
}
$cmd .= " $initial_file >> $file_prefix.fa";

&process_cmd($cmd);
}
$cmd .= " $initial_file >> $file_prefix.fa";

&process_cmd($cmd);
}
elsif ($seqType eq "fa") {
if ($SS_lib_type && $SS_lib_type eq "R") {
Expand Down

0 comments on commit 0ff10dc

Please sign in to comment.