Skip to content

Commit

Permalink
Merge pull request #4 from chriswyatt1/working
Browse files Browse the repository at this point in the history
Working
  • Loading branch information
chriswyatt1 authored Aug 4, 2024
2 parents 762787c + f60687e commit 4fbc8c1
Show file tree
Hide file tree
Showing 15 changed files with 231 additions and 307 deletions.
52 changes: 44 additions & 8 deletions bin/get_fasta_largest_isoform.TrinityMS.pl
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,14 @@
use FindBin;
use lib ("$FindBin::RealBin/PerlLib");
use Getopt::Std;
use Bio::SeqIO;

# This script reads in a fasta file and writes out the longest transcript for each gene (when the headers are in ensembl or trinity format).
# This stops multiple isoforms of the same gene from going through.
# If you want all genes/isoforms to go through, use "basic": sequences are then keyed on their full ID instead of being grouped by gene.
# It then prints out the longest isoform for each gene ("ensembl", "trinity") or every sequence ("basic").

die "Please specify (1)fasta file (2) nucl type (ensembl, trinity)\n" unless(@ARGV==2);
die "Please specify (1)fasta file (2) nucl type (ensembl, trinity, basic)\n" unless(@ARGV==2);

my $fastafile = $ARGV[0];
my $nucl_type = $ARGV[1];
Expand All @@ -19,7 +24,6 @@
if ($nucl_type eq "trinity"){


use Bio::SeqIO;
my $seqio = Bio::SeqIO->new(-file => "$fastafile", '-format' => 'Fasta');
my %fastadictionary=();
my @headersplit=();
Expand Down Expand Up @@ -49,7 +53,7 @@

foreach my $key ( sort keys %fastadictionary){
if ($fastadictionary{$key} eq "Sequenceunavailable"){

#Do nothing
}
else{
print $outhandle ">$key\n$fastadictionary{$key}\n";
Expand All @@ -61,7 +65,6 @@

if ($nucl_type eq "ensembl"){

use Bio::SeqIO;
my $seqio = Bio::SeqIO->new(-file => "$fastafile", '-format' => 'Fasta');
my %fastadictionary=();
my @headersplit=();
Expand All @@ -87,20 +90,53 @@

}

print "Now print new fasta with one main protein per gene.\n";
#print "Now print new fasta with one main protein per gene.\n";

foreach my $key ( sort keys %fastadictionary){
if ($fastadictionary{$key} eq "Sequenceunavailable"){

#Do nothing
}
else{
print $outhandle ">$key\n$fastadictionary{$key}\n";
}

}
}

}


# "basic" mode: sequences are keyed on their full display ID, with no
# gene/isoform grouping. If the same ID occurs more than once in the input,
# the longest sequence is kept; on a tie in length the later record wins.
# Relies on $nucl_type, $fastafile and $outhandle being set up earlier in the
# script (outside this block).
if ($nucl_type eq "basic"){

    my $seqio = Bio::SeqIO->new(-file => "$fastafile", '-format' => 'Fasta');
    my %fastadictionary=();

    while (my $seq = $seqio->next_seq){ ## selects one sequence at a time
        my $id = $seq->display_id;
        my $string = $seq->seq;

        # NOTE(review): a record with no residues may come back as undef from
        # Bio::SeqIO; treat it as an empty string so the length comparison
        # below is well defined. Output is unchanged (undef printed as "").
        $string = '' unless defined $string;

        # Test with exists(), not truthiness: a stored sequence that is false
        # in Perl (e.g. the single character "0") must still count as "seen",
        # otherwise it would be overwritten by any later, shorter duplicate.
        if (exists $fastadictionary{$id}){
            next if length($string) < length($fastadictionary{$id});
        }
        $fastadictionary{$id} = $string;
    }

    print "Now print new fasta with one main protein per gene.\n";

    foreach my $key ( sort keys %fastadictionary){
        # Entries whose sequence is the literal "Sequenceunavailable" are not
        # written to the output.
        next if $fastadictionary{$key} eq "Sequenceunavailable";
        print $outhandle ">$key\n$fastadictionary{$key}\n";
    }

}

print "Finished: input lines, output lines\n";

58 changes: 0 additions & 58 deletions bin/get_fasta_largest_isoform.ensembl.pl

This file was deleted.

67 changes: 67 additions & 0 deletions conf/base.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Setting default labels with associated resource allocation
// Process-scope defaults and per-label resource requests.
// Values wrapped in a closure ({ ... }) are multiplied by task.attempt, so a
// retried task asks for more than its first attempt did.
// check_max is defined outside this file — presumably it caps the request at a
// configured maximum for the named resource; TODO confirm against its definition.
process {

// Default resources are same as single label for any unlabelled processes that may be added
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 2.GB * task.attempt, 'memory' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }

// If error is due to timeout or exceeding resource limits then retry
// Concretely: exit statuses 130-145 and 104 select 'retry'; any other status selects 'finish'.
errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' }
maxRetries = 1
// NOTE(review): per the Nextflow docs, -1 disables the maxErrors limit (no cap on
// the total number of failures for a process) rather than forbidding errors — confirm.
maxErrors = '-1'

// Attempt-scaled resource tiers, selected with `label 'process_<tier>'` in a process.
withLabel:process_single {
cpus = { check_max( 1 * task.attempt, 'cpus' ) }
memory = { check_max( 2.GB * task.attempt, 'memory' ) }
time = { check_max( 1.h * task.attempt, 'time' ) }
}
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 4.GB * task.attempt, 'memory' ) }
time = { check_max( 2.h * task.attempt, 'time' ) }
}
withLabel:process_medium {
cpus = { check_max( 4 * task.attempt, 'cpus' ) }
memory = { check_max( 8.GB * task.attempt, 'memory' ) }
time = { check_max( 4.h * task.attempt, 'time' ) }
}
withLabel:process_high {
cpus = { check_max( 8 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
time = { check_max( 8.h * task.attempt, 'time' ) }
}
// Single-resource labels: each sets only the one directive shown (time or memory).
withLabel:process_long {
time = { check_max( 24.h * task.attempt, 'time' ) }
}
withLabel:process_high_memory {
memory = { check_max( 64.GB * task.attempt, 'memory' ) }
}
withLabel:process_med_memory {
memory = { check_max( 20.GB * task.attempt, 'memory' ) }
}
// Task-specific labels with fixed requests: these are plain values, so they are
// neither scaled by task.attempt nor passed through check_max.
withLabel:download_nr {
memory = 12.GB
cpus = 2
time = 24.h
}
withLabel:blastdb {
memory = 40.GB
cpus = 4
time = 8.h
}
withLabel:blast {
memory = 40.GB
cpus = 4
time = 8.h
}
// Error-handling labels: set errorStrategy for the labelled process in place of the default closure above.
withLabel:error_ignore {
errorStrategy = 'ignore'
}
withLabel:error_retry {
errorStrategy = 'retry'
maxRetries = 2
}

}
87 changes: 0 additions & 87 deletions main.firstattempt.nf

This file was deleted.

Loading

0 comments on commit 4fbc8c1

Please sign in to comment.