diff --git a/MYMETA.json b/MYMETA.json deleted file mode 100644 index e410705..0000000 --- a/MYMETA.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "abstract" : "unknown", - "author" : [ - "unknown" - ], - "dynamic_config" : 0, - "generated_by" : "ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560", - "license" : [ - "agpl_3" - ], - "meta-spec" : { - "url" : "http://search.cpan.org/perldoc?CPAN::Meta::Spec", - "version" : "2" - }, - "name" : "Vagrent", - "no_index" : { - "directory" : [ - "t", - "inc" - ] - }, - "prereqs" : { - "build" : { - "requires" : { - "ExtUtils::MakeMaker" : "0" - } - }, - "configure" : { - "requires" : { - "ExtUtils::MakeMaker" : "0" - } - }, - "runtime" : { - "requires" : { - "Attribute::Abstract" : "1.02", - "Bio::DB::Sam" : "1.39", - "Bio::Root::Version" : "1.006924", - "Capture::Tiny" : "0.24", - "Config::IniFiles" : "2.83", - "Const::Fast" : "0.014", - "File::ShareDir" : "1.102", - "File::Temp" : "0.2304", - "File::Type" : "0.22", - "IPC::System::Simple" : "1.25", - "LWP::Simple" : "6.00", - "List::Util" : "1.41", - "Log::Log4perl" : "1.44", - "Pod::Usage" : "1.64", - "Sort::Key" : "1.33", - "TAP::Harness" : "3.33", - "Try::Tiny" : "0.22" - } - } - }, - "release_status" : "stable", - "version" : "v2.1.2" -} diff --git a/MYMETA.yml b/MYMETA.yml deleted file mode 100644 index 00320a4..0000000 --- a/MYMETA.yml +++ /dev/null @@ -1,38 +0,0 @@ ---- -abstract: unknown -author: - - unknown -build_requires: - ExtUtils::MakeMaker: 0 -configure_requires: - ExtUtils::MakeMaker: 0 -dynamic_config: 0 -generated_by: 'ExtUtils::MakeMaker version 6.68, CPAN::Meta::Converter version 2.131560' -license: open_source -meta-spec: - url: http://module-build.sourceforge.net/META-spec-v1.4.html - version: 1.4 -name: Vagrent -no_index: - directory: - - t - - inc -requires: - Attribute::Abstract: 1.02 - Bio::DB::Sam: 1.39 - Bio::Root::Version: 1.006924 - Capture::Tiny: 0.24 - Config::IniFiles: 2.83 - Const::Fast: 0.014 - File::ShareDir: 1.102 - File::Temp: 0.2304 - File::Type: 0.22 - IPC::System::Simple: 1.25 - LWP::Simple: 6.00 - List::Util: 1.41 - Log::Log4perl: 1.44 - Pod::Usage: 1.64 - Sort::Key: 1.33 - TAP::Harness: 3.33 - Try::Tiny: 0.22 -version: v2.1.2 diff --git a/docs.tar.gz b/docs.tar.gz index 605040d..c11c306 100644 Binary files a/docs.tar.gz and b/docs.tar.gz differ diff --git a/lib/Sanger/CGP/Vagrent.pm b/lib/Sanger/CGP/Vagrent.pm index 2cc762d..f7f421c 100644 --- a/lib/Sanger/CGP/Vagrent.pm +++ b/lib/Sanger/CGP/Vagrent.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -26,7 +26,7 @@ use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '2.1.2'; +our $VERSION = '2.1.3'; our @EXPORT = qw($VERSION); 1; diff --git a/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm b/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm index b039a6a..7fae898 100644 --- a/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm +++ b/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::Annotators::AbstractAnnotator; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -28,6 +28,7 @@ use Log::Log4perl; use POSIX qw(ceil); use Data::Dumper; use Attribute::Abstract; +use Const::Fast qw(const); use Sanger::CGP::Vagrent qw($VERSION); @@ -38,20 +39,19 @@ use base qw(Sanger::CGP::Vagrent::Ontology::SequenceOntologyClassifier); my $log = Log::Log4perl->get_logger(__PACKAGE__); - -1; - # constant reference values for consensus splice site values. -use constant CONSENSUS_SPLICE_OFFSETS => (-2, -1, 1, 2, 5); -use constant CONSENSUS_SPLICE_BEFORE_BOUNDRY => -2; -use constant CONSENSUS_SPLICE_AFTER_BOUNDRY => 5; +const my @CONSENSUS_SPLICE_OFFSETS => (-2, -1, 1, 2, 5); +const my $CONSENSUS_SPLICE_BEFORE_BOUNDRY => -2; +const my $CONSENSUS_SPLICE_AFTER_BOUNDRY => 5; # constant value representing the cutoff for intronic calls -use constant INTRONIC_OFFSET_CUTOFF => 11; +const my $INTRONIC_OFFSET_CUTOFF => 11; -use constant UPDOWNSTREAM_5KB_CUTOFF => 5000; -use constant UPSTREAM_2KB_CUTOFF => -2000; -use constant DOWNSTREAM_500BP_CUTOFF => 500; +const my $UPDOWNSTREAM_5KB_CUTOFF => 5000; +const my $UPSTREAM_2KB_CUTOFF => -2000; +const my $DOWNSTREAM_500BP_CUTOFF => 500; + +1; sub new { my $proto = shift; @@ -63,6 +63,10 @@ sub new { return $self; } +sub getConsensusSpliceOffsets { + return @CONSENSUS_SPLICE_OFFSETS; +} + sub getAnnotation { my ($self,$var) = @_; my @ann = $self->_getAnnotation($var); @@ -462,7 +466,7 @@ sub _buildProteinAnnotation { # something has gone wrong return undef; } - + my $mtDna = $self->_getMutatedCdsSequence($wtDna,$cdsMinPos,$cdsMaxPos,$cAnnot->getMt()); my $mtProt = Bio::Seq->new(-seq => $prePad . $mtDna . $postPad)->translate->seq(); # mutant protein sequence my $maxMtProt = Bio::Seq->new(-seq => $prePad . $mtDna . substr($tran->getcDNASeq,$tran->getCdsMaxPos()))->translate->seq(); # maximised protein sequence, overruns the natural stop and translates to the end of the transcript @@ -513,7 +517,7 @@ sub _buildProteinAnnotation { substr($wt,-1,1,''); substr($mt,-1,1,''); } - + #warn "|$wt| to |$mt|\n"; if($wt ne ''){ # wild type residue has been changed @@ -843,7 +847,7 @@ sub _buildUnknownMRNAAnnotation { sub _isOffsetAConsensusSpliceDistance { my ($self,$offset) = @_; - foreach my $cf($self->CONSENSUS_SPLICE_OFFSETS){ + foreach my $cf(@CONSENSUS_SPLICE_OFFSETS){ if($offset == $cf){ return 1; } @@ -852,16 +856,16 @@ sub _isOffsetAConsensusSpliceDistance { } sub _getConsesnsusSpliceBeforeBoundry { - return CONSENSUS_SPLICE_BEFORE_BOUNDRY; + return $CONSENSUS_SPLICE_BEFORE_BOUNDRY; } sub _getConsesnsusSpliceAfterBoundry { - return CONSENSUS_SPLICE_AFTER_BOUNDRY; + return $CONSENSUS_SPLICE_AFTER_BOUNDRY; } sub _isIntronicOffsetDistance { my ($self,$offset) = @_; - if(abs($offset) >= $self->INTRONIC_OFFSET_CUTOFF){ + if(abs($offset) >= $INTRONIC_OFFSET_CUTOFF){ return 1; } return 0; @@ -869,7 +873,7 @@ sub _isIntronicOffsetDistance { sub _isWithin5KBOffsetDistance { my ($self,$offset) = @_; - if(abs($offset) <= $self->UPDOWNSTREAM_5KB_CUTOFF){ + if(abs($offset) <= $UPDOWNSTREAM_5KB_CUTOFF){ return 1; } return 0; @@ -877,7 +881,7 @@ sub _isWithin5KBOffsetDistance { sub _isWithin2KBUpstreamOffsetDistance { my ($self,$offset) = @_; - if($offset < 0 && $offset >= $self->UPSTREAM_2KB_CUTOFF){ + if($offset < 0 && $offset >= $UPSTREAM_2KB_CUTOFF){ return 1; } return 0; @@ -885,7 +889,7 @@ sub _isWithin2KBUpstreamOffsetDistance { sub _isWithin500BPDownstreamOffsetDistance { my ($self,$offset) = @_; - if($offset > 0 && $offset <= $self->DOWNSTREAM_500BP_CUTOFF){ + if($offset > 0 && $offset <= $DOWNSTREAM_500BP_CUTOFF){ return 1; } return 0; @@ -897,7 +901,7 @@ sub _coversStartCodon { # if the transcript isn't protein coding it can't have a start codon return 0; } - + my ($startMin,$startMax); if($anno->getContext eq Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()){ $startMin = $tran->getCdsMinPos; @@ -909,7 +913,7 @@ sub _coversStartCodon { # don't know, assume no return 0; } - + if($anno->hasClassification($self->getInsertionClass)){ # insertions are a special case, coordinates are outside the variant if($anno->getMinPos < $startMax && $anno->getMaxPos > $startMin){ @@ -944,7 +948,7 @@ sub _coversStopCodon { # don't know, assume no return 0; } - + if($anno->hasClassification($self->getInsertionClass)){ # insertions are a special case, coordinates are outside the variant if($anno->getMinPos < $stopMax && $anno->getMaxPos > $stopMin){ @@ -1010,10 +1014,10 @@ sub _canAnnotateToCDS { if($anno->hasClassification($self->getInsertionClass)){ # insertions are a special case. # Coordinates are the last WT positions, and not the first variant ones like everything else - + print 'ANNO POS: '.$anno->getMinPos.' , '.$anno->getMinOffset.' - '.$anno->getMaxPos.' , '.$anno->getMaxOffset."\n" if $self->_debug(); print 'CDS POS: '.$tran->getCdsMinPos.' , '.$tran->getCdsMaxPos."\n" if $self->_debug(); - + if($anno->getMaxPos < $tran->getCdsMinPos || $anno->getMinPos > $tran->getCdsMaxPos){ # ends before CDS or starts afterwards return 0; @@ -1051,7 +1055,7 @@ sub _canAnnotateToCDS { } elsif($anno->hasClassification($self->getUnknownVariantClass)){ return 0; } elsif($anno->hasClassification($self->getInsertionClass) && $anno->hasClassification($self->get5PrimeUtrVariantClass)){ - # odd case, insertions close to the start codons can be described on the CDS even though they don't change it. + # odd case, insertions close to the start codons can be described on the CDS even though they don't change it. return 1; } else { my $msg = "Unable to calculate CDS relevance - UNKNOWN CLASSIFICATION: ".join(' ',$anno->getClassifications); diff --git a/lib/Sanger/CGP/Vagrent/Bookmarkers/MostDeleteriousBookmarker.pm b/lib/Sanger/CGP/Vagrent/Bookmarkers/MostDeleteriousBookmarker.pm index f764ddf..40a78bf 100644 --- a/lib/Sanger/CGP/Vagrent/Bookmarkers/MostDeleteriousBookmarker.pm +++ b/lib/Sanger/CGP/Vagrent/Bookmarkers/MostDeleteriousBookmarker.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::Bookmarkers::MostDeleteriousBookmarker; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -27,39 +27,41 @@ use strict; use Log::Log4perl qw(:easy); use Data::Dumper; use Carp qw(croak); +use Const::Fast qw(const); use Sanger::CGP::Vagrent qw($VERSION); use base qw(Sanger::CGP::Vagrent::Bookmarkers::RepresentativeTranscriptBookmarker); +const my $DOWNSTREAM_SCORE => 4; +const my $UPSTREAM_SCORE => 7; +const my $INTRONIC_SCORE => 10; +const my $NONCODING_GENE_SCORE => 15; +const my $COMPLEX_IN_MRNA_SCORE => 50; +const my $NONCODING_GENE_SPLICE_REGION_SCORE => 95; +const my $THREEPRIME_UTR_SPLICE_REGION_SCORE => 100; +const my $FIVEPRIME_UTR_SPLICE_REGION_SCORE => 105; +const my $CODING_SPLICE_REGION_SCORE => 200; +const my $FIVEPRIME_UTR_SCORE => 300; +const my $THREEPRIME_UTR_SCORE => 305; +const my $NONCODING_GENE_ESS_SPLICE_SCORE => 395; +const my $THREEPRIME_UTR_ESS_SPLICE_SCORE => 400; +const my $FIVEPRIME_UTR_ESS_SPLICE_SCORE => 405; +const my $START_GAINED_SCORE => 450; +const my $SYNONYMOUS_SCORE => 500; +const my $COMPLETE_NONCODING_TRANSCRIPT_LOSS_SCORE => 525; +const my $COMPLEX_IN_CDS_SCORE => 550; +const my $NON_SYNONYMOUS_SCORE => 600; +const my $STOP_LOST_SCORE => 700; +const my $INITIATOR_CHANGE_SCORE => 800; +const my $INFRAME_CODON_GAIN_SCORE => 825; +const my $INFRAME_CODON_LOSS_AND_GAIN_SCORE => 840; +const my $INFRAME_CODON_LOSS_SCORE => 850; +const my $CODING_ESS_SPLICE_SCORE => 900; +const my $STOP_GAINED_SCORE => 1000; +const my $FRAMESHIFT_SCORE => 1100; +const my $COMPLETE_PROTEIN_LOSS_SCORE => 1200; + 1; -use constant DOWNSTREAM_SCORE => 4; -use constant UPSTREAM_SCORE => 7; -use constant INTRONIC_SCORE => 10; -use constant NONCODING_GENE_SCORE => 15; -use constant COMPLEX_IN_MRNA_SCORE => 50; -use constant NONCODING_GENE_SPLICE_REGION_SCORE => 95; -use constant THREEPRIME_UTR_SPLICE_REGION_SCORE => 100; -use constant FIVEPRIME_UTR_SPLICE_REGION_SCORE => 105; -use constant CODING_SPLICE_REGION_SCORE => 200; -use constant FIVEPRIME_UTR_SCORE => 300; -use constant THREEPRIME_UTR_SCORE => 305; -use constant NONCODING_GENE_ESS_SPLICE_SCORE => 395; -use constant THREEPRIME_UTR_ESS_SPLICE_SCORE => 400; -use constant FIVEPRIME_UTR_ESS_SPLICE_SCORE => 405; -use constant START_GAINED_SCORE => 450; -use constant SYNONYMOUS_SCORE => 500; -use constant COMPLETE_NONCODING_TRANSCRIPT_LOSS_SCORE => 525; -use constant COMPLEX_IN_CDS_SCORE => 550; -use constant NON_SYNONYMOUS_SCORE => 600; -use constant STOP_LOST_SCORE => 700; -use constant INITIATOR_CHANGE_SCORE => 800; -use constant INFRAME_CODON_GAIN_SCORE => 825; -use constant INFRAME_CODON_LOSS_AND_GAIN_SCORE => 840; -use constant INFRAME_CODON_LOSS_SCORE => 850; -use constant CODING_ESS_SPLICE_SCORE => 900; -use constant STOP_GAINED_SCORE => 1000; -use constant FRAMESHIFT_SCORE => 1100; -use constant COMPLETE_PROTEIN_LOSS_SCORE => 1200; sub _getAnnotation { my $self = shift; @@ -75,271 +77,271 @@ sub _getMostDeleterious { my $mostGroup = undef; foreach my $g(@groups){ my $score = 1; - my $mrna = $g->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext); - my $cds = $g->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext); - my $prot = $g->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext); + my $mrna = $g->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation->getmRNAAnnotationContext); + my $cds = $g->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation->getCDSAnnotationContext); + my $prot = $g->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation->getProteinAnnotationContext); if($g->hasClassification($self->getProteinCodingClass)){ # protein coding transcript if(defined($prot) && $prot->getType() ne $mrna->getUnknownAnnotationType){ # Protein annotation - if($self->COMPLETE_PROTEIN_LOSS_SCORE > $score && + if($COMPLETE_PROTEIN_LOSS_SCORE > $score && $prot->hasClassification($self->getDeletionClass) && $prot->getMinPos() == 1 && $prot->getMaxPos() == $prot->getSequenceLength()){ # if marked as a deletion, start = 1 and end = protein length the protein is gone. - $score = $self->COMPLETE_PROTEIN_LOSS_SCORE; + $score = $COMPLETE_PROTEIN_LOSS_SCORE; } - if($self->FRAMESHIFT_SCORE > $score && + if($FRAMESHIFT_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getFrameShiftVariantClass)){ # Frameshift - $score = $self->FRAMESHIFT_SCORE; + $score = $FRAMESHIFT_SCORE; } - if($self->STOP_GAINED_SCORE > $score && + if($STOP_GAINED_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getStopGainedVariantClass)){ # non-sense / stop gained - $score = $self->STOP_GAINED_SCORE; + $score = $STOP_GAINED_SCORE; } - if($self->INFRAME_CODON_LOSS_SCORE > $score && + if($INFRAME_CODON_LOSS_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getInFrameCodonLossVariantClass)){ # in frame deletion - $score = $self->INFRAME_CODON_LOSS_SCORE; + $score = $INFRAME_CODON_LOSS_SCORE; } - if($self->INFRAME_CODON_LOSS_AND_GAIN_SCORE > $score && + if($INFRAME_CODON_LOSS_AND_GAIN_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getComplexIndelClass)){ # in frame complex sub - $score = $self->INFRAME_CODON_LOSS_AND_GAIN_SCORE; + $score = $INFRAME_CODON_LOSS_AND_GAIN_SCORE; } - if($self->INFRAME_CODON_GAIN_SCORE > $score && + if($INFRAME_CODON_GAIN_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getInFrameCodonGainVariantClass)){ # in frame insertion - $score = $self->INFRAME_CODON_GAIN_SCORE; + $score = $INFRAME_CODON_GAIN_SCORE; } - if($self->INITIATOR_CHANGE_SCORE > $score && + if($INITIATOR_CHANGE_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getStartLostVariantClass)){ # start lost - $score = $self->INITIATOR_CHANGE_SCORE; + $score = $INITIATOR_CHANGE_SCORE; } - if($self->STOP_LOST_SCORE > $score && + if($STOP_LOST_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getStopLostVariantClass)){ # stop lost - $score = $self->STOP_LOST_SCORE; + $score = $STOP_LOST_SCORE; } - if($self->NON_SYNONYMOUS_SCORE > $score && + if($NON_SYNONYMOUS_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $prot->hasClassification($self->getNonSynonymousVariantClass)){ # mis sense - $score = $self->NON_SYNONYMOUS_SCORE; + $score = $NON_SYNONYMOUS_SCORE; } - if($self->SYNONYMOUS_SCORE > $score && + if($SYNONYMOUS_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && ($prot->hasClassification($self->getSynonymousVariantClass) || $prot->hasClassification($self->getStopRetainedVariantClass))){ # silent including terminator silent - $score = $self->SYNONYMOUS_SCORE; + $score = $SYNONYMOUS_SCORE; } } elsif(defined($cds) && $cds->getType() ne $mrna->getUnknownAnnotationType){ # CDS annotation - if($self->FRAMESHIFT_SCORE > $score && + if($FRAMESHIFT_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && $cds->hasClassification($self->getFrameShiftVariantClass)){ # frame shift again, incase protein translation was too complex - $score = $self->FRAMESHIFT_SCORE; + $score = $FRAMESHIFT_SCORE; } - if($self->CODING_ESS_SPLICE_SCORE > $score && + if($CODING_ESS_SPLICE_SCORE > $score && $g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->getCDSClass) && $cds->hasClassification($self->getEssentialSpliceSiteVariantClass)){ # essential splice change in CDS - $score = $self->CODING_ESS_SPLICE_SCORE; + $score = $CODING_ESS_SPLICE_SCORE; } - if($self->COMPLEX_IN_CDS_SCORE > $score && + if($COMPLEX_IN_CDS_SCORE > $score && $g->hasClassification($self->getCDSClass) && $cds->hasClassification($self->getComplexChangeVariantClass)){ # complex transcript consequence involving CDS if($cds->getMinPos() == 1 && $cds->getMaxPos() == $cds->getSequenceLength()){ # position 1 to CDS length effected, transcript lost - $score = $self->COMPLETE_PROTEIN_LOSS_SCORE; + $score = $COMPLETE_PROTEIN_LOSS_SCORE; } elsif($g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass)){ # essential splice change - $score = $self->CODING_ESS_SPLICE_SCORE; + $score = $CODING_ESS_SPLICE_SCORE; } elsif($g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && ($g->hasClassification($self->get5PrimeUtrClass) || $mrna->hasClassification($self->get2KBUpStreamVariantClass)) && $cds->getMinPos() == 1){ # start codon lost - $score = $self->INITIATOR_CHANGE_SCORE; + $score = $INITIATOR_CHANGE_SCORE; } elsif($g->hasClassification($self->getExonClass) && $g->hasClassification($self->getCDSClass) && ($g->hasClassification($self->get3PrimeUtrClass) || $mrna->hasClassification($self->get500BPDownStreamVariantClass)) && $cds->getMaxPos() == $cds->getSequenceLength()){ # stop codon lost - $score = $self->STOP_LOST_SCORE; + $score = $STOP_LOST_SCORE; } else { # if its none of the above, its just complex in CDS - $score = $self->COMPLEX_IN_CDS_SCORE; + $score = $COMPLEX_IN_CDS_SCORE; } } - if($self->CODING_SPLICE_REGION_SCORE > $score && + if($CODING_SPLICE_REGION_SCORE > $score && $g->hasClassification($self->getSpliceRegionClass) && $g->hasClassification($self->getCDSClass) && $cds->hasClassification($self->getSpliceRegionVariantClass)){ # splice region change in CDS - $score = $self->CODING_SPLICE_REGION_SCORE; + $score = $CODING_SPLICE_REGION_SCORE; } - if($self->FIVEPRIME_UTR_ESS_SPLICE_SCORE > $score && + if($FIVEPRIME_UTR_ESS_SPLICE_SCORE > $score && $g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->get5PrimeUtrClass) && $mrna->hasClassification($self->getEssentialSpliceSiteVariantClass) && $mrna->hasClassification($self->get5PrimeUtrVariantClass)){ # essential splice change in 5' UTR, the splice site is probably directly before to the start codon - $score = $self->FIVEPRIME_UTR_ESS_SPLICE_SCORE; + $score = $FIVEPRIME_UTR_ESS_SPLICE_SCORE; } - if($self->THREEPRIME_UTR_ESS_SPLICE_SCORE > $score && + if($THREEPRIME_UTR_ESS_SPLICE_SCORE > $score && $g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->get3PrimeUtrClass) && $mrna->hasClassification($self->getEssentialSpliceSiteVariantClass) && $mrna->hasClassification($self->get3PrimeUtrVariantClass)){ # essential splice change in 3' UTR, the splice site is probably directly after to the stop codon - $score = $self->THREEPRIME_UTR_ESS_SPLICE_SCORE; + $score = $THREEPRIME_UTR_ESS_SPLICE_SCORE; } - if($self->FIVEPRIME_UTR_SPLICE_REGION_SCORE > $score && + if($FIVEPRIME_UTR_SPLICE_REGION_SCORE > $score && $g->hasClassification($self->getSpliceRegionClass) && $g->hasClassification($self->get5PrimeUtrClass) && $mrna->hasClassification($self->getSpliceRegionVariantClass) && $mrna->hasClassification($self->get5PrimeUtrVariantClass)){ # splice region change in 5' UTR, the splice site is probably directly before to the start codon - $score = $self->FIVEPRIME_UTR_SPLICE_REGION_SCORE; + $score = $FIVEPRIME_UTR_SPLICE_REGION_SCORE; } - if($self->THREEPRIME_UTR_SPLICE_REGION_SCORE > $score && + if($THREEPRIME_UTR_SPLICE_REGION_SCORE > $score && $g->hasClassification($self->getSpliceRegionClass) && $g->hasClassification($self->get3PrimeUtrClass) && $mrna->hasClassification($self->getSpliceRegionVariantClass) && $mrna->hasClassification($self->get3PrimeUtrVariantClass)){ # splice region change in 3' UTR, the splice site is probably directly after to the stop codon - $score = $self->THREEPRIME_UTR_SPLICE_REGION_SCORE; + $score = $THREEPRIME_UTR_SPLICE_REGION_SCORE; } } elsif(defined($mrna)) { # cDNA annotation - if($self->START_GAINED_SCORE > $score && + if($START_GAINED_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->get5PrimeUtrClass) && $mrna->hasClassification($self->getPrematureStartGainedVariantClass)){ # new start codon created in the 5' UTR - $score = $self->START_GAINED_SCORE; + $score = $START_GAINED_SCORE; } - if($self->FIVEPRIME_UTR_ESS_SPLICE_SCORE > $score && + if($FIVEPRIME_UTR_ESS_SPLICE_SCORE > $score && $g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->get5PrimeUtrClass) && $mrna->hasClassification($self->getEssentialSpliceSiteVariantClass) && $mrna->hasClassification($self->get5PrimeUtrVariantClass)){ # essential splice change in 5' UTR - $score = $self->FIVEPRIME_UTR_ESS_SPLICE_SCORE; + $score = $FIVEPRIME_UTR_ESS_SPLICE_SCORE; } - if($self->THREEPRIME_UTR_ESS_SPLICE_SCORE > $score && + if($THREEPRIME_UTR_ESS_SPLICE_SCORE > $score && $g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->get3PrimeUtrClass) && $mrna->hasClassification($self->getEssentialSpliceSiteVariantClass) && $mrna->hasClassification($self->get3PrimeUtrVariantClass)){ # essential splice change in 3' UTR - $score = $self->THREEPRIME_UTR_ESS_SPLICE_SCORE; + $score = $THREEPRIME_UTR_ESS_SPLICE_SCORE; } - if($self->THREEPRIME_UTR_SCORE > $score && + if($THREEPRIME_UTR_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->get3PrimeUtrClass) && $mrna->hasClassification($self->get3PrimeUtrVariantClass)){ # change in 3' UTR exon - $score = $self->THREEPRIME_UTR_SCORE; + $score = $THREEPRIME_UTR_SCORE; } - if($self->FIVEPRIME_UTR_SCORE > $score && + if($FIVEPRIME_UTR_SCORE > $score && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->get5PrimeUtrClass) && $mrna->hasClassification($self->get5PrimeUtrVariantClass)){ # change in 5' UTR exon - $score = $self->FIVEPRIME_UTR_SCORE; + $score = $FIVEPRIME_UTR_SCORE; } - if($self->FIVEPRIME_UTR_SPLICE_REGION_SCORE > $score && + if($FIVEPRIME_UTR_SPLICE_REGION_SCORE > $score && $g->hasClassification($self->getSpliceRegionClass) && $g->hasClassification($self->get5PrimeUtrClass) && $mrna->hasClassification($self->getSpliceRegionVariantClass) && $mrna->hasClassification($self->get5PrimeUtrVariantClass)){ # splice region change in 5' UTR - $score = $self->FIVEPRIME_UTR_SPLICE_REGION_SCORE; + $score = $FIVEPRIME_UTR_SPLICE_REGION_SCORE; } - if($self->THREEPRIME_UTR_SPLICE_REGION_SCORE > $score && + if($THREEPRIME_UTR_SPLICE_REGION_SCORE > $score && $g->hasClassification($self->getSpliceRegionClass) && $g->hasClassification($self->get3PrimeUtrClass) && $mrna->hasClassification($self->getSpliceRegionVariantClass) && $mrna->hasClassification($self->get3PrimeUtrVariantClass)){ # splice region change in 3' UTR - $score = $self->THREEPRIME_UTR_SPLICE_REGION_SCORE; + $score = $THREEPRIME_UTR_SPLICE_REGION_SCORE; } - if($self->COMPLEX_IN_MRNA_SCORE > $score && + if($COMPLEX_IN_MRNA_SCORE > $score && $g->hasClassification($self->getCDSClass) && $mrna->hasClassification($self->getComplexChangeVariantClass)){ # complex transcript consequence involving only UTR (no CDS) if($g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->get5PrimeUtrClass)){ # essential splice change in 5' UTR - $score = $self->FIVEPRIME_UTR_ESS_SPLICE_SCORE; + $score = $FIVEPRIME_UTR_ESS_SPLICE_SCORE; } elsif($g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->getExonClass) && $g->hasClassification($self->get3PrimeUtrClass)){ # essential splice change in 3' UTR - $score = $self->THREEPRIME_UTR_ESS_SPLICE_SCORE; + $score = $THREEPRIME_UTR_ESS_SPLICE_SCORE; } else { # if its none of the above, its just complex in mrna - $score = $self->COMPLEX_IN_MRNA_SCORE; + $score = $COMPLEX_IN_MRNA_SCORE; } } - if($self->INTRONIC_SCORE > $score && + if($INTRONIC_SCORE > $score && $g->hasClassification($self->getIntronClass) && $mrna->hasClassification($self->getIntronVariantClass)){ # intronic change - $score = $self->INTRONIC_SCORE; + $score = $INTRONIC_SCORE; } - if($self->UPSTREAM_SCORE > $score && + if($UPSTREAM_SCORE > $score && ($mrna->hasClassification($self->get2KBUpStreamVariantClass) || $mrna->hasClassification($self->get5KBUpStreamVariantClass))){ # upstream of transcript - $score = $self->UPSTREAM_SCORE; + $score = $UPSTREAM_SCORE; } - if($self->DOWNSTREAM_SCORE > $score && + if($DOWNSTREAM_SCORE > $score && ($mrna->hasClassification($self->get500BPDownStreamVariantClass) || $mrna->hasClassification($self->get5KBDownStreamVariantClass))){ # downstream of transcript - $score = $self->DOWNSTREAM_SCORE; + $score = $DOWNSTREAM_SCORE; } } } else { # non-coding transcript - if($self->COMPLETE_NONCODING_TRANSCRIPT_LOSS_SCORE > $score && + if($COMPLETE_NONCODING_TRANSCRIPT_LOSS_SCORE > $score && $mrna->getMinPos() == 1 && $mrna->getMaxPos() == $mrna->getSequenceLength()){ # if marked as a variant, start = 1 and end = sequence length the transcript is gone. - $score = $self->COMPLETE_NONCODING_TRANSCRIPT_LOSS_SCORE; + $score = $COMPLETE_NONCODING_TRANSCRIPT_LOSS_SCORE; } - if($self->NONCODING_GENE_ESS_SPLICE_SCORE > $score && + if($NONCODING_GENE_ESS_SPLICE_SCORE > $score && $g->hasClassification($self->getEssentialSpliceSiteClass) && $mrna->hasClassification($self->getEssentialSpliceSiteVariantClass)){ # essential splice change - $score = $self->NONCODING_GENE_ESS_SPLICE_SCORE; + $score = $NONCODING_GENE_ESS_SPLICE_SCORE; } - if($self->NONCODING_GENE_SPLICE_REGION_SCORE > $score && + if($NONCODING_GENE_SPLICE_REGION_SCORE > $score && $g->hasClassification($self->getSpliceRegionClass) && $mrna->hasClassification($self->getSpliceRegionVariantClass)){ # splice region change - $score = $self->NONCODING_GENE_SPLICE_REGION_SCORE; + $score = $NONCODING_GENE_SPLICE_REGION_SCORE; } - if($self->COMPLEX_IN_MRNA_SCORE > $score && $mrna->hasClassification($self->getComplexChangeVariantClass)){ + if($COMPLEX_IN_MRNA_SCORE > $score && $mrna->hasClassification($self->getComplexChangeVariantClass)){ # complex transcript consequence if($g->hasClassification($self->getEssentialSpliceSiteClass) && $g->hasClassification($self->getExonClass)){ # essential splice change - $score = $self->NONCODING_GENE_ESS_SPLICE_SCORE; + $score = $NONCODING_GENE_ESS_SPLICE_SCORE; } else { # if its not ess splice, its just complex (probably straddles transcript boundary) - $score = $self->COMPLEX_IN_MRNA_SCORE; + $score = $COMPLEX_IN_MRNA_SCORE; } } - if($self->NONCODING_GENE_SCORE > $score && + if($NONCODING_GENE_SCORE > $score && $g->hasClassification($self->getExonClass) && $mrna->hasClassification($self->getNonCodingTranscriptVariantClass)){ # exonic change - $score = $self->NONCODING_GENE_SCORE; + $score = $NONCODING_GENE_SCORE; } - if($self->INTRONIC_SCORE > $score && + if($INTRONIC_SCORE > $score && $g->hasClassification($self->getIntronClass) && $mrna->hasClassification($self->getIntronVariantClass)){ # intronic change - $score = $self->INTRONIC_SCORE; + $score = $INTRONIC_SCORE; } - if($self->UPSTREAM_SCORE > $score && + if($UPSTREAM_SCORE > $score && ($mrna->hasClassification($self->get2KBUpStreamVariantClass) || $mrna->hasClassification($self->get5KBUpStreamVariantClass))){ # upstream of transcript - $score = $self->UPSTREAM_SCORE; + $score = $UPSTREAM_SCORE; } - if($self->DOWNSTREAM_SCORE > $score && + if($DOWNSTREAM_SCORE > $score && ($mrna->hasClassification($self->get500BPDownStreamVariantClass) || $mrna->hasClassification($self->get5KBDownStreamVariantClass))){ # downstream of transcript - $score = $self->DOWNSTREAM_SCORE; + $score = $DOWNSTREAM_SCORE; } } diff --git a/lib/Sanger/CGP/Vagrent/Data/Annotation.pm b/lib/Sanger/CGP/Vagrent/Data/Annotation.pm index c02371c..ef7a008 100644 --- a/lib/Sanger/CGP/Vagrent/Data/Annotation.pm +++ b/lib/Sanger/CGP/Vagrent/Data/Annotation.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::Data::Annotation; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -26,38 +26,39 @@ use strict; use Data::Dumper; use List::Util qw(first); use Carp; +use Const::Fast qw(const); use Sanger::CGP::Vagrent qw($VERSION); use base qw(Sanger::CGP::Vagrent); -1; - # sequence context constant values -use constant CDS_ANNOTATION_CONTEXT => 'CDS'; -use constant MRNA_ANNOTATION_CONTEXT => 'mRNA'; -use constant PROTEIN_ANNOTATION_CONTEXT => 'Protein'; +const my $CDS_ANNOTATION_CONTEXT => 'CDS'; +const my $MRNA_ANNOTATION_CONTEXT => 'mRNA'; +const my $PROTEIN_ANNOTATION_CONTEXT => 'Protein'; -use constant ALL_ANNOTATION_CONTEXT => (CDS_ANNOTATION_CONTEXT,MRNA_ANNOTATION_CONTEXT,PROTEIN_ANNOTATION_CONTEXT); +const my @ALL_ANNOTATION_CONTEXT => ($CDS_ANNOTATION_CONTEXT,$MRNA_ANNOTATION_CONTEXT,$PROTEIN_ANNOTATION_CONTEXT); # variation type constant values -use constant SUBSTITUTION_ANNOTATION_TYPE => 'Substitution'; -use constant DELETION_ANNOTATION_TYPE => 'Deletion'; -use constant INSERTION_ANNOTATION_TYPE => 'Insertion'; -use constant COMPLEX_ANNOTATION_TYPE => 'Complex'; -use constant FRAMESHIFT_ANNOTATION_TYPE => 'FrameShift'; -use constant UNKNOWN_ANNOTATION_TYPE => 'Unknown'; +const my $SUBSTITUTION_ANNOTATION_TYPE => 'Substitution'; +const my $DELETION_ANNOTATION_TYPE => 'Deletion'; +const my $INSERTION_ANNOTATION_TYPE => 'Insertion'; +const my $COMPLEX_ANNOTATION_TYPE => 'Complex'; +const my $FRAMESHIFT_ANNOTATION_TYPE => 'FrameShift'; +const my $UNKNOWN_ANNOTATION_TYPE => 'Unknown'; -use constant ALL_ANNOTATION_TYPES => (COMPLEX_ANNOTATION_TYPE,FRAMESHIFT_ANNOTATION_TYPE,INSERTION_ANNOTATION_TYPE,DELETION_ANNOTATION_TYPE,SUBSTITUTION_ANNOTATION_TYPE,UNKNOWN_ANNOTATION_TYPE); +const my @ALL_ANNOTATION_TYPES => ($COMPLEX_ANNOTATION_TYPE,$FRAMESHIFT_ANNOTATION_TYPE,$INSERTION_ANNOTATION_TYPE,$DELETION_ANNOTATION_TYPE,$SUBSTITUTION_ANNOTATION_TYPE,$UNKNOWN_ANNOTATION_TYPE); # variation subtype constant values for location data description -use constant POSITION_KNOWN_SUBTYPE => 'POS-KNOWN'; -use constant POSITION_OFFSET_SUBTYPE => 'POS-OFFSET'; -use constant POSITION_OFF_SEQUENCE_SUBTYPE => 'POS-OFFSEQ'; +const my $POSITION_KNOWN_SUBTYPE => 'POS-KNOWN'; +const my $POSITION_OFFSET_SUBTYPE => 'POS-OFFSET'; +const my $POSITION_OFF_SEQUENCE_SUBTYPE => 'POS-OFFSEQ'; -use constant ALL_ANNOTATION_SUBTYPES => (POSITION_OFF_SEQUENCE_SUBTYPE,POSITION_OFFSET_SUBTYPE,POSITION_KNOWN_SUBTYPE); +const my @ALL_ANNOTATION_SUBTYPES => ($POSITION_OFF_SEQUENCE_SUBTYPE,$POSITION_OFFSET_SUBTYPE,$POSITION_KNOWN_SUBTYPE); + +1; sub new { my $proto = shift; @@ -98,7 +99,7 @@ sub _init { $self->{_description} = $vars{description}; } elsif($k eq 'context'){ my $good = undef; - foreach my $context(Sanger::CGP::Vagrent::Data::Annotation::ALL_ANNOTATION_CONTEXT){ + foreach my $context(@ALL_ANNOTATION_CONTEXT){ if($vars{context} eq $context){ $good = $vars{context}; last; @@ -111,7 +112,7 @@ sub _init { } } elsif($k eq 'type'){ my $good = undef; - foreach my $type(Sanger::CGP::Vagrent::Data::Annotation::ALL_ANNOTATION_TYPES){ + foreach my $type(@ALL_ANNOTATION_TYPES){ if($vars{type} eq $type){ $good = $vars{type}; last; @@ -124,7 +125,7 @@ sub _init { } } elsif($k eq 'subtype'){ my $good = undef; - foreach my $subtype(Sanger::CGP::Vagrent::Data::Annotation::ALL_ANNOTATION_SUBTYPES){ + foreach my $subtype(@ALL_ANNOTATION_SUBTYPES){ if($vars{subtype} eq $subtype){ $good = $vars{subtype}; last; @@ -141,51 +142,51 @@ sub _init { } sub getCDSAnnotationContext { - return CDS_ANNOTATION_CONTEXT; + return $CDS_ANNOTATION_CONTEXT; } sub getmRNAAnnotationContext { - return MRNA_ANNOTATION_CONTEXT; + return $MRNA_ANNOTATION_CONTEXT; } sub getProteinAnnotationContext { - return PROTEIN_ANNOTATION_CONTEXT; + return $PROTEIN_ANNOTATION_CONTEXT; } sub getSubstitutionAnnotationType { - return SUBSTITUTION_ANNOTATION_TYPE; + return $SUBSTITUTION_ANNOTATION_TYPE; } sub getDeletionAnnotationType { - return DELETION_ANNOTATION_TYPE; + return $DELETION_ANNOTATION_TYPE; } sub getComplexAnnotationType { - return COMPLEX_ANNOTATION_TYPE; + return $COMPLEX_ANNOTATION_TYPE; } sub getInsertionAnnotationType { - return INSERTION_ANNOTATION_TYPE; + return $INSERTION_ANNOTATION_TYPE; } sub getFrameShiftAnnotationType { - return FRAMESHIFT_ANNOTATION_TYPE; + return $FRAMESHIFT_ANNOTATION_TYPE; } sub getUnknownAnnotationType { - return UNKNOWN_ANNOTATION_TYPE; + return $UNKNOWN_ANNOTATION_TYPE; } sub getPositionKnownSubtype { - return POSITION_KNOWN_SUBTYPE; + return $POSITION_KNOWN_SUBTYPE; } sub getPositionOffsetSubtype { - return POSITION_OFFSET_SUBTYPE; + return $POSITION_OFFSET_SUBTYPE; } sub getPositionOffSequenceSubtype { - return POSITION_OFF_SEQUENCE_SUBTYPE; + return $POSITION_OFF_SEQUENCE_SUBTYPE; } sub getMinPos { diff --git a/lib/Sanger/CGP/Vagrent/Data/AnnotationGroup.pm b/lib/Sanger/CGP/Vagrent/Data/AnnotationGroup.pm index c081f6e..30bda82 100644 --- a/lib/Sanger/CGP/Vagrent/Data/AnnotationGroup.pm +++ b/lib/Sanger/CGP/Vagrent/Data/AnnotationGroup.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::Data::AnnotationGroup; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -55,7 +55,7 @@ sub _init { $self->{_accession} = $vars{accession}; } elsif($k eq 'type'){ my $good = undef; - foreach my $type(Sanger::CGP::Vagrent::Data::Transcript::ALL_GENE_TYPES){ + foreach my $type(Sanger::CGP::Vagrent::Data::Transcript->getAllGeneTypes){ if($vars{type} eq $type){ $good = $vars{type}; last; diff --git a/lib/Sanger/CGP/Vagrent/Data/Transcript.pm b/lib/Sanger/CGP/Vagrent/Data/Transcript.pm index e06b737..fc7dd62 100644 --- a/lib/Sanger/CGP/Vagrent/Data/Transcript.pm +++ b/lib/Sanger/CGP/Vagrent/Data/Transcript.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::Data::Transcript; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -27,19 +27,20 @@ use Carp; use Data::Dumper; use Sanger::CGP::Vagrent qw($VERSION); use Sort::Key qw(nkeysort); +use Const::Fast qw(const); use base qw(Sanger::CGP::Vagrent); -1; +const my $PROTEIN_CODING_TYPE => 'ProteinCoding'; +const my $MIRCO_RNA_TYPE => 'miRNA'; +const my $LINC_RNA_TYPE => 'lincRNA'; +const my $SNO_RNA_TYPE => 'snoRNA'; +const my $SN_RNA_TYPE => 'snRNA'; +const my $R_RNA_TYPE => 'rRNA'; -use constant PROTEIN_CODING_TYPE => 'ProteinCoding'; -use constant MIRCO_RNA_TYPE => 'miRNA'; -use constant LINC_RNA_TYPE => 'lincRNA'; -use constant SNO_RNA_TYPE => 'snoRNA'; -use constant SN_RNA_TYPE => 'snRNA'; -use constant R_RNA_TYPE => 'rRNA'; +const my @ALL_GENE_TYPES => ($PROTEIN_CODING_TYPE,$MIRCO_RNA_TYPE,$LINC_RNA_TYPE,$SNO_RNA_TYPE,$SN_RNA_TYPE,$R_RNA_TYPE); -use constant ALL_GENE_TYPES => (PROTEIN_CODING_TYPE,MIRCO_RNA_TYPE,LINC_RNA_TYPE,SNO_RNA_TYPE,SN_RNA_TYPE,R_RNA_TYPE); +1; sub new { my $proto = shift; @@ -89,7 +90,7 @@ sub _init { } elsif($k eq 'genetype'){ $self->{_genetype} = $vars{genetype}; my $good = undef; - foreach my $type(Sanger::CGP::Vagrent::Data::Transcript::ALL_GENE_TYPES){ + foreach my $type(@ALL_GENE_TYPES){ if($vars{genetype} eq $type){ $good = $vars{genetype}; last; @@ -104,6 +105,10 @@ sub _init { } } +sub getAllGeneTypes { + return @ALL_GENE_TYPES; +} + sub getGeneType { return shift->{_genetype}; } @@ -205,7 +210,7 @@ sub getGenomicMaxPos { } sub isProteinCoding { - if(shift->getGeneType eq PROTEIN_CODING_TYPE){ + if(shift->getGeneType eq $PROTEIN_CODING_TYPE){ return 1; } else { return 0; @@ -213,27 +218,27 @@ sub isProteinCoding { } sub getProteinCodingType { - return PROTEIN_CODING_TYPE; + return $PROTEIN_CODING_TYPE; } sub getMicroRnaType { - return MIRCO_RNA_TYPE; + return $MIRCO_RNA_TYPE; } sub getLincRnaType { - return LINC_RNA_TYPE; + return $LINC_RNA_TYPE; } sub getSnoRnaType { - return SNO_RNA_TYPE; + return $SNO_RNA_TYPE; } sub getSnRnaType { - return SN_RNA_TYPE; + return $SN_RNA_TYPE; } sub getRRnaType { - return R_RNA_TYPE; + return $R_RNA_TYPE; } __END__ diff --git a/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/AbstractGenomicRegionDumper.pm b/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/AbstractGenomicRegionDumper.pm index efca1df..7ce3456 100644 --- a/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/AbstractGenomicRegionDumper.pm +++ b/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/AbstractGenomicRegionDumper.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::GenomicRegionDumper::AbstractGenomicRegionDumper; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -35,8 +35,6 @@ my $log = Log::Log4perl->get_logger(__PACKAGE__); 1; -use constant REGION_DUMP_SUB_WINDOW => 99999; - sub _init { my $self = shift; my %vars = @_; diff --git a/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/SubstitutionGenomicRegionDumper.pm b/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/SubstitutionGenomicRegionDumper.pm index ab6ffeb..5d788aa 100644 --- a/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/SubstitutionGenomicRegionDumper.pm +++ b/lib/Sanger/CGP/Vagrent/GenomicRegionDumper/SubstitutionGenomicRegionDumper.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::GenomicRegionDumper::SubstitutionGenomicRegionDump ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -182,7 +182,7 @@ sub _convertExonToAnnotatableCodingExonicRegions { } sub _getLowestContinuousAcceptorEssentialSpliceSiteOffset { - my @offsets = sort {$b <=> $a} Sanger::CGP::Vagrent::Annotators::AbstractAnnotator::CONSENSUS_SPLICE_OFFSETS; + my @offsets = sort {$b <=> $a} Sanger::CGP::Vagrent::Annotators::AbstractAnnotator->getConsensusSpliceOffsets; my $out = 0; foreach my $o(@offsets){ next if $o > 0; @@ -196,7 +196,7 @@ sub _getLowestContinuousAcceptorEssentialSpliceSiteOffset { } sub _getHighestContinuousDonorEssentialSpliceSiteOffset { - my @offsets = sort {$a <=> $b} Sanger::CGP::Vagrent::Annotators::AbstractAnnotator::CONSENSUS_SPLICE_OFFSETS; + my @offsets = sort {$a <=> $b} Sanger::CGP::Vagrent::Annotators::AbstractAnnotator->getConsensusSpliceOffsets; my $out = 0; foreach my $o(@offsets){ next if $o < 0; @@ -210,7 +210,7 @@ sub _getHighestContinuousDonorEssentialSpliceSiteOffset { } sub _getIsolatedDonorEssentialSpliceSiteOffset { - my ($out) = sort {$b <=> $a} Sanger::CGP::Vagrent::Annotators::AbstractAnnotator::CONSENSUS_SPLICE_OFFSETS; + my ($out) = sort {$b <=> $a} Sanger::CGP::Vagrent::Annotators::AbstractAnnotator->getConsensusSpliceOffsets; return $out; } diff --git a/lib/Sanger/CGP/Vagrent/IO/Writer/SingleLineCsvFileWriter.pm b/lib/Sanger/CGP/Vagrent/IO/Writer/SingleLineCsvFileWriter.pm index c53bc6e..7bf1c92 100644 --- a/lib/Sanger/CGP/Vagrent/IO/Writer/SingleLineCsvFileWriter.pm +++ b/lib/Sanger/CGP/Vagrent/IO/Writer/SingleLineCsvFileWriter.pm @@ -2,21 +2,21 @@ package Sanger::CGP::Vagrent::IO::Writer::SingleLineCsvFileWriter; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## diff --git a/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm b/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm index 9562655..16dc49b 100644 --- a/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm +++ b/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm @@ -14,21 +14,21 @@ package Sanger::CGP::Vagrent::Ontology::SequenceOntologyClassifier; ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -129,9 +129,9 @@ sub _loadOntologySummaryIni { sub getOntologySummary { my ($self,$anno) = @_; - my $mrna = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext); - my $cds = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext); - my $prot = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext); + my $mrna = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()); + my $cds = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()); + my $prot = $anno->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()); my @class = $anno->getClassifications; my @terms; foreach my $a($anno,$mrna,$cds,$prot){ diff --git a/prerelease.sh b/prerelease.sh index 80dca4c..e18b993 100755 --- a/prerelease.sh +++ b/prerelease.sh @@ -2,21 +2,21 @@ ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -46,12 +46,11 @@ echo '########## Running perl tests ##########' export HARNESS_PERL_SWITCHES=-MDevel::Cover=-db,reports,-ignore,'t/.*\.t,/Bio', rm -rf docs mkdir -p docs/reports_text -set -o pipefail; -prove -w --nocolor -I ./lib | sed 's/^/ /' # indent output of prove +prove -w -I lib t echo '### Generating test/pod coverage reports ###' # removed 'condition' from coverage as '||' 'or' doesn't work properly -cover -coverage branch,subroutine,pod -report_c0 50 -report_c1 85 -report_c2 100 -report html_basic reports -silent > /dev/null +cover -coverage branch,subroutine,pod -report_c0 50 -report_c1 85 -report_c2 100 -report html_basic reports -silent cover -coverage branch,subroutine,pod -report text reports -silent > docs/reports_text/coverage.txt rm -rf reports/structure reports/digests reports/cover.13 reports/runs cp reports/coverage.html reports/index.html diff --git a/setup.sh b/setup.sh index e72dc81..72b92dc 100755 --- a/setup.sh +++ b/setup.sh @@ -82,10 +82,8 @@ cd $INIT_DIR # make sure that build is self contained unset PERL5LIB -ARCHNAME=`perl -e 'use Config; print $Config{archname};'` PERLROOT=$INST_PATH/lib/perl5 -PERLARCH=$PERLROOT/$ARCHNAME -export PERL5LIB="$PERLROOT:$PERLARCH" +export PERL5LIB="$PERLROOT" # log information about this system ( @@ -127,18 +125,18 @@ if [ -e $SETUP_DIR/$CURR_TOOL.success ]; then echo -n " previously installed ..." else ( - set -ex + set -x get_distro $CURR_TOOL $CURR_SOURCE - cd $SETUP_DIR/$CURR_TOOL - make -j$CPU - cp tabix $INST_PATH/bin/. - cp bgzip $INST_PATH/bin/. - cd perl - patch Makefile.PL < $INIT_DIR/patches/tabixPerlLinker.diff - perl Makefile.PL INSTALL_BASE=$INST_PATH - make - make test - make install + cd $SETUP_DIR/$CURR_TOOL && \ + make -j$CPU && \ + cp tabix $INST_PATH/bin/. && \ + cp bgzip $INST_PATH/bin/. && \ + cd perl && \ + patch Makefile.PL < $INIT_DIR/patches/tabixPerlLinker.diff && \ + perl Makefile.PL INSTALL_BASE=$INST_PATH && \ + make && \ + make test && \ + make install && \ touch $SETUP_DIR/$CURR_TOOL.success ) >>$INIT_DIR/setup.log 2>&1 fi @@ -153,12 +151,12 @@ if [ -e $SETUP_DIR/$CURR_TOOL.success ]; then echo -n " previously installed ..." else ( - set -ex + set -x get_distro $CURR_TOOL $CURR_SOURCE - cd $SETUP_DIR/$CURR_TOOL - patch Makefile < $INIT_DIR/patches/vcfToolsInstLocs.diff - patch perl/Vcf.pm < $INIT_DIR/patches/vcfToolsProcessLog.diff - make -j$CPU PREFIX=$INST_PATH + cd $SETUP_DIR/$CURR_TOOL && \ + patch Makefile < $INIT_DIR/patches/vcfToolsInstLocs.diff && \ + patch perl/Vcf.pm < $INIT_DIR/patches/vcfToolsProcessLog.diff && \ + make -j$CPU PREFIX=$INST_PATH && \ touch $SETUP_DIR/$CURR_TOOL.success ) >>$INIT_DIR/setup.log 2>&1 fi @@ -177,7 +175,7 @@ else get_distro "samtools" $SOURCE_SAMTOOLS perl -i -pe 's/^CFLAGS=\s*/CFLAGS=-fPIC / unless /\b-fPIC\b/' samtools/Makefile fi - make -C samtools -j$CPU + make -C samtools -j$CPU && \ touch $SETUP_DIR/samtools.success )>>$INIT_DIR/setup.log 2>&1 fi @@ -204,11 +202,10 @@ done_message "" "Failed during installation of core dependencies." echo -n "Installing vagrent ..." ( - set -e - cd $INIT_DIR - perl Makefile.PL INSTALL_BASE=$INST_PATH - make - make test + cd $INIT_DIR && \ + perl Makefile.PL INSTALL_BASE=$INST_PATH && \ + make && \ + make test && \ make install ) >>$INIT_DIR/setup.log 2>&1 done_message "" "vagrent install failed." @@ -216,13 +213,10 @@ done_message "" "vagrent install failed." # cleanup all junk rm -rf $SETUP_DIR - - echo echo echo "Please add the following to beginning of path:" echo " $INST_PATH/bin" echo "Please add the following to beginning of PERL5LIB:" echo " $PERLROOT" -echo " $PERLARCH" echo diff --git a/t/1_pm_compile.t b/t/1_pm_compile.t index a726ea0..3adef27 100644 --- a/t/1_pm_compile.t +++ b/t/1_pm_compile.t @@ -1,20 +1,20 @@ ##########LICENCE########## # Copyright (c) 2014 Genome Research Ltd. -# +# # Author: Cancer Genome Project cgpit@sanger.ac.uk -# +# # This file is part of VAGrENT. -# +# # VAGrENT is free software: you can redistribute it and/or modify it under # the terms of the GNU Affero General Public License as published by the Free # Software Foundation; either version 3 of the License, or (at your option) any # later version. -# +# # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more # details. -# +# # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . ##########LICENCE########## @@ -28,6 +28,7 @@ use List::Util qw(first); use File::Find; use Cwd; use Try::Tiny qw(try finally); +use Capture::Tiny qw(capture); use File::Spec; use Const::Fast qw(const); @@ -46,28 +47,25 @@ const my @NEW_SKIP => qw( Sanger::CGP::Vagrent::Data::AbstractGenomicPosition Sanger::CGP::Vagrent::Data::Substitution Sanger::CGP::Vagrent::TranscriptSource::AbstractTranscriptSource ); -#warn "\n###################### WARNING #######################\n# 1_pm_compile.pl DISABLED until vagrent core update #\n###################### WARNING #######################\n\n"; -#ok(1,'Here to show willing'); -#done_testing(); -#exit; - my $init_cwd = getcwd; +my $bail_count; + my @modules; try { chdir($lib_path); - find({ wanted => \&build_module_set, no_chdir => 1 }, './'); + find({ wanted => \&compile_modules, no_chdir => 1 }, './'); } finally { chdir $init_cwd; die "The try block died with: @_\n" if(@_); }; -for my $mod(@modules) { - next if( first {$mod eq $_} @USE_SKIP ); - use_ok($mod) or BAIL_OUT("Unable to 'use' module $mod"); +if($bail_count) { + BAIL_OUT("Modules failed to compile, see above errors"); } for my $mod(@modules) { + use_ok($mod); ok($mod->VERSION, "Check version inheritance exists ($mod)"); if($mod->can('new')) { # only try new on things that have new defined new_ok($mod) unless( first {$mod eq $_} (@USE_SKIP, @NEW_SKIP) ); @@ -76,9 +74,19 @@ for my $mod(@modules) { done_testing(); -sub build_module_set { +sub compile_modules { +#return unless($_ =~ m/SequenceOntologyClassifier.pm/); if($_ =~ m/\.pm$/) { + my ($stdout, $stderr, $exit) = capture { system("$^X -c $_"); }; + warn "\n$stderr" if($exit); + is($exit, 0, "Module fails to compile cleanly: $_"); + $bail_count += $exit; + } +} + +sub build_module_set { + if($_ =~ m/\.pm$/) { my ($dir_str,$file) = (File::Spec->splitpath( $_ ))[1,2]; $file =~ s/\.pm$//; my @dirs = File::Spec->splitdir( $dir_str );