diff --git a/bin/AnnotateVcf.pl b/bin/AnnotateVcf.pl
index 36c000f..7bd2083 100755
--- a/bin/AnnotateVcf.pl
+++ b/bin/AnnotateVcf.pl
@@ -33,6 +33,8 @@
 use Data::Dumper;
 
 use List::Util qw(first);
+use File::Temp qw(tempfile);
+use Try::Tiny qw(try catch);
 
 use FindBin qw($Bin);
 use lib "$Bin/../lib";
@@ -76,6 +78,10 @@
 const my $REPRE_BM => Sanger::CGP::Vagrent::Bookmarkers::RepresentativeTranscriptBookmarker->new();
 const my $WORST_BM => Sanger::CGP::Vagrent::Bookmarkers::MostDeleteriousBookmarker->new();
 
+const my $SORT_CMD => 'cat %s | vcf-sort > %s';
+const my $BGZIP_CMD => 'bgzip %s';
+const my $TABIX_CMB => 'tabix -p vcf %s';
+
 
 my $header_already_parsed = 0;
 
@@ -86,12 +92,22 @@
   unless(defined $options->{'species'} && defined $options->{'assembly'}) {
     croak 'unable to determine species and assembly from VCF file, please specify on command line' unless find_species_in_vcf($vcf_in,$options);
   }
-  open my $OUT_FH, '>', $options->{'output'} or croak 'Failed to create: '.$options->{'output'};
+  my $output = $options->{'output'};
+  if($options->{'tabix'}){
+    (undef,$output) = tempfile('vagrentXXXXXXX', OPEN => 0, SUFFIX => '.vcf');
+  }
+
+  open my $OUT_FH, '>', $output or croak 'Failed to create: '.$output;
   my $annotator = get_annotator($options);
 
   process_data($vcf_in,$OUT_FH,$annotator,$options);
-  close $OUT_FH or croak 'Failed to close: '.$options->{'output'};
-  Vcf::validate($options->{'output'});
+  close $OUT_FH or croak 'Failed to close: '.$output;
+  Vcf::validate($output);
+
+  if($options->{'tabix'}){
+    compressAndIndex($options,$output);
+  }
+
   1;
 } or do {
   warn "EVAL_ERROR: $EVAL_ERROR\n" if($EVAL_ERROR);
@@ -100,6 +116,44 @@
   croak 'A problem occurred';
 };
 
+# Sort, bgzip and tabix index the annotated VCF held in $tmpfile, producing
+# $options->{'output'} . '.gz' plus its index.
+#   $options - hashref of command line options, reads 'input' and 'output'
+#   $tmpfile - path to the plain text VCF written by process_data
+# Returns the result of unlinking $tmpfile; croaks if any step failed (the
+# temporary file is removed first either way).
+sub compressAndIndex {
+  my ($options, $tmpfile) = @_;
+
+  my $output = $options->{'output'};
+  my @cmds = (sprintf($BGZIP_CMD, $output), sprintf($TABIX_CMB, $output.'.gz'));
+
+  # tabix names its index <file>.tbi. If the input has one it must have
+  # already been sorted, and we haven't changed the order of the file, so the
+  # sort can be skipped - but the data still has to reach $output for bgzip.
+  my $presorted = -e $options->{'input'}.'.tbi';
+  unshift @cmds, sprintf($SORT_CMD, $tmpfile, $output) unless $presorted;
+
+  my $error;
+  if($presorted){
+    require File::Copy;
+    File::Copy::copy($tmpfile, $output)
+      or $error = "EXECUTION ERROR: copy $tmpfile to $output failed: $!";
+  }
+
+  for my $cmd (@cmds){
+    last if defined $error;
+    # system() never dies, a failed command only shows in its return value,
+    # so a try/catch around it would never fire.
+    $error = "EXECUTION ERROR: $cmd (wait status $?)" unless system($cmd) == 0;
+  }
+
+  # always clean up the temporary file, then report the first failure
+  my $removed = unlink $tmpfile;
+  croak $error if defined $error;
+  return $removed;
+}
+
 sub process_data {
   my ($in,$out,$anno,$opts) = @_;
   print $out generate_header($in,$opts);
@@ -407,6 +461,7 @@ sub option_builder {
     'i|input=s' => \$opts{'input'},
     'o|output=s' => \$opts{'output'},
     'c|cache=s' => \$opts{'cache'},
+    't|tabix' => \$opts{'tabix'},
     'p|process=n' => \$opts{'process'},
     'sp|species=s' => \$opts{'species'},
     'as|assembly=s' => \$opts{'assembly'},
@@ -443,7 +498,7 @@ =head1 NAME
 
 =head1 SYNOPSIS
 
-AnnotateVcf.pl [-h] -i <IN_FILE> -o <OUT_FILE> -c <VAGRENT_CACHE_FILE>
+AnnotateVcf.pl [-h] [-t] -i <IN_FILE> -o <OUT_FILE> -c <VAGRENT_CACHE_FILE> [-sp <SPECIES> -as <GENOME_VERSION>]
 
   General Options:
 
@@ -451,7 +506,7 @@ =head1 SYNOPSIS
 
     --input     (-i)      Input vcf file (expects *.bgz)
 
-    --output    (-o)      Output vcf
+    --output    (-o)      Output vcf file (plain text, add -t to bgzip and tabix index it)
 
     --cache     (-c)      Vagrent reference data cache file
 
@@ -467,4 +522,6 @@ =head1 SYNOPSIS
 
     --process   (-p)      ID_PROCESS that generated this file
 
+    --tabix     (-t)      bgzip and tabix index the output file (will generate the .gz version of the -o option)
+
 =cut
diff --git a/docs.tar.gz b/docs.tar.gz
index 14787bb..1ce30a4 100644
Binary files a/docs.tar.gz and b/docs.tar.gz differ
diff --git a/lib/Sanger/CGP/Vagrent.pm b/lib/Sanger/CGP/Vagrent.pm
index f378213..752f7ec 100644
--- a/lib/Sanger/CGP/Vagrent.pm
+++ b/lib/Sanger/CGP/Vagrent.pm
@@ -26,7 +26,7 @@ use strict;
 use Const::Fast qw(const);
 
 use base 'Exporter';
-our $VERSION = '2.0';
+our $VERSION = '2.1.0';
 our @EXPORT = qw($VERSION);
 
 1;
diff --git a/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm b/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm
index 48cde27..b039a6a 100644
--- a/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm
+++ b/lib/Sanger/CGP/Vagrent/Annotators/AbstractAnnotator.pm
@@ -462,18 +462,19 @@ sub _buildProteinAnnotation {
  		# something has gone wrong
  		return undef;
  	}
+  
 	my $mtDna = $self->_getMutatedCdsSequence($wtDna,$cdsMinPos,$cdsMaxPos,$cAnnot->getMt());
 	my $mtProt = Bio::Seq->new(-seq => $prePad . $mtDna . $postPad)->translate->seq(); # mutant protein sequence
 	my $maxMtProt = Bio::Seq->new(-seq => $prePad . $mtDna . substr($tran->getcDNASeq,$tran->getCdsMaxPos()))->translate->seq(); # maximised protein sequence, overruns the natural stop and translates to the end of the transcript
 	if($wtProt eq $mtProt){
 		# wt and mt protein sequences are the same, its silent
 		$mutProtMin = ceil(($cAnnot->getMinPos / 3));
-  		$mutProtMax = ceil(($cAnnot->getMaxPos / 3));
-  		$wt = substr($wtProt,($mutProtMin - 1),(($mutProtMax - $mutProtMin) + 1));
-  	  	$mt = substr($mtProt,($mutProtMin - 1),(($mutProtMax - $mutProtMin) + 1));
-  	  	if(length($wt) == 1 && length($mt) == 1 && $mutProtMin == $mutProtMax){
-			$desc = 'p.'.$wt.$mutProtMin.$mt;
-  	  	} else {
+  	$mutProtMax = ceil(($cAnnot->getMaxPos / 3));
+  	$wt = substr($wtProt,($mutProtMin - 1),(($mutProtMax - $mutProtMin) + 1));
+  	$mt = substr($mtProt,($mutProtMin - 1),(($mutProtMax - $mutProtMin) + 1));
+  	if(length($wt) == 1 && length($mt) == 1 && $mutProtMin == $mutProtMax){
+		  $desc = 'p.'.$wt.$mutProtMin.$mt;
+  	} else {
 			$desc = 'p.(=)';
 		}
 		$type = $self->_getDefaultProteinAnnotationType();
@@ -495,10 +496,10 @@ sub _buildProteinAnnotation {
  		if($mutProtMin == 1){
  	  		# its frame shifted the start codon, no idea what this is going to cause.
  	  		push(@classes,$self->getStartLostVariantClass);
- 	  		return $self->_buildUnknownProteinAnnotation($var,$tran,$cAnnot,length($wtProt),@classes);
- 	    }
+	  		return $self->_buildUnknownProteinAnnotation($var,$tran,$cAnnot,length($wtProt),@classes);
+    }
  		$type = Sanger::CGP::Vagrent::Data::Annotation::getFrameShiftAnnotationType();
- 	    push(@classes,$self->getFrameShiftVariantClass);
+    push(@classes,$self->getFrameShiftVariantClass);
 	} else {
 		$wt = $wtProt;
 		$mt = $mtProt;
@@ -512,7 +513,7 @@ sub _buildProteinAnnotation {
 			substr($wt,-1,1,'');
 			substr($mt,-1,1,'');
 		}
-
+    
 		#warn "|$wt| to |$mt|\n";
 		if($wt ne ''){
 			# wild type residue has been changed
@@ -617,8 +618,6 @@ sub _buildProteinAnnotation {
 															subtype => $subtype);
   	$anno->addClassification(@classes);
   	return $anno;
-
-	return undef;
 }
 
 sub _getMutatedCdsSequence: Abstract;
@@ -637,7 +636,6 @@ sub _buildCDSAnnotation {
 		return $self->_buildUnknownCDSAnnotation($var,$tran,$rAnnot,@classes);
 	}
 	my ($cdsMin,$cdsMinOffset,$cdsMax,$cdsMaxOffset) = (undef,undef,undef,undef);
-
 	if($rAnnot->getMinPos < $tran->getCdsMinPos){
 		$cdsMin = 1;
 		$cdsMinOffset = 0;
@@ -668,6 +666,8 @@ sub _buildCDSAnnotation {
 		$cdsMaxOffset = $rAnnot->getMaxOffset();
   }
 
+  print "CDS: $cdsMin , $cdsMinOffset - $cdsMax, $cdsMaxOffset\n" if $self->_debug();
+
 	my $wt = $self->_getWildTypeStringForCDSAnno($var,$tran,$rAnnot);
 	my $mt = $self->_getMutantStringForCDSAnno($var,$tran,$rAnnot);
 	my $desc = $self->_getCDSDescriptionString($tran,$cdsMin,$cdsMax,$cdsMinOffset,$cdsMaxOffset,$wt,$mt);
@@ -959,21 +959,6 @@ sub _coversStopCodon {
       return 1;
     }
   }
-
-
-
-# 	if($anno->getContext eq Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()){
-# 		if($anno->getMinPos <= $tran->getCdsMaxPos && $anno->getMaxPos >= $tran->getCdsMaxPos - 2){
-# 			return 1;
-# 		}
-# 	} elsif($anno->getContext eq Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()){
-# 		if($anno->getMinPos <= $tran->getCdsLength && $anno->getMaxPos >= $tran->getCdsLength - 2){
-# 			return 1;
-# 		}
-# 	} else {
-# 		# don't know, assume no
-# 		return 0;
-# 	}
 	return 0;
 }
 
@@ -1025,10 +1010,25 @@ sub _canAnnotateToCDS {
 		if($anno->hasClassification($self->getInsertionClass)){
 			# insertions are a special case.
 			# Coordinates are the last WT positions, and not the first variant ones like everything else
-			if($anno->getMaxPos <= $tran->getCdsMinPos || $anno->getMinPos >= $tran->getCdsMaxPos){
-				# its outside the CDS
-				return 0;
-			}
+      
+      print 'ANNO POS: '.$anno->getMinPos.' , '.$anno->getMinOffset.' - '.$anno->getMaxPos.' , '.$anno->getMaxOffset."\n" if $self->_debug();
+      print 'CDS POS: '.$tran->getCdsMinPos.' , '.$tran->getCdsMaxPos."\n" if $self->_debug();
+      
+      if($anno->getMaxPos < $tran->getCdsMinPos || $anno->getMinPos > $tran->getCdsMaxPos){
+        # ends before CDS or starts afterwards
+        return 0;
+      } elsif($anno->getMaxPos == $tran->getCdsMinPos) {
+        # potential start codon issues
+        if($anno->getMinPos == $anno->getMaxPos && $anno->getMinPos == $tran->getCdsMinPos && abs($anno->getMinOffset) + abs($anno->getMaxOffset) > 0){
+          # probably start coordinate issues
+          unless($anno->getMaxOffset <= 0 && $self->_isIntronicOffsetDistance($anno->getMaxOffset) == 0){
+            # or not
+            return 0;
+          }
+        } else {
+          return 0;
+        }
+      }
 		} else {
 			if($anno->getMaxPos < $tran->getCdsMinPos || $anno->getMinPos > $tran->getCdsMaxPos){
 				# its outside the CDS
@@ -1050,6 +1050,9 @@ sub _canAnnotateToCDS {
 			return 0;
 		} elsif($anno->hasClassification($self->getUnknownVariantClass)){
 			return 0;
+    } elsif($anno->hasClassification($self->getInsertionClass) && $anno->hasClassification($self->get5PrimeUtrVariantClass)){
+      # odd case, insertions close to the start codons can be described on the CDS even though they don't change it. 
+      return 1;
 		} else {
 			my $msg = "Unable to calculate CDS relevance - UNKNOWN CLASSIFICATION: ".join(' ',$anno->getClassifications);
 			$self->addMessage($msg);
@@ -1066,6 +1069,7 @@ sub _canAnnotateToCDS {
 
 sub _canAnnotateToProtein {
 	my ($self,$tran,$anno) = @_;
+
 	unless($tran->isProteinCoding){
 		# if the transcript isn't protein coding it can't be a coding change
 		return 0;
diff --git a/lib/Sanger/CGP/Vagrent/Annotators/SimpleSubstitutionAnnotator.pm b/lib/Sanger/CGP/Vagrent/Annotators/SimpleSubstitutionAnnotator.pm
index b272240..4c5552d 100644
--- a/lib/Sanger/CGP/Vagrent/Annotators/SimpleSubstitutionAnnotator.pm
+++ b/lib/Sanger/CGP/Vagrent/Annotators/SimpleSubstitutionAnnotator.pm
@@ -190,18 +190,14 @@ sub _buildRNAAnnotation {
 	}
 
 	if($tran->isProteinCoding){
-    #print "HERE\n";
     if(($pos > $tran->getCdsMinPos || ($pos == $tran->getCdsMinPos && $offset >= 0)) && 
        ($pos < $tran->getCdsMaxPos || ($pos == $tran->getCdsMaxPos && $offset <= 0))){
-#		if($pos >= $tran->getCdsMinPos && $pos <= $tran->getCdsMaxPos){
 			# coding change
 			push(@groupClasses,$self->getCDSClass);
 		} elsif($pos < $tran->getCdsMinPos || ($pos == $tran->getCdsMinPos && $offset < 0)){
-#    } elsif($pos < $tran->getCdsMinPos){
 			# 5prime UTR
 			push(@groupClasses,$self->get5PrimeUtrClass);
 		} elsif($pos > $tran->getCdsMaxPos || ($pos == $tran->getCdsMaxPos && $offset > 0)){
-#    } elsif($pos > $tran->getCdsMaxPos){
 			# 3prime UTR
 			push(@groupClasses,$self->get3PrimeUtrClass);
 		} else {
diff --git a/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm b/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm
index b14e928..9562655 100644
--- a/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm
+++ b/lib/Sanger/CGP/Vagrent/Ontology/SequenceOntologyClassifier.pm
@@ -100,14 +100,15 @@ const my $SO_NON_PROTEIN_CODING_CLASS => 'SO:0000011:non_protein_coding';
 
 const my $TERM_SUMMARY_INI => 'SequenceOntologySummary.ini';
 
-#sub DESTROY {
-#  my $self = shift;
-#  if(defined $self->{'_SOsum'}){
-#    foreach my $k( sort {$self->{'_notSummary'}->{$b} <=> $self->{'_notSummary'}->{$a}} keys %{$self->{'_notSummary'}}){
-#      print $self->{'_notSummary'}->{$k},' - ',$k,"\n" unless $self->{'_notSummary'}->{$k} == 1;
-#    }
-#  }
-#}
+# sub DESTROY {
+# ##### Handy DESTROY function that will print ontology combinations that don't exist in the summary lookup at program termination.
+#   my $self = shift;
+#   if(defined $self->{'_SOsum'}){
+#     foreach my $k( sort {$self->{'_notSummary'}->{$b} <=> $self->{'_notSummary'}->{$a}} keys %{$self->{'_notSummary'}}){
+#       print $self->{'_notSummary'}->{$k},' - ',$k,"\n" unless $self->{'_notSummary'}->{$k} == 0;
+#     }
+#   }
+# }
 
 sub _ontologyInit {
   my $self = shift;
diff --git a/share/SequenceOntologySummary.ini b/share/SequenceOntologySummary.ini
index 3a3d058..15a936b 100644
--- a/share/SequenceOntologySummary.ini
+++ b/share/SequenceOntologySummary.ini
@@ -62,8 +62,10 @@ SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:1000002:s
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:1000002:substitution,SO:0001988:5_prime_UTR_premature_start_codon_gain_variant,SO:0001576:transcript_variant=5prime_UTR_variant
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0000159:deletion,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_variant
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0000159:deletion,SO:0001636:2KB_upstream_variant,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_variant
+SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0000159:deletion,SO:0001635:5KB_upstream_variant,SO:0001636:2KB_upstream_variant,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_variant
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0000667:insertion,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_variant
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:1000032:indel,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_variant
+SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:1000032:indel,SO:0001636:2KB_upstream_variant,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_variant
 
 SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:1000002:substitution,SO:0001624:3_prime_UTR_variant,SO:0001576:transcript_variant=3prime_UTR_variant
 SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0000159:deletion,SO:0001624:3_prime_UTR_variant,SO:0001576:transcript_variant=3prime_UTR_variant
@@ -79,9 +81,12 @@ SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000159:deletion,SO:0001619:nc_
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000159:deletion,SO:0001634:500B_downstream_variant,SO:0001619:nc_transcript_variant=nc_variant
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000159:deletion,SO:0001636:2KB_upstream_variant,SO:0001619:nc_transcript_variant=nc_variant
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000159:deletion,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001619:nc_transcript_variant=nc_variant
+SO:0000011:non_protein_coding,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0000159:deletion,SO:0001577:complex_change_in_transcript,SO:0001619:nc_transcript_variant=nc_variant
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000667:insertion,SO:0001619:nc_transcript_variant=nc_variant
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:1000032:indel,SO:0001619:nc_transcript_variant=nc_variant
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:1000032:indel,SO:0001636:2KB_upstream_variant,SO:0001619:nc_transcript_variant=nc_variant
+SO:0000011:non_protein_coding,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001636:2KB_upstream_variant,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0001619:nc_transcript_variant=nc_variant
+SO:0000011:non_protein_coding,SO:0000147:exon,SO:1000032:indel,SO:0001634:500B_downstream_variant,SO:0001619:nc_transcript_variant=nc_variant
 
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:1000002:substitution,SO:0001581:codon_variant,SO:0001583:non_synonymous_codon=missense
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:1000032:indel,SO:0001650:inframe_variant,SO:1000002:substitution,SO:0001583:non_synonymous_codon=missense
@@ -120,16 +125,21 @@ SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:0000159:deletion,SO:
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:0000159:deletion,SO:0001589:frameshift_variant,SO:0001576:transcript_variant=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0000159:deletion,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:0000667:insertion,SO:0001589:frameshift_variant=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:1000032:indel,SO:0001589:frameshift_variant=frameshift
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001578:stop_lost=frameshift
 
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001582:initiator_codon_change=cds_disrupted
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001635:5KB_upstream_variant,SO:0001636:2KB_upstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001582:initiator_codon_change=cds_disrupted
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001636:2KB_upstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001582:initiator_codon_change=cds_disrupted
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001635:5KB_upstream_variant,SO:0001636:2KB_upstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001582:initiator_codon_change=cds_disrupted
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001636:2KB_upstream_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001582:initiator_codon_change=cds_disrupted
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant,SO:0001582:initiator_codon_change=cds_disrupted
 
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001635:5KB_upstream_variant,SO:0001636:2KB_upstream_variant,SO:0001577:complex_change_in_transcript,SO:0001619:nc_transcript_variant=nc_transcript_disrupted
 
@@ -138,6 +148,8 @@ SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000205:th
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001636:2KB_upstream_variant,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript=cds_deleted
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001635:5KB_upstream_variant,SO:0001636:2KB_upstream_variant,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript=cds_deleted
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript=cds_deleted
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001636:2KB_upstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0000159:deletion=cds_deleted
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0000204:five_prime_UTR,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001636:2KB_upstream_variant,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001577:complex_change_in_transcript,SO:0000159:deletion=cds_deleted
 
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000159:deletion,SO:0001636:2KB_upstream_variant,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001619:nc_transcript_variant=nc_transcript_deleted
 SO:0000011:non_protein_coding,SO:0000147:exon,SO:0000159:deletion,SO:0001635:5KB_upstream_variant,SO:0001636:2KB_upstream_variant,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001619:nc_transcript_variant=nc_transcript_deleted
@@ -177,6 +189,7 @@ SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:0001993:extended_cis
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:1000032:indel,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant=ess_splice
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0001993:extended_cis_splice_site,SO:1000032:indel,SO:0001629:splice_site_variant,SO:0001576:transcript_variant=ess_splice
 SO:0000010:protein_coding,SO:0000316:CDS,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:1000032:indel,SO:0001995:extended_intronic_splice_region_variant,SO:0001629:splice_site_variant,SO:0001576:transcript_variant=ess_splice
+SO:0000010:protein_coding,SO:0000316:CDS,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:1000032:indel,SO:0001995:extended_intronic_splice_region_variant,SO:0001629:splice_site_variant,SO:0001627:intron_variant,SO:0001576:transcript_variant=ess_splice
 
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0001993:extended_cis_splice_site,SO:1000002:substitution,SO:0001629:splice_site_variant,SO:0001623:5_prime_UTR_variant,SO:0001576:transcript_variant=5prime_UTR_ess_splice
 SO:0000010:protein_coding,SO:0000204:five_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001636:2KB_upstream_variant,SO:0001623:5_prime_UTR_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant=5prime_UTR_ess_splice
@@ -195,6 +208,7 @@ SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0001993:extended_cis_spl
 SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001624:3_prime_UTR_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant=3prime_UTR_ess_splice
 SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001633:5KB_downstream_variant,SO:0001634:500B_downstream_variant,SO:0001624:3_prime_UTR_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant=3prime_UTR_ess_splice
 SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0001993:extended_cis_splice_site,SO:0000159:deletion,SO:0001624:3_prime_UTR_variant,SO:0001629:splice_site_variant,SO:0001576:transcript_variant=3prime_UTR_ess_splice
+SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0001993:extended_cis_splice_site,SO:0001996:extended_intronic_splice_region,SO:0000188:intron,SO:0000159:deletion,SO:0001624:3_prime_UTR_variant,SO:0001995:extended_intronic_splice_region_variant,SO:0001629:splice_site_variant,SO:0001627:intron_variant,SO:0001576:transcript_variant=3prime_UTR_ess_splice
 SO:0000010:protein_coding,SO:0000205:three_prime_UTR,SO:0000147:exon,SO:0001993:extended_cis_splice_site,SO:1000032:indel,SO:0001624:3_prime_UTR_variant,SO:0001577:complex_change_in_transcript,SO:0001576:transcript_variant=3prime_UTR_ess_splice
 
 SO:0000011:non_protein_coding,SO:0001993:extended_cis_splice_site,SO:1000002:substitution,SO:0001629:splice_site_variant=nc_ess_splice
diff --git a/t/deletion.t b/t/deletion.t
index a0b3072..aca81c0 100644
--- a/t/deletion.t
+++ b/t/deletion.t
@@ -45,6 +45,8 @@ testSplice();
 testExonic();
 testComplexCases();
 testUpStreamDownStream();
+testCdsBoundry();
+
 done_testing();
 
 sub testUpStreamDownStream {
@@ -331,6 +333,7 @@ sub testSplice {
 
 }
 sub testExonic {
+
 	#5 PRIME UTR EXON 1bp DEL
 		test5PrimeUTR_1bp_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
 		test5PrimeUTR_1bp_TOR1AIP2(AnnotationTestUtils::TOR1AIP2_TRANSCRIPT);
@@ -415,7 +418,6 @@ sub testExonic {
 		testCDSExon_StartCodon_2bp_3_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
 		testCDSExon_StartCodon_4bp_1_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
 
-
 	#NON-CODING TRANSCRIPT
 		testExon_1bp_1_AC068831(AnnotationTestUtils::AC068831_TRANSCRIPT);
 		testExon_1bp_2_AC068831(AnnotationTestUtils::AC068831_TRANSCRIPT);
@@ -425,6 +427,497 @@ sub testExonic {
 
 }
 
+sub testCdsBoundry{
+  # Runs every CDS-boundary deletion subtest in turn; takes no arguments. OR4F5 cases first.
+  testStartUpstream_OR4F5();
+  testStartEndsUpsteam1bp_OR4F5();
+  testStartEndsUpsteam0bp_OR4F5();
+  testEndStarts0bp_OR4F5();
+  testEndStarts1bp_OR4F5();
+  testEndDownstream_OR4F5();
+  # GABPB2 cases: upstream, intronic, splice-region and essential-splice deletions.
+  testStartUpstream_GABPB2();
+  testStartIntronic_GABPB2();
+  testStartSpliceRegion_GABPB2();
+  testStartEssSplice_GABPB2();
+
+}
+
+# OR4F5 protein coding gene - single exon, no UTRs, has both start and stop codons, + strand (probably wrong but great for testing)
+
+sub testStartUpstream_OR4F5 { # Subtest: delete one 'C' at chr1:68091 and check for a lone 2KB-upstream mRNA annotation.
+	subtest 'Testing OR4F5 5 Prime UTR Upstream + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+    # Variant under test: single-base deletion on human GRCh37 chromosome 1.
+    my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 68091,
+			'maxpos'				=> 68091,
+			'delseq' 				=> 'C');
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 2KB-upstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get2KBUpStreamVariantClass);
+		done_testing();
+  };
+
+}
+sub testStartEndsUpsteam1bp_OR4F5 {
+	# Subtest: delete one 'C' at chr1:69089 (human GRCh37), one base further upstream than the
+	# 69090 used by testStartEndsUpsteam0bp_OR4F5, and check for a lone 2KB-upstream mRNA annotation.
+	subtest 'Testing OR4F5 Ends Upstream 1 + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# 69089 leaves a 1bp gap before 69090, mirroring the 70009/70010 pair used by the downstream tests.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 69089,
+			'maxpos'				=> 69089,
+			'delseq' 						=> 'C');
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 2KB-upstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get2KBUpStreamVariantClass);
+
+		done_testing();
+	};
+}
+sub testStartEndsUpsteam0bp_OR4F5 {
+	my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+	# Subtest: delete one 'C' at chr1:69090 (human GRCh37) and check for a lone 2KB-upstream mRNA annotation.
+	subtest 'Testing OR4F5 Ends Upstream 0 + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 69090,
+			'maxpos'				=> 69090,
+			'delseq' 						=> 'C');
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 2KB-upstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get2KBUpStreamVariantClass);
+
+		done_testing();
+	};
+}
+sub testEndStarts0bp_OR4F5 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:70009 (human GRCh37) and check for a lone 500bp-downstream mRNA annotation.
+	subtest 'Testing OR4F5 Starts Downstream 0 + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 70009,
+			'maxpos'				=> 70009,
+			'delseq' 						=> 'C');
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 500bp-downstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get500BPDownStreamVariantClass);
+
+		done_testing();
+	};
+
+
+
+}
+sub testEndStarts1bp_OR4F5 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:70010 (human GRCh37) and check for a lone 500bp-downstream mRNA annotation.
+	subtest 'Testing OR4F5 Starts Downstream 1 + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: one base beyond the 70009 used by testEndStarts0bp_OR4F5.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 70010,
+			'maxpos'				=> 70010,
+			'delseq' 						=> 'C');
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 500bp-downstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get500BPDownStreamVariantClass);
+
+		done_testing();
+	};
+
+
+
+}
+sub testEndDownstream_OR4F5 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:71010 (human GRCh37) and check for a lone 5KB-downstream mRNA annotation.
+	subtest 'Testing OR4F5 Starts Downstream + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 71010,
+			'maxpos'				=> 71010,
+			'delseq' 						=> 'C');
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 5KB-downstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get5KBDownStreamVariantClass);
+
+		done_testing();
+	};
+
+
+
+}
+
+# GABPB2 protein coding gene with both utrs on + strand of genome, start codon is at the start of an exon
+
+sub testStartUpstream_GABPB2 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:151042080 (human GRCh37) and check for a lone 2KB-upstream mRNA annotation.
+	subtest 'Testing GABPB2 5 Prime UTR Upstream + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 151042080,
+			'maxpos'				=> 151042080,
+			'delseq' 				=> 'C',);
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding class only.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + 2KB-upstream classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get2KBUpStreamVariantClass);
+		done_testing();
+  };
+
+}
+sub testStartIntronic_GABPB2 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:151060460 (human GRCh37) and check for a lone intron-variant mRNA annotation.
+	subtest 'Testing GABPB2 5 Prime UTR Intronic + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 151060460,
+			'maxpos'				=> 151060460,
+			'delseq' 				=> 'C',);
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding a single mRNA-context annotation; no CDS or protein context.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+		ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding, 5' UTR and intron classes.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass,$a->get5PrimeUtrClass,$a->getIntronClass);
+		# mRNA annotation is checked with unknown type, off-sequence subtype, zero positions, 'r.?' and the deletion + intron-variant classes.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->getIntronVariantClass);
+		done_testing();
+  };
+
+}
+sub testStartSpliceRegion_GABPB2 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:151060660 (human GRCh37) and check mRNA, CDS and protein annotations at offset -6.
+	subtest 'Testing GABPB2 5 Prime UTR Splice Region + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 151060660,
+			'maxpos'				=> 151060660,
+			'delseq' 				=> 'C',);
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding three annotations: mRNA, CDS and protein contexts must all be defined.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),3,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'has have CDS context annotation');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'has have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding, 5' UTR and splice-region classes.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass,$a->get5PrimeUtrClass,$a->getSpliceRegionClass);
+		# mRNA annotation is checked as a deletion at position 332 with offset -6, described as 'r.332-6delc'.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getDeletionAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+									332,-6,332,-6,'C','-','r.332-6delc',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->getSpliceRegionVariantClass,$a->get5PrimeUtrVariantClass);
+		# CDS annotation is checked as a deletion at position 1 with offset -6, described as 'c.1-6delc'.
+		AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getDeletionAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+									1,-6,1,-6,'C','-','c.1-6delc',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->getSpliceRegionVariantClass,$a->get5PrimeUtrVariantClass);
+		# Protein annotation is checked as unknown type, off sequence, 'p.?', with the unknown-variant class.
+		AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','p.?',$t[0]->getProteinAccession,$t[0]->getProteinAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getUnknownVariantClass);
+		done_testing();
+  };
+
+}
+sub testStartEssSplice_GABPB2 {
+  my $file = shift; # NOTE(review): $file is never read in this sub, and testCdsBoundry passes no argument
+  # Subtest: delete one 'C' at chr1:151060665 (human GRCh37) and check mRNA, CDS and protein annotations at offset -1.
+	subtest 'Testing GABPB2 5 Prime UTR Ess Splice + strand 1' => sub {
+		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
+
+		# Variant under test: single-base deletion on chromosome 1.
+		my $sub = Sanger::CGP::Vagrent::Data::Deletion->new(
+			'species'				=> 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr' 					=> 1,
+			'minpos'				=> 151060665,
+			'maxpos'				=> 151060665,
+			'delseq' 				=> 'C',);
+		# $t[0] supplies the gene name, CCDS, accession and database values passed to the checkers below.
+		my @t = $ts->getTranscripts($sub);
+
+		my $a = Sanger::CGP::Vagrent::Annotators::DeletionAnnotator->new(transcriptSource => $ts);
+
+		my @res = $a->getAnnotation($sub);
+		# One protein-coding group holding three annotations: mRNA, CDS and protein contexts must all be defined.
+		is(scalar(@res),1,'annotation group count');
+		is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+		is(scalar(@{$res[0]->getAllAnnotations}),3,'annotation count for group');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'has have CDS context annotation');
+		ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'has have protein context annotation');
+		# The group is handed to the checker with values from $t[0] and the protein-coding, essential-splice-site and 5' UTR classes.
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+									$t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+									$a->getProteinCodingClass,$a->getEssentialSpliceSiteClass,$a->get5PrimeUtrClass);
+		# mRNA annotation is checked as a deletion at position 332 with offset -1, described as 'r.332-1delc'.
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getDeletionAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+									332,-1,332,-1,'C','-','r.332-1delc',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get5PrimeUtrVariantClass,$a->getEssentialSpliceSiteVariantClass);
+		# CDS annotation is checked as a deletion at position 1 with offset -1, described as 'c.1-1delc'.
+		AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getDeletionAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+									1,-1,1,-1,'C','-','c.1-1delc',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getDeletionClass,$a->get5PrimeUtrVariantClass,$a->getEssentialSpliceSiteVariantClass);
+		# Protein annotation is checked as unknown type, off sequence, 'p.?', with the unknown-variant class.
+		AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+									$res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+									Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+									Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+									Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+									0,0,0,0,'?','?','p.?',$t[0]->getProteinAccession,$t[0]->getProteinAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+									$a->getUnknownVariantClass);
+		done_testing();
+  };
+
+}
+
+
 # CEP350 protein coding gene with 5 prime utr exons on + strand of genome
 
 sub testUpsteamMilesAway_1bp_CEP350 {
diff --git a/t/insertion.t b/t/insertion.t
index 2b70daf..307bbd8 100644
--- a/t/insertion.t
+++ b/t/insertion.t
@@ -45,8 +45,27 @@ testSplice();
 testExonic();
 testUpStreamDownStream();
 testStrangeCases();
+testCdsBoundry();
 done_testing();
 
+
+sub testCdsBoundry {
+  # Runs every CDS-boundary insertion subtest in turn; takes no arguments. OR4F5 cases first.
+  testStartUpstream_OR4F5();
+  testStartEndsUpsteam1bp_OR4F5();
+  testStartEndsUpsteam0bp_OR4F5();
+  testEndStarts0bp_OR4F5();
+  testEndStarts1bp_OR4F5();
+  testEndDownstream_OR4F5();
+  # GABPB2 cases: upstream, intronic, splice-region and two essential-splice insertions.
+  testStartUpstream_GABPB2();
+  testStartIntronic_GABPB2();
+  testStartSpliceRegion_GABPB2();
+  testStartEssSplice_GABPB2();
+  testStartEssSplice2_GABPB2();
+
+}
+
 sub testUpStreamDownStream {
 	testUpsteamMilesAway_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
 	testEndsUpsteam5001bp_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
@@ -85,9 +104,7 @@ sub testUpStreamDownStream {
 	testStartsDownstreamMilesAway_TOR1AIP2(AnnotationTestUtils::TOR1AIP2_TRANSCRIPT);
 
 }
-
 sub testStrangeCases {
-#CENTRE OF INTRONS ' => sub {
 		testIntronic_DeadCenterOfEvenSizedIntron_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
 		testIntronic_StartingDeadCenterOfOddSizedIntron_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
 		testIntronic_EndingDeadCenterOfOddSizedIntron_CEP350(AnnotationTestUtils::CEP350_TRANSCRIPT);
@@ -319,6 +336,541 @@ sub testExonic {
 
 }
 
+# OR4F5 protein coding gene - single exon, no UTRs, has both start and stop codons, + strand (probably wrong but great for testing)
+
+sub testStartUpstream_OR4F5 {
+	subtest 'Testing OR4F5 5 Prime UTR Upstream + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+		# Annotates a 'C' insertion at chr1:68091-68092; expects a lone mRNA-context 'r.?' annotation (insertion + 2KB upstream).
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 68091,
+			'maxpos'        => 68092,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get2KBUpStreamVariantClass);
+		done_testing();
+	};
+
+}
+sub testStartEndsUpsteam1bp_OR4F5 {
+	# Annotates a 'C' insertion at chr1:69089-69090 and expects one protein-coding group whose only
+	# annotation is an mRNA-context 'r.?' entry classed as insertion + 2KB upstream. Arguments are ignored.
+	subtest 'Testing OR4F5 Ends Upstream 1 + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 69089,
+			'maxpos'        => 69090,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get2KBUpStreamVariantClass);
+
+		done_testing();
+	};
+}
+sub testStartEndsUpsteam0bp_OR4F5 {
+	# Annotates a 'C' insertion at chr1:69090-69091 and expects one protein-coding group whose only
+	# annotation is an mRNA-context 'r.?' entry classed as insertion + 2KB upstream. Arguments are ignored.
+	subtest 'Testing OR4F5 Ends Upstream 0 + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 69090,
+			'maxpos'        => 69091,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get2KBUpStreamVariantClass);
+
+		done_testing();
+	};
+}
+sub testEndStarts0bp_OR4F5 {
+	# Annotates a 'C' insertion at chr1:70008-70009 and expects one protein-coding group whose only
+	# annotation is an mRNA-context 'r.?' entry classed as insertion + 500BP downstream. Arguments are ignored.
+	subtest 'Testing OR4F5 Starts Downstream 0 + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 70008,
+			'maxpos'        => 70009,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?',
+			$transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion,
+			$transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get500BPDownStreamVariantClass);
+
+		done_testing();
+	};
+
+}
+sub testEndStarts1bp_OR4F5 {
+	# Annotates a 'C' insertion at chr1:70009-70010 and expects one protein-coding group whose only
+	# annotation is an mRNA-context 'r.?' entry classed as insertion + 500BP downstream. Arguments are ignored.
+	subtest 'Testing OR4F5 Starts Downstream 1 + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 70009,
+			'maxpos'        => 70010,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?',
+			$transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion,
+			$transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get500BPDownStreamVariantClass);
+
+		done_testing();
+	};
+
+}
+sub testEndDownstream_OR4F5 {
+	# Annotates a 'C' insertion at chr1:71009-71010 and expects one protein-coding group whose only
+	# annotation is an mRNA-context 'r.?' entry classed as insertion + 5KB downstream. Arguments are ignored.
+	subtest 'Testing OR4F5 Starts Downstream + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 71009,
+			'maxpos'        => 71010,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?',
+			$transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion,
+			$transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get5KBDownStreamVariantClass);
+
+		done_testing();
+	};
+
+}
+
+
+# GABPB2 protein coding gene with both utrs on + strand of genome, start codon is at the start of an exon
+
+sub testStartUpstream_GABPB2 {
+	# Annotates a 'C' insertion at chr1:151042080-151042081 and expects one protein-coding group whose only
+	# annotation is an mRNA-context 'r.?' entry classed as insertion + 2KB upstream. Arguments are ignored.
+	subtest 'Testing GABPB2 5 Prime UTR Upstream + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 151042080,
+			'maxpos'        => 151042081,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get2KBUpStreamVariantClass);
+		done_testing();
+	};
+
+}
+sub testStartIntronic_GABPB2 {
+	# Annotates a 'C' insertion at chr1:151060460-151060461 and expects one protein-coding / 5' UTR / intron group
+	# whose only annotation is an mRNA-context 'r.?' entry classed as insertion + intron. Arguments are ignored.
+	subtest 'Testing GABPB2 5 Prime UTR Intronic + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 151060460,
+			'maxpos'        => 151060461,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 1, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'doesnt have CDS context annotation');
+		ok(!defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'doesnt have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass, $annotator->get5PrimeUtrClass, $annotator->getIntronClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'r.?', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->getIntronVariantClass);
+		done_testing();
+	};
+
+}
+sub testStartSpliceRegion_GABPB2 {
+	# Annotates a 'C' insertion at chr1:151060660-151060661 and expects mRNA, CDS and protein annotations
+	# (r.332-6_332-5insc, c.1-6_1-5insC, p.?) in a 5' UTR / splice-region group. Arguments are ignored.
+	subtest 'Testing GABPB2 5 Prime UTR Splice Region + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 151060660,
+			'maxpos'        => 151060661,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 3, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'has have CDS context annotation');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'has have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass, $annotator->get5PrimeUtrClass, $annotator->getSpliceRegionClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+			332, -6, 332, -5, '-', 'C', 'r.332-6_332-5insc', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->getSpliceRegionVariantClass, $annotator->get5PrimeUtrVariantClass);
+
+		AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+			1, -6, 1, -5, '-', 'C', 'c.1-6_1-5insC', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->getSpliceRegionVariantClass, $annotator->get5PrimeUtrVariantClass);
+
+		AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'p.?', $transcripts[0]->getProteinAccession, $transcripts[0]->getProteinAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getUnknownVariantClass);
+		done_testing();
+	};
+
+}
+sub testStartEssSplice_GABPB2 {
+	# Annotates a 'C' insertion at chr1:151060664-151060665 and expects mRNA, CDS and protein annotations
+	# (r.332-2_332-1insc, c.1-2_1-1insC, p.?) in an essential-splice / 5' UTR group. Arguments are ignored.
+	subtest 'Testing GABPB2 5 Prime UTR Ess Splice + strand 1' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 151060664,
+			'maxpos'        => 151060665,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 3, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'has have CDS context annotation');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'has have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass, $annotator->getEssentialSpliceSiteClass, $annotator->get5PrimeUtrClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+			332, -2, 332, -1, '-', 'C', 'r.332-2_332-1insc', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get5PrimeUtrVariantClass, $annotator->getEssentialSpliceSiteVariantClass);
+
+		AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+			1, -2, 1, -1, '-', 'C', 'c.1-2_1-1insC', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get5PrimeUtrVariantClass, $annotator->getEssentialSpliceSiteVariantClass);
+
+		AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'p.?', $transcripts[0]->getProteinAccession, $transcripts[0]->getProteinAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getUnknownVariantClass);
+		done_testing();
+	};
+
+}
+sub testStartEssSplice2_GABPB2 {
+	# Annotates a 'C' insertion at chr1:151060665-151060666 and expects mRNA, CDS and protein annotations
+	# (r.332-1_332insc, c.1-1_1insC, p.?) in a protein-coding / exon / 5' UTR group. Arguments are ignored.
+	subtest 'Testing GABPB2 5 Prime UTR Ess Splice + strand 2' => sub {
+		my $source = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+		# Variant under test; the transcripts fetched for it supply the expected identifiers below.
+		my $insertion = Sanger::CGP::Vagrent::Data::Insertion->new(
+			'species'       => 'human',
+			'genomeVersion' => 'GRCh37',
+			'chr'           => 1,
+			'minpos'        => 151060665,
+			'maxpos'        => 151060666,
+			'insseq'        => 'C');
+
+		my @transcripts = $source->getTranscripts($insertion);
+
+		my $annotator = Sanger::CGP::Vagrent::Annotators::InsertionAnnotator->new(transcriptSource => $source);
+
+		my @groups = $annotator->getAnnotation($insertion);
+
+		is(scalar(@groups), 1, 'annotation group count');
+		is($groups[0]->getType, Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(), 'annotation group type - proteincoding');
+		is(scalar(@{$groups[0]->getAllAnnotations}), 3, 'annotation count for group');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())), 'has mRNA context annotation');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())), 'has have CDS context annotation');
+		ok(defined($groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())), 'has have protein context annotation');
+
+		AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail', $groups[0],
+			$transcripts[0]->getGeneName, $transcripts[0]->getCCDS, $transcripts[0]->getAccession, $transcripts[0]->getGeneType,
+			$annotator->getProteinCodingClass, $annotator->getExonClass, $annotator->get5PrimeUtrClass);
+
+		AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+			332, -1, 332, 0, '-', 'C', 'r.332-1_332insc', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get5PrimeUtrVariantClass);
+
+		AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getInsertionAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+			1, -1, 1, 0, '-', 'C', 'c.1-1_1insC', $transcripts[0]->getAccession, $transcripts[0]->getAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getInsertionClass, $annotator->get5PrimeUtrVariantClass);
+
+		AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+			$groups[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+			Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+			Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+			Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+			0, 0, 0, 0, '?', '?', 'p.?', $transcripts[0]->getProteinAccession, $transcripts[0]->getProteinAccessionVersion, $transcripts[0]->getDatabase, $transcripts[0]->getDatabaseVersion,
+			$annotator->getUnknownVariantClass);
+		done_testing();
+	};
+
+}
+
+
 
 # CEP350 protein coding gene with 5 prime utr exons on + strand of genome
 
@@ -5062,7 +5614,7 @@ sub testCDSStartAdjacent_1bp_TOR1AIP2 {
 sub testCDSStartAdjacent_3bp_TOR1AIP2 {
 	my $file = shift;
 
-	subtest 'Testing TOR1AIP2 5 prime UTR 1bp - strand CDS start adjacent' => sub {
+	subtest 'Testing TOR1AIP2 5 prime UTR 3bp - strand CDS start adjacent' => sub {
 		 my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);   
 
 
diff --git a/t/substitution.t b/t/substitution.t
index 4aa0344..8f16b4d 100755
--- a/t/substitution.t
+++ b/t/substitution.t
@@ -45,6 +45,7 @@ testIntronic();
 testSplice();
 testExonic();
 testUpStreamDownStream();
+testCDSBoundry();
 
 done_testing();
 
@@ -261,6 +262,508 @@ sub testExonic {
 		done_testing();
 	};
 }
+sub testCDSBoundry {
+  # Start-side boundary cases, run in this order and called without arguments.
+  my @start_cases = (
+    \&testStartUpstream_GABPB2,
+    \&testStartIntronic_GABPB2,
+    \&testStartSpliceRegion_GABPB2,
+    \&testStartEssSplice_GABPB2,
+    \&testStartUpstream_OR4F5,
+    \&testStartSpliceRegion_OR4F5,
+    \&testStartEssSplice_OR4F5,
+  );
+  # End-side boundary cases, run in this order and called without arguments.
+  my @end_cases = (
+    \&testEndEssSplice_OR4F5,
+    \&testEndSpliceRegion_OR4F5,
+    \&testEndDownstream_OR4F5,
+  );
+
+  # Each group is reported as its own subtest.
+  subtest 'Testing Start ' => sub { for my $case (@start_cases) { $case->(); } done_testing(); };
+
+  subtest 'Testing End ' => sub { for my $case (@end_cases) { $case->(); } done_testing(); };
+}
+
+# GABPB2: protein coding gene with both UTRs on the + strand of the genome; the start codon is at the start of an exon
+
+sub testStartUpstream_GABPB2 {
+  # Annotates the chr 1 substitution 151042080 T>C (GRCh37) and expects a single protein-coding
+  # group holding only an mRNA-context annotation of unknown type, classed as a 2KB upstream variant.
+  subtest 'Testing GABPB2 5 Prime UTR Upstream + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 151042080,
+      'maxpos'        => 151042080,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get2KBUpStreamVariantClass);
+    done_testing();
+  };
+
+}
+sub testStartIntronic_GABPB2 {
+  # Annotates the chr 1 substitution 151060460 T>C (GRCh37) and expects a single group classed as protein coding,
+  # 5 prime UTR and intron, holding only an mRNA-context annotation of unknown type classed as an intron variant.
+  subtest 'Testing GABPB2 5 Prime UTR Intronic + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 151060460,
+      'maxpos'        => 151060460,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass,$annotator->get5PrimeUtrClass,$annotator->getIntronClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->getIntronVariantClass);
+    done_testing();
+  };
+
+}
+sub testStartSpliceRegion_GABPB2 {
+  # Annotates the chr 1 substitution 151060660 T>C (GRCh37) and expects mRNA (r.332-6u>c), CDS (c.1-6T>C) and
+  # unknown protein (p.?) annotations; the mRNA and CDS ones are classed as splice region + 5 prime UTR variants.
+  subtest 'Testing GABPB2 5 Prime UTR Splice Region + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 151060660,
+      'maxpos'        => 151060660,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),3,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'has CDS context annotation');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'has protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass,$annotator->get5PrimeUtrClass,$annotator->getSpliceRegionClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getSubstitutionAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+      332,-6,332,-6,'U','C','r.332-6u>c',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->getSpliceRegionVariantClass,$annotator->get5PrimeUtrVariantClass);
+
+    AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getSubstitutionAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+      1,-6,1,-6,'T','C','c.1-6T>C',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->getSpliceRegionVariantClass,$annotator->get5PrimeUtrVariantClass);
+
+    AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','p.?',$t[0]->getProteinAccession,$t[0]->getProteinAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getUnknownVariantClass);
+    done_testing();
+  };
+
+}
+sub testStartEssSplice_GABPB2 {
+  # Annotates the chr 1 substitution 151060665 G>C (GRCh37) and expects mRNA (r.332-1g>c), CDS (c.1-1G>C) and
+  # unknown protein (p.?) annotations; the mRNA and CDS ones are classed as essential splice site + 5 prime UTR variants.
+  subtest 'Testing GABPB2 5 Prime UTR Ess Splice + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 151060665,
+      'maxpos'        => 151060665,
+      'wt'            => 'G',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),3,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'has CDS context annotation');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'has protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass,$annotator->get5PrimeUtrClass,$annotator->getEssentialSpliceSiteClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getSubstitutionAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+      332,-1,332,-1,'G','C','r.332-1g>c',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->getEssentialSpliceSiteVariantClass,$annotator->get5PrimeUtrVariantClass);
+
+    AnnotationTestUtils::checkAnnotation('examine CDS annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getSubstitutionAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffsetSubtype(),
+      1,-1,1,-1,'G','C','c.1-1G>C',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->getEssentialSpliceSiteVariantClass,$annotator->get5PrimeUtrVariantClass);
+
+    AnnotationTestUtils::checkAnnotation('examine Protein annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','p.?',$t[0]->getProteinAccession,$t[0]->getProteinAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getUnknownVariantClass);
+    done_testing();
+  };
+
+}
+
+# OR4F5 protein coding gene - single exon, no UTRs, has both start and stop codons, + strand (probably wrong but great for testing)
+
+sub testStartUpstream_OR4F5 {
+  # Annotates the chr 1 substitution 68091 T>C (GRCh37) and expects a single protein-coding
+  # group holding only an mRNA-context annotation of unknown type, classed as a 2KB upstream variant.
+  subtest 'Testing OR4F5 5 Prime UTR Upstream + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 68091,
+      'maxpos'        => 68091,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get2KBUpStreamVariantClass);
+    done_testing();
+  };
+
+}
+sub testStartSpliceRegion_OR4F5 {
+  # Annotates the chr 1 substitution 69085 T>C (GRCh37) and expects only an mRNA-context annotation of
+  # unknown type classed as a 2KB upstream variant; no splice class appears in the expected values.
+  subtest 'Testing OR4F5 5 Prime UTR Splice Region + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 69085,
+      'maxpos'        => 69085,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get2KBUpStreamVariantClass);
+    done_testing();
+  };
+
+}
+sub testStartEssSplice_OR4F5 {
+  # Annotates the chr 1 substitution 69090 T>C (GRCh37) and expects only an mRNA-context annotation of
+  # unknown type classed as a 2KB upstream variant; no splice class appears in the expected values.
+  subtest 'Testing OR4F5 5 Prime UTR Ess Splice + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 69090,
+      'maxpos'        => 69090,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get2KBUpStreamVariantClass);
+    done_testing();
+  };
+
+}
+sub testEndEssSplice_OR4F5 {
+  # Annotates the chr 1 substitution 70009 T>C (GRCh37) and expects only an mRNA-context annotation of
+  # unknown type classed as a 500BP downstream variant; no splice class appears in the expected values.
+  subtest 'Testing OR4F5 3 Prime UTR Ess Splice + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 70009,
+      'maxpos'        => 70009,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get500BPDownStreamVariantClass);
+    done_testing();
+  };
+
+}
+sub testEndSpliceRegion_OR4F5 {
+  # Annotates the chr 1 substitution 70015 T>C (GRCh37) and expects only an mRNA-context annotation of
+  # unknown type classed as a 500BP downstream variant; no splice class appears in the expected values.
+  subtest 'Testing OR4F5 3 Prime UTR Splice Region + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 70015,
+      'maxpos'        => 70015,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get500BPDownStreamVariantClass);
+    done_testing();
+  };
+
+}
+sub testEndDownstream_OR4F5 {
+  # Annotates the chr 1 substitution 72015 T>C (GRCh37) and expects a single protein-coding
+  # group holding only an mRNA-context annotation of unknown type, classed as a 5KB downstream variant.
+  subtest 'Testing OR4F5 Downstream + strand 1' => sub {
+    my $ts = Sanger::CGP::Vagrent::TranscriptSource::FileBasedTranscriptSource->new('cache' => AnnotationTestUtils::TRANSCRIPT_CACHE);
+
+    # The variant under test.
+    my $sub = Sanger::CGP::Vagrent::Data::Substitution->new(
+      'species'       => 'human',
+      'genomeVersion' => 'GRCh37',
+      'chr'           => 1,
+      'minpos'        => 72015,
+      'maxpos'        => 72015,
+      'wt'            => 'T',
+      'mt'            => 'C',);
+    # $t[0] supplies the expected gene, accession and database values passed to the checks below.
+    my @t = $ts->getTranscripts($sub);
+
+    my $annotator = Sanger::CGP::Vagrent::Annotators::SimpleSubstitutionAnnotator->new(transcriptSource => $ts);
+
+    my @res = $annotator->getAnnotation($sub);
+
+    is(scalar(@res),1,'annotation group count');
+    is($res[0]->getType,Sanger::CGP::Vagrent::Data::Transcript::getProteinCodingType(),'annotation group type - proteincoding');
+    is(scalar(@{$res[0]->getAllAnnotations}),1,'annotation count for group');
+    ok(defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext())),'has mRNA context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getCDSAnnotationContext())),'doesnt have CDS context annotation');
+    ok(!defined($res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getProteinAnnotationContext())),'doesnt have protein context annotation');
+
+    AnnotationTestUtils::checkAnnotationGroup('examine annotation group in detail',$res[0],
+      $t[0]->getGeneName,$t[0]->getCCDS,$t[0]->getAccession,$t[0]->getGeneType,
+      $annotator->getProteinCodingClass);
+
+    AnnotationTestUtils::checkAnnotation('examine mRNA annotation in detail',
+      $res[0]->getAnnotationByContext(Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext()),
+      Sanger::CGP::Vagrent::Data::Annotation::getmRNAAnnotationContext(),
+      Sanger::CGP::Vagrent::Data::Annotation::getUnknownAnnotationType(),
+      Sanger::CGP::Vagrent::Data::Annotation::getPositionOffSequenceSubtype(),
+      0,0,0,0,'?','?','r.?',$t[0]->getAccession,$t[0]->getAccessionVersion,$t[0]->getDatabase,$t[0]->getDatabaseVersion,
+      $annotator->getSubstitutionClass,$annotator->get5KBDownStreamVariantClass);
+    done_testing();
+  };
+
+}
+
 
 
 # CEP350 protein coding gene with 5 prime utr exons on + strand of genome
diff --git a/testData/test_transcript.cache.gz b/testData/test_transcript.cache.gz
index 4588b22..19b7b74 100644
Binary files a/testData/test_transcript.cache.gz and b/testData/test_transcript.cache.gz differ
diff --git a/testData/test_transcript.cache.gz.tbi b/testData/test_transcript.cache.gz.tbi
index 486a61b..ac66ee2 100644
Binary files a/testData/test_transcript.cache.gz.tbi and b/testData/test_transcript.cache.gz.tbi differ
diff --git a/testData/test_transcript.fa b/testData/test_transcript.fa
index 04aa39a..e9679bb 100644
--- a/testData/test_transcript.fa
+++ b/testData/test_transcript.fa
@@ -847,3 +847,21 @@ TCTGACAGCTTTATGTACAGCGTATTTTTAGAAAAACTTAAATATACTTCTTTATTTAGG
 GTTTTATTCTGATGAGCAAGTTTGTGTGTATATGTGTGTATGAGCATTTGTATGTATATA
 TACTTATACAGATCTATATTATATATACAGTTTTTGTACTATCATTTAAAATAAAAATGT
 TTCTCAATAAAATGTCAAAGCCGA
+>ENST00000335137
+ATGGTGACTGAATTCATTTTTCTGGGTCTCTCTGATTCTCAGGAACTCCAGACCTTCCTA       
+TTTATGTTGTTTTTTGTATTCTATGGAGGAATCGTGTTTGGAAACCTTCTTATTGTCATA       
+ACAGTGGTATCTGACTCCCACCTTCACTCTCCCATGTACTTCCTGCTAGCCAACCTCTCA       
+CTCATTGATCTGTCTCTGTCTTCAGTCACAGCCCCCAAGATGATTACTGACTTTTTCAGC       
+CAGCGCAAAGTCATCTCTTTCAAGGGCTGCCTTGTTCAGATATTTCTCCTTCACTTCTTT       
+GGTGGGAGTGAGATGGTGATCCTCATAGCCATGGGCTTTGACAGATATATAGCAATATGC       
+AAGCCCCTACACTACACTACAATTATGTGTGGCAACGCATGTGTCGGCATTATGGCTGTC       
+ACATGGGGAATTGGCTTTCTCCATTCGGTGAGCCAGTTGGCGTTTGCCGTGCACTTACTC       
+TTCTGTGGTCCCAATGAGGTCGATAGTTTTTATTGTGACCTTCCTAGGGTAATCAAACTT       
+GCCTGTACAGATACCTACAGGCTAGATATTATGGTCATTGCTAACAGTGGTGTGCTCACT       
+GTGTGTTCTTTTGTTCTTCTAATCATCTCATACACTATCATCCTAATGACCATCCAGCAT       
+CGCCCTTTAGATAAGTCGTCCAAAGCTCTGTCCACTTTGACTGCTCACATTACAGTAGTT       
+CTTTTGTTCTTTGGACCATGTGTCTTTATTTATGCCTGGCCATTCCCCATCAAGTCATTA       
+GATAAATTCCTTGCTGTATTTTATTCTGTGATCACCCCTCTCTTGAACCCAATTATATAC       
+ACACTGAGGAACAAAGACATGAAGACGGCAATAAGACAGCTGAGAAAATGGGATGCACAT       
+TCTAGTGTAAAGTTTTAG
+
diff --git a/testData/test_transcript.fa.fai b/testData/test_transcript.fa.fai
index a97b9fb..b1b450e 100644
--- a/testData/test_transcript.fa.fai
+++ b/testData/test_transcript.fa.fai
@@ -7,3 +7,4 @@ ENST00000339290	3241	21445	60	61
 ENST0000037195	9027	24757	60	61
 ENST00000367612	7905	33952	60	61
 ENST00000368918	8964	42006	60	61
+ENST00000335137	918	51137	60	68