diff --git a/CHANGES.md b/CHANGES.md index e6d3900..0c0a10a 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,8 @@ +### 3.2.0 +* Allows use of ensemblgenomes.org as a datasource +* Handle genes without names, and give more useful error message +* Ensure an error code is emitted on failure + ### 3.1.0 * Adds travis testing * Cleans up install script and adds multi versioned paths to options diff --git a/bin/Admin_EnsemblGtf2CacheConverter.pl b/bin/Admin_EnsemblGtf2CacheConverter.pl index bc11c66..83d9e3c 100755 --- a/bin/Admin_EnsemblGtf2CacheConverter.pl +++ b/bin/Admin_EnsemblGtf2CacheConverter.pl @@ -62,7 +62,7 @@ $ccds = parseCCDSFile($opts->{'c'}) if defined $opts->{'c'}; convertGtf($opts,$lookup,$ccds); } catch { - warn "An error occurred while building reference support files\:\n\t$_"; # not $@ + die "An error occurred while building reference support files\:\n\t$_"; # not $@ }; sub convertGtf { @@ -104,6 +104,7 @@ sub convertGtf { if(exists $attr{'transcript_id'} && defined $attr{'transcript_id'}){ $acc = unquoteValue($attr{'transcript_id'}); unless(exists $lookup->{$acc}) { + next unless(exists $attr{'transcript_version'}); $acc .= '.'.unquoteValue($attr{'transcript_version'}); next unless exists $lookup->{$acc}; } @@ -125,7 +126,15 @@ sub convertGtf { $c++; $wip->{$acc}->{'type'} = $bioType; $wip->{$acc}->{'acc'} = $acc; - $wip->{$acc}->{'gene'} = unquoteValue($attr{'gene_name'}); + if(exists $attr{'gene_name'}) { + $wip->{$acc}->{'gene'} = unquoteValue($attr{'gene_name'}); + } + elsif(exists $attr{'gene_id'}) { + $wip->{$acc}->{'gene'} = unquoteValue($attr{'gene_id'}); + } + else { + croak "Cannot identify gene name or ID for structure: ".Dumper(\%attr); + } $wip->{$acc}->{'CCDS'} = unquoteValue($attr{'ccds_id'}) if exists $attr{'ccds_id'}; } if($lineType eq $CDS_TYPE && !defined $wip->{$acc}->{'protacc'}){ @@ -257,8 +266,14 @@ sub convertTranscript { sub writeTranscript { my ($fh,$t,$rawT) = @_; - print $fh
join("\t",$rawT->{'lines'}->{$EXON_TYPE}->[0]->[0],$t->getGenomicMinPos - 1, - $t->getGenomicMaxPos,$t->getAccession,$t->getGeneName,length $t->getcDNASeq); + eval { + print $fh join("\t",$rawT->{'lines'}->{$EXON_TYPE}->[0]->[0],$t->getGenomicMinPos - 1, + $t->getGenomicMaxPos,$t->getAccession,$t->getGeneName,length $t->getcDNASeq); + 1; + }; + if($@) { + die "\nTranscript Object: ".Dumper($t)."\n\nExon_Type layer: ".Dumper($rawT->{'lines'}->{$EXON_TYPE})."\n\nERROR: Abandon hope, Ensemble structure has changed\n"; + } $t->{_cdnaseq} = undef; print $fh "\t",Dumper($t),"\n"; } diff --git a/bin/Admin_EnsemblReferenceFileGenerator.pl b/bin/Admin_EnsemblReferenceFileGenerator.pl index d704931..074cba0 100755 --- a/bin/Admin_EnsemblReferenceFileGenerator.pl +++ b/bin/Admin_EnsemblReferenceFileGenerator.pl @@ -53,7 +53,7 @@ const my $CACHE_SUFFIX_RAW => 'vagrent.cache.raw'; const my @TRANSCRIPT_BIOTYPES => qw(protein_coding lincRNA miRNA snoRNA rRNA snRNA); const my $ENSEMBL_SPECIES_ASSEMBLY => qr/([^\.]+?)\.(.+?)\./; -const my $ENSEMBL_VERSION_PATTERN => qr/^ftp\:\/\/ftp\.ensembl\.org\/pub\/release\-(\d+?)\//; +const my $ENSEMBL_VERSION_PATTERN => qr/^ftp\:\/\/ftp\.ensembl(?:genomes)?\.org\/pub\/release\-(\d+?)\//; try { my $opts = option_builder(); diff --git a/docs.tar.gz b/docs.tar.gz index cfd2ccc..7483672 100644 Binary files a/docs.tar.gz and b/docs.tar.gz differ diff --git a/lib/Sanger/CGP/Vagrent.pm b/lib/Sanger/CGP/Vagrent.pm index 1473040..95865fd 100644 --- a/lib/Sanger/CGP/Vagrent.pm +++ b/lib/Sanger/CGP/Vagrent.pm @@ -26,7 +26,7 @@ use strict; use Const::Fast qw(const); use base 'Exporter'; -our $VERSION = '3.1.1'; +our $VERSION = '3.2.0'; our @EXPORT = qw($VERSION); 1;