Skip to content

Commit

Permalink
Merge pull request #21 from cancerit/feature/nameLessGenesAndEnsmblGenomes
Browse files Browse the repository at this point in the history

Feature/name less genes and ensmbl genomes
  • Loading branch information
AndyMenzies authored Nov 1, 2016
2 parents f050adf + 7f92d52 commit cd72f98
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 5 deletions.
23 changes: 19 additions & 4 deletions bin/Admin_EnsemblGtf2CacheConverter.pl
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@
$ccds = parseCCDSFile($opts->{'c'}) if defined $opts->{'c'};
convertGtf($opts,$lookup,$ccds);
} catch {
warn "An error occurred while building reference support files\:\n\t$_"; # not $@
die "An error occurred while building reference support files\:\n\t$_"; # not $@
};

sub convertGtf {
Expand Down Expand Up @@ -104,6 +104,7 @@ sub convertGtf {
if(exists $attr{'transcript_id'} && defined $attr{'transcript_id'}){
$acc = unquoteValue($attr{'transcript_id'});
unless(exists $lookup->{$acc}) {
next unless(exists $attr{'transcript_version'});
$acc .= '.'.unquoteValue($attr{'transcript_version'});
next unless exists $lookup->{$acc};
}
Expand All @@ -125,7 +126,15 @@ sub convertGtf {
$c++;
$wip->{$acc}->{'type'} = $bioType;
$wip->{$acc}->{'acc'} = $acc;
$wip->{$acc}->{'gene'} = unquoteValue($attr{'gene_name'});
if(exists $attr{'gene_name'}) {
$wip->{$acc}->{'gene'} = unquoteValue($attr{'gene_name'});
}
elsif(exists $attr{'gene_id'}) {
$wip->{$acc}->{'gene'} = unquoteValue($attr{'gene_id'});
}
else {
croak "Cannot identify gene name or ID for structure: ".Dumper(\%attr);
}
$wip->{$acc}->{'CCDS'} = unquoteValue($attr{'ccds_id'}) if exists $attr{'ccds_id'};
}
if($lineType eq $CDS_TYPE && !defined $wip->{$acc}->{'protacc'}){
Expand Down Expand Up @@ -257,8 +266,14 @@ sub convertTranscript {

sub writeTranscript {
my ($fh,$t,$rawT) = @_;
print $fh join("\t",$rawT->{'lines'}->{$EXON_TYPE}->[0]->[0],$t->getGenomicMinPos - 1,
$t->getGenomicMaxPos,$t->getAccession,$t->getGeneName,length $t->getcDNASeq);
eval {
print $fh join("\t",$rawT->{'lines'}->{$EXON_TYPE}->[0]->[0],$t->getGenomicMinPos - 1,
$t->getGenomicMaxPos,$t->getAccession,$t->getGeneName,length $t->getcDNASeq);
1;
};
if($@) {
die "\nTranscript Object: ".Dumper($t)."\n\nExon_Type layer: ".Dumper($rawT->{'lines'}->{$EXON_TYPE})."\n\nERROR: Abandon hope, Ensemble structure has changed\n";
}
$t->{_cdnaseq} = undef;
print $fh "\t",Dumper($t),"\n";
}
Expand Down
2 changes: 1 addition & 1 deletion bin/Admin_EnsemblReferenceFileGenerator.pl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
const my $CACHE_SUFFIX_RAW => 'vagrent.cache.raw';
const my @TRANSCRIPT_BIOTYPES => qw(protein_coding lincRNA miRNA snoRNA rRNA snRNA);
const my $ENSEMBL_SPECIES_ASSEMBLY => qr/([^\.]+?)\.(.+?)\./;
const my $ENSEMBL_VERSION_PATTERN => qr/^ftp\:\/\/ftp\.ensembl\.org\/pub\/release\-(\d+?)\//;
const my $ENSEMBL_VERSION_PATTERN => qr/^ftp\:\/\/ftp\.ensembl(?:genomes)?\.org\/pub\/release\-(\d+?)\//;

try {
my $opts = option_builder();
Expand Down

0 comments on commit cd72f98

Please sign in to comment.