From b38dd211960c32d1a6b00777ca98f508a44357e4 Mon Sep 17 00:00:00 2001
From: Colin <colin.diesh@gmail.com>
Date: Wed, 4 Mar 2020 12:42:52 -0500
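Subject: [PATCH 1/2] Allow utf8 filenames

Decode file paths from UTF-8 before using them in file tests
(-f, -r, -e, -s) and in open() in IndexNames.pm. The existence check
for names.txt in find_names_files also changes from -f to -e.

This commit additionally contains work-in-progress changes: temporary
debugging output (Data::Dumper dumps and print statements) in
IndexNames.pm and Genbank.pm, a whitespace reindentation of Genbank.pm,
and an experimental switch of the GenBank top-level feature type from
mRNA to gene that also drops the start/end offset and default strand
assignments. The debugging output and all Genbank.pm changes are
removed again in patch 2/2.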

---
 src/perl5/Bio/JBrowse/Cmd/IndexNames.pm       | 16 ++--
 .../Bio/JBrowse/FeatureStream/Genbank.pm      | 85 +++++++++----------
 2 files changed, 52 insertions(+), 49 deletions(-)

diff --git a/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm b/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm
index eec797b131..d0f7be3067 100644
--- a/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm
+++ b/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm
@@ -18,6 +18,8 @@ use Storable ();
 use File::Path ();
 use File::Temp ();
 use List::Util ();
+use Data::Dumper;
+use Encode qw(decode encode);
 
 use GenomeDB ();
 use Bio::GFF3::LowLevel qw/gff3_parse_feature/;
@@ -78,6 +80,7 @@ sub run {
     unless( @$refSeqs ) {
         die "No reference sequences defined in configuration, nothing to do.\n";
     }
+    print Dumper($gdb->trackList);
     my @tracks = grep $self->track_is_included( $_->{label} ),
                       @{ $gdb->trackList || [] };
     unless( @tracks ) {
@@ -195,8 +198,8 @@ sub _mergeIndexEntries {
 
 sub make_file_record {
     my ( $self, $track, $file ) = @_;
-    -f $file or die "$file not found\n";
-    -r $file or die "$file not readable\n";
+    -f decode('UTF-8',$file) or die "$file not found\n";
+    -r decode('UTF-8',$file) or die "$file not readable\n";
     my $gzipped = $file =~ /\.(txt|json|g)z(\.\d+)?$/;
     my $type = $file =~ /\.txtz?$/                ? 'txt'  :
                $file =~ /\.jsonz?$/               ? 'json' :
@@ -364,7 +367,8 @@ sub find_names_files {
             # read either names.txt or names.json files
             my $name_records_iterator;
             my $names_txt  = File::Spec->catfile( $dir, 'names.txt'  );
-            if( -f $names_txt ) {
+            print "$names_txt\n";
+            if( -e decode('UTF-8',$names_txt) ) {
                 push @files, $self->make_file_record( $track, $names_txt );
             }
             else {
@@ -436,8 +440,8 @@ sub make_operation_stream {
     #print "sizes: $self->{stats}{total_namerec_bytes}, buffered: $namerecs_buffered, b/rec: ".$total_namerec_sizes/$namerecs_buffered."\n";
     $self->{stats}{avg_record_text_bytes} = $self->{stats}{total_namerec_bytes}/($self->{stats}{namerecs_buffered}||1);
     $self->{stats}{total_input_bytes} = List::Util::sum(
-        map { my $s = -s $_->{fullpath};
-              $s *= 8 if $_->{fullpath} =~ /\.(g|txt|json)z$/;
+        map { my $s = -s decode('UTF-8',$_->{fullpath});
+              $s *= 8 if decode('UTF-8',$_->{fullpath}) =~ /\.(g|txt|json)z$/;
               $s;
           } @$names_files ) || 0;
     $self->{stats}{record_stream_estimated_count} = int( $self->{stats}{total_input_bytes} / ($self->{stats}{avg_record_text_bytes}||1));;
@@ -710,7 +714,7 @@ sub open_names_file {
         }
     }
     else {
-        open my $fh, '<', $infile or die "$! reading $infile";
+        open my $fh, '<', decode('UTF-8',$infile) or die "$! reading $infile";
         return $fh;
     }
 }
diff --git a/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm b/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm
index 0fbe3552af..1e0b77d078 100644
--- a/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm
+++ b/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm
@@ -10,6 +10,7 @@ use strict;
 use warnings;
 
 use base 'Bio::JBrowse::FeatureStream';
+use Data::Dumper;
 
 use Bio::JBrowse::FeatureStream::Genbank::LocationParser;
 
@@ -31,11 +32,12 @@ sub _aggregate_features_from_gbk_record {
     # get index of top level feature ('mRNA' at current writing)
     my $indexTopLevel;
     my $count = 0;
+    print Dumper $record;
     foreach my $feat ( @{$record->{FEATURES}} ){
-	if ( _isTopLevel( $feat ) ){
-	    $indexTopLevel = $count;
-	}
-	$count++;
+        if ( _isTopLevel( $feat ) ){
+            $indexTopLevel = $count;
+        }
+        $count++;
     }
 
     return unless defined $indexTopLevel;
@@ -54,55 +56,52 @@ sub _aggregate_features_from_gbk_record {
     delete $f->{SEQUENCE};
 
     $f->{end} = $locations[-1]{end};
-    #for my $f ( @features ) {
-        $f->{start}  += $offset + 1;
-        $f->{end}    += $offset;
-        $f->{strand} = 1 unless defined $f->{strand};
-        $f->{type}   = $record->{FEATURES}[$indexTopLevel]{name};
-        $f->{seq_id} ||= $seq_id;
-
-        %$f = ( %{$record->{FEATURES}[$indexTopLevel]{feature} || {}}, %$f ); # get other attrs
-        if( $f->{type} eq 'mRNA' ) {
-            $f->{name} = $record->{FEATURES}[$indexTopLevel]{feature}{gene};
-            $f->{description} = $record->{FEATURES}[$indexTopLevel]{feature}{product} || $f->{FEATURES}[$indexTopLevel]{feature}{note};
-        }
+    $f->{type}   = $record->{FEATURES}[$indexTopLevel]{name};
+    $f->{seq_id} ||= $seq_id;
+
+    %$f = ( %{$record->{FEATURES}[$indexTopLevel]{feature} || {}}, %$f ); # get other attrs
+    if( $f->{type} eq 'gene' ) {
+        print "here2\n";
+        $f->{name} = $record->{FEATURES}[$indexTopLevel]{feature}{gene};
+        $f->{description} = $record->{FEATURES}[$indexTopLevel]{feature}{product} || $f->{FEATURES}[$indexTopLevel]{feature}{note};
+    }
 
-        # convert FEATURES to subfeatures
-        $f->{subfeatures} = [];
-        if ( scalar( @{$record->{FEATURES} || [] }) > $indexTopLevel ) {
-            for my $i ( $indexTopLevel + 1 .. $#{$record->{FEATURES}} ) {
-                my $feature = $record->{FEATURES}[$i];
-                my @sublocations = _parseLocation( $feature->{location} );
-                for my $subloc ( @sublocations ) {
-                    $subloc->{start} += $offset + 1;
-                    $subloc->{end} += $offset;
-
-                    my $newFeature = {
-                        %{ $feature->{feature}||{} },
-                        %$subloc,
-                        type  => $feature->{name}
-                        };
-
-                    $newFeature->{seq_id} ||= $seq_id;
-
-                    push @{$f->{subfeatures}}, $newFeature;
-                }
+    # convert FEATURES to subfeatures
+    $f->{subfeatures} = [];
+    if ( scalar( @{$record->{FEATURES} || [] }) > $indexTopLevel ) {
+        for my $i ( $indexTopLevel + 1 .. $#{$record->{FEATURES}} ) {
+            my $feature = $record->{FEATURES}[$i];
+            my @sublocations = _parseLocation( $feature->{location} );
+            for my $subloc ( @sublocations ) {
+                $subloc->{start} += $offset + 1;
+                $subloc->{end} += $offset;
+
+                my $newFeature = {
+                    %{ $feature->{feature}||{} },
+                    %$subloc,
+                    type  => $feature->{name}
+                    };
+
+                $newFeature->{seq_id} ||= $seq_id;
+
+                push @{$f->{subfeatures}}, $newFeature;
             }
         }
-#    }
+    }
 
     return $f;
 }
 
 sub _isTopLevel {
     my $feat = shift;
-    my @topLevelFeatures = qw( mRNA ); # add more as needed?
+    my @topLevelFeatures = qw( gene ); # add more as needed?
     my $isTopLevel = 0;
     foreach my $thisTopFeat ( @topLevelFeatures ){
-	if ( $feat->{'name'} =~ m/$thisTopFeat/ ){
-	    $isTopLevel = 1;
-	    last;
-	}
+        if ( $feat->{'name'} =~ m/$thisTopFeat/ ){
+            print "here\n";
+            $isTopLevel = 1;
+            last;
+        }
     }
     return $isTopLevel;
 }
@@ -115,7 +114,7 @@ sub _getRegionOffset {
 
     my $f = shift;
     my $offset = 0;
-    if ( grep {$_ =~ /REGION\:/} @{$f->{'VERSION'}} ){ # this is a region file 
+    if ( grep {$_ =~ /REGION\:/} @{$f->{'VERSION'}} ){ # this is a region file
  	# get array item after REGION token
  	my $count = 0;
 	my $regionIndexInArray;

From 7778cca1e7632a01497a3073c346acb650f48d8d Mon Sep 17 00:00:00 2001
From: Colin <colin.diesh@gmail.com>
Date: Wed, 4 Mar 2020 12:50:07 -0500
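Subject: [PATCH 2/2] Remove some print commands

Drop the leftover debugging output from IndexNames.pm: the Data::Dumper
import, the trackList dump in run(), and the names.txt path print in
find_names_files. The UTF-8 decode() calls are kept.

Also restore Genbank.pm to its previous contents (blob 0fbe3552af).
This removes the debug prints and undoes the reindentation and the
experimental gene-as-top-level change from patch 1/2, so the net change
to Genbank.pm across the series is empty.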

---
 src/perl5/Bio/JBrowse/Cmd/IndexNames.pm       |  3 -
 .../Bio/JBrowse/FeatureStream/Genbank.pm      | 85 ++++++++++---------
 2 files changed, 43 insertions(+), 45 deletions(-)

diff --git a/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm b/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm
index d0f7be3067..0296ec882f 100644
--- a/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm
+++ b/src/perl5/Bio/JBrowse/Cmd/IndexNames.pm
@@ -18,7 +18,6 @@ use Storable ();
 use File::Path ();
 use File::Temp ();
 use List::Util ();
-use Data::Dumper;
 use Encode qw(decode encode);
 
 use GenomeDB ();
@@ -80,7 +79,6 @@ sub run {
     unless( @$refSeqs ) {
         die "No reference sequences defined in configuration, nothing to do.\n";
     }
-    print Dumper($gdb->trackList);
     my @tracks = grep $self->track_is_included( $_->{label} ),
                       @{ $gdb->trackList || [] };
     unless( @tracks ) {
@@ -367,7 +365,6 @@ sub find_names_files {
             # read either names.txt or names.json files
             my $name_records_iterator;
             my $names_txt  = File::Spec->catfile( $dir, 'names.txt'  );
-            print "$names_txt\n";
             if( -e decode('UTF-8',$names_txt) ) {
                 push @files, $self->make_file_record( $track, $names_txt );
             }
diff --git a/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm b/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm
index 1e0b77d078..0fbe3552af 100644
--- a/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm
+++ b/src/perl5/Bio/JBrowse/FeatureStream/Genbank.pm
@@ -10,7 +10,6 @@ use strict;
 use warnings;
 
 use base 'Bio::JBrowse::FeatureStream';
-use Data::Dumper;
 
 use Bio::JBrowse::FeatureStream::Genbank::LocationParser;
 
@@ -32,12 +31,11 @@ sub _aggregate_features_from_gbk_record {
     # get index of top level feature ('mRNA' at current writing)
     my $indexTopLevel;
     my $count = 0;
-    print Dumper $record;
     foreach my $feat ( @{$record->{FEATURES}} ){
-        if ( _isTopLevel( $feat ) ){
-            $indexTopLevel = $count;
-        }
-        $count++;
+	if ( _isTopLevel( $feat ) ){
+	    $indexTopLevel = $count;
+	}
+	$count++;
     }
 
     return unless defined $indexTopLevel;
@@ -56,52 +54,55 @@ sub _aggregate_features_from_gbk_record {
     delete $f->{SEQUENCE};
 
     $f->{end} = $locations[-1]{end};
-    $f->{type}   = $record->{FEATURES}[$indexTopLevel]{name};
-    $f->{seq_id} ||= $seq_id;
-
-    %$f = ( %{$record->{FEATURES}[$indexTopLevel]{feature} || {}}, %$f ); # get other attrs
-    if( $f->{type} eq 'gene' ) {
-        print "here2\n";
-        $f->{name} = $record->{FEATURES}[$indexTopLevel]{feature}{gene};
-        $f->{description} = $record->{FEATURES}[$indexTopLevel]{feature}{product} || $f->{FEATURES}[$indexTopLevel]{feature}{note};
-    }
+    #for my $f ( @features ) {
+        $f->{start}  += $offset + 1;
+        $f->{end}    += $offset;
+        $f->{strand} = 1 unless defined $f->{strand};
+        $f->{type}   = $record->{FEATURES}[$indexTopLevel]{name};
+        $f->{seq_id} ||= $seq_id;
+
+        %$f = ( %{$record->{FEATURES}[$indexTopLevel]{feature} || {}}, %$f ); # get other attrs
+        if( $f->{type} eq 'mRNA' ) {
+            $f->{name} = $record->{FEATURES}[$indexTopLevel]{feature}{gene};
+            $f->{description} = $record->{FEATURES}[$indexTopLevel]{feature}{product} || $f->{FEATURES}[$indexTopLevel]{feature}{note};
+        }
 
-    # convert FEATURES to subfeatures
-    $f->{subfeatures} = [];
-    if ( scalar( @{$record->{FEATURES} || [] }) > $indexTopLevel ) {
-        for my $i ( $indexTopLevel + 1 .. $#{$record->{FEATURES}} ) {
-            my $feature = $record->{FEATURES}[$i];
-            my @sublocations = _parseLocation( $feature->{location} );
-            for my $subloc ( @sublocations ) {
-                $subloc->{start} += $offset + 1;
-                $subloc->{end} += $offset;
-
-                my $newFeature = {
-                    %{ $feature->{feature}||{} },
-                    %$subloc,
-                    type  => $feature->{name}
-                    };
-
-                $newFeature->{seq_id} ||= $seq_id;
-
-                push @{$f->{subfeatures}}, $newFeature;
+        # convert FEATURES to subfeatures
+        $f->{subfeatures} = [];
+        if ( scalar( @{$record->{FEATURES} || [] }) > $indexTopLevel ) {
+            for my $i ( $indexTopLevel + 1 .. $#{$record->{FEATURES}} ) {
+                my $feature = $record->{FEATURES}[$i];
+                my @sublocations = _parseLocation( $feature->{location} );
+                for my $subloc ( @sublocations ) {
+                    $subloc->{start} += $offset + 1;
+                    $subloc->{end} += $offset;
+
+                    my $newFeature = {
+                        %{ $feature->{feature}||{} },
+                        %$subloc,
+                        type  => $feature->{name}
+                        };
+
+                    $newFeature->{seq_id} ||= $seq_id;
+
+                    push @{$f->{subfeatures}}, $newFeature;
+                }
             }
         }
-    }
+#    }
 
     return $f;
 }
 
 sub _isTopLevel {
     my $feat = shift;
-    my @topLevelFeatures = qw( gene ); # add more as needed?
+    my @topLevelFeatures = qw( mRNA ); # add more as needed?
     my $isTopLevel = 0;
     foreach my $thisTopFeat ( @topLevelFeatures ){
-        if ( $feat->{'name'} =~ m/$thisTopFeat/ ){
-            print "here\n";
-            $isTopLevel = 1;
-            last;
-        }
+	if ( $feat->{'name'} =~ m/$thisTopFeat/ ){
+	    $isTopLevel = 1;
+	    last;
+	}
     }
     return $isTopLevel;
 }
@@ -114,7 +115,7 @@ sub _getRegionOffset {
 
     my $f = shift;
     my $offset = 0;
-    if ( grep {$_ =~ /REGION\:/} @{$f->{'VERSION'}} ){ # this is a region file
+    if ( grep {$_ =~ /REGION\:/} @{$f->{'VERSION'}} ){ # this is a region file 
  	# get array item after REGION token
  	my $count = 0;
 	my $regionIndexInArray;