From ea8ff473e33f543a59d0ba66d8d01105c5a91a04 Mon Sep 17 00:00:00 2001
From: mdehoon <mjldehoon@yahoo.com>
Date: Wed, 18 Sep 2024 18:04:06 +0900
Subject: [PATCH] Allow parsers in Bio.SeqIO to handle both text and binary
 modes (#4842)

---
 Bio/SeqIO/AbiIO.py            |  4 +++-
 Bio/SeqIO/AceIO.py            |  6 ++++--
 Bio/SeqIO/FastaIO.py          | 16 ++++++++++++----
 Bio/SeqIO/GckIO.py            |  4 +++-
 Bio/SeqIO/GfaIO.py            |  8 ++++++--
 Bio/SeqIO/IgIO.py             |  4 +++-
 Bio/SeqIO/InsdcIO.py          | 20 ++++++++++++++-----
 Bio/SeqIO/Interfaces.py       | 36 ++++++++++++++++++++++++++---------
 Bio/SeqIO/NibIO.py            |  4 +++-
 Bio/SeqIO/PdbIO.py            | 10 +++++++++-
 Bio/SeqIO/PhdIO.py            |  4 +++-
 Bio/SeqIO/PirIO.py            |  4 +++-
 Bio/SeqIO/QualityIO.py        |  8 ++++++--
 Bio/SeqIO/SeqXmlIO.py         |  4 +++-
 Bio/SeqIO/SffIO.py            |  4 +++-
 Bio/SeqIO/SnapGeneIO.py       |  4 +++-
 Bio/SeqIO/SwissIO.py          |  4 +++-
 Bio/SeqIO/TabIO.py            |  4 +++-
 Bio/SeqIO/TwoBitIO.py         |  4 +++-
 Bio/SeqIO/UniprotIO.py        | 17 ++++++++++++++++-
 Bio/SeqIO/XdnaIO.py           |  4 +++-
 DEPRECATED.rst                |  8 ++++++++
 Tests/test_SeqIO_UniprotIO.py | 12 ++++++++++--
 23 files changed, 152 insertions(+), 41 deletions(-)

diff --git a/Bio/SeqIO/AbiIO.py b/Bio/SeqIO/AbiIO.py
index 3c32f8b2ced..ad412efab03 100644
--- a/Bio/SeqIO/AbiIO.py
+++ b/Bio/SeqIO/AbiIO.py
@@ -347,9 +347,11 @@ def _get_string_tag(opt_bytes_value, default=None):
 class AbiIterator(SequenceIterator):
     """Parser for Abi files."""
 
+    modes = "b"
+
     def __init__(self, source, trim=False):
         """Return an iterator for the Abi file format."""
-        super().__init__(source, mode="b", fmt="ABI")
+        super().__init__(source, fmt="ABI")
         # check if input file is a valid Abi file
         marker = self.stream.read(4)
         if not marker:
diff --git a/Bio/SeqIO/AceIO.py b/Bio/SeqIO/AceIO.py
index 63fea2b3075..e9cd9bd3c1f 100644
--- a/Bio/SeqIO/AceIO.py
+++ b/Bio/SeqIO/AceIO.py
@@ -22,6 +22,8 @@
 class AceIterator(SequenceIterator):
     """Return SeqRecord objects from an ACE file."""
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -69,8 +71,8 @@ def __init__(
         90
 
         """
-        super().__init__(source, mode="t", fmt="ACE")
-        self.ace_contigs = Ace._parse(self.stream)
+        super().__init__(source, fmt="ACE")
+        self.ace_contigs = Ace.parse(self.stream)
 
     def __next__(self):
         try:
diff --git a/Bio/SeqIO/FastaIO.py b/Bio/SeqIO/FastaIO.py
index a1489499a0f..aecd4e702f9 100644
--- a/Bio/SeqIO/FastaIO.py
+++ b/Bio/SeqIO/FastaIO.py
@@ -143,6 +143,8 @@ def FastaTwoLineParser(handle):
 class FastaIterator(SequenceIterator):
     """Parser for plain Fasta files without comments."""
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -191,7 +193,7 @@ def __init__(
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
-        super().__init__(source, mode="t", fmt="Fasta")
+        super().__init__(source, fmt="Fasta")
         try:
             line = next(self.stream)
         except StopIteration:
@@ -266,6 +268,8 @@ def __next__(self):
 class FastaTwoLineIterator(SequenceIterator):
     """Parser for Fasta files with exactly two lines per record."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Iterate over two-line Fasta records (as SeqRecord objects).
 
@@ -278,7 +282,7 @@ def __init__(self, source):
         Only the default title to ID/name/description parsing offered
         by the relaxed FASTA parser is offered.
         """
-        super().__init__(source, mode="t", fmt="FASTA")
+        super().__init__(source, fmt="FASTA")
         self._data = FastaTwoLineParser(self.stream)
 
     def __next__(self):
@@ -300,6 +304,8 @@ def __next__(self):
 class FastaBlastIterator(SequenceIterator):
     """Parser for Fasta files, allowing for comments as in BLAST."""
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -348,7 +354,7 @@ def __init__(
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
-        super().__init__(source, mode="t", fmt="FASTA")
+        super().__init__(source, fmt="FASTA")
         for line in self.stream:
             if line[0] not in "#!;":
                 if not line.startswith(">"):
@@ -397,6 +403,8 @@ def __next__(self):
 class FastaPearsonIterator(SequenceIterator):
     """Parser for Fasta files, allowing for comments as in the FASTA aligner."""
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -446,7 +454,7 @@ def __init__(
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
-        super().__init__(source, mode="t", fmt="Fasta")
+        super().__init__(source, fmt="Fasta")
         for line in self.stream:
             if line.startswith(">"):
                 self._line = line
diff --git a/Bio/SeqIO/GckIO.py b/Bio/SeqIO/GckIO.py
index f2a550c05d5..118a1c03412 100644
--- a/Bio/SeqIO/GckIO.py
+++ b/Bio/SeqIO/GckIO.py
@@ -73,13 +73,15 @@ def _read_p4string(stream):
 class GckIterator(SequenceIterator):
     """Parser for GCK files."""
 
+    modes = "b"
+
     def __init__(self, source):
         """Break up a GCK file into SeqRecord objects.
 
         Note that a GCK file can only contain one sequence, so this
         iterator will always return a single record.
         """
-        super().__init__(source, mode="b", fmt="GCK")
+        super().__init__(source, fmt="GCK")
         # Skip file header
         # GCK files start with a 24-bytes header. Bytes 4 and 8 seem to
         # always be 12, maybe this could act as a magic cookie. Bytes
diff --git a/Bio/SeqIO/GfaIO.py b/Bio/SeqIO/GfaIO.py
index e50a3eca36b..6098a8826bc 100644
--- a/Bio/SeqIO/GfaIO.py
+++ b/Bio/SeqIO/GfaIO.py
@@ -119,6 +119,8 @@ class Gfa1Iterator(SequenceIterator):
     Documentation: https://gfa-spec.github.io/GFA-spec/GFA1.html
     """
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -128,7 +130,7 @@ def __init__(
         Arguments:
          - source - input stream opened in text mode, or a path to a file
         """
-        super().__init__(source, mode="t", fmt="GFA 1.0")
+        super().__init__(source, fmt="GFA 1.0")
 
     def __next__(self):
         for line in self.stream:
@@ -164,6 +166,8 @@ class Gfa2Iterator(SequenceIterator):
     Documentation for version 2: https://gfa-spec.github.io/GFA-spec/GFA2.html
     """
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -173,7 +177,7 @@ def __init__(
         Arguments:
          - source - input stream opened in text mode, or a path to a file
         """
-        super().__init__(source, mode="t", fmt="GFA 2.0")
+        super().__init__(source, fmt="GFA 2.0")
 
     def __next__(self):
         for line in self.stream:
diff --git a/Bio/SeqIO/IgIO.py b/Bio/SeqIO/IgIO.py
index a12ad4a77d8..91377565b5d 100644
--- a/Bio/SeqIO/IgIO.py
+++ b/Bio/SeqIO/IgIO.py
@@ -22,6 +22,8 @@
 class IgIterator(SequenceIterator):
     """Parser for IntelliGenetics files."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Iterate over IntelliGenetics records (as SeqRecord objects).
 
@@ -60,7 +62,7 @@ def __init__(self, source):
         SYK_SYK length 330
 
         """
-        super().__init__(source, mode="t", fmt="IntelliGenetics")
+        super().__init__(source, fmt="IntelliGenetics")
         for line in self.stream:
             if not line.startswith(";;"):
                 break
diff --git a/Bio/SeqIO/InsdcIO.py b/Bio/SeqIO/InsdcIO.py
index 4d05bb33402..b892dbbf142 100644
--- a/Bio/SeqIO/InsdcIO.py
+++ b/Bio/SeqIO/InsdcIO.py
@@ -62,6 +62,8 @@
 class GenBankIterator(SequenceIterator):
     """Parser for GenBank files."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Break up a Genbank file into SeqRecord objects.
 
@@ -99,7 +101,7 @@ def __init__(self, source):
         AF297471.1
 
         """
-        super().__init__(source, mode="t", fmt="GenBank")
+        super().__init__(source, fmt="GenBank")
         self.records = GenBankScanner(debug=0).parse_records(self.stream)
 
     def __next__(self):
@@ -115,6 +117,8 @@ def __next__(self):
 class EmblIterator(SequenceIterator):
     """Parser for EMBL files."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Break up an EMBL file into SeqRecord objects.
 
@@ -158,7 +162,7 @@ def __init__(self, source):
         CQ797900.1
 
         """
-        super().__init__(source, mode="t", fmt="EMBL")
+        super().__init__(source, fmt="EMBL")
         self.records = EmblScanner(debug=0).parse_records(self.stream)
 
     def __next__(self):
@@ -174,6 +178,8 @@ def __next__(self):
 class ImgtIterator(SequenceIterator):
     """Parser for IMGT files."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Break up an IMGT file into SeqRecord objects.
 
@@ -184,7 +190,7 @@ def __init__(self, source):
         Note that for genomes or chromosomes, there is typically only
         one record.
         """
-        super().__init__(source, mode="t", fmt="IMGT")
+        super().__init__(source, fmt="IMGT")
         self.records = _ImgtScanner(debug=0).parse_records(self.stream)
 
     def __next__(self):
@@ -200,6 +206,8 @@ def __next__(self):
 class GenBankCdsFeatureIterator(SequenceIterator):
     """Parser for GenBank files, creating a SeqRecord for each CDS feature."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Break up a Genbank file into SeqRecord objects for each CDS feature.
 
@@ -209,7 +217,7 @@ def __init__(self, source):
         many CDS features.  These are returned as with the stated amino acid
         translation sequence (if given).
         """
-        super().__init__(source, mode="t", fmt="GenBank")
+        super().__init__(source, fmt="GenBank")
         self.records = GenBankScanner(debug=0).parse_cds_features(self.stream)
 
     def __next__(self):
@@ -225,6 +233,8 @@ def __next__(self):
 class EmblCdsFeatureIterator(SequenceIterator):
     """Parser for EMBL files, creating a SeqRecord for each CDS feature."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Break up a EMBL file into SeqRecord objects for each CDS feature.
 
@@ -234,7 +244,7 @@ def __init__(self, source):
         many CDS features.  These are returned as with the stated amino acid
         translation sequence (if given).
         """
-        super().__init__(source, mode="t", fmt="EMBL")
+        super().__init__(source, fmt="EMBL")
         self.records = EmblScanner(debug=0).parse_cds_features(self.stream)
 
     def __next__(self):
diff --git a/Bio/SeqIO/Interfaces.py b/Bio/SeqIO/Interfaces.py
index 780a7af992b..8af8438b4a4 100644
--- a/Bio/SeqIO/Interfaces.py
+++ b/Bio/SeqIO/Interfaces.py
@@ -12,6 +12,7 @@
 
 from abc import ABC
 from abc import abstractmethod
+from abc import abstractproperty
 from os import PathLike
 from typing import AnyStr
 from typing import Generic
@@ -37,13 +38,25 @@ class SequenceIterator(ABC, Generic[AnyStr]):
 
     You should write a __next__ method that returns the next SeqRecord.  You
     may wish to redefine the __init__ method as well.
+    You must also create a class property `modes` specifying the allowable
+    file stream modes.
     """
 
+    @abstractproperty
+    def modes(self):
+        """File modes (binary or text) that the parser can handle.
+
+        This property must be "t" (for text mode only), "b" (for binary mode
+        only), "tb" (if both text and binary mode are accepted, but text mode
+        is preferred), or "bt" (if both text and binary mode are accepted, but
+        binary mode is preferred).
+        """
+        pass
+
     def __init__(
         self,
         source: _IOSource,
         alphabet: None = None,
-        mode: str = "t",
         fmt: Optional[str] = None,
     ) -> None:
         """Create a SequenceIterator object.
@@ -51,7 +64,6 @@ def __init__(
         Arguments:
         - source - input file stream, or path to input file
         - alphabet - no longer used, should be None
-        - mode - string, either "t" for text mode or "b" for binary
         - fmt - string, mixed case format name for in error messages
 
         This method MAY be overridden by any subclass.
@@ -63,24 +75,30 @@ def __init__(
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
+        modes = self.modes
         if isinstance(source, _PathLikeTypes):
+            mode = modes[0]
             self.stream = open(source, "r" + mode)
             self.should_close_stream = True
         else:
-            if mode == "t":
-                if source.read(0) != "":
+            value = source.read(0)
+            if value == "":
+                if modes == "b":
                     raise StreamModeError(
-                        f"{fmt} files must be opened in text mode."
+                        f"{fmt} files must be opened in binary mode."
                     ) from None
-            elif mode == "b":
-                if source.read(0) != b"":
+                mode = "t"
+            elif value == b"":
+                if modes == "t":
                     raise StreamModeError(
-                        f"{fmt} files must be opened in binary mode."
+                        f"{fmt} files must be opened in text mode."
                     ) from None
+                mode = "b"
             else:
-                raise ValueError(f"Unknown mode '{mode}'") from None
+                raise RuntimeError("Failed to read from input data") from None
             self.stream = source
             self.should_close_stream = False
+        self.mode = mode
 
     @abstractmethod
     def __next__(self):
diff --git a/Bio/SeqIO/NibIO.py b/Bio/SeqIO/NibIO.py
index 9569bef30dc..c5b81c4f6ba 100644
--- a/Bio/SeqIO/NibIO.py
+++ b/Bio/SeqIO/NibIO.py
@@ -53,6 +53,8 @@
 class NibIterator(SequenceIterator):
     """Parser for nib files."""
 
+    modes = "b"
+
     def __init__(self, source):
         """Iterate over a nib file and yield a SeqRecord.
 
@@ -79,7 +81,7 @@ def __init__(self, source):
         nAGAAGagccgcNGgCActtGAnTAtCGTCgcCacCaGncGncTtGNtGG 50
 
         """
-        super().__init__(source, mode="b", fmt="Nib")
+        super().__init__(source, fmt="Nib")
         word = self.stream.read(4)
         if not word:
             raise ValueError("Empty file.")
diff --git a/Bio/SeqIO/PdbIO.py b/Bio/SeqIO/PdbIO.py
index 1d81e44b4b0..40bafea0184 100644
--- a/Bio/SeqIO/PdbIO.py
+++ b/Bio/SeqIO/PdbIO.py
@@ -114,6 +114,8 @@ def AtomIterator(pdb_id, structure):
 class PdbSeqresIterator(SequenceIterator):
     """Parser for PDB files."""
 
+    modes = "t"
+
     def __init__(self, source: _TextIOSource) -> None:
         """Iterate over chains in a PDB file as SeqRecord objects.
 
@@ -151,7 +153,7 @@ def __init__(self, source: _TextIOSource) -> None:
         Note the chain is recorded in the annotations dictionary, and any PDB DBREF
         lines are recorded in the database cross-references list.
         """
-        super().__init__(source, mode="t", fmt="PDB")
+        super().__init__(source, fmt="PDB")
         self.cache = None
 
     def __next__(self):
@@ -276,6 +278,8 @@ def __next__(self):
 class PdbAtomIterator(SequenceIterator):
     """Parser for structures in a PDB files."""
 
+    modes = "t"
+
     def __init__(self, source: _TextIOSource) -> None:
         """Iterate over structures in a PDB file as SeqRecord objects.
 
@@ -373,6 +377,8 @@ def __next__(self):
 class CifSeqresIterator(SequenceIterator):
     """Parser for chains in an mmCIF files."""
 
+    modes = "t"
+
     def __init__(self, source: _TextIOSource) -> None:
         """Iterate over chains in an mmCIF file as SeqRecord objects.
 
@@ -498,6 +504,8 @@ def __next__(self):
 class CifAtomIterator(SequenceIterator):
     """Parser for structures in an mmCIF files."""
 
+    modes = "t"
+
     def __init__(self, source: _TextIOSource) -> None:
         """Iterate over structures in an mmCIF file as SeqRecord objects.
 
diff --git a/Bio/SeqIO/PhdIO.py b/Bio/SeqIO/PhdIO.py
index 42ee6771443..8c3b07e342a 100644
--- a/Bio/SeqIO/PhdIO.py
+++ b/Bio/SeqIO/PhdIO.py
@@ -67,6 +67,8 @@
 class PhdIterator(SequenceIterator):
     """Parser for PHD files."""
 
+    modes = "t"
+
     def __init__(self, source: _TextIOSource) -> None:
         """Return SeqRecord objects from a PHD file.
 
@@ -75,7 +77,7 @@ def __init__(self, source: _TextIOSource) -> None:
 
         This uses the Bio.Sequencing.Phd module to do the hard work.
         """
-        super().__init__(source, mode="t", fmt="PHD")
+        super().__init__(source, fmt="PHD")
 
     def __next__(self):
         phd_record = Phd._read(self.stream)
diff --git a/Bio/SeqIO/PirIO.py b/Bio/SeqIO/PirIO.py
index 17d5b9a7a2e..51bce08ef6d 100644
--- a/Bio/SeqIO/PirIO.py
+++ b/Bio/SeqIO/PirIO.py
@@ -110,6 +110,8 @@
 class PirIterator(SequenceIterator):
     """Parser for PIR files."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Iterate over a PIR file and yield SeqRecord objects.
 
@@ -128,7 +130,7 @@ def __init__(self, source):
         HLA:HLA01083 length 188
 
         """
-        super().__init__(source, mode="t", fmt="Pir")
+        super().__init__(source, fmt="Pir")
         # Skip any text before the first record (e.g. blank lines, comments)
         for line in self.stream:
             if line[0] == ">":
diff --git a/Bio/SeqIO/QualityIO.py b/Bio/SeqIO/QualityIO.py
index c1d0511eaf5..33598606115 100644
--- a/Bio/SeqIO/QualityIO.py
+++ b/Bio/SeqIO/QualityIO.py
@@ -996,6 +996,8 @@ def FastqGeneralIterator(source: _TextIOSource) -> Iterator[tuple[str, str, str]
 class FastqIteratorAbstractBaseClass(SequenceIterator[str]):
     """Abstract base class for FASTQ file parsers."""
 
+    modes = "t"
+
     @abstractproperty
     def q_mapping(self):
         """Dictionary that maps letters in the quality string to quality values."""
@@ -1015,7 +1017,7 @@ def __init__(self, source):
         The quality values are stored in the `letter_annotations` dictionary
         attribute under the key `q_key`.
         """
-        super().__init__(source, mode="t", fmt="Fastq")
+        super().__init__(source, fmt="Fastq")
         self.line = None
 
     def __next__(self) -> SeqRecord:
@@ -1421,6 +1423,8 @@ def __init__(
 class QualPhredIterator(SequenceIterator):
     """Parser for QUAL files with PHRED quality scores but no sequence."""
 
+    modes = "t"
+
     def __init__(
         self,
         source: _TextIOSource,
@@ -1481,7 +1485,7 @@ def __init__(
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
-        super().__init__(source, mode="t", fmt="QUAL")
+        super().__init__(source, fmt="QUAL")
         # Skip any text before the first record (e.g. blank lines, comments)
         for line in self.stream:
             if line[0] == ">":
diff --git a/Bio/SeqIO/SeqXmlIO.py b/Bio/SeqIO/SeqXmlIO.py
index 52c05274b89..437f9948f9f 100644
--- a/Bio/SeqIO/SeqXmlIO.py
+++ b/Bio/SeqIO/SeqXmlIO.py
@@ -441,6 +441,8 @@ class SeqXmlIterator(SequenceIterator):
     method calls.
     """
 
+    modes = "b"
+
     # Small block size can be a problem with libexpat 2.6.0 onwards:
     BLOCK = 1024
 
@@ -451,7 +453,7 @@ def __init__(self, stream_or_path, namespace=None):
         # if the text handle was opened with a different encoding than the
         # one specified in the XML file. With a binary handle, the correct
         # encoding is picked up by the parser from the XML file.
-        super().__init__(stream_or_path, mode="b", fmt="SeqXML")
+        super().__init__(stream_or_path, fmt="SeqXML")
         stream = self.stream
         parser = sax.make_parser()
         content_handler = ContentHandler()
diff --git a/Bio/SeqIO/SffIO.py b/Bio/SeqIO/SffIO.py
index 2b39da78557..69d236b6b29 100644
--- a/Bio/SeqIO/SffIO.py
+++ b/Bio/SeqIO/SffIO.py
@@ -749,6 +749,8 @@ def _sff_read_raw_record(handle, number_of_flows_per_read):
 class SffIterator(SequenceIterator):
     """Parser for Standard Flowgram Format (SFF) files."""
 
+    modes = "b"
+
     # the read header format (fixed part):
     # read_header_length     H
     # name_length            H
@@ -829,7 +831,7 @@ def __init__(self, source, alphabet=None, trim=False):
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
-        super().__init__(source, mode="b", fmt="SFF")
+        super().__init__(source, fmt="SFF")
         self.trim = trim
         stream = self.stream
         (
diff --git a/Bio/SeqIO/SnapGeneIO.py b/Bio/SeqIO/SnapGeneIO.py
index 31611957224..f359e5e39d6 100644
--- a/Bio/SeqIO/SnapGeneIO.py
+++ b/Bio/SeqIO/SnapGeneIO.py
@@ -291,6 +291,8 @@ def _get_child_value(node, name, default=None, error=None):
 class SnapGeneIterator(SequenceIterator):
     """Parser for SnapGene files."""
 
+    modes = "b"
+
     def __init__(self, source):
         """Parse a SnapGene file and return a SeqRecord object.
 
@@ -299,7 +301,7 @@ def __init__(self, source):
         Note that a SnapGene file can only contain one sequence, so this
         iterator will always return a single record.
         """
-        super().__init__(source, mode="b", fmt="SnapGene")
+        super().__init__(source, fmt="SnapGene")
         self.packets = _iterate(self.stream)
         try:
             packet_type, length, data = next(self.packets)
diff --git a/Bio/SeqIO/SwissIO.py b/Bio/SeqIO/SwissIO.py
index 98205065d38..b432b70dddc 100644
--- a/Bio/SeqIO/SwissIO.py
+++ b/Bio/SeqIO/SwissIO.py
@@ -27,6 +27,8 @@
 class SwissIterator(SequenceIterator):
     """Parser to break up a Swiss-Prot/UniProt file into SeqRecord objects."""
 
+    modes = "t"
+
     def __init__(self, source: _TextIOSource) -> None:
         """Iterate over a Swiss-Prot file and return SeqRecord objects.
 
@@ -47,7 +49,7 @@ def __init__(self, source: _TextIOSource) -> None:
         Rather than calling it directly, you are expected to use this
         parser via Bio.SeqIO.parse(..., format="swiss") instead.
         """
-        super().__init__(source, mode="t", fmt="SwissProt")
+        super().__init__(source, fmt="SwissProt")
 
     def __next__(self):
         swiss_record = SwissProt._read(self.stream)
diff --git a/Bio/SeqIO/TabIO.py b/Bio/SeqIO/TabIO.py
index 9e1efb04599..1732b2b55e5 100644
--- a/Bio/SeqIO/TabIO.py
+++ b/Bio/SeqIO/TabIO.py
@@ -44,6 +44,8 @@
 class TabIterator(SequenceIterator):
     """Parser for tab-delimited files."""
 
+    modes = "t"
+
     def __init__(self, source):
         """Iterate over tab separated lines as SeqRecord objects.
 
@@ -75,7 +77,7 @@ def __init__(self, source):
         gi|45478721|ref|NP_995576.1| length 90
 
         """
-        super().__init__(source, mode="t", fmt="Tab-separated plain-text")
+        super().__init__(source, fmt="Tab-separated plain-text")
 
     def __next__(self):
         for line in self.stream:
diff --git a/Bio/SeqIO/TwoBitIO.py b/Bio/SeqIO/TwoBitIO.py
index 69a2e164059..2b507fd2623 100644
--- a/Bio/SeqIO/TwoBitIO.py
+++ b/Bio/SeqIO/TwoBitIO.py
@@ -170,9 +170,11 @@ def lower(self):
 class TwoBitIterator(SequenceIterator):
     """Parser for UCSC twoBit (.2bit) files."""
 
+    modes = "b"
+
     def __init__(self, source):
         """Read the file index."""
-        super().__init__(source, mode="b", fmt="twoBit")
+        super().__init__(source, fmt="twoBit")
         # wait to close the file until the TwoBitIterator goes out of scope:
         self.should_close_stream = False
         stream = self.stream
diff --git a/Bio/SeqIO/UniprotIO.py b/Bio/SeqIO/UniprotIO.py
index 9d5e4b727c8..2f635fe1c41 100644
--- a/Bio/SeqIO/UniprotIO.py
+++ b/Bio/SeqIO/UniprotIO.py
@@ -18,11 +18,14 @@
 
 from xml.etree import ElementTree
 from xml.parsers.expat import errors
+import warnings
 
 from Bio import SeqFeature
 from Bio.Seq import Seq
 from Bio.SeqRecord import SeqRecord
 
+from Bio import BiopythonDeprecationWarning
+
 from .Interfaces import _BytesIOSource
 from .Interfaces import SequenceIterator
 
@@ -34,6 +37,8 @@
 class UniprotIterator(SequenceIterator):
     """Parser for UniProt XML files, returning SeqRecord objects."""
 
+    modes = "bt"
+
     def __init__(
         self,
         source: _BytesIOSource,
@@ -55,7 +60,17 @@ def __init__(
         """
         if alphabet is not None:
             raise ValueError("The alphabet argument is no longer supported")
-        super().__init__(source, mode="b", fmt="UniProt XML")
+        super().__init__(source, fmt="UniProt XML")
+        if self.mode == "t":
+            warnings.warn(
+                "Opening a UniProt XML file in text mode is "
+                "deprecated, as it may lead to garbled characters. "
+                "We recommend opening the file in binary mode; "
+                "parsing UniProt XML files opened in text mode will "
+                "no longer be supported in a future release of "
+                "Biopython.",
+                BiopythonDeprecationWarning,
+            )
         self.return_raw_comments = return_raw_comments
         self._data = ElementTree.iterparse(
             self.stream, events=("start", "start-ns", "end")
diff --git a/Bio/SeqIO/XdnaIO.py b/Bio/SeqIO/XdnaIO.py
index e75b8174c67..754826c77c1 100644
--- a/Bio/SeqIO/XdnaIO.py
+++ b/Bio/SeqIO/XdnaIO.py
@@ -145,6 +145,8 @@ def _read_feature(handle, record):
 class XdnaIterator(SequenceIterator):
     """Parser for Xdna files."""
 
+    modes = "b"
+
     def __init__(self, source):
         """Parse a Xdna file and return a SeqRecord object.
 
@@ -154,7 +156,7 @@ def __init__(self, source):
         contain a single sequence.
 
         """
-        super().__init__(source, mode="b", fmt="Xdna")
+        super().__init__(source, fmt="Xdna")
         header = self.stream.read(112)
         if not header:
             raise ValueError("Empty file.")
diff --git a/DEPRECATED.rst b/DEPRECATED.rst
index d92ab3758b1..8f0a595f575 100644
--- a/DEPRECATED.rst
+++ b/DEPRECATED.rst
@@ -75,6 +75,14 @@ Another option is to use ``format='fasta-blast'``; this follows the FASTA file
 format accepted by BLAST, treating any lines starting with '#', ';', or '!' as
 comment lines and ignoring them.
 
+Bio.SeqIO.UniprotIO
+-------------------
+Parsing a UniProt XML file opened in text mode (if the file was opened using
+``open("myuniprotfile.xml")``) was deprecated in Release 1.85, as this may lead
+to garbled characters.  Please open the file in binary mode (as in
+``open("myuniprotfile.xml", "rb")``), or let ``Bio.SeqIO.parse`` take care of
+opening and closing files by passing the file name instead of a file handle.
+
 Bio.Entrez
 ----------
 The ``egquery`` function wrapping the NCBI EGQuery (Entrez Global Query)
diff --git a/Tests/test_SeqIO_UniprotIO.py b/Tests/test_SeqIO_UniprotIO.py
index 8ce5a9609a4..c028c0c4280 100644
--- a/Tests/test_SeqIO_UniprotIO.py
+++ b/Tests/test_SeqIO_UniprotIO.py
@@ -13,18 +13,20 @@
 from Bio import SeqIO
 from Bio.SeqRecord import SeqRecord
 
+from Bio import BiopythonDeprecationWarning
+
 
 class ParserTests(SeqRecordTestBaseClass):
     """Tests Uniprot XML parser."""
 
-    def test_uni001(self):
+    def check_uni001(self, mode):
         """Parsing Uniprot file uni001."""
         filename = "uni001"
         # test the record parser
 
         datafile = os.path.join("SwissProt", filename)
 
-        with open(datafile, "rb") as handle:
+        with open(datafile, mode) as handle:
             seq_record = SeqIO.read(handle, "uniprot-xml")
 
         self.assertIsInstance(seq_record, SeqRecord)
@@ -133,6 +135,12 @@ def test_uni001(self):
         self.assertEqual(seq_record.annotations["sequence_version"], 1)
         self.assertEqual(seq_record.annotations["proteinExistence"], ["Predicted"])
 
+    def test_uni001(self):
+        """Parsing Uniprot file uni001 in text mode and in binary mode."""
+        self.check_uni001("rb")
+        with self.assertWarns(BiopythonDeprecationWarning):
+            self.check_uni001("rt")
+
     def test_uni003(self):
         """Parsing Uniprot file uni003."""
         filename = "uni003"