Skip to content

Commit

Permalink
Raise Message Exception when displaying binary data
Browse files Browse the repository at this point in the history
Fixes
https://sentry.galaxyproject.org/share/issue/a8843884527f4e4089b32fd14a2f126d/:
```
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xff in position 4: invalid start byte
  File "galaxy/web/framework/middleware/error.py", line 167, in __call__
    app_iter = self.application(environ, sr_checker)
  File "galaxy/web/framework/middleware/statsd.py", line 29, in __call__
    req = self.application(environ, start_response)
  File "/cvmfs/main.galaxyproject.org/venv/lib/python3.11/site-packages/paste/httpexceptions.py", line 635, in __call__
    return self.application(environ, start_response)
  File "galaxy/web/framework/base.py", line 174, in __call__
    return self.handle_request(request_id, path_info, environ, start_response)
  File "galaxy/web/framework/base.py", line 263, in handle_request
    body = method(trans, **kwargs)
  File "galaxy/webapps/galaxy/controllers/dataset.py", line 152, in display
    display_data, headers = data.datatype.display_data(
  File "galaxy/datatypes/sequence.py", line 785, in display_data
    "/dataset/large_file.mako", truncated_data=fh.read(max_peek_size), data=dataset
  File "<frozen codecs>", line 322, in decode

```
The dataset that triggered this error is a BAM file that had been assigned the fastqsanger.gz datatype.
  • Loading branch information
mvdbeek committed Aug 15, 2024
1 parent b8b47bc commit 4cf0ad7
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 11 deletions.
15 changes: 9 additions & 6 deletions lib/galaxy/datatypes/sequence.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
get_headers,
iter_headers,
)
from galaxy.exceptions import InvalidFileFormatError
from galaxy.util import (
compression_utils,
nice_size,
Expand Down Expand Up @@ -775,15 +776,17 @@ def display_data(
headers = kwd.get("headers", {})
if preview:
with compression_utils.get_fileobj(dataset.get_file_name()) as fh:
max_peek_size = 1000000 # 1 MB
if os.stat(dataset.get_file_name()).st_size < max_peek_size:
max_peek_size = 100000
try:
chunk = fh.read(max_peek_size + 1)
except UnicodeDecodeError:
raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.")
if len(chunk) <= max_peek_size:
mime = "text/plain"
self._clean_and_set_mime_type(trans, mime, headers)
return fh.read(), headers
return chunk[:-1], headers
return (
trans.fill_template_mako(
"/dataset/large_file.mako", truncated_data=fh.read(max_peek_size), data=dataset
),
trans.fill_template_mako("/dataset/large_file.mako", truncated_data=chunk[:-1], data=dataset),
headers,
)
else:
Expand Down
14 changes: 9 additions & 5 deletions lib/galaxy/datatypes/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
iter_headers,
validate_tabular,
)
from galaxy.exceptions import InvalidFileFormatError
from galaxy.util import compression_utils
from galaxy.util.compression_utils import (
FileObjType,
Expand Down Expand Up @@ -157,12 +158,15 @@ def get_chunk(self, trans, dataset: HasFileName, offset: int = 0, ck_size: Optio
def _read_chunk(self, trans, dataset: HasFileName, offset: int, ck_size: Optional[int] = None):
with compression_utils.get_fileobj(dataset.get_file_name()) as f:
f.seek(offset)
ck_data = f.read(ck_size or trans.app.config.display_chunk_size)
if ck_data and ck_data[-1] != "\n":
cursor = f.read(1)
while cursor and cursor != "\n":
ck_data += cursor
try:
ck_data = f.read(ck_size or trans.app.config.display_chunk_size)
if ck_data and ck_data[-1] != "\n":
cursor = f.read(1)
while cursor and cursor != "\n":
ck_data += cursor
cursor = f.read(1)
except UnicodeDecodeError:
raise InvalidFileFormatError("Dataset appears to contain binary data, cannot display.")
last_read = f.tell()
return ck_data, last_read

Expand Down

0 comments on commit 4cf0ad7

Please sign in to comment.