Skip to content

Commit

Permalink
fix: Encoding issue when reading a dataframe is bypassed with an exception…
Browse files Browse the repository at this point in the history
… as the proper exception is not passed to catch the error
  • Loading branch information
100mi committed Jan 3, 2023
1 parent fb93ff3 commit 1b721ca
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions app/utils/dataframes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@

import numpy as np
import polars as pl
+ import polars.exceptions as pl_exc
import s3fs
from charset_normalizer import from_bytes
+ from fastapi.logger import logger
from numpy import bool_
from requests import get

Expand All @@ -25,8 +27,10 @@ async def get_dataframe_honouring_encoding_async(
) -> pl.DataFrame:
try:
df = pl.read_csv(file_url, null_values="NA", infer_schema_length=0)
- except UnicodeDecodeError:
+ except (UnicodeDecodeError, pl_exc.ComputeError) as err:
+ logger.exception(f"Could not interpret File encoding : {err}")
encoding = get_encoding(url=file_url)
+ logger.info(f"File encoding for `{file_url}` : {encoding}")
df = pl.read_csv(
file_url,
null_values="NA",
Expand All @@ -39,8 +43,10 @@ async def get_dataframe_honouring_encoding_async(
def get_dataframe_honouring_encoding(file_url: str) -> pl.DataFrame:
try:
df = pl.read_csv(file_url, null_values="NA", infer_schema_length=0)
- except UnicodeDecodeError:
+ except (UnicodeDecodeError, pl_exc.ComputeError) as err:
+ logger.exception(f"Could not interpret File encoding : {err}")
encoding = get_encoding(url=file_url)
+ logger.info(f"File encoding for `{file_url}` : {encoding}")
df = pl.read_csv(
file_url,
null_values="NA",
Expand Down

0 comments on commit 1b721ca

Please sign in to comment.