Skip to content

Commit

Permalink
Merge pull request #9 from factly/fix/encoding-utf
Browse files Browse the repository at this point in the history
fix/encoding utf
  • Loading branch information
100mi authored Nov 1, 2022
2 parents cb6d15e + 07b5f78 commit e211994
Showing 1 changed file with 19 additions and 2 deletions.
21 changes: 19 additions & 2 deletions app/utils/util_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,14 @@
from pathlib import Path

import numpy as np
from charset_normalizer import from_bytes
from fastapi import HTTPException, status
from fastapi.encoders import jsonable_encoder
from minio import Minio
from numpy import bool_
from pandas import read_csv
from pandas_profiling import ProfileReport
from requests import get

from app.core.config import Settings
from app.utils.profile_segments import ProfileSegments
Expand All @@ -18,6 +20,13 @@
setting = Settings()


def get_encoding(obj=None, url=None):
    """Detect the most likely text encoding of raw content.

    Args:
        obj: Raw bytes to inspect, or a binary file-like object exposing
            ``read()``. Ignored when ``url`` is truthy.
        url: Optional URL; when truthy, its response body is downloaded
            and inspected instead of ``obj``.

    Returns:
        The encoding name of the best match reported by
        ``charset_normalizer.from_bytes``.

    Raises:
        ValueError: If neither ``obj`` nor ``url`` is supplied, or if no
            plausible encoding could be detected for the content.
    """
    if url:
        obj = get(url).content
    if obj is None:
        raise ValueError("get_encoding requires either `obj` or `url`")
    if hasattr(obj, "read"):
        # from_bytes only accepts bytes/bytearray, so drain streams first.
        # NOTE(review): reading consumes the stream; a caller that wants to
        # re-parse the same object afterwards must rewind or re-fetch it.
        obj = obj.read()
    best_match = from_bytes(obj).best()
    if best_match is None:
        # best() yields None when no candidate encoding fits the payload;
        # fail with a clear message instead of an AttributeError on None.
        raise ValueError("Unable to detect the encoding of the given content")
    return best_match.encoding


def json_conversion_objects(obj):
"""Fix improper objects while creating json
Function use to convert non-JSON serializable objects to proper format
Expand Down Expand Up @@ -56,13 +65,21 @@ def provide_dataframe(

if source == "s3":
obj = s3_client.get_object(bucket, file_url)
df = read_csv(obj)
try:
df = read_csv(obj)
except UnicodeDecodeError:
encoding = get_encoding(obj=obj)
df = read_csv(obj, encoding=encoding)

# use link from file present in mande Studio
# dataframe : dataframe
# csv file path : str
if source == "url":
df = read_csv(file_url, na_values="NA")
try:
df = read_csv(file_url, na_values="NA")
except UnicodeDecodeError:
encoding = get_encoding(url=file_url)
df = read_csv(file_url, na_values="NA", encoding=encoding)
return df


Expand Down

0 comments on commit e211994

Please sign in to comment.