Skip to content

Commit

Permalink
Merge pull request #16 from factly/feat/pandas-to-polars
Browse files (browse the repository at this point in the history)
feat: Read dataframes with polars instead of pandas
  • Loading branch information
deshetti authored Nov 15, 2022
2 parents 18769e1 + 6e002ae commit 9fdcc0f
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 18 deletions.
24 changes: 12 additions & 12 deletions app/api/api_v1/routers/profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ async def provide_raw_profiling(
samples_to_show = 5

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
samples={"head": samples_to_show, "tail": samples_to_show},
show_variable_description=False,
Expand Down Expand Up @@ -80,7 +80,7 @@ async def profile_samples(
samples_to_show = 5

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=True,
samples={"head": samples_to_show, "tail": samples_to_show},
show_variable_description=False,
Expand Down Expand Up @@ -112,7 +112,7 @@ async def profile_table(source: str = setting.EXAMPLE_URL):
# samples_to_show = 5

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=True,
# samples={"head": samples_to_show, "tail": samples_to_show},
show_variable_description=False,
Expand All @@ -139,7 +139,7 @@ async def profile_analysis(source: str = setting.EXAMPLE_URL):
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=True,
show_variable_description=False,
progress_bar=False,
Expand All @@ -162,7 +162,7 @@ async def profile_alerts(source: str = setting.EXAMPLE_URL):
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=True,
show_variable_description=False,
progress_bar=False,
Expand All @@ -187,7 +187,7 @@ async def profile_scatter(
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
show_variable_description=False,
progress_bar=False,
Expand All @@ -212,7 +212,7 @@ async def profile_correlations(
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
show_variable_description=False,
progress_bar=False,
Expand All @@ -237,7 +237,7 @@ async def profile_missing(
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
show_variable_description=False,
progress_bar=False,
Expand All @@ -262,7 +262,7 @@ async def profile_package(
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
show_variable_description=False,
progress_bar=False,
Expand All @@ -287,7 +287,7 @@ async def profile_variables(
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
show_variable_description=False,
progress_bar=False,
Expand All @@ -312,7 +312,7 @@ async def profile_duplicates(
dataframe = provide_dataframe(source)

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
show_variable_description=False,
progress_bar=False,
Expand Down Expand Up @@ -343,7 +343,7 @@ async def profile_description(
samples_to_show = 5

profile = ProfileReport(
dataframe,
dataframe.to_pandas(),
minimal=minimal,
samples={"head": samples_to_show, "tail": samples_to_show},
show_variable_description=False,
Expand Down
6 changes: 3 additions & 3 deletions app/utils/util_functions.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
import datetime

import numpy as np
import polars as pl
from charset_normalizer import from_bytes
from numpy import bool_
from pandas import read_csv
from requests import get

from app.core.config import Settings
Expand Down Expand Up @@ -49,8 +49,8 @@ def provide_dataframe(file_url: str, source="url"):
# link : str, validate as proper url
# use link from file present in mande Studio
try:
df = read_csv(file_url, na_values="NA")
df = pl.read_csv(file_url, null_values="NA")
except UnicodeDecodeError:
encoding = get_encoding(url=file_url)
df = read_csv(file_url, na_values="NA", encoding=encoding)
df = pl.read_csv(file_url, null_values="NA", encoding=encoding)
return df
77 changes: 74 additions & 3 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ fastapi = "^0.85.1"
uvicorn = "^0.19.0"
python-dotenv = "^0.21.0"
pandas-profiling = "^3.3.0"
polars = {extras = ["numpy", "pandas", "pyarrow", "fsspec"], version = "^0.14.28"}

[tool.poetry.dev-dependencies]
flake8 = "^4.0.1"
Expand Down

0 comments on commit 9fdcc0f

Please sign in to comment.