Skip to content

Commit

Permalink
Add support for Snowpark Pandas Index (streamlit#9222)
Browse files Browse the repository at this point in the history
## Describe your changes

Adds support for the Snowpark Pandas Index object. This PR also makes
some minor modifications to our CI workflow so that the Snowflake dependency
is installed only for the Snowflake integration tests, rather than always.

## Testing Plan

- Added a Snowpandas Index case to data_mocks, so it will be exercised
across the various commands and unit tests.

---

**Contribution License Agreement**

By submitting this pull request you agree that all contributions to this
project are made under the Apache 2.0 license.
  • Loading branch information
lukasmasuch authored Aug 7, 2024
1 parent bae8ffd commit 8c39489
Show file tree
Hide file tree
Showing 8 changed files with 93 additions and 13 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/python-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ jobs:
)
name: >
Python 3.8: Python tests for Snowflake
py-unit-tests (snowflake)
env:
USE_CONSTRAINTS_FILE: "${{ fromJson(needs.build_info.outputs.USE_CONSTRAINTS_FILE )}}"
Expand All @@ -220,10 +220,10 @@ jobs:
ref: ${{ inputs.ref }}
persist-credentials: false
submodules: "recursive"
- name: Set up Python 3.8
- name: Set up Python 3.11
uses: actions/setup-python@v5
with:
python-version: "3.8"
python-version: "3.11"
- name: Decrypt credentials
run: ./.github/scripts/decrypt_credentials.sh
env:
Expand All @@ -232,7 +232,7 @@ jobs:
uses: ./.github/actions/make_init
- name: Run make develop
run: make develop
- name: Run Type Checkers
run: scripts/mypy --report
- name: Install lib with snowflake dependencies
run: uv pip install --editable ./lib[snowflake]
- name: Run Python Tests for Snowflake
run: make pytest-snowflake
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ python-init-test-only: lib/test-requirements.txt

.PHONY: python-init
python-init:
pip_args=("--editable" "./lib[snowflake]");\
pip_args=("--editable" "./lib");\
if [ "${USE_CONSTRAINTS_FILE}" = "true" ] ; then\
pip_args+=(--constraint "${CONSTRAINTS_URL}"); \
fi;\
Expand Down
2 changes: 1 addition & 1 deletion lib/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@

EXTRA_REQUIRES = {
"snowflake": [
"snowflake-snowpark-python>=0.9.0; python_version<'3.12'",
"snowflake-snowpark-python[modin]>=1.17.0; python_version<'3.12'",
"snowflake-connector-python>=2.8.0; python_version<'3.12'",
]
}
Expand Down
11 changes: 8 additions & 3 deletions lib/streamlit/dataframe_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,9 @@
_MODIN_SERIES_TYPE_STR: Final = "modin.pandas.series.Series"
_SNOWPANDAS_DF_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.dataframe.DataFrame"
_SNOWPANDAS_SERIES_TYPE_STR: Final = "snowflake.snowpark.modin.pandas.series.Series"
_SNOWPANDAS_INDEX_TYPE_STR: Final = (
"snowflake.snowpark.modin.plugin.extensions.index.Index"
)

V_co = TypeVar(
"V_co",
Expand Down Expand Up @@ -181,7 +184,7 @@ def is_unevaluated_data_object(obj: object) -> bool:
- Snowpark DataFrame / Table
- PySpark DataFrame
- Modin DataFrame / Series
- Snowpandas DataFrame / Series
- Snowpandas DataFrame / Series / Index
- Generator functions
Unevaluated means that the data is not yet in the local memory.
Expand Down Expand Up @@ -233,8 +236,10 @@ def is_modin_data_object(obj: object) -> bool:

def is_snowpandas_data_object(obj: object) -> bool:
    """True if obj is a Snowpark Pandas DataFrame, Series, or Index."""
    # Each check compares against a type string constant, so the Snowpark
    # pandas classes are referenced only by name here.
    return (
        is_type(obj, _SNOWPANDAS_DF_TYPE_STR)
        or is_type(obj, _SNOWPANDAS_SERIES_TYPE_STR)
        or is_type(obj, _SNOWPANDAS_INDEX_TYPE_STR)
    )


Expand Down
3 changes: 0 additions & 3 deletions lib/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from __future__ import annotations

import os
import sys
from unittest.mock import mock_open, patch

import pytest
Expand Down Expand Up @@ -87,8 +86,6 @@ def pytest_configure(config: pytest.Config):

is_require_snowflake = config.getoption("--require-snowflake", default=False)
if is_require_snowflake:
if sys.version_info[0:2] != (3, 8):
raise pytest.UsageError("Python 3.8 is required to run Snowflake tests")
try:
import snowflake.snowpark # noqa: F401
except ImportError:
Expand Down
16 changes: 16 additions & 0 deletions lib/tests/streamlit/data_mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from tests.streamlit.modin_mocks import Series as ModinSeries
from tests.streamlit.pyspark_mocks import DataFrame as PySparkDataFrame
from tests.streamlit.snowpandas_mocks import DataFrame as SnowpandasDataFrame
from tests.streamlit.snowpandas_mocks import Index as SnowpandasIndex
from tests.streamlit.snowpandas_mocks import Series as SnowpandasSeries
from tests.streamlit.snowpark_mocks import DataFrame as SnowparkDataFrame
from tests.streamlit.snowpark_mocks import Row as SnowparkRow
Expand Down Expand Up @@ -839,6 +840,21 @@ def data_generator():
pd.DataFrame,
),
),
(
"Snowpandas Index",
SnowpandasIndex(
pd.Index(["st.text_area", "st.markdown"]),
),
CaseMetadata(
2,
1,
DataFormat.SNOWPANDAS_OBJECT,
["st.text_area", "st.markdown"],
"dataframe",
True,
pd.DataFrame,
),
),
(
"Modin DataFrame",
ModinDataFrame(
Expand Down
35 changes: 35 additions & 0 deletions lib/tests/streamlit/dataframe_util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
TestObject,
)
from tests.streamlit.snowpandas_mocks import DataFrame as SnowpandasDataFrame
from tests.streamlit.snowpandas_mocks import Index as SnowpandasIndex
from tests.streamlit.snowpandas_mocks import Series as SnowpandasSeries
from tests.streamlit.snowpark_mocks import DataFrame as SnowparkDataFrame
from tests.streamlit.snowpark_mocks import Row as SnowparkRow
Expand Down Expand Up @@ -387,6 +388,7 @@ def test_is_snowpandas_data_object(self):
dataframe_util.is_snowpandas_data_object(SnowpandasDataFrame(df))
)
self.assertTrue(dataframe_util.is_snowpandas_data_object(SnowpandasSeries(df)))
self.assertTrue(dataframe_util.is_snowpandas_data_object(SnowpandasIndex(df)))

def test_is_snowpark_row_list(self):
class DummyClass:
Expand Down Expand Up @@ -480,6 +482,39 @@ def test_verify_snowpark_integration(self):
pd.DataFrame,
)

@pytest.mark.require_snowflake
def test_verify_snowpandas_integration(self):
    """Integration test for Snowpark pandas object handling.

    Complements the mock-based tests by running against the installed
    library, to verify that its latest version is still supported.
    """
    import modin.pandas as modin_pd

    # Import the Snowpark pandas plugin for modin (the imported name itself
    # is unused, hence the noqa).
    import snowflake.snowpark.modin.plugin  # noqa: F401

    def _assert_detected_and_convertible(snowpandas_obj) -> None:
        # The object must be recognized as a Snowpark pandas data object...
        detected = dataframe_util.is_snowpandas_data_object(snowpandas_obj)
        assert detected is True
        # ...and must convert to a plain pandas DataFrame.
        converted = dataframe_util.convert_anything_to_pandas_df(snowpandas_obj)
        assert isinstance(converted, pd.DataFrame)

    with create_snowpark_session():
        frame = modin_pd.DataFrame([1, 2, 3], columns=["col1"])
        _assert_detected_and_convertible(frame)

        column = frame["col1"]
        _assert_detected_and_convertible(column)

        frame_index = frame.index
        _assert_detected_and_convertible(frame_index)

@parameterized.expand(
SHARED_TEST_CASES,
)
Expand Down
27 changes: 27 additions & 0 deletions lib/tests/streamlit/snowpandas_mocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,3 +74,30 @@ def head(self, n: int) -> Series:
def __getitem__(self, key: slice | int) -> Series:
# Allow slicing and integer indexing
return Series(self._data[key])


class Index:
    """Dummy Index class that imitates the
    snowflake.snowpark.modin.plugin.extensions.index.Index class for testing
    purposes.

    We use this to make sure that our code does a special handling
    if it detects a Snowpark Pandas Index.
    This allows testing of the functionality without having the library
    installed, but it won't capture changes in the API of the library. This
    requires integration tests.
    """

    # Report the real library's module path instead of this test module's
    # path, so that the mock's qualified name matches the real class.
    __module__ = "snowflake.snowpark.modin.plugin.extensions.index"

    def __init__(self, data: pd.Index):
        """Wrap the given pandas Index as the mock's underlying data."""
        self._data: pd.Index = data

    def to_pandas(self) -> pd.Index:
        """Return the wrapped pandas Index."""
        return self._data

    def head(self, n: int) -> Index:
        """Return a mock Snowpark Pandas Index with the top n elements."""
        # __getitem__ already wraps the sliced data in a new mock Index.
        # Wrapping that result again would nest a mock inside a mock, and
        # to_pandas() would then return a mock instead of a pandas Index.
        return self[:n]

    def __getitem__(self, key: slice | int) -> Index:
        """Allow slicing and integer indexing; the result is wrapped in a mock."""
        return Index(self._data[key])

0 comments on commit 8c39489

Please sign in to comment.