Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

try for pandas CI #910

Merged
merged 13 commits into from
Dec 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion fastparquet/parquet_thrift/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,6 @@
def __getattr__(name):
    """Build a thrift-object constructor for capitalised attribute names.

    Lets callers write ``parquet_thrift.RowGroup(...)``: the lookup of
    ``RowGroup`` lands here and yields a callable that forwards to
    ``ThriftObject.from_fields`` with ``thrift_name`` preset to the
    requested name.

    NOTE(review): presumably this is the package's module-level attribute
    hook (it sits in ``__init__.py``) - confirm it is defined at top level.

    Parameters
    ----------
    name : str
        Attribute name being looked up.

    Returns
    -------
    functools.partial
        ``ThriftObject.from_fields`` with ``thrift_name=name`` bound.

    Raises
    ------
    AttributeError
        If ``name`` does not start with an uppercase character.
    """
    # for compatibility with code that calls, e.g., parquet_thrift.RowGroup(...)
    from ..cencoding import ThriftObject

    # Only capitalised names are answered; anything else must raise
    # AttributeError so that hasattr()/getattr(..., default) keep working.
    # name[:1] (not name[0]) lets an empty name fall through to the raise
    # instead of failing with IndexError.
    if name[:1].isupper():
        return partial(ThriftObject.from_fields, thrift_name=name)
    raise AttributeError(name)
2 changes: 1 addition & 1 deletion fastparquet/test/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import fsspec
import numpy as np
import pandas as pd
from pandas._testing import makeMixedDataFrame
from .util import makeMixedDataFrame
try:
from pandas.tslib import Timestamp
except ImportError:
Expand Down
2 changes: 1 addition & 1 deletion fastparquet/test/test_output.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from fastparquet import ParquetFile
from fastparquet import write, parquet_thrift, update_file_custom_metadata
from fastparquet import writer, encoding
from pandas._testing import makeMixedDataFrame
from .util import makeMixedDataFrame
from pandas.testing import assert_frame_equal
from pandas.api.types import CategoricalDtype
import pytest
Expand Down
80 changes: 36 additions & 44 deletions fastparquet/test/test_pd_optional_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,68 +3,59 @@
import numpy as np
import pandas as pd
from pandas.testing import assert_frame_equal
from pandas.core.arrays import IntegerArray
import fastparquet as fp
from .util import tempdir
from fastparquet import write, parquet_thrift
from fastparquet.parquet_thrift.parquet import ttypes as tt
import numpy.random as random


EXPECTED_SERIES_INT8 = pd.Series(random.uniform(low=-128, high=127,size=100)).round()
EXPECTED_SERIES_INT16 = pd.Series(random.uniform(low=-32768, high=32767,size=100)).round()
EXPECTED_SERIES_INT32 = pd.Series(random.uniform(low=-2147483648, high=2147483647,size=100)).round()
EXPECTED_SERIES_INT64 = pd.Series(random.uniform(low=-9223372036854775808, high=9223372036854775807,size=100)).round()
EXPECTED_SERIES_UINT8 = pd.Series(random.uniform(low=0, high=255,size=100)).round()
EXPECTED_SERIES_UINT16 = pd.Series(random.uniform(low=0, high=65535,size=100)).round()
EXPECTED_SERIES_UINT32 = pd.Series(random.uniform(low=0, high=4294967295,size=100)).round()
EXPECTED_SERIES_UINT64 = pd.Series(random.uniform(low=0, high=18446744073709551615,size=100)).round()
EXPECTED_SERIES_BOOL = pd.Series(random.choice([False, True], 100))
EXPECTED_SERIES_STRING = pd.Series(random.choice([
EXPECTED_SERIES_INT8 = random.uniform(low=-128, high=127, size=100).round()
EXPECTED_SERIES_INT16 = random.uniform(low=-32768, high=32767, size=100).round()
EXPECTED_SERIES_INT32 = random.uniform(low=-2147483648, high=2147483647, size=100).round()
EXPECTED_SERIES_INT64 = random.uniform(low=-9223372036854775808, high=9223372036854775807, size=100).round()
EXPECTED_SERIES_UINT8 = random.uniform(low=0, high=255, size=100).round()
EXPECTED_SERIES_UINT16 = random.uniform(low=0, high=65535, size=100).round()
EXPECTED_SERIES_UINT32 = random.uniform(low=0, high=4294967295, size=100).round()
EXPECTED_SERIES_UINT64 = random.uniform(low=0, high=18446744073709551615, size=100).round()
EXPECTED_SERIES_BOOL = random.choice([False, True], 100)
EXPECTED_SERIES_STRING = random.choice([
'You', 'are', 'my', 'fire',
'The', 'one', 'desire',
'Believe', 'when', 'I', 'say',
'I', 'want', 'it', 'that', 'way'
], 100))
], 100)


EXPECTED_SERIES_INT8.loc[20:30] = np.nan
EXPECTED_SERIES_INT16.loc[20:30] = np.nan
EXPECTED_SERIES_INT32.loc[20:30] = np.nan
EXPECTED_SERIES_INT64.loc[20:30] = np.nan
EXPECTED_SERIES_UINT8.loc[20:30] = np.nan
EXPECTED_SERIES_UINT16.loc[20:30] = np.nan
EXPECTED_SERIES_UINT32.loc[20:30] = np.nan
EXPECTED_SERIES_UINT64.loc[20:30] = np.nan
EXPECTED_SERIES_BOOL.loc[20:30] = np.nan
EXPECTED_SERIES_STRING.loc[20:30] = np.nan
EXPECTED_SERIES_INT8[20:30] = np.nan
EXPECTED_SERIES_INT16[20:30] = np.nan
EXPECTED_SERIES_INT32[20:30] = np.nan
EXPECTED_SERIES_INT64[20:30] = np.nan
EXPECTED_SERIES_UINT8[20:30] = np.nan
EXPECTED_SERIES_UINT16[20:30] = np.nan
EXPECTED_SERIES_UINT32[20:30] = np.nan
EXPECTED_SERIES_UINT64[20:30] = np.nan
EXPECTED_SERIES_BOOL[20:30] = np.nan
EXPECTED_SERIES_STRING[20:30] = np.nan
mask = EXPECTED_SERIES_UINT64 > -1


TEST = pd.DataFrame({
'int8': EXPECTED_SERIES_INT8.astype('Int8'),
'int16': EXPECTED_SERIES_INT16.astype('Int16'),
'int32': EXPECTED_SERIES_INT32.astype('Int32'),
'int64': EXPECTED_SERIES_INT64.astype('Int64'),
'uint8': EXPECTED_SERIES_UINT8.astype('UInt8'),
'uint16': EXPECTED_SERIES_UINT16.astype('UInt16'),
'uint32': EXPECTED_SERIES_UINT32.astype('UInt32'),
'uint64': EXPECTED_SERIES_UINT64.astype('UInt64'),
'bool': EXPECTED_SERIES_BOOL.astype('boolean'),
'string': EXPECTED_SERIES_STRING.astype('string')
'int8': pd.Series(pd.array(EXPECTED_SERIES_INT8, dtype='Int8')),
'int16': pd.Series(pd.array(EXPECTED_SERIES_INT16, dtype='Int16')),
'int32': pd.Series(pd.array(EXPECTED_SERIES_INT32, dtype='Int32')),
'int64': pd.Series(pd.array(EXPECTED_SERIES_INT64, dtype='Int64')),
'uint8': pd.Series(pd.array(EXPECTED_SERIES_UINT8, dtype='UInt8')),
'uint16': pd.Series(pd.array(EXPECTED_SERIES_UINT16, dtype='UInt16')),
'uint32': pd.Series(pd.array(EXPECTED_SERIES_UINT32, dtype='UInt32')),
'uint64': pd.Series(pd.array(EXPECTED_SERIES_UINT64, dtype='UInt64')),
'bool': pd.Series(pd.array(EXPECTED_SERIES_BOOL, dtype='boolean')),
'string': pd.Series(EXPECTED_SERIES_STRING, dtype='string')
})


EXPECTED = pd.DataFrame({
'int8': EXPECTED_SERIES_INT8.astype('float16'),
'int16': EXPECTED_SERIES_INT16.astype('float32'),
'int32': EXPECTED_SERIES_INT32.astype('float64'),
'int64': EXPECTED_SERIES_INT64.astype('float64'),
'uint8': EXPECTED_SERIES_UINT8.astype('float16'),
'uint16': EXPECTED_SERIES_UINT16.astype('float32'),
'uint32': EXPECTED_SERIES_UINT32.astype('float64'),
'uint64': EXPECTED_SERIES_UINT64.astype('float64'),
'bool': EXPECTED_SERIES_BOOL.astype('float16'),
'string': EXPECTED_SERIES_STRING
})
EXPECTED = TEST


EXPECTED_PARQUET_TYPES = {
Expand All @@ -80,7 +71,8 @@
'string': 'BYTE_ARRAY'
}

@pytest.mark.parametrize('comp', (None,'snappy', 'gzip'))

@pytest.mark.parametrize('comp', (None, 'snappy', 'gzip'))
@pytest.mark.parametrize('scheme', ('simple', 'hive'))
def test_write_nullable_columns(tempdir, scheme, comp):
fname = os.path.join(tempdir, 'test_write_nullable_columns.parquet')
Expand Down
17 changes: 17 additions & 0 deletions fastparquet/test/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import tempfile
import shutil

import pandas as pd

TEST_DATA = "test-data"

port = 5555
Expand Down Expand Up @@ -76,3 +78,18 @@ def tempdir():
yield d
if os.path.exists(d):
shutil.rmtree(d, ignore_errors=True)



def makeMixedDataFrame():
    """Return a small five-row DataFrame with mixed column types.

    Local replacement for the helper the tests previously imported from
    ``pandas._testing``.

    Columns
    -------
    A : float64, the values 0.0 through 4.0
    B : float64, alternating 0.0 / 1.0
    C : object, the strings "foo1" through "foo5"
    D : five consecutive business days starting at 2009-01-01

    No explicit index is passed to the DataFrame constructor.
    """
    data = {
        "A": pd.Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype="float64"),
        "B": pd.Series([0.0, 1.0, 0.0, 1.0, 0.0], dtype="float64"),
        "C": pd.Series(["foo1", "foo2", "foo3", "foo4", "foo5"], dtype='object'),
        "D": pd.bdate_range("1/1/2009", periods=5),
    }
    return pd.DataFrame(data=data)


Loading