Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into viviannguyen/sc-39372/…
Browse files Browse the repository at this point in the history
…wrap-aggregate-api-for-python-api
  • Loading branch information
ihnorton committed Mar 2, 2024
2 parents 5361950 + 42c8e3d commit c328590
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 8 deletions.
16 changes: 16 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,25 @@ jobs:
# 11.7 necessary due to: https://github.com/actions/setup-python/issues/682#issuecomment-1604261330
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os == 'macos-12' && contains(fromJSON('["3.7", "3.8"]'), matrix.python-version) && '11.7' || '11' }}
#MACOSX_DEPLOYMENT_TARGET: "10.11"
# On windows-2019 we are using the Visual Studio generator, which is multi-config and places the build artifacts in a subdirectory
steps:
- name: Checkout TileDB-Py `dev`
uses: actions/checkout@v3

# By default Visual Studio chooses the earliest installed toolset version
# for the main build, while vcpkg chooses the latest. Force the main build
# to use the latest as well (14.39 currently) so the two match.
- name: Setup MSVC toolset (VS 2022)
  uses: TheMrMilchmann/setup-msvc-dev@v3
  if: matrix.os == 'windows-latest'
  with:
    arch: x64
    # Quoted so YAML keeps the version a string (14.40 would otherwise
    # be read as the float 14.4).
    toolset: "14.39"

- name: Install Ninja (VS 2022)
uses: seanmiddleditch/gha-setup-ninja@v4
if: matrix.os == 'windows-latest'

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
with:
Expand Down Expand Up @@ -80,6 +95,7 @@ jobs:
fi
env:
TILEDB_FORCE_ALL_DEPS: True
CMAKE_GENERATOR: "Ninja"
if: matrix.os == 'windows-latest'

- name: "Download TileDB From Tarball And Build TileDB-Py (macOS)"
Expand Down
27 changes: 27 additions & 0 deletions .github/workflows/daily-test-build-numpy.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,36 @@ jobs:
# 11.7 necessary due to: https://github.com/actions/setup-python/issues/682#issuecomment-1604261330
#MACOSX_DEPLOYMENT_TARGET: "10.15"
MACOSX_DEPLOYMENT_TARGET: ${{ matrix.os == 'macos-11' && contains(fromJson('["3.7", "3.8"]'), matrix.python-version) && '11.7' || '11' }}
VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite'
steps:
- name: Checkout TileDB-Py `dev`
uses: actions/checkout@v3

# By default Visual Studio chooses the earliest installed toolset version
# for the main build, while vcpkg chooses the latest. Force the main build
# to use the latest as well (14.39 currently) so the two match.
- name: Setup MSVC toolset (VS 2022)
  uses: TheMrMilchmann/setup-msvc-dev@v3
  if: matrix.os == 'windows-latest'
  with:
    arch: x64
    # Quoted so YAML keeps the version a string (14.40 would otherwise
    # be read as the float 14.4).
    toolset: "14.39"

- name: Install Ninja (VS 2022)
uses: seanmiddleditch/gha-setup-ninja@v4
if: matrix.os == 'windows-latest'

- name: Enable vcpkg binary caching
uses: actions/github-script@v6
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
# A plain `export` only lives for this step's own shell process. Append to
# $GITHUB_ENV instead so the later "Build TileDB-Py" step sees
# CMAKE_GENERATOR. The shell is pinned to bash because the redirect below
# uses bash syntax.
- name: "Set CMAKE_GENERATOR"
  run: echo "CMAKE_GENERATOR=Ninja" >> "$GITHUB_ENV"
  shell: bash
  if: matrix.os == 'windows-latest'

- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
Expand Down Expand Up @@ -67,6 +93,7 @@ jobs:

- name: Build TileDB-Py
run: |
echo "CMAKE_GENERATOR=$CMAKE_GENERATOR"
python setup.py build_ext --inplace --werror
python setup.py develop
Expand Down
30 changes: 30 additions & 0 deletions .github/workflows/daily-test-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ jobs:

test:
runs-on: ${{ matrix.os }}
defaults:
run:
shell: bash
strategy:
matrix:
os: [ubuntu-latest, macos-12, windows-latest]
Expand All @@ -39,8 +42,34 @@ jobs:
env:
TILEDB_VERSION: ${{ matrix.libtiledb_version }}
MACOSX_DEPLOYMENT_TARGET: "11"
VCPKG_BINARY_SOURCES: 'clear;x-gha,readwrite'

steps:
# By default Visual Studio chooses the earliest installed toolset version
# for the main build, while vcpkg chooses the latest. Force the main build
# to use the latest as well (14.39 currently) so the two match.
- name: Setup MSVC toolset (VS 2022)
  uses: TheMrMilchmann/setup-msvc-dev@v3
  if: matrix.os == 'windows-latest'
  with:
    arch: x64
    # Quoted so YAML keeps the version a string (14.40 would otherwise
    # be read as the float 14.4).
    toolset: "14.39"

- name: Enable vcpkg binary caching
uses: actions/github-script@v6
with:
script: |
core.exportVariable('ACTIONS_CACHE_URL', process.env.ACTIONS_CACHE_URL || '');
core.exportVariable('ACTIONS_RUNTIME_TOKEN', process.env.ACTIONS_RUNTIME_TOKEN || '');
- name: Install Ninja (VS 2022)
uses: seanmiddleditch/gha-setup-ninja@v4
if: matrix.os == 'windows-latest'

- name: "Set CMAKE_GENERATOR"
run: echo "CMAKE_GENERATOR=Ninja" >> $GITHUB_ENV
if: matrix.os == 'windows-latest'

- name: Set up Python
uses: actions/setup-python@v2
with:
Expand Down Expand Up @@ -73,6 +102,7 @@ jobs:

- name: Build TileDB-Py
run: |
echo "CMAKE_GENERATOR=$CMAKE_GENERATOR"
python setup.py build_ext --inplace --werror
python setup.py develop
env:
Expand Down
6 changes: 5 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
# Allow to override TILEDB_FORCE_ALL_DEPS with environment variable
TILEDB_FORCE_ALL_DEPS = "TILEDB_FORCE_ALL_DEPS" in os.environ
TILEDB_DISABLE_SERIALIZATION = "TILEDB_DISABLE_SERIALIZATION" in os.environ
CMAKE_ARCHITECTURE = os.environ.get("CMAKE_ARCHITECTURE", None)
CMAKE_GENERATOR = os.environ.get("CMAKE_GENERATOR", None)

# Directory containing this file
Expand Down Expand Up @@ -240,7 +241,10 @@ def build_libtiledb(src_dir):
cmake_cmd.append("-DCMAKE_BUILD_TYPE={}".format(build_type))

if os.name == "nt":
cmake_cmd.extend(["-A", "x64", "-DMSVC_MP_FLAG=/MP4"])
cmake_cmd.extend(["-DMSVC_MP_FLAG=/MP4"])

if CMAKE_ARCHITECTURE:
cmake_cmd.extend(["-A", CMAKE_ARCHITECTURE])

if CMAKE_GENERATOR:
cmake_cmd.extend(["-G", CMAKE_GENERATOR])
Expand Down
2 changes: 1 addition & 1 deletion tiledb/filestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def read(self, offset: int = 0, size: int = -1) -> bytes:

if size == -1:
size = len(self)
size = max(size - offset, 0)
size = min(size, len(self) - offset)

return lt.Filestore._buffer_export(
self._ctx,
Expand Down
25 changes: 23 additions & 2 deletions tiledb/tests/test_filestore.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,33 @@ def test_buffer(self, capfd):
schema.attr(0).dump()
assert_captured(capfd, "Type: BLOB")

data = b"buffer"

fs = tiledb.Filestore(path)
fs.write(data)
assert bytes(data) == fs.read()

def test_small_buffer(self, capfd):
path = self.path("test_small_buffer")
# create a 4 byte array
data = b"abcd"

fs = tiledb.Filestore(path)

with self.assertRaises(tiledb.TileDBError):
fs.write(data)

schema = tiledb.ArraySchema.from_file()
tiledb.Array.create(path, schema)

assert schema.attr(0).name == "contents"
assert schema.attr(0).dtype == np.bytes_

schema.attr(0).dump()
assert_captured(capfd, "Type: BLOB")

fs = tiledb.Filestore(path)
fs.write(data)
assert data[3:4] == fs.read(offset=3, size=1)

def test_uri(self, text_fname):
path = self.path("test_uri")
schema = tiledb.ArraySchema.from_file(text_fname)
Expand Down
48 changes: 44 additions & 4 deletions tiledb/tests/test_pandas_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import random
import string
import sys
import uuid

import numpy as np
Expand Down Expand Up @@ -380,6 +381,10 @@ def test_dataframe_categorical(self):
with tiledb.open(uri) as B:
tm.assert_frame_equal(df, B.df[:])

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_rt1(self):
def rand_dtype(dtype, size):
nbytes = size * np.dtype(dtype).itemsize
Expand Down Expand Up @@ -409,7 +414,12 @@ def rand_dtype(dtype, size):

csv_array_uri = os.path.join(uri, "tiledb_csv")
tiledb.from_csv(
csv_array_uri, csv_uri, index_col=0, parse_dates=[1], sparse=False
csv_array_uri,
csv_uri,
index_col=0,
parse_dates=[1],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=False,
)

df_from_array = tiledb.open_dataframe(csv_array_uri)
Expand All @@ -420,7 +430,12 @@ def rand_dtype(dtype, size):
with tiledb.FileIO(tiledb.VFS(), csv_uri, "rb") as fio:
csv_array_uri2 = os.path.join(csv_array_uri + "_2")
tiledb.from_csv(
csv_array_uri2, csv_uri, index_col=0, parse_dates=[1], sparse=False
csv_array_uri2,
csv_uri,
index_col=0,
parse_dates=[1],
sparse=False,
date_format="%Y-%m-%d %H:%M:%S.%f",
)

df_from_array2 = tiledb.open_dataframe(csv_array_uri2)
Expand Down Expand Up @@ -677,6 +692,10 @@ def test_csv_dense(self):
tmp_array2 = os.path.join(tmp_dir, "array2")
tiledb.from_csv(tmp_array2, tmp_csv, sparse=False)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_csv_col_to_sparse_dims(self):
df = make_dataframe_basic3(20)

Expand All @@ -697,6 +716,7 @@ def test_csv_col_to_sparse_dims(self):
sparse=True,
index_col=["time", "double_range"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
)

df_bk = tiledb.open_dataframe(tmp_array)
Expand Down Expand Up @@ -734,6 +754,7 @@ def test_csv_col_to_sparse_dims(self):
tmp_csv2,
index_col=["int_vals"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=True,
allows_duplicates=True,
float_precision="round_trip",
Expand All @@ -748,6 +769,10 @@ def test_csv_col_to_sparse_dims(self):
cmp_df = df.set_index("int_vals").sort_values(by="time")
tm.assert_frame_equal(res_df, cmp_df)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_schema_only(self):
col_size = 10
df = make_dataframe_basic3(col_size)
Expand Down Expand Up @@ -784,6 +809,7 @@ def test_dataframe_csv_schema_only(self):
tmp_csv,
index_col=["time", "double_range"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
mode="schema_only",
capacity=1001,
sparse=True,
Expand Down Expand Up @@ -856,6 +882,10 @@ def test_dataframe_csv_schema_only(self):
df_bk.sort_index(level="time", inplace=True)
tm.assert_frame_equal(df_bk, df_combined)

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_dataframe_csv_chunked(self):
col_size = 200
df = make_dataframe_basic3(col_size)
Expand All @@ -876,7 +906,7 @@ def test_dataframe_csv_chunked(self):
tmp_csv,
index_col=["double_range"],
parse_dates=["time"],
date_spec={"time": "%Y-%m-%dT%H:%M:%S.%f"},
date_format="%Y-%m-%d %H:%M:%S.%f",
chunksize=10,
sparse=True,
quotechar='"',
Expand All @@ -893,7 +923,12 @@ def test_dataframe_csv_chunked(self):
# Test dense chunked
tmp_array_dense = os.path.join(tmp_dir, "array_dense")
tiledb.from_csv(
tmp_array_dense, tmp_csv, parse_dates=["time"], sparse=False, chunksize=25
tmp_array_dense,
tmp_csv,
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
sparse=False,
chunksize=25,
)

with tiledb.open(tmp_array_dense) as A:
Expand Down Expand Up @@ -933,6 +968,10 @@ def test_dataframe_csv_chunked(self):
df_idx_res = A.query(coords=False).df[int(ned[0]) : int(ned[1])]
tm.assert_frame_equal(df_idx_res, df.reset_index(drop=True))

@pytest.mark.skipif(
sys.version_info < (3, 8),
reason="requires Python 3.8 or higher. date_format argument is not supported in 3.7 and below",
)
def test_csv_fillna(self):
if pytest.tiledb_vfs == "s3":
pytest.skip(
Expand Down Expand Up @@ -1016,6 +1055,7 @@ def check_array(path, df):
csv_paths,
index_col=["time"],
parse_dates=["time"],
date_format="%Y-%m-%d %H:%M:%S.%f",
chunksize=25,
sparse=True,
)
Expand Down

0 comments on commit c328590

Please sign in to comment.