Skip to content

Commit

Permalink
Add start, stop, and temporal_policy to FeatureVector and FeatureVect…
Browse files Browse the repository at this point in the history
…orArray (#549)
  • Loading branch information
jparismorgan authored Oct 15, 2024
1 parent 849b6e3 commit 36978a6
Show file tree
Hide file tree
Showing 10 changed files with 163 additions and 65 deletions.
7 changes: 6 additions & 1 deletion apis/python/src/tiledb/vector_search/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1661,7 +1661,12 @@ def ingest_type_erased(

ctx = vspy.Ctx(config)
data = vspy.FeatureVectorArray(
ctx, parts_array_uri, ids_array_uri, 0, to_temporal_policy(index_timestamp)
ctx,
parts_array_uri,
ids_array_uri,
0, # first_col
0, # last_col
to_temporal_policy(index_timestamp),
)
if index_type == "VAMANA":
index = vspy.IndexVamana(ctx, index_group_uri)
Expand Down
27 changes: 21 additions & 6 deletions apis/python/src/tiledb/vector_search/type_erased_module.cc
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,22 @@ void init_type_erased_module(py::module_& m) {

py::class_<FeatureVector>(m, "FeatureVector", py::buffer_protocol())
.def(
py::init<const tiledb::Context&, const std::string&>(),
py::keep_alive<1, 2>() // IndexIVFFlat should keep ctx alive.
)
"__init__",
[](FeatureVector& instance,
const tiledb::Context& ctx,
const std::string& uri,
size_t first_col,
size_t last_col,
std::optional<TemporalPolicy> temporal_policy) {
new (&instance)
FeatureVector(ctx, uri, first_col, last_col, temporal_policy);
},
py::keep_alive<1, 2>(),
py::arg("ctx"),
py::arg("uri"),
py::arg("first_col") = 0,
py::arg("last_col") = 0,
py::arg("temporal_policy") = std::nullopt)
.def(py::init<size_t, const std::string&>())
.def(py::init<size_t, void*, const std::string&>())
.def("dimensions", &FeatureVector::dimensions)
Expand Down Expand Up @@ -244,16 +257,18 @@ void init_type_erased_module(py::module_& m) {
const tiledb::Context& ctx,
const std::string& uri,
const std::string& ids_uri,
size_t num_vectors,
size_t first_col,
size_t last_col,
std::optional<TemporalPolicy> temporal_policy) {
new (&instance) FeatureVectorArray(
ctx, uri, ids_uri, num_vectors, temporal_policy);
ctx, uri, ids_uri, first_col, last_col, temporal_policy);
},
py::keep_alive<1, 2>(), // FeatureVectorArray should keep ctx alive.
py::arg("ctx"),
py::arg("uri"),
py::arg("ids_uri") = "",
py::arg("num_vectors") = 0,
py::arg("first_col") = 0,
py::arg("last_col") = 0,
py::arg("temporal_policy") = std::nullopt)
.def(py::init<size_t, size_t, const std::string&, const std::string&>())
.def("dimensions", &FeatureVectorArray::dimensions)
Expand Down
46 changes: 34 additions & 12 deletions src/include/api/feature_vector.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,13 +86,20 @@ class FeatureVector {
* @param ctx
* @param uri
*/
FeatureVector(const tiledb::Context& ctx, const std::string& uri) {
auto array = tiledb_helpers::open_array(tdb_func__, ctx, uri, TILEDB_READ);
FeatureVector(
const tiledb::Context& ctx,
const std::string& uri,
size_t start = 0,
size_t end = 0,
std::optional<TemporalPolicy> temporal_policy_input = std::nullopt) {
auto temporal_policy = temporal_policy_input.value_or(TemporalPolicy{});
auto array = tiledb_helpers::open_array(
tdb_func__, ctx, uri, TILEDB_READ, temporal_policy);

feature_type_ = get_array_datatype(*array);
array->close();

tdb_vector_from_datatype(ctx, uri);
tdb_vector_from_datatype(ctx, uri, start, end, temporal_policy);
}

/*
Expand Down Expand Up @@ -126,25 +133,35 @@ class FeatureVector {
* Dispatch to the appropriate concrete class based on the datatype.
*/
void tdb_vector_from_datatype(
const tiledb::Context& ctx, const std::string& uri) {
const tiledb::Context& ctx,
const std::string& uri,
size_t start,
size_t end,
TemporalPolicy temporal_policy) {
switch (feature_type_) {
case TILEDB_FLOAT32:
vector_ = std::make_unique<vector_impl<tdbVector<float>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<float>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_INT8:
vector_ = std::make_unique<vector_impl<tdbVector<int8_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<int8_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_UINT8:
vector_ = std::make_unique<vector_impl<tdbVector<uint8_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<uint8_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_INT32:
vector_ = std::make_unique<vector_impl<tdbVector<int32_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<int32_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_UINT32:
vector_ = std::make_unique<vector_impl<tdbVector<uint32_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<uint32_t>>>(
ctx, uri, start, end, temporal_policy);
break;
case TILEDB_UINT64:
vector_ = std::make_unique<vector_impl<tdbVector<uint64_t>>>(ctx, uri);
vector_ = std::make_unique<vector_impl<tdbVector<uint64_t>>>(
ctx, uri, start, end, temporal_policy);
break;
default:
throw std::runtime_error("Unsupported attribute type");
Expand Down Expand Up @@ -202,8 +219,13 @@ class FeatureVector {
explicit vector_impl(size_t size)
: vector_(size) {
}
vector_impl(const tiledb::Context& ctx, const std::string& uri)
: vector_(ctx, uri) {
vector_impl(
const tiledb::Context& ctx,
const std::string& uri,
size_t start,
size_t end,
TemporalPolicy temporal_policy)
: vector_(ctx, uri, start, end, temporal_policy) {
}
//[[nodiscard]] void* data() override {
// return _cpo::data(vector_);
Expand Down
Loading

0 comments on commit 36978a6

Please sign in to comment.