Skip to content

Commit

Permalink
cleanup code
Browse files Browse the repository at this point in the history
  • Loading branch information
jparismorgan committed Oct 16, 2024
1 parent 0f36bc2 commit 023360f
Show file tree
Hide file tree
Showing 5 changed files with 6 additions and 28 deletions.
21 changes: 1 addition & 20 deletions apis/python/src/tiledb/vector_search/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -1855,7 +1855,6 @@ def ingest_vectors_udf(
partial_write_array_parts_uri = partial_write_array_group[PARTS_ARRAY_NAME].uri
partial_write_array_index_uri = partial_write_array_group[INDEX_ARRAY_NAME].uri

# start=0, end=100, batch=10
ctx = vspy.Ctx(config) if index_type == "IVF_PQ" else None
index = (
vspy.IndexIVFPQ(
Expand All @@ -1874,7 +1873,6 @@ def ingest_vectors_udf(
if part_end > end:
part_end = end

# part_id = 0
part_id = int(part / batch)

logger.debug("Input vectors start_pos: %d, end_pos: %d", part, part_end)
Expand Down Expand Up @@ -2096,16 +2094,6 @@ def compute_partition_indexes_udf(
with tiledb.scope_ctx(ctx_or_config=config):
group = tiledb.Group(index_group_uri)
index_array_uri = group[INDEX_ARRAY_NAME].uri
# TODO(paris): We will need to adjust this, as Python does:
# PARTIAL_WRITE_ARRAY_DIR = (
# storage_formats[storage_version]["PARTIAL_WRITE_ARRAY_DIR"]
# + "_"
# + "".join(random.choices(string.ascii_letters, k=10))
# )
# But in C++ we'll have a different URI.
# In Python they do it in case a previous ingestion fails / does not complete and the old
# URI is left sitting there.
# https://github.com/TileDB-Inc/TileDB-Vector-Search/pull/357
partial_write_array_dir_uri = group[PARTIAL_WRITE_ARRAY_DIR].uri
partial_write_array_group = tiledb.Group(partial_write_array_dir_uri)
partial_index_array_uri = partial_write_array_group[INDEX_ARRAY_NAME].uri
Expand Down Expand Up @@ -2166,8 +2154,6 @@ def ivf_pq_consolidate_partition_udf(
partitions, work_items, partition_id_start, partition_id_end, batch
)

# Reads from a set of input ranges and writes a set of output ranges. For every array you
# need to do the same thing. We can do this in C++.
def consolidate_partition_udf(
index_group_uri: str,
partitions: int,
Expand Down Expand Up @@ -2923,8 +2909,6 @@ def scale_resources(min_resource, max_resource, max_input_size, input_size):
ingest_nodes.append(ingest_additions_node)

work_items = len(ingest_nodes) * input_vectors_work_items_per_worker
# Computes the sizes for each partition (sizes for each partition as well as the indexes).
# Can leave this.
compute_indexes_node = submit(
compute_partition_indexes_udf,
index_group_uri=index_group_uri,
Expand Down Expand Up @@ -3001,9 +2985,6 @@ def consolidate_and_vacuum(
We also don't consolidate type-erased indexes because they are only written once. If we add
distributed ingestion we should write a C++ method to consolidate them.
"""

# TODO: We need to do this for IVF_PQ as part of these changes.

with tiledb.Group(index_group_uri) as group:
write_group = tiledb.Group(index_group_uri, "w")

Expand Down Expand Up @@ -3089,7 +3070,7 @@ def consolidate_and_vacuum(
uri=index_group_uri,
dimensions=dimensions,
vector_type=vector_type,
num_subspaces=int(num_subspaces),
num_subspaces=num_subspaces,
partitions=partitions,
config=config,
storage_version=storage_version,
Expand Down
2 changes: 1 addition & 1 deletion src/include/api/ivf_pq_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ class IndexIVFPQ {
size_t start,
size_t end,
size_t partition_start) = 0;
// virtual void ingest(const FeatureVectorArray& input_vectors) = 0;

virtual void ingest(
const FeatureVectorArray& input_vectors,
const FeatureVector& external_ids) = 0;
Expand Down
1 change: 0 additions & 1 deletion src/include/index/ivf_pq_group.h
Original file line number Diff line number Diff line change
Expand Up @@ -339,7 +339,6 @@ class ivf_pq_group : public base_index_group<index_type> {

create_temp_data_group();

// Store the metadata if all the arrays were created successfully
metadata_.store_metadata(write_group);
}

Expand Down
5 changes: 3 additions & 2 deletions src/include/test/unit_api_ivf_pq_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -201,8 +201,8 @@ TEST_CASE("create empty index and then train and query", "[api_ivf_pq_index]") {
CHECK(index.partitioning_index_type_string() == partitioning_index_type);
CHECK(index.distance_metric() == DistanceMetric::SUM_OF_SQUARES);

// Make sure we can query with k_factor > 1 on an empty index that is not
// loaded by URI.
// Make sure we can query with k_factor > 1 on an empty index that is not
// loaded by URI.
size_t top_k = 1;
size_t nprobe = 1;
float k_factor = 2.f;
Expand Down Expand Up @@ -976,6 +976,7 @@ TEST_CASE("write and load index with timestamps", "[api_ivf_pq_index]") {
all_ingestion_timestamps.end(),
std::vector<uint64_t>{99, 100}.begin()));
}

// Load it at timestamp 5 (before ingestion) and make sure we can query and be
// returned fill values.
for (auto upper_bound : std::vector<size_t>{0, 4}) {
Expand Down
5 changes: 1 addition & 4 deletions src/include/test/unit_ivf_pq_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,6 @@ TEST_CASE("ivf_pq_index write and read", "[ivf_pq_index]") {
idx.train(
training_set, partitions, TemporalPolicy(TimeTravel, write_timestamp));
idx.ingest(training_set, ids);
// idx.write_index(
// ctx, index_uri, TemporalPolicy(TimeTravel, write_timestamp));
}

{
Expand Down Expand Up @@ -585,7 +583,6 @@ TEST_CASE("query simple", "[ivf_pq_index]") {
auto queries =
ColMajorMatrix<feature_type>{{{value, value, value, value}}};
auto&& [scores, ids] = index.query(queries, k_nn, nprobe);

CHECK(scores(0, 0) == 0);
CHECK(ids(0, 0) == i * 11);
}
Expand Down Expand Up @@ -709,6 +706,7 @@ TEST_CASE("k_factor", "[ivf_pq_index]") {
CHECK(
k_nn == check_single_vector_num_equal<uint32_t>(ids_reranking, ids));
CHECK(scores_reranking(0, 0) == 0);

auto&& [scores_no_reranking, ids_no_reranking] =
index_infinite.query(queries, k_nn, nprobe, 1.f);
CHECK(
Expand All @@ -728,7 +726,6 @@ TEST_CASE("k_factor", "[ivf_pq_index]") {
CHECK(
k_nn == check_single_vector_num_equal<uint32_t>(ids_reranking, ids));
CHECK(scores_reranking(0, 0) == 0);

auto&& [scores_no_reranking, ids_no_reranking] =
index_finite.query(queries, k_nn, nprobe, 1.f);
CHECK(
Expand Down

0 comments on commit 023360f

Please sign in to comment.