Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add serialization and API changes for post_array_schema_from_rest. #5237

Merged
merged 11 commits into from
Aug 27, 2024
2 changes: 1 addition & 1 deletion test/src/unit-capi-config.cc
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void check_save_to_file() {
ss << "rest.curl.buffer_size 524288\n";
ss << "rest.curl.verbose false\n";
ss << "rest.http_compressor any\n";
ss << "rest.load_enumerations_on_array_open true\n";
ss << "rest.load_enumerations_on_array_open false\n";
ss << "rest.load_metadata_on_array_open true\n";
ss << "rest.load_non_empty_domain_on_array_open true\n";
Comment on lines +233 to 235
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are these options restricted to REST only? Especially the first two have use cases outside of it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that also crossed my mind while working on this. I didn't introduce any new config options in this PR, but maybe sm.load_X_on_array_open or sm.array_open.load_X would be more fitting for these? I think they were each introduced to tune REST behavior, and that's probably where the names originated.

I opened SC-53109 to consider renaming them; it's probably best to handle the renaming separately.

ss << "rest.retry_count 25\n";
Expand Down
63 changes: 63 additions & 0 deletions test/src/unit-enumerations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1120,6 +1120,69 @@ TEST_CASE_METHOD(
REQUIRE(schema->is_enumeration_loaded("test_enmr") == true);
}

// Checks that Array::load_all_enumerations() marks enumerations as loaded on
// every schema returned by array_schemas_all(), across two schema evolutions:
// the first adds an enumeration plus an attribute that references it, the
// second drops the "test_enmr" enumeration and the "attr1" attribute.
TEST_CASE_METHOD(
    EnumerationFx,
    "Array - Load All Enumerations - All Schemas",
    "[enumeration][array][load-all-enumerations][all-schemas]") {
  create_array();
  auto array = get_array(QueryType::READ);
  auto schema = array->array_schema_latest_ptr();
  // Nothing has requested enumerations yet, so none should be loaded.
  REQUIRE(schema->is_enumeration_loaded("test_enmr") == false);
  std::string schema_name_1 = schema->name();

  // Evolve once to add an enumeration and an attribute that uses it.
  auto ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
  std::vector<std::string> var_values{"one", "two", "three"};
  auto var_enmr = create_enumeration(
      var_values, false, Datatype::STRING_ASCII, "ase_var_enmr");
  ase->add_enumeration(var_enmr);
  auto attr4 = make_shared<Attribute>(HERE(), "attr4", Datatype::UINT16);
  attr4->set_enumeration_name("ase_var_enmr");
  // Register the attribute with the evolution; previously it was constructed
  // but never added, so the evolved schema had no attribute using the new
  // enumeration.
  ase->add_attribute(attr4);
  CHECK_NOTHROW(ase->evolve_schema(schema));
  // Apply evolution to the array and reopen.
  CHECK_NOTHROW(Array::evolve_array_schema(
      ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
  CHECK(array->reopen().ok());
  CHECK_NOTHROW(array->load_all_enumerations());
  auto all_schemas = array->array_schemas_all();
  schema = array->array_schema_latest_ptr();
  std::string schema_name_2 = schema->name();

  // Check all schemas.
  CHECK(all_schemas[schema_name_1]->is_enumeration_loaded("test_enmr") == true);
  CHECK(all_schemas[schema_name_2]->is_enumeration_loaded("test_enmr") == true);
  CHECK(
      all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
      true);

  // Evolve a second time to drop an enumeration.
  ase = make_shared<ArraySchemaEvolution>(HERE(), memory_tracker_);
  ase->drop_enumeration("test_enmr");
  ase->drop_attribute("attr1");
  CHECK_NOTHROW(ase->evolve_schema(schema));
  // Apply evolution to the array and reopen.
  CHECK_NOTHROW(Array::evolve_array_schema(
      ctx_.resources(), uri_, ase.get(), array->get_encryption_key()));
  CHECK(array->reopen().ok());
  CHECK_NOTHROW(array->load_all_enumerations());
  all_schemas = array->array_schemas_all();
  schema = array->array_schema_latest_ptr();
  std::string schema_name_3 = schema->name();

  // Check all schemas. The two older schemas still carry "test_enmr"; the
  // newest one no longer knows the name, so querying it is expected to throw.
  CHECK(all_schemas[schema_name_1]->is_enumeration_loaded("test_enmr") == true);
  CHECK(all_schemas[schema_name_2]->is_enumeration_loaded("test_enmr") == true);
  CHECK(
      all_schemas[schema_name_2]->is_enumeration_loaded("ase_var_enmr") ==
      true);
  CHECK_THROWS_WITH(
      all_schemas[schema_name_3]->is_enumeration_loaded("test_enmr"),
      Catch::Matchers::ContainsSubstring("No enumeration named"));
  CHECK(
      all_schemas[schema_name_3]->is_enumeration_loaded("ase_var_enmr") ==
      true);
}

TEST_CASE_METHOD(
EnumerationFx,
"Array - Load All Enumerations - Repeated",
Expand Down
114 changes: 100 additions & 14 deletions test/src/unit-request-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#ifdef TILEDB_SERIALIZATION

#include "test/support/src/helpers.h"
#include "test/support/src/mem_helpers.h"
#include "test/support/tdb_catch.h"
#include "tiledb/api/c_api/buffer/buffer_api_internal.h"
Expand All @@ -41,6 +42,7 @@
#include "tiledb/sm/c_api/tiledb_serialization.h"
#include "tiledb/sm/c_api/tiledb_struct_def.h"
#include "tiledb/sm/cpp_api/tiledb"
#include "tiledb/sm/cpp_api/tiledb_experimental"
#include "tiledb/sm/crypto/encryption_key.h"
#include "tiledb/sm/enums/array_type.h"
#include "tiledb/sm/enums/encryption_type.h"
Expand All @@ -67,6 +69,7 @@ struct RequestHandlerFx {
Config cfg_;
Context ctx_;
EncryptionKey enc_key_;
shared_ptr<ArraySchema> schema_;
};

struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
Expand All @@ -75,11 +78,17 @@ struct HandleLoadArraySchemaRequestFx : RequestHandlerFx {
}

virtual shared_ptr<ArraySchema> create_schema() override;
shared_ptr<ArraySchema> call_handler(

std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype);

shared_ptr<const Enumeration> create_string_enumeration(
std::string name, std::vector<std::string>& values);

shared_ptr<ArraySchema> schema_add_attribute(const std::string& attr_name);
};

struct HandleQueryPlanRequestFx : RequestHandlerFx {
Expand Down Expand Up @@ -116,15 +125,23 @@ struct HandleConsolidationPlanRequestFx : RequestHandlerFx {

TEST_CASE_METHOD(
HandleLoadArraySchemaRequestFx,
"tiledb_handle_load_array_schema_request - default request",
"tiledb_handle_load_array_schema_request - no enumerations",
"[request_handler][load_array_schema][default]") {
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
call_handler(serialization::LoadArraySchemaRequest(false), stype);
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 0);
tiledb::test::schema_equiv(*schema, *schema_);

// We did not evolve the schema so there should only be one.
auto all_schemas = std::get<1>(schema_response);
REQUIRE(all_schemas.size() == 1);
tiledb::test::schema_equiv(
*all_schemas.find(schema->name())->second, *schema_);
}

TEST_CASE_METHOD(
Expand All @@ -134,12 +151,57 @@ TEST_CASE_METHOD(
auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);

create_array();
auto schema =
call_handler(serialization::LoadArraySchemaRequest(true), stype);
REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", "true").ok());
auto schema_response =
call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
auto schema = std::get<0>(schema_response);
REQUIRE(schema->has_enumeration("enmr"));
REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
REQUIRE(schema->get_enumeration("enmr") != nullptr);
tiledb::test::schema_equiv(*schema, *schema_);

// We did not evolve the schema so there should only be one.
auto all_schemas = std::get<1>(schema_response);
REQUIRE(all_schemas.size() == 1);
tiledb::test::schema_equiv(
*all_schemas.find(schema->name())->second, *schema_);
}

// Evolves the array three times (adding attributes "b", "c" and "d"), sends a
// load-array-schema request, and verifies both the latest schema and the full
// set of schemas returned in the response. Runs for each serialization type
// and with enumeration loading switched on and off.
TEST_CASE_METHOD(
    HandleLoadArraySchemaRequestFx,
    "tiledb_handle_load_array_schema_request - multiple schemas",
    "[request_handler][load_array_schema][schema-evolution]") {
  auto stype = GENERATE(SerializationType::JSON, SerializationType::CAPNP);
  std::string load_enums = GENERATE("true", "false");

  create_array();

  // Expected schemas, in creation order: the original plus one per evolution.
  std::vector<shared_ptr<ArraySchema>> all_schemas{schema_};
  all_schemas.push_back(schema_add_attribute("b"));
  all_schemas.push_back(schema_add_attribute("c"));
  all_schemas.push_back(schema_add_attribute("d"));

  REQUIRE(cfg_.set("rest.load_enumerations_on_array_open", load_enums).ok());
  auto schema_response =
      call_handler(serialization::LoadArraySchemaRequest(cfg_), stype);
  auto schema = std::get<0>(schema_response);
  if (load_enums == "true") {
    REQUIRE(schema->has_enumeration("enmr"));
    REQUIRE(schema->get_loaded_enumeration_names().size() == 1);
    REQUIRE(schema->get_loaded_enumeration_names()[0] == "enmr");
    REQUIRE(schema->get_enumeration("enmr") != nullptr);
  }
  // The latest schema should be equal to the last applied evolution.
  tiledb::test::schema_equiv(*schema, *all_schemas.back());

  // Validate schemas returned from the request in the order they were created.
  // The response map is unordered, so copy it into a std::map to iterate in
  // key (schema name) order.
  // NOTE(review): this relies on schema names sorting in creation order.
  auto r_all_schemas = std::get<1>(schema_response);
  std::map<std::string, shared_ptr<ArraySchema>> resp(
      r_all_schemas.begin(), r_all_schemas.end());
  // Guard the indexed comparison below: without this, extra schemas in the
  // response would index past the end of all_schemas and missing schemas
  // would go unnoticed.
  REQUIRE(resp.size() == all_schemas.size());
  for (size_t i = 0; const auto& s : resp) {
    tiledb::test::schema_equiv(*s.second, *all_schemas[i++]);
  }
}

TEST_CASE_METHOD(
Expand Down Expand Up @@ -346,7 +408,9 @@ TEST_CASE_METHOD(
RequestHandlerFx::RequestHandlerFx(const std::string uri)
: memory_tracker_(tiledb::test::create_test_memory_tracker())
, uri_(uri)
, ctx_(cfg_) {
, ctx_(cfg_)
, schema_(make_shared<ArraySchema>(
ArrayType::DENSE, ctx_.resources().ephemeral_memory_tracker())) {
delete_array();
throw_if_not_ok(enc_key_.set_key(EncryptionType::NO_ENCRYPTION, nullptr, 0));
}
Expand Down Expand Up @@ -405,9 +469,28 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration(
tiledb::test::create_test_memory_tracker());
}

// Adds an int32 attribute named `attr_name` to the fixture's array through a
// schema evolution. The cached `schema_` member is replaced with the evolved
// schema, the evolution is applied to the array at `uri_`, and the evolved
// schema is returned so callers can validate against it.
shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::schema_add_attribute(
    const std::string& attr_name) {
  // Build the evolution with the public C++ API objects.
  tiledb::Context cpp_ctx;
  tiledb::ArraySchemaEvolution evolution(cpp_ctx);
  auto new_attr = tiledb::Attribute::create<int32_t>(cpp_ctx, attr_name);
  evolution.add_attribute(new_attr);

  // Internal evolution object wrapped by the C++ API handle; it is used both
  // to evolve the in-memory schema and to evolve the array itself.
  auto* const evolution_impl = evolution.ptr()->array_schema_evolution_;

  // Keep the fixture's expected schema in step with the array.
  schema_ = evolution_impl->evolve_schema(schema_);

  // Apply the same evolution to the array.
  Array::evolve_array_schema(ctx_.resources(), uri_, evolution_impl, enc_key_);

  return schema_;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {
// Create a schema to serialize
auto schema =
schema_ =
make_shared<ArraySchema>(HERE(), ArrayType::SPARSE, memory_tracker_);
auto dim =
make_shared<Dimension>(HERE(), "dim1", Datatype::INT32, memory_tracker_);
Expand All @@ -416,20 +499,23 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::create_schema() {

auto dom = make_shared<Domain>(HERE(), memory_tracker_);
throw_if_not_ok(dom->add_dimension(dim));
throw_if_not_ok(schema->set_domain(dom));
throw_if_not_ok(schema_->set_domain(dom));

std::vector<std::string> values = {"pig", "cow", "chicken", "dog", "cat"};
auto enmr = create_string_enumeration("enmr", values);
schema->add_enumeration(enmr);
schema_->add_enumeration(enmr);

auto attr = make_shared<Attribute>(HERE(), "attr", Datatype::INT32);
attr->set_enumeration_name("enmr");
throw_if_not_ok(schema->add_attribute(attr));
throw_if_not_ok(schema_->add_attribute(attr));

return schema;
return schema_;
}

shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
std::tuple<
shared_ptr<ArraySchema>,
std::unordered_map<std::string, shared_ptr<ArraySchema>>>
HandleLoadArraySchemaRequestFx::call_handler(
serialization::LoadArraySchemaRequest req, SerializationType stype) {
// If this looks weird, it's because we're using the public C++ API to create
// these objects instead of the internal APIs elsewhere in this test suite.
Expand All @@ -451,7 +537,7 @@ shared_ptr<ArraySchema> HandleLoadArraySchemaRequestFx::call_handler(
REQUIRE(rval == TILEDB_OK);

return serialization::deserialize_load_array_schema_response(
stype, resp_buf->buffer(), memory_tracker_);
uri_, stype, resp_buf->buffer(), memory_tracker_);
}

shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {
Expand Down
20 changes: 20 additions & 0 deletions test/support/src/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1621,6 +1621,26 @@ void read_sparse_v11(
tiledb_query_free(&query);
}

/**
 * Asserts that two array schemas are equivalent using Catch2 assertions.
 *
 * Compares the array type, the number of attributes, each attribute's cell
 * value number, name, type, nullability and enumeration name, and the
 * schemas' capacity, cell order, tile order, duplicates setting and array
 * URI.
 *
 * @param schema1 First array schema to compare.
 * @param schema2 Second array schema to compare.
 */
void schema_equiv(
    const sm::ArraySchema& schema1, const sm::ArraySchema& schema2) {
  CHECK(schema1.array_type() == schema2.array_type());
  // The attribute counts must match before comparing attributes pairwise.
  // REQUIRE aborts the test on mismatch; a CHECK would continue and the loop
  // below would index schema1 using schema2's attribute count.
  REQUIRE(schema1.attributes().size() == schema2.attributes().size());
  for (unsigned int i = 0; i < schema2.attribute_num(); i++) {
    auto a = schema1.attribute(i);
    auto b = schema2.attribute(i);
    CHECK(a->cell_val_num() == b->cell_val_num());
    CHECK(a->name() == b->name());
    CHECK(a->type() == b->type());
    CHECK(a->nullable() == b->nullable());
    CHECK(a->get_enumeration_name() == b->get_enumeration_name());
  }
  CHECK(schema1.capacity() == schema2.capacity());
  CHECK(schema1.cell_order() == schema2.cell_order());
  CHECK(schema1.tile_order() == schema2.tile_order());
  CHECK(schema1.allows_dups() == schema2.allows_dups());
  CHECK(schema1.array_uri().to_string() == schema2.array_uri().to_string());
}

template void check_subarray<int8_t>(
tiledb::sm::Subarray& subarray, const SubarrayRanges<int8_t>& ranges);
template void check_subarray<uint8_t>(
Expand Down
9 changes: 9 additions & 0 deletions test/support/src/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -957,6 +957,15 @@ void write_sparse_v11(
*/
void read_sparse_v11(
tiledb_ctx_t* ctx, const std::string& array_name, uint64_t timestamp);

/**
* Helper function to test two array schemas are equivalent.
*
* Failures are reported through Catch2 assertions. The comparison covers the
* array type, the number of attributes, each attribute's cell value number,
* name, type, nullability and enumeration name, and the schemas' capacity,
* cell order, tile order, duplicates setting and array URI.
*
* Every check is an equality comparison between the two schemas, so neither
* argument is treated as the reference value.
*
* @param schema1 First array schema to compare.
* @param schema2 Second array schema to compare.
*/
void schema_equiv(
const sm::ArraySchema& schema1, const sm::ArraySchema& schema2);
} // namespace tiledb::test

#endif
Loading
Loading