Skip to content
Open
196 changes: 186 additions & 10 deletions test/src/unit-enumerations.cc

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion test/src/unit-request-handlers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration(
}

return Enumeration::create(
ctx_.resources(),
name,
Datatype::STRING_ASCII,
constants::var_num,
Expand All @@ -473,6 +474,7 @@ HandleLoadArraySchemaRequestFx::create_string_enumeration(
total_size,
offsets.data(),
offsets.size() * sizeof(uint64_t),
true,
tiledb::test::create_test_memory_tracker());
}

Expand Down Expand Up @@ -546,7 +548,7 @@ HandleLoadArraySchemaRequestFx::call_handler(
REQUIRE(rval == TILEDB_OK);

return serialization::deserialize_load_array_schema_response(
uri_, cfg_, stype, resp_buf->buffer(), memory_tracker_);
ctx_.resources(), uri_, cfg_, stype, resp_buf->buffer(), memory_tracker_);
}

shared_ptr<ArraySchema> HandleQueryPlanRequestFx::create_schema() {
Expand Down
5 changes: 5 additions & 0 deletions tiledb/api/c_api/enumeration/enumeration_api.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ capi_return_t tiledb_enumeration_alloc(
memory_tracker->set_type(tiledb::sm::MemoryTrackerType::ENUMERATION_CREATE);

*enumeration = tiledb_enumeration_handle_t::make_handle(
ctx->context().resources(),
std::string(name),
datatype,
cell_val_num,
Expand All @@ -79,7 +80,11 @@ capi_return_t tiledb_enumeration_alloc(
data_size,
offsets,
offsets_size,
false,
memory_tracker);

// wait for the value map to finish in case it throws
(*enumeration)->enumeration()->value_map();
} catch (...) {
*enumeration = nullptr;
throw;
Expand Down
1 change: 1 addition & 0 deletions tiledb/api/c_api/enumeration/enumeration_api_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
#define TILEDB_CAPI_ENUMERATION_INTERNAL_H

#include "enumeration_api_experimental.h"
#include "tiledb/api/c_api/context/context_api_internal.h"
#include "tiledb/api/c_api_support/handle/handle.h"
#include "tiledb/common/common.h"
#include "tiledb/sm/array_schema/enumeration.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,14 @@

#include "../enumeration_api_internal.h"
#include "tiledb/sm/enums/datatype.h"
#include "tiledb/sm/storage_manager/context.h"

int main() {
tiledb::sm::Config cfg;
tiledb::sm::Context ctx(cfg);
try {
tiledb_enumeration_handle_t e{
ctx.resources(),
"fooo",
tiledb::sm::Datatype::INT32,
1,
Expand All @@ -40,6 +44,7 @@ int main() {
0,
nullptr,
0,
false,
nullptr};
} catch (...) {
}
Expand Down
7 changes: 5 additions & 2 deletions tiledb/sm/array/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -336,7 +336,7 @@ Status Array::open_without_fragments(
throw instead of return Status */
if (!use_refactored_array_open()) {
auto&& [st, array_schema_latest] =
rest_client->get_array_schema_from_rest(array_uri_);
rest_client->get_array_schema_from_rest(resources_, array_uri_);
if (!st.ok()) {
throw StatusException(st);
}
Expand Down Expand Up @@ -499,7 +499,7 @@ Status Array::open(
}
if (!use_refactored_array_open()) {
auto&& [st, array_schema_latest] =
rest_client->get_array_schema_from_rest(array_uri_);
rest_client->get_array_schema_from_rest(resources_, array_uri_);
throw_if_not_ok(st);
set_array_schema_latest(array_schema_latest.value());
if (serialize_enumerations()) {
Expand Down Expand Up @@ -850,6 +850,7 @@ Array::get_enumerations_all_schemas() {
// Pass an empty list of enumeration names. REST will use timestamps to
// load all enumerations on all schemas for the array within that range.
ret = rest_client->post_enumerations_from_rest(
resources_,
array_uri_,
array_dir_timestamp_start_,
array_dir_timestamp_end_,
Expand Down Expand Up @@ -949,6 +950,7 @@ std::vector<shared_ptr<const Enumeration>> Array::get_enumerations(
}

loaded = rest_client->post_enumerations_from_rest(
resources_,
array_uri_,
array_dir_timestamp_start_,
array_dir_timestamp_end_,
Expand Down Expand Up @@ -2137,6 +2139,7 @@ void load_enumeration_into_schema(
}

auto ret = rest_client->post_enumerations_from_rest(
ctx.resources(),
array_schema.array_uri(),
array_schema.timestamp_start(),
array_schema.timestamp_end(),
Expand Down
4 changes: 4 additions & 0 deletions tiledb/sm/array/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -746,6 +746,10 @@ class Array {
return config_;
}

inline const ContextResources& resources() const {
return resources_;
}

/** Directly set the array URI for serialized compatibility with pre
* TileDB 2.5 clients */
void set_uri_serialized(const std::string& uri) {
Expand Down
2 changes: 1 addition & 1 deletion tiledb/sm/array/array_directory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1344,7 +1344,7 @@ shared_ptr<const Enumeration> ArrayDirectory::load_enumeration(
}

Deserializer deserializer(tile->data(), tile->size());
return Enumeration::deserialize(deserializer, memory_tracker);
return Enumeration::deserialize(resources_, deserializer, memory_tracker);
}

} // namespace tiledb::sm
5 changes: 4 additions & 1 deletion tiledb/sm/array_schema/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,10 @@ conclude(object_library)
#
commence(object_library enumeration)
this_target_sources(enumeration.cc)
this_target_object_libraries(buffer constants seedable_global_PRNG)
this_target_object_libraries(buffer constants context_resources seedable_global_PRNG)
if(TILEDB_STATS)
this_target_compile_definitions(-DTILEDB_STATS)
endif()
conclude(object_library)

#
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/array_schema/array_schema_operations.cc
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ shared_ptr<ArraySchema> load_array_schema(
if (uri.is_tiledb()) {
auto& rest_client = ctx.rest_client();
auto&& [st, array_schema_response] =
rest_client.get_array_schema_from_rest(uri);
rest_client.get_array_schema_from_rest(ctx.resources(), uri);
throw_if_not_ok(st);
auto array_schema = std::move(array_schema_response).value();

Expand All @@ -254,6 +254,7 @@ shared_ptr<ArraySchema> load_array_schema(
// Pass an empty list of enumeration names. REST will use timestamps to
// load all enumerations on all schemas for the array within that range.
auto ret = rest_client.post_enumerations_from_rest(
ctx.resources(),
uri,
array_schema->timestamp_start(),
array_schema->timestamp_end(),
Expand Down
77 changes: 62 additions & 15 deletions tiledb/sm/array_schema/enumeration.cc
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
#include <iostream>
#include <sstream>

#include "tiledb/common/logger.h"
#include "tiledb/common/memory_tracker.h"
#include "tiledb/common/random/random_label.h"

Expand All @@ -49,6 +50,7 @@ class EnumerationException : public StatusException {
};

Enumeration::Enumeration(
const ContextResources& resources,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't like a "data class" like Enumeration using a "behavior class" like ContextResources; it will make removing the Context dependency on the C API harder.

const std::string& name,
const std::string& path_name,
Datatype type,
Expand All @@ -58,8 +60,10 @@ Enumeration::Enumeration(
uint64_t data_size,
const void* offsets,
uint64_t offsets_size,
bool async,
shared_ptr<MemoryTracker> memory_tracker)
: memory_tracker_(memory_tracker)
: resources_(resources)
, memory_tracker_(memory_tracker)
, name_(name)
, path_name_(path_name)
, type_(type)
Expand Down Expand Up @@ -94,6 +98,7 @@ Enumeration::Enumeration(
if (data_empty && offsets_empty) {
// This is an empty enumeration so we're done checking for argument
// validity.
value_map_status_.emplace(Status::Ok());
return;
}

Expand Down Expand Up @@ -191,11 +196,36 @@ Enumeration::Enumeration(
throw_if_not_ok(offsets_.write(offsets, 0, offsets_size));
}

generate_value_map();
if (async) {
tiledb::common::ThreadPool::Task value_map_future =
resources.compute_tp().async(
[](std::reference_wrapper<Enumeration> enumeration) {
enumeration.get().generate_value_map();
return Status::Ok();
},
std::reference_wrapper<Enumeration>(*this));
value_map_future_ = std::move(value_map_future);
Comment on lines +200 to +207
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
tiledb::common::ThreadPool::Task value_map_future =
resources.compute_tp().async(
[](std::reference_wrapper<Enumeration> enumeration) {
enumeration.get().generate_value_map();
return Status::Ok();
},
std::reference_wrapper<Enumeration>(*this));
value_map_future_ = std::move(value_map_future);
value_map_future_ =
resources.compute_tp().async(
[](std::reference_wrapper<Enumeration> enumeration) {
enumeration.get().generate_value_map();
return Status::Ok();
},
std::reference_wrapper<Enumeration>(*this));

(and format)

} else {
generate_value_map();
value_map_status_.emplace(Status::Ok());
}
}

Enumeration::~Enumeration() {
if (value_map_future_.valid()) {
auto st = value_map_future_.wait();
if (!st.ok()) {
tiledb::common::global_logger().warn(
"Enumeration::~Enumeration(): Error computing value map: " +
st.to_string());
}
}
}
Comment on lines +214 to 223
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wouldn't log the warning; it's not important to tell the user that an operation they did not care for failed.


shared_ptr<const Enumeration> Enumeration::deserialize(
Deserializer& deserializer, shared_ptr<MemoryTracker> memory_tracker) {
const ContextResources& resources,
Deserializer& deserializer,
shared_ptr<MemoryTracker> memory_tracker) {
auto disk_version = deserializer.read<uint32_t>();
if (disk_version > constants::enumerations_version) {
throw EnumerationException(
Expand Down Expand Up @@ -233,6 +263,7 @@ shared_ptr<const Enumeration> Enumeration::deserialize(
}

return create(
resources,
name,
path_name,
static_cast<Datatype>(type),
Expand All @@ -242,6 +273,7 @@ shared_ptr<const Enumeration> Enumeration::deserialize(
data_size,
offsets,
offsets_size,
true,
memory_tracker);
}

Expand Down Expand Up @@ -326,6 +358,7 @@ shared_ptr<const Enumeration> Enumeration::extend(
}

return create(
resources_,
name_,
"",
type_,
Expand All @@ -335,6 +368,7 @@ shared_ptr<const Enumeration> Enumeration::extend(
new_data.size(),
new_offsets_ptr,
new_offsets_size,
false,
memory_tracker_);
}

Expand Down Expand Up @@ -413,15 +447,36 @@ void Enumeration::serialize(Serializer& serializer) const {
uint64_t Enumeration::index_of(const void* data, uint64_t size) const {
std::string_view value_view(static_cast<const char*>(data), size);

auto iter = value_map_.find(value_view);
if (iter == value_map_.end()) {
const auto& values = value_map();
auto iter = values.find(value_view);
if (iter == values.end()) {
return constants::enumeration_missing_value;
}

return iter->second;
}

/**
* Add a value to a value map. Checks for duplicates.
*
* @param sv A string view of the data to add.
* @param index The index of the data in the Enumeration.
*/
static void add_value_to_map(
Enumeration::EnumerationValueMap& value_map,
std::string_view& sv,
uint64_t index) {
const auto res = value_map.emplace(sv, index);
if (!res.second) {
throw EnumerationException(
"Invalid duplicated value in enumeration '" + std::string(sv) + "'");
}
}

void Enumeration::generate_value_map() {
auto timer =
resources_.stats().start_timer("Enumeration::generate_value_map");

auto char_data = data_.data_as<char>();
if (var_size()) {
auto offsets = offsets_.data_as<uint64_t>();
Expand All @@ -438,7 +493,7 @@ void Enumeration::generate_value_map() {
}

auto sv = std::string_view(char_data + offsets[i], length);
add_value_to_map(sv, i);
add_value_to_map(value_map_, sv, i);
}
} else {
uint64_t i = 0;
Expand All @@ -448,20 +503,12 @@ void Enumeration::generate_value_map() {

while (i * stride < data_.size()) {
auto sv = std::string_view(char_data + i * stride, stride);
add_value_to_map(sv, i);
add_value_to_map(value_map_, sv, i);
i += 1;
}
}
}

void Enumeration::add_value_to_map(std::string_view& sv, uint64_t index) {
const auto res = value_map_.emplace(sv, index);
if (!res.second) {
throw EnumerationException(
"Invalid duplicated value in enumeration '" + std::string(sv) + "'");
}
}

} // namespace tiledb::sm

std::ostream& operator<<(
Expand Down
Loading
Loading