Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Extending simplified PQ functions #520

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 13 additions & 6 deletions include/pq.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,25 @@ DISKANN_DLLEXPORT int generate_opq_pivots(const float *train_data, size_t num_tr
unsigned num_pq_chunks, std::string opq_pivots_path,
bool make_zero_mean = false);

DISKANN_DLLEXPORT int generate_pq_pivots_simplified(const float *train_data, size_t num_train, size_t dim,
size_t num_pq_chunks, std::vector<float> &pivot_data_vector);

DISKANN_DLLEXPORT int generate_pq_pivots_simplified(float *train_data, size_t num_train, size_t dim,
size_t num_pq_chunks, std::vector<float> &pivot_data_vector,
std::vector<float> &centroids, std::vector<uint32_t> &offsets,
const bool make_zero_mean = true,
const uint32_t kmeans_iters_for_pq = 15);

template <typename T>
int generate_pq_data_from_pivots(const std::string &data_file, unsigned num_centers, unsigned num_pq_chunks,
const std::string &pq_pivots_path, const std::string &pq_compressed_vectors_path,
bool use_opq = false);

DISKANN_DLLEXPORT int generate_pq_data_from_pivots_simplified(const float *data, const size_t num,
const float *pivot_data, const size_t pivots_num,
const size_t dim, const size_t num_pq_chunks,
std::vector<uint8_t> &pq);
using PQ_DATA_TYPE = uint8_t;
DISKANN_DLLEXPORT int generate_pq_data_from_pivots_simplified(float *data, const size_t num, const float *pivot_data,
const size_t pivots_num, const size_t dim,
const size_t num_pq_chunks,
const std::vector<float> &centroids,
const std::vector<uint32_t> &offsets,
std::vector<PQ_DATA_TYPE> &pq);

template <typename T>
void generate_disk_quantized_data(const std::string &data_file_to_use, const std::string &disk_pq_pivots_path,
Expand Down
128 changes: 100 additions & 28 deletions src/pq.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -348,39 +348,92 @@ void pq_dist_lookup(const uint8_t *pq_ids, const size_t n_pts, const size_t pq_n
// Input is provided in the in-memory buffer train_data.
// Output is stored in the in-memory buffer pivot_data_vector.
// Simplification is based on the following assumptions:
// dim % num_pq_chunks == 0
// num_centers == 256 by default
// KMEANS_ITERS_FOR_PQ == 15 by default
// make_zero_mean is false by default.
// These assumptions allow to make the function much simpler and avoid storing
// array of chunk_offsets and centroids.
// The compiler pragma for multi-threading support is removed from this implementation
// for the purpose of integration into systems that strictly control resource allocation.
int generate_pq_pivots_simplified(const float *train_data, size_t num_train, size_t dim, size_t num_pq_chunks,
std::vector<float> &pivot_data_vector)
int generate_pq_pivots_simplified(float *train_data, size_t num_train, size_t dim, size_t num_pq_chunks,
std::vector<float> &pivot_data_vector, std::vector<float> &centroids,
std::vector<uint32_t> &offsets, const bool make_zero_mean,
const uint32_t kmeans_iters_for_pq)
{
if (num_pq_chunks > dim || dim % num_pq_chunks != 0)
if (num_pq_chunks == 0 || num_pq_chunks > dim)
{
return -1;
}

// Calculate offsets of chunks
{
size_t chunk_size = dim / num_pq_chunks;
std::vector<size_t> sizes;

// If dim % num_pq_chunks != 0 we need to adjust chunk_size
// to cover the whole of vectors.
if (dim % num_pq_chunks != 0)
{
do
{
chunk_size++;
} while (chunk_size * num_pq_chunks < dim);

sizes.resize(num_pq_chunks, chunk_size);
size_t target = chunk_size * num_pq_chunks;
for (auto iter = sizes.rbegin(); iter != sizes.rend() && target > dim; iter++, target--)
{
(*iter)--;
}
}
else
{
sizes.resize(num_pq_chunks, chunk_size);
}

offsets.resize(num_pq_chunks, 0);
for (size_t i = 0; i < num_pq_chunks - 1; i++)
{
offsets[i + 1] = (uint32_t)(offsets[i] + sizes[i]);
}
offsets.push_back((uint32_t)dim);
}

const size_t num_centers = 256;
const size_t cur_chunk_size = dim / num_pq_chunks;
const uint32_t KMEANS_ITERS_FOR_PQ = 15;
const uint32_t max_chunk_size = offsets[1];

pivot_data_vector.resize(num_centers * dim);
std::vector<float> cur_pivot_data_vector(num_centers * cur_chunk_size);
std::vector<float> cur_data_vector(num_train * cur_chunk_size);
std::vector<float> cur_pivot_data_vector(num_centers * max_chunk_size);
std::vector<float> cur_data_vector(num_train * max_chunk_size);
std::vector<uint32_t> closest_center_vector(num_train);

float *pivot_data = &pivot_data_vector[0];
float *cur_pivot_data = &cur_pivot_data_vector[0];
float *cur_data = &cur_data_vector[0];
uint32_t *closest_center = &closest_center_vector[0];

centroids.clear();
if (make_zero_mean)
{
centroids.resize(dim, 0.0);

for (uint64_t d = 0; d < dim; d++)
{
for (uint64_t p = 0; p < num_train; p++)
{
centroids[d] += train_data[p * dim + d];
}
centroids[d] /= num_train;

for (uint64_t p = 0; p < num_train; p++)
{
train_data[p * dim + d] -= centroids[d];
}
}
}

for (size_t i = 0; i < num_pq_chunks; i++)
{
size_t chunk_offset = cur_chunk_size * i;
const size_t chunk_offset = offsets[i];
const size_t cur_chunk_size = offsets[i + 1] - offsets[i];

for (int32_t j = 0; j < num_train; j++)
{
Expand All @@ -390,7 +443,7 @@ int generate_pq_pivots_simplified(const float *train_data, size_t num_train, siz

kmeans::kmeanspp_selecting_pivots(cur_data, num_train, cur_chunk_size, cur_pivot_data, num_centers);

kmeans::run_lloyds(cur_data, num_train, cur_chunk_size, cur_pivot_data, num_centers, KMEANS_ITERS_FOR_PQ, NULL,
kmeans::run_lloyds(cur_data, num_train, cur_chunk_size, cur_pivot_data, num_centers, kmeans_iters_for_pq, NULL,
closest_center);

for (uint64_t j = 0; j < num_centers; j++)
Expand Down Expand Up @@ -776,24 +829,33 @@ int generate_opq_pivots(const float *passed_train_data, size_t num_train, uint32
// Output is stored in the in-memory buffer pq.
// Simplification is based on the following assumptions:
// supporting only float data type
// dim % num_pq_chunks == 0, which results in a fixed chunk_size
// num_centers == 256 by default
// make_zero_mean is false by default.
// These assumptions allow to make the function much simpler and avoid using
// array of chunk_offsets and centroids.
// The compiler pragma for multi-threading support is removed from this implementation
// for the purpose of integration into systems that strictly control resource allocation.
int generate_pq_data_from_pivots_simplified(const float *data, const size_t num, const float *pivot_data,
int generate_pq_data_from_pivots_simplified(float *data, const size_t num, const float *pivot_data,
const size_t pivots_num, const size_t dim, const size_t num_pq_chunks,
std::vector<uint8_t> &pq)
const std::vector<float> &centroids, const std::vector<uint32_t> &offsets,
std::vector<PQ_DATA_TYPE> &pq)
{
if (num_pq_chunks == 0 || num_pq_chunks > dim || dim % num_pq_chunks != 0)
if (num_pq_chunks == 0 || num_pq_chunks > dim)
{
return -1;
}

if (!centroids.empty() && centroids.size() != dim)
{
return -1;
}

if (offsets.size() != num_pq_chunks + 1 || offsets[0] != 0)
{
return -1;
}

const size_t num_centers = 256;
const size_t chunk_size = dim / num_pq_chunks;
const uint32_t max_chunk_size = offsets[1];

if (pivots_num != num_centers * dim)
{
Expand All @@ -802,33 +864,43 @@ int generate_pq_data_from_pivots_simplified(const float *data, const size_t num,

pq.resize(num * num_pq_chunks);

std::vector<float> cur_pivot_vector(num_centers * chunk_size);
std::vector<float> cur_data_vector(num * chunk_size);
std::vector<float> cur_pivot_vector(num_centers * max_chunk_size);
std::vector<float> cur_data_vector(num * max_chunk_size);
std::vector<uint32_t> closest_center_vector(num);

float *cur_pivot_data = &cur_pivot_vector[0];
float *cur_data = &cur_data_vector[0];
uint32_t *closest_center = &closest_center_vector[0];

if (!centroids.empty())
{
for (size_t p = 0; p < num; p++)
{
for (uint64_t d = 0; d < dim; d++)
{
data[p * dim + d] -= centroids[d];
}
}
}

for (size_t i = 0; i < num_pq_chunks; i++)
{
const size_t chunk_offset = chunk_size * i;
const size_t chunk_offset = offsets[i];
const size_t cur_chunk_size = offsets[i + 1] - offsets[i];

for (int j = 0; j < num_centers; j++)
{
std::memcpy(cur_pivot_data + j * chunk_size, pivot_data + j * dim + chunk_offset,
chunk_size * sizeof(float));
std::memcpy(cur_pivot_data + j * cur_chunk_size, pivot_data + j * dim + chunk_offset,
cur_chunk_size * sizeof(float));
}

for (int j = 0; j < num; j++)
{
for (size_t k = 0; k < chunk_size; k++)
{
cur_data[j * chunk_size + k] = data[j * dim + chunk_offset + k];
}
std::memcpy(cur_data + j * cur_chunk_size, data + j * dim + chunk_offset, cur_chunk_size * sizeof(float));
}

math_utils::compute_closest_centers(cur_data, num, chunk_size, cur_pivot_data, num_centers, 1, closest_center);
math_utils::compute_closest_centers(cur_data, num, cur_chunk_size, cur_pivot_data, num_centers, 1,
closest_center);

for (int j = 0; j < num; j++)
{
Expand Down
Loading