refactor namespace ov::* -> ov::genai::*
pavel-esir committed May 24, 2024
1 parent 28c313b commit c395a8d
Showing 18 changed files with 181 additions and 179 deletions.
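For downstream code the change is mechanical: every public GenAI symbol moves from `ov::` into the nested `ov::genai::` namespace. A minimal before/after sketch of user code (the model path argument is illustrative):

``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>
#include <string>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];

    // Before this commit:
    //   ov::LLMPipeline pipe(model_path, "CPU");
    // After it, the same type lives in ov::genai:
    ov::genai::LLMPipeline pipe(model_path, "CPU");
    std::cout << pipe.generate("The Sun is yellow because");
}
```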
57 changes: 17 additions & 40 deletions src/README.md
@@ -63,7 +63,7 @@ Minimalistic example

int main(int argc, char* argv[]) {
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
ov::genai::LLMPipeline pipe(model_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because");
}
```
@@ -75,9 +75,9 @@ Using Group Beam Search Decoding
int main(int argc, char* argv[]) {
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
ov::genai::LLMPipeline pipe(model_path, "CPU");
ov::GenerationConfig config = pipe.get_generation_config();
ov::genai::GenerationConfig config = pipe.get_generation_config();
config.max_new_tokens = 256;
config.num_groups = 3;
config.group_size = 5;
@@ -87,80 +87,57 @@ int main(int argc, char* argv[]) {
}
```

A simplest chat in C++
A simple chat in C++ using grouped beam search decoding
``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

int main(int argc, char* argv[]) {
std::string prompt;

std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");

pipe.start_chat();
for (size_t i = 0; i < questions.size(); i++) {
std::cout << "question:\n";
std::getline(std::cin, prompt);

std::cout << pipe(prompt) << std::endl;
}
pipe.finish_chat();
}
```
Specifying generation_config to use grouped beam search
``` cpp
int main(int argc, char* argv[]) {
std::string prompt;
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
ov::genai::LLMPipeline pipe(model_path, "CPU");

ov::GenerationConfig config = pipe.get_generation_config();
ov::genai::GenerationConfig config = pipe.get_generation_config();
config.max_new_tokens = 256;
config.num_groups = 3;
config.group_size = 5;
config.diversity_penalty = 1.0f;

auto streamer = [](std::string word) { std::cout << word << std::flush; };
pipe.start_chat();
for (size_t i = 0; i < questions.size(); i++) {
for (;;) {
std::cout << "question:\n";
std::cout << prompt << std::endl;
std::getline(std::cin, prompt);
if (prompt == "Stop!")
break;

auto answer = pipe(prompt, config, streamer);
// no need to print answer, streamer will do that
std::cout << "answer:\n";
auto answer = pipe(prompt, config);
std::cout << answer << std::endl;
}
pipe.finish_chat();
}
```
Streaming example with lambda function

``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>
int main(int argc, char* argv[]) {
std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
ov::genai::LLMPipeline pipe(model_path, "CPU");
auto streamer = [](std::string word) { std::cout << word << std::flush; };
std::cout << pipe.generate("The Sun is yellow because", streamer);
}
```

Streaming with custom class
Streaming with a custom class
``` cpp
#include "openvino/genai/streamer_base.hpp"
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>

class CustomStreamer: public ov::StreamerBase {
class CustomStreamer: public ov::genai::StreamerBase {
public:
void put(int64_t token) {
/* custom decoding/tokens processing code
@@ -179,7 +156,7 @@ int main(int argc, char* argv[]) {
CustomStreamer custom_streamer;

std::string model_path = argv[1];
ov::LLMPipeline pipe(model_path, "CPU");
ov::genai::LLMPipeline pipe(model_path, "CPU");
std::cout << pipe.generate("The Sun is yellow because", custom_streamer);
}
```
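The README snippet elides the body of `put()`; below is a minimal sketch of a complete streamer subclass, assuming both `put()` and `end()` are virtual in `ov::genai::StreamerBase` (the token-buffering logic is illustrative, not taken from the repository):

``` cpp
#include "openvino/genai/streamer_base.hpp"
#include <cstdint>
#include <iostream>
#include <vector>

class PrintingStreamer : public ov::genai::StreamerBase {
public:
    // Called for every generated token id; a real streamer would detokenize here.
    void put(int64_t token) override {
        m_tokens.push_back(token);
        std::cout << token << ' ' << std::flush;
    }

    // Called once generation is finished.
    void end() override {
        std::cout << "\n[" << m_tokens.size() << " tokens generated]\n";
    }

private:
    std::vector<int64_t> m_tokens;
};
```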
4 changes: 3 additions & 1 deletion src/cpp/include/openvino/genai/generation_config.hpp
@@ -12,6 +12,7 @@
#include "openvino/genai/tokenizer.hpp"

namespace ov {
namespace genai {

/**
* @brief controls the stopping condition for grouped beam search. The following values are possible:
@@ -102,4 +103,5 @@ class OPENVINO_GENAI_EXPORTS GenerationConfig {
static GenerationConfig anymap_to_generation_config(const ov::AnyMap& config_map = {});
};

} // namespace ov
} // namespace genai
} // namespace ov
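`anymap_to_generation_config` above suggests the config can also be built from an `ov::AnyMap`; a hedged sketch of a caller doing so, using only the `max_new_tokens` key that is visible later in this commit's `generation_config.cpp` hunk (any other keys would be assumptions):

``` cpp
#include "openvino/genai/generation_config.hpp"
#include <cstddef>

int main() {
    // "max_new_tokens" appears in the generation_config.cpp hunk of this commit;
    // additional keys are expected to follow the same field-name convention.
    ov::AnyMap config_map = {
        {"max_new_tokens", size_t(256)},
    };
    ov::genai::GenerationConfig config =
        ov::genai::GenerationConfig::anymap_to_generation_config(config_map);
    return config.max_new_tokens == 256 ? 0 : 1;
}
```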
10 changes: 6 additions & 4 deletions src/cpp/include/openvino/genai/llm_pipeline.hpp
@@ -12,6 +12,7 @@
#include "openvino/genai/streamer_base.hpp"

namespace ov {
namespace genai {

using StreamerVariant = std::variant<std::function<void (std::string)>, std::shared_ptr<StreamerBase>>;
using OptionalGenerationConfig = std::optional<GenerationConfig>;
@@ -85,7 +86,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
*/
LLMPipeline(
const std::string& model_path,
const ov::Tokenizer& tokenizer,
const ov::genai::Tokenizer& tokenizer,
const std::string& device="CPU",
const ov::AnyMap& plugin_config = {},
const std::string& ov_tokenizers_path=""
@@ -164,7 +165,7 @@ class OPENVINO_GENAI_EXPORTS LLMPipeline {
return generate(text, generation_config, streamer);
}

ov::Tokenizer get_tokenizer();
ov::genai::Tokenizer get_tokenizer();
GenerationConfig get_generation_config() const;
void set_generation_config(const GenerationConfig& generation_config);

@@ -210,6 +211,7 @@ static constexpr ov::Property<std::string> eos_token{"eos_token"};

// only a lambda streamer can be set via ov::streamer(), ... syntactic sugar,
// because std::variant<StreamerBase, std::function<>> cannot be stored in AnyMap
static constexpr ov::Property<std::function<void (std::string)>> streamer_lambda{"streamer"};
static constexpr ov::Property<std::function<void (std::string)>> streamer{"streamer"};

} // namespace ov
} // namespace genai
} // namespace ov
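The `StreamerVariant` alias and the forwarding call `generate(text, generation_config, streamer)` in this header imply that a plain lambda can be passed wherever a streamer is expected; a sketch under that assumption (the prompt and device are illustrative):

``` cpp
#include "openvino/genai/llm_pipeline.hpp"
#include <iostream>
#include <string>

int main(int argc, char* argv[]) {
    std::string model_path = argv[1];
    ov::genai::LLMPipeline pipe(model_path, "CPU");

    ov::genai::GenerationConfig config = pipe.get_generation_config();
    config.max_new_tokens = 128;

    // StreamerVariant is std::variant<std::function<void (std::string)>, std::shared_ptr<StreamerBase>>,
    // so a lambda can be handed in directly where a streamer is expected.
    auto print_word = [](std::string word) { std::cout << word << std::flush; };
    pipe.generate("The Sun is yellow because", config, print_word);
}
```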
4 changes: 3 additions & 1 deletion src/cpp/include/openvino/genai/streamer_base.hpp
@@ -6,6 +6,7 @@
#include "openvino/genai/tokenizer.hpp"

namespace ov {
namespace genai {

/**
* @brief base class for streamers. In order to use, inherit from this class and implement the put() and end() methods
@@ -25,4 +26,5 @@ class StreamerBase {
virtual void end() = 0;
};

} // namespace ov
} // namespace genai
} // namespace ov
4 changes: 3 additions & 1 deletion src/cpp/include/openvino/genai/tokenizer.hpp
@@ -10,6 +10,7 @@
#include "openvino/genai/visibility.hpp"

namespace ov {
namespace genai {

/**
* @brief class used to encode prompts and decode the resulting tokens
@@ -78,4 +79,5 @@ class OPENVINO_GENAI_EXPORTS Tokenizer {
std::shared_ptr<TokenizerImpl> m_pimpl;
};

} // namespace ov
} // namespace genai
} // namespace ov
12 changes: 4 additions & 8 deletions src/cpp/src/generation_config.cpp
@@ -10,16 +10,11 @@
#include "utils.hpp"


namespace {


} // namespace


namespace ov {
namespace genai {

GenerationConfig::GenerationConfig(std::string json_path) {
using ov::generate_utils::read_json_param;
using ov::genai::utils::read_json_param;

std::ifstream f(json_path);
OPENVINO_ASSERT(f.is_open(), "Failed to open '" + json_path + "' with generation config");
@@ -61,7 +56,7 @@ GenerationConfig::GenerationConfig(std::string json_path) {
}

GenerationConfig GenerationConfig::anymap_to_generation_config(const ov::AnyMap& config_map) {
using ov::generate_utils::read_anymap_param;
using ov::genai::utils::read_anymap_param;

GenerationConfig config;
read_anymap_param(config_map, "max_new_tokens", config.max_new_tokens);
@@ -109,4 +104,5 @@ bool GenerationConfig::is_multimomial() const {
return do_sample;
}

} // namespace genai
} // namespace ov
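`read_anymap_param` lives in `utils.hpp`, which is not part of this excerpt; a hypothetical sketch of such a helper, only to illustrate what the calls above do (the real implementation may differ):

``` cpp
#include "openvino/core/any.hpp"
#include <string>

// Hypothetical sketch of a read_anymap_param-style helper: copy a value out of
// the map into `param` only when the key is present, otherwise keep the default.
template <typename T>
void read_anymap_param(const ov::AnyMap& config_map, const std::string& name, T& param) {
    auto it = config_map.find(name);
    if (it != config_map.end()) {
        param = it->second.as<T>();
    }
}
```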
20 changes: 11 additions & 9 deletions src/cpp/src/greedy_decoding.cpp
@@ -5,12 +5,13 @@
#include "utils.hpp"

namespace ov {
namespace genai {

ov::EncodedResults greedy_decoding(
EncodedResults greedy_decoding(
ov::InferRequest& m_model_runner,
ov::Tensor input_ids,
ov::Tensor attention_mask,
const ov::GenerationConfig generation_config,
const ov::genai::GenerationConfig generation_config,
const std::shared_ptr<StreamerBase> streamer,
const bool is_chat_conversation
) {
@@ -23,9 +24,9 @@

// todo: make this work even if position_ids are not specified
auto position_ids = ov::Tensor{ov::element::i64, input_ids.get_shape()};
generate_utils::initialize_position_ids(position_ids, attention_mask, kv_cache_len);
utils::initialize_position_ids(position_ids, attention_mask, kv_cache_len);

ov::EncodedResults results;
EncodedResults results;
results.scores.resize(batch_size);
results.tokens.resize(batch_size);
std::fill(results.scores.begin(), results.scores.end(), 0);
@@ -72,7 +73,7 @@ ov::EncodedResults greedy_decoding(
std::vector<int64_t> token_iter_results(batch_size); // results of a single infer request
std::vector<int> eos_met(batch_size, 0); // use int because can not use std::all_of with vector<bool>
for (size_t batch = 0; batch < batch_size; ++batch) {
auto res = generate_utils::softmax(logits, batch);
auto res = utils::softmax(logits, batch);
auto out_token = res.first;
results.tokens[batch].emplace_back(res.first);
results.scores[batch] += res.second;
@@ -89,8 +90,8 @@ ov::EncodedResults greedy_decoding(
return results;

for (size_t i = 0; i < max_tokens - 1; ++i) {
generate_utils::update_position_ids(m_model_runner.get_tensor("position_ids"), m_model_runner.get_tensor("attention_mask"));
m_model_runner.set_tensor("attention_mask", generate_utils::extend_attention(m_model_runner.get_tensor("attention_mask")));
utils::update_position_ids(m_model_runner.get_tensor("position_ids"), m_model_runner.get_tensor("attention_mask"));
m_model_runner.set_tensor("attention_mask", utils::extend_attention(m_model_runner.get_tensor("attention_mask")));

// todo: consider replacing with start_async and run callback right after that
m_model_runner.infer();
@@ -102,7 +103,7 @@ ov::EncodedResults greedy_decoding(
std::vector<int> eos_met(batch_size, 0); // use int because can not use std::all_of with vector<bool>
for (size_t batch = 0; batch < batch_size; ++batch) {

auto res = ov::generate_utils::softmax(logits, batch);
auto res = ov::genai::utils::softmax(logits, batch);
auto out_token = res.first;
results.tokens[batch].emplace_back(res.first);
results.scores[batch] += res.second;
@@ -125,4 +126,5 @@ ov::EncodedResults greedy_decoding(
return results;
}

}
} //namespace genai
} //namespace ov
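Each iteration above selects, per batch element, the argmax token of the last logits row via `utils::softmax`, which returns the token id and its probability. A standalone sketch of that selection step, written independently of the repository's `utils` implementation:

``` cpp
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <utility>
#include <vector>

// Pick the most likely next token from one row of logits and return
// {token_id, softmax probability of that token}.
std::pair<int64_t, float> greedy_sample(const std::vector<float>& logits) {
    auto max_it = std::max_element(logits.begin(), logits.end());
    int64_t token_id = std::distance(logits.begin(), max_it);

    // Softmax of the argmax token, computed stably by subtracting the max logit:
    // its numerator exp(max - max) is exactly 1.
    float sum = 0.0f;
    for (float v : logits) {
        sum += std::exp(v - *max_it);
    }
    return {token_id, 1.0f / sum};
}
```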
16 changes: 9 additions & 7 deletions src/cpp/src/group_beam_searcher.cpp
@@ -91,7 +91,7 @@ struct Parameters {
size_t group_size = 5;
float diversity_penalty = 1.0;
size_t max_new_tokens = 20;
ov::StopCriteria stop_criteria = ov::StopCriteria::heuristic;
ov::genai::StopCriteria stop_criteria = ov::genai::StopCriteria::heuristic;
float length_penalty = 1.0;
size_t no_repeat_ngram_size = std::numeric_limits<size_t>::max();

@@ -128,15 +128,15 @@ struct Group {
float best_sum_logprobs = ongoing.front().score;
float worst_score = min_heap.front().score;
switch (parameters.stop_criteria) {
case ov::StopCriteria::early:
case ov::genai::StopCriteria::early:
done = true;
return;
case ov::StopCriteria::heuristic: {
case ov::genai::StopCriteria::heuristic: {
float highest_attainable_score = best_sum_logprobs / std::pow(float(cur_len), parameters.length_penalty);
done = worst_score >= highest_attainable_score;
return;
}
case ov::StopCriteria::never: {
case ov::genai::StopCriteria::never: {
size_t length = parameters.length_penalty > 0.0 ? parameters.max_new_tokens : cur_len;
float highest_attainable_score = best_sum_logprobs / std::pow(float(length), parameters.length_penalty);
done = worst_score >= highest_attainable_score;
@@ -324,7 +324,7 @@ void initialize_inputs(const ov::Tensor& input_ids, const ov::Tensor& attention_

ov::Tensor position_ids = request.get_tensor("position_ids");
position_ids.set_shape(input_shape);
ov::generate_utils::initialize_position_ids(position_ids, attention_mask);
ov::genai::utils::initialize_position_ids(position_ids, attention_mask);

ov::Tensor beam_idx = request.get_tensor("beam_idx");
beam_idx.set_shape({input_shape.at(0)});
@@ -367,6 +367,7 @@ void update_position_ids(ov::Tensor&& position_ids, const ov::Tensor&& attention


namespace ov {
namespace genai {

EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor input_ids, ov::Tensor attention_mask, GenerationConfig config) {
OPENVINO_ASSERT(config.num_beams % config.num_beam_groups == 0, "number of beams should be divisible by number of groups");
@@ -427,12 +428,13 @@ EncodedResults beam_search(ov::InferRequest& lm, ov::Tensor input_ids, ov::Tenso
auto compare_scores = [](Beam left, Beam right) { return (left.score > right.score); };
std::sort(beams.begin(), beams.end(), compare_scores);

ov::EncodedResults results;
ov::genai::EncodedResults results;
for (auto beam = beams.begin(); beam != beams.begin() + config.num_return_sequences; ++beam) {
results.scores.emplace_back(beam->score);
results.tokens.emplace_back(beam->tokens);
}
return results;
}

} // namespace ov
} // namespace genai
} // namespace ov
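The `stop_criteria` switch earlier in this file decides when a beam group is done: `early` stops immediately, `heuristic` stops once no ongoing beam can beat the worst kept score, and `never` only bounds the search by length. A condensed standalone restatement of that check, mirroring the formulas in the hunk:

``` cpp
#include <cmath>
#include <cstddef>

enum class StopCriteria { early, heuristic, never };

// Decide whether a beam group may stop, given the best running sum of logprobs,
// the worst score already kept, the current length and the length penalty.
bool group_is_done(StopCriteria criteria, float best_sum_logprobs, float worst_kept_score,
                   size_t cur_len, size_t max_new_tokens, float length_penalty) {
    switch (criteria) {
    case StopCriteria::early:
        return true;  // stop as soon as enough finished hypotheses exist
    case StopCriteria::heuristic: {
        float best_attainable = best_sum_logprobs / std::pow(float(cur_len), length_penalty);
        return worst_kept_score >= best_attainable;
    }
    case StopCriteria::never: {
        size_t length = length_penalty > 0.0f ? max_new_tokens : cur_len;
        float best_attainable = best_sum_logprobs / std::pow(float(length), length_penalty);
        return worst_kept_score >= best_attainable;
    }
    }
    return false;
}
```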