
Commit

style
mzegla committed Dec 11, 2024
1 parent 6794bdf commit cf743ae
Showing 5 changed files with 8 additions and 10 deletions.
6 changes: 3 additions & 3 deletions src/llm/apis/openai_completions.cpp
@@ -356,15 +356,15 @@ absl::Status OpenAIChatCompletionsHandler::parseCommonPart(uint32_t maxTokensLim
     auto assistantConfidenceThresholdIt = doc.FindMember("assistant_confidence_threshold");
 
     if (isSpeculativePipeline) {
-        if (numAssistantTokensIt == doc.MemberEnd() && assistantConfidenceThresholdIt == doc.MemberEnd())
+        if (numAssistantTokensIt == doc.MemberEnd() && assistantConfidenceThresholdIt == doc.MemberEnd())
             return absl::InvalidArgumentError("Speculative decoding requires either num_assistant_tokens or assistant_confidence_threshold to be set.");
 
         if (numAssistantTokensIt != doc.MemberEnd() && assistantConfidenceThresholdIt != doc.MemberEnd())
             return absl::InvalidArgumentError("num_assistant_tokens and assistant_confidence_threshold are mutually exclusive and cannot both be set.");
     } else if (numAssistantTokensIt != doc.MemberEnd() || assistantConfidenceThresholdIt != doc.MemberEnd()) {
         return absl::InvalidArgumentError("num_assistant_tokens and assistant_confidence_threshold are only supported when speculative decoding is enabled.");
     }
-    // num_assistant_tokens: uint;
+    // num_assistant_tokens: uint;
     if (numAssistantTokensIt != doc.MemberEnd()) {
         if (!numAssistantTokensIt->value.IsUint() || numAssistantTokensIt->value.GetUint() == 0) {
             return absl::InvalidArgumentError("num_assistant_tokens must be an unsigned integer greater than 0");
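
For context, the hunk above enforces two rules for the speculative-decoding parameters: a speculative pipeline must receive exactly one of num_assistant_tokens or assistant_confidence_threshold, and a non-speculative pipeline must receive neither. A minimal standalone sketch of the same rules, using rapidjson as the file itself does; the function name, error strings, and main() driver below are illustrative, not OVMS API:

#include <iostream>
#include <string>
#include <rapidjson/document.h>

// Illustrative re-statement of the mutual-exclusion rule from the hunk above;
// not OVMS code. Returns an empty string on success, an error message otherwise.
static std::string validateSpeculativeParams(const std::string& body, bool isSpeculativePipeline) {
    rapidjson::Document doc;
    doc.Parse(body.c_str());
    auto numTokensIt = doc.FindMember("num_assistant_tokens");
    auto thresholdIt = doc.FindMember("assistant_confidence_threshold");
    if (isSpeculativePipeline) {
        if (numTokensIt == doc.MemberEnd() && thresholdIt == doc.MemberEnd())
            return "either num_assistant_tokens or assistant_confidence_threshold must be set";
        if (numTokensIt != doc.MemberEnd() && thresholdIt != doc.MemberEnd())
            return "num_assistant_tokens and assistant_confidence_threshold are mutually exclusive";
    } else if (numTokensIt != doc.MemberEnd() || thresholdIt != doc.MemberEnd()) {
        return "these parameters require speculative decoding to be enabled";
    }
    return "";
}

int main() {
    // Accepted: exactly one of the two parameters on a speculative pipeline.
    std::cout << validateSpeculativeParams(R"({"num_assistant_tokens": 5})", true) << "\n";
    // Rejected: both parameters set at once.
    std::cout << validateSpeculativeParams(
        R"({"num_assistant_tokens": 5, "assistant_confidence_threshold": 0.4})", true) << "\n";
}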
4 changes: 2 additions & 2 deletions src/llm/apis/openai_completions.hpp
@@ -73,14 +73,14 @@ struct OpenAIChatCompletionsRequest {
     std::optional<bool> ignoreEOS{std::nullopt};
     std::optional<std::set<std::string>> stop{std::nullopt};
     std::optional<bool> includeStopStrInOutput{std::nullopt};
-    std::optional<int> numReturnSequences{std::nullopt}; // effective for beam search and multinomial decoding
+    std::optional<int> numReturnSequences{std::nullopt}; // effective for beam search and multinomial decoding
     // Multinomial decoding specific
     std::optional<float> temperature{std::nullopt};
     std::optional<float> topP{std::nullopt};
     std::optional<int> topK{std::nullopt};
     std::optional<int> seed{std::nullopt};
     std::optional<float> frequencyPenalty{std::nullopt};
-    std::optional<float> presencePenalty{std::nullopt};;
+    std::optional<float> presencePenalty{std::nullopt};
     std::optional<float> repetitionPenalty{std::nullopt};
     // Beam search specific
     std::optional<int> bestOf{std::nullopt};
2 changes: 0 additions & 2 deletions src/llm/http_llm_calculator.cc
@@ -204,9 +204,7 @@ class HttpLLMCalculator : public CalculatorBase {
         if (this->generationHandle->get_status() == ov::genai::GenerationStatus::RUNNING || this->generationHandle->can_read()) {
             // Subsequent iteration
             OVMS_PROFILE_SCOPE("Generation of subsequent streaming response");
-            //SPDLOG_LOGGER_INFO(llm_calculator_logger, "Start read() ...");
             ov::genai::GenerationOutputs generationOutputs = this->generationHandle->read();
-            //SPDLOG_LOGGER_INFO(llm_calculator_logger, "End read() ...");
             RET_CHECK(generationOutputs.size() == 1); // TODO: Support multiple generations
             this->apiHandler->incrementProcessedTokens(generationOutputs.begin()->second.generated_ids.size());
 
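
For orientation, the loop above drains incremental tokens from an ov::genai generation handle while a request streams. A rough self-contained sketch of that read pattern against the public ContinuousBatchingPipeline API, based on OpenVINO GenAI usage rather than the MediaPipe calculator itself; the model path, prompt, and single-threaded step()/read() interleaving are assumptions for illustration:

#include <iostream>
#include "openvino/genai/continuous_batching_pipeline.hpp"

int main() {
    ov::genai::SchedulerConfig schedulerConfig;  // defaults; OVMS derives this from node options
    ov::genai::ContinuousBatchingPipeline pipe("/models/main", schedulerConfig, "CPU");

    ov::genai::GenerationConfig config = ov::genai::greedy();
    config.max_new_tokens = 32;
    ov::genai::GenerationHandle handle = pipe.add_request(0, "Why is the sky blue?", config);

    size_t processedTokens = 0;
    // Same loop shape as the calculator: keep reading while generation runs
    // or buffered outputs remain.
    while (handle->get_status() == ov::genai::GenerationStatus::RUNNING || handle->can_read()) {
        pipe.step();  // advance the scheduler by one iteration
        if (!handle->can_read())
            continue;
        ov::genai::GenerationOutputs outputs = handle->read();
        for (const auto& [requestId, output] : outputs)
            processedTokens += output.generated_ids.size();
    }
    std::cout << "streamed tokens: " << processedTokens << std::endl;
}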
4 changes: 2 additions & 2 deletions src/llm/llmnoderesources.cpp
@@ -156,8 +156,8 @@ Status LLMNodeResources::initializeLLMNodeResources(LLMNodeResources& nodeResour
 
     if (!nodeOptions.draft_models_path().empty()) {
         auto draftSchedulerConfig = prepareDraftModelSchedulerConfig(nodeOptions);
-        auto draftModelConfig = ov::genai::draft_model(nodeOptions.draft_models_path(), nodeOptions.draft_device(),
-            ov::genai::scheduler_config(draftSchedulerConfig));
+        auto draftModelConfig = ov::genai::draft_model(nodeOptions.draft_models_path(), nodeOptions.draft_device(),
+            ov::genai::scheduler_config(draftSchedulerConfig));
         nodeResources->pluginConfig.insert(draftModelConfig);
         nodeResources->isSpeculativePipeline = true;
     }
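
The block above is what switches a served model into speculative mode: attaching an ov::genai::draft_model property to the plugin config sets isSpeculativePipeline, which in turn enables the request validation shown earlier. As a hedged illustration of the same property on the plain LLMPipeline API; the model paths, device, and prompt are placeholders, not OVMS configuration:

#include <iostream>
#include "openvino/genai/llm_pipeline.hpp"

int main() {
    // Attaching a draft model enables speculative decoding: the small draft
    // model proposes tokens that the main model then verifies in bulk.
    ov::genai::LLMPipeline pipe("/models/main", "CPU",
                                ov::genai::draft_model("/models/draft", "CPU"));

    ov::genai::GenerationConfig config;
    config.max_new_tokens = 100;
    config.num_assistant_tokens = 5;  // or assistant_confidence_threshold, never both

    std::cout << pipe.generate("Why is the sky blue?", config) << std::endl;
}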
2 changes: 1 addition & 1 deletion src/llm/llmnoderesources.hpp
@@ -36,8 +36,8 @@
 
 #include "../logging.hpp"
 #include "../stringutils.hpp"
-#include "src/python/utils.hpp"
 #include "src/llm/llm_calculator.pb.h"
+#include "src/python/utils.hpp"
 #include "text_processor.hpp"
 
 namespace ovms {
