Skip to content

Commit

Permalink
Merge pull request #44 from ehsavoie/opentelemetry
Browse files Browse the repository at this point in the history
Using OpenTelemetry instrumentation in the injection module to track LLM usage.
  • Loading branch information
ehsavoie authored Jan 7, 2025
2 parents 2104ff8 + 399110f commit 76209d9
Show file tree
Hide file tree
Showing 19 changed files with 367 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,5 +35,15 @@
<module name="org.jboss.modules"/>
<module name="org.jboss.weld.core"/>
<module name="org.wildfly.common"/>
<!-- OpenTelemetry -->
<module name="org.wildfly.extension.opentelemetry-api" optional="true"/>
<module name="io.opentelemetry.api" optional="true"/>
<module name="io.opentelemetry.api.events" optional="true"/>
<module name="io.opentelemetry.context" optional="true"/>
<module name="io.opentelemetry.exporter" optional="true"/>
<module name="io.opentelemetry.otlp" optional="true"/>
<module name="io.opentelemetry.sdk" optional="true"/>
<module name="io.opentelemetry.semconv" optional="true"/>
<module name="io.smallrye.opentelemetry" optional="true" services="import"/>
</dependencies>
</module>
Original file line number Diff line number Diff line change
Expand Up @@ -36,5 +36,7 @@
<module name="org.wildfly.security.elytron-private"/>
<module name="org.wildfly.service"/>
<module name="org.wildfly.subsystem"/>
<!-- OpenTelemetry -->
<module name="org.wildfly.extension.opentelemetry-api" optional="true"/>
</dependencies>
</module>
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@
<!-- Require Java 11 -->
<maven.compiler.target>11</maven.compiler.target>
<maven.compiler.source>11</maven.compiler.source>
<version.org.wildfly>34.0.1.Final</version.org.wildfly>
<version.org.wildfly.core>26.0.1.Final</version.org.wildfly.core>
<version.org.wildfly>35.0.0.Beta1</version.org.wildfly>
<version.org.wildfly.core>27.0.0.Beta5</version.org.wildfly.core>
<version.org.wildfly.common>1.7.0.Final</version.org.wildfly.common>
<version.org.wildfly.galleon-plugins>7.3.1.Final</version.org.wildfly.galleon-plugins>

Expand Down
20 changes: 20 additions & 0 deletions wildfly-ai/injection/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,5 +80,25 @@
<scope>provided</scope>
<optional>true</optional>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>io.opentelemetry</groupId>
<artifactId>opentelemetry-context</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>org.wildfly</groupId>
<artifactId>wildfly-opentelemetry-api</artifactId>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>io.smallrye.opentelemetry</groupId>
<artifactId>smallrye-opentelemetry-api</artifactId>
<scope>provided</scope>
</dependency>
</dependencies>
</project>
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ public <T> T getBeanPropertyValue(String beanName, String propertyName, Class<T>
return (T) new ProducerFunction<Object>() {
@Override
public Object produce(Instance<Object> lookup, String beanName) {
List<ChatModelListener> listeners = lookup.select(ChatModelListener.class).handlesStream().map(Handle<ChatModelListener>::get).collect(Collectors.toList() );
List<ChatModelListener> listeners = lookup.select(ChatModelListener.class).handlesStream().map(Handle<ChatModelListener>::get).collect(Collectors.toList());
WildFlyChatModelConfig config = (WildFlyChatModelConfig) beanData.get(getBeanPropertyName(beanName, BEAN_VALUE));
if (ChatLanguageModel.class.isAssignableFrom(expectedType) && !config.isStreaming()) {
return (T) config.createLanguageModel(listeners);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ public class WildFlyMistralAiChatModelLanguage implements WildFlyChatModelConfig
private Double topP;
private boolean isJson;
private boolean streaming;
private boolean observable;

@Override
public ChatLanguageModel createLanguageModel(List<ChatModelListener> listeners) {
Expand All @@ -46,6 +47,9 @@ public ChatLanguageModel createLanguageModel(List<ChatModelListener> listeners)
if (isJson) {
builder.responseFormat("json_object");
}
if (observable) {
// builder.listeners(Collections.singletonList(new OpenTelemetryChatModelListener()));
}
return builder.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ public class WildFlyOllamaChatModelConfig implements WildFlyChatModelConfig {
private long connectTimeOut;
private String modelName;
private boolean streaming;
private boolean observable;

@Override
public ChatLanguageModel createLanguageModel(List<ChatModelListener> listeners) {
Expand All @@ -37,14 +38,16 @@ public ChatLanguageModel createLanguageModel(List<ChatModelListener> listeners)
if (isJson) {
builder.format("json");
}
if (observable) {
builder.listeners(listeners);
}
return builder.build();
}

@Override
public StreamingChatLanguageModel createStreamingLanguageModel(List<ChatModelListener> listeners) {
OllamaStreamingChatModel.OllamaStreamingChatModelBuilder builder = OllamaStreamingChatModel.builder()
.baseUrl(baseUrl)
.listeners(listeners)
.logRequests(logRequests)
.logResponses(logResponses)
.temperature(temperature)
Expand All @@ -53,6 +56,9 @@ public StreamingChatLanguageModel createStreamingLanguageModel(List<ChatModelLis
if (isJson) {
builder.format("json");
}
if (observable) {
builder.listeners(listeners);
}
return builder.build();
}

Expand Down Expand Up @@ -96,11 +102,17 @@ public WildFlyOllamaChatModelConfig modelName(String modelName) {
return this;
}

public WildFlyOllamaChatModelConfig streaming(boolean streaming) {

// Fluent setter: marks this configuration as producing a streaming chat model
// (see isStreaming(), used by the producer to pick createStreamingLanguageModel).
public WildFlyOllamaChatModelConfig setStreaming(boolean streaming) {
this.streaming = streaming;
return this;
}

// Fluent setter: when true, the created models register the supplied
// ChatModelListeners (builder.listeners(listeners)) for observability.
public WildFlyOllamaChatModelConfig setObservable(boolean observable) {
this.observable = observable;
return this;
}

@Override
public boolean isStreaming() {
return streaming;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ public class WildFlyOpenAiChatModelConfig implements WildFlyChatModelConfig {
private Double topP;
private boolean isJson;
private boolean streaming;
private boolean observable;

@Override
public ChatLanguageModel createLanguageModel(List<ChatModelListener> listeners) {
Expand All @@ -50,6 +51,9 @@ public ChatLanguageModel createLanguageModel(List<ChatModelListener> listeners)
if (isJson) {
builder.responseFormat("json_object");
}
if (observable) {
builder.listeners(listeners);
}
return builder.build();
}

Expand All @@ -61,7 +65,6 @@ public StreamingChatLanguageModel createStreamingLanguageModel(List<ChatModelLis
.frequencyPenalty(frequencyPenalty)
.logRequests(logRequests)
.logResponses(logResponses)
.listeners(listeners)
.maxTokens(maxToken)
.modelName(modelName)
.organizationId(organizationId)
Expand All @@ -73,6 +76,9 @@ public StreamingChatLanguageModel createStreamingLanguageModel(List<ChatModelLis
if (isJson) {
builder.responseFormat("json_object");
}
if (observable) {
builder.listeners(listeners);
}
return builder.build();
}

Expand Down Expand Up @@ -146,11 +152,16 @@ public WildFlyOpenAiChatModelConfig setJson(boolean isJson) {
return this;
}

public WildFlyOpenAiChatModelConfig streaming(boolean streaming) {
// Fluent setter: marks this configuration as producing a streaming chat model
// (see isStreaming(), used by the producer to pick createStreamingLanguageModel).
public WildFlyOpenAiChatModelConfig setStreaming(boolean streaming) {
this.streaming = streaming;
return this;
}

// Fluent setter: when true, the created models register the supplied
// ChatModelListeners (builder.listeners(listeners)) for observability.
public WildFlyOpenAiChatModelConfig setObservable(boolean observable) {
this.observable = observable;
return this;
}

@Override
public boolean isStreaming() {
return streaming;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* Copyright The WildFly Authors
* SPDX-License-Identifier: Apache-2.0
*/
package org.wildfly.extension.ai.injection.observability;

import dev.langchain4j.data.message.AiMessage;
import dev.langchain4j.model.chat.listener.ChatModelErrorContext;
import dev.langchain4j.model.chat.listener.ChatModelListener;
import dev.langchain4j.model.chat.listener.ChatModelRequest;
import dev.langchain4j.model.chat.listener.ChatModelRequestContext;
import dev.langchain4j.model.chat.listener.ChatModelResponse;
import dev.langchain4j.model.chat.listener.ChatModelResponseContext;
import io.opentelemetry.api.common.AttributeKey;
import io.opentelemetry.api.common.Attributes;
import io.opentelemetry.api.metrics.DoubleHistogram;
import io.opentelemetry.api.metrics.LongHistogram;
import io.opentelemetry.api.metrics.Meter;
import jakarta.annotation.PostConstruct;
import jakarta.enterprise.context.Dependent;
import jakarta.inject.Inject;
import java.util.List;
import java.util.concurrent.TimeUnit;

/**
* Creates metrics following the <a href="https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/">Semantic
* Conventions for GenAI Metrics</a>.
*/
@Dependent
public class OpenTelemetryMetricsChatModelListener implements ChatModelListener {

private static final String MP_AI_METRIC_START_TIME_NAME = "MP_AI_METRIC_START_TIME";

private static final String METRIC_CLIENT_TOKEN_USAGE_NAME = "gen_ai.client.token.usage";
private static final String METRIC_CLIENT_OPERATION_DURATION_NAME = "gen_ai.client.operation.duration";

private LongHistogram clientTokenUsage;
private DoubleHistogram clientOperationDuration;

@Inject
private Meter meter;

@PostConstruct
private void init() {
clientTokenUsage = meter.histogramBuilder(METRIC_CLIENT_TOKEN_USAGE_NAME)
.ofLongs()
.setDescription("Measures number of input and output tokens used")
.setExplicitBucketBoundariesAdvice(List.of(1L, 4L, 16L, 64L, 256L, 1024L, 4096L, 16384L, 65536L, 262144L,
1048576L, 4194304L, 16777216L, 67108864L))
.build();

clientOperationDuration = meter.histogramBuilder(METRIC_CLIENT_OPERATION_DURATION_NAME)
.setDescription("GenAI operation duration")
.setExplicitBucketBoundariesAdvice(
List.of(0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92))
.setUnit("s")
.build();
}

@Override
public void onRequest(ChatModelRequestContext requestContext) {
requestContext.attributes().put(MP_AI_METRIC_START_TIME_NAME, System.nanoTime());
}

@Override
public void onResponse(ChatModelResponseContext responseContext) {
final long endTime = System.nanoTime();
final long startTime = (Long) responseContext.attributes().get(MP_AI_METRIC_START_TIME_NAME);

final ChatModelRequest request = responseContext.request();
final ChatModelResponse response = responseContext.response();

Attributes inputTokenCountAttributes = Attributes.of(AttributeKey.stringKey("gen_ai.operation.name"), "chat",
AttributeKey.stringKey("gen_ai.request.model"), request.model(),
AttributeKey.stringKey("gen_ai.response.model"), response.model(),
AttributeKey.stringKey("gen_ai.token.type"), "input");
//Record
clientTokenUsage.record(response.tokenUsage().inputTokenCount(), inputTokenCountAttributes);

Attributes outputTokenCountAttributes = Attributes.of(AttributeKey.stringKey("gen_ai.operation.name"), "chat",
AttributeKey.stringKey("gen_ai.request.model"), request.model(),
AttributeKey.stringKey("gen_ai.response.model"), response.model(),
AttributeKey.stringKey("gen_ai.token.type"), "output");

//Record
clientTokenUsage.record(response.tokenUsage().outputTokenCount(), outputTokenCountAttributes);

//Record duration
Attributes durationAttributes = Attributes.of(AttributeKey.stringKey("gen_ai.operation.name"), "chat",
AttributeKey.stringKey("gen_ai.request.model"), request.model(),
AttributeKey.stringKey("gen_ai.response.model"), response.model());
recordClientOperationDuration(startTime, endTime, durationAttributes);
}

@Override
public void onError(ChatModelErrorContext errorContext) {
final long endTime = System.nanoTime();
final long startTime = (Long) errorContext.attributes().get(MP_AI_METRIC_START_TIME_NAME);
final ChatModelRequest request = errorContext.request();
final ChatModelResponse response = errorContext.partialResponse();

StringBuilder sb = new StringBuilder()
.append(errorContext.error().getClass().getName());

AiMessage aiMessage = errorContext.partialResponse().aiMessage();
if (aiMessage != null) {
sb.append(";").append(aiMessage.text());
}

//Record duration
Attributes durationAttributes = Attributes.of(AttributeKey.stringKey("gen_ai.operation.name"), "chat",
AttributeKey.stringKey("gen_ai.request.model"), request.model(),
AttributeKey.stringKey("gen_ai.response.model"), response.model(),
AttributeKey.stringKey("error.type"), sb.toString());
recordClientOperationDuration(startTime, endTime, durationAttributes);
}

private void recordClientOperationDuration(final long startTime, long endTime, final Attributes attributes) {
clientOperationDuration.record(TimeUnit.SECONDS.convert(endTime - startTime, TimeUnit.NANOSECONDS), attributes);
}
}
Loading

0 comments on commit 76209d9

Please sign in to comment.