From 399110f049527b0f694144bc31130e6d2ea7ead5 Mon Sep 17 00:00:00 2001 From: Emmanuel Hugonnet Date: Tue, 7 Jan 2025 12:52:58 +0100 Subject: [PATCH] Using telemetry instrumentation in injection module to track LLM Usage. * Adding support for traces * Adding support for metrics Signed-off-by: Emmanuel Hugonnet --- .../extension/ai/injection/main/module.xml | 10 ++ .../org/wildfly/extension/ai/main/module.xml | 2 + pom.xml | 4 +- wildfly-ai/injection/pom.xml | 20 +++ .../ai/injection/WildFlyLLMConfig.java | 2 +- .../WildFlyMistralAiChatModelLanguage.java | 4 + .../chat/WildFlyOllamaChatModelConfig.java | 16 ++- .../chat/WildFlyOpenAiChatModelConfig.java | 15 ++- ...OpenTelemetryMetricsChatModelListener.java | 121 ++++++++++++++++++ .../OpenTelemetryTracesChatModelListener.java | 113 ++++++++++++++++ .../src/main/resources/META-INF/beans.xml | 5 + wildfly-ai/subsystem/pom.xml | 5 + .../extension/ai/AISubsystemRegistrar.java | 6 +- .../wildfly/extension/ai/Capabilities.java | 3 + ...tChatModelProviderServiceConfigurator.java | 18 +++ ...aChatModelProviderServiceConfigurator.java | 18 ++- ...IChatModelProviderServiceConfigurator.java | 8 +- .../ai/deployment/AIDependencyProcessor.java | 3 +- .../ai/deployment/AIDeploymentProcessor.java | 11 +- 19 files changed, 367 insertions(+), 17 deletions(-) create mode 100644 wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/observability/OpenTelemetryMetricsChatModelListener.java create mode 100644 wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/observability/OpenTelemetryTracesChatModelListener.java create mode 100644 wildfly-ai/injection/src/main/resources/META-INF/beans.xml diff --git a/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/injection/main/module.xml b/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/injection/main/module.xml index 43c7022..c11a5e0 100644 --- 
a/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/injection/main/module.xml +++ b/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/injection/main/module.xml @@ -35,5 +35,15 @@ + + + + + + + + + + diff --git a/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/main/module.xml b/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/main/module.xml index 61277fe..4ae0777 100644 --- a/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/main/module.xml +++ b/ai-feature-pack/src/main/resources/modules/system/layers/base/org/wildfly/extension/ai/main/module.xml @@ -36,5 +36,7 @@ + + diff --git a/pom.xml b/pom.xml index c352b95..50110f6 100644 --- a/pom.xml +++ b/pom.xml @@ -33,8 +33,8 @@ 11 11 - 34.0.1.Final - 26.0.1.Final + 35.0.0.Beta1 + 27.0.0.Beta5 1.7.0.Final 7.3.1.Final diff --git a/wildfly-ai/injection/pom.xml b/wildfly-ai/injection/pom.xml index 2a6e7e6..b84d3f2 100644 --- a/wildfly-ai/injection/pom.xml +++ b/wildfly-ai/injection/pom.xml @@ -80,5 +80,25 @@ provided true + + io.opentelemetry + opentelemetry-api + provided + + + io.opentelemetry + opentelemetry-context + provided + + + org.wildfly + wildfly-opentelemetry-api + provided + + + io.smallrye.opentelemetry + smallrye-opentelemetry-api + provided + diff --git a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/WildFlyLLMConfig.java b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/WildFlyLLMConfig.java index 7f01f47..485d686 100644 --- a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/WildFlyLLMConfig.java +++ b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/WildFlyLLMConfig.java @@ -61,7 +61,7 @@ public T getBeanPropertyValue(String beanName, String propertyName, Class return (T) new ProducerFunction() { @Override public Object 
produce(Instance lookup, String beanName) { - List listeners = lookup.select(ChatModelListener.class).handlesStream().map(Handle::get).collect(Collectors.toList() ); + List listeners = lookup.select(ChatModelListener.class).handlesStream().map(Handle::get).collect(Collectors.toList()); WildFlyChatModelConfig config = (WildFlyChatModelConfig) beanData.get(getBeanPropertyName(beanName, BEAN_VALUE)); if (ChatLanguageModel.class.isAssignableFrom(expectedType) && !config.isStreaming()) { return (T) config.createLanguageModel(listeners); diff --git a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyMistralAiChatModelLanguage.java b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyMistralAiChatModelLanguage.java index 2637f53..f27b0ee 100644 --- a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyMistralAiChatModelLanguage.java +++ b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyMistralAiChatModelLanguage.java @@ -27,6 +27,7 @@ public class WildFlyMistralAiChatModelLanguage implements WildFlyChatModelConfig private Double topP; private boolean isJson; private boolean streaming; + private boolean observable; @Override public ChatLanguageModel createLanguageModel(List listeners) { @@ -46,6 +47,9 @@ public ChatLanguageModel createLanguageModel(List listeners) if (isJson) { builder.responseFormat("json_object"); } + if (observable) { +// builder.listeners(Collections.singletonList(new OpenTelemetryChatModelListener())); + } return builder.build(); } diff --git a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyOllamaChatModelConfig.java b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyOllamaChatModelConfig.java index a812109..0f706c3 100644 --- a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyOllamaChatModelConfig.java +++ 
b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/chat/WildFlyOllamaChatModelConfig.java @@ -23,6 +23,7 @@ public class WildFlyOllamaChatModelConfig implements WildFlyChatModelConfig { private long connectTimeOut; private String modelName; private boolean streaming; + private boolean observable; @Override public ChatLanguageModel createLanguageModel(List listeners) { @@ -37,6 +38,9 @@ public ChatLanguageModel createLanguageModel(List listeners) if (isJson) { builder.format("json"); } + if (observable) { + builder.listeners(listeners); + } return builder.build(); } @@ -44,7 +48,6 @@ public ChatLanguageModel createLanguageModel(List listeners) public StreamingChatLanguageModel createStreamingLanguageModel(List listeners) { OllamaStreamingChatModel.OllamaStreamingChatModelBuilder builder = OllamaStreamingChatModel.builder() .baseUrl(baseUrl) - .listeners(listeners) .logRequests(logRequests) .logResponses(logResponses) .temperature(temperature) @@ -53,6 +56,9 @@ public StreamingChatLanguageModel createStreamingLanguageModel(List listeners) { @@ -50,6 +51,9 @@ public ChatLanguageModel createLanguageModel(List listeners) if (isJson) { builder.responseFormat("json_object"); } + if (observable) { + builder.listeners(listeners); + } return builder.build(); } @@ -61,7 +65,6 @@ public StreamingChatLanguageModel createStreamingLanguageModel(ListSemantic + * Conventions for GenAI Metrics. 
+ */
+@Dependent
+public class OpenTelemetryMetricsChatModelListener implements ChatModelListener {
+
+    /** Listener-attribute key under which {@link #onRequest} stores its {@link System#nanoTime()} reading. */
+    private static final String MP_AI_METRIC_START_TIME_NAME = "MP_AI_METRIC_START_TIME";
+
+    private static final String METRIC_CLIENT_TOKEN_USAGE_NAME = "gen_ai.client.token.usage";
+    private static final String METRIC_CLIENT_OPERATION_DURATION_NAME = "gen_ai.client.operation.duration";
+
+    private static final AttributeKey<String> OPERATION_NAME = AttributeKey.stringKey("gen_ai.operation.name");
+    private static final AttributeKey<String> REQUEST_MODEL = AttributeKey.stringKey("gen_ai.request.model");
+    private static final AttributeKey<String> RESPONSE_MODEL = AttributeKey.stringKey("gen_ai.response.model");
+    private static final AttributeKey<String> TOKEN_TYPE = AttributeKey.stringKey("gen_ai.token.type");
+    private static final AttributeKey<String> ERROR_TYPE = AttributeKey.stringKey("error.type");
+
+    private LongHistogram clientTokenUsage;
+    private DoubleHistogram clientOperationDuration;
+
+    @Inject
+    private Meter meter;
+
+    /**
+     * Builds the two histograms once the {@link Meter} has been injected.
+     */
+    @PostConstruct
+    private void init() {
+        clientTokenUsage = meter.histogramBuilder(METRIC_CLIENT_TOKEN_USAGE_NAME)
+                .ofLongs()
+                .setDescription("Measures number of input and output tokens used")
+                .setExplicitBucketBoundariesAdvice(List.of(1L, 4L, 16L, 64L, 256L, 1024L, 4096L, 16384L, 65536L, 262144L,
+                        1048576L, 4194304L, 16777216L, 67108864L))
+                .build();
+
+        clientOperationDuration = meter.histogramBuilder(METRIC_CLIENT_OPERATION_DURATION_NAME)
+                .setDescription("GenAI operation duration")
+                .setExplicitBucketBoundariesAdvice(
+                        List.of(0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92))
+                .setUnit("s")
+                .build();
+    }
+
+    /**
+     * Remembers when the request started so that the duration can be computed later.
+     *
+     * @param requestContext the context of the request about to be sent.
+     */
+    @Override
+    public void onRequest(ChatModelRequestContext requestContext) {
+        requestContext.attributes().put(MP_AI_METRIC_START_TIME_NAME, System.nanoTime());
+    }
+
+    /**
+     * Records input/output token usage (when the response reports it) and the operation duration.
+     *
+     * @param responseContext the context of the completed request.
+     */
+    @Override
+    public void onResponse(ChatModelResponseContext responseContext) {
+        final long endTime = System.nanoTime();
+        final ChatModelRequest request = responseContext.request();
+        final ChatModelResponse response = responseContext.response();
+        final Attributes modelAttributes = modelAttributes(request, response);
+
+        // Token usage is optional: skip the token histogram instead of failing the listener.
+        if (response != null && response.tokenUsage() != null) {
+            recordTokenUsage(response.tokenUsage().inputTokenCount(), "input", modelAttributes);
+            recordTokenUsage(response.tokenUsage().outputTokenCount(), "output", modelAttributes);
+        }
+
+        recordClientOperationDuration(responseContext.attributes().get(MP_AI_METRIC_START_TIME_NAME), endTime,
+                modelAttributes);
+    }
+
+    /**
+     * Records the operation duration of a failed request, tagged with an {@code error.type} attribute made of the
+     * exception class name and, when a partial response carries one, the text of its AI message.
+     *
+     * @param errorContext the context of the failed request.
+     */
+    @Override
+    public void onError(ChatModelErrorContext errorContext) {
+        final long endTime = System.nanoTime();
+        final ChatModelRequest request = errorContext.request();
+        // There may be no partial response at all when the call failed.
+        final ChatModelResponse response = errorContext.partialResponse();
+
+        StringBuilder errorType = new StringBuilder()
+                .append(errorContext.error().getClass().getName());
+
+        AiMessage aiMessage = response == null ? null : response.aiMessage();
+        if (aiMessage != null) {
+            errorType.append(";").append(aiMessage.text());
+        }
+
+        Attributes durationAttributes = modelAttributes(request, response).toBuilder()
+                .put(ERROR_TYPE, errorType.toString())
+                .build();
+        recordClientOperationDuration(errorContext.attributes().get(MP_AI_METRIC_START_TIME_NAME), endTime,
+                durationAttributes);
+    }
+
+    /**
+     * Builds the attributes shared by every measurement: operation name plus the request and response model names
+     * when they are known.
+     */
+    private static Attributes modelAttributes(ChatModelRequest request, ChatModelResponse response) {
+        // Fully qualified so that the import list of this file does not have to change.
+        io.opentelemetry.api.common.AttributesBuilder builder = Attributes.builder().put(OPERATION_NAME, "chat");
+        if (request != null && request.model() != null) {
+            builder.put(REQUEST_MODEL, request.model());
+        }
+        if (response != null && response.model() != null) {
+            builder.put(RESPONSE_MODEL, response.model());
+        }
+        return builder.build();
+    }
+
+    /**
+     * Records one token count under the given {@code gen_ai.token.type}; a missing count is ignored.
+     */
+    private void recordTokenUsage(Integer tokenCount, String tokenType, Attributes modelAttributes) {
+        if (tokenCount != null) {
+            clientTokenUsage.record(tokenCount.longValue(),
+                    modelAttributes.toBuilder().put(TOKEN_TYPE, tokenType).build());
+        }
+    }
+
+    /**
+     * Records the elapsed time in fractional seconds.
+     *
+     * @param startTime the value stored by {@link #onRequest}; anything other than a {@link Long} means the start
+     * of the request was not observed and nothing is recorded.
+     * @param endTime the {@link System#nanoTime()} reading taken when the request finished.
+     * @param attributes the attributes attached to the measurement.
+     */
+    private void recordClientOperationDuration(final Object startTime, final long endTime, final Attributes attributes) {
+        if (!(startTime instanceof Long)) {
+            return;
+        }
+        final long elapsedNanos = endTime - (Long) startTime;
+        // TimeUnit.SECONDS.convert() truncates to whole seconds, which would record 0 for any sub-second call.
+        clientOperationDuration.record((double) elapsedNanos / TimeUnit.SECONDS.toNanos(1), attributes);
+    }
+}
diff --git a/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/observability/OpenTelemetryTracesChatModelListener.java b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/observability/OpenTelemetryTracesChatModelListener.java
new file mode 100644
index 0000000..b2d5f82
--- /dev/null
+++ b/wildfly-ai/injection/src/main/java/org/wildfly/extension/ai/injection/observability/OpenTelemetryTracesChatModelListener.java
@@ -0,0 +1,113 @@
+/*
+ * Copyright The WildFly Authors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package org.wildfly.extension.ai.injection.observability;
+
+import dev.langchain4j.model.chat.listener.ChatModelErrorContext;
+import dev.langchain4j.model.chat.listener.ChatModelListener;
+import dev.langchain4j.model.chat.listener.ChatModelRequest;
+import dev.langchain4j.model.chat.listener.ChatModelRequestContext;
+import dev.langchain4j.model.chat.listener.ChatModelResponse;
+import dev.langchain4j.model.chat.listener.ChatModelResponseContext;
+import dev.langchain4j.model.output.TokenUsage;
+import io.opentelemetry.api.trace.Span;
+import io.opentelemetry.api.trace.SpanBuilder;
+import io.opentelemetry.api.trace.SpanKind;
+import io.opentelemetry.api.trace.Tracer;
+import io.opentelemetry.context.Context;
+import io.opentelemetry.context.Scope;
+import jakarta.enterprise.context.Dependent;
+import jakarta.inject.Inject;
+import org.wildfly.extension.ai.injection.AILogger;
+
+/**
+ * Creates spans following the Semantic
+ * Conventions for GenAI Spans.
+ */
+@Dependent
+public class OpenTelemetryTracesChatModelListener implements ChatModelListener {
+
+    /** Listener-attribute key holding the {@link Scope} opened in {@link #onRequest}. */
+    private static final String OTEL_SCOPE_KEY_NAME = "OTelScope";
+    /** Listener-attribute key holding the {@link Span} started in {@link #onRequest}. */
+    private static final String OTEL_SPAN_KEY_NAME = "OTelSpan";
+
+    @Inject
+    private Tracer tracer;
+
+    public OpenTelemetryTracesChatModelListener() {
+    }
+
+    /**
+     * Starts a {@code chat <model>} span describing the request, makes it current and stores both the span and its
+     * scope in the listener attributes so that {@link #onResponse} or {@link #onError} can finish them.
+     * Does nothing when no {@link Tracer} was injected.
+     *
+     * @param requestContext the context of the request about to be sent.
+     */
+    @Override
+    public void onRequest(ChatModelRequestContext requestContext) {
+        if (tracer == null) {
+            return;
+        }
+        final ChatModelRequest request = requestContext.request();
+        // The model call is an outgoing request, which the GenAI span conventions describe as a CLIENT span.
+        SpanBuilder spanBuilder = tracer.spanBuilder("chat " + request.model())
+                .setSpanKind(SpanKind.CLIENT)
+                .setAttribute("gen_ai.operation.name", "chat");
+        Object existingSpan = requestContext.attributes().get(OTEL_SPAN_KEY_NAME);
+        if (existingSpan != null) {
+            spanBuilder.setParent(Context.current().with((Span) existingSpan));
+        }
+        if (request.maxTokens() != null) {
+            spanBuilder.setAttribute("gen_ai.request.max_tokens", request.maxTokens());
+        }
+        if (request.temperature() != null) {
+            spanBuilder.setAttribute("gen_ai.request.temperature", request.temperature());
+        }
+        if (request.topP() != null) {
+            spanBuilder.setAttribute("gen_ai.request.top_p", request.topP());
+        }
+        if (request.messages() != null && !request.messages().isEmpty()) {
+            spanBuilder.setAttribute("gen_ai.request.messages", request.messages().toString());
+        }
+        Span span = spanBuilder.startSpan();
+        Scope scope = span.makeCurrent();
+        requestContext.attributes().put(OTEL_SCOPE_KEY_NAME, scope);
+        requestContext.attributes().put(OTEL_SPAN_KEY_NAME, span);
+    }
+
+    /**
+     * Copies the response details onto the span started by {@link #onRequest}, ends it and closes its scope.
+     * Safe to call when no span was started.
+     *
+     * @param responseContext the context of the completed request.
+     */
+    @Override
+    public void onResponse(ChatModelResponseContext responseContext) {
+        Span span = (Span) responseContext.attributes().get(OTEL_SPAN_KEY_NAME);
+        if (span != null) {
+            ChatModelResponse response = responseContext.response();
+            if (response != null) {
+                describeResponse(span, response);
+            }
+            span.end();
+        }
+        Scope scope = (Scope) responseContext.attributes().get(OTEL_SCOPE_KEY_NAME);
+        // The span is absent when onRequest returned early, so it must not be dereferenced unguarded here.
+        AILogger.ROOT_LOGGER.debug("OpenTelemetryChatModelListener.onResponse with context " + (span == null ? null : span.getSpanContext()) + " and tracer " + tracer + " in thread " + Thread.currentThread() + " with scope " + scope);
+        closeScope(scope);
+    }
+
+    /**
+     * Records the failure on the span started by {@link #onRequest}, ends it and closes its scope.
+     *
+     * @param errorContext the context of the failed request.
+     */
+    @Override
+    public void onError(ChatModelErrorContext errorContext) {
+        Span span = (Span) errorContext.attributes().get(OTEL_SPAN_KEY_NAME);
+        if (span != null) {
+            span.recordException(errorContext.error());
+            span.end();
+        }
+        closeScope((Scope) errorContext.attributes().get(OTEL_SCOPE_KEY_NAME));
+    }
+
+    /**
+     * Sets the {@code gen_ai.response.*} and {@code gen_ai.usage.*} attributes that the response actually provides.
+     */
+    private static void describeResponse(Span span, ChatModelResponse response) {
+        if (response.id() != null) {
+            span.setAttribute("gen_ai.response.id", response.id());
+        }
+        if (response.model() != null) {
+            span.setAttribute("gen_ai.response.model", response.model());
+        }
+        if (response.finishReason() != null) {
+            span.setAttribute("gen_ai.response.finish_reasons", response.finishReason().toString());
+        }
+        TokenUsage tokenUsage = response.tokenUsage();
+        if (tokenUsage != null) {
+            // The counts are boxed and may be missing; unboxing a null would throw.
+            if (tokenUsage.outputTokenCount() != null) {
+                span.setAttribute("gen_ai.usage.output_tokens", tokenUsage.outputTokenCount());
+            }
+            if (tokenUsage.inputTokenCount() != null) {
+                span.setAttribute("gen_ai.usage.input_tokens", tokenUsage.inputTokenCount());
+            }
+        }
+        if (response.aiMessage() != null) {
+            span.setAttribute("gen_ai.response.message", response.aiMessage().toString());
+        }
+    }
+
+    /**
+     * Closes the given scope, if any.
+     */
+    private void closeScope(Scope scope) {
+        if (scope != null) {
+            AILogger.ROOT_LOGGER.debug("OpenTelemetryChatModelListener.closeScope tracer " + tracer + " in thread " + Thread.currentThread() + " with scope " + scope);
+            scope.close();
+        }
+    }
+}
diff --git a/wildfly-ai/injection/src/main/resources/META-INF/beans.xml b/wildfly-ai/injection/src/main/resources/META-INF/beans.xml
new file mode 100644
index 0000000..4ca8195
--- /dev/null
+++ b/wildfly-ai/injection/src/main/resources/META-INF/beans.xml
@@ -0,0 +1,5 @@ + + +
diff --git a/wildfly-ai/subsystem/pom.xml b/wildfly-ai/subsystem/pom.xml
index 1e59fd9..d6da34e 100644
--- a/wildfly-ai/subsystem/pom.xml
+++ b/wildfly-ai/subsystem/pom.xml
@@ -210,5 +210,10 @@ com.squareup.retrofit2 converter-jackson + + org.wildfly + wildfly-opentelemetry-api + provided + +
diff --git
a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/AISubsystemRegistrar.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/AISubsystemRegistrar.java index 3b99675..f8af744 100644 --- a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/AISubsystemRegistrar.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/AISubsystemRegistrar.java @@ -37,6 +37,8 @@ class AISubsystemRegistrar implements SubsystemResourceDefinitionRegistrar { static final String NAME = "ai"; static final PathElement PATH = SubsystemResourceDefinitionRegistrar.pathElement(NAME); static final ParentResourceDescriptionResolver RESOLVER = new SubsystemResourceDescriptionResolver(NAME, AISubsystemRegistrar.class); + private static final int PHASE_DEPENDENCIES_AI = 0x1930; + private static final int PHASE_POST_MODULE_AI = 0x3840; @Override public ManagementResourceRegistration register(SubsystemRegistration parent, ManagementResourceRegistrationContext context) { @@ -45,8 +47,8 @@ public ManagementResourceRegistration register(SubsystemRegistration parent, Man ResourceDescriptor descriptor = ResourceDescriptor .builder(RESOLVER) .withDeploymentChainContributor(target -> { - target.addDeploymentProcessor(NAME, Phase.DEPENDENCIES, Phase.DEPENDENCIES_MICROPROFILE_OPENTRACING, new AIDependencyProcessor()); - target.addDeploymentProcessor(NAME, Phase.POST_MODULE, Phase.POST_MODULE_MICROPROFILE_OPENTRACING, new AIDeploymentProcessor()); + target.addDeploymentProcessor(NAME, Phase.DEPENDENCIES, PHASE_DEPENDENCIES_AI, new AIDependencyProcessor()); + target.addDeploymentProcessor(NAME, Phase.POST_MODULE, PHASE_POST_MODULE_AI, new AIDeploymentProcessor()); }) .build(); ManagementResourceRegistrar.of(descriptor).register(registration); diff --git a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/Capabilities.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/Capabilities.java index dd6296b..fe9b626 100644 --- 
a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/Capabilities.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/Capabilities.java @@ -23,4 +23,7 @@ public interface Capabilities { UnaryServiceDescriptor CONTENT_RETRIEVER_PROVIDER_DESCRIPTOR = UnaryServiceDescriptor.of("org.wildfly.ai.rag.retriever", ContentRetriever.class); RuntimeCapability CONTENT_RETRIEVER_PROVIDER_CAPABILITY = RuntimeCapability.Builder.of(CONTENT_RETRIEVER_PROVIDER_DESCRIPTOR).setAllowMultipleRegistrations(true).build(); + + String OPENTELEMETRY_CAPABILITY_NAME = "org.wildfly.extension.opentelemetry"; + String OPENTELEMETRY_CONFIG_CAPABILITY_NAME = "org.wildfly.extension.opentelemetry.config"; } diff --git a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/AbstractChatModelProviderServiceConfigurator.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/AbstractChatModelProviderServiceConfigurator.java index c6c8ac0..012f7a4 100644 --- a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/AbstractChatModelProviderServiceConfigurator.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/AbstractChatModelProviderServiceConfigurator.java @@ -10,9 +10,11 @@ import java.util.function.Supplier; import org.jboss.as.controller.OperationContext; import org.wildfly.extension.ai.injection.chat.WildFlyChatModelConfig; +import org.wildfly.extension.opentelemetry.api.WildFlyOpenTelemetryConfig; import org.wildfly.service.capture.ValueRegistry; import org.wildfly.subsystem.service.ResourceServiceConfigurator; import org.wildfly.subsystem.service.ResourceServiceInstaller; +import org.wildfly.subsystem.service.ServiceDependency; import org.wildfly.subsystem.service.capability.CapabilityServiceInstaller; public abstract class AbstractChatModelProviderServiceConfigurator implements ResourceServiceConfigurator { @@ -37,4 +39,20 @@ public Consumer install(OperationContext context) { } }; } + + 
/* Builds the installer for a chat model provider service that additionally requires the given openTelemetryConfig dependency. name is the key used with the registry; factory supplies the service value. */ ResourceServiceInstaller installService(final String name, Supplier factory, ServiceDependency openTelemetryConfig) { + /* Consumer obtained from the registry for this name; presumably it captures the started service value - confirm against ValueRegistry. */ Consumer captor = registry.add(name); + ResourceServiceInstaller installer = CapabilityServiceInstaller.builder(CHAT_MODEL_PROVIDER_CAPABILITY, factory) + .requires(openTelemetryConfig) + .withCaptor(captor) + .asActive() + .build(); + /* Drops the registry entry keyed by the current address value of the operation context. */ Consumer remover = ctx -> registry.remove(ctx.getCurrentAddressValue()); + return new ResourceServiceInstaller() { + @Override + public Consumer install(OperationContext context) { + /* Run the registry cleanup after the consumer returned by the wrapped installer (presumably invoked on removal - confirm). */ return installer.install(context).andThen(remover); + } + }; + } } diff --git a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OllamaChatModelProviderServiceConfigurator.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OllamaChatModelProviderServiceConfigurator.java index aa487a8..3f41d6d 100644 --- a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OllamaChatModelProviderServiceConfigurator.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OllamaChatModelProviderServiceConfigurator.java @@ -20,11 +20,15 @@ import org.wildfly.extension.ai.AIAttributeDefinitions; import static org.wildfly.extension.ai.AIAttributeDefinitions.RESPONSE_FORMAT; +import static org.wildfly.extension.ai.Capabilities.OPENTELEMETRY_CAPABILITY_NAME; import org.wildfly.extension.ai.injection.chat.WildFlyChatModelConfig; import org.wildfly.extension.ai.injection.chat.WildFlyOllamaChatModelConfig; +import org.wildfly.extension.opentelemetry.api.WildFlyOpenTelemetryConfig; + import org.wildfly.service.capture.ValueRegistry; import org.wildfly.subsystem.service.ResourceServiceInstaller; +import org.wildfly.subsystem.service.ServiceDependency; /** * Configures an aggregate ChatModel provider service.
@@ -45,20 +49,32 @@ public ResourceServiceInstaller configure(OperationContext context, ModelNode mo Integer maxRetries = MAX_RETRIES.resolveModelAttribute(context, model).asIntOrNull(); String modelName = MODEL_NAME.resolveModelAttribute(context, model).asString(); boolean isJson = AIAttributeDefinitions.ResponseFormat.isJson(RESPONSE_FORMAT.resolveModelAttribute(context, model).asStringOrNull()); + boolean isObservable= context.getCapabilityServiceSupport().hasCapability(OPENTELEMETRY_CAPABILITY_NAME); + final ServiceDependency openTelemetryConfig; + if(isObservable) { + openTelemetryConfig = ServiceDependency.on(WildFlyOpenTelemetryConfig.SERVICE_DESCRIPTOR); + } else { + openTelemetryConfig = null; + } Supplier factory = new Supplier<>() { @Override public WildFlyChatModelConfig get() { return new WildFlyOllamaChatModelConfig() .baseUrl(baseUrl) - .setJson(isJson) .logRequests(logRequests) .logResponses(logResponses) .maxRetries(maxRetries) + .setJson(isJson) + .setObservable(isObservable) + .setStreaming(false) .temperature(temperature) .timeout(connectTimeOut) .modelName(modelName); } }; + if(isObservable) { + return installService(context.getCurrentAddressValue(), factory, openTelemetryConfig); + } return installService(context.getCurrentAddressValue(), factory); } } diff --git a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OpenAIChatModelProviderServiceConfigurator.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OpenAIChatModelProviderServiceConfigurator.java index 6ba43c6..386d70b 100644 --- a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OpenAIChatModelProviderServiceConfigurator.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/chat/OpenAIChatModelProviderServiceConfigurator.java @@ -14,6 +14,7 @@ import static org.wildfly.extension.ai.AIAttributeDefinitions.RESPONSE_FORMAT; import static org.wildfly.extension.ai.AIAttributeDefinitions.TEMPERATURE; import static 
org.wildfly.extension.ai.AIAttributeDefinitions.TOP_P; +import static org.wildfly.extension.ai.Capabilities.OPENTELEMETRY_CAPABILITY_NAME; import static org.wildfly.extension.ai.chat.OpenAIChatLanguageModelProviderRegistrar.FREQUENCY_PENALTY; import static org.wildfly.extension.ai.chat.OpenAIChatLanguageModelProviderRegistrar.ORGANIZATION_ID; import static org.wildfly.extension.ai.chat.OpenAIChatLanguageModelProviderRegistrar.PRESENCE_PENALTY; @@ -24,8 +25,10 @@ import org.jboss.as.controller.OperationFailedException; import org.jboss.dmr.ModelNode; import org.wildfly.extension.ai.AIAttributeDefinitions; + import org.wildfly.extension.ai.injection.chat.WildFlyChatModelConfig; import org.wildfly.extension.ai.injection.chat.WildFlyOpenAiChatModelConfig; + import org.wildfly.service.capture.ValueRegistry; import org.wildfly.subsystem.service.ResourceServiceInstaller; @@ -54,6 +57,7 @@ public ResourceServiceInstaller configure(OperationContext context, ModelNode mo Double temperature = TEMPERATURE.resolveModelAttribute(context, model).asDoubleOrNull(); Double topP = TOP_P.resolveModelAttribute(context, model).asDoubleOrNull(); boolean isJson = AIAttributeDefinitions.ResponseFormat.isJson(RESPONSE_FORMAT.resolveModelAttribute(context, model).asStringOrNull()); + boolean isObservable= context.getCapabilityServiceSupport().hasCapability(OPENTELEMETRY_CAPABILITY_NAME); Supplier factory = new Supplier<>() { @Override public WildFlyChatModelConfig get() { @@ -61,7 +65,6 @@ public WildFlyChatModelConfig get() { .apiKey(key) .baseUrl(baseUrl) .frequencyPenalty(frequencyPenalty) - .setJson(isJson) .logRequests(logRequests) .logResponses(logResponses) .maxTokens(maxToken) @@ -69,6 +72,9 @@ public WildFlyChatModelConfig get() { .organizationId(organizationId) .presencePenalty(presencePenalty) .seed(seed) + .setJson(isJson) + .setObservable(isObservable) + .setStreaming(false) .temperature(temperature) .timeout(connectTimeOut) .topP(topP); diff --git 
a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDependencyProcessor.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDependencyProcessor.java index bab042f..50b6e4b 100644 --- a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDependencyProcessor.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDependencyProcessor.java @@ -40,7 +40,6 @@ public class AIDependencyProcessor implements DeploymentUnitProcessor { "dev.langchain4j.weaviate", "dev.langchain4j.web-search-engines" }; - public static final String[] EXPORTED_MODULES = { "dev.langchain4j", "io.smallrye.llm", @@ -93,7 +92,7 @@ public void deploy(DeploymentPhaseContext deploymentPhaseContext) throws Deploym String embeddingStoreName = annotation.value().asString(); ROOT_LOGGER.debug("We need the EmbeddingStore called " + embeddingStoreName); requiredEmbeddingStores.add(embeddingStoreName); - }else if (dev.langchain4j.rag.content.retriever.ContentRetriever.class.isAssignableFrom(fieldClass)) { + } else if (dev.langchain4j.rag.content.retriever.ContentRetriever.class.isAssignableFrom(fieldClass)) { ROOT_LOGGER.debug("We need the ContentRetriever in the class " + field.declaringClass()); String contentRetrieverName = annotation.value().asString(); ROOT_LOGGER.debug("We need the ContentRetriever called " + contentRetrieverName); diff --git a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDeploymentProcessor.java b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDeploymentProcessor.java index 4f24f09..09287b6 100644 --- a/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDeploymentProcessor.java +++ b/wildfly-ai/subsystem/src/main/java/org/wildfly/extension/ai/deployment/AIDeploymentProcessor.java @@ -6,6 +6,7 @@ import static org.jboss.as.weld.Capabilities.WELD_CAPABILITY_NAME; import static 
org.wildfly.extension.ai.AILogger.ROOT_LOGGER; +import static org.wildfly.extension.ai.Capabilities.OPENTELEMETRY_CAPABILITY_NAME; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.rag.content.retriever.ContentRetriever; @@ -22,10 +23,7 @@ import org.wildfly.extension.ai.injection.WildFlyBeanRegistry; import org.wildfly.extension.ai.injection.chat.WildFlyChatModelConfig; -/** - * - * @author Emmanuel Hugonnet (c) 2024 Red Hat, Inc. - */ + public class AIDeploymentProcessor implements DeploymentUnitProcessor { @Override @@ -38,6 +36,11 @@ public void deploy(DeploymentPhaseContext deploymentPhaseContext) throws Deploym ROOT_LOGGER.cdiRequired(); } List requiredChatModels = deploymentUnit.getAttachmentList(AIAttachements.CHAT_MODELS); + if (! support.hasCapability(OPENTELEMETRY_CAPABILITY_NAME)) { + ROOT_LOGGER.info("No opentelemetry support available"); + } else { + ROOT_LOGGER.info("Time to instrument our LLM !!!!!"); + } List chatLanguageModelNames = deploymentUnit.getAttachmentList(AIAttachements.CHAT_MODEL_KEYS); List requiredEmbeddingModels = deploymentUnit.getAttachmentList(AIAttachements.EMBEDDING_MODELS); List requiredEmbeddingModelNames = deploymentUnit.getAttachmentList(AIAttachements.EMBEDDING_MODEL_KEYS);