diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts
index 4e0a089f2c295..416bb8636fe67 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts
@@ -23,7 +23,7 @@ async function discoverAuth(parsedTarget: Url, log: ToolingLog) {
const response = await fetch(url);
status = response.status;
} catch (err) {
- log.debug(`${url} resulted in ${err.message}`);
+ log.error(`${url} resulted in ${err.message}`);
status = 0;
}
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts
index 31578b20afb04..606d3e62f3ce0 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts
@@ -160,7 +160,7 @@ export class KibanaClient {
});
if (ready) {
- this.log.info('Knowledge base is already installed');
+ this.log.success('Knowledge base is already installed');
return;
}
@@ -179,7 +179,7 @@ export class KibanaClient {
{ retries: 10 }
);
- this.log.info('Knowledge base installed');
+ this.log.success('Knowledge base installed');
}
async createSpaceIfNeeded() {
@@ -207,7 +207,7 @@ export class KibanaClient {
});
if (spaceExistsResponse.data.id) {
- this.log.info(`Space id ${this.spaceId} found`);
+ this.log.success(`Space id ${this.spaceId} found`);
return;
}
@@ -226,7 +226,7 @@ export class KibanaClient {
);
if (spaceCreatedResponse.status === 200) {
- this.log.info(`Created space ${this.spaceId}`);
+ this.log.success(`Created space ${this.spaceId}`);
} else {
throw new Error(
`Error creating space: ${spaceCreatedResponse.status} - ${spaceCreatedResponse.data}`
@@ -627,7 +627,7 @@ export class KibanaClient {
})
.concat({
score: errors.length === 0 ? 1 : 0,
- criterion: 'The conversation encountered errors',
+ criterion: 'The conversation did not encounter any errors',
reasoning: errors.length
? `The following errors occurred: ${errors.map((error) => error.error.message)}`
: 'No errors occurred',
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts
index a422e50b57a12..c235b830fcdd8 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts
@@ -18,7 +18,7 @@ import {
customThresholdAIAssistantLogCount,
} from '../../alert_templates/templates';
-describe('alert function', () => {
+describe('Alert function', () => {
const ruleIds: any[] = [];
before(async () => {
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts
index 4e65814dca5cc..cd8b22587e97b 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts
@@ -15,7 +15,7 @@ import { MessageRole } from '@kbn/observability-ai-assistant-plugin/common';
import { chatClient, kibanaClient, synthtraceEsClients } from '../../services';
import { apmErrorCountAIAssistant } from '../../alert_templates/templates';
-describe('apm', () => {
+describe('APM', () => {
const ruleIds: any[] = [];
before(async () => {
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/documentation/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/documentation/index.spec.ts
new file mode 100644
index 0000000000000..d58f36ad82691
--- /dev/null
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/documentation/index.spec.ts
@@ -0,0 +1,119 @@
+/*
+ * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
+ * or more contributor license agreements. Licensed under the Elastic License
+ * 2.0; you may not use this file except in compliance with the Elastic License
+ * 2.0.
+ */
+
+/// <reference types="@kbn/ambient-ftr-types"/>
+
+import expect from '@kbn/expect/expect';
+import {
+  InstallationStatusResponse,
+  PerformInstallResponse,
+  UninstallResponse,
+} from '@kbn/product-doc-base-plugin/common/http_api/installation';
+import { RETRIEVE_DOCUMENTATION_NAME } from '../../../../server/functions/documentation';
+import { chatClient, kibanaClient, logger } from '../../services';
+
+const ELASTIC_DOCS_INSTALLATION_STATUS_API_PATH = '/internal/product_doc_base/status';
+const ELASTIC_DOCS_INSTALL_ALL_API_PATH = '/internal/product_doc_base/install';
+const ELASTIC_DOCS_UNINSTALL_ALL_API_PATH = '/internal/product_doc_base/uninstall';
+
+/**
+ * Evaluation scenarios that check the assistant calls the documentation
+ * retrieval function before answering product questions. Elastic
+ * documentation is installed before the scenarios run (unless it is already
+ * installed) and uninstalled again afterwards.
+ */
+describe('Retrieve documentation function', () => {
+  before(async () => {
+    // Nothing to install when the status endpoint already reports `installed`.
+    const initialStatus = await kibanaClient.callKibana<InstallationStatusResponse>('get', {
+      pathname: ELASTIC_DOCS_INSTALLATION_STATUS_API_PATH,
+    });
+
+    if (initialStatus.data.overall === 'installed') {
+      logger.success('Elastic documentation is already installed');
+      return;
+    }
+
+    logger.info('Installing Elastic documentation');
+    const installResponse = await kibanaClient.callKibana<PerformInstallResponse>('post', {
+      pathname: ELASTIC_DOCS_INSTALL_ALL_API_PATH,
+    });
+
+    if (!installResponse.data.installed) {
+      logger.error('Could not install Elastic documentation');
+      throw new Error('Documentation did not install successfully before running tests.');
+    }
+
+    // Re-check the overall status after installing; fail fast rather than run
+    // the scenarios without the documentation fully installed.
+    const finalStatus = await kibanaClient.callKibana<InstallationStatusResponse>('get', {
+      pathname: ELASTIC_DOCS_INSTALLATION_STATUS_API_PATH,
+    });
+
+    if (finalStatus.data.overall !== 'installed') {
+      throw new Error('Documentation is not fully installed, cannot proceed with tests.');
+    }
+
+    logger.success('Installed Elastic documentation');
+  });
+
+  it('retrieves Elasticsearch documentation', async () => {
+    const prompt = 'How can I configure HTTPS in Elasticsearch?';
+    const conversation = await chatClient.complete(prompt);
+
+    const result = await chatClient.evaluate(conversation, [
+      `Uses the ${RETRIEVE_DOCUMENTATION_NAME} function before answering the question about the Elastic stack`,
+      'The assistant provides guidance on configuring HTTPS for Elasticsearch based on the retrieved documentation',
+      `Does not hallucinate steps without first calling the ${RETRIEVE_DOCUMENTATION_NAME} function`,
+      'Mentions Elasticsearch and HTTPS configuration steps consistent with the documentation',
+    ]);
+
+    expect(result.passed).to.be(true);
+  });
+
+  it('retrieves Kibana documentation', async () => {
+    const prompt = 'What is Kibana Lens and how do I create a bar chart visualization with it?';
+    const conversation = await chatClient.complete(prompt);
+
+    const result = await chatClient.evaluate(conversation, [
+      `Uses the ${RETRIEVE_DOCUMENTATION_NAME} function before answering the question about Kibana`,
+      'Accurately explains what Kibana Lens is and provides doc-based steps for creating a bar chart visualization',
+      `Does not invent unsupported instructions, answers should reference what's found in the Kibana docs`,
+    ]);
+
+    expect(result.passed).to.be(true);
+  });
+
+  it('retrieves Observability documentation', async () => {
+    const prompt =
+      'How can I set up APM instrumentation for my Node.js service in Elastic Observability?';
+    const conversation = await chatClient.complete(prompt);
+
+    const result = await chatClient.evaluate(conversation, [
+      `Uses the ${RETRIEVE_DOCUMENTATION_NAME} function before answering the question about Observability`,
+      'Provides instructions based on the Observability docs for setting up APM instrumentation in a Node.js service',
+      'Mentions steps like installing the APM agent, configuring it with the service name and APM Server URL, etc., as per the docs',
+      'Does not provide hallucinated steps, should align with actual Observability documentation',
+    ]);
+
+    expect(result.passed).to.be(true);
+  });
+
+  after(async () => {
+    // Cleanup: an unsuccessful uninstall response is only logged, not thrown.
+    logger.info('Uninstalling Elastic documentation');
+    const uninstallResponse = await kibanaClient.callKibana<UninstallResponse>('post', {
+      pathname: ELASTIC_DOCS_UNINSTALL_ALL_API_PATH,
+    });
+
+    if (uninstallResponse.data.success) {
+      logger.success('Uninstalled Elastic documentation');
+    } else {
+      logger.error('Could not uninstall Elastic documentation');
+    }
+  });
+});
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts
index 63c13ae0f4f04..e49ba46c9e734 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts
@@ -11,7 +11,7 @@ import expect from '@kbn/expect/expect';
import { MessageRole } from '@kbn/observability-ai-assistant-plugin/common';
import { chatClient, esClient } from '../../services';
-describe('elasticsearch functions', () => {
+describe('Elasticsearch functions', () => {
// using 'all' for elasticsearch scenarios enables the LLM correctly pick
// elasticsearch functions when querying for data
before(() => {
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts
index 6f550d4ffcbe7..168714e4e355b 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts
@@ -77,6 +77,7 @@ describe('ES|QL query generation', () => {
},
},
});
+
await esClient.index({
index: 'packetbeat-8.11.3',
document: {
diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts
index ae749a8efcd14..6cba8f95eae24 100644
--- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts
+++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts
@@ -13,7 +13,7 @@ import { chatClient, esClient, kibanaClient } from '../../services';
const KB_INDEX = '.kibana-observability-ai-assistant-kb-*';
-describe('knowledge base', () => {
+describe('Knowledge base', () => {
describe('kb functions', () => {
it('summarizes and recalls information', async () => {
let conversation = await chatClient.complete(