diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts index 4e0a089f2c295..416bb8636fe67 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/get_service_urls.ts @@ -23,7 +23,7 @@ async function discoverAuth(parsedTarget: Url, log: ToolingLog) { const response = await fetch(url); status = response.status; } catch (err) { - log.debug(`${url} resulted in ${err.message}`); + log.error(`${url} resulted in ${err.message}`); status = 0; } diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts index 31578b20afb04..606d3e62f3ce0 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/kibana_client.ts @@ -160,7 +160,7 @@ export class KibanaClient { }); if (ready) { - this.log.info('Knowledge base is already installed'); + this.log.success('Knowledge base is already installed'); return; } @@ -179,7 +179,7 @@ export class KibanaClient { { retries: 10 } ); - this.log.info('Knowledge base installed'); + this.log.success('Knowledge base installed'); } async createSpaceIfNeeded() { @@ -207,7 +207,7 @@ export class KibanaClient { }); if (spaceExistsResponse.data.id) { - this.log.info(`Space id ${this.spaceId} found`); + this.log.success(`Space id ${this.spaceId} found`); return; } @@ -226,7 +226,7 @@ export class KibanaClient { ); if (spaceCreatedResponse.status === 200) { - this.log.info(`Created space ${this.spaceId}`); + this.log.success(`Created space ${this.spaceId}`); } else { throw new Error( `Error creating space: ${spaceCreatedResponse.status} - ${spaceCreatedResponse.data}` @@ -627,7 +627,7 @@ export class KibanaClient { }) .concat({ score: errors.length === 0 ? 1 : 0, - criterion: 'The conversation encountered errors', + criterion: 'The conversation did not encounter any errors', reasoning: errors.length ? `The following errors occurred: ${errors.map((error) => error.error.message)}` : 'No errors occurred', diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts index a422e50b57a12..c235b830fcdd8 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/alerts/index.spec.ts @@ -18,7 +18,7 @@ import { customThresholdAIAssistantLogCount, } from '../../alert_templates/templates'; -describe('alert function', () => { +describe('Alert function', () => { const ruleIds: any[] = []; before(async () => { diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts index 4e65814dca5cc..cd8b22587e97b 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/apm/index.spec.ts @@ -15,7 +15,7 @@ import { MessageRole } from '@kbn/observability-ai-assistant-plugin/common'; import { chatClient, kibanaClient, synthtraceEsClients } from '../../services'; import { apmErrorCountAIAssistant } from '../../alert_templates/templates'; -describe('apm', () => { +describe('APM', () => { const ruleIds: any[] = []; before(async () => { diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/documentation/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/documentation/index.spec.ts new file mode 100644 index 0000000000000..d58f36ad82691 --- /dev/null +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/documentation/index.spec.ts @@ -0,0 +1,109 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +/// + +import expect from '@kbn/expect/expect'; +import { + InstallationStatusResponse, + PerformInstallResponse, + UninstallResponse, +} from '@kbn/product-doc-base-plugin/common/http_api/installation'; +import { RETRIEVE_DOCUMENTATION_NAME } from '../../../../server/functions/documentation'; +import { chatClient, kibanaClient, logger } from '../../services'; + +const ELASTIC_DOCS_INSTALLATION_STATUS_API_PATH = '/internal/product_doc_base/status'; +const ELASTIC_DOCS_INSTALL_ALL_API_PATH = '/internal/product_doc_base/install'; +const ELASTIC_DOCS_UNINSTALL_ALL_API_PATH = '/internal/product_doc_base/uninstall'; + +describe('Retrieve documentation function', () => { + before(async () => { + let statusResponse = await kibanaClient.callKibana('get', { + pathname: ELASTIC_DOCS_INSTALLATION_STATUS_API_PATH, + }); + + if (statusResponse.data.overall === 'installed') { + logger.success('Elastic documentation is already installed'); + } else { + logger.info('Installing Elastic documentation'); + const installResponse = await kibanaClient.callKibana('post', { + pathname: ELASTIC_DOCS_INSTALL_ALL_API_PATH, + }); + + if (!installResponse.data.installed) { + logger.error('Could not install Elastic documentation'); + throw new Error('Documentation did not install successfully before running tests.'); + } + + statusResponse = await kibanaClient.callKibana('get', { + pathname: ELASTIC_DOCS_INSTALLATION_STATUS_API_PATH, + }); + + if (statusResponse.data.overall !== 'installed') { + throw new Error('Documentation is not fully installed, cannot proceed with tests.'); + } else { + logger.success('Installed Elastic documentation'); + } + } + }); + + it('retrieves Elasticsearch documentation', async () => { + const prompt = 'How can I configure HTTPS in Elasticsearch?'; + const conversation = await chatClient.complete(prompt); + + const result = await chatClient.evaluate(conversation, [ + `Uses the ${RETRIEVE_DOCUMENTATION_NAME} function before answering the question about the Elastic stack`, + 'The assistant provides guidance on configuring HTTPS for Elasticsearch based on the retrieved documentation', + 'Does not hallucinate steps without first calling the retrieve_elastic_doc function', + 'Mentions Elasticsearch and HTTPS configuration steps consistent with the documentation', + ]); + + expect(result.passed).to.be(true); + }); + + it('retrieves Kibana documentation', async () => { + const prompt = 'What is Kibana Lens and how do I create a bar chart visualization with it?'; + const conversation = await chatClient.complete(prompt); + + const result = await chatClient.evaluate(conversation, [ + `Uses the ${RETRIEVE_DOCUMENTATION_NAME} function before answering the question about Kibana`, + 'Accurately explains what Kibana Lens is and provides doc-based steps for creating a bar chart visualization', + `Does not invent unsupported instructions, answers should reference what's found in the Kibana docs`, + ]); + + expect(result.passed).to.be(true); + }); + + it('retrieves Observability documentation', async () => { + const prompt = + 'How can I set up APM instrumentation for my Node.js service in Elastic Observability?'; + const conversation = await chatClient.complete(prompt); + + const result = await chatClient.evaluate(conversation, [ + `Uses the ${RETRIEVE_DOCUMENTATION_NAME} function before answering the question about Observability`, + 'Provides instructions based on the Observability docs for setting up APM instrumentation in a Node.js service', + 'Mentions steps like installing the APM agent, configuring it with the service name and APM Server URL, etc., as per the docs', + 'Does not provide hallucinated steps, should align with actual Observability documentation', + ]); + + expect(result.passed).to.be(true); + }); + + after(async () => { + // Uninstall all installed documentation + logger.info('Uninstalling Elastic documentation'); + const uninstallResponse = await kibanaClient.callKibana('post', { + pathname: ELASTIC_DOCS_UNINSTALL_ALL_API_PATH, + }); + + if (uninstallResponse.data.success) { + logger.success('Uninstalled Elastic documentation'); + } else { + logger.error('Could not uninstall Elastic documentation'); + } + }); +}); diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts index 63c13ae0f4f04..e49ba46c9e734 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/elasticsearch/index.spec.ts @@ -11,7 +11,7 @@ import expect from '@kbn/expect/expect'; import { MessageRole } from '@kbn/observability-ai-assistant-plugin/common'; import { chatClient, esClient } from '../../services'; -describe('elasticsearch functions', () => { +describe('Elasticsearch functions', () => { // using 'all' for elasticsearch scenarios enables the LLM correctly pick // elasticsearch functions when querying for data before(() => { diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts index 6f550d4ffcbe7..168714e4e355b 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/esql/index.spec.ts @@ -77,6 +77,7 @@ describe('ES|QL query generation', () => { }, }, }); + await esClient.index({ index: 'packetbeat-8.11.3', document: { diff --git a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts index ae749a8efcd14..6cba8f95eae24 100644 --- a/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts +++ b/x-pack/solutions/observability/plugins/observability_ai_assistant_app/scripts/evaluation/scenarios/kb/index.spec.ts @@ -13,7 +13,7 @@ import { chatClient, esClient, kibanaClient } from '../../services'; const KB_INDEX = '.kibana-observability-ai-assistant-kb-*'; -describe('knowledge base', () => { +describe('Knowledge base', () => { describe('kb functions', () => { it('summarizes and recalls information', async () => { let conversation = await chatClient.complete(