From fa2ac45283df196bf93a335194eba9627a33e23a Mon Sep 17 00:00:00 2001
From: Mike Donnalley <mdonnalley@salesforce.com>
Date: Wed, 29 Jan 2025 10:16:50 -0700
Subject: [PATCH 1/3] feat: use spec for tests

---
 command-snapshot.json                         |  18 +--
 messages/agent.generate.test-cases.md         |  11 --
 messages/agent.generate.test-definition.md    |  13 --
 messages/agent.generate.test-spec.md          |  11 ++
 messages/agent.test.create.md                 |  27 ++++
 package.json                                  |   4 +
 schemas/agent-test-create.json                |  19 +++
 .../agent/generate/test-definition.ts         |  93 -------------
 .../generate/{test-cases.ts => test-spec.ts}  | 124 +++++++++---------
 src/commands/agent/test/create.ts             | 123 +++++++++++++++++
 .../agent/generate/test-cases.test.ts         |  94 -------------
 yarn.lock                                     |  22 +++-
 12 files changed, 276 insertions(+), 283 deletions(-)
 delete mode 100644 messages/agent.generate.test-cases.md
 delete mode 100644 messages/agent.generate.test-definition.md
 create mode 100644 messages/agent.generate.test-spec.md
 create mode 100644 messages/agent.test.create.md
 create mode 100644 schemas/agent-test-create.json
 delete mode 100644 src/commands/agent/generate/test-definition.ts
 rename src/commands/agent/generate/{test-cases.ts => test-spec.ts} (64%)
 create mode 100644 src/commands/agent/test/create.ts
 delete mode 100644 test/commands/agent/generate/test-cases.test.ts

diff --git a/command-snapshot.json b/command-snapshot.json
index f9b1c0d..02897af 100644
--- a/command-snapshot.json
+++ b/command-snapshot.json
@@ -74,15 +74,7 @@
   },
   {
     "alias": [],
-    "command": "agent:generate:test-cases",
-    "flagAliases": [],
-    "flagChars": [],
-    "flags": ["flags-dir"],
-    "plugin": "@salesforce/plugin-agent"
-  },
-  {
-    "alias": [],
-    "command": "agent:generate:test-definition",
+    "command": "agent:generate:test-spec",
     "flagAliases": [],
     "flagChars": [],
     "flags": ["flags-dir"],
@@ -104,6 +96,14 @@
     "flags": ["api-version", "flags-dir", "job-id", "json", "target-org", "use-most-recent"],
     "plugin": "@salesforce/plugin-agent"
   },
+  {
+    "alias": [],
+    "command": "agent:test:create",
+    "flagAliases": [],
+    "flagChars": ["o", "p", "s"],
+    "flags": ["api-version", "flags-dir", "json", "no-prompt", "preview", "spec", "target-org"],
+    "plugin": "@salesforce/plugin-agent"
+  },
   {
     "alias": [],
     "command": "agent:test:list",
diff --git a/messages/agent.generate.test-cases.md b/messages/agent.generate.test-cases.md
deleted file mode 100644
index 9299e00..0000000
--- a/messages/agent.generate.test-cases.md
+++ /dev/null
@@ -1,11 +0,0 @@
-# summary
-
-Interactively generate a new Set of AI Evaluation test cases.
-
-# description
-
-Answer the prompts to generate an AiEvaluationTestSet that will be written to a file. You can then run "sf agent generate definition" to generate the AiEvaluationDefinition that can be used to evaluate the test set.
-
-# examples
-
-- <%= config.bin %> <%= command.id %>
diff --git a/messages/agent.generate.test-definition.md b/messages/agent.generate.test-definition.md
deleted file mode 100644
index 6724286..0000000
--- a/messages/agent.generate.test-definition.md
+++ /dev/null
@@ -1,13 +0,0 @@
-# summary
-
-Interactively generate a new AI Evaluation Test Definition.
-
-# description
-
-This command will prompt you for the necessary information to create a new AiEvaluationDefinition. The definition will be saved to the `aiEvaluationDefinitions` directory in the project.
-
-You must have the `Bots` and `AiEvaluationTestSets` metadata types present in your project to use this command.
-
-# examples
-
-- <%= config.bin %> <%= command.id %>
diff --git a/messages/agent.generate.test-spec.md b/messages/agent.generate.test-spec.md
new file mode 100644
index 0000000..01537be
--- /dev/null
+++ b/messages/agent.generate.test-spec.md
@@ -0,0 +1,11 @@
+# summary
+
+Interactively generate a specification file for an AI evaluation test.
+
+# description
+
+This command will prompt you for the necessary information to create a new spec file (in YAML format). You can then create a new AI evaluation using "sf agent test create --spec <spec-file>".
+
+# examples
+
+- <%= config.bin %> <%= command.id %>
diff --git a/messages/agent.test.create.md b/messages/agent.test.create.md
new file mode 100644
index 0000000..35e0c1d
--- /dev/null
+++ b/messages/agent.test.create.md
@@ -0,0 +1,27 @@
+# summary
+
+Create an AI evaluation test in your org from a test spec file.
+
+# description
+
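+Generates AiEvaluationDefinition metadata from the spec file and deploys it to the target org. Use the --preview flag to view the generated metadata without deploying it.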
+
+# flags.spec.summary
+
+Path to the test spec YAML file.
+
+# flags.spec.description
+
+Create the spec file by running the "sf agent generate test-spec" command.
+
+# flags.preview.summary
+
+Preview the test metadata without deploying to your org.
+
+# flags.no-prompt.summary
+
+Don't prompt for confirmation when overwriting an existing test.
+
+# examples
+
+- <%= config.bin %> <%= command.id %>
diff --git a/package.json b/package.json
index 7a5a25a..7d52e94 100644
--- a/package.json
+++ b/package.json
@@ -13,6 +13,7 @@
     "@salesforce/core": "^8.8.0",
     "@salesforce/kit": "^3.2.1",
     "@salesforce/sf-plugins-core": "^12.1.0",
+    "@salesforce/source-deploy-retrieve": "^12.14.0",
     "ansis": "^3.3.2",
     "fast-xml-parser": "^4.5.1",
     "ink": "^5.0.1",
@@ -77,6 +78,9 @@
           },
           "generate": {
             "external": true
+          },
+          "create": {
+            "external": true
           }
         }
       }
diff --git a/schemas/agent-test-create.json b/schemas/agent-test-create.json
new file mode 100644
index 0000000..e9d1333
--- /dev/null
+++ b/schemas/agent-test-create.json
@@ -0,0 +1,19 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$ref": "#/definitions/AgentTestCreateResult",
+  "definitions": {
+    "AgentTestCreateResult": {
+      "type": "object",
+      "properties": {
+        "path": {
+          "type": "string"
+        },
+        "contents": {
+          "type": "string"
+        }
+      },
+      "required": ["path", "contents"],
+      "additionalProperties": false
+    }
+  }
+}
diff --git a/src/commands/agent/generate/test-definition.ts b/src/commands/agent/generate/test-definition.ts
deleted file mode 100644
index d24391a..0000000
--- a/src/commands/agent/generate/test-definition.ts
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2024, salesforce.com, inc.
- * All rights reserved.
- * Licensed under the BSD 3-Clause license.
- * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
- */
-import { dirname, join } from 'node:path';
-import { mkdir, writeFile } from 'node:fs/promises';
-import { SfCommand } from '@salesforce/sf-plugins-core';
-import { Messages, SfError } from '@salesforce/core';
-import { input, select } from '@inquirer/prompts';
-import { theme } from '../../../inquirer-theme.js';
-import { readDir } from '../../../read-dir.js';
-
-Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
-const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.test-definition');
-
-export default class AgentGenerateTestDefinition extends SfCommand<void> {
-  public static readonly summary = messages.getMessage('summary');
-  public static readonly description = messages.getMessage('description');
-  public static readonly examples = messages.getMessages('examples');
-  public static readonly enableJsonFlag = false;
-  public static readonly state = 'beta';
-
-  public async run(): Promise<void> {
-    const testSetDir = join('force-app', 'main', 'default', 'aiEvaluationTestSets');
-    const testSets = (await readDir(testSetDir)).map((testSet) => testSet.replace('.aiEvaluationTestSet-meta.xml', ''));
-    if (testSets.length === 0) {
-      throw new SfError(`No test sets found in ${testSetDir}`, 'NoTestSetsFoundError', [
-        'Run the "sf agent generate test-cases" command to create a test set',
-      ]);
-    }
-
-    const botsDir = join('force-app', 'main', 'default', 'bots');
-    const bots = await readDir(botsDir);
-    if (bots.length === 0) {
-      throw new SfError(`No agents found in ${botsDir}`, 'NoAgentsFoundError');
-    }
-
-    const subjectType = await select<string>({
-      message: 'What are you testing',
-      choices: ['AGENT'],
-      theme,
-    });
-
-    const agent = await select<string>({
-      message: 'Select the Agent to test',
-      choices: bots,
-      theme,
-    });
-
-    const testSet = await select<string>({
-      message: 'Select the test set to use',
-      choices: testSets,
-      theme,
-    });
-
-    const name = await input({
-      message: 'Enter a name for the test definition',
-      validate: (i: string): string | boolean => (i.length > 0 ? true : 'Name cannot be empty'),
-      theme,
-    });
-
-    const description = await input({
-      message: 'Enter a description for test definition (optional)',
-      theme,
-    });
-
-    const xml = `<?xml version="1.0" encoding="UTF-8"?>
-<AiEvaluationDefinition xmlns="http://soap.sforce.com/2006/04/metadata">
-    ${description ? `<description>${description}</description>` : ''}
-    <name>${name}</name>
-    <subjectType>${subjectType}</subjectType>
-    <subjectName>${agent}</subjectName>
-    <testSetName>${testSet}</testSetName>
-</AiEvaluationDefinition>`;
-
-    // remove all empty lines
-    const cleanedXml = xml.replace(/^\s*[\r\n]/gm, '');
-
-    const definitionPath = join(
-      'force-app',
-      'main',
-      'default',
-      'aiEvaluationDefinitions',
-      `${name}.aiEvaluationDefinition-meta.xml`
-    );
-    await mkdir(dirname(definitionPath), { recursive: true });
-    this.log();
-    this.log(`Created ${definitionPath}`);
-    await writeFile(definitionPath, cleanedXml);
-  }
-}
diff --git a/src/commands/agent/generate/test-cases.ts b/src/commands/agent/generate/test-spec.ts
similarity index 64%
rename from src/commands/agent/generate/test-cases.ts
rename to src/commands/agent/generate/test-spec.ts
index 1329011..bc1d8df 100644
--- a/src/commands/agent/generate/test-cases.ts
+++ b/src/commands/agent/generate/test-spec.ts
@@ -4,16 +4,18 @@
  * Licensed under the BSD 3-Clause license.
  * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
  */
-import { dirname, join } from 'node:path';
-import { mkdir, readFile, writeFile } from 'node:fs/promises';
+import { join } from 'node:path';
+import { readFile } from 'node:fs/promises';
 import { SfCommand } from '@salesforce/sf-plugins-core';
-import { Messages } from '@salesforce/core';
+import { Messages, SfError } from '@salesforce/core';
+import { generateTestSpec } from '@salesforce/agents';
 import { select, input, confirm, checkbox } from '@inquirer/prompts';
 import { XMLParser } from 'fast-xml-parser';
 import { theme } from '../../../inquirer-theme.js';
 import { readDir } from '../../../read-dir.js';
+
 Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
-const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.test-cases');
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.generate.test-spec');
 
 // TODO: add these back once we refine the regex
 // export const FORTY_CHAR_API_NAME_REGEX =
@@ -21,18 +23,18 @@ const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.genera
 // export const EIGHTY_CHAR_API_NAME_REGEX =
 //   /^(?=.{1,97}$)[a-zA-Z]([a-zA-Z0-9]|_(?!_)){0,14}(__[a-zA-Z]([a-zA-Z0-9]|_(?!_)){0,79})?$/;
 
-export type TestSetInputs = {
+type TestCase = {
   utterance: string;
-  actionSequenceExpectedValue: string[];
-  botRatingExpectedValue: string;
-  topicSequenceExpectedValue: string;
+  expectedActions: string[];
+  expectedTopic: string;
+  expectedOutcome: string;
 };
 
 function castArray<T>(value: T | T[]): T[] {
   return Array.isArray(value) ? value : [value];
 }
 
-async function promptForTestCase(genAiPlugins: Record<string, string>): Promise<TestSetInputs> {
+async function promptForTestCase(genAiPlugins: Record<string, string>): Promise<TestCase> {
   const utterance = await input({
     message: 'Utterance',
     validate: (d: string): boolean | string => d.length > 0 || 'utterance cannot be empty',
@@ -72,16 +74,16 @@ async function promptForTestCase(genAiPlugins: Record<string, string>): Promise<
       theme,
     });
 
-  const topicSequenceExpectedValue = await select<string>({
+  const expectedTopic = await select<string>({
     message: 'Expected topic',
     choices: [...topics, customKey],
     theme,
   });
 
-  if (topicSequenceExpectedValue === customKey) {
+  if (expectedTopic === customKey) {
     return {
       utterance,
-      topicSequenceExpectedValue: await input({
+      expectedTopic: await input({
         message: 'Expected topic',
         validate: (d: string): boolean | string => {
           if (!d.length) {
@@ -92,67 +94,40 @@ async function promptForTestCase(genAiPlugins: Record<string, string>): Promise<
         theme,
       }),
       // If the user selects OTHER for the topic, then we don't have a genAiPlugin to get actions from so we ask for them for custom input
-      actionSequenceExpectedValue: await askForOtherActions(),
-      botRatingExpectedValue: await askForBotRating(),
+      expectedActions: await askForOtherActions(),
+      expectedOutcome: await askForBotRating(),
     };
   }
 
-  const genAiPluginXml = await readFile(genAiPlugins[topicSequenceExpectedValue], 'utf-8');
+  const genAiPluginXml = await readFile(genAiPlugins[expectedTopic], 'utf-8');
   const parser = new XMLParser();
   const parsed = parser.parse(genAiPluginXml) as { GenAiPlugin: { genAiFunctions: Array<{ functionName: string }> } };
   const actions = castArray(parsed.GenAiPlugin.genAiFunctions).map((f) => f.functionName);
 
-  let actionSequenceExpectedValue = await checkbox<string>({
+  let expectedActions = await checkbox<string>({
     message: 'Expected action(s)',
     choices: [...actions, customKey],
     theme,
     required: true,
   });
 
-  if (actionSequenceExpectedValue.includes(customKey)) {
+  if (expectedActions.includes(customKey)) {
     const additional = await askForOtherActions();
 
-    actionSequenceExpectedValue = [...actionSequenceExpectedValue.filter((a) => a !== customKey), ...additional];
+    expectedActions = [...expectedActions.filter((a) => a !== customKey), ...additional];
   }
 
-  const botRatingExpectedValue = await askForBotRating();
+  const expectedOutcome = await askForBotRating();
 
   return {
     utterance,
-    actionSequenceExpectedValue,
-    botRatingExpectedValue,
-    topicSequenceExpectedValue,
+    expectedActions,
+    expectedOutcome,
+    expectedTopic,
   };
 }
 
-export function constructTestSetXML(testCases: TestSetInputs[]): string {
-  const tab = '  ';
-  let xml = `<?xml version="1.0" encoding="UTF-8"?>\n<AiEvaluationTestSet>\n${tab}<subjectType>AGENT</subjectType>\n`;
-  testCases.forEach((testCase, i) => {
-    xml += `  <testCase>
-    <number>${i + 1}</number>
-    <inputs>
-      <utterance>${testCase.utterance}</utterance>
-    </inputs>
-    <expectation>
-      <name>topic_sequence_match</name>
-      <expectedValue>${testCase.topicSequenceExpectedValue}</expectedValue>
-    </expectation>
-    <expectation>
-      <name>action_sequence_match</name>
-      <expectedValue>${`[${testCase.actionSequenceExpectedValue.map((v) => `"${v}"`).join(',')}]`}</expectedValue>
-    </expectation>
-    <expectation>
-      <name>bot_response_rating</name>
-      <expectedValue>${testCase.botRatingExpectedValue}</expectedValue>
-    </expectation>
-  </testCase>\n`;
-  });
-  xml += '</AiEvaluationTestSet>';
-  return xml;
-}
-
-export default class AgentGenerateTestCases extends SfCommand<void> {
+export default class AgentGenerateTestSpec extends SfCommand<void> {
   public static readonly summary = messages.getMessage('summary');
   public static readonly description = messages.getMessage('description');
   public static readonly examples = messages.getMessages('examples');
@@ -160,9 +135,26 @@ export default class AgentGenerateTestCases extends SfCommand<void> {
   public static readonly state = 'beta';
 
   public async run(): Promise<void> {
-    const testSetName = await input({
-      message: 'What is the name of this set of test cases',
+    const botsDir = join('force-app', 'main', 'default', 'bots');
+    const bots = await readDir(botsDir);
+    if (bots.length === 0) {
+      throw new SfError(`No agents found in ${botsDir}`, 'NoAgentsFoundError');
+    }
+
+    const subjectType = await select<string>({
+      message: 'What are you testing',
+      choices: ['AGENT'],
+      theme,
+    });
+
+    const subjectName = await select<string>({
+      message: 'Select the Agent to test',
+      choices: bots,
+      theme,
+    });
 
+    const name = await input({
+      message: 'Enter a name for the test definition',
       validate(d: string): boolean | string {
         // ensure that it's not empty
         if (!d.length) {
@@ -178,6 +170,12 @@ export default class AgentGenerateTestCases extends SfCommand<void> {
         // }
         // return true;
       },
+      theme,
+    });
+
+    const description = await input({
+      message: 'Enter a description for test definition (optional)',
+      theme,
     });
 
     const genAiPluginDir = join('force-app', 'main', 'default', 'genAiPlugins');
@@ -201,16 +199,18 @@ export default class AgentGenerateTestCases extends SfCommand<void> {
       })
     );
 
-    const testSetPath = join(
-      'force-app',
-      'main',
-      'default',
-      'aiEvaluationTestSets',
-      `${testSetName}.aiEvaluationTestSet-meta.xml`
-    );
-    await mkdir(dirname(testSetPath), { recursive: true });
     this.log();
-    this.log(`Created ${testSetPath}`);
-    await writeFile(testSetPath, constructTestSetXML(testCases));
+
+    await generateTestSpec(
+      {
+        name,
+        description,
+        subjectType,
+        subjectName,
+        testCases,
+      },
+      `${name}-test-spec.yaml`
+    );
+    this.log(`Created ${name}-test-spec.yaml`);
   }
 }
diff --git a/src/commands/agent/test/create.ts b/src/commands/agent/test/create.ts
new file mode 100644
index 0000000..6db3d5e
--- /dev/null
+++ b/src/commands/agent/test/create.ts
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2024, salesforce.com, inc.
+ * All rights reserved.
+ * Licensed under the BSD 3-Clause license.
+ * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+ */
+import { join } from 'node:path';
+import { SfCommand, Flags } from '@salesforce/sf-plugins-core';
+import { Lifecycle, Messages, SfError } from '@salesforce/core';
+import { AgentTester, AgentTestCreateLifecycleStages } from '@salesforce/agents';
+import { DeployResult } from '@salesforce/source-deploy-retrieve';
+import { MultiStageOutput } from '@oclif/multi-stage-output';
+import { CLIError } from '@oclif/core/errors';
+
+Messages.importMessagesDirectoryFromMetaUrl(import.meta.url);
+const messages = Messages.loadMessages('@salesforce/plugin-agent', 'agent.test.create');
+
+export type AgentTestCreateResult = {
+  path: string;
+  contents: string;
+};
+
+export default class AgentTestCreate extends SfCommand<AgentTestCreateResult> {
+  public static readonly summary = messages.getMessage('summary');
+  public static readonly description = messages.getMessage('description');
+  public static readonly examples = messages.getMessages('examples');
+  public static readonly state = 'beta';
+
+  public static readonly flags = {
+    spec: Flags.file({
+      summary: messages.getMessage('flags.spec.summary'),
+      description: messages.getMessage('flags.spec.description'),
+      char: 's',
+      required: true,
+      exists: true,
+    }),
+    'target-org': Flags.requiredOrg(),
+    'api-version': Flags.orgApiVersion(),
+    preview: Flags.boolean({
+      summary: messages.getMessage('flags.preview.summary'),
+    }),
+    'no-prompt': Flags.boolean({
+      summary: messages.getMessage('flags.no-prompt.summary'),
+      char: 'p',
+    }),
+  };
+  private mso?: MultiStageOutput<{ path: string }>;
+
+  public async run(): Promise<AgentTestCreateResult> {
+    const { flags } = await this.parse(AgentTestCreate);
+    const agentTester = new AgentTester(flags['target-org'].getConnection(flags['api-version']));
+
+    const lifecycle = Lifecycle.getInstance();
+
+    lifecycle.on(AgentTestCreateLifecycleStages.CreatingLocalMetadata, async () => {
+      this.mso = new MultiStageOutput<{ path: string }>({
+        jsonEnabled: this.jsonEnabled(),
+        stages: Object.values(AgentTestCreateLifecycleStages),
+        title: `Creating test for ${flags.spec}`,
+      });
+      this.mso?.skipTo(AgentTestCreateLifecycleStages.CreatingLocalMetadata);
+      return Promise.resolve();
+    });
+
+    lifecycle.on(AgentTestCreateLifecycleStages.DeployingMetadata, async () =>
+      Promise.resolve(this.mso?.skipTo(AgentTestCreateLifecycleStages.DeployingMetadata))
+    );
+
+    lifecycle.on(AgentTestCreateLifecycleStages.Waiting, async () =>
+      Promise.resolve(this.mso?.skipTo(AgentTestCreateLifecycleStages.Waiting))
+    );
+
+    lifecycle.on(AgentTestCreateLifecycleStages.Done, async (result: DeployResult) => {
+      if (result.response.success) {
+        this.mso?.skipTo(AgentTestCreateLifecycleStages.Done);
+        this.mso?.stop();
+      } else {
+        this.mso?.error();
+        this.mso?.stop();
+      }
+
+      return Promise.resolve();
+    });
+
+    const confirmationCallback = flags['no-prompt']
+      ? async (): Promise<boolean> => Promise.resolve(true)
+      : async (spec: { name: string }): Promise<boolean> =>
+          this.confirm({
+            message: `An AiEvaluationDefinition with the name ${spec.name} already exists in the org. Do you want to overwrite it?`,
+            defaultAnswer: false,
+          });
+
+    const { path, contents } = await agentTester.create(flags.spec, {
+      outputDir: join('force-app', 'main', 'default', 'aiEvaluationDefinitions'),
+      preview: flags.preview,
+      confirmationCallback,
+    });
+
+    if (flags.preview) {
+      this.mso?.skipTo(AgentTestCreateLifecycleStages.Done);
+      this.mso?.stop();
+      this.log(`Preview of AiEvaluationDefinition created at ${path}`);
+      this.log();
+      this.log(contents);
+    } else {
+      this.log(
+        `AiEvaluationDefinition created at ${path} and deployed to ${
+          flags['target-org'].getUsername() ?? flags['target-org'].getOrgId()
+        }`
+      );
+    }
+
+    return {
+      path,
+      contents,
+    };
+  }
+
+  protected catch(error: Error | SfError | CLIError): Promise<never> {
+    this.mso?.error();
+    return super.catch(error);
+  }
+}
diff --git a/test/commands/agent/generate/test-cases.test.ts b/test/commands/agent/generate/test-cases.test.ts
deleted file mode 100644
index 6e53176..0000000
--- a/test/commands/agent/generate/test-cases.test.ts
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2023, salesforce.com, inc.
- * All rights reserved.
- * Licensed under the BSD 3-Clause license.
- * For full license text, see LICENSE.txt file in the repo root or https://opensource.org/licenses/BSD-3-Clause
- */
-import { expect } from 'chai';
-import { type TestSetInputs, constructTestSetXML } from '../../../../src/commands/agent/generate/test-cases.js';
-
-describe('constructTestSetXML', () => {
-  it('should return a valid test set XML', () => {
-    const testCases = [
-      {
-        utterance: 'hello',
-        actionSequenceExpectedValue: ['foo', 'bar'],
-        botRatingExpectedValue: 'baz',
-        topicSequenceExpectedValue: 'qux',
-      },
-      {
-        utterance: 'goodbye',
-        actionSequenceExpectedValue: ['foo', 'bar'],
-        botRatingExpectedValue: 'baz',
-        topicSequenceExpectedValue: 'qux',
-      },
-      {
-        utterance: 'how are you',
-        actionSequenceExpectedValue: ['foo', 'bar'],
-        botRatingExpectedValue: 'baz',
-        topicSequenceExpectedValue: 'qux',
-      },
-    ] satisfies TestSetInputs[];
-
-    const xml = constructTestSetXML(testCases);
-
-    expect(xml).to.equal(`<?xml version="1.0" encoding="UTF-8"?>
-<AiEvaluationTestSet>
-  <subjectType>AGENT</subjectType>
-  <testCase>
-    <number>1</number>
-    <inputs>
-      <utterance>hello</utterance>
-    </inputs>
-    <expectation>
-      <name>topic_sequence_match</name>
-      <expectedValue>qux</expectedValue>
-    </expectation>
-    <expectation>
-      <name>action_sequence_match</name>
-      <expectedValue>["foo","bar"]</expectedValue>
-    </expectation>
-    <expectation>
-      <name>bot_response_rating</name>
-      <expectedValue>baz</expectedValue>
-    </expectation>
-  </testCase>
-  <testCase>
-    <number>2</number>
-    <inputs>
-      <utterance>goodbye</utterance>
-    </inputs>
-    <expectation>
-      <name>topic_sequence_match</name>
-      <expectedValue>qux</expectedValue>
-    </expectation>
-    <expectation>
-      <name>action_sequence_match</name>
-      <expectedValue>["foo","bar"]</expectedValue>
-    </expectation>
-    <expectation>
-      <name>bot_response_rating</name>
-      <expectedValue>baz</expectedValue>
-    </expectation>
-  </testCase>
-  <testCase>
-    <number>3</number>
-    <inputs>
-      <utterance>how are you</utterance>
-    </inputs>
-    <expectation>
-      <name>topic_sequence_match</name>
-      <expectedValue>qux</expectedValue>
-    </expectation>
-    <expectation>
-      <name>action_sequence_match</name>
-      <expectedValue>["foo","bar"]</expectedValue>
-    </expectation>
-    <expectation>
-      <name>bot_response_rating</name>
-      <expectedValue>baz</expectedValue>
-    </expectation>
-  </testCase>
-</AiEvaluationTestSet>`);
-  });
-});
diff --git a/yarn.lock b/yarn.lock
index 767036d..8ccd7f2 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1636,6 +1636,26 @@
     minimatch "^9.0.5"
     proxy-agent "^6.4.0"
 
+"@salesforce/source-deploy-retrieve@^12.14.0":
+  version "12.14.0"
+  resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.14.0.tgz#04036f76301071b2188c92f70d77a138bc0d72cf"
+  integrity sha512-3WOQCUY0a8cNYx5/NVtaubLEgxo/vHS/7k4Kw/FEZY3ysALpPCqWk2psJQP56xsp/SDAI3lV0VpMZadrL+ryMw==
+  dependencies:
+    "@salesforce/core" "^8.8.2"
+    "@salesforce/kit" "^3.2.3"
+    "@salesforce/ts-types" "^2.0.12"
+    fast-levenshtein "^3.0.0"
+    fast-xml-parser "^4.5.1"
+    got "^11.8.6"
+    graceful-fs "^4.2.11"
+    ignore "^5.3.2"
+    isbinaryfile "^5.0.2"
+    jszip "^3.10.1"
+    mime "2.6.0"
+    minimatch "^9.0.5"
+    proxy-agent "^6.4.0"
+    yaml "^2.6.1"
+
 "@salesforce/ts-types@^2.0.10", "@salesforce/ts-types@^2.0.11", "@salesforce/ts-types@^2.0.12":
   version "2.0.12"
   resolved "https://registry.yarnpkg.com/@salesforce/ts-types/-/ts-types-2.0.12.tgz#60420622812a7ec7e46d220667bc29b42dc247ff"
@@ -8275,7 +8295,7 @@ yallist@^4.0.0:
   resolved "https://registry.yarnpkg.com/yallist/-/yallist-4.0.0.tgz#9bb92790d9c0effec63be73519e11a35019a3a72"
   integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==
 
-yaml@^2.4.5, yaml@^2.7.0:
+yaml@^2.4.5, yaml@^2.6.1, yaml@^2.7.0:
   version "2.7.0"
   resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.7.0.tgz#aef9bb617a64c937a9a748803786ad8d3ffe1e98"
   integrity sha512-+hSoy/QHluxmC9kCIJyL/uyFmLmc+e5CFR5Wa+bpIhIj85LVb9ZH2nVnqrHoSvKogwODv0ClqZkmiSSaIH5LTA==

From 3f6c144b787887ca43d281b981016b59b08c5bfd Mon Sep 17 00:00:00 2001
From: Mike Donnalley <mdonnalley@salesforce.com>
Date: Wed, 29 Jan 2025 12:28:05 -0700
Subject: [PATCH 2/3] fix: update api responses

---
 schemas/agent-test-results.json               | 31 ++++++++---------
 .../1.json                                    |  2 +-
 .../2.json                                    |  2 +-
 .../3.json                                    |  2 +-
 ...tions_runs_4KBSM000000003F4AQ_results.json | 34 +++++++++----------
 5 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/schemas/agent-test-results.json b/schemas/agent-test-results.json
index 3e20a58..cf9e2f4 100644
--- a/schemas/agent-test-results.json
+++ b/schemas/agent-test-results.json
@@ -45,7 +45,7 @@
     },
     "TestStatus": {
       "type": "string",
-      "enum": ["New", "InProgress", "Completed", "Error"]
+      "enum": ["NEW", "IN_PROGRESS", "COMPLETED", "ERROR", "TERMINATED"]
     },
     "TestCaseResult": {
       "type": "object",
@@ -53,22 +53,25 @@
         "status": {
           "$ref": "#/definitions/TestStatus"
         },
-        "utterance": {
-          "type": "string"
-        },
         "startTime": {
           "type": "string"
         },
         "endTime": {
           "type": "string"
         },
+        "inputs": {
+          "type": "object",
+          "properties": {
+            "utterance": {
+              "type": "string"
+            }
+          },
+          "required": ["utterance"],
+          "additionalProperties": false
+        },
         "generatedData": {
           "type": "object",
           "properties": {
-            "type": {
-              "type": "string",
-              "const": "AGENT"
-            },
             "actionsSequence": {
               "type": "array",
               "items": {
@@ -80,18 +83,12 @@
             },
             "topic": {
               "type": "string"
-            },
-            "inputTokensCount": {
-              "type": "string"
-            },
-            "outputTokensCount": {
-              "type": "string"
             }
           },
-          "required": ["type", "actionsSequence", "outcome", "topic", "inputTokensCount", "outputTokensCount"],
+          "required": ["actionsSequence", "outcome", "topic"],
           "additionalProperties": false
         },
-        "expectationResults": {
+        "testResults": {
           "type": "array",
           "items": {
             "type": "object",
@@ -150,7 +147,7 @@
           }
         }
       },
-      "required": ["status", "utterance", "startTime", "generatedData", "expectationResults"],
+      "required": ["status", "startTime", "inputs", "generatedData", "testResults"],
       "additionalProperties": false
     }
   }
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json
index 58716da..daf2bbc 100644
--- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/1.json
@@ -1,4 +1,4 @@
 {
-  "status": "InProgress",
+  "status": "IN_PROGRESS",
   "startTime": "2024-11-13T15:00:00.000Z"
 }
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json
index 58716da..daf2bbc 100644
--- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/2.json
@@ -1,4 +1,4 @@
 {
-  "status": "InProgress",
+  "status": "IN_PROGRESS",
   "startTime": "2024-11-13T15:00:00.000Z"
 }
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json
index 88bd062..d4f6503 100644
--- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ/3.json
@@ -1,4 +1,4 @@
 {
-  "status": "Completed",
+  "status": "COMPLETED",
   "startTime": "2024-11-13T15:00:00.000Z"
 }
diff --git a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json
index 704b480..27cd345 100644
--- a/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json
+++ b/test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_results.json
@@ -1,5 +1,5 @@
 {
-  "status": "Completed",
+  "status": "COMPLETED",
   "startTime": "2024-11-28T12:00:00Z",
   "endTime": "2024-11-28T12:00:48.56Z",
   "errorMessage": null,
@@ -9,20 +9,20 @@
     "testCases": [
       {
         "status": "COMPLETED",
-        "utterance": "Summarize account Acme",
+        "inputs": {
+          "utterance": "Summarize account Acme"
+        },
         "startTime": "2024-11-28T12:00:10Z",
         "endTime": "2024-11-28T12:00:20Z",
         "generatedData": {
           "type": "AGENT",
           "actionsSequence": ["Action1", "Action2"],
           "outcome": "Success",
-          "topic": "Mathematics",
-          "inputTokensCount": 50,
-          "outputTokensCount": 55
+          "topic": "Mathematics"
         },
-        "expectationResults": [
+        "testResults": [
           {
-            "name": "topic_sequence_match",
+            "name": "expectedTopic",
             "actualValue": "GeneralCRM",
             "expectedValue": "GeneralCRM",
             "score": 1.0,
@@ -36,7 +36,7 @@
             "errorMessage": null
           },
           {
-            "name": "action_sequence_match",
+            "name": "expectedActions",
             "actualValue": "[\"IdentifyRecordByName\",\"SummarizeRecord\"]",
             "expectedValue": "[\"IdentifyRecordByName\",\"SummarizeRecord\"]",
             "score": 1.0,
@@ -50,7 +50,7 @@
             "errorMessage": null
           },
           {
-            "name": "bot_response_rating",
+            "name": "expectedOutcome",
             "actualValue": "Here is the summary of the account Acme. How else can I assist you? Acme is a customer since 2019. They have 3 open opportunities and 2 open cases.",
             "expectedValue": "Summary of account details are shown",
             "score": 0.9,
@@ -68,19 +68,19 @@
       {
         "status": "COMPLETED",
         "startTime": "2024-11-28T12:00:30Z",
-        "utterance": "Summarize the open cases and Activities of acme from sep to nov 2024",
+        "inputs": {
+          "utterance": "Summarize the open cases and Activities of acme from sep to nov 2024"
+        },
         "endTime": "2024-11-28T12:00:40Z",
         "generatedData": {
           "type": "AGENT",
           "actionsSequence": ["Action3", "Action4"],
           "outcome": "Failure",
-          "topic": "Physics",
-          "inputTokensCount": 60,
-          "outputTokensCount": 50
+          "topic": "Physics"
         },
-        "expectationResults": [
+        "testResults": [
           {
-            "name": "topic_sequence_match",
+            "name": "expectedTopic",
             "actualValue": "GeneralCRM",
             "expectedValue": "GeneralCRM",
             "score": 1,
@@ -94,7 +94,7 @@
             "errorMessage": null
           },
           {
-            "name": "action_sequence_match",
+            "name": "expectedActions",
             "actualValue": "[\"IdentifyRecordByName\",\"QueryRecords\"]",
             "expectedValue": "[\"IdentifyRecordByName\",\"QueryRecords\",\"GetActivitiesTimeline\"]",
             "score": 0.5,
@@ -108,7 +108,7 @@
             "errorMessage": "Actual response does not match the expected response"
           },
           {
-            "name": "bot_response_rating",
+            "name": "expectedOutcome",
             "actualValue": "It looks like I am unable to find the information you are looking for due to access restrictions. How else can I assist you?",
             "expectedValue": "Summary of open cases and activities associated with timeline",
             "score": 0.1,

From e79593bd6ea7ee31a6ff9fc2ca078a4c3958045d Mon Sep 17 00:00:00 2001
From: Willie Ruemmele <willieruemmele@gmail.com>
Date: Wed, 29 Jan 2025 15:24:51 -0700
Subject: [PATCH 3/3] chore: bump agents

---
 package.json |  2 +-
 yarn.lock    | 65 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 37 insertions(+), 30 deletions(-)

diff --git a/package.json b/package.json
index 7d52e94..7b61dbe 100644
--- a/package.json
+++ b/package.json
@@ -9,7 +9,7 @@
     "@inquirer/prompts": "^7.2.0",
     "@oclif/core": "^4",
     "@oclif/multi-stage-output": "^0.7.12",
-    "@salesforce/agents": "^0.8.0",
+    "@salesforce/agents": "^0.9.0",
     "@salesforce/core": "^8.8.0",
     "@salesforce/kit": "^3.2.1",
     "@salesforce/sf-plugins-core": "^12.1.0",
diff --git a/yarn.lock b/yarn.lock
index 8ccd7f2..5a9285e 100644
--- a/yarn.lock
+++ b/yarn.lock
@@ -1462,18 +1462,19 @@
   resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33"
   integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==
 
-"@salesforce/agents@^0.8.0":
-  version "0.8.0"
-  resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.8.0.tgz#fb823e19ed1a49b895f98426a50faea4923286a4"
-  integrity sha512-m+PNYaqPoKQbTCkudJFRdgON0wBJV3kW1uBHi9IevEqCWbrBcK4JVDmsb5C9IOTBwt9wVfSWi9Dvpqh0oU75mQ==
+"@salesforce/agents@^0.9.0":
+  version "0.9.0"
+  resolved "https://registry.yarnpkg.com/@salesforce/agents/-/agents-0.9.0.tgz#a1292c7b678451d40c71c0b9e2fd657083616ad8"
+  integrity sha512-VOoJejtY+tFTUUZhx958IcuZ936hwpoua1G/1wHsJTWHg+gcpsYigSF6oF2T0SD8ZuTxzHiG6l/fopMJZiyutg==
   dependencies:
     "@salesforce/core" "^8.8.2"
     "@salesforce/kit" "^3.2.3"
     "@salesforce/sf-plugins-core" "^12.1.2"
-    "@salesforce/source-deploy-retrieve" "^12.12.3"
+    "@salesforce/source-deploy-retrieve" "^12.14.0"
     ansis "^3.9.0"
-    fast-xml-parser "^4"
+    fast-xml-parser "^4.5.1"
     nock "^13.5.6"
+    yaml "^2.7.0"
 
 "@salesforce/cli-plugins-testkit@^5.3.35":
   version "5.3.35"
@@ -1617,25 +1618,6 @@
     cli-progress "^3.12.0"
     terminal-link "^3.0.0"
 
-"@salesforce/source-deploy-retrieve@^12.12.3":
-  version "12.12.3"
-  resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.12.3.tgz#b03df07a60c55004c3b4c7ce8df3ecfd20b7742a"
-  integrity sha512-kQ78RekRvTLh5yp8eB67szRoQr64R/0PETgszxf65RRPzLTmBGs0JpkZBMx0GN95Mb6BWvOEjTYLgyezVPUXsw==
-  dependencies:
-    "@salesforce/core" "^8.8.0"
-    "@salesforce/kit" "^3.2.2"
-    "@salesforce/ts-types" "^2.0.12"
-    fast-levenshtein "^3.0.0"
-    fast-xml-parser "^4.5.1"
-    got "^11.8.6"
-    graceful-fs "^4.2.11"
-    ignore "^5.3.2"
-    isbinaryfile "^5.0.2"
-    jszip "^3.10.1"
-    mime "2.6.0"
-    minimatch "^9.0.5"
-    proxy-agent "^6.4.0"
-
 "@salesforce/source-deploy-retrieve@^12.14.0":
   version "12.14.0"
   resolved "https://registry.yarnpkg.com/@salesforce/source-deploy-retrieve/-/source-deploy-retrieve-12.14.0.tgz#04036f76301071b2188c92f70d77a138bc0d72cf"
@@ -4267,7 +4249,7 @@ fast-xml-parser@4.4.1:
   dependencies:
     strnum "^1.0.5"
 
-fast-xml-parser@^4, fast-xml-parser@^4.5.1:
+fast-xml-parser@^4.5.1:
   version "4.5.1"
   resolved "https://registry.yarnpkg.com/fast-xml-parser/-/fast-xml-parser-4.5.1.tgz#a7e665ff79b7919100a5202f23984b6150f9b31e"
   integrity sha512-y655CeyUQ+jj7KBbYMc4FG01V8ZQqjN+gDYGJ50RtfsUB8iG9AmwmwoAgeKLJdmueKKMrH1RJ7yXHTSoczdv5w==
@@ -7497,7 +7479,16 @@ stack-utils@^2.0.6:
   dependencies:
     escape-string-regexp "^2.0.0"
 
-"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
+"string-width-cjs@npm:string-width@^4.2.0":
+  version "4.2.3"
+  resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
+  integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
+  dependencies:
+    emoji-regex "^8.0.0"
+    is-fullwidth-code-point "^3.0.0"
+    strip-ansi "^6.0.1"
+
+string-width@^4.0.0, string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3:
   version "4.2.3"
   resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010"
   integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==
@@ -7592,7 +7583,14 @@ string_decoder@~1.1.1:
   dependencies:
     safe-buffer "~5.1.0"
 
-"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1:
+"strip-ansi-cjs@npm:strip-ansi@^6.0.1":
+  version "6.0.1"
+  resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
+  integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
+  dependencies:
+    ansi-regex "^5.0.1"
+
+strip-ansi@6.0.1, strip-ansi@^6.0.0, strip-ansi@^6.0.1:
   version "6.0.1"
   resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9"
   integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==
@@ -8201,7 +8199,7 @@ workerpool@^6.5.1:
   resolved "https://registry.yarnpkg.com/workerpool/-/workerpool-6.5.1.tgz#060f73b39d0caf97c6db64da004cd01b4c099544"
   integrity sha512-Fs4dNYcsdpYSAfVxhnl1L5zTksjvOJxtC5hzMNl+1t9B8hTJTdKDyZ5ju7ztgPy+ft9tBFXoOlDNiOT9WUXZlA==
 
-"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0:
+"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0":
   version "7.0.0"
   resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
   integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
@@ -8219,6 +8217,15 @@ wrap-ansi@^6.2.0:
     string-width "^4.1.0"
     strip-ansi "^6.0.0"
 
+wrap-ansi@^7.0.0:
+  version "7.0.0"
+  resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43"
+  integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==
+  dependencies:
+    ansi-styles "^4.0.0"
+    string-width "^4.1.0"
+    strip-ansi "^6.0.0"
+
 wrap-ansi@^8.1.0:
   version "8.1.0"
   resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-8.1.0.tgz#56dc22368ee570face1b49819975d9b9a5ead214"