-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
refactor: comply with latest api spec
- Loading branch information
1 parent b468c27
commit aaa5ad6
Showing 4 changed files with 145 additions and 149 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,47 +8,77 @@ import { Connection, Lifecycle, PollingClient, StatusResult } from '@salesforce/ | |
import { Duration } from '@salesforce/kit'; | ||
import { MaybeMock } from './maybe-mock'; | ||
|
||
type Format = 'human' | 'tap' | 'junit' | 'json'; | ||
type Format = 'human' | 'json'; | ||
|
||
type TestStatus = 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR'; | ||
|
||
type AgentTestStartResponse = { | ||
id: string; | ||
aiEvaluationId: string; | ||
status: TestStatus; | ||
}; | ||
|
||
type AgentTestStatusResponse = { | ||
status: 'NEW' | 'IN_PROGRESS' | 'COMPLETED' | 'ERROR'; | ||
status: TestStatus; | ||
startTime: string; | ||
endTime?: string; | ||
errorMessage?: string; | ||
}; | ||
|
||
type AgentTestDetailsResponse = { | ||
AiEvaluationSuiteDefinition: string; | ||
tests: Array<{ | ||
AiEvaluationDefinition: string; | ||
results: Array<{ | ||
test_number: number; | ||
results: Array<{ | ||
name: string; | ||
actual: string[]; | ||
is_pass: boolean; | ||
execution_time_ms: number; | ||
error?: string; | ||
}>; | ||
}>; | ||
type TestCaseResult = { | ||
status: TestStatus; | ||
number: string; | ||
startTime: string; | ||
endTime?: string; | ||
generatedData: { | ||
type: 'AGENT'; | ||
actionsSequence: string[]; | ||
outcome: 'Success' | 'Failure'; | ||
topic: string; | ||
inputTokensCount: string; | ||
outputTokensCount: string; | ||
}; | ||
expectationResults: Array<{ | ||
name: string; | ||
actualValue: string; | ||
expectedValue: string; | ||
score: number; | ||
result: 'Passed' | 'Failed'; | ||
metricLabel: 'Accuracy' | 'Precision'; | ||
metricExplainability: string; | ||
status: TestStatus; | ||
startTime: string; | ||
endTime?: string; | ||
errorCode?: string; | ||
errorMessage?: string; | ||
}>; | ||
}; | ||
|
||
type AgentTestDetailsResponse = { | ||
status: TestStatus; | ||
startTime: string; | ||
endTime?: string; | ||
errorMessage?: string; | ||
testCases: TestCaseResult[]; | ||
}; | ||
|
||
export class AgentTester { | ||
private maybeMock: MaybeMock; | ||
public constructor(connection: Connection) { | ||
this.maybeMock = new MaybeMock(connection); | ||
} | ||
|
||
public async start(suiteId: string): Promise<{ id: string }> { | ||
/** | ||
* Starts an AI evaluation run based on the provided name or ID. | ||
* | ||
* @param nameOrId - The name or ID of the AI evaluation definition. | ||
* @param type - Specifies whether the provided identifier is a 'name' or 'id'. Defaults to 'name'. If 'name' is provided, nameOrId is treated as the name of the AiEvaluationDefinition. If 'id' is provided, nameOrId is treated as the unique ID of the AiEvaluationDefinition. | ||
* @returns A promise that resolves to an object containing the ID of the started AI evaluation run. | ||
*/ | ||
public async start(nameOrId: string, type: 'name' | 'id' = 'name'): Promise<{ aiEvaluationId: string }> { | ||
const url = '/einstein/ai-evaluations/runs'; | ||
|
||
return this.maybeMock.request<AgentTestStartResponse>('POST', url, { | ||
aiEvaluationSuiteDefinition: suiteId, | ||
[type === 'name' ? 'aiEvaluationDefinitionName' : 'aiEvaluationDefinitionVersionId']: nameOrId, | ||
}); | ||
} | ||
|
||
|
@@ -100,14 +130,7 @@ export class AgentTester { | |
const response = await this.maybeMock.request<AgentTestDetailsResponse>('GET', url); | ||
return { | ||
response, | ||
formatted: | ||
format === 'human' | ||
? await humanFormat(response) | ||
: format === 'tap' | ||
? await tapFormat(response) | ||
: format === 'junit' | ||
? await junitFormat(response) | ||
: await jsonFormat(response), | ||
formatted: format === 'human' ? await humanFormat(jobId, response) : await jsonFormat(response), | ||
}; | ||
} | ||
|
||
|
@@ -118,100 +141,30 @@ export class AgentTester { | |
} | ||
} | ||
|
||
export async function humanFormat(details: AgentTestDetailsResponse): Promise<string> { | ||
// TODO: the api response isn't finalized so this is just a POC | ||
export async function humanFormat(name: string, details: AgentTestDetailsResponse): Promise<string> { | ||
const { Ux } = await import('@salesforce/sf-plugins-core'); | ||
const ux = new Ux(); | ||
|
||
const tables: string[] = []; | ||
for (const aiEvalDef of details.tests) { | ||
for (const result of aiEvalDef.results) { | ||
const table = ux.makeTable({ | ||
title: `Test Results for ${aiEvalDef.AiEvaluationDefinition} (#${result.test_number})`, | ||
data: result.results.map((r) => ({ | ||
'TEST NAME': r.name, | ||
OUTCOME: r.is_pass ? 'Pass' : 'Fail', | ||
MESSAGE: r.error ?? '', | ||
'RUNTIME (MS)': r.execution_time_ms, | ||
})), | ||
}); | ||
tables.push(table); | ||
} | ||
for (const testCase of details.testCases) { | ||
const table = ux.makeTable({ | ||
title: `Test Case #${testCase.number}`, | ||
data: testCase.expectationResults.map((r) => ({ | ||
name: r.name, | ||
outcome: r.result === 'Passed' ? 'Pass' : 'Fail', | ||
actualValue: r.actualValue, | ||
expectedValue: r.expectedValue, | ||
score: r.score, | ||
'metric label': r.metricLabel, | ||
message: r.errorMessage ?? '', | ||
'runtime (MS)': r.endTime ? new Date(r.endTime).getTime() - new Date(r.startTime).getTime() : 0, | ||
})), | ||
}); | ||
tables.push(table); | ||
} | ||
|
||
return tables.join('\n'); | ||
} | ||
|
||
export async function junitFormat(details: AgentTestDetailsResponse): Promise<string> { | ||
// APEX EXAMPLE | ||
// <?xml version="1.0" encoding="UTF-8"?> | ||
// <testsuites> | ||
// <testsuite name="force.apex" timestamp="2024-11-13T19:19:23.000Z" hostname="https://energy-site-1368-dev-ed.scratch.my.salesforce.com" tests="11" failures="0" errors="0" time="2.57"> | ||
// <properties> | ||
// <property name="outcome" value="Successful"/> | ||
// <property name="testsRan" value="11"/> | ||
// <property name="passing" value="11"/> | ||
// <property name="failing" value="0"/> | ||
// <property name="skipped" value="0"/> | ||
// <property name="passRate" value="100%"/> | ||
// <property name="failRate" value="0%"/> | ||
// <property name="testStartTime" value="Wed Nov 13 2024 12:19:23 PM"/> | ||
// <property name="testSetupTimeInMs" value="0"/> | ||
// <property name="testExecutionTime" value="2.57 s"/> | ||
// <property name="testTotalTime" value="2.57 s"/> | ||
// <property name="commandTime" value="0.17 s"/> | ||
// <property name="hostname" value="https://energy-site-1368-dev-ed.scratch.my.salesforce.com"/> | ||
// <property name="orgId" value="00DEi000006OlrxMAC"/> | ||
// <property name="username" value="[email protected]"/> | ||
// <property name="testRunId" value="707Ei00000dTRSa"/> | ||
// <property name="userId" value="005Ei00000FkGU9IAN"/> | ||
// </properties> | ||
// <testcase name="importSampleData" classname="TestSampleDataController" time="0.27"> | ||
// </testcase> | ||
// <testcase name="blankAddress" classname="GeocodingServiceTest" time="0.01"> | ||
// </testcase> | ||
// <testcase name="errorResponse" classname="GeocodingServiceTest" time="0.01"> | ||
// </testcase> | ||
// <testcase name="successResponse" classname="GeocodingServiceTest" time="0.01"> | ||
// </testcase> | ||
// <testcase name="createFileFailsWhenIncorrectBase64Data" classname="FileUtilitiesTest" time="0.10"> | ||
// </testcase> | ||
// <testcase name="createFileFailsWhenIncorrectFilename" classname="FileUtilitiesTest" time="0.03"> | ||
// </testcase> | ||
// <testcase name="createFileFailsWhenIncorrectRecordId" classname="FileUtilitiesTest" time="0.35"> | ||
// </testcase> | ||
// <testcase name="createFileSucceedsWhenCorrectInput" classname="FileUtilitiesTest" time="0.22"> | ||
// </testcase> | ||
// <testcase name="testGetPagedPropertyList" classname="TestPropertyController" time="1.01"> | ||
// </testcase> | ||
// <testcase name="testGetPicturesNoResults" classname="TestPropertyController" time="0.06"> | ||
// </testcase> | ||
// <testcase name="testGetPicturesWithResults" classname="TestPropertyController" time="0.51"> | ||
// </testcase> | ||
// </testsuite> | ||
// </testsuites> | ||
await Promise.reject(new Error('Not implemented')); | ||
return JSON.stringify(details, null, 2); | ||
} | ||
|
||
export async function tapFormat(details: AgentTestDetailsResponse): Promise<string> { | ||
// APEX EXAMPLE (these are streamed in chunks) | ||
// 1..11 | ||
// ok 1 TestPropertyController.testGetPagedPropertyList | ||
// ok 2 TestPropertyController.testGetPicturesNoResults | ||
// ok 3 TestPropertyController.testGetPicturesWithResults | ||
// ok 4 FileUtilitiesTest.createFileFailsWhenIncorrectBase64Data | ||
// ok 5 FileUtilitiesTest.createFileFailsWhenIncorrectFilename | ||
// ok 6 FileUtilitiesTest.createFileFailsWhenIncorrectRecordId | ||
// ok 7 FileUtilitiesTest.createFileSucceedsWhenCorrectInput | ||
// ok 8 TestSampleDataController.importSampleData | ||
// ok 9 GeocodingServiceTest.blankAddress | ||
// ok 10 GeocodingServiceTest.errorResponse | ||
// ok 11 GeocodingServiceTest.successResponse | ||
// # Run "sf apex get test -i 707Ei00000dUJry -o [email protected] --result-format <format>" to retrieve test results in a different format. | ||
await Promise.reject(new Error('Not implemented')); | ||
return JSON.stringify(details, null, 2); | ||
} | ||
|
||
export async function jsonFormat(details: AgentTestDetailsResponse): Promise<string> { | ||
return Promise.resolve(JSON.stringify(details, null, 2)); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
{ | ||
"id": "4KBSM000000003F4AQ" | ||
"aiEvaluationId": "4KBSM000000003F4AQ", | ||
"status": "NEW" | ||
} |
103 changes: 72 additions & 31 deletions
103
test/mocks/einstein_ai-evaluations_runs_4KBSM000000003F4AQ_details.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters