From a4b9c70d984d5ff16831a436a560ec9a0b063e93 Mon Sep 17 00:00:00 2001 From: tomek7667 Date: Thu, 28 Mar 2024 13:10:38 +0100 Subject: [PATCH] Improve regression testing, add specify test number to the script --- sandbox/test12/abc/ghi/xyz.txt | 1 + sandbox/test12/abc/jkl/xyz.txt | 1 + sandbox/test12/def/ghi/pqr/xyz.txt | 1 + sandbox/test12/def/mno/xyz.txt | 1 + sandbox/test12/tree.txt | 13 +++ src/domain/actions/WriteTaskList.ts | 23 ++++- src/domain/contexts/WriteTaskList | 19 +++- src/main.ts | 149 +++++++++++++++------------- src/regression.ts | 88 ++++++++++++++-- 9 files changed, 213 insertions(+), 83 deletions(-) create mode 100644 sandbox/test12/abc/ghi/xyz.txt create mode 100644 sandbox/test12/abc/jkl/xyz.txt create mode 100644 sandbox/test12/def/ghi/pqr/xyz.txt create mode 100644 sandbox/test12/def/mno/xyz.txt create mode 100644 sandbox/test12/tree.txt diff --git a/sandbox/test12/abc/ghi/xyz.txt b/sandbox/test12/abc/ghi/xyz.txt new file mode 100644 index 0000000..95d09f2 --- /dev/null +++ b/sandbox/test12/abc/ghi/xyz.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/sandbox/test12/abc/jkl/xyz.txt b/sandbox/test12/abc/jkl/xyz.txt new file mode 100644 index 0000000..95d09f2 --- /dev/null +++ b/sandbox/test12/abc/jkl/xyz.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/sandbox/test12/def/ghi/pqr/xyz.txt b/sandbox/test12/def/ghi/pqr/xyz.txt new file mode 100644 index 0000000..95d09f2 --- /dev/null +++ b/sandbox/test12/def/ghi/pqr/xyz.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/sandbox/test12/def/mno/xyz.txt b/sandbox/test12/def/mno/xyz.txt new file mode 100644 index 0000000..95d09f2 --- /dev/null +++ b/sandbox/test12/def/mno/xyz.txt @@ -0,0 +1 @@ +hello world \ No newline at end of file diff --git a/sandbox/test12/tree.txt b/sandbox/test12/tree.txt new file mode 100644 index 0000000..942bcc2 --- /dev/null +++ b/sandbox/test12/tree.txt @@ -0,0 +1,13 @@ +tree: + 1: + abc: + ghi: + xyz.txt + 
jkl: + xyz.txt + def: + ghi: + pqr: + xyz.txt + mno: + xyz.txt \ No newline at end of file diff --git a/src/domain/actions/WriteTaskList.ts b/src/domain/actions/WriteTaskList.ts index cd0035a..3f7bfdb 100644 --- a/src/domain/actions/WriteTaskList.ts +++ b/src/domain/actions/WriteTaskList.ts @@ -64,6 +64,25 @@ const examples: Example[] = [ }, ]), }, + { + role: "user", + content: + "Create a tree structure of an existing project at './example/' directory and save it to a file", + }, + { + role: "assistant", + content: formatWrap([ + { + task: "GetTree", + description: "Current directory", + }, + { + task: "WriteFile", + description: + "Write the tree structure given in the context to 'tree_structure.yaml' file |{{{RESULT_0}}}", + }, + ]), + }, ]; export const WriteTaskList = (projectRoot: string) => { @@ -76,7 +95,7 @@ export const WriteTaskList = (projectRoot: string) => { content: z.infer ) => { try { - if (config.verbose) { + if (true || config.verbose) { console.log( "A plan to execute the tasks: ", content.map((c) => c.task).join(", ") @@ -94,6 +113,7 @@ export const WriteTaskList = (projectRoot: string) => { `YAML>>>${stringify(result)}<< { message: "SUCCESS", }; } catch (err: any) { + console.log("Error: ", err); throw new Error(err); } }, diff --git a/src/domain/contexts/WriteTaskList b/src/domain/contexts/WriteTaskList index 0564747..f435a10 100644 --- a/src/domain/contexts/WriteTaskList +++ b/src/domain/contexts/WriteTaskList @@ -1,4 +1,4 @@ -You are an AI agent that will get a task from the user. Your job is to write down a step by step plan to complete the task using available commands. +You are an AI agent that will get a task from the user. Your job is to write down a very detailed, step by step plan to complete the user's task using available commands. 
The available commands are: - DeleteFile - Deletes 1 file, returns `void` @@ -55,8 +55,23 @@ Example 2 - Getting cinnamon roll recipe and saving it to a file with the recipe ] {/DATA} +Note that the descriptions in the given examples are not detailed enough. You need to write more detailed descriptions for your tasks. -Remember that if you want to reference a result from a previous task, you need to start indexing from 0, e.g. {{{RESULT_0}}} for task 1 result. +Remember that if you want to reference a result from a previous task, you need to start indexing from 0, e.g. {{{RESULT_0}}} for task 1 result. If you think that a task will require a detailed context to fulfill the user goal, you can use a pipe '|' followed by the variable {{{RESULT_i}}} to store the context. For example, you can use `|{{{RESULT_0}}}` to store the context of the first task. Example: + +Example 3 - Create a tree structure of an existing project at './example/' directory and save it to a file: +{DATA} +[ + { + "task": "GetTree", + "description": "Current directory" + }, + { + "task": "WriteFile", + "description": "Write the tree structure given in the context to 'tree_structure.yaml' file |{{{RESULT_0}}}" + } +] +{/DATA} Make sure that the tasks list is as short as possible, and only to achieve the user goal. You have all the tools you need to get real data. For example 'GetLinks' command can be used to get links from the internet.
diff --git a/src/main.ts b/src/main.ts index bbcd19f..2aad4b2 100644 --- a/src/main.ts +++ b/src/main.ts @@ -9,80 +9,87 @@ const app = express(); app.use(bodyParser.json()); app.post("/actions", async (req, res) => { - const parsed = Action.Create.safeParse(req.body); - if (!parsed.success) { - res.status(400).json(parsed); - return; - } - const data: z.infer = parsed.data; - const api = new Api(data); - switch (data.action) { - case Actions.DeleteFile: { - await api.DeleteFile.perform(data.message); - break; - } - case Actions.WriteFile: { - await api.WriteFile.perform(data.message); - break; - } - case Actions.ListDirs: { - return res.status(200).json({ - success: true, - message: "Success", - data: await api.ListDirs.perform(data.message), - }); - } - case Actions.ReadFiles: { - return res.status(200).json({ - success: true, - message: "Success", - data: await api.ReadFiles.perform(data.message), - }); - } - case Actions.WriteTaskList: { - await api.WriteTaskList.perform(data.message); - break; - } - case Actions.RunCommand: { - return res.status(200).json({ - success: true, - message: "Success", - data: await api.RunCommand.perform(data.message), - }); + try { + const parsed = Action.Create.safeParse(req.body); + if (!parsed.success) { + res.status(400).json(parsed); + return; } - case Actions.GetLinks: { - return res.status(200).json({ - success: true, - message: "Success", - data: await api.GetLinks.perform(data.message), - }); + const data: z.infer = parsed.data; + const api = new Api(data); + switch (data.action) { + case Actions.DeleteFile: { + await api.DeleteFile.perform(data.message); + break; + } + case Actions.WriteFile: { + await api.WriteFile.perform(data.message); + break; + } + case Actions.ListDirs: { + return res.status(200).json({ + success: true, + message: "Success", + data: await api.ListDirs.perform(data.message), + }); + } + case Actions.ReadFiles: { + return res.status(200).json({ + success: true, + message: "Success", + data: await
api.ReadFiles.perform(data.message), + }); + } + case Actions.WriteTaskList: { + await api.WriteTaskList.perform(data.message); + break; + } + case Actions.RunCommand: { + return res.status(200).json({ + success: true, + message: "Success", + data: await api.RunCommand.perform(data.message), + }); + } + case Actions.GetLinks: { + return res.status(200).json({ + success: true, + message: "Success", + data: await api.GetLinks.perform(data.message), + }); + } + case Actions.VisitLink: { + return res.status(200).json({ + success: true, + message: "Success", + data: await api.VisitLink.perform(data.message), + }); + } + case Actions.GetTree: { + return res.status(200).json({ + success: true, + message: "Success", + data: await api.GetTree.perform(data.message), + }); + } + default: { + return res.status(400).json({ + success: false, + message: `Unknown action: ${data.action}. AcceptedActions: ${AcceptedActions}`, + }); + } } - case Actions.VisitLink: { - return res.status(200).json({ - success: true, - message: "Success", - data: await api.VisitLink.perform(data.message), - }); - } - case Actions.GetTree: { - return res.status(200).json({ - success: true, - message: "Success", - data: await api.GetTree.perform(data.message), - }); - } - default: { - return res.status(400).json({ - success: false, - message: `Unknown action: ${data.action}. AcceptedActions: ${AcceptedActions}`, - }); - } - } - return res.status(200).json({ - success: true, - message: "Success", - }); + return res.status(200).json({ + success: true, + message: "Success", + }); + } catch (err: any) { + return res.status(500).json({ + success: false, + message: err?.message ?? err?.toString() ??
"Unknown error", + }); + } }); app.listen(config.port, config.hostname, async () => { diff --git a/src/regression.ts b/src/regression.ts index 989e682..0ad614d 100644 --- a/src/regression.ts +++ b/src/regression.ts @@ -1,5 +1,11 @@ import colors from "colors"; -import { mkdirSync, readdirSync, readFileSync, rmSync } from "fs"; +import { + mkdirSync, + readdirSync, + readFileSync, + rmSync, + writeFileSync, +} from "fs"; import { Api } from "./domain"; import { config } from "./config"; @@ -311,23 +317,86 @@ const tests = [ } return true; }, + + // test12 + async () => { + mkdirSync("sandbox/test12"); + mkdirSync("sandbox/test12/abc"); + mkdirSync("sandbox/test12/def"); + mkdirSync("sandbox/test12/def/ghi"); + mkdirSync("sandbox/test12/abc/ghi"); + mkdirSync("sandbox/test12/abc/jkl"); + mkdirSync("sandbox/test12/def/mno"); + mkdirSync("sandbox/test12/def/ghi/pqr"); + writeFileSync("sandbox/test12/abc/ghi/xyz.txt", "hello world"); + writeFileSync("sandbox/test12/abc/jkl/xyz.txt", "hello world"); + writeFileSync("sandbox/test12/def/mno/xyz.txt", "hello world"); + writeFileSync("sandbox/test12/def/ghi/pqr/xyz.txt", "hello world"); + await makeApi("test12").WriteTaskList.perform( + "Save tree structure of current dir to a file called 'tree.txt'" + ); + + // Assert + const test12Files = readdirSync("sandbox/test12"); + if (!test12Files.includes("tree.txt")) { + console.log("test12: 'tree.txt' not found".red); + return false; + } + const treeTxtContent = readFileSync("sandbox/test12/tree.txt", "utf-8"); + if ( + !treeTxtContent.includes("abc") || + !treeTxtContent.includes("def") + ) { + console.log( + "test12: treeTxtContent doesn't include 'abc' or 'def'".red + ); + return false; + } + return true; + }, ]; -export const performRegression = async () => { +const printSpacer = () => { + console.log( + `==============================================================`.cyan + .bold + ); +}; + +export const performRegression = async (testNumber?: number) => { colors.enable();
rmSync("sandbox", { recursive: true }); mkdirSync("sandbox", { recursive: true }); const testSummary: { [key: string]: boolean } = {}; + printSpacer(); + if (testNumber !== undefined) { + try { + const test = tests[testNumber - 1]; + const isSuccess = await test(); + if (isSuccess) { + printSpacer(); + console.log(`\t\t\tTest ${testNumber} passed!`.green.bold); + printSpacer(); + } else { + printSpacer(); + console.log(`\t\t\tTest ${testNumber} failed!`.red.bold); + printSpacer(); + } + } catch (e) { + console.log(e); + printSpacer(); + console.log(`\t\t\tTest ${testNumber} failed!`.red.bold); + printSpacer(); + } + return; + } + try { for (let i = 0; i < tests.length; i++) { try { const test = tests[i]; - console.log( - `==============================================================\n\t\t\tRunning test ${ - i + 1 - }\n==============================================================` - .cyan.bold - ); + printSpacer(); + console.log(`\t\t\tRunning test ${i + 1}`); for ( let trialNumber = 0; trialNumber < config.retryRegressionNumber - 1; @@ -356,10 +425,11 @@ export const performRegression = async () => { testSummary[`Test ${i + 1}`] = false; } } + printSpacer(); } finally { console.log("\n\n\t\t\tRegression summary:\t\t\t\n".bgBlack.white); console.log(testSummary); } }; -performRegression(); +performRegression(process.argv[2] ? Number(process.argv[2]) : undefined);