Skip to content

Commit

Permalink
Improve regression testing, add specify test number to the script
Browse files Browse the repository at this point in the history
  • Loading branch information
tomek7667 committed Mar 28, 2024
1 parent fdb1034 commit a4b9c70
Show file tree
Hide file tree
Showing 9 changed files with 213 additions and 83 deletions.
1 change: 1 addition & 0 deletions sandbox/test12/abc/ghi/xyz.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world
1 change: 1 addition & 0 deletions sandbox/test12/abc/jkl/xyz.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world
1 change: 1 addition & 0 deletions sandbox/test12/def/ghi/pqr/xyz.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world
1 change: 1 addition & 0 deletions sandbox/test12/def/mno/xyz.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world
13 changes: 13 additions & 0 deletions sandbox/test12/tree.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
tree:
1:
abc:
ghi:
xyz.txt
jkl:
xyz.txt
def:
ghi:
pqr:
xyz.txt
mno:
xyz.txt
23 changes: 22 additions & 1 deletion src/domain/actions/WriteTaskList.ts
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,25 @@ const examples: Example[] = [
},
]),
},
{
role: "user",
content:
"Create a tree structure of an existing project at './example/' directory and save it to a file",
},
{
role: "assistant",
content: formatWrap([
{
task: "GetTree",
description: "Current directory",
},
{
task: "WriteFile",
description:
"Write the tree structure given in the context to 'tree_structure.yaml' file |{{{RESULT_0}}}",
},
]),
},
];

export const WriteTaskList = (projectRoot: string) => {
Expand All @@ -76,7 +95,7 @@ export const WriteTaskList = (projectRoot: string) => {
content: z.infer<typeof Action.Schemas.WriteTaskList>
) => {
try {
if (config.verbose) {
if (true || config.verbose) {
console.log(
"A plan to execute the tasks: ",
content.map((c) => c.task).join(", ")
Expand All @@ -94,6 +113,7 @@ export const WriteTaskList = (projectRoot: string) => {
`YAML>>>${stringify(result)}<<<YAML`
);
});
console.log({ message });
const body = JSON.stringify({
action: task,
workDir: projectRoot,
Expand All @@ -114,6 +134,7 @@ export const WriteTaskList = (projectRoot: string) => {
message: "SUCCESS",
};
} catch (err: any) {
console.log("Error: ", err);
throw new Error(err);
}
},
Expand Down
19 changes: 17 additions & 2 deletions src/domain/contexts/WriteTaskList
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
You are an AI agent that will get a task from the user. Your job is to write down a step by step plan to complete the task using available commands.
You are an AI agent that will get a task from the user. Your job is to write down a very detailed, step by step plan to complete the user's task using available commands.

The available commands are:
- DeleteFile - Deletes 1 file, returns `void`
Expand Down Expand Up @@ -55,8 +55,23 @@ Example 2 - Getting cinnamon roll recipe and saving it to a file with the recipe
]
{/DATA}

Note that the descriptions in the given examples are not detailed enough. You need to write more detailed descriptions for your tasks.

Remember that if you want to reference a result from a previous task, you need to start indexing from 0, e.g. {{{RESULT_0}}} for task 1 result.
Remember that if you want to reference a result from a previous task, you need to start indexing from 0, e.g. {{{RESULT_0}}} for the result of task 1. If you think that a task will require detailed context to fulfill the user's goal, end its description with a pipe '|' followed by the variable {{{RESULT_i}}} to pass that result in as context. For example, you can append `|{{{RESULT_0}}}` to a description to give that task the result of the first task as context. Example:

Example 3 - Create a tree structure of an existing project at './example/' directory and save it to a file:
{DATA}
[
{
"task": "GetTree",
"description": "Current directory"
},
{
"task": "WriteFile",
"description": "Write the tree structure given in the context to 'tree_structure.yaml' file |{{{RESULT_0}}}"
}
]
{/DATA}

Make sure that the task list is as short as possible and contains only the tasks needed to achieve the user's goal. You have all the tools you need to get real data. For example, the 'GetLinks' command can be used to get links from the internet.

Expand Down
149 changes: 78 additions & 71 deletions src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,80 +9,87 @@ const app = express();
app.use(bodyParser.json());

app.post("/actions", async (req, res) => {
const parsed = Action.Create.safeParse(req.body);
if (!parsed.success) {
res.status(400).json(parsed);
return;
}
const data: z.infer<typeof Action.Create> = parsed.data;
const api = new Api(data);
switch (data.action) {
case Actions.DeleteFile: {
await api.DeleteFile.perform(data.message);
break;
}
case Actions.WriteFile: {
await api.WriteFile.perform(data.message);
break;
}
case Actions.ListDirs: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.ListDirs.perform(data.message),
});
}
case Actions.ReadFiles: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.ReadFiles.perform(data.message),
});
}
case Actions.WriteTaskList: {
await api.WriteTaskList.perform(data.message);
break;
}
case Actions.RunCommand: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.RunCommand.perform(data.message),
});
try {
const parsed = Action.Create.safeParse(req.body);
if (!parsed.success) {
res.status(400).json(parsed);
return;
}
case Actions.GetLinks: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.GetLinks.perform(data.message),
});
const data: z.infer<typeof Action.Create> = parsed.data;
const api = new Api(data);
switch (data.action) {
case Actions.DeleteFile: {
await api.DeleteFile.perform(data.message);
break;
}
case Actions.WriteFile: {
await api.WriteFile.perform(data.message);
break;
}
case Actions.ListDirs: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.ListDirs.perform(data.message),
});
}
case Actions.ReadFiles: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.ReadFiles.perform(data.message),
});
}
case Actions.WriteTaskList: {
await api.WriteTaskList.perform(data.message);
break;
}
case Actions.RunCommand: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.RunCommand.perform(data.message),
});
}
case Actions.GetLinks: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.GetLinks.perform(data.message),
});
}
case Actions.VisitLink: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.VisitLink.perform(data.message),
});
}
case Actions.GetTree: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.GetTree.perform(data.message),
});
}
default: {
return res.status(400).json({
success: false,
message: `Unknown action: ${data.action}. AcceptedActions: ${AcceptedActions}`,
});
}
}
case Actions.VisitLink: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.VisitLink.perform(data.message),
});
}
case Actions.GetTree: {
return res.status(200).json({
success: true,
message: "Success",
data: await api.GetTree.perform(data.message),
});
}
default: {
return res.status(400).json({
success: false,
message: `Unknown action: ${data.action}. AcceptedActions: ${AcceptedActions}`,
});
}
}

return res.status(200).json({
success: true,
message: "Success",
});
return res.status(200).json({
success: true,
message: "Success",
});
} catch (err: any) {
return res.status(500).json({
success: false,
message: err?.message ?? err?.toString() ?? "Unknown error",
});
}
});

app.listen(config.port, config.hostname, async () => {
Expand Down
88 changes: 79 additions & 9 deletions src/regression.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
import colors from "colors";
import { mkdirSync, readdirSync, readFileSync, rmSync } from "fs";
import {
mkdirSync,
readdirSync,
readFileSync,
rmSync,
writeFileSync,
} from "fs";
import { Api } from "./domain";
import { config } from "./config";

Expand Down Expand Up @@ -311,23 +317,86 @@ const tests = [
}
return true;
},

// test12
async () => {
  // Arrange: build a small nested fixture under sandbox/test12.
  // Directories are listed parents-first because mkdirSync is called
  // without `recursive`, so each parent must exist before its children.
  const root = "sandbox/test12";
  const directories = [
    "",
    "/abc",
    "/def",
    "/def/ghi",
    "/abc/ghi",
    "/abc/jkl",
    "/def/mno",
    "/def/ghi/pqr",
  ];
  for (const dir of directories) {
    mkdirSync(`${root}${dir}`);
  }

  // Every leaf directory receives the same single file.
  const leafFiles = [
    "/abc/ghi/xyz.txt",
    "/abc/jkl/xyz.txt",
    "/def/mno/xyz.txt",
    "/def/ghi/pqr/xyz.txt",
  ];
  for (const file of leafFiles) {
    writeFileSync(`${root}${file}`, "hello world");
  }

  // Act: hand the natural-language task to the WriteTaskList action.
  // NOTE(review): presumably makeApi("test12") scopes the API to
  // sandbox/test12 — confirm against makeApi's definition.
  const api = makeApi("test12");
  await api.WriteTaskList.perform(
    "Save tree structure of current dir to a file called 'tree.txt'"
  );

  // Assert: 'tree.txt' must exist directly inside the test directory...
  const producedEntries = readdirSync(root);
  if (!producedEntries.includes("tree.txt")) {
    console.log("test12: 'tree.txt' not found".red);
    return false;
  }

  // ...and its content must mention both top-level directories.
  const treeTxtContent = readFileSync(`${root}/tree.txt`, "utf-8");
  const mentionsBothRoots =
    treeTxtContent.includes("abc") && treeTxtContent.includes("def");
  if (!mentionsBothRoots) {
    console.log("test12: treeTxtContent doesn't include 'abc' or 'def'".red);
    return false;
  }
  return true;
},
];

export const performRegression = async () => {
/**
 * Logs a horizontal rule of '=' characters to the console, styled with the
 * `.cyan.bold` string accessors (presumably supplied by the `colors`
 * package imported at the top of the file).
 */
const printSpacer = () => {
  const rule = "==============================================================";
  console.log(rule.cyan.bold);
};

export const performRegression = async (testNumber?: number) => {
colors.enable();
rmSync("sandbox", { recursive: true });
mkdirSync("sandbox", { recursive: true });
const testSummary: { [key: string]: boolean } = {};
printSpacer;
if (testNumber !== undefined) {
try {
const test = tests[testNumber - 1];
const isSuccess = await test();
if (isSuccess) {
printSpacer();
console.log(`\t\t\tTest ${testNumber} passed!`.green.bold);
printSpacer();
} else {
printSpacer();
console.log(`\t\t\tTest ${testNumber} failed!`.red.bold);
printSpacer();
}
} catch (e) {
console.log(e);
printSpacer();
console.log(`\t\t\tTest ${testNumber} failed!`.red.bold);
printSpacer();
}
return;
}

try {
for (let i = 0; i < tests.length; i++) {
try {
const test = tests[i];
console.log(
`==============================================================\n\t\t\tRunning test ${
i + 1
}\n==============================================================`
.cyan.bold
);
printSpacer();
console.log(`\t\t\tRunning test ${i + 1}`);
for (
let trialNumber = 0;
trialNumber < config.retryRegressionNumber - 1;
Expand Down Expand Up @@ -356,10 +425,11 @@ export const performRegression = async () => {
testSummary[`Test ${i + 1}`] = false;
}
}
printSpacer();
} finally {
console.log("\n\n\t\t\tRegression summary:\t\t\t\n".bgBlack.white);
console.log(testSummary);
}
};

performRegression();
performRegression(process.argv[2] ? Number(process.argv[2]) : undefined);

0 comments on commit a4b9c70

Please sign in to comment.