heroku · mclenhard · May 16, 2025
@@ -259,6 +259,15 @@ Add this snippet to your Trae `mcp_settings.json`:
 - `pg_backups` - Manage database backups and schedules.
 - `pg_upgrade` - Upgrade PostgreSQL to a newer version.
 
+
+
+## Running evals
+
+The evals package loads an MCP client that runs the `index.ts` file, so there is no need to rebuild between tests. You can set environment variables by prefixing the `npx` command, as shown below. Full documentation is available in the [MCP Evals docs](https://www.mcpevals.io/docs).
+
+```bash
+OPENAI_API_KEY=your-key  npx mcp-eval src/evals/evals.ts src/tools/data.ts
+```
 ## Debugging
 
 You can use the [MCP inspector](https://modelcontextprotocol.io/docs/tools/inspector) or the

@@ -14,7 +14,8 @@
     "jsonschema": "^1.5.0",
     "tar-stream": "^3.1.7",
     "zod": "^3.24.2",
-    "zod-to-json-schema": "^3.24.5"
+    "zod-to-json-schema": "^3.24.5",
+    "mcp-evals": "^1.0.18"
   },
   "devDependencies": {
     "@eslint/eslintrc": "^3.3.1",
@@ -83,4 +84,4 @@
     "exec-perms": "shx chmod +x dist/*.js",
     "test:watch": "nyc mocha --watch --forbid-only"
   }
-}
+}
@@ -0,0 +1,59 @@
+//evals.ts
+
+import { EvalConfig } from 'mcp-evals';
+import { openai } from "@ai-sdk/openai";
+import { grade, EvalFunction } from "mcp-evals";
+
+/**
+ * Eval case for the pg_psql tool.
+ *
+ * `run` passes a fixed table-creation/query prompt to `grade` together with
+ * the `openai("gpt-4")` model handle and returns the JSON-parsed result.
+ * NOTE(review): assumes `grade` resolves to a JSON string — confirm against
+ * the mcp-evals API.
+ */
+const pg_psqlEval: EvalFunction = {
+    name: "pg_psql Tool Evaluation",
+    description: "Evaluates the functionality of the pg_psql tool",
+    run: async () => {
+        const prompt = "How can I create a new table named 'users' with two columns id and name, then retrieve all rows using pg_psql?";
+        return JSON.parse(await grade(openai("gpt-4"), prompt));
+    }
+};
+
+/**
+ * Eval case for the PG Info tool.
+ *
+ * `run` passes a fixed database-status prompt to `grade` together with the
+ * `openai("gpt-4")` model handle and returns the JSON-parsed result.
+ * NOTE(review): assumes `grade` resolves to a JSON string — confirm against
+ * the mcp-evals API.
+ */
+const pg_infoEval: EvalFunction = {
+    name: 'PG Info Tool Evaluation',
+    description: 'Evaluates the database status retrieval from the PG Info tool',
+    run: async () => {
+        const prompt = "Show me the status of the 'mydb' database in the 'myapp' app using the pg_info tool.";
+        return JSON.parse(await grade(openai("gpt-4"), prompt));
+    }
+};
+
+/**
+ * Eval case for the pg_ps tool.
+ *
+ * `run` passes a fixed running-queries prompt to `grade` together with the
+ * `openai("gpt-4")` model handle and returns the JSON-parsed result.
+ * NOTE(review): assumes `grade` resolves to a JSON string — confirm against
+ * the mcp-evals API.
+ */
+const pg_psEval: EvalFunction = {
+    name: 'pg_ps tool evaluation',
+    description: 'Evaluates the monitoring of active queries: progress, resources, performance',
+    run: async () => {
+        const prompt = "Show me the currently running queries on the 'mydb' database with verbose output so I can monitor resource usage on Heroku";
+        return JSON.parse(await grade(openai("gpt-4"), prompt));
+    }
+};
+
+/**
+ * Eval case for the pg_locks tool.
+ *
+ * `run` passes a fixed lock-analysis prompt to `grade` together with the
+ * `openai("gpt-4")` model handle and returns the JSON-parsed result.
+ * NOTE(review): assumes `grade` resolves to a JSON string — confirm against
+ * the mcp-evals API.
+ */
+const pgLocksEval: EvalFunction = {
+    name: 'pg_locks Tool Evaluation',
+    description: 'Evaluates the functionality of the pg_locks tool',
+    run: async () => {
+        const prompt = "Please analyze the locks for the database 'mydb' to check for blocked queries or deadlocks.";
+        return JSON.parse(await grade(openai("gpt-4"), prompt));
+    }
+};
+
+/**
+ * Eval case for the pg_outliers tool.
+ *
+ * `run` passes a fixed resource-intensive-queries prompt to `grade` together
+ * with the `openai("gpt-4")` model handle and returns the JSON-parsed result.
+ * NOTE(review): assumes `grade` resolves to a JSON string — confirm against
+ * the mcp-evals API.
+ */
+const pg_outliersEval: EvalFunction = {
+    name: 'pg_outliers Tool Evaluation',
+    description: 'Evaluates the tool for finding resource-heavy queries in Postgres',
+    run: async () => {
+        const prompt = "Which queries in my Postgres database are the most resource-intensive and how can I optimize them?";
+        return JSON.parse(await grade(openai("gpt-4"), prompt));
+    }
+};
+
+/**
+ * All eval cases defined in this file.
+ *
+ * Declared once and reused by the default-exported config so the two
+ * exports cannot drift apart when a case is added or removed.
+ */
+export const evals: EvalFunction[] = [
+    pg_psqlEval,
+    pg_infoEval,
+    pg_psEval,
+    pgLocksEval,
+    pg_outliersEval
+];
+
+/** Eval configuration: the `openai("gpt-4")` model handle plus the eval cases above. */
+const config: EvalConfig = {
+    model: openai("gpt-4"),
+    evals
+};
+
+export default config;