From cb18f41469f134e8d4aa6ff2f8d82c11c9a554c0 Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 01:49:25 -0500 Subject: [PATCH 01/11] streamline types --- evals/index.eval.ts | 3 +- evals/utils.ts | 2 +- lib/cache/ActionCache.ts | 2 +- lib/cache/BaseCache.ts | 2 +- lib/handlers/actHandler.ts | 11 ++-- lib/handlers/extractHandler.ts | 3 +- lib/handlers/observeHandler.ts | 9 ++- lib/index.ts | 109 ++++++++++++--------------------- lib/inference.ts | 31 ++-------- lib/llm/AnthropicClient.ts | 7 ++- lib/llm/LLMClient.ts | 2 +- lib/llm/LLMProvider.ts | 4 +- lib/llm/OpenAIClient.ts | 7 ++- lib/utils.ts | 2 +- lib/vision.ts | 7 ++- types/act.ts | 23 +++++++ types/browser.ts | 8 +++ types/inference.ts | 14 +++++ types/log.ts | 13 ++++ lib/types.ts => types/model.ts | 28 --------- types/playwright.ts | 13 ++++ types/stagehand.ts | 87 ++++++++++++++++++++++++++ 22 files changed, 233 insertions(+), 154 deletions(-) create mode 100644 types/act.ts create mode 100644 types/browser.ts create mode 100644 types/inference.ts create mode 100644 types/log.ts rename lib/types.ts => types/model.ts (50%) create mode 100644 types/playwright.ts create mode 100644 types/stagehand.ts diff --git a/evals/index.eval.ts b/evals/index.eval.ts index 1aa1c178..151ca7f6 100644 --- a/evals/index.eval.ts +++ b/evals/index.eval.ts @@ -3,7 +3,8 @@ import { Stagehand } from "../lib"; import { z } from "zod"; import process from "process"; import { EvalLogger } from "./utils"; -import { AvailableModel, LogLine } from "../lib/types"; +import { AvailableModel } from "../types/model"; +import { LogLine } from "../types/log"; const env: "BROWSERBASE" | "LOCAL" = process.env.EVAL_ENV?.toLowerCase() === "browserbase" diff --git a/evals/utils.ts b/evals/utils.ts index 2c3fca54..45cfb07d 100644 --- a/evals/utils.ts +++ b/evals/utils.ts @@ -1,6 +1,6 @@ -import { LogLine } from "../lib/types"; import { Stagehand } from "../lib"; import { logLineToString } from "../lib/utils"; +import { LogLine } from "../types/log"; type LogLineEval = LogLine & { parsedAuxiliary?: string | object; diff --git a/lib/cache/ActionCache.ts b/lib/cache/ActionCache.ts index 61059d5f..b54801d6 100644 --- a/lib/cache/ActionCache.ts +++ b/lib/cache/ActionCache.ts @@ -1,4 +1,4 @@ -import { LogLine } from "../../lib/types"; +import { LogLine } from "../../types/log"; import { BaseCache, CacheEntry } from "./BaseCache"; export interface PlaywrightCommand { diff --git a/lib/cache/BaseCache.ts b/lib/cache/BaseCache.ts index fa1e6d71..7ba0ace4 100644 --- a/lib/cache/BaseCache.ts +++ b/lib/cache/BaseCache.ts @@ -1,7 +1,7 @@ import * as fs from "fs"; import * as path from "path"; import * as crypto from "crypto"; -import { LogLine } from "../../lib/types"; +import { LogLine } from "../../types/log"; export interface CacheEntry { timestamp: number; diff --git a/lib/handlers/actHandler.ts b/lib/handlers/actHandler.ts index 1b366ae1..6c7d8b5f 100644 --- a/lib/handlers/actHandler.ts +++ b/lib/handlers/actHandler.ts @@ -2,15 +2,15 @@ import { Stagehand } from "../index"; import { LLMProvider } from "../llm/LLMProvider"; import { ScreenshotService } from "../vision"; import { verifyActCompletion, act, fillInVariables } from "../inference"; -import { - LogLine, - PlaywrightCommandException, - PlaywrightCommandMethodNotSupportedException, -} from "../types"; import { Locator, Page } from "@playwright/test"; import { ActionCache } from "../cache/ActionCache"; import { LLMClient, modelsWithVision } from "../llm/LLMClient"; import { generateId } from "../utils"; +import { LogLine } from "../../types/log"; +import { + PlaywrightCommandException, + PlaywrightCommandMethodNotSupportedException, +} from "../../types/playwright"; export class StagehandActHandler { private readonly stagehand: Stagehand; @@ -1096,7 +1096,6 @@ export class StagehandActHandler { action, domElements: outputString, steps, - llmProvider: this.llmProvider, llmClient, screenshot: annotatedScreenshot, logger: this.logger, diff --git a/lib/handlers/extractHandler.ts b/lib/handlers/extractHandler.ts index bc9b8d00..53d372c0 100644 --- a/lib/handlers/extractHandler.ts +++ b/lib/handlers/extractHandler.ts @@ -1,7 +1,7 @@ import { LLMProvider } from "../llm/LLMProvider"; import { Stagehand } from "../index"; import { z } from "zod"; -import { AvailableModel, LogLine } from "../types"; +import { LogLine } from "../../types/log"; import { extract } from "../inference"; import { LLMClient } from "../llm/LLMClient"; @@ -114,7 +114,6 @@ export class StagehandExtractHandler { progress, previouslyExtractedContent: content, domElements: outputString, - llmProvider: this.llmProvider, schema, llmClient, chunksSeen: chunksSeen.length, diff --git a/lib/handlers/observeHandler.ts b/lib/handlers/observeHandler.ts index f0e7ac04..24e37e8a 100644 --- a/lib/handlers/observeHandler.ts +++ b/lib/handlers/observeHandler.ts @@ -1,10 +1,10 @@ -import { LLMProvider } from "../llm/LLMProvider"; -import { LogLine, AvailableModel } from "../types"; +import { LogLine } from "../../types/log"; import { Stagehand } from "../index"; import { observe } from "../inference"; -import { LLMClient, modelsWithVision } from "../llm/LLMClient"; -import { ScreenshotService } from "../vision"; +import { LLMClient } from "../llm/LLMClient"; +import { LLMProvider } from "../llm/LLMProvider"; import { generateId } from "../utils"; +import { ScreenshotService } from "../vision"; export class StagehandObserveHandler { private readonly stagehand: Stagehand; @@ -134,7 +134,6 @@ export class StagehandObserveHandler { const observationResponse = await observe({ instruction, domElements: outputString, - llmProvider: this.llmProvider, llmClient, image: annotatedScreenshot, requestId, diff --git a/lib/index.ts b/lib/index.ts index c2e4725b..e89604ad 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -1,18 +1,31 @@ -import { type Page, type BrowserContext, chromium } from "@playwright/test"; -import { z } from "zod"; +import { Browserbase } from "@browserbasehq/sdk"; +import { type BrowserContext, chromium, type Page } from "@playwright/test"; +import { randomUUID } from "crypto"; import fs from "fs"; -import { Browserbase, ClientOptions } from "@browserbasehq/sdk"; -import { LLMProvider } from "./llm/LLMProvider"; -import { AvailableModel } from "./types"; -// @ts-ignore we're using a built js file as a string here +import os from "os"; +import { z } from "zod"; +import { BrowserResult } from "../types/browser"; +import { LogLine } from "../types/log"; +import { + ActOptions, + ActResult, + ConstructorParams, + ExtractOptions, + ExtractResult, + InitFromPageOptions, + InitFromPageResult, + InitOptions, + InitResult, + ObserveOptions, + ObserveResult, +} from "../types/stagehand"; import { scriptContent } from "./dom/build/scriptContent"; -import { LogLine } from "./types"; -import { randomUUID } from "crypto"; -import { logLineToString } from "./utils"; +import { StagehandActHandler } from "./handlers/actHandler"; import { StagehandExtractHandler } from "./handlers/extractHandler"; import { StagehandObserveHandler } from "./handlers/observeHandler"; -import { StagehandActHandler } from "./handlers/actHandler"; import { LLMClient } from "./llm/LLMClient"; +import { LLMProvider } from "./llm/LLMProvider"; +import { logLineToString } from "./utils"; require("dotenv").config({ path: ".env" }); @@ -26,7 +39,7 @@ async function getBrowser( logger: (message: LogLine) => void, browserbaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams, browserbaseResumeSessionID?: string, -) { +): Promise { if (env === "BROWSERBASE") { if (!apiKey) { logger({ @@ -184,7 +197,7 @@ async function getBrowser( }, }); - const tmpDir = fs.mkdtempSync(`/tmp/pwtest`); + const tmpDir = fs.mkdtempSync("/tmp/pwtest"); fs.mkdirSync(`${tmpDir}/userdir/Default`, { recursive: true }); const defaultPreferences = { @@ -311,22 +324,7 @@ export class Stagehand { browserbaseResumeSessionID, modelName, modelClientOptions, - }: { - env: "LOCAL" | "BROWSERBASE"; - apiKey?: string; - projectId?: string; - verbose?: 0 | 1 | 2; - debugDom?: boolean; - llmProvider?: LLMProvider; - headless?: boolean; - logger?: (message: LogLine) => void; - domSettleTimeoutMs?: number; - browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; - enableCaching?: boolean; - browserbaseResumeSessionID?: string; - modelName?: AvailableModel; - modelClientOptions?: ClientOptions; - } = { + }: ConstructorParams = { env: "BROWSERBASE", }, ) { @@ -356,14 +354,7 @@ export class Stagehand { modelName, modelClientOptions, domSettleTimeoutMs, - }: { - modelName?: AvailableModel; - modelClientOptions?: ClientOptions; - domSettleTimeoutMs?: number; - } = {}): Promise<{ - debugUrl: string; - sessionUrl: string; - }> { + }: InitOptions = {}): Promise { const llmClient = modelName ? this.llmProvider.getClient(modelName, modelClientOptions) : this.llmClient; @@ -377,7 +368,11 @@ export class Stagehand { this.browserbaseResumeSessionID, ).catch((e) => { console.error("Error in init:", e); - return { context: undefined, debugUrl: undefined, sessionUrl: undefined }; + return { + context: undefined, + debugUrl: undefined, + sessionUrl: undefined, + } as BrowserResult; }); this.context = context; this.page = context.pages()[0]; @@ -442,11 +437,11 @@ export class Stagehand { return { debugUrl, sessionUrl }; } - async initFromPage( - page: Page, - modelName?: AvailableModel, - modelClientOptions?: ClientOptions, - ): Promise<{ context: BrowserContext }> { + async initFromPage({ + page, + modelName, + modelClientOptions, + }: InitFromPageOptions): Promise { this.page = page; this.context = page.context(); this.llmClient = modelName @@ -474,7 +469,6 @@ export class Stagehand { return { context: this.context }; } - // Logging private pending_logs_to_send_to_browserbase: LogLine[] = []; private is_processing_browserbase_logs: boolean = false; @@ -653,18 +647,7 @@ export class Stagehand { useVision = "fallback", variables = {}, domSettleTimeoutMs, - }: { - action: string; - modelName?: AvailableModel; - modelClientOptions?: ClientOptions; - useVision?: "fallback" | boolean; - variables?: Record; - domSettleTimeoutMs?: number; - }): Promise<{ - success: boolean; - message: string; - action: string; - }> { + }: ActOptions): Promise { if (!this.actHandler) { throw new Error("Act handler not initialized"); } @@ -743,13 +726,7 @@ export class Stagehand { modelName, modelClientOptions, domSettleTimeoutMs, - }: { - instruction: string; - schema: T; - modelName?: AvailableModel; - modelClientOptions?: ClientOptions; - domSettleTimeoutMs?: number; - }): Promise> { + }: ExtractOptions): Promise> { if (!this.extractHandler) { throw new Error("Extract handler not initialized"); } @@ -812,13 +789,7 @@ export class Stagehand { }); } - async observe(options?: { - instruction?: string; - modelName?: AvailableModel; - modelClientOptions?: ClientOptions; - useVision?: boolean; - domSettleTimeoutMs?: number; - }): Promise<{ selector: string; description: string }[]> { + async observe(options?: ObserveOptions): Promise { if (!this.observeHandler) { throw new Error("Observe handler not initialized"); } diff --git a/lib/inference.ts b/lib/inference.ts index a9e1197d..5fc48dba 100644 --- a/lib/inference.ts +++ b/lib/inference.ts @@ -21,6 +21,8 @@ import { ChatMessage, LLMClient, } from "./llm/LLMClient"; +import { VerifyActCompletionParams } from "../types/inference"; +import { ActResult, ActParams } from "../types/act"; export async function verifyActCompletion({ goal, @@ -30,15 +32,7 @@ export async function verifyActCompletion({ domElements, logger, requestId, -}: { - goal: string; - steps: string; - llmClient: LLMClient; - screenshot?: Buffer; - domElements?: string; - logger: (message: { category?: string; message: string }) => void; - requestId: string; -}): Promise { +}: VerifyActCompletionParams): Promise { const messages: ChatMessage[] = [ buildVerifyActCompletionSystemPrompt(), buildVerifyActCompletionUserPrompt(goal, steps, domElements), @@ -106,24 +100,7 @@ export async function act({ logger, requestId, variables, -}: { - action: string; - steps?: string; - domElements: string; - llmClient: LLMClient; - screenshot?: Buffer; - retries?: number; - logger: (message: { category?: string; message: string }) => void; - requestId: string; - variables?: Record; -}): Promise<{ - method: string; - element: number; - args: any[]; - completed: boolean; - step: string; - why?: string; -} | null> { +}: ActParams): Promise { const messages: ChatMessage[] = [ buildActSystemPrompt(), buildActUserPrompt(action, steps, domElements, variables), diff --git a/lib/llm/AnthropicClient.ts b/lib/llm/AnthropicClient.ts index 9d0b7612..bb4aa99b 100644 --- a/lib/llm/AnthropicClient.ts +++ b/lib/llm/AnthropicClient.ts @@ -1,9 +1,10 @@ import Anthropic, { ClientOptions } from "@anthropic-ai/sdk"; -import { LLMClient, ChatCompletionOptions } from "./LLMClient"; +import { Message, MessageCreateParams } from "@anthropic-ai/sdk/resources"; import { zodToJsonSchema } from "zod-to-json-schema"; +import { LogLine } from "../../types/log"; +import { AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { AvailableModel, LogLine } from "../types"; -import { Message, MessageCreateParams } from "@anthropic-ai/sdk/resources"; +import { ChatCompletionOptions, LLMClient } from "./LLMClient"; export class AnthropicClient extends LLMClient { private client: Anthropic; diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 904396d0..9a83e92e 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -1,4 +1,4 @@ -import { AvailableModel, ToolCall } from "../types"; +import { AvailableModel, ToolCall } from "../../types/model"; export interface ChatMessage { role: "system" | "user" | "assistant"; diff --git a/lib/llm/LLMProvider.ts b/lib/llm/LLMProvider.ts index 1da0ad91..a4fb8d0d 100644 --- a/lib/llm/LLMProvider.ts +++ b/lib/llm/LLMProvider.ts @@ -2,12 +2,12 @@ import { OpenAIClient } from "./OpenAIClient"; import { AnthropicClient } from "./AnthropicClient"; import { LLMClient } from "./LLMClient"; import { LLMCache } from "../cache/LLMCache"; +import { LogLine } from "../../types/log"; import { - LogLine, AvailableModel, ModelProvider, ClientOptions, -} from "../types"; +} from "../../types/model"; export class LLMProvider { private modelToProviderMap: { [key in AvailableModel]: ModelProvider } = { diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index 4432bc9c..47a08354 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -1,9 +1,10 @@ import OpenAI, { ClientOptions } from "openai"; import { zodResponseFormat } from "openai/helpers/zod"; -import { LLMClient, ChatCompletionOptions } from "./LLMClient"; -import { LLMCache } from "../cache/LLMCache"; -import { LogLine, AvailableModel } from "../types"; import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat"; +import { LogLine } from "../../types/log"; +import { AvailableModel } from "../../types/model"; +import { LLMCache } from "../cache/LLMCache"; +import { ChatCompletionOptions, LLMClient } from "./LLMClient"; export class OpenAIClient extends LLMClient { private client: OpenAI; diff --git a/lib/utils.ts b/lib/utils.ts index 9a4fb800..544aec2e 100644 --- a/lib/utils.ts +++ b/lib/utils.ts @@ -1,5 +1,5 @@ import crypto from "crypto"; -import { LogLine } from "./types"; +import { LogLine } from "../types/log"; export function generateId(operation: string) { return crypto.createHash("sha256").update(operation).digest("hex"); diff --git a/lib/vision.ts b/lib/vision.ts index 5774912e..4dbb4ae0 100644 --- a/lib/vision.ts +++ b/lib/vision.ts @@ -1,10 +1,11 @@ -import { type Frame, type ElementHandle, Page } from "@playwright/test"; +import { Page } from "@playwright/test"; +import { exec } from "child_process"; import fs from "fs"; import path from "path"; import sharp from "sharp"; -import { exec } from "child_process"; -import { LogLine } from "./types"; +import { LogLine } from "../types/log"; import { logLineToString } from "./utils"; + type AnnotationBox = { x: number; y: number; diff --git a/types/act.ts b/types/act.ts new file mode 100644 index 00000000..8fb033cb --- /dev/null +++ b/types/act.ts @@ -0,0 +1,23 @@ +import { Buffer } from "buffer"; +import { LLMClient } from "../lib/llm/LLMClient"; + +export interface ActParams { + action: string; + steps?: string; + domElements: string; + llmClient: LLMClient; + screenshot?: Buffer; + retries?: number; + logger: (message: { category?: string; message: string }) => void; + requestId: string; + variables?: Record; +} + +export interface ActResult { + method: string; + element: number; + args: any[]; + completed: boolean; + step: string; + why?: string; +} diff --git a/types/browser.ts b/types/browser.ts new file mode 100644 index 00000000..a36ae56a --- /dev/null +++ b/types/browser.ts @@ -0,0 +1,8 @@ +import { Browser, BrowserContext } from "@playwright/test"; + +export interface BrowserResult { + browser?: Browser; + context: BrowserContext; + debugUrl?: string; + sessionUrl?: string; +} diff --git a/types/inference.ts b/types/inference.ts new file mode 100644 index 00000000..367b7b68 --- /dev/null +++ b/types/inference.ts @@ -0,0 +1,14 @@ +import { Buffer } from "buffer"; +import { LLMClient } from "../lib/llm/LLMClient"; +import { LLMProvider } from "../lib/llm/LLMProvider"; + +export interface VerifyActCompletionParams { + goal: string; + steps: string; + llmProvider: LLMProvider; + llmClient: LLMClient; + screenshot?: Buffer; + domElements?: string; + logger: (message: { category?: string; message: string }) => void; + requestId: string; +} diff --git a/types/log.ts b/types/log.ts new file mode 100644 index 00000000..381af6fc --- /dev/null +++ b/types/log.ts @@ -0,0 +1,13 @@ +export type LogLine = { + id?: string; + category?: string; + message: string; + level?: 0 | 1 | 2; + timestamp?: string; + auxiliary?: { + [key: string]: { + value: string; + type: "object" | "string" | "html" | "integer" | "float" | "boolean"; + }; + }; +}; diff --git a/lib/types.ts b/types/model.ts similarity index 50% rename from lib/types.ts rename to types/model.ts index 47248c71..d48dfa00 100644 --- a/lib/types.ts +++ b/types/model.ts @@ -3,34 +3,6 @@ import { Tool as AnthropicTool } from "@anthropic-ai/sdk/resources"; import type { ClientOptions as OpenAIClientOptions } from "openai"; import { ChatCompletionTool as OpenAITool } from "openai/resources"; -export class PlaywrightCommandException extends Error { - constructor(message: string) { - super(message); - this.name = "PlaywrightCommandException"; - } -} - -export class PlaywrightCommandMethodNotSupportedException extends Error { - constructor(message: string) { - super(message); - this.name = "PlaywrightCommandMethodNotSupportedException"; - } -} - -export type LogLine = { - id?: string; - category?: string; - message: string; - level?: 0 | 1 | 2; - timestamp?: string; - auxiliary?: { - [key: string]: { - value: string; - type: "object" | "string" | "html" | "integer" | "float" | "boolean"; - }; - }; -}; - export type AvailableModel = | "gpt-4o" | "gpt-4o-mini" diff --git a/types/playwright.ts b/types/playwright.ts new file mode 100644 index 00000000..31380439 --- /dev/null +++ b/types/playwright.ts @@ -0,0 +1,13 @@ +export class PlaywrightCommandException extends Error { + constructor(message: string) { + super(message); + this.name = "PlaywrightCommandException"; + } +} + +export class PlaywrightCommandMethodNotSupportedException extends Error { + constructor(message: string) { + super(message); + this.name = "PlaywrightCommandMethodNotSupportedException"; + } +} diff --git a/types/stagehand.ts b/types/stagehand.ts new file mode 100644 index 00000000..59f77928 --- /dev/null +++ b/types/stagehand.ts @@ -0,0 +1,87 @@ +import Browserbase from "@browserbasehq/sdk"; +import { BrowserContext, Page } from "@playwright/test"; +import { LLMProvider } from "../lib/llm/LLMProvider"; +import { LogLine } from "./log"; +import { AvailableModel, ClientOptions } from "./model"; +import { z } from "zod"; + +export interface ConstructorParams { + env: "LOCAL" | "BROWSERBASE"; + apiKey?: string; + projectId?: string; + verbose?: 0 | 1 | 2; + debugDom?: boolean; + llmProvider?: LLMProvider; + headless?: boolean; + logger?: (message: LogLine) => void; + domSettleTimeoutMs?: number; + browserBaseSessionCreateParams?: Browserbase.Sessions.SessionCreateParams; + enableCaching?: boolean; + browserbaseResumeSessionID?: string; + modelName?: AvailableModel; + modelClientOptions?: ClientOptions; +} + +export interface InitResult { + debugUrl: string; + sessionUrl: string; +} + +export interface InitOptions { + modelName?: AvailableModel; + modelClientOptions?: ClientOptions; + domSettleTimeoutMs?: number; +} + +export interface InitResult { + debugUrl: string; + sessionUrl: string; +} + +export interface InitFromPageOptions { + page: Page; + modelName?: AvailableModel; + modelClientOptions?: ClientOptions; +} + +export interface InitFromPageResult { + context: BrowserContext; +} + +export interface ActOptions { + action: string; + modelName?: AvailableModel; + modelClientOptions?: ClientOptions; + useVision?: "fallback" | boolean; + variables?: Record; + domSettleTimeoutMs?: number; +} + +export interface ActResult { + success: boolean; + message: string; + action: string; +} + +export interface ExtractOptions { + instruction: string; + schema: T; + modelName?: AvailableModel; + modelClientOptions?: ClientOptions; + domSettleTimeoutMs?: number; +} + +export type ExtractResult = z.infer; + +export interface ObserveOptions { + instruction?: string; + modelName?: AvailableModel; + modelClientOptions?: ClientOptions; + useVision?: boolean; + domSettleTimeoutMs?: number; +} + +export interface ObserveResult { + selector: string; + description: string; +} From c8334416ec414d55671bec4fcf0bc55623549579 Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 17:49:42 -0500 Subject: [PATCH 02/11] export utils/debug --- lib/dom/debug.ts | 2 +- lib/dom/utils.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/dom/debug.ts b/lib/dom/debug.ts index a84fad42..dddc6789 100644 --- a/lib/dom/debug.ts +++ b/lib/dom/debug.ts @@ -1,4 +1,4 @@ -async function debugDom() { +export async function debugDom() { window.chunkNumber = 0; const { selectorMap: multiSelectorMap, outputString } = diff --git a/lib/dom/utils.ts b/lib/dom/utils.ts index e2a1b29b..197fb9af 100644 --- a/lib/dom/utils.ts +++ b/lib/dom/utils.ts @@ -1,4 +1,4 @@ -async function waitForDomSettle() { +export async function waitForDomSettle() { return new Promise((resolve) => { const createTimeout = () => { return setTimeout(() => { From c69aaf82d263a4bffd034dbdc33001187d68e1f9 Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 17:50:52 -0500 Subject: [PATCH 03/11] expose startDomDebug --- lib/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/index.ts b/lib/index.ts index e89604ad..3d8df99e 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -600,7 +600,7 @@ export class Stagehand { } } - private async startDomDebug() { + async startDomDebug() { try { await this.page .evaluate(() => { From cf28b648a738ebbcae1541df3ae6df285eaaf904 Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 17:55:04 -0500 Subject: [PATCH 04/11] prevent node_modules checking --- tsconfig.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tsconfig.json b/tsconfig.json index 89cd66de..4aad6e00 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -11,8 +11,8 @@ "baseUrl": ".", "paths": { "*": ["node_modules/*", "lib/types/*"] - } + }, + "skipLibCheck": true }, - "exclude": ["node_modules", "dist", ".eslintrc.cjs"] } From 2a705453403c3034973610457d6f2f2070c6d60a Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 17:58:43 -0500 Subject: [PATCH 05/11] create changeset --- .changeset/swift-fishes-fail.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/swift-fishes-fail.md diff --git a/.changeset/swift-fishes-fail.md b/.changeset/swift-fishes-fail.md new file mode 100644 index 00000000..e5722057 --- /dev/null +++ b/.changeset/swift-fishes-fail.md @@ -0,0 +1,5 @@ +--- +"@browserbasehq/stagehand": patch +--- + +Streamline type definitions and fix existing typescript errors From b9e1f7974eea1d60352e42b483ae2d1ab403d669 Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 23:29:01 -0500 Subject: [PATCH 06/11] add chat message type --- lib/llm/LLMClient.ts | 23 ++++++++++++++++------- lib/llm/OpenAIClient.ts | 10 ++++++++-- 2 files changed, 24 insertions(+), 9 deletions(-) diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 9a83e92e..4784d498 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -2,15 +2,24 @@ import { AvailableModel, ToolCall } from "../../types/model"; export interface ChatMessage { role: "system" | "user" | "assistant"; - content: - | string - | { - type: "image_url" | "text"; - image_url?: { url: string }; - text?: string; - }[]; + content: ChatMessageContent; } +export type ChatMessageContent = + | string + | (ChatMessageImageContent | ChatMessageTextContent)[]; + +export type ChatMessageImageContent = { + type: "image_url"; + image_url: { url: string }; + text?: string; +}; + +export type ChatMessageTextContent = { + type: string; + text: string; +}; + export const modelsWithVision: AvailableModel[] = [ "gpt-4o", "gpt-4o-mini", diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index 47a08354..65db51bd 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -4,7 +4,12 @@ import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat"; import { LogLine } from "../../types/log"; import { AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { ChatCompletionOptions, LLMClient } from "./LLMClient"; +import { + ChatCompletionOptions, + ChatMessage, + ChatMessageImageContent, + LLMClient, +} from "./LLMClient"; export class OpenAIClient extends LLMClient { private client: OpenAI; @@ -61,6 +66,7 @@ export class OpenAIClient extends LLMClient { cacheOptions, options.requestId, ); + if (cachedResponse) { this.logger({ category: "llm_cache", @@ -94,7 +100,7 @@ export class OpenAIClient extends LLMClient { } if (options.image) { - const screenshotMessage: any = { + const screenshotMessage: ChatMessage = { role: "user", content: [ { From 6fc1c2b16923a8c2e474181e4c300f77a1ef68e9 Mon Sep 17 00:00:00 2001 From: Sameel Date: Mon, 25 Nov 2024 23:29:24 -0500 Subject: [PATCH 07/11] convert types to interfaces --- lib/llm/LLMClient.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 4784d498..3944146f 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -9,16 +9,16 @@ export type ChatMessageContent = | string | (ChatMessageImageContent | ChatMessageTextContent)[]; -export type ChatMessageImageContent = { +export interface ChatMessageImageContent { type: "image_url"; image_url: { url: string }; text?: string; -}; +} -export type ChatMessageTextContent = { +export interface ChatMessageTextContent { type: string; text: string; -}; +} export const modelsWithVision: AvailableModel[] = [ "gpt-4o", From 1963130fc80a49e40f427129dc6ef33130288dba Mon Sep 17 00:00:00 2001 From: Sameel Date: Tue, 26 Nov 2024 01:44:28 -0500 Subject: [PATCH 08/11] format messages to prevent casting --- lib/llm/LLMClient.ts | 3 ++- lib/llm/OpenAIClient.ts | 50 +++++++++++++++++++++++++++++++++-------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 3944146f..1d8eee5e 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -3,6 +3,7 @@ import { AvailableModel, ToolCall } from "../../types/model"; export interface ChatMessage { role: "system" | "user" | "assistant"; content: ChatMessageContent; + name?: string; } export type ChatMessageContent = @@ -16,7 +17,7 @@ export interface ChatMessageImageContent { } export interface ChatMessageTextContent { - type: string; + type: Exclude; text: string; } diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index 65db51bd..e08d0f8c 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -1,15 +1,15 @@ import OpenAI, { ClientOptions } from "openai"; import { zodResponseFormat } from "openai/helpers/zod"; -import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat"; +import { + ChatCompletionContentPartImage, + ChatCompletionContentPartText, + ChatCompletionCreateParamsNonStreaming, + ChatCompletionMessageParam, +} from "openai/resources/chat"; import { LogLine } from "../../types/log"; import { AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { - ChatCompletionOptions, - ChatMessage, - ChatMessageImageContent, - LLMClient, -} from "./LLMClient"; +import { ChatCompletionOptions, ChatMessage, LLMClient } from "./LLMClient"; export class OpenAIClient extends LLMClient { private client: OpenAI; @@ -143,10 +143,42 @@ export class OpenAIClient extends LLMClient { }, }); - const response = await this.client.chat.completions.create({ + const formattedMessages: ChatCompletionMessageParam[] = + options.messages.map((message) => { + if (Array.isArray(message.content)) { + const contentParts = message.content.map((content) => { + if ("image_url" in content) { + return { + image_url: { + url: content.image_url.url, + }, + type: "image_url", + } as ChatCompletionContentPartImage; + } else { + return { + text: content.text, + type: "text", + } as ChatCompletionContentPartText; + } + }); + return { + ...message, + content: contentParts, + } as ChatCompletionMessageParam; + } + + return message as ChatCompletionMessageParam; + }); + + const body: ChatCompletionCreateParamsNonStreaming = { ...openAiOptions, + model: this.modelName, + messages: formattedMessages, response_format: responseFormat, - } as unknown as ChatCompletionCreateParamsNonStreaming); // TODO (kamath): remove this forced typecast + stream: false, + }; + + const response = await this.client.chat.completions.create(body); this.logger({ category: "openai", From 73bfe1f0bd07159a4b13799501578d697471c88a Mon Sep 17 00:00:00 2001 From: Sameel Date: Tue, 26 Nov 2024 13:01:13 -0500 Subject: [PATCH 09/11] make startDomDebug private --- lib/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/index.ts b/lib/index.ts index 3d8df99e..e89604ad 100644 --- a/lib/index.ts +++ b/lib/index.ts @@ -600,7 +600,7 @@ export class Stagehand { } } - async startDomDebug() { + private async startDomDebug() { try { await this.page .evaluate(() => { From 9a1be98a65f62e1af9b1b5ffe25002974b49deac Mon Sep 17 00:00:00 2001 From: Sameel Date: Tue, 26 Nov 2024 13:02:44 -0500 Subject: [PATCH 10/11] Revert "format messages to prevent casting" This reverts commit 1963130fc80a49e40f427129dc6ef33130288dba. --- lib/llm/LLMClient.ts | 3 +-- lib/llm/OpenAIClient.ts | 50 ++++++++--------------------------------- 2 files changed, 10 insertions(+), 43 deletions(-) diff --git a/lib/llm/LLMClient.ts b/lib/llm/LLMClient.ts index 1d8eee5e..3944146f 100644 --- a/lib/llm/LLMClient.ts +++ b/lib/llm/LLMClient.ts @@ -3,7 +3,6 @@ import { AvailableModel, ToolCall } from "../../types/model"; export interface ChatMessage { role: "system" | "user" | "assistant"; content: ChatMessageContent; - name?: string; } export type ChatMessageContent = @@ -17,7 +16,7 @@ export interface ChatMessageImageContent { } export interface ChatMessageTextContent { - type: Exclude; + type: string; text: string; } diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index e08d0f8c..65db51bd 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -1,15 +1,15 @@ import OpenAI, { ClientOptions } from "openai"; import { zodResponseFormat } from "openai/helpers/zod"; -import { - ChatCompletionContentPartImage, - ChatCompletionContentPartText, - ChatCompletionCreateParamsNonStreaming, - ChatCompletionMessageParam, -} from "openai/resources/chat"; +import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat"; import { LogLine } from "../../types/log"; import { AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { ChatCompletionOptions, ChatMessage, LLMClient } from "./LLMClient"; +import { + ChatCompletionOptions, + ChatMessage, + ChatMessageImageContent, + LLMClient, +} from "./LLMClient"; export class OpenAIClient extends LLMClient { private client: OpenAI; @@ -143,42 +143,10 @@ export class OpenAIClient extends LLMClient { }, }); - const formattedMessages: ChatCompletionMessageParam[] = - options.messages.map((message) => { - if (Array.isArray(message.content)) { - const contentParts = message.content.map((content) => { - if ("image_url" in content) { - return { - image_url: { - url: content.image_url.url, - }, - type: "image_url", - } as ChatCompletionContentPartImage; - } else { - return { - text: content.text, - type: "text", - } as ChatCompletionContentPartText; - } - }); - return { - ...message, - content: contentParts, - } as ChatCompletionMessageParam; - } - - return message as ChatCompletionMessageParam; - }); - - const body: ChatCompletionCreateParamsNonStreaming = { + const response = await this.client.chat.completions.create({ ...openAiOptions, - model: this.modelName, - messages: formattedMessages, response_format: responseFormat, - stream: false, - }; - - const response = await this.client.chat.completions.create(body); + } as unknown as ChatCompletionCreateParamsNonStreaming); // TODO (kamath): remove this forced typecast this.logger({ category: "openai", From be34014a086243fb3f52c366f26613efc7a8e227 Mon Sep 17 00:00:00 2001 From: Sameel Date: Tue, 26 Nov 2024 13:03:14 -0500 Subject: [PATCH 11/11] formatting --- lib/llm/OpenAIClient.ts | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lib/llm/OpenAIClient.ts b/lib/llm/OpenAIClient.ts index 65db51bd..80e7e4ec 100644 --- a/lib/llm/OpenAIClient.ts +++ b/lib/llm/OpenAIClient.ts @@ -4,12 +4,7 @@ import { ChatCompletionCreateParamsNonStreaming } from "openai/resources/chat"; import { LogLine } from "../../types/log"; import { AvailableModel } from "../../types/model"; import { LLMCache } from "../cache/LLMCache"; -import { - ChatCompletionOptions, - ChatMessage, - ChatMessageImageContent, - LLMClient, -} from "./LLMClient"; +import { ChatCompletionOptions, ChatMessage, LLMClient } from "./LLMClient"; export class OpenAIClient extends LLMClient { private client: OpenAI;