From 092be444a1fb0764f59dc734e80a22e409a05cbc Mon Sep 17 00:00:00 2001 From: Shyam Raghuwanshi Date: Fri, 26 Jul 2024 23:27:39 +0530 Subject: [PATCH] updating-playwright-lib (#401) --- JS/edgechains/arakoodev/package.json | 5 +- .../src/scraper/src/lib/playwright.ts | 334 ++++++++++-------- .../arakoodev/src/scraper/src/utils/index.ts | 44 ++- .../src/scraper/src/utils/page-parser.ts | 96 ++--- 4 files changed, 260 insertions(+), 219 deletions(-) diff --git a/JS/edgechains/arakoodev/package.json b/JS/edgechains/arakoodev/package.json index 166c55a8..5eafd3fd 100644 --- a/JS/edgechains/arakoodev/package.json +++ b/JS/edgechains/arakoodev/package.json @@ -13,8 +13,7 @@ "./arakooserver": "./dist/arakooserver/src/index.js", "./db": "./dist/db/src/index.js", "./scraper": "./dist/scraper/src/index.js", - "./sync-rpc": "./dist/sync-rpc/export.js", - "./playwright": "./dist/playwright/src/index.js" + "./sync-rpc": "./dist/sync-rpc/export.js" }, "scripts": { "build": "rm -rf dist && tsc -b && cp -r src/sync-rpc dist/sync-rpc", @@ -26,6 +25,8 @@ "@babel/core": "^7.24.4", "@babel/preset-env": "^7.24.4", "@hono/node-server": "^1.11.0", + "@lifeomic/attempt": "^3.1.0", + "@playwright/test": "^1.45.3", "@supabase/supabase-js": "^2.42.3", "axios": "^1.7.2", "axios-retry": "^4.1.0", diff --git a/JS/edgechains/arakoodev/src/scraper/src/lib/playwright.ts b/JS/edgechains/arakoodev/src/scraper/src/lib/playwright.ts index efe3abbd..1f56e3fe 100644 --- a/JS/edgechains/arakoodev/src/scraper/src/lib/playwright.ts +++ b/JS/edgechains/arakoodev/src/scraper/src/lib/playwright.ts @@ -1,75 +1,102 @@ -import { chromium } from "playwright"; +import { chromium, Page } from "playwright"; +import { expect } from "@playwright/test"; import axios from "axios"; -import { parseArr, parseSite, preprocessJsonInput } from "../utils/index"; -import retry from "retry"; +import { parseArr, parseSite, preprocessJsonInput } from '../utils/index'; +import { retry } from "@lifeomic/attempt"; import { removeBlankTags } from "../utils/page-parser"; export class Playwright { - constructor() {} - async #createPrompt({ task, page }: { task: string; page: any }) { + apiKey: string; + + constructor({ apiKey }: { apiKey: string }) { + this.apiKey = apiKey; + } + + async #createPrompt({ task, page, completedTaskArr }) { + const [currentPageUrl, currentPageTitle, siteOverview] = await Promise.all([ + page.evaluate('location.href'), + page.evaluate('document.title'), + parseSite(page).then(html => removeBlankTags(html).slice(0, 20000)) + ]); + + const completedActions = completedTaskArr || []; + return ` You are a Senior SDET tasked with writing Playwright code for testing purposes. Your role involves implementing specific task-based code segments within a larger test file, following the instructions provided closely. Assume that common imports like 'test' and 'expect' from '@playwright/test' are already at the top of the file. - + Context: - Your computer is a Mac. Cmd is the meta key, META. - The browser is already open. - - Current page URL: ${await page.evaluate("location.href")}. - - Current page title: ${await page.evaluate("document.title")}. + - Current page URL: ${currentPageUrl}. + - Current page title: ${currentPageTitle}. + - [NO NEED TO WRITE CODE TO OPEN THIS URL AGAIN ${currentPageUrl}, THE BROWSER IS ALREADY OPEN] + - HumanMessage Write Playwright code for this: ${task} - Overview of the site in HTML format: \\\ - ${removeBlankTags(await parseSite(page)).slice(0, 25000)} + ${siteOverview} \\\ - + + Completed Actions: ${completedActions.join(", ")} + Key Points: - - Start directly with Playwright actions as described in the user task, without adding extraneous steps or assertions. + - Don't navigate to a new page unless explicitly instructed. + - Don't give every links like this await page.click('a[href="https:^]'), only click on the specific link mentioned in the task. + - Please note that if there is a timeout error, you may need to increase the timeout value or use a different method to locate the input fields. + - Follow the following Playwright actions for the task: + // for navigating to the page + - await page.goto('https://github.com/login'); + // for clicking on the button + - await page.getByRole('button', { name: 'Submit' }).click(); + // for filling the input fields + - await page.getByLabel('Username or email address').fill('username'); + - await page.getByLabel('Password').fill('password'); + // Text input + - await page.getByRole('textbox').fill('Peter'); + // Date input + - await page.getByLabel('Birth date').fill('2020-02-02'); + // Time input + - await page.getByLabel('Appointment time').fill('13:15'); + // Local datetime input + - await page.getByLabel('Local time').fill('2020-03-02T05:15'); + - await page.locator('[data-test="password"]').fill('secret_sauce'); + - await page.getByRole('button', { name: 'Sign in' }).click(); + - await page.innerText('html') + - page.getByRole('listitem').filter({ hasText: 'Product 2' }); + - await page.getByRole('listitem').filter({ hasText: 'Product 2' }).getByRole('button', { name: 'Add to cart' }).click(); + - page.locator('button.buttonIcon.episode-actions-later'); + - await expect(page.getByText('welcome')).toBeVisible(); + - await expect(page.getByText('welcome')).toBeVisible(); + - await page.innerText(selector); + - await page.innerText(selector, options); + - const page = await browser.newPage(); + - await page.goto('https://keycode.info'); + - await page.press('body', 'A'); + - await page.screenshot({ path: 'A.png' }); + - await page.press('body', 'ArrowLeft'); + - await page.screenshot({ path: 'ArrowLeft.png' }); + - await page.press('body', 'Shift+O'); + - await page.screenshot({ path: 'O.png' }); + - await browser.close(); + // click on any links + - await page.click('a[href="https://blog.sbensu.com/posts/demand-for-visual-programming/"]'); + + - Start directly with Playwright actions without adding extraneous steps or assertions. - Include assertions like 'expect' statements or wait functions such as 'waitForLoadState' only when they are specifically requested in the user task. - - Minimal, relevant comments should be used to clarify complex actions or essential aspects of the test's purpose. - Apply 'frameLocator' for content in nested iframes, as needed based on the task requirements. - - Store the output in a variable and Return the output not log that - - User Task: [Insert the specific user task here, including any detailed instructions related to the execution, waiting for specific conditions, or explicit requests for assertions and waits.] - + Expected Code Format: \\\ - // [Insert Playwright code based on the task description. Begin with necessary actions directly, and include 'waitForLoadState', assertions, or 'expect' statements only if explicitly requested in the task. Comments should be concise and pertinent, especially for complex actions or decisions.] + // Insert Playwright code based on the task description. Begin with necessary actions directly, and include 'waitForLoadState', assertions, or 'expect' statements only if explicitly requested in the task. Comments should be concise and pertinent, especially for complex actions or decisions.] \\\ - - The objective is to create Playwright code that is efficient, precise, and perfectly aligned with the task's requirements, integrating seamlessly into the larger test file. All actions and comments should be relevant and necessary, catering to a senior-level professional's understanding of the testing scenario. - - HumanMessage Write Playwright code for this: ${task} - - Examples: + + The objective is to create Playwright code that is efficient, precise, and perfectly aligned with the task's requirements, integrating seamlessly into the larger test file. All actions should be relevant and necessary, catering to a senior-level professional's understanding of the testing scenario. + + + Examples: go to hacker news - await page.goto('https://news.ycombinator.com/') click on the first link - page.click('a[href="https://blog.sbensu.com/posts/demand-for-visual-programming/"]') - give me all the text of this page - await page.waitForLoadState('networkidle') - - - Some Playwright Actions that should use for you reference: - - await page.goto('https://github.com/login'); - - await page.getByLabel('Username or email address').fill('username'); - - await page.getByLabel('Password').fill('password'); - - await page.getByRole('button', { name: 'Sign in' }).click(); - - await page.innerText('html') - - page.getByRole('button', { name: 'submit' }); - - page.getByRole('listitem').filter({ hasText: 'Product 2' }); - - await page.getByRole('listitem').filter({ hasText: 'Product 2' }).getByRole('button', { name: 'Add to cart' }).click(); - - page.locator('button.buttonIcon.episode-actions-later'); - - await expect(page.getByText('welcome')).toBeVisible(); - - await expect(page.getByText('welcome')).toBeVisible(); - - await page.innerText(selector); - - await page.innerText(selector, options); - - const page = await browser.newPage(); - - await page.goto('https://keycode.info'); - - await page.press('body', 'A'); - - await page.screenshot({ path: 'A.png' }); - - await page.press('body', 'ArrowLeft'); - - await page.screenshot({ path: 'ArrowLeft.png' }); - - await page.press('body', 'Shift+O'); - - await page.screenshot({ path: 'O.png' }); - - await browser.close(); - // click on the links, example - - await page.click('a[href="https://blog.sbensu.com/posts/demand-for-visual-programming/"]'); + give me all the text of this page - await page.innerText('html'); `; } @@ -91,118 +118,135 @@ export class Playwright { "Identify and click on the first link displayed on the Hacker News homepage", "Extract and return all the text content from the page" ] - Ensure that each action is specific, clear, and comprehensive to facilitate precise implementation. \`\`\` + + Input: + "Go to random website. we have fields in this pattern First Name Last Name Company Name Role in Company Address Email Phone Number and then fill this fields John Smith IT Solutions Analyst 98 North Road " + + Output: + \`\`\` + [ + "Navigate to the random website by entering the URL 'https://www.randomwebsite.com/' in the browser", + "Fill the form fields with the following data: First Name: John, Last Name: Smith, Company Name: IT Solutions, Role in Company: Analyst, Address: 98 North Road, Email:", + ] + \`\`\` + + Input: + "Go to google and search for the term 'automation'. Click on the first link and extract the text from the page." + + Output: + \`\`\` + [ + "Navigate to the random website by entering the URL 'https://google.com' in the browser", + "Search for the term 'automation' in the search bar and hit Enter key", + "Click on the first link displayed in the search results", + "Extract and return all the text content from the page" + ] + \`\`\` + + + Ensure that each action is specific, clear, and comprehensive to facilitate precise implementation. `; } - async #openAIRequest({ chatApi, prompt }: { chatApi: string; prompt: string }) { - return new Promise((resolve, reject) => { - const operation = retry.operation({ - retries: 5, - factor: 3, - minTimeout: 1 * 1000, - maxTimeout: 60 * 1000, - randomize: true, - }); - - operation.attempt(async function (currentAttempt) { - await axios - .post( - "https://api.openai.com/v1/chat/completions", - { - model: "gpt-3.5-turbo-16k", - messages: [{ role: "user", content: prompt }], - max_tokens: 1000, - temperature: 0.7, - }, - { - headers: { - Authorization: "Bearer " + chatApi, - "content-type": "application/json", - }, - } - ) - .then((response) => { - resolve(response.data.choices[0].message.content); - }) - .catch((error) => { - if (error.response) { - console.log( - "Server responded with status code:", - error.response.status - ); - console.log("Response data:", error.response.data); - } else if (error.request) { - console.log("No response received:", error); - } else { - console.log( - "Error creating request:", - error.message, - "\n", - "Retrying ", - currentAttempt - ); - } - if (operation.retry(error)) { - return; - } - reject(error); - }); - }); + async #openAIRequest({ prompt }: { prompt: string }) { + return await retry(async () => { + const response = await axios.post( + "https://api.openai.com/v1/chat/completions", + { + model: "gpt-3.5-turbo-16k", + messages: [ + { role: "user", content: prompt } + ], + max_tokens: 1000, + temperature: 0.7, + }, + { + headers: { + Authorization: `Bearer ${this.apiKey}`, + "content-type": "application/json", + }, + } + ); + return response.data.choices[0].message.content; }); } - /** - * Get Playwright code for a specific task - * @param chatApi - OpenAI API key - * @param task - Task description - * @param url - URL to navigate to default is https://www.google.com - * @param headless - Run in headless mode default is false - * @returns Playwright code example - page.goto('https://www.google.com') - **/ - async call({ - chatApi, - task, - url, - headless = true, - }: { - chatApi: string; - task: string; - url?: string; - headless?: boolean; - }) { - const AsyncFunction = async function () {}.constructor; - - const browser = await chromium.launch({ - headless: headless, - }); + async #findInPage({ page, task }): Promise { + const prompt = ` + You are a programmer and your job is to pick out information in code to a pm. You are working on an html file. You will extract the necessary content asked from the information provided. + + Context: + Your computer is a mac. Cmd is the meta key, META. + The browser is already open. + Current page url is ${await page.evaluate('location.href')}. + Current page title is ${await page.evaluate('document.title')}. + Humman message: ${task} + + Here is the overview of the site. Format is in html: + \`\`\` + ${removeBlankTags(await parseSite(page)).slice(0, 25000)} + \`\`\` + `; + + return await this.#openAIRequest({ prompt }); + } + async #execPlayWrightCode({ page, code }: { page: Page, code: string }) { + const AsyncFunction = async function () { }.constructor; + const dependencies = [ + { param: 'page', value: page }, + ]; + const func = AsyncFunction(...dependencies.map((d) => d.param), code); + const args = dependencies.map((d) => d.value); + return await func(...args); + } + + /** + * Get Playwright code for a specific task + * @param task - Task description + * @param url - URL to navigate to default is https://www.google.com + * @param headless - Run in headless mode default is false + **/ + async call({ task, url, headless = true }: { task: string, url?: string, headless?: boolean }) { + const browser = await chromium.launch({ headless }); const page = await browser.newPage(); await page.goto(url || "https://www.google.com"); const taskPrompt = this.#createPromptForTaskArr(task); - const taskArr: any = parseArr(await this.#openAIRequest({ chatApi, prompt: taskPrompt })); + const taskArr: any = parseArr(await this.#openAIRequest({ prompt: taskPrompt })); + const completedTaskArr: string[] = []; let response: string = ""; + let err; + + for (const task of taskArr) { + if (response) break; - for (let i = 0; i < taskArr.length; i++) { - if (!response) { - const element = taskArr[i]; - const prompt = await this.#createPrompt({ task: element, page }); - let res: any = preprocessJsonInput(await this.#openAIRequest({ chatApi, prompt })); - const dependencies = [{ param: "page", value: page }]; + console.log(task); + let success = false; + let action = ""; + const prompt = await this.#createPrompt({ task, page, completedTaskArr }); + let errExecIndex = 0; - const func = AsyncFunction(...dependencies.map((d) => d.param), res); - const args = dependencies.map((d) => d.value); + while (!success) { + let res: any = preprocessJsonInput(await this.#openAIRequest({ prompt: prompt + err })); + + if (errExecIndex > 3) { + const findInResponse = await this.#findInPage({ page, task }); + const retryPrompt = await this.#createPrompt({ task: `We are getting this error three times: ${err}. Try writing Playwright code using this: ${findInResponse}`, page, completedTaskArr }); + res = preprocessJsonInput(await this.#openAIRequest({ prompt: retryPrompt })); + } try { - const res = await func(...args); - if (res) { - response = res; - } + const finalResponse = await this.#execPlayWrightCode({ page, code: res }); + if (finalResponse) response = finalResponse; + completedTaskArr.push(action); + success = true; } catch (error: any) { - console.log(error); + err = `\n\nError in this command ${action} Error: ${error.message}\n${error.stack} Try another way to do this action`; + console.log("Error: ", error.message); + errExecIndex++; } } } @@ -210,4 +254,4 @@ export class Playwright { await browser.close(); return response; } -} +} \ No newline at end of file diff --git a/JS/edgechains/arakoodev/src/scraper/src/utils/index.ts b/JS/edgechains/arakoodev/src/scraper/src/utils/index.ts index 2776161d..50e0d8f8 100644 --- a/JS/edgechains/arakoodev/src/scraper/src/utils/index.ts +++ b/JS/edgechains/arakoodev/src/scraper/src/utils/index.ts @@ -1,33 +1,29 @@ + const codeRegex = /```(.*)(\r\n|\r|\n)(?[\w\W\n]+)(\r\n|\r|\n)```/; +const codeRegex2 = /```javascript(.*)(\r\n|\r|\n)(?[\w\W\n]+)(\r\n|\r|\n)```/i; export function preprocessJsonInput(text) { - try { - return text.match(codeRegex).groups.code.trim(); - } catch (e) { - return text.trim(); - } + try { + return text.match(codeRegex).groups.code.trim(); + } catch (e) { + return text.trim() + } } export function parseArr(text) { - try { - if (text.startsWith("[") && text.endsWith("]")) { - return JSON.parse(text); - } - return text.match(codeRegex).groups.code.trim(); - } catch (e) { - throw new Error("No code found"); - // try { - // const regexPattern = /\[(.*?)\]/g; - // const matches = text.match(regexPattern)[1]; - // console.log({ matches }) - // if (!matches) { - // throw new Error("No code found") - // } - // return matches; - // } catch (error) { - // throw new Error("No code found") - // } + try { + if (text.startsWith("[") && text.endsWith("]")) { + return JSON.parse(text); } + if(text.startsWith("```Javascript") || text.startsWith("```javascript")) { + return text.match(codeRegex2).groups.code.trim(); + } + return text.match(codeRegex).groups.code.trim(); + } catch (e) { + console.log({ text }) + throw new Error("No code found"); + } } -export { parseSite } from "./page-parser.js"; + +export { parseSite } from './page-parser.js'; \ No newline at end of file diff --git a/JS/edgechains/arakoodev/src/scraper/src/utils/page-parser.ts b/JS/edgechains/arakoodev/src/scraper/src/utils/page-parser.ts index fc0918f5..456cfb7b 100644 --- a/JS/edgechains/arakoodev/src/scraper/src/utils/page-parser.ts +++ b/JS/edgechains/arakoodev/src/scraper/src/utils/page-parser.ts @@ -3,30 +3,30 @@ import { JSDOM } from "jsdom"; const { document } = new JSDOM(`...`).window; const tagsToLog = [ - "a", - "p", - "span", - "div", - "button", - "label", - "input", - "textarea", - "section", - "select", - "option", - "table", - "td", - "th", - "ul", - "ol", - "li", - "h1", - "h2", - "h3", - "h4", - "h5", - "h6", - "iframe", + 'a', + 'p', + 'span', + 'div', + 'button', + 'label', + 'input', + 'textarea', + 'section', + 'select', + 'option', + 'table', + 'td', + 'th', + 'ul', + 'ol', + 'li', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'iframe', ]; function createElement(node) { @@ -35,17 +35,18 @@ function createElement(node) { const dataAttributes = Object.entries(node.attributes).filter( (a) => (tagsToLog.includes(node.tagName) && - (a[0].startsWith("name") || - a[0].startsWith("value") || - a[0].startsWith("data-component") || - a[0].startsWith("data-name") || - a[0].startsWith("aria-") || - a[0] === "class" || - a[0] === "type" || - a[0] === "role")) || - // always log these - a[0] === "href" || - a[0] === "id" + (a[0].startsWith('name') || + a[0].startsWith('value') || + a[0].startsWith('data-component') || + a[0].startsWith('data-name') || + a[0].startsWith('aria-') || + a[0] === 'class' || + a[0] === 'type' || + a[0] === 'role')) || + a[0] === 'href' || + a[0] === 'id' || + a[0] === 'type' + ); dataAttributes.forEach(([attr, value]) => { elem.setAttribute(attr, value); @@ -59,19 +60,19 @@ function createTextNode(text) { } function isAdsIframe(node) { - const style = node.getAttribute("style") || ""; - const id = node.getAttribute("id") || ""; + const style = node.getAttribute('style') || ''; + const id = node.getAttribute('id') || ''; return ( - node.getAttribute("height") === 0 || - style.includes("display: none") || - id.startsWith("google_ads_iframe") + node.getAttribute('height') === 0 || + style.includes('display: none') || + id.startsWith('google_ads_iframe') ); } async function dfs(node, parentElem, childFrames = []) { for (const childNode of node.childNodes) { if (childNode.nodeType === 1) { - if (childNode.tagName === "IFRAME") { + if (childNode.tagName === 'IFRAME') { // optimize for performance later for (let { childFrame, attributes } of childFrames) { if ( @@ -88,10 +89,9 @@ async function dfs(node, parentElem, childFrames = []) { parentElem.appendChild(childElem); const newChildFrame = await toChildFramesWithAttributes(childFrame); //@ts-ignore - const bodyNode = await childFrame.locator("body", { timeout: 1000 }); + const bodyNode = await childFrame.locator('body', { timeout: 1000 }); const bodyHtml = await bodyNode.innerHTML(); await dfs(parseFrame(bodyHtml), childElem, newChildFrame); - // ignore other matches that might be the same parent break; } @@ -128,10 +128,9 @@ async function toChildFramesWithAttributes(frame) { } async function getStructure(frame) { - const bodyNode = await frame.locator("body", { timeout: 1000 }); + const bodyNode = await frame.locator('body', { timeout: 1000 }); const bodyHtml = await bodyNode.innerHTML(); const node = parseFrame(bodyHtml); - const rootElem = createElement(node); await dfs(node, rootElem, await toChildFramesWithAttributes(frame)); return rootElem; @@ -143,7 +142,7 @@ function parseFrame(html) { script: false, noscript: false, style: false, - pre: true, // keep text content when parsing + pre: true, // keep text content when parsing }, }); } @@ -154,14 +153,15 @@ export async function parseSite(page) { return structure.innerHTML; } + export function removeBlankTags(text) { const emptyTagRegex = /<(\w+)([^>]*)><\/\1>|<(\w+)([^>]*)>\s*<\/\3>/g; let newText = text; while (emptyTagRegex.test(newText)) { - newText = newText.replace(emptyTagRegex, ""); + newText = newText.replace(emptyTagRegex, ''); } return newText; -} +} \ No newline at end of file