From 364a6905dc284e43df702792ab1d4af1c62ad366 Mon Sep 17 00:00:00 2001
From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com>
Date: Wed, 6 Mar 2024 18:10:49 +0000
Subject: [PATCH 1/9] chore(deps): update tj-actions/changed-files action to v42.0.6

---
 .github/workflows/test.yaml | 2 +-
 .github/workflows/vale.yaml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index f50601214..6781af248 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -39,7 +39,7 @@ jobs:

       - name: Get changed files
         id: changed-files
-        uses: tj-actions/changed-files@v42.0.5
+        uses: tj-actions/changed-files@v42.0.6
         with:
           files: |
             **/*.md
diff --git a/.github/workflows/vale.yaml b/.github/workflows/vale.yaml
index 67458bd99..2ddf2abe8 100644
--- a/.github/workflows/vale.yaml
+++ b/.github/workflows/vale.yaml
@@ -13,7 +13,7 @@ jobs:

       - name: Get changed files
         id: changed-files
-        uses: tj-actions/changed-files@v42.0.5
+        uses: tj-actions/changed-files@v42.0.6
         with:
           files: |
             **/*.{md,mdx}

From 1d22a8d9d37dbacd91585cd84dfddd3055cb99a1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Daniel=20W=C3=A9br?= <115245871+webrdaniel@users.noreply.github.com>
Date: Thu, 7 Mar 2024 09:55:59 +0100
Subject: [PATCH 2/9] fix(landing page): design fixes (#877)

---
 apify-docs-theme/src/theme/custom.css         |   8 ++
 .../CardWithImageAndContent/styles.module.css |  15 ++-
 src/components/Heading.tsx                    |   1 +
 .../OpenSourceCards/OpenSourceCards.tsx       | 119 +++++++++---------
 .../OpenSourceCards/styles.module.css         |  13 +-
 src/pages/index.module.css                    |  11 +-
 src/pages/index.tsx                           |  30 +++--
 7 files changed, 117 insertions(+), 80 deletions(-)

diff --git a/apify-docs-theme/src/theme/custom.css b/apify-docs-theme/src/theme/custom.css
index 69c67cbbe..eee74e536 100644
--- a/apify-docs-theme/src/theme/custom.css
+++ b/apify-docs-theme/src/theme/custom.css
@@ -922,6 +922,14 @@ html[data-theme='dark'] .actionLink:hover::after {
     align-items: flex-start;
     gap: 1.6rem;
     align-self: stretch;
+    height: 100%;
+}
+
+.cardContentWrapperText {
+    display: flex;
+    flex-direction: column;
+    align-items: flex-start;
+    gap: 0.4rem;
 }

 .cardContentList {
diff --git a/src/components/CardWithImageAndContent/styles.module.css b/src/components/CardWithImageAndContent/styles.module.css
index 839d66366..1e64e196b 100644
--- a/src/components/CardWithImageAndContent/styles.module.css
+++ b/src/components/CardWithImageAndContent/styles.module.css
@@ -27,4 +27,17 @@
     align-items: center;
     gap: 0.8rem;
     align-self: stretch;
-}
\ No newline at end of file
+}
+
+.cardWithImageAndContentImage {
+    display: inherit;
+}
+
+.cardWithImageAndContent {
+    display: flex;
+    flex-direction: column;
+}
+
+.cardWithImageAndContentContent {
+    flex: 1;
+}
diff --git a/src/components/Heading.tsx b/src/components/Heading.tsx
index 99fedff6a..31e4a7e0a 100644
--- a/src/components/Heading.tsx
+++ b/src/components/Heading.tsx
@@ -121,6 +121,7 @@ interface HeadingCssProps {
 const getHeadingCss = ({ $type = 'titleXs' }: HeadingCssProps) => HEADING_VARIANTS_CSS[$type];

 const StyledHeading = styled(TextBaseComponent)`
+    margin: 0;
     ${getHeadingCss}
 `;

diff --git a/src/components/OpenSourceCards/OpenSourceCards.tsx b/src/components/OpenSourceCards/OpenSourceCards.tsx
index 566897dae..45862b880 100644
--- a/src/components/OpenSourceCards/OpenSourceCards.tsx
+++ b/src/components/OpenSourceCards/OpenSourceCards.tsx
@@ -17,7 +17,6 @@ const OpenSourceCards: React.FC = () => {
     return (
         <>
             <CardWithImageAndContent
                 …
                 content={
                     <div className={styles.cardContentWrapper}>
-                        <Heading …>Crawlee</Heading>
-                        <Text …>A popular web scraping and browser automation library.</Text>
-                        <GitHubButton …>Star</GitHubButton>
+                        <div className={styles.cardContentWrapperText}>
+                            <Heading …>Crawlee</Heading>
+                            <Text …>A popular web scraping and browser automation library.</Text>
+                        </div>
+                        <div className={styles.githubButtonWrapper}>
+                            <GitHubButton …>Star</GitHubButton>
+                        </div>
                     </div>
                 }
             />
             <CardWithImageAndContent
                 …
                 content={
                     <div className={styles.cardContentWrapper}>
-                        <Heading …>Got Scraping</Heading>
-                        <Text …>An HTTP client made for scraping based on Got.</Text>
-                        <GitHubButton …>Star</GitHubButton>
+                        <div className={styles.cardContentWrapperText}>
+                            <Heading …>Got Scraping</Heading>
+                            <Text …>An HTTP client made for scraping based on Got.</Text>
+                        </div>
+                        <div className={styles.githubButtonWrapper}>
+                            <GitHubButton …>Star</GitHubButton>
+                        </div>
                     </div>
                 }
             />
             <CardWithImageAndContent
                 …
                 content={
                     <div className={styles.cardContentWrapper}>
-                        <Heading …>Fingerprint Suite</Heading>
-                        <Text …>Browser fingerprinting tools for anonymizing your scrapers.</Text>
-                        <GitHubButton …>Star</GitHubButton>
+                        <div className={styles.cardContentWrapperText}>
+                            <Heading …>Fingerprint Suite</Heading>
+                            <Text …>Browser fingerprinting tools for anonymizing your scrapers.</Text>
+                        </div>
+                        <div className={styles.githubButtonWrapper}>
+                            <GitHubButton …>Star</GitHubButton>
+                        </div>
                     </div>
                 }
             />
diff --git a/src/components/OpenSourceCards/styles.module.css b/src/components/OpenSourceCards/styles.module.css
index 4267fe3e3..de1e42f71 100644
--- a/src/components/OpenSourceCards/styles.module.css
+++ b/src/components/OpenSourceCards/styles.module.css
@@ -4,10 +4,11 @@
 */

 .headingLink {
-    display: flex;
-    justify-content: center;
-    align-items: center;
-    gap: 0.8rem;
+    display: flex;
+    justify-content: center;
+    align-items: center;
+    gap: 0.8rem;
+    width: fit-content;
 }

 .headingLink :hover {
@@ -38,3 +39,7 @@ html[data-theme='dark'] .headingLink::after {
 html[data-theme='dark'] .headingLink:hover::after {
     background-image: url('/img/external_link_icon_primary_dark.svg');
 }
+
+.githubButtonWrapper {
+    margin-top: auto;
+}
diff --git a/src/pages/index.module.css b/src/pages/index.module.css
index 7414e45bc..c54c8920f 100644
--- a/src/pages/index.module.css
+++ b/src/pages/index.module.css
@@ -91,6 +91,13 @@
     align-self: stretch;
 }

+.cardContentWrapperText {
+    display: flex;
+    flex-direction: column;
+    align-items: flex-start;
+    gap: 0.4rem;
+}
+
 .cardContentList {
     display: flex;
     flex-direction: column;
@@ -130,7 +137,7 @@
     .bannerContentImage {
         width: 38.4rem;
         position: absolute;
-        margin-top: 20rem;
+        margin-top: 18.4rem;
     }
 }

@@ -139,7 +146,7 @@
     .bannerContentDescription { width: 50.2rem; }

     .bannerContentImage {
         width: 50.2rem;
-        margin-top: 15.8rem;
+        margin-top: 14.2rem;
     }
 }
\ No newline at end of file
diff --git a/src/pages/index.tsx b/src/pages/index.tsx
index 730618293..d5c33016b 100644
--- a/src/pages/index.tsx
+++ b/src/pages/index.tsx
@@ -189,10 +189,12 @@ export default function Home() {
                     }
                     content={
                         <div className={styles.cardContentWrapper}>
-                            <Heading …>SDK</Heading>
-                            <Text …>Software toolkits for developing new Actors.</Text>
+                            <div className={styles.cardContentWrapperText}>
+                                <Heading …>SDK</Heading>
+                                <Text …>Software toolkits for developing new Actors.</Text>
+                            </div>
  • SDK for JavaScript
  • SDK for Python
                         </div>
                     }
                 />
                 <CardWithImageAndContent
                     …
                     content={
                         <div className={styles.cardContentWrapper}>
-                            <Heading …>API</Heading>
-                            <Text …>Interact with the Apify platform from your applications.</Text>
+                            <div className={styles.cardContentWrapperText}>
+                                <Heading …>API</Heading>
+                                <Text …>Interact with the Apify platform from your applications.</Text>
+                            </div>
    • API client for JavaScript
  • API client for Python
                         </div>
                     }
                 />
                 <CardWithImageAndContent
                     …
                     content={
                         <div className={styles.cardContentWrapper}>
-                            <Heading …>CLI</Heading>
-                            <Text …>Control the Apify platform from terminal or shell scripts.</Text>
+                            <div className={styles.cardContentWrapperText}>
+                                <Heading …>CLI</Heading>
+                                <Text …>Control the Apify platform from terminal or shell scripts.</Text>
+                            </div>
      • CLI Reference
      • From 733542d0b7055e399a444a157761935fe16a08ee Mon Sep 17 00:00:00 2001 From: Apify Bot Date: Thu, 7 Mar 2024 08:57:28 +0000 Subject: [PATCH 3/9] chore: publish new version of @apify/docs-theme [skip ci] --- apify-docs-theme/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apify-docs-theme/package.json b/apify-docs-theme/package.json index 1806ca1aa..f149df623 100644 --- a/apify-docs-theme/package.json +++ b/apify-docs-theme/package.json @@ -1,6 +1,6 @@ { "name": "@apify/docs-theme", - "version": "1.0.106", + "version": "1.0.107", "description": "", "main": "./src/index.js", "files": [ From 335dfb01da548170d18c81e194c31102996cb116 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Fri, 8 Mar 2024 01:03:24 +0000 Subject: [PATCH 4/9] chore(deps): update tj-actions/changed-files action to v42.0.7 --- .github/workflows/test.yaml | 2 +- .github/workflows/vale.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 6781af248..9df32016b 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -39,7 +39,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v42.0.6 + uses: tj-actions/changed-files@v42.0.7 with: files: | **/*.md diff --git a/.github/workflows/vale.yaml b/.github/workflows/vale.yaml index 2ddf2abe8..e358ba4f8 100644 --- a/.github/workflows/vale.yaml +++ b/.github/workflows/vale.yaml @@ -13,7 +13,7 @@ jobs: - name: Get changed files id: changed-files - uses: tj-actions/changed-files@v42.0.6 + uses: tj-actions/changed-files@v42.0.7 with: files: | **/*.{md,mdx} From 3367c159546bcca1c67049b2c8bd39abf88922e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jind=C5=99ich=20B=C3=A4r?= Date: Fri, 8 Mar 2024 12:24:14 +0100 Subject: [PATCH 5/9] feat: lychee link checker from built website version (#855) Co-authored-by: TCMO <92638966+TC-MO@users.noreply.github.com> Co-authored-by: TC-MO --- .github/styles/Apify/Capitalization.yml | 4 +-- .github/workflows/lychee.yml | 34 +++++++++++++++++++ .lycheeignore | 9 +++++ apify-docs-theme/src/config.js | 4 +-- .../academy/glossary/concepts/http_cookies.md | 2 +- sources/academy/glossary/tools/modheader.md | 2 +- .../academy/glossary/tools/switchyomega.md | 2 +- .../saving_useful_stats.md | 2 +- .../get_most_of_actors/actor_readme.md | 10 +++--- .../run_actor_and_retrieve_data_via_api.md | 11 +++--- .../node_js/debugging_web_scraper.md | 6 ++-- .../tutorials/node_js/optimizing_scrapers.md | 8 ++--- .../executing_scripts/injecting_code.md | 4 +-- .../page/interacting_with_a_page.md | 6 ++-- .../actor_definition/output_schema.md | 10 +++--- sources/platform/actors/development/index.md | 4 +-- .../actors/running/usage_and_resources.md | 4 ++- sources/platform/integrations/langchain.md | 8 ++--- sources/platform/integrations/llama.md | 3 +- .../integrations/webhooks/ad_hoc_webhooks.md | 2 +- sources/platform/storage/dataset.md | 6 ++-- sources/platform/storage/key_value_store.md | 4 +-- sources/platform/storage/request_queue.md | 4 +-- 23 files changed, 97 insertions(+), 52 deletions(-) create mode 100644 .github/workflows/lychee.yml create mode 100644 .lycheeignore diff --git a/.github/styles/Apify/Capitalization.yml b/.github/styles/Apify/Capitalization.yml index cbcbc00c1..d01835a60 100644 --- a/.github/styles/Apify/Capitalization.yml +++ b/.github/styles/Apify/Capitalization.yml @@ -3,8 +3,8 @@ message: "The word '%s' should always be 
capitalized." ignorecase: false level: error tokens: - - '\bactor\b' - - '\bactors\b' + - '(? - Here’s an example for this section: > ## How much will it cost me to scrape Google Maps reviews? > - >
        Apify provides you with $5 free usage credits to use every month on the Apify Free plan and you can get up to 100,000 reviews from this Google Maps Reviews Scraper for those credits. So 100k results will be completely free! + >
        Apify provides you with $5 free usage credits to use every month on the Apify Free plan and you can get up to 100,000 reviews from this Google Maps Reviews Scraper for those credits. This means 100k results will be completely free! >
        But if you need to get more data or to get your data regularly you should grab an Apify subscription. We recommend our $49/month Starter plan - you can get up to 1 million Google Maps reviews every month with the $49 monthly plan! Or 10 million with the $499 Scale plan - wow! 4. **How to scrape (target site)** @@ -94,4 +94,4 @@ If you want some general tips on how to make GitHub README that stands out, chec ## Next up {#next} -If you followed all the tips described above, your Actor README is almost good to go! In the [next lesson](./guidelines_for_writing.md) we will give you a few instructions on how you can create a tutorial for your Actor. +If you followed all the tips described above, your Actor README is almost good to go! In the [next lesson](./guidelines_for_writing.md) we will give you a few instructions on how you can create a tutorial for your Actor. diff --git a/sources/academy/tutorials/api/run_actor_and_retrieve_data_via_api.md b/sources/academy/tutorials/api/run_actor_and_retrieve_data_via_api.md index fb2c80ab2..f123e5140 100644 --- a/sources/academy/tutorials/api/run_actor_and_retrieve_data_via_api.md +++ b/sources/academy/tutorials/api/run_actor_and_retrieve_data_via_api.md @@ -9,11 +9,12 @@ slug: /api/run-actor-and-retrieve-data-via-api --- -The most popular way of [integrating](https://help.apify.com/en/collections/1669767-integrating-with-apify) the Apify platform with an external project/application is by programmatically running an [Actor](/platform/actors) or [task](/platform/actors/running/tasks), waiting for it to complete its run, then collecting its data and using it within the project. Though this process sounds somewhat complicated, it's actually quite easy to do; however, due to the plethora of features offered on the Apify platform, new users may not be sure how exactly to implement this type of integration. So, let's dive in and see how you can do it. +The most popular way of [integrating](https://help.apify.com/en/collections/1669769-integrations) the Apify platform with an external project/application is by programmatically running an [Actor](/platform/actors) or [task](/platform/actors/running/tasks), waiting for it to complete its run, then collecting its data and using it within the project. Though this process sounds somewhat complicated, it's actually quite easy to do; however, due to the plethora of features offered on the Apify platform, new users may not be sure how exactly to implement this type of integration. Let's dive in and see how you can do it. > Remember to check out our [API documentation](/api/v2) with examples in different languages and a live API console. We also recommend testing the API with a nice desktop client like [Postman](https://www.getpostman.com/) or [Insomnia](https://insomnia.rest). -There are 2 main ways of using the Apify API: + +Apify API offers two ways of interacting with it: - [Synchronously](#synchronous-flow) - [Asynchronously](#asynchronous-flow) @@ -36,7 +37,7 @@ To run, or **call**, an Actor/task, you will need a few things: - Some other optional settings if you'd like to change the default values (such as allocated memory or the build). 
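
As a quick illustration, a minimal Node.js sketch of such a call might look like this (assuming Node.js 18+ for the built-in `fetch`; `ACTOR_ID`, `MY_TOKEN`, and the input field are placeholders, not real values):

```js
// Minimal sketch: start an Actor run via the Apify API.
// ACTOR_ID and MY_TOKEN are placeholders - substitute your own values.
const response = await fetch(
    'https://api.apify.com/v2/acts/ACTOR_ID/runs?token=MY_TOKEN',
    {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        // The Actor's input, if it expects any (hypothetical field name).
        body: JSON.stringify({ startUrl: 'https://example.com' }),
    },
);
const { data: run } = await response.json();
console.log(run.id, run.status); // details of the newly started run
```
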
-The URL for a [POST request](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/POST) to run an actor looks like this: +The URL of [POST request](https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods/POST) to run an actor looks like this: ```cURL https://api.apify.com/v2/acts/ACTOR_NAME_OR_ID/runs?token=YOUR_TOKEN @@ -261,9 +262,9 @@ https://api.apify.com/v2/datasets/DATASET_ID/items By default, it will return the data in JSON format with some metadata. The actual data are in the `items` array. -There are plenty of additional parameters that you can use. You can learn about them in the [documentation](/api/v2#/reference/datasets/item-collection/get-items). We will only mention that you can pass a `format` parameter that transforms the response into popular formats like CSV, XML, Excel, RSS, etc. +You can use plenty of additional parameters, to learn more about them, visit our API reference [documentation](/api/v2#/reference/datasets/item-collection/get-items). We will only mention that you can pass a `format` parameter that transforms the response into popular formats like CSV, XML, Excel, RSS, etc. -The items are paginated, which means you can ask only for a subset of the data. Specify this using the `limit` and `offset` parameters. There is actually an overall limit of 250,000 items that the endpoint can return per request. To retrieve more, you will need to send more requests incrementing the `offset` parameter. +The items are paginated, which means you can ask only for a subset of the data. Specify this using the `limit` and `offset` parameters. This endpoint has a limit of 250,000 items that it can return per request. To retrieve more, you will need to send more requests incrementing the `offset` parameter. ```cURL https://api.apify.com/v2/datasets/DATASET_ID/items?format=csv&offset=250000 diff --git a/sources/academy/tutorials/node_js/debugging_web_scraper.md b/sources/academy/tutorials/node_js/debugging_web_scraper.md index f83abe4e0..f1930867a 100644 --- a/sources/academy/tutorials/node_js/debugging_web_scraper.md +++ b/sources/academy/tutorials/node_js/debugging_web_scraper.md @@ -23,15 +23,15 @@ jq.src = 'https://ajax.googleapis.com/ajax/libs/jquery/2.2.2/jquery.min.js'; document.getElementsByTagName('head')[0].appendChild(jq); ``` -If that doesn't work because of CORS violation, you can install [this extension](https://chrome.google.com/webstore/detail/jquery-inject/iibfbhlfimdnkinkcenncoeejnmpemof) that injects jQuery on a button click. +If that doesn't work because of CORS violation, you can install [this extension](https://chrome.google.com/webstore/detail/ekkjohcjbjcjjifokpingdbdlfekjcgi) that injects jQuery on a button click. -There are 2 main ways how to test a pageFunction code in your console: +You can test a `pageFunction` code in two ways in your console: ## Pasting and running a small code snippet Usually, you don't need to paste in the whole pageFunction as you can simply isolate the critical part of the code you are trying to debug. You will need to remove any references to the `context` object and its properties like `request` and the final return statement but otherwise, the code should work 1:1. -I will also usually remove `const` declarations on the top level variables. This helps you to run the same code many times over without needing to restart the console (you cannot declare constants more than once). So my declaration will change from: +I will also usually remove `const` declarations on the top level variables. 
This helps you to run the same code many times over without needing to restart the console (you cannot declare constants more than once). My declaration will change from: ```js const results = []; diff --git a/sources/academy/tutorials/node_js/optimizing_scrapers.md b/sources/academy/tutorials/node_js/optimizing_scrapers.md index c3e72d684..5d24dea9e 100644 --- a/sources/academy/tutorials/node_js/optimizing_scrapers.md +++ b/sources/academy/tutorials/node_js/optimizing_scrapers.md @@ -13,9 +13,9 @@ slug: /node-js/optimizing-scrapers Especially if you are running your scrapers on [Apify](https://apify.com), performance is directly related to your wallet (or rather bank account). The slower and heavier your program is, the more proxy bandwidth, storage, [compute units](https://help.apify.com/en/articles/3490384-what-is-a-compute-unit) and higher [subscription plan](https://apify.com/pricing) you'll need. -The goal of optimization is simple: Make the code run as fast possible and use the least resources possible. On Apify, the resources are memory and CPU usage (don't forget that the more memory you allocate to a run, the bigger share of CPU you get - proportionally). Memory alone should never be a bottleneck though. If it is, that means either a bug (memory leak) or bad architecture of the program (you need to split the computation to smaller parts). So in the rest of this article, we will focus only on optimizing CPU usage. You allocate more memory only to get more power from the CPU. +The goal of optimization is simple: Make the code run as fast possible and use the least resources possible. On Apify, the resources are memory and CPU usage (don't forget that the more memory you allocate to a run, the bigger share of CPU you get - proportionally). Memory alone should never be a bottleneck though. If it is, that means either a bug (memory leak) or bad architecture of the program (you need to split the computation to smaller parts). The rest of this article, will focus only on optimizing CPU usage. You allocate more memory only to get more power from the CPU. -There is one more thing. Optimization has its own cost: development time. You should always think about how much time you're able to spend on it and if it's worth it. +One more thing to remember. Optimization has its own cost: development time. You should always think about how much time you're able to spend on it and if it's worth it. Before we dive into the practical side of things, lets diverge with an analogy to help us think about the performance of scrapers. @@ -29,13 +29,13 @@ Now, if you want to build your own game and you are not a C/C++ veteran with a t ## Back to scrapers {#back-to-scrapers} -What are the engines of the scraping world? A [browser](https://github.com/puppeteer/puppeteer/blob/master/docs/api.md), an [HTTP library](https://www.npmjs.com/package/@apify/http-request), an [HTML parser](https://github.com/cheeriojs/cheerio), and a [JSON parser](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse). The CPU spends more than 99% of its workload in these libraries. As with engines, you are not likely gonna write these from scratch - instead you'll use something like [Crawlee](https://crawlee.dev) that handles a lot of the overheads for you. +What are the engines of the scraping world? 
A [browser](https://github.com/puppeteer/puppeteer?tab=readme-ov-file#puppeteer), an [HTTP library](https://www.npmjs.com/package/@apify/http-request), an [HTML parser](https://github.com/cheeriojs/cheerio), and a [JSON parser](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/JSON/parse). The CPU spends more than 99% of its workload in these libraries. As with engines, you are not likely gonna write these from scratch - instead you'll use something like [Crawlee](https://crawlee.dev) that handles a lot of the overheads for you. It is about how you use these tools. The small amount of code you write in your [`requestHandler`](https://crawlee.dev/api/http-crawler/interface/HttpCrawlerOptions#requestHandler) is absolutely insignificant compared to what is running inside these tools. In other words, it doesn't matter how many functions you call or how many variables you extract. If you want to optimize your scrapers, you need to choose the lightweight option from the tools and use it as little as possible. A crawler scraping only JSON API can be as much as 200 times faster/cheaper than a browser based solution. **Ranking of the tools from the most efficient to the least:** 1. **JSON API** (HTTP call + JSON parse) - Scraping an API (public or internal) is the best option. The response is usually smaller than the HTML page and the data are already structured and cheap to parse. Usable for about 30% of websites. -2. **Pure HTML** (HTTP call + HTML parse) - All data is on the main single HTML page. Often the HTML contains script and JSON data that are rich and nicely structured. Some pages can be quite big and the parsing is slower than for JSON. But it is still 10-20 times faster than a browser. Usable for about 90% of websites. +2. **Pure HTML** (HTTP call + HTML parse) - All data is on the main single HTML page. Often the HTML contains script and JSON data that are rich and nicely structured. Some pages can be quite big and the parsing is slower than for JSON. But it is still 10–20 times faster than a browser. Usable for about 90% of websites. 3. **Browser** (hundreds of HTTP calls, script execution, rendering) - Browsers are huge beasts. They do so much work to allow for smooth human interaction which makes them really inefficient for scraping. Use a browser only if it helps you bypass anti-scraping protection or you need to interact with the page. diff --git a/sources/academy/webscraping/puppeteer_playwright/executing_scripts/injecting_code.md b/sources/academy/webscraping/puppeteer_playwright/executing_scripts/injecting_code.md index b51d91f2d..9c9c32276 100644 --- a/sources/academy/webscraping/puppeteer_playwright/executing_scripts/injecting_code.md +++ b/sources/academy/webscraping/puppeteer_playwright/executing_scripts/injecting_code.md @@ -22,7 +22,7 @@ We'll be covering both of these cases in this brief lesson. Sometimes, you need your custom code to run before any other code is run on the page. Perhaps you need to modify an object's prototype, or even re-define certain global variables before they are used by the page's native scripts. -Luckily, Puppeteer and Playwright both have functions for this. In Puppeteer, we use the [`page.evaluateOnNewDocument()`](https://puppeteer.github.io/puppeteer/docs/puppeteer.page.evaluateonnewdocument/) function, while in Playwright we use [`page.addInitScript()`](https://playwright.dev/docs/api/class-page#page-add-init-script). 
We'll use these functions to override the native `addEventListener` function, setting it to a function that does nothing. This will prevent event listeners from being added to elements. +Luckily, Puppeteer and Playwright both have functions for this. In Puppeteer, we use the [`page.evaluateOnNewDocument()`](https://pptr.dev/api/puppeteer.page.evaluateonnewdocument) function, while in Playwright we use [`page.addInitScript()`](https://playwright.dev/docs/api/class-page#page-add-init-script). We'll use these functions to override the native `addEventListener` function, setting it to a function that does nothing. This will prevent event listeners from being added to elements. @@ -128,4 +128,4 @@ await browser.close(); ## Next up {#next} -Next, we'll be learning a bit about how to extract data using Playwright/Puppeteer. There are two main ways to do this, so [next exciting lesson](./extracting_data.md) will be about both of them! +Next, we'll be learning a bit about how to extract data using Playwright/Puppeteer. You can use one of the two main ways to do this, so [next exciting lesson](./extracting_data.md) will be about both of them! diff --git a/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md b/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md index f05cf33c6..423e443e9 100644 --- a/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md +++ b/sources/academy/webscraping/puppeteer_playwright/page/interacting_with_a_page.md @@ -55,7 +55,7 @@ await page.click('button + button'); With `page.click()`, Puppeteer and Playwright actually drag the mouse and click, allowing the bot to act more human-like. This is different from programmatically clicking with `Element.click()` in vanilla client-side JavaScript. -Notice that in the Playwright example, we are using a different selector than in the Puppeteer example. This is because Playwright supports [many custom CSS selectors](https://playwright.dev/docs/selectors#text-selector), such as the **has-text** pseudo class. As a rule of thumb, using text selectors is much more preferable to using regular selectors, as they are much less likely to break. If Google makes the sibling above the **I agree** button a `
        ` element instead of a `