Merge branch 'master' into feat/code-samples-webhooks

apify · Dec 20, 2024 · 521b1cb · 521b1cb
2 parents 8d83e2f + 8f36cb5
commit 521b1cb
Show file tree

Hide file tree

Showing 19 changed files with 158 additions and 26 deletions.
diff --git a/apify-api/openapi/components/schemas/actor-runs/ChargeRunRequest.yaml b/apify-api/openapi/components/schemas/actor-runs/ChargeRunRequest.yaml
@@ -0,0 +1,12 @@
+title: ChargeRunRequest
+required:
+  - eventName
+  - eventCount
+type: object
+properties:
+  eventName:
+    type: string
+    example: ANALYZE_PAGE
+  eventCount:
+    type: number
+    example: 1
diff --git a/apify-api/openapi/components/schemas/request-queues/GetHeadAndLockResponse.yaml b/apify-api/openapi/components/schemas/request-queues/GetHeadAndLockResponse.yaml
@@ -11,7 +11,15 @@ properties:
         example: 1000
       queueModifiedAt:
         type: string
+        description: The modifiedAt is updated whenever the queue is modified. Modifications include adding, updating, or removing requests, as well as locking or unlocking requests in the queue.
         example: '2018-03-14T23:00:00.000Z'
+      queueHasLockedRequests:
+        type: boolean
+        description: Whether the queue contains requests locked by any client (either the one calling the endpoint or a different one).
+        example: true
+      clientKey:
+        type: string
+        example: client-one
       hadMultipleClients:
         type: boolean
         example: true

diff --git a/apify-api/openapi/components/schemas/request-queues/RequestQueue.yaml b/apify-api/openapi/components/schemas/request-queues/RequestQueue.yaml
@@ -25,7 +25,8 @@ properties:
     example: '2019-12-12T07:34:14.202Z'
   modifiedAt:
     type: string
-    example: '2019-12-13T08:36:13.202Z'
+    description: The modifiedAt is updated whenever the queue is modified. Modifications include adding, updating, or removing requests, as well as locking or unlocking requests in the queue.
+    example: '2030-12-13T08:36:13.202Z'
   accessedAt:
     type: string
     example: '2019-12-14T08:36:13.202Z'

diff --git a/apify-api/openapi/components/tags.yaml b/apify-api/openapi/components/tags.yaml
@@ -620,6 +620,10 @@
   x-displayName: Resurrect run
   x-parent-tag-name: Actor runs
   x-trait: 'true'
+- name: Actor runs/Charge events in run
+  x-displayName: Charge events in run
+  x-parent-tag-name: Actor runs
+  x-trait: 'true'
 - name: Actor runs/Update status message
   x-displayName: Update status message
   x-parent-tag-name: Actor runs

diff --git a/apify-api/openapi/components/x-tag-groups.yaml b/apify-api/openapi/components/x-tag-groups.yaml
@@ -40,6 +40,7 @@
   - Actor runs/Metamorph run
   - Actor runs/Reboot run
   - Actor runs/Resurrect run
+  - Actor runs/Charge events in run
   - Actor runs/Update status message
 - name: Actor builds
   tags:

diff --git a/apify-api/openapi/openapi.yaml b/apify-api/openapi/openapi.yaml
@@ -544,6 +544,8 @@ paths:
     $ref: paths/actor-runs/actor-runs@{runId}@reboot.yaml
   '/v2/actor-runs/{runId}/resurrect':
     $ref: paths/actor-runs/actor-runs@{runId}@resurrect.yaml
+  '/v2/actor-runs/{runId}/charge':
+    $ref: paths/actor-runs/actor-runs@{runId}@charge.yaml
   /v2/actor-builds:
     $ref: paths/actor-builds/actor-builds.yaml
   '/v2/actor-builds/{buildId}':

diff --git a/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@charge.yaml b/apify-api/openapi/paths/actor-runs/actor-runs@{runId}@charge.yaml
@@ -0,0 +1,46 @@
+post:
+  tags:
+    - Actor runs/Charge events in run
+  summary: Charge events in run
+  description: |
+    Charge for events in the run of your [pay per event Actor](https://docs.apify.com/platform/actors/running/actors-in-store#pay-per-event).
+    The event you are charging for must be one of the configured events in your Actor. If the Actor is not set up as pay per event, or if the event is not configured,
+    the endpoint will return an error. The endpoint must be called from the Actor run itself, with the same API token that the run was started with.
+
+    Note that pay per event Actors are still in alpha. Please reach out to us with any questions or feedback.
+  operationId: PostChargeRun
+  parameters:
+    - name: runId
+      in: path
+      required: true
+      schema:
+        type: string
+      example: 3KH8gEpp4d8uQSe8T
+      description: Run ID.
+    - name: idempotency-key
+      in: header
+      required: false
+      schema:
+        type: string
+      example: 2024-12-09T01:23:45.000Z-random-uuid
+      description: Always pass a unique idempotency key (any unique string) for each charge to avoid double charging in case of retries or network errors.
+  requestBody:
+      description: 'Define which event, and how many times, you want to charge for.'
+      content:
+          application/json:
+              schema:
+                  $ref: "../../components/schemas/actor-runs/ChargeRunRequest.yaml"
+              example:
+                  eventName: 'ANALYZE_PAGE'
+                  eventCount: 1
+      required: true
+  responses:
+    '201':
+      description: 'The charge was successful. Note that you still have to make sure in your Actor that the total charge for the run respects the maximum value set by the user, as the API does not check this. Above the limit, the charges reported as successful in the API will not be added to your payouts, but you will still bear the associated costs. Use the Apify charge manager or SDK to avoid having to deal with this manually.'
+  deprecated: false
+  x-js-parent: RunClient
+  x-js-name: charge
+  x-js-doc-url: https://docs.apify.com/api/client/js/reference/class/RunClient#charge
+  x-py-parent: RunClientAsync
+  x-py-name: charge
+  x-py-doc-url: https://docs.apify.com/api/client/python/reference/class/RunClientAsync#charge
diff --git a/apify-api/openapi/paths/request-queues/request-queues@{queueId}@head@lock.yaml b/apify-api/openapi/paths/request-queues/request-queues@{queueId}@head@lock.yaml
@@ -6,10 +6,9 @@ post:
     Returns the given number of first requests from the queue and locks them for
     the given time.
 
-    If this endpoint locks the request, no other client will be able to get and
+    If this endpoint locks the request, no other client or run will be able to get and
     lock these requests.
 
-
     The response contains the `hadMultipleClients` boolean field which indicates
     that the queue was accessed by more than one client (with unique or empty
     `clientKey`).

diff --git a/apify-api/openapi/paths/request-queues/request-queues@{queueId}@requests@{requestId}@lock.yaml b/apify-api/openapi/paths/request-queues/request-queues@{queueId}@requests@{requestId}@lock.yaml
@@ -37,11 +37,10 @@ put:
     - name: clientKey
       in: query
       description: |
-        A unique identifier of the client accessing the request queue. It must
-        be a string between 1 and 32 characters long. This identifier is used to
-        ensure one client is not to able delete or prolong
-
-        a request from another client.
+          A unique identifier of the client accessing the request queue. It must
+          be a string between 1 and 32 characters long. This identifier is used for locking
+          and unlocking requests. You can delete or prolong a lock only for requests that were locked by the same
+          client key or from the same Actor run.
       style: form
       explode: true
       schema:
@@ -121,11 +120,10 @@ delete:
     - name: clientKey
       in: query
       description: |
-        A unique identifier of the client accessing the request queue. It must
-        be a string between 1 and 32 characters long. This identifier is used to
-        ensure one client is not to able delete or prolong
-
-        a request from another client.
+          A unique identifier of the client accessing the request queue. It must
+          be a string between 1 and 32 characters long. This identifier is used for locking
+          and unlocking requests. You can delete or prolong a lock only for requests that were locked by the same
+          client key or from the same Actor run.
       style: form
       explode: true
       schema:

diff --git a/nginx.conf b/nginx.conf
@@ -324,4 +324,7 @@ server {
   }
   # Redirect rule for "upgrading-to-v03" to "upgrading-to-v0x"
   rewrite ^/python/docs/upgrading/upgrading-to-v03$ /python/docs/upgrading/upgrading-to-v0x permanent;
+
+  # Redirect rule so that /python/docs actually leads somewhere
+  rewrite ^/python/docs/?$ /python/docs/quick-start;
 }
diff --git a/sources/platform/actors/development/actor_definition/dataset_schema/validation.md b/sources/platform/actors/development/actor_definition/dataset_schema/validation.md
@@ -4,6 +4,9 @@ description:  Specify the dataset schema within the Actors so you can add monito
 slug: /actors/development/actor-definition/dataset-schema/validation
 ---
 
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+
 **Specify the dataset schema within the Actors so you can add monitoring and validation at the field level.**
 
 ---
@@ -105,6 +108,8 @@ The type of the AJV validation error object is [here](https://github.com/ajv-val
 
 If you use the Apify JS client or Apify SDK and call `pushData` function you can access the validation errors in a `try catch` block like this:
 
+<Tabs>
+  <TabItem value="Javascript" label="JavaScript" default>
 ```javascript
 try {
     const response = await Actor.pushData(items);
@@ -115,6 +120,17 @@ try {
     });
 }
 ```
+  </TabItem>
+  <TabItem value="Python" label="Python">
+```python
+try:
+    await Actor.push_data(items)
+except ApifyApiError as error:
+    if "invalidItems" in error.data:
+        validation_errors = error.data["invalidItems"]
+```
+  </TabItem>
+</Tabs>
 
 ## Examples of common types of validation
 

diff --git a/sources/platform/actors/development/actor_definition/input_schema/specification.md b/sources/platform/actors/development/actor_definition/input_schema/specification.md
@@ -20,13 +20,16 @@ The Actor input schema serves three main purposes:
 To define an input schema for an Actor, set `input` field in the `.actor/actor.json` file to an input schema object (described below), or path to a JSON file containing the input schema object.
 For backwards compatibility, if the `input` field is omitted, the system looks for an `INPUT_SCHEMA.json` file either in the `.actor` directory or the Actor's top-level directory—but note that this functionality is deprecated and might be removed in the future. The maximum allowed size for the input schema file is 500 kB.
 
-When you provide an input schema, the system will validate the input data passed to the Actor on start (via the API or Apify Console) against the specified schema to ensure compliance before starting the Actor.
+When you provide an input schema, the Apify platform will validate the input data passed to the Actor on start (via the API or Apify Console) to ensure compliance before starting the Actor.
 If the input object doesn't conform to the schema, the caller receives an error and the Actor is not started.
 
 :::note Validation aid
 
-You can also use our [visual input schema editor](https://apify.github.io/input-schema-editor-react/) to guide you through the creation of the `INPUT_SCHEMA.json` file.
-If you need to validate your input schemas, you can use the [`apify validate-schema`](/cli/docs/reference#apify-validate-schema-path) command in the Apify CLI.
+You can use our [visual input schema editor](https://apify.github.io/input-schema-editor-react/) to guide you through the creation of the `INPUT_SCHEMA.json` file.
+
+To ensure the input schema is valid, here's a corresponding [JSON schema file](https://github.com/apify/apify-shared-js/blob/master/packages/input_schema/src/schema.json).
+
+You can also use the [`apify validate-schema`](/cli/docs/reference#apify-validate-schema-path) command in the Apify CLI.
 
 :::
 

diff --git a/sources/platform/integrations/images/api-token-organization.png b/sources/platform/integrations/images/api-token-organization.png
diff --git a/sources/platform/integrations/images/api-token-scoped-default-storage-access.png b/sources/platform/integrations/images/api-token-scoped-default-storage-access.png
diff --git a/sources/platform/integrations/images/api-token-scoped-restricted-access-active.png b/sources/platform/integrations/images/api-token-scoped-restricted-access-active.png
diff --git a/sources/platform/integrations/images/api-token-scoped-run-modes.png b/sources/platform/integrations/images/api-token-scoped-run-modes.png
diff --git a/sources/platform/integrations/images/api-token-scoped-run-tasks.png b/sources/platform/integrations/images/api-token-scoped-run-tasks.png
diff --git a/sources/platform/integrations/images/api-token.png b/sources/platform/integrations/images/api-token.png
diff --git a/sources/platform/integrations/programming/api.md b/sources/platform/integrations/programming/api.md
@@ -80,7 +80,7 @@ A single token can combine both types. You can create a token that can _read_ an
 
 ### Allowing tokens to create resources
 
-If you need to create new resources with the token (for example, create a new Task, or storage), you need to explicitly allow that as well.
+If you need to create new resources with the token (for example, create a new task, or storage), you need to explicitly allow that as well.
 
 Once you create a new resource with the token, _the token will gain full access to that resource_, regardless of other permissions. It is not possible to create a token that can create a dataset, but not write to it.
 
@@ -94,19 +94,21 @@ Some permissions require other permissions to be granted alongside them. These a
 
 #### Automatic dependencies
 
-The form enforces certain dependencies automatically. For example, when you grant the _Write_ permission for a dataset, the _Read_ permission is automatically selected. This ensures that you can write to a dataset if you can also read from it.
+The form enforces certain dependencies automatically. For example, when you grant the **Write** permission for a dataset, the **Read** permission is automatically selected. This ensures that if you can write to a dataset, you can also read from it.
 
 ![The Write permission depends on Read for a dataset](../images/api-token-scoped-dependencies.png)
 
 #### Manual dependencies
 
-Other dependencies are more complicated, and **it is your responsibility that the token is set up correctly**. Specifically:
+Other dependencies are more complicated, so it is up to you to ensure that the token is configured correctly.
 
-- To create or update a Schedule, the token needs access not only to the Schedule itself, but also to the Actor or task that is being scheduled.
-- Similarly, to create or update a task, the token needs the additional permission to access the task's Actor itself.
+Specifically:
+
+- To create or update a Schedule, the token needs access not only to the Schedule itself, but also to the Actor (the **Run** permission) or task (the **Read** permission) that is being scheduled.
+- Similarly, to create, update or run a task, the token needs the **Run** permission on the task's Actor itself.
 
 :::tip
-Let's say that you have an Actor and you want to programmatically create schedules for that Actor. Then you can create a token that has the account level _Create_ permission on schedules, but only the resource-specific _Run_ permission on the Actor. Such a token has exactly the permissions it needs, and nothing more.
+Let's say that you have an Actor and you want to programmatically create schedules for that Actor. Then you can create a token that has the account level **Create** permission on schedules, but only the resource-specific **Run** permission on the Actor. Such a token has exactly the permissions it needs, and nothing more.
 :::
 
 ### Actor execution
@@ -146,8 +148,20 @@ This restriction is _transitive_, which means that if the Actor runs another Act
 
 When Apify [runs an Actor](/platform/actors/running/runs-and-builds#runs), it automatically creates a set of default storages (a dataset, a key-value store and request queue) that the Actor can use in runtime.
 
-- Regardless of mode, the injected token always gets write access to its default storages, and to the run itself (for example, so that the Actor can abort itself). You don't need to configure this on your scoped token.
-- If a scoped token can run an Actor, it gets **write access to default storages of the runs it triggered**. Moreover, it gets **read access to default storages of _all_ runs of that Actor**. If this is not desirable, change your Actor to output data into an existing named storage, or have it create a new storage.
+You can configure whether the scoped token you are going to use to run the Actor should get **Write**
+access to these default storages.
+
+![Configure whether the trigger token gets write access to the run default storages.](../images/api-token-scoped-default-storage-access.png)
+
+:::tip
+Let's say your Actor produces a lot of data that you want to delete just after the Actor finishes. If you enable this toggle, your scoped token will be allowed to do that.
+:::
+
+:::caution
+Even if you disable this option, **the default storages can still be accessed anonymously using just their ID** (which can be obtained via the [run object](https://docs.apify.com/api/v2#tag/Actor-runsRun-object-and-its-storages)).
+
+Moreover, if a scoped token can run an Actor, it can also list all its runs, including their storage IDs, ultimately exposing their content as well. If this is not desirable, change your Actor to output data into an existing named storage, or have it create a new storage.
+:::
 
 ### Schedules
 
@@ -164,7 +178,32 @@ If you set up a webhook pointing to the Apify API, the Apify platform will autom
 Therefore, you need to make sure the token has sufficient permissions not only to set up the webhook, but also to perform the actual operation.
 
 :::tip
-
 Let's say you want to create a webhook that pushes an item to a dataset every time an Actor successfully finishes. Then such a scoped token needs to be allowed to both run the Actor (to create the webhook), and write to that dataset.
-
 :::
+
+### Troubleshooting
+
+#### How do I allow a token to run a task?
+
+Tasks don't have a dedicated **Run** permission. Instead, you should configure the token with the following permissions:
+
+- **Run** on the Actor that the task is executing
+- **Read** on the task
+
+See the following example:
+
+![Scoped token configured to run a task](../images/api-token-scoped-run-tasks.png)
+
+Refer to [this section](#permission-dependencies) to understand how permission dependencies work.
+
+#### My run failed and I can see `insufficient permissions` in the logs
+
+When a run fails with insufficient permissions in the logs, it typically means the Actor is using a scoped token with **Restricted access** configured.
+
+![Scoped token with Restricted access](../images/api-token-scoped-restricted-access-active.png)
+
+What is happening is that the Actor is trying to access a resource (such as a dataset, or a key-value store) or perform an operation that it does not have sufficient permissions for.
+
+If you know what it is, you can add the permission to the scope of your token. If you don't, you can switch the permission mode on the token to **Full access**. This means that the Actor will be able to access all your account data.
+
+Refer to the [Actor execution](#actor-execution) section to understand how executing Actors with scoped tokens works.