Skip to content

feat: add dataset.create_items_public_url and key_value_store.create_keys_public_url #453

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Aug 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
131 changes: 130 additions & 1 deletion src/apify_client/clients/resource_clients/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,16 @@
import warnings
from contextlib import asynccontextmanager, contextmanager
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urlparse, urlunparse

from apify_shared.utils import create_storage_content_signature

from apify_client._types import ListPage
from apify_client._utils import catch_not_found_or_throw, filter_out_none_values_recursively, pluck_data
from apify_client._utils import (
catch_not_found_or_throw,
filter_out_none_values_recursively,
pluck_data,
)
from apify_client.clients.base import ResourceClient, ResourceClientAsync
from apify_client.errors import ApifyApiError

Expand Down Expand Up @@ -558,6 +565,67 @@ def get_statistics(self) -> dict | None:

return None

def create_items_public_url(
self,
*,
offset: int | None = None,
limit: int | None = None,
clean: bool | None = None,
desc: bool | None = None,
fields: list[str] | None = None,
omit: list[str] | None = None,
unwind: list[str] | None = None,
skip_empty: bool | None = None,
skip_hidden: bool | None = None,
flatten: list[str] | None = None,
view: str | None = None,
expires_in_secs: int | None = None,
) -> str:
"""Generate a URL that can be used to access dataset items.

If the client has permission to access the dataset's URL signing key,
the URL will include a signature to verify its authenticity.

You can optionally control how long the signed URL should be valid using the `expires_in_secs` option.
This value sets the expiration duration in seconds from the time the URL is generated.
If not provided, the URL will not expire.

Any other options (like `limit` or `offset`) will be included as query parameters in the URL.

Args:
offset: Forwarded as the `offset` query parameter.
limit: Forwarded as the `limit` query parameter.
clean: Forwarded as the `clean` query parameter.
desc: Forwarded as the `desc` query parameter.
fields: Forwarded as the `fields` query parameter.
omit: Forwarded as the `omit` query parameter.
unwind: Forwarded as the `unwind` query parameter.
skip_empty: Forwarded as the `skipEmpty` query parameter.
skip_hidden: Forwarded as the `skipHidden` query parameter.
flatten: Forwarded as the `flatten` query parameter.
view: Forwarded as the `view` query parameter.
expires_in_secs: Signature validity in seconds. It is multiplied by 1000 and handed to
`create_storage_content_signature` as `expires_in_millis`; `None` is passed through unchanged.

Options whose value is `None` are left out of the query string.

Returns:
The public dataset items URL.
"""
# The dataset record supplies the `id` and `urlSigningSecretKey` used for signing below.
dataset = self.get()

# Keyword order here determines the order of the parameters in the resulting query string.
request_params = self._params(
offset=offset,
limit=limit,
desc=desc,
clean=clean,
fields=fields,
omit=omit,
unwind=unwind,
skipEmpty=skip_empty,
skipHidden=skip_hidden,
flatten=flatten,
view=view,
)

# Sign only when a dataset record came back and it exposes a signing key; otherwise the URL stays unsigned.
if dataset and 'urlSigningSecretKey' in dataset:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmmm starting here the rest of the function seems identical to the async variant. Now sure if there is a nice way to reuse the code? I guess this is how we do it here in the client? 🤔

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about this, but this is how every other methods are written. WDYT @janbuchar?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We sure do repeat stuff here, but no need to change that now. Perhaps one day we can generate the whole client lib from an OpenAPI spec. Until then, this will have to do

signature = create_storage_content_signature(
resource_id=dataset['id'],
url_signing_secret_key=dataset['urlSigningSecretKey'],
expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
)
request_params['signature'] = signature

items_public_url = urlparse(self._url('items'))
# Drop unset options so they never show up in the query string.
filtered_params = {k: v for k, v in request_params.items() if v is not None}
if filtered_params:
items_public_url = items_public_url._replace(query=urlencode(filtered_params))

return urlunparse(items_public_url)


class DatasetClientAsync(ResourceClientAsync):
"""Async sub-client for manipulating a single dataset."""
Expand Down Expand Up @@ -1003,3 +1071,64 @@ async def get_statistics(self) -> dict | None:
catch_not_found_or_throw(exc)

return None

async def create_items_public_url(
    self,
    *,
    offset: int | None = None,
    limit: int | None = None,
    clean: bool | None = None,
    desc: bool | None = None,
    fields: list[str] | None = None,
    omit: list[str] | None = None,
    unwind: list[str] | None = None,
    skip_empty: bool | None = None,
    skip_hidden: bool | None = None,
    flatten: list[str] | None = None,
    view: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Build a shareable URL for reading this dataset's items.

    The dataset record is fetched first. When it is returned and contains a
    `urlSigningSecretKey`, a `signature` query parameter is added to the URL;
    otherwise the URL is returned unsigned.

    Args:
        offset: Forwarded as the `offset` query parameter.
        limit: Forwarded as the `limit` query parameter.
        clean: Forwarded as the `clean` query parameter.
        desc: Forwarded as the `desc` query parameter.
        fields: Forwarded as the `fields` query parameter.
        omit: Forwarded as the `omit` query parameter.
        unwind: Forwarded as the `unwind` query parameter.
        skip_empty: Forwarded as the `skipEmpty` query parameter.
        skip_hidden: Forwarded as the `skipHidden` query parameter.
        flatten: Forwarded as the `flatten` query parameter.
        view: Forwarded as the `view` query parameter.
        expires_in_secs: Signature validity in seconds, converted to milliseconds
            before signing. With `None` no expiration is passed to the signer.

    Options whose value is `None` are omitted from the query string. The
    remaining values are handed to `urlencode` exactly as `self._params`
    returns them.

    Returns:
        The public dataset items URL.
    """
    dataset = await self.get()

    # Keyword order determines the order of parameters in the final query string.
    query_options = self._params(
        offset=offset,
        limit=limit,
        desc=desc,
        clean=clean,
        fields=fields,
        omit=omit,
        unwind=unwind,
        skipEmpty=skip_empty,
        skipHidden=skip_hidden,
        flatten=flatten,
        view=view,
    )

    # Signing needs both the dataset record and its secret key.
    if dataset and 'urlSigningSecretKey' in dataset:
        expires_in_millis = None if expires_in_secs is None else expires_in_secs * 1000
        query_options['signature'] = create_storage_content_signature(
            resource_id=dataset['id'],
            url_signing_secret_key=dataset['urlSigningSecretKey'],
            expires_in_millis=expires_in_millis,
        )

    base_url = urlparse(self._url('items'))
    present_options = {name: value for name, value in query_options.items() if value is not None}
    if not present_options:
        return urlunparse(base_url)

    return urlunparse(base_url._replace(query=urlencode(present_options)))
100 changes: 100 additions & 0 deletions src/apify_client/clients/resource_clients/key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
from contextlib import asynccontextmanager, contextmanager
from http import HTTPStatus
from typing import TYPE_CHECKING, Any
from urllib.parse import urlencode, urlparse, urlunparse

from apify_shared.utils import create_storage_content_signature

from apify_client._utils import (
catch_not_found_or_throw,
Expand Down Expand Up @@ -264,6 +267,54 @@ def delete_record(self, key: str) -> None:
timeout_secs=_SMALL_TIMEOUT,
)

def create_keys_public_url(
    self,
    *,
    limit: int | None = None,
    exclusive_start_key: str | None = None,
    collection: str | None = None,
    prefix: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Generate a URL that can be used to access key-value store keys.

    If the client has permission to access the key-value store's URL signing key,
    the URL will include a signature to verify its authenticity.

    You can optionally control how long the signed URL should be valid using the `expires_in_secs` option.
    This value sets the expiration duration in seconds from the time the URL is generated.
    If not provided, the URL will not expire.

    Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.

    Args:
        limit: Forwarded as the `limit` query parameter.
        exclusive_start_key: Forwarded as the `exclusiveStartKey` query parameter.
        collection: Forwarded as the `collection` query parameter.
        prefix: Forwarded as the `prefix` query parameter.
        expires_in_secs: Signature validity in seconds, converted to milliseconds
            before signing. With `None` no expiration is passed to the signer.

    Options whose value is `None` are omitted from the query string.

    Returns:
        The public key-value store keys URL.
    """
    store = self.get()

    # Query parameters use the API's camelCase names, hence `exclusiveStartKey`
    # rather than the Python-side `exclusive_start_key`.
    request_params = self._params(
        limit=limit,
        exclusiveStartKey=exclusive_start_key,
        collection=collection,
        prefix=prefix,
    )

    # Sign only when the store record came back and it exposes a signing key.
    if store and 'urlSigningSecretKey' in store:
        signature = create_storage_content_signature(
            resource_id=store['id'],
            url_signing_secret_key=store['urlSigningSecretKey'],
            expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
        )
        request_params['signature'] = signature

    keys_public_url = urlparse(self._url('keys'))

    # Drop unset options so they never show up in the query string.
    filtered_params = {k: v for k, v in request_params.items() if v is not None}
    if filtered_params:
        keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))

    return urlunparse(keys_public_url)


class KeyValueStoreClientAsync(ResourceClientAsync):
"""Async sub-client for manipulating a single key-value store."""
Expand Down Expand Up @@ -503,3 +554,52 @@ async def delete_record(self, key: str) -> None:
params=self._params(),
timeout_secs=_SMALL_TIMEOUT,
)

async def create_keys_public_url(
    self,
    *,
    limit: int | None = None,
    exclusive_start_key: str | None = None,
    collection: str | None = None,
    prefix: str | None = None,
    expires_in_secs: int | None = None,
) -> str:
    """Generate a URL that can be used to access key-value store keys.

    If the client has permission to access the key-value store's URL signing key,
    the URL will include a signature to verify its authenticity.

    You can optionally control how long the signed URL should be valid using the `expires_in_secs` option.
    This value sets the expiration duration in seconds from the time the URL is generated.
    If not provided, the URL will not expire.

    Any other options (like `limit` or `prefix`) will be included as query parameters in the URL.

    Args:
        limit: Forwarded as the `limit` query parameter.
        exclusive_start_key: Forwarded as the `exclusiveStartKey` query parameter.
        collection: Forwarded as the `collection` query parameter.
        prefix: Forwarded as the `prefix` query parameter.
        expires_in_secs: Signature validity in seconds, converted to milliseconds
            before signing. With `None` no expiration is passed to the signer.

    Options whose value is `None` are omitted from the query string.

    Returns:
        The public key-value store keys URL.
    """
    store = await self.get()

    # Query parameters use the API's camelCase names, hence `exclusiveStartKey`
    # rather than the Python-side `exclusive_start_key`.
    request_params = self._params(
        limit=limit,
        exclusiveStartKey=exclusive_start_key,
        collection=collection,
        prefix=prefix,
    )

    # Sign only when the store record came back and it exposes a signing key.
    if store and 'urlSigningSecretKey' in store:
        signature = create_storage_content_signature(
            resource_id=store['id'],
            url_signing_secret_key=store['urlSigningSecretKey'],
            expires_in_millis=expires_in_secs * 1000 if expires_in_secs is not None else None,
        )
        request_params['signature'] = signature

    # Parsed once, right before use (the earlier duplicate parse was dead code).
    keys_public_url = urlparse(self._url('keys'))

    # Drop unset options so they never show up in the query string.
    filtered_params = {k: v for k, v in request_params.items() if v is not None}
    if filtered_params:
        keys_public_url = keys_public_url._replace(query=urlencode(filtered_params))

    return urlunparse(keys_public_url)
10 changes: 10 additions & 0 deletions tests/integration/integration_test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
import secrets
import string


def random_string(length: int = 10) -> str:
    """Return a string of `length` ASCII letters, each drawn with `secrets.choice`."""
    alphabet = string.ascii_letters
    letters = [secrets.choice(alphabet) for _ in range(length)]
    return ''.join(letters)


def random_resource_name(resource: str) -> str:
    """Return `python-client-test-<resource>-<suffix>`, where the suffix is five random letters."""
    suffix = random_string(5)
    return f'python-client-test-{resource}-{suffix}'
90 changes: 90 additions & 0 deletions tests/integration/test_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
from __future__ import annotations

from typing import TYPE_CHECKING

import impit

from integration.integration_test_utils import random_resource_name

if TYPE_CHECKING:
from apify_client import ApifyClient, ApifyClientAsync


class TestDatasetSync:
    """Integration tests for the sync dataset client's `create_items_public_url`."""

    def test_dataset_should_create_public_items_expiring_url_with_params(self, apify_client: ApifyClient) -> None:
        """An expiring URL carries the signature and the requested options and is readable without auth."""
        created_dataset = apify_client.datasets().get_or_create(name=random_resource_name('dataset'))
        dataset = apify_client.dataset(created_dataset['id'])

        try:
            items_public_url = dataset.create_items_public_url(
                expires_in_secs=2000,
                limit=10,
                offset=0,
            )

            assert 'signature=' in items_public_url
            assert 'limit=10' in items_public_url
            assert 'offset=0' in items_public_url

            # Fetch through a plain HTTP client so the URL is used without the API client's own request setup.
            impit_client = impit.Client()
            response = impit_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the remote dataset, even when a check above fails.
            dataset.delete()

        assert apify_client.dataset(created_dataset['id']).get() is None

    def test_dataset_should_create_public_items_non_expiring_url(self, apify_client: ApifyClient) -> None:
        """A URL generated without `expires_in_secs` is still signed and readable without auth."""
        created_dataset = apify_client.datasets().get_or_create(name=random_resource_name('dataset'))
        dataset = apify_client.dataset(created_dataset['id'])

        try:
            items_public_url = dataset.create_items_public_url()

            assert 'signature=' in items_public_url

            impit_client = impit.Client()
            response = impit_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the remote dataset, even when a check above fails.
            dataset.delete()

        assert apify_client.dataset(created_dataset['id']).get() is None


class TestDatasetAsync:
    """Integration tests for the async dataset client's `create_items_public_url`."""

    async def test_dataset_should_create_public_items_expiring_url_with_params(
        self, apify_client_async: ApifyClientAsync
    ) -> None:
        """An expiring URL carries the signature and the requested options and is readable without auth."""
        created_dataset = await apify_client_async.datasets().get_or_create(name=random_resource_name('dataset'))
        dataset = apify_client_async.dataset(created_dataset['id'])

        try:
            items_public_url = await dataset.create_items_public_url(
                expires_in_secs=2000,
                limit=10,
                offset=0,
            )

            assert 'signature=' in items_public_url
            assert 'limit=10' in items_public_url
            assert 'offset=0' in items_public_url

            # Fetch through a plain HTTP client so the URL is used without the API client's own request setup.
            impit_async_client = impit.AsyncClient()
            response = await impit_async_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the remote dataset, even when a check above fails.
            await dataset.delete()

        assert await apify_client_async.dataset(created_dataset['id']).get() is None

    async def test_dataset_should_create_public_items_non_expiring_url(
        self, apify_client_async: ApifyClientAsync
    ) -> None:
        """A URL generated without `expires_in_secs` is still signed and readable without auth."""
        created_dataset = await apify_client_async.datasets().get_or_create(name=random_resource_name('dataset'))
        dataset = apify_client_async.dataset(created_dataset['id'])

        try:
            items_public_url = await dataset.create_items_public_url()

            assert 'signature=' in items_public_url

            impit_async_client = impit.AsyncClient()
            response = await impit_async_client.get(items_public_url, timeout=5)
            assert response.status_code == 200
        finally:
            # Always remove the remote dataset, even when a check above fails.
            await dataset.delete()

        assert await apify_client_async.dataset(created_dataset['id']).get() is None
Loading
Loading