Skip to content

Commit

Permalink
feat: Separate ID and name params for Actor.open_xxx (#56)
Browse files Browse the repository at this point in the history
  • Loading branch information
jirimoravcik authored Feb 9, 2023
1 parent 61a5349 commit a1e962e
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 44 deletions.
27 changes: 18 additions & 9 deletions docs/docs.md
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ That’s useful if you want to use the client as a different Apify user than the

***

#### async classmethod open_dataset(dataset_id_or_name=None, \*, force_cloud=False)
#### async classmethod open_dataset(\*, id=None, name=None, force_cloud=False)

Open a dataset.

Expand All @@ -154,8 +154,11 @@ The actual data is stored either on the local filesystem or in the Apify cloud.

* **Parameters**

* **dataset_id_or_name** (`str`, *optional*) – ID or name of the dataset to be opened.
If not provided, the method returns the default dataset associated with the actor run.
* **id** (`str`, *optional*) – ID of the dataset to be opened.
If neither id nor name is provided, the method returns the default dataset associated with the actor run.

* **name** (`str`, *optional*) – Name of the dataset to be opened.
If neither id nor name is provided, the method returns the default dataset associated with the actor run.

* **force_cloud** (`bool`, *optional*) – If set to True then the Apify cloud storage is always used.
This way it is possible to combine local and cloud storage.
Expand All @@ -170,7 +173,7 @@ The actual data is stored either on the local filesystem or in the Apify cloud.

***

#### async classmethod open_key_value_store(key_value_store_id_or_name=None, \*, force_cloud=False)
#### async classmethod open_key_value_store(\*, id=None, name=None, force_cloud=False)

Open a key-value store.

Expand All @@ -180,8 +183,11 @@ The actual data is stored either on a local filesystem or in the Apify cloud.

* **Parameters**

* **key_value_store_id_or_name** (`str`, *optional*) – ID or name of the key-value store to be opened.
If not provided, the method returns the default key-value store associated with the actor run.
* **id** (`str`, *optional*) – ID of the key-value store to be opened.
If neither id nor name is provided, the method returns the default key-value store associated with the actor run.

* **name** (`str`, *optional*) – Name of the key-value store to be opened.
If neither id nor name is provided, the method returns the default key-value store associated with the actor run.

* **force_cloud** (`bool`, *optional*) – If set to True then the Apify cloud storage is always used.
This way it is possible to combine local and cloud storage.
Expand All @@ -196,7 +202,7 @@ The actual data is stored either on a local filesystem or in the Apify cloud.

***

#### async classmethod open_request_queue(request_queue_id_or_name=None, \*, force_cloud=False)
#### async classmethod open_request_queue(\*, id=None, name=None, force_cloud=False)

Open a request queue.

Expand All @@ -207,8 +213,11 @@ and depth-first crawling orders.

* **Parameters**

* **request_queue_id_or_name** (`str`, *optional*) – ID or name of the request queue to be opened.
If not provided, the method returns the default request queue associated with the actor run.
* **id** (`str`, *optional*) – ID of the request queue to be opened.
If neither id nor name is provided, the method returns the default request queue associated with the actor run.

* **name** (`str`, *optional*) – Name of the request queue to be opened.
If neither id nor name is provided, the method returns the default request queue associated with the actor run.

* **force_cloud** (`bool`, *optional*) – If set to True then the Apify cloud storage is always used.
This way it is possible to combine local and cloud storage.
Expand Down
46 changes: 31 additions & 15 deletions src/apify/actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -478,56 +478,68 @@ def _get_storage_client(self, force_cloud: bool) -> Optional[ApifyClientAsync]:
return self._apify_client if force_cloud else None

@classmethod
async def open_dataset(cls, dataset_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> Dataset:
async def open_dataset(cls, *, id: Optional[str] = None, name: Optional[str] = None, force_cloud: bool = False) -> Dataset:
"""Open a dataset.
Datasets are used to store structured data where each object stored has the same attributes,
such as online store products or real estate offers.
The actual data is stored either on the local filesystem or in the Apify cloud.
Args:
dataset_id_or_name (str, optional): ID or name of the dataset to be opened.
If not provided, the method returns the default dataset associated with the actor run.
id (str, optional): ID of the dataset to be opened.
If neither `id` nor `name` is provided, the method returns the default dataset associated with the actor run.
name (str, optional): Name of the dataset to be opened.
If neither `id` nor `name` is provided, the method returns the default dataset associated with the actor run.
force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used.
This way it is possible to combine local and cloud storage.
Returns:
Dataset: An instance of the `Dataset` class for the given ID or name.
"""
return await cls._get_default_instance().open_dataset(dataset_id_or_name=dataset_id_or_name, force_cloud=force_cloud)
return await cls._get_default_instance().open_dataset(id=id, name=name, force_cloud=force_cloud)

async def _open_dataset_internal(self, dataset_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> Dataset:
async def _open_dataset_internal(self, *, id: Optional[str] = None, name: Optional[str] = None, force_cloud: bool = False) -> Dataset:
self._raise_if_not_initialized()

dataset_id_or_name = id or name
return await StorageManager.open_storage(Dataset, dataset_id_or_name, self._get_storage_client(force_cloud), self._config)

@classmethod
async def open_key_value_store(cls, key_value_store_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> KeyValueStore:
async def open_key_value_store(cls, *, id: Optional[str] = None, name: Optional[str] = None, force_cloud: bool = False) -> KeyValueStore:
"""Open a key-value store.
Key-value stores are used to store records or files, along with their MIME content type.
The records are stored and retrieved using a unique key.
The actual data is stored either on a local filesystem or in the Apify cloud.
Args:
key_value_store_id_or_name (str, optional): ID or name of the key-value store to be opened.
If not provided, the method returns the default key-value store associated with the actor run.
id (str, optional): ID of the key-value store to be opened.
If neither `id` nor `name` is provided, the method returns the default key-value store associated with the actor run.
name (str, optional): Name of the key-value store to be opened.
If neither `id` nor `name` is provided, the method returns the default key-value store associated with the actor run.
force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used.
This way it is possible to combine local and cloud storage.
Returns:
KeyValueStore: An instance of the `KeyValueStore` class for the given ID or name.
"""
return await cls._get_default_instance().open_key_value_store(key_value_store_id_or_name=key_value_store_id_or_name, force_cloud=force_cloud)
return await cls._get_default_instance().open_key_value_store(id=id, name=name, force_cloud=force_cloud)

async def _open_key_value_store_internal(self, key_value_store_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> KeyValueStore:
async def _open_key_value_store_internal(
self,
*,
id: Optional[str] = None,
name: Optional[str] = None,
force_cloud: bool = False,
) -> KeyValueStore:
self._raise_if_not_initialized()

key_value_store_id_or_name = id or name
return await StorageManager.open_storage(KeyValueStore, key_value_store_id_or_name, self._get_storage_client(force_cloud), self._config)

@classmethod
async def open_request_queue(cls, request_queue_id_or_name: Optional[str] = None, *, force_cloud: bool = False) -> RequestQueue:
async def open_request_queue(cls, *, id: Optional[str] = None, name: Optional[str] = None, force_cloud: bool = False) -> RequestQueue:
"""Open a request queue.
Request queue represents a queue of URLs to crawl, which is stored either on local filesystem or in the Apify cloud.
Expand All @@ -536,24 +548,28 @@ async def open_request_queue(cls, request_queue_id_or_name: Optional[str] = None
and depth-first crawling orders.
Args:
request_queue_id_or_name (str, optional): ID or name of the request queue to be opened.
If not provided, the method returns the default request queue associated with the actor run.
id (str, optional): ID of the request queue to be opened.
If neither `id` nor `name` is provided, the method returns the default request queue associated with the actor run.
name (str, optional): Name of the request queue to be opened.
If neither `id` nor `name` is provided, the method returns the default request queue associated with the actor run.
force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used.
This way it is possible to combine local and cloud storage.
Returns:
RequestQueue: An instance of the `RequestQueue` class for the given ID or name.
"""
return await cls._get_default_instance().open_request_queue(request_queue_id_or_name=request_queue_id_or_name, force_cloud=force_cloud)
return await cls._get_default_instance().open_request_queue(id=id, name=name, force_cloud=force_cloud)

async def _open_request_queue_internal(
self,
request_queue_id_or_name: Optional[str] = None,
*,
id: Optional[str] = None,
name: Optional[str] = None,
force_cloud: bool = False,
) -> RequestQueue:
self._raise_if_not_initialized()

request_queue_id_or_name = id or name
return await StorageManager.open_storage(RequestQueue, request_queue_id_or_name, self._get_storage_client(force_cloud), self._config)

@classmethod
Expand Down
2 changes: 1 addition & 1 deletion src/apify/storages/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ class Dataset:
def __init__(self, id: str, name: Optional[str], client: Union[ApifyClientAsync, MemoryStorage]) -> None:
"""Create a `Dataset` instance.
Do not use the constructor directly, use the `Dataset.open` function instead.
Do not use the constructor directly, use the `Actor.open_dataset()` function instead.
Args:
id (str): ID of the dataset.
Expand Down
2 changes: 1 addition & 1 deletion src/apify/storages/key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class KeyValueStore:
def __init__(self, id: str, name: Optional[str], client: Union[ApifyClientAsync, MemoryStorage]) -> None:
"""Create a `KeyValueStore` instance.
Do not use the constructor directly, use the `KeyValueStore.open` function instead.
Do not use the constructor directly, use the `Actor.open_key_value_store()` function instead.
Args:
id (str): ID of the key-value store.
Expand Down
8 changes: 5 additions & 3 deletions src/apify/storages/request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ class RequestQueue:
def __init__(self, id: str, name: Optional[str], client: Union[ApifyClientAsync, MemoryStorage]) -> None:
"""Create a `RequestQueue` instance.
Do not use the constructor directly, use the `RequestQueue.open` function instead.
Do not use the constructor directly, use the `Actor.open_request_queue()` function instead.
Args:
id (str): ID of the request queue.
Expand Down Expand Up @@ -499,8 +499,10 @@ async def open(cls, request_queue_id_or_name: Optional[str] = None, config: Opti
and depth-first crawling orders.
Args:
request_queue_id_or_name (str, optional): ID or name of the request queue to be opened.
If not provided, the method returns the default request queue associated with the actor run.
id (str, optional): ID of the request queue to be opened.
If neither `id` nor `name` are provided, the method returns the default request queue associated with the actor run.
name (str, optional): Name of the request queue to be opened.
If neither `id` nor `name` are provided, the method returns the default request queue associated with the actor run.
config (Configuration, optional): A `Configuration` instance, uses global configuration if omitted.
Returns:
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_actor_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,8 @@ async def main() -> None:
async with Actor:
input_object = await Actor.get_input()
dataset_name = input_object['datasetName']
dataset1 = await Actor.open_dataset(dataset_name)
dataset2 = await Actor.open_dataset(dataset_name)
dataset1 = await Actor.open_dataset(name=dataset_name)
dataset2 = await Actor.open_dataset(name=dataset_name)
assert dataset1 is dataset2
await dataset1.drop()

Expand Down
6 changes: 3 additions & 3 deletions tests/integration/test_actor_key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ async def main() -> None:
async with Actor:
input_object = await Actor.get_input()
kvs_name = input_object['kvsName']
kvs1 = await Actor.open_key_value_store(kvs_name)
kvs2 = await Actor.open_key_value_store(kvs_name)
kvs1 = await Actor.open_key_value_store(name=kvs_name)
kvs2 = await Actor.open_key_value_store(name=kvs_name)
assert kvs1 is kvs2
await kvs1.drop()

Expand Down Expand Up @@ -75,7 +75,7 @@ async def main_get() -> None:
async with Actor:
input_object = await Actor.get_input()
# Access KVS of the previous 'set' run
kvs = await Actor.open_key_value_store(input_object['kvs-id'])
kvs = await Actor.open_key_value_store(name=input_object['kvs-id'])
value = await kvs.get_value('test')
assert value['number'] == 123
assert value['string'] == 'a string'
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_actor_request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ async def main() -> None:
async with Actor:
input_object = await Actor.get_input()
rq_name = input_object['rqName']
rq1 = await Actor.open_request_queue(rq_name)
rq2 = await Actor.open_request_queue(rq_name)
rq1 = await Actor.open_request_queue(name=rq_name)
rq2 = await Actor.open_request_queue(name=rq_name)
assert rq1 is rq2
await rq1.drop()

Expand Down
4 changes: 2 additions & 2 deletions tests/unit/actor/test_actor_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@ async def test_same_references(self) -> None:
dataset2 = await Actor.open_dataset()
assert dataset1 is dataset2
dataset_name = 'non-default'
dataset_named1 = await Actor.open_dataset(dataset_name)
dataset_named2 = await Actor.open_dataset(dataset_name)
dataset_named1 = await Actor.open_dataset(name=dataset_name)
dataset_named2 = await Actor.open_dataset(name=dataset_name)
assert dataset_named1 is dataset_named2

async def test_open_datatset_based_env_var(
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/actor/test_actor_key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ async def test_same_references(self) -> None:
kvs2 = await Actor.open_key_value_store()
assert kvs1 is kvs2
kvs_name = 'non-default'
kvs_named1 = await Actor.open_key_value_store(kvs_name)
kvs_named2 = await Actor.open_key_value_store(kvs_name)
kvs_named1 = await Actor.open_key_value_store(name=kvs_name)
kvs_named2 = await Actor.open_key_value_store(name=kvs_name)
assert kvs_named1 is kvs_named2


Expand Down
4 changes: 2 additions & 2 deletions tests/unit/actor/test_actor_memory_storage_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ async def test_actor_memory_storage_e2e(monkeypatch: pytest.MonkeyPatch, tmp_pat
old_client = StorageClientManager.get_storage_client()
async with Actor:
old_default_kvs = await Actor.open_key_value_store()
old_non_default_kvs = await Actor.open_key_value_store('non-default')
old_non_default_kvs = await Actor.open_key_value_store(name='non-default')
# Create data in default and non-default key-value store
await old_default_kvs.set_value('test', 'default value')
await old_non_default_kvs.set_value('test', 'non-default value')
Expand All @@ -43,7 +43,7 @@ def get_storage_client() -> 'MemoryStorage':
assert old_client is not StorageClientManager.get_storage_client()
default_kvs = await Actor.open_key_value_store()
assert default_kvs is not old_default_kvs
non_default_kvs = await Actor.open_key_value_store('non-default')
non_default_kvs = await Actor.open_key_value_store(name='non-default')
assert non_default_kvs is not old_non_default_kvs
default_value = await default_kvs.get_value('test')
non_default_value = await non_default_kvs.get_value('test')
Expand Down
4 changes: 2 additions & 2 deletions tests/unit/actor/test_actor_request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,6 @@ async def test_same_references(self) -> None:
rq2 = await Actor.open_request_queue()
assert rq1 is rq2
rq_name = 'non-default'
rq_named1 = await Actor.open_request_queue(rq_name)
rq_named2 = await Actor.open_request_queue(rq_name)
rq_named1 = await Actor.open_request_queue(name=rq_name)
rq_named2 = await Actor.open_request_queue(name=rq_name)
assert rq_named1 is rq_named2

0 comments on commit a1e962e

Please sign in to comment.