From 1282fb8a30c41ccaa33c0afee59cdc7c43ee07b9 Mon Sep 17 00:00:00 2001 From: Vlada Dusek Date: Tue, 14 Nov 2023 14:19:09 +0100 Subject: [PATCH] Rm get_random_id and use crypto_random_object_id --- src/apify/_crypto.py | 6 ++--- .../resource_clients/dataset.py | 4 ++-- .../resource_clients/key_value_store.py | 4 ++-- .../resource_clients/request_queue.py | 4 ++-- src/apify/scrapy/__init__.py | 2 +- src/apify/scrapy/scheduler.py | 7 +++--- src/apify/scrapy/utils.py | 23 +++---------------- src/apify/storages/request_queue.py | 4 ++-- tests/integration/README.md | 2 +- tests/integration/_utils.py | 4 ++-- tests/integration/test_actor_api_helpers.py | 10 ++++---- tests/integration/test_fixtures.py | 4 ++-- .../resource_clients/test_key_value_store.py | 8 +++---- tests/unit/test_crypto.py | 12 +++++----- 14 files changed, 39 insertions(+), 55 deletions(-) diff --git a/src/apify/_crypto.py b/src/apify/_crypto.py index 4f7fc947..12f5798d 100644 --- a/src/apify/_crypto.py +++ b/src/apify/_crypto.py @@ -30,8 +30,8 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict: Returns: disc: Encrypted password and value. """ - key_bytes = _crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8') - initialized_vector_bytes = _crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8') + key_bytes = crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8') + initialized_vector_bytes = crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8') value_bytes = value.encode('utf-8') password_bytes = key_bytes + initialized_vector_bytes @@ -122,7 +122,7 @@ def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey: return public_key -def _crypto_random_object_id(length: int = 17) -> str: +def crypto_random_object_id(length: int = 17) -> str: """Python reimplementation of cryptoRandomObjectId from `@apify/utilities`.""" chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789' return ''.join(secrets.choice(chars) for _ in range(length)) diff --git a/src/apify/_memory_storage/resource_clients/dataset.py b/src/apify/_memory_storage/resource_clients/dataset.py index 3dbaa7ee..ed21c0d3 100644 --- a/src/apify/_memory_storage/resource_clients/dataset.py +++ b/src/apify/_memory_storage/resource_clients/dataset.py @@ -10,7 +10,7 @@ from apify_shared.types import JSONSerializable from apify_shared.utils import ignore_docs -from ..._crypto import _crypto_random_object_id +from ..._crypto import crypto_random_object_id from ..._utils import _force_rename, _raise_on_duplicate_storage, _raise_on_non_existing_storage from ...consts import _StorageTypes from ..file_storage_utils import _update_dataset_items, _update_metadata @@ -52,7 +52,7 @@ def __init__( name: Optional[str] = None, ) -> None: """Initialize the DatasetClient.""" - self._id = id or _crypto_random_object_id() + self._id = id or crypto_random_object_id() self._resource_directory = os.path.join(base_storage_directory, name or self._id) self._memory_storage_client = memory_storage_client self._name = name diff --git a/src/apify/_memory_storage/resource_clients/key_value_store.py b/src/apify/_memory_storage/resource_clients/key_value_store.py index c3e4f6d7..cd33fca9 100644 --- a/src/apify/_memory_storage/resource_clients/key_value_store.py +++ b/src/apify/_memory_storage/resource_clients/key_value_store.py @@ -15,7 +15,7 @@ from apify_shared.utils import ignore_docs, is_file_or_bytes, json_dumps -from ..._crypto import _crypto_random_object_id +from ..._crypto import crypto_random_object_id from ..._utils import ( _force_remove, _force_rename, @@ -73,7 +73,7 @@ def __init__( name: Optional[str] = None, ) -> None: """Initialize the KeyValueStoreClient.""" - self._id = id or _crypto_random_object_id() + self._id = id or crypto_random_object_id() self._resource_directory = os.path.join(base_storage_directory, name or self._id) self._memory_storage_client = memory_storage_client self._name = name diff --git a/src/apify/_memory_storage/resource_clients/request_queue.py b/src/apify/_memory_storage/resource_clients/request_queue.py index ebd5968c..2c574da5 100644 --- a/src/apify/_memory_storage/resource_clients/request_queue.py +++ b/src/apify/_memory_storage/resource_clients/request_queue.py @@ -10,7 +10,7 @@ from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, json_dumps -from ..._crypto import _crypto_random_object_id +from ..._crypto import crypto_random_object_id from ..._utils import _force_rename, _raise_on_duplicate_storage, _raise_on_non_existing_storage, _unique_key_to_request_id from ...consts import _StorageTypes from ..file_storage_utils import _delete_request, _update_metadata, _update_request_queue_item @@ -46,7 +46,7 @@ def __init__( name: Optional[str] = None, ) -> None: """Initialize the RequestQueueClient.""" - self._id = id or _crypto_random_object_id() + self._id = id or crypto_random_object_id() self._resource_directory = os.path.join(base_storage_directory, name or self._id) self._memory_storage_client = memory_storage_client self._name = name diff --git a/src/apify/scrapy/__init__.py b/src/apify/scrapy/__init__.py index a8046f8c..44b52138 100644 --- a/src/apify/scrapy/__init__.py +++ b/src/apify/scrapy/__init__.py @@ -1,4 +1,4 @@ from .middlewares import ApifyRetryMiddleware from .pipelines import ActorDatasetPushPipeline from .scheduler import ApifyScheduler -from .utils import get_random_id, get_running_event_loop_id, open_queue_with_custom_client, to_apify_request, to_scrapy_request +from .utils import get_running_event_loop_id, open_queue_with_custom_client, to_apify_request, to_scrapy_request diff --git a/src/apify/scrapy/scheduler.py b/src/apify/scrapy/scheduler.py index 8831675e..57fc546e 100644 --- a/src/apify/scrapy/scheduler.py +++ b/src/apify/scrapy/scheduler.py @@ -12,8 +12,9 @@ ) from exc from ..actor import Actor +from .._crypto import crypto_random_object_id from ..storages import RequestQueue -from .utils import get_random_id, nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request +from .utils import nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request class ApifyScheduler(BaseScheduler): @@ -79,7 +80,7 @@ def enqueue_request(self, request: Request) -> bool: Returns: True if the request was successfully enqueued, False otherwise. """ - call_id = get_random_id() + call_id = crypto_random_object_id(8) Actor.log.debug(f'[{call_id}]: ApifyScheduler.enqueue_request was called (scrapy_request={request})...') apify_request = to_apify_request(request, spider=self.spider) @@ -100,7 +101,7 @@ def next_request(self) -> Optional[Request]: Returns: The next request, or None if there are no more requests. """ - call_id = get_random_id() + call_id = crypto_random_object_id(8) Actor.log.debug(f'[{call_id}]: ApifyScheduler.next_request was called...') assert isinstance(self._rq, RequestQueue) diff --git a/src/apify/scrapy/utils.py b/src/apify/scrapy/utils.py index 255522de..bb898362 100644 --- a/src/apify/scrapy/utils.py +++ b/src/apify/scrapy/utils.py @@ -1,8 +1,6 @@ import asyncio import codecs import pickle -import random -import string try: from scrapy import Request, Spider @@ -13,27 +11,12 @@ ) from exc from ..actor import Actor +from .._crypto import crypto_random_object_id from ..storages import RequestQueue, StorageClientManager nested_event_loop: asyncio.AbstractEventLoop = asyncio.new_event_loop() -def get_random_id(length: int = 6) -> str: - """Generate a random ID from alphanumeric characters. - - It could be useful mainly for debugging purposes. - - Args: - length: The lenght of the ID. Defaults to 6. - - Returns: - generated random ID - """ - characters = string.ascii_letters + string.digits - random_id = ''.join(random.choice(characters) for _ in range(length)) - return random_id - - def get_running_event_loop_id() -> int: """Get the ID of the currently running event loop. @@ -56,7 +39,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict: """ assert isinstance(scrapy_request, Request) - call_id = get_random_id() + call_id = crypto_random_object_id(8) Actor.log.debug(f'[{call_id}]: to_apify_request was called (scrapy_request={scrapy_request})...') apify_request = { @@ -99,7 +82,7 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request: assert 'id' in apify_request assert 'uniqueKey' in apify_request - call_id = get_random_id() + call_id = crypto_random_object_id(8) Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...') # If the apify_request comes from the Scrapy diff --git a/src/apify/storages/request_queue.py b/src/apify/storages/request_queue.py index ff35f09d..8cdf8712 100644 --- a/src/apify/storages/request_queue.py +++ b/src/apify/storages/request_queue.py @@ -9,7 +9,7 @@ from apify_client.clients import RequestQueueClientAsync, RequestQueueCollectionClientAsync from apify_shared.utils import ignore_docs -from .._crypto import _crypto_random_object_id +from .._crypto import crypto_random_object_id from .._memory_storage import MemoryStorageClient from .._memory_storage.resource_clients import RequestQueueClient, RequestQueueCollectionClient from .._utils import LRUCache, _budget_ow, _unique_key_to_request_id @@ -73,7 +73,7 @@ class RequestQueue(BaseStorage): """ _request_queue_client: Union[RequestQueueClientAsync, RequestQueueClient] - _client_key = _crypto_random_object_id() + _client_key = crypto_random_object_id() _queue_head_dict: OrderedDictType[str, str] _query_queue_head_task: Optional[asyncio.Task] _in_progress: Set[str] diff --git a/tests/integration/README.md b/tests/integration/README.md index 8af5fe52..fb86f84b 100644 --- a/tests/integration/README.md +++ b/tests/integration/README.md @@ -85,7 +85,7 @@ To pass the source code of the `src/main.py` file directly, use the `main_py` ar ```python async def test_something(self, make_actor: ActorFactory) -> None: - expected_output = f'ACTOR_OUTPUT_{_crypto_random_object_id(5)}' + expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}' main_py_source = f""" import asyncio from datetime import datetime diff --git a/tests/integration/_utils.py b/tests/integration/_utils.py index a3495a93..e6e89052 100644 --- a/tests/integration/_utils.py +++ b/tests/integration/_utils.py @@ -1,7 +1,7 @@ -from apify._crypto import _crypto_random_object_id +from apify._crypto import crypto_random_object_id def generate_unique_resource_name(label: str) -> str: """Generates a unique resource name, which will contain the given label.""" label = label.replace('_', '-') - return f'python-sdk-tests-{label}-generated-{_crypto_random_object_id(8)}' + return f'python-sdk-tests-{label}-generated-{crypto_random_object_id(8)}' diff --git a/tests/integration/test_actor_api_helpers.py b/tests/integration/test_actor_api_helpers.py index d516358d..9b7e3132 100644 --- a/tests/integration/test_actor_api_helpers.py +++ b/tests/integration/test_actor_api_helpers.py @@ -2,7 +2,7 @@ import json from apify import Actor -from apify._crypto import _crypto_random_object_id +from apify._crypto import crypto_random_object_id from apify_client import ApifyClientAsync from ._utils import generate_unique_resource_name @@ -128,7 +128,7 @@ async def main_outer() -> None: outer_actor = await make_actor('start-outer', main_func=main_outer) inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = _crypto_random_object_id() + test_value = crypto_random_object_id() outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id}) @@ -169,7 +169,7 @@ async def main_outer() -> None: outer_actor = await make_actor('call-outer', main_func=main_outer) inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = _crypto_random_object_id() + test_value = crypto_random_object_id() outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id}) @@ -209,7 +209,7 @@ async def main_outer() -> None: outer_actor = await make_actor('call-task-outer', main_func=main_outer) inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = _crypto_random_object_id() + test_value = crypto_random_object_id() task = await apify_client_async.tasks().create( actor_id=inner_actor_id, @@ -304,7 +304,7 @@ async def main_outer() -> None: outer_actor = await make_actor('metamorph-outer', main_func=main_outer) inner_actor_id = (await inner_actor.get() or {})['id'] - test_value = _crypto_random_object_id() + test_value = crypto_random_object_id() outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id}) diff --git a/tests/integration/test_fixtures.py b/tests/integration/test_fixtures.py index 1b149ea6..a26e3018 100644 --- a/tests/integration/test_fixtures.py +++ b/tests/integration/test_fixtures.py @@ -1,7 +1,7 @@ from datetime import datetime, timezone from apify import Actor -from apify._crypto import _crypto_random_object_id +from apify._crypto import crypto_random_object_id from apify_client import ApifyClientAsync from .conftest import ActorFactory @@ -29,7 +29,7 @@ async def main() -> None: assert run_result['actId'] == output_record['value'] async def test_main_py(self, make_actor: ActorFactory) -> None: - expected_output = f'ACTOR_OUTPUT_{_crypto_random_object_id(5)}' + expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}' main_py_source = f""" import asyncio from apify import Actor diff --git a/tests/unit/memory_storage/resource_clients/test_key_value_store.py b/tests/unit/memory_storage/resource_clients/test_key_value_store.py index fe185407..9dcaf1f2 100644 --- a/tests/unit/memory_storage/resource_clients/test_key_value_store.py +++ b/tests/unit/memory_storage/resource_clients/test_key_value_store.py @@ -8,7 +8,7 @@ import pytest -from apify._crypto import _crypto_random_object_id +from apify._crypto import crypto_random_object_id from apify._memory_storage import MemoryStorageClient from apify._memory_storage.resource_clients import KeyValueStoreClient from apify._utils import _maybe_parse_body @@ -227,7 +227,7 @@ async def test_delete_record(key_value_store_client: KeyValueStoreClient) -> Non async def test_writes_correct_metadata(memory_storage_client: MemoryStorageClient, test_case: Dict) -> None: test_input = test_case['input'] expected_output = test_case['expectedOutput'] - key_value_store_name = _crypto_random_object_id() + key_value_store_name = crypto_random_object_id() # Write the input data to the key-value store store_details = await memory_storage_client.key_value_stores().get_or_create(name=key_value_store_name) @@ -287,14 +287,14 @@ async def test_writes_correct_metadata(memory_storage_client: MemoryStorageClien async def test_reads_correct_metadata(memory_storage_client: MemoryStorageClient, test_case: Dict) -> None: test_input = test_case['input'] expected_output = test_case['expectedOutput'] - key_value_store_name = _crypto_random_object_id() + key_value_store_name = crypto_random_object_id() # Ensure the directory for the store exists storage_path = os.path.join(memory_storage_client._key_value_stores_directory, key_value_store_name) os.makedirs(storage_path, exist_ok=True) store_metadata = { - 'id': _crypto_random_object_id(), + 'id': crypto_random_object_id(), 'name': None, 'accessedAt': datetime.now(timezone.utc), 'createdAt': datetime.now(timezone.utc), diff --git a/tests/unit/test_crypto.py b/tests/unit/test_crypto.py index f749bb51..7d692c9a 100644 --- a/tests/unit/test_crypto.py +++ b/tests/unit/test_crypto.py @@ -2,7 +2,7 @@ import pytest -from apify._crypto import _crypto_random_object_id, _load_private_key, _load_public_key, private_decrypt, public_encrypt +from apify._crypto import crypto_random_object_id, _load_private_key, _load_public_key, private_decrypt, public_encrypt # NOTE: Uses the same keys as in: # https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts @@ -18,7 +18,7 @@ class TestCrypto(): def test_encrypt_decrypt_varions_string(self) -> None: - for value in [_crypto_random_object_id(10), '👍', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '_', '=', '+', '[', ']', '{', '}', '|', ';', ':', '"', "'", ',', '.', '<', '>', '?', '/', '~']: # noqa: E501 + for value in [crypto_random_object_id(10), '👍', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '_', '=', '+', '[', ']', '{', '}', '|', ';', ':', '"', "'", ',', '.', '<', '>', '?', '/', '~']: # noqa: E501 encrypted = public_encrypt(value, public_key=PUBLIC_KEY) decrypted_value = private_decrypt(**encrypted, private_key=PRIVATE_KEY) assert decrypted_value == value @@ -62,9 +62,9 @@ def test_private_encrypt_node_js_encrypted_value(self) -> None: assert decrypted_value == value - def test__crypto_random_object_id(self) -> None: - assert len(_crypto_random_object_id()) == 17 - assert len(_crypto_random_object_id(5)) == 5 - long_random_object_id = _crypto_random_object_id(1000) + def test_crypto_random_object_id(self) -> None: + assert len(crypto_random_object_id()) == 17 + assert len(crypto_random_object_id(5)) == 5 + long_random_object_id = crypto_random_object_id(1000) for char in long_random_object_id: assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'