Skip to content

Commit

Permalink
Rm get_random_id and use crypto_random_object_id
Browse files Browse the repository at this point in the history
  • Loading branch information
vdusek committed Nov 14, 2023
1 parent 6d46d9e commit 1282fb8
Show file tree
Hide file tree
Showing 14 changed files with 39 additions and 55 deletions.
6 changes: 3 additions & 3 deletions src/apify/_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
Returns:
dict: Encrypted password and value.
"""
key_bytes = _crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
initialized_vector_bytes = _crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
key_bytes = crypto_random_object_id(ENCRYPTION_KEY_LENGTH).encode('utf-8')
initialized_vector_bytes = crypto_random_object_id(ENCRYPTION_IV_LENGTH).encode('utf-8')
value_bytes = value.encode('utf-8')

password_bytes = key_bytes + initialized_vector_bytes
Expand Down Expand Up @@ -122,7 +122,7 @@ def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey:
return public_key


def _crypto_random_object_id(length: int = 17) -> str:
def crypto_random_object_id(length: int = 17) -> str:
"""Python reimplementation of cryptoRandomObjectId from `@apify/utilities`."""
chars = 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'
return ''.join(secrets.choice(chars) for _ in range(length))
Expand Down
4 changes: 2 additions & 2 deletions src/apify/_memory_storage/resource_clients/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from apify_shared.types import JSONSerializable
from apify_shared.utils import ignore_docs

from ..._crypto import _crypto_random_object_id
from ..._crypto import crypto_random_object_id
from ..._utils import _force_rename, _raise_on_duplicate_storage, _raise_on_non_existing_storage
from ...consts import _StorageTypes
from ..file_storage_utils import _update_dataset_items, _update_metadata
Expand Down Expand Up @@ -52,7 +52,7 @@ def __init__(
name: Optional[str] = None,
) -> None:
"""Initialize the DatasetClient."""
self._id = id or _crypto_random_object_id()
self._id = id or crypto_random_object_id()
self._resource_directory = os.path.join(base_storage_directory, name or self._id)
self._memory_storage_client = memory_storage_client
self._name = name
Expand Down
4 changes: 2 additions & 2 deletions src/apify/_memory_storage/resource_clients/key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@

from apify_shared.utils import ignore_docs, is_file_or_bytes, json_dumps

from ..._crypto import _crypto_random_object_id
from ..._crypto import crypto_random_object_id
from ..._utils import (
_force_remove,
_force_rename,
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(
name: Optional[str] = None,
) -> None:
"""Initialize the KeyValueStoreClient."""
self._id = id or _crypto_random_object_id()
self._id = id or crypto_random_object_id()
self._resource_directory = os.path.join(base_storage_directory, name or self._id)
self._memory_storage_client = memory_storage_client
self._name = name
Expand Down
4 changes: 2 additions & 2 deletions src/apify/_memory_storage/resource_clients/request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, json_dumps

from ..._crypto import _crypto_random_object_id
from ..._crypto import crypto_random_object_id
from ..._utils import _force_rename, _raise_on_duplicate_storage, _raise_on_non_existing_storage, _unique_key_to_request_id
from ...consts import _StorageTypes
from ..file_storage_utils import _delete_request, _update_metadata, _update_request_queue_item
Expand Down Expand Up @@ -46,7 +46,7 @@ def __init__(
name: Optional[str] = None,
) -> None:
"""Initialize the RequestQueueClient."""
self._id = id or _crypto_random_object_id()
self._id = id or crypto_random_object_id()
self._resource_directory = os.path.join(base_storage_directory, name or self._id)
self._memory_storage_client = memory_storage_client
self._name = name
Expand Down
2 changes: 1 addition & 1 deletion src/apify/scrapy/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from .middlewares import ApifyRetryMiddleware
from .pipelines import ActorDatasetPushPipeline
from .scheduler import ApifyScheduler
from .utils import get_random_id, get_running_event_loop_id, open_queue_with_custom_client, to_apify_request, to_scrapy_request
from .utils import get_running_event_loop_id, open_queue_with_custom_client, to_apify_request, to_scrapy_request
7 changes: 4 additions & 3 deletions src/apify/scrapy/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,9 @@
) from exc

from ..actor import Actor
from .._crypto import crypto_random_object_id
from ..storages import RequestQueue
from .utils import get_random_id, nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request
from .utils import nested_event_loop, open_queue_with_custom_client, to_apify_request, to_scrapy_request


class ApifyScheduler(BaseScheduler):
Expand Down Expand Up @@ -79,7 +80,7 @@ def enqueue_request(self, request: Request) -> bool:
Returns:
True if the request was successfully enqueued, False otherwise.
"""
call_id = get_random_id()
call_id = crypto_random_object_id(8)
Actor.log.debug(f'[{call_id}]: ApifyScheduler.enqueue_request was called (scrapy_request={request})...')

apify_request = to_apify_request(request, spider=self.spider)
Expand All @@ -100,7 +101,7 @@ def next_request(self) -> Optional[Request]:
Returns:
The next request, or None if there are no more requests.
"""
call_id = get_random_id()
call_id = crypto_random_object_id(8)
Actor.log.debug(f'[{call_id}]: ApifyScheduler.next_request was called...')
assert isinstance(self._rq, RequestQueue)

Expand Down
23 changes: 3 additions & 20 deletions src/apify/scrapy/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import asyncio
import codecs
import pickle
import random
import string

try:
from scrapy import Request, Spider
Expand All @@ -13,27 +11,12 @@
) from exc

from ..actor import Actor
from .._crypto import crypto_random_object_id
from ..storages import RequestQueue, StorageClientManager

nested_event_loop: asyncio.AbstractEventLoop = asyncio.new_event_loop()


def get_random_id(length: int = 6) -> str:
"""Generate a random ID from alphanumeric characters.
It could be useful mainly for debugging purposes.
Args:
length: The length of the ID. Defaults to 6.
Returns:
generated random ID
"""
characters = string.ascii_letters + string.digits
random_id = ''.join(random.choice(characters) for _ in range(length))
return random_id


def get_running_event_loop_id() -> int:
"""Get the ID of the currently running event loop.
Expand All @@ -56,7 +39,7 @@ def to_apify_request(scrapy_request: Request, spider: Spider) -> dict:
"""
assert isinstance(scrapy_request, Request)

call_id = get_random_id()
call_id = crypto_random_object_id(8)
Actor.log.debug(f'[{call_id}]: to_apify_request was called (scrapy_request={scrapy_request})...')

apify_request = {
Expand Down Expand Up @@ -99,7 +82,7 @@ def to_scrapy_request(apify_request: dict, spider: Spider) -> Request:
assert 'id' in apify_request
assert 'uniqueKey' in apify_request

call_id = get_random_id()
call_id = crypto_random_object_id(8)
Actor.log.debug(f'[{call_id}]: to_scrapy_request was called (apify_request={apify_request})...')

# If the apify_request comes from the Scrapy
Expand Down
4 changes: 2 additions & 2 deletions src/apify/storages/request_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from apify_client.clients import RequestQueueClientAsync, RequestQueueCollectionClientAsync
from apify_shared.utils import ignore_docs

from .._crypto import _crypto_random_object_id
from .._crypto import crypto_random_object_id
from .._memory_storage import MemoryStorageClient
from .._memory_storage.resource_clients import RequestQueueClient, RequestQueueCollectionClient
from .._utils import LRUCache, _budget_ow, _unique_key_to_request_id
Expand Down Expand Up @@ -73,7 +73,7 @@ class RequestQueue(BaseStorage):
"""

_request_queue_client: Union[RequestQueueClientAsync, RequestQueueClient]
_client_key = _crypto_random_object_id()
_client_key = crypto_random_object_id()
_queue_head_dict: OrderedDictType[str, str]
_query_queue_head_task: Optional[asyncio.Task]
_in_progress: Set[str]
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ To pass the source code of the `src/main.py` file directly, use the `main_py` ar

```python
async def test_something(self, make_actor: ActorFactory) -> None:
expected_output = f'ACTOR_OUTPUT_{_crypto_random_object_id(5)}'
expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}'
main_py_source = f"""
import asyncio
from datetime import datetime
Expand Down
4 changes: 2 additions & 2 deletions tests/integration/_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from apify._crypto import _crypto_random_object_id
from apify._crypto import crypto_random_object_id


def generate_unique_resource_name(label: str) -> str:
"""Generates a unique resource name, which will contain the given label."""
label = label.replace('_', '-')
return f'python-sdk-tests-{label}-generated-{_crypto_random_object_id(8)}'
return f'python-sdk-tests-{label}-generated-{crypto_random_object_id(8)}'
10 changes: 5 additions & 5 deletions tests/integration/test_actor_api_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json

from apify import Actor
from apify._crypto import _crypto_random_object_id
from apify._crypto import crypto_random_object_id
from apify_client import ApifyClientAsync

from ._utils import generate_unique_resource_name
Expand Down Expand Up @@ -128,7 +128,7 @@ async def main_outer() -> None:
outer_actor = await make_actor('start-outer', main_func=main_outer)

inner_actor_id = (await inner_actor.get() or {})['id']
test_value = _crypto_random_object_id()
test_value = crypto_random_object_id()

outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id})

Expand Down Expand Up @@ -169,7 +169,7 @@ async def main_outer() -> None:
outer_actor = await make_actor('call-outer', main_func=main_outer)

inner_actor_id = (await inner_actor.get() or {})['id']
test_value = _crypto_random_object_id()
test_value = crypto_random_object_id()

outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id})

Expand Down Expand Up @@ -209,7 +209,7 @@ async def main_outer() -> None:
outer_actor = await make_actor('call-task-outer', main_func=main_outer)

inner_actor_id = (await inner_actor.get() or {})['id']
test_value = _crypto_random_object_id()
test_value = crypto_random_object_id()

task = await apify_client_async.tasks().create(
actor_id=inner_actor_id,
Expand Down Expand Up @@ -304,7 +304,7 @@ async def main_outer() -> None:
outer_actor = await make_actor('metamorph-outer', main_func=main_outer)

inner_actor_id = (await inner_actor.get() or {})['id']
test_value = _crypto_random_object_id()
test_value = crypto_random_object_id()

outer_run_result = await outer_actor.call(run_input={'test_value': test_value, 'inner_actor_id': inner_actor_id})

Expand Down
4 changes: 2 additions & 2 deletions tests/integration/test_fixtures.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime, timezone

from apify import Actor
from apify._crypto import _crypto_random_object_id
from apify._crypto import crypto_random_object_id
from apify_client import ApifyClientAsync

from .conftest import ActorFactory
Expand Down Expand Up @@ -29,7 +29,7 @@ async def main() -> None:
assert run_result['actId'] == output_record['value']

async def test_main_py(self, make_actor: ActorFactory) -> None:
expected_output = f'ACTOR_OUTPUT_{_crypto_random_object_id(5)}'
expected_output = f'ACTOR_OUTPUT_{crypto_random_object_id(5)}'
main_py_source = f"""
import asyncio
from apify import Actor
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import pytest

from apify._crypto import _crypto_random_object_id
from apify._crypto import crypto_random_object_id
from apify._memory_storage import MemoryStorageClient
from apify._memory_storage.resource_clients import KeyValueStoreClient
from apify._utils import _maybe_parse_body
Expand Down Expand Up @@ -227,7 +227,7 @@ async def test_delete_record(key_value_store_client: KeyValueStoreClient) -> Non
async def test_writes_correct_metadata(memory_storage_client: MemoryStorageClient, test_case: Dict) -> None:
test_input = test_case['input']
expected_output = test_case['expectedOutput']
key_value_store_name = _crypto_random_object_id()
key_value_store_name = crypto_random_object_id()

# Write the input data to the key-value store
store_details = await memory_storage_client.key_value_stores().get_or_create(name=key_value_store_name)
Expand Down Expand Up @@ -287,14 +287,14 @@ async def test_writes_correct_metadata(memory_storage_client: MemoryStorageClien
async def test_reads_correct_metadata(memory_storage_client: MemoryStorageClient, test_case: Dict) -> None:
test_input = test_case['input']
expected_output = test_case['expectedOutput']
key_value_store_name = _crypto_random_object_id()
key_value_store_name = crypto_random_object_id()

# Ensure the directory for the store exists
storage_path = os.path.join(memory_storage_client._key_value_stores_directory, key_value_store_name)
os.makedirs(storage_path, exist_ok=True)

store_metadata = {
'id': _crypto_random_object_id(),
'id': crypto_random_object_id(),
'name': None,
'accessedAt': datetime.now(timezone.utc),
'createdAt': datetime.now(timezone.utc),
Expand Down
12 changes: 6 additions & 6 deletions tests/unit/test_crypto.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import pytest

from apify._crypto import _crypto_random_object_id, _load_private_key, _load_public_key, private_decrypt, public_encrypt
from apify._crypto import crypto_random_object_id, _load_private_key, _load_public_key, private_decrypt, public_encrypt

# NOTE: Uses the same keys as in:
# https://github.com/apify/apify-shared-js/blob/master/test/crypto.test.ts
Expand All @@ -18,7 +18,7 @@

class TestCrypto():
def test_encrypt_decrypt_varions_string(self) -> None:
for value in [_crypto_random_object_id(10), '👍', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '_', '=', '+', '[', ']', '{', '}', '|', ';', ':', '"', "'", ',', '.', '<', '>', '?', '/', '~']: # noqa: E501
for value in [crypto_random_object_id(10), '👍', '!', '@', '#', '$', '%', '^', '&', '*', '(', ')', '-', '_', '=', '+', '[', ']', '{', '}', '|', ';', ':', '"', "'", ',', '.', '<', '>', '?', '/', '~']: # noqa: E501
encrypted = public_encrypt(value, public_key=PUBLIC_KEY)
decrypted_value = private_decrypt(**encrypted, private_key=PRIVATE_KEY)
assert decrypted_value == value
Expand Down Expand Up @@ -62,9 +62,9 @@ def test_private_encrypt_node_js_encrypted_value(self) -> None:

assert decrypted_value == value

def test__crypto_random_object_id(self) -> None:
assert len(_crypto_random_object_id()) == 17
assert len(_crypto_random_object_id(5)) == 5
long_random_object_id = _crypto_random_object_id(1000)
def test_crypto_random_object_id(self) -> None:
assert len(crypto_random_object_id()) == 17
assert len(crypto_random_object_id(5)) == 5
long_random_object_id = crypto_random_object_id(1000)
for char in long_random_object_id:
assert char in 'abcdefghijklmnopqrstuvwxyzABCEDFGHIJKLMNOPQRSTUVWXYZ0123456789'

0 comments on commit 1282fb8

Please sign in to comment.