Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Added get_public_url method to KeyValueStore #572

Merged
merged 35 commits into from
Oct 22, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
1ffa698
Add get public URL method for kv store
Oct 6, 2024
9311e08
Use storage_dir from configuration
Oct 6, 2024
f0e4625
Update test assertion value
Oct 8, 2024
2d7e31e
Move implementation to kv client
Oct 8, 2024
2d02a9d
Revert "Update test assertion value"
Oct 8, 2024
da00325
Merge branch 'apify:master' into kvstore-public-url
akshay11298 Oct 9, 2024
fe165b5
Update src/crawlee/memory_storage_client/_key_value_store_client.py
akshay11298 Oct 9, 2024
b771459
Update src/crawlee/memory_storage_client/_key_value_store_client.py
akshay11298 Oct 9, 2024
d7dfb3b
Removed unused params
Oct 9, 2024
c47c1f7
Update the test
Oct 10, 2024
65ead6b
Fix path
Oct 10, 2024
9d2e7cb
Merge remote-tracking branch 'origin/master' into kvstore-public-url
janbuchar Oct 10, 2024
dab9329
Update tests/unit/storages/test_key_value_store.py
akshay11298 Oct 10, 2024
d09a2ec
Update the get_public_url method to be async
Oct 10, 2024
087b92d
None content-type handling
Oct 14, 2024
bb26ed8
Revert "None content-type handling"
Oct 14, 2024
fa6ffbf
Get filename from record if present
Oct 14, 2024
7f3d260
Temp log
Oct 14, 2024
e3ce39b
Use resource_dir
Oct 14, 2024
0444105
Temp
Oct 14, 2024
bca1914
Revert "Temp"
Oct 14, 2024
9292359
Use os.path join to prepare correct path
Oct 20, 2024
33b307a
Merge branch 'apify:master' into kvstore-public-url
akshay11298 Oct 20, 2024
8a68a26
Minor
Oct 22, 2024
37dccca
fix tests
Oct 22, 2024
a381f86
Merge branch 'apify:master' into kvstore-public-url
akshay11298 Oct 22, 2024
11af0c0
Update tests/unit/storages/test_key_value_store.py
vdusek Oct 22, 2024
d6636c2
Update src/crawlee/base_storage_client/_base_key_value_store_client.py
vdusek Oct 22, 2024
1ada2fb
Update src/crawlee/storages/_key_value_store.py
vdusek Oct 22, 2024
f31b826
Update tests/unit/storages/test_key_value_store.py
vdusek Oct 22, 2024
636c08a
Update src/crawlee/memory_storage_client/_key_value_store_client.py
vdusek Oct 22, 2024
e28fb28
Update tests/unit/storages/test_key_value_store.py
vdusek Oct 22, 2024
ec87446
Update src/crawlee/memory_storage_client/_key_value_store_client.py
vdusek Oct 22, 2024
edf1131
Update src/crawlee/base_storage_client/_base_key_value_store_client.py
vdusek Oct 22, 2024
d9cca16
Update src/crawlee/base_storage_client/_base_key_value_store_client.py
vdusek Oct 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/crawlee/base_storage_client/_base_key_value_store_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,3 +114,14 @@ async def delete_record(self, key: str) -> None:
Args:
key: The key of the record which to delete
"""

@abstractmethod
def get_public_url(self, key: str) -> str:
    """Get the public URL for the given key.

    Concrete storage clients must override this method.

    Args:
        key: Key of the record for which the URL is required.

    Returns:
        The public URL for the given key.
    """
vdusek marked this conversation as resolved.
Show resolved Hide resolved
6 changes: 6 additions & 0 deletions src/crawlee/memory_storage_client/_key_value_store_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,12 @@ async def delete_record(self, key: str) -> None:
if self._memory_storage_client.persist_storage:
await existing_store_by_id.delete_persisted_record(record)

@override
def get_public_url(self, key: str) -> str:
    """Get a ``file://`` URL pointing at the persisted record for the given key.

    The location is composed from the memory storage client's ``storage_dir``,
    the ``key_value_stores`` sub-directory, the store's name (or its id when
    the store is unnamed) and the key.

    Args:
        key: Key of the record for which the URL is required.

    Returns:
        An absolute, percent-encoded ``file://`` URL for the record.
    """
    # Function-scope import keeps this block self-contained.
    from pathlib import Path

    store_name = self.name or self.id
    storage_dir = self._memory_storage_client.storage_dir

    # NOTE(review): the '.txt' extension is hard-coded; records stored with a
    # different content type presumably use another extension - confirm
    # against the persistence logic.
    record_path = Path(f'{storage_dir}/key_value_stores/{store_name}/{key}.txt')

    # A bare f'file://{path}' breaks for relative storage directories (the
    # first path segment is parsed as the URL host) and for keys containing
    # characters such as spaces, '#' or '?'. ``as_uri`` requires an absolute
    # path and percent-encodes it, so the result survives urlparse/unquote.
    return record_path.absolute().as_uri()
janbuchar marked this conversation as resolved.
Show resolved Hide resolved

async def persist_record(self, record: KeyValueStoreRecord) -> None:
"""Persist the specified record to the key-value store."""
store_directory = self.resource_directory
Expand Down
11 changes: 11 additions & 0 deletions src/crawlee/storages/_key_value_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,3 +157,14 @@ async def set_value(
return await self._resource_client.delete_record(key)

return await self._resource_client.set_record(key, value, content_type)

def get_public_url(self, key: str) -> str:
    """Get the public URL for the given key.

    The lookup is delegated to the underlying resource client.

    Args:
        key: Key of the record for which the URL is required.

    Returns:
        The public URL for the given key.
    """
    return self._resource_client.get_public_url(key)
10 changes: 10 additions & 0 deletions tests/unit/storages/test_key_value_store.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from __future__ import annotations

from pathlib import Path
from typing import AsyncGenerator
from urllib.parse import unquote, urlparse

import pytest

Expand Down Expand Up @@ -100,3 +102,11 @@ async def test_static_get_set_value(key_value_store: KeyValueStore) -> None:
await key_value_store.set_value('test-static', 'static')
value = await key_value_store.get_value('test-static')
assert value == 'static'


async def test_static_get_public_url(key_value_store: KeyValueStore) -> None:
    """Check that the public URL of a stored record resolves to a file holding its value."""
    await key_value_store.set_value('test-static', 'static')
    public_url = key_value_store.get_public_url('test-static')

    # Strip the URL down to its path and undo percent-encoding so it can be
    # opened as a regular filesystem path.
    path = unquote(urlparse(public_url).path)
    assert Path(path).read_text() == 'static'
Loading