-
Notifications
You must be signed in to change notification settings - Fork 189
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add file utils #745
Add file utils #745
Changes from all commits
6d452f4
398b9a6
182a2da
149413e
86661a4
bf808e1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import griptape.utils as utils | ||
from concurrent import futures | ||
from typing import Optional | ||
|
||
|
||
def load_file(path: str) -> bytes:
    """Read the file at the given path in binary mode and return everything in it.

    Args:
        path (str): Location of the file to read.

    Returns:
        The raw, undecoded bytes of the file.
    """
    with open(path, mode="rb") as handle:
        contents = handle.read()
    return contents
|
||
|
||
def load_files(paths: list[str], futures_executor: Optional[futures.ThreadPoolExecutor] = None) -> dict[str, bytes]:
    """Load multiple files concurrently and return a dictionary of their content.

    Args:
        paths: The paths to the files to load.
        futures_executor: The executor to use for concurrent loading. If None, a new
            ThreadPoolExecutor is created for this call and shut down before the
            function returns. An executor supplied by the caller is never shut down
            here; its lifecycle stays with the caller.

    Returns:
        A dictionary where the keys are a hash of the path (``utils.str_to_hash(str(path))``)
        and the values are the content of the files. A path that appears more than once
        produces the same key each time, so it contributes a single entry.
    """
    owns_executor = futures_executor is None
    executor = futures.ThreadPoolExecutor() if futures_executor is None else futures_executor

    try:
        return utils.execute_futures_dict(
            {utils.str_to_hash(str(path)): executor.submit(load_file, path) for path in paths}
        )
    finally:
        # Only tear down a pool created by this call; otherwise its worker
        # threads would linger after every invocation that relies on the default.
        if owns_executor:
            executor.shutdown()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
||
foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar foobar | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
import os | ||
from griptape.loaders import TextLoader | ||
from griptape import utils | ||
from concurrent import futures | ||
from tests.mocks.mock_embedding_driver import MockEmbeddingDriver | ||
|
||
MAX_TOKENS = 50 | ||
|
||
|
||
# Tests for the file helpers reached through griptape.utils (load_file / load_files),
# exercised on their own and together with TextLoader.
class TestFileUtils: | ||
# load_file on a single text fixture: the returned bytes are decoded as UTF-8 and
# checked for their prefix and total length.
def test_load_file(self): | ||
dirname = os.path.dirname(__file__) | ||
file = utils.load_file(os.path.join(dirname, "../../resources/foobar-many.txt")) | ||
|
||
assert file.decode("utf-8").startswith("foobar foobar foobar") | ||
assert len(file.decode("utf-8")) == 4563 | ||
|
||
# load_files with an explicit single-worker executor. Three sources are passed, two of
# them the same path; the result is expected to hold two entries, looked up by
# utils.str_to_hash of the joined path.
def test_load_files(self): | ||
dirname = os.path.dirname(__file__) | ||
sources = ["resources/foobar-many.txt", "resources/foobar-many.txt", "resources/small.png"] | ||
sources = [os.path.join(dirname, "../../", source) for source in sources] | ||
files = utils.load_files(sources, futures_executor=futures.ThreadPoolExecutor(max_workers=1)) | ||
assert len(files) == 2 | ||
|
||
test_file = files[utils.str_to_hash(sources[0])] | ||
assert len(test_file) == 4563 | ||
assert test_file.decode("utf-8").startswith("foobar foobar foobar") | ||
|
||
# The binary fixture must come back untouched: check its size and the 8-byte PNG signature.
small_file = files[utils.str_to_hash(sources[2])] | ||
assert len(small_file) == 97 | ||
assert small_file[:8] == b"\x89PNG\r\n\x1a\n" | ||
|
||
# Bytes returned by load_file are passed straight to TextLoader.load, which is expected
# to return a list of 39 artifacts for this fixture with MAX_TOKENS as the chunk limit.
def test_load_file_with_loader(self): | ||
dirname = os.path.dirname(__file__) | ||
file = utils.load_file(os.path.join(dirname, "../../", "resources/foobar-many.txt")) | ||
artifacts = TextLoader(max_tokens=MAX_TOKENS, embedding_driver=MockEmbeddingDriver()).load(file) | ||
|
||
assert len(artifacts) == 39 | ||
assert isinstance(artifacts, list) | ||
assert artifacts[0].value.startswith("foobar foobar foobar") | ||
|
||
# load_files with its default executor; the loaded values are handed to
# TextLoader.load_collection and the artifacts are fetched back via loader.to_key.
def test_load_files_with_loader(self): | ||
dirname = os.path.dirname(__file__) | ||
sources = ["resources/foobar-many.txt"] | ||
sources = [os.path.join(dirname, "../../", source) for source in sources] | ||
files = utils.load_files(sources) | ||
loader = TextLoader(max_tokens=MAX_TOKENS, embedding_driver=MockEmbeddingDriver()) | ||
collection = loader.load_collection(list(files.values())) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wondering if There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could have two functions, something like Another more heavy handed alternative could be to allow passing a Currently:
With suggestion:
If the input is the same shape as the output of
|
||
|
||
# The collection is keyed by loader.to_key applied to the loaded file content.
test_file_artifacts = collection[loader.to_key(files[utils.str_to_hash(sources[0])])] | ||
assert len(test_file_artifacts) == 39 | ||
assert isinstance(test_file_artifacts, list) | ||
assert test_file_artifacts[0].value.startswith("foobar foobar foobar") |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Arg doc for `futures_executor` is missing.