From 982f708c85cb998705be5c6a364c034becc06bc6 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Wed, 21 Aug 2024 09:23:51 -0700 Subject: [PATCH 01/10] Improve ListArtifact --- CHANGELOG.md | 2 ++ griptape/artifacts/list_artifact.py | 23 +++++++++++++--------- griptape/schemas/base_schema.py | 6 +++++- griptape/tasks/tool_task.py | 6 +++++- tests/unit/artifacts/test_list_artifact.py | 6 ++++++ 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0528b1a46..0aedd1970 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added - `BaseConversationMemory.prompt_driver` for use with autopruning. +- Generic type support to `ListArtifact`. +- Iteration support to `ListArtifact`. ### Fixed - Parsing streaming response with some OpenAi compatible services. diff --git a/griptape/artifacts/list_artifact.py b/griptape/artifacts/list_artifact.py index 298f29c6a..9ebec394c 100644 --- a/griptape/artifacts/list_artifact.py +++ b/griptape/artifacts/list_artifact.py @@ -1,23 +1,25 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Generic, Optional, TypeVar from attrs import Attribute, define, field from griptape.artifacts import BaseArtifact if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Iterator, Sequence + +T = TypeVar("T", bound=BaseArtifact) @define -class ListArtifact(BaseArtifact): - value: Sequence[BaseArtifact] = field(factory=list, metadata={"serializable": True}) +class ListArtifact(BaseArtifact, Generic[T]): + value: Sequence[T] = field(factory=list, metadata={"serializable": True}) item_separator: str = field(default="\n\n", kw_only=True, metadata={"serializable": True}) validate_uniform_types: bool = field(default=False, kw_only=True, metadata={"serializable": True}) @value.validator # pyright: 
ignore[reportAttributeAccessIssue] - def validate_value(self, _: Attribute, value: list[BaseArtifact]) -> None: + def validate_value(self, _: Attribute, value: list[T]) -> None: if self.validate_uniform_types and len(value) > 0: first_type = type(value[0]) @@ -31,18 +33,21 @@ def child_type(self) -> Optional[type]: else: return None - def __getitem__(self, key: int) -> BaseArtifact: + def __getitem__(self, key: int) -> T: return self.value[key] def __bool__(self) -> bool: return len(self) > 0 + def __add__(self, other: BaseArtifact) -> ListArtifact[T]: + return ListArtifact(self.value + other.value) + + def __iter__(self) -> Iterator[T]: + return iter(self.value) + def to_text(self) -> str: return self.item_separator.join([v.to_text() for v in self.value]) - def __add__(self, other: BaseArtifact) -> BaseArtifact: - return ListArtifact(self.value + other.value) - def is_type(self, target_type: type) -> bool: if self.value: return isinstance(self.value[0], target_type) diff --git a/griptape/schemas/base_schema.py b/griptape/schemas/base_schema.py index f25e8870b..b285d1476 100644 --- a/griptape/schemas/base_schema.py +++ b/griptape/schemas/base_schema.py @@ -2,7 +2,7 @@ from abc import ABC from collections.abc import Sequence -from typing import Any, Literal, Union, _SpecialForm, get_args, get_origin +from typing import Any, Literal, TypeVar, Union, _SpecialForm, get_args, get_origin import attrs from marshmallow import INCLUDE, Schema, fields @@ -56,6 +56,10 @@ def _get_field_for_type(cls, field_type: type) -> fields.Field | fields.Nested: field_class, args, optional = cls._get_field_type_info(field_type) + # Resolve TypeVars to their bound type + if isinstance(field_class, TypeVar): + field_class = field_class.__bound__ + if attrs.has(field_class): if ABC in field_class.__bases__: return fields.Nested(PolymorphicSchema(inner_class=field_class), allow_none=optional) diff --git a/griptape/tasks/tool_task.py b/griptape/tasks/tool_task.py index 6dd5000b3..68260ea91 
100644 --- a/griptape/tasks/tool_task.py +++ b/griptape/tasks/tool_task.py @@ -84,7 +84,11 @@ def run(self) -> BaseArtifact: subtask.after_run() if isinstance(subtask.output, ListArtifact): - self.output = subtask.output[0] + first_artifact = subtask.output[0] + if isinstance(first_artifact, BaseArtifact): + self.output = first_artifact + else: + self.output = ErrorArtifact(f"Output is not an Artifact: {type(subtask.output[0])}") else: self.output = InfoArtifact("No tool output") except Exception as e: diff --git a/tests/unit/artifacts/test_list_artifact.py b/tests/unit/artifacts/test_list_artifact.py index 06d234645..37769e1b8 100644 --- a/tests/unit/artifacts/test_list_artifact.py +++ b/tests/unit/artifacts/test_list_artifact.py @@ -23,6 +23,12 @@ def test___add__(self): assert artifact.value[0].value == "foo" assert artifact.value[1].value == "bar" + def test___iter__(self): + assert [a.value for a in ListArtifact([TextArtifact("foo"), TextArtifact("bar")])] == ["foo", "bar"] + + def test_type_var(self): + assert ListArtifact[TextArtifact]([TextArtifact("foo")]).value[0].value == "foo" + def test_validate_value(self): with pytest.raises(ValueError): ListArtifact([TextArtifact("foo"), BlobArtifact(b"bar")], validate_uniform_types=True) From b839973e8027c0fb075b6d6821bc4966699ab72a Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Wed, 21 Aug 2024 13:28:29 -0700 Subject: [PATCH 02/10] Refactor extraction engine methods, return type, and extraction behavior. 
--- CHANGELOG.md | 7 ++++ .../engines/src/extraction_engines_1.py | 12 +++---- .../engines/src/extraction_engines_2.py | 17 ++++------ .../extraction/base_extraction_engine.py | 17 +++++++--- .../extraction/csv_extraction_engine.py | 26 +++++++-------- .../extraction/json_extraction_engine.py | 33 ++++++++----------- griptape/tasks/extraction_task.py | 7 ++-- .../engines/extraction/json/system.j2 | 2 ++ .../templates/engines/extraction/json/user.j2 | 4 +-- griptape/tools/extraction/tool.py | 2 +- .../extraction/test_csv_extraction_engine.py | 4 +-- .../extraction/test_json_extraction_engine.py | 29 +++++++++------- tests/unit/memory/tool/test_task_memory.py | 6 +--- tests/unit/tools/test_extraction_tool.py | 10 +++--- 14 files changed, 91 insertions(+), 85 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0aedd1970..09e82ef68 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Generic type support to `ListArtifact`. - Iteration support to `ListArtifact`. +### Changed +- **BREAKING**: Split `BaseExtractionEngine.extract` into `extract` and `extract_artifacts` for consistency with `BaseSummaryEngine`. +- **BREAKING**: `BaseExtractionEngine` no longer catches exceptions and returns `ErrorArtifact`s. +- `JsonExtractionEngine` to extract either a JSON object or array depending on the provided schema. +- `JsonExtractionEngine.extract_artifacts` now returns a `ListArtifact[JsonArtifact]`. +- `CsvExtractionEngine.extract_artifacts` now returns a `ListArtifact[CsvRowArtifact]`. + ### Fixed - Parsing streaming response with some OpenAi compatible services. 
diff --git a/docs/griptape-framework/engines/src/extraction_engines_1.py b/docs/griptape-framework/engines/src/extraction_engines_1.py index c681980f2..45ccfd3e0 100644 --- a/docs/griptape-framework/engines/src/extraction_engines_1.py +++ b/docs/griptape-framework/engines/src/extraction_engines_1.py @@ -1,10 +1,9 @@ -from griptape.artifacts import ListArtifact from griptape.drivers import OpenAiChatPromptDriver from griptape.engines import CsvExtractionEngine # Initialize the CsvExtractionEngine instance csv_engine = CsvExtractionEngine( - prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), + prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), column_names=["name", "age", "location"] ) # Define some unstructured data @@ -15,10 +14,7 @@ """ # Extract CSV rows using the engine -result = csv_engine.extract(sample_text, column_names=["name", "age", "location"]) +result = csv_engine.extract_text(sample_text) -if isinstance(result, ListArtifact): - for row in result.value: - print(row.to_text()) -else: - print(result.to_text()) +for row in result: + print(row.to_text()) diff --git a/docs/griptape-framework/engines/src/extraction_engines_2.py b/docs/griptape-framework/engines/src/extraction_engines_2.py index d47bb48e5..2fb8cd8b0 100644 --- a/docs/griptape-framework/engines/src/extraction_engines_2.py +++ b/docs/griptape-framework/engines/src/extraction_engines_2.py @@ -1,11 +1,13 @@ from schema import Schema -from griptape.artifacts.list_artifact import ListArtifact from griptape.drivers import OpenAiChatPromptDriver from griptape.engines import JsonExtractionEngine +# Define a schema for extraction +user_schema = Schema([{"name": str, "age": int, "location": str}]).json_schema("UserSchema") + json_engine = JsonExtractionEngine( - prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), + prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), template_schema=user_schema ) # Define some unstructured data @@ -14,14 +16,9 @@ Bob (Age 35) lives in 
California. """ -# Define a schema for extraction -user_schema = Schema({"users": [{"name": str, "age": int, "location": str}]}).json_schema("UserSchema") # Extract data using the engine -result = json_engine.extract(sample_json_text, template_schema=user_schema) +result = json_engine.extract_text(sample_json_text) -if isinstance(result, ListArtifact): - for artifact in result.value: - print(artifact.value) -else: - print(result.to_text()) +for artifact in result: + print(artifact.value) diff --git a/griptape/engines/extraction/base_extraction_engine.py b/griptape/engines/extraction/base_extraction_engine.py index fb1fab6c4..10ba5d142 100644 --- a/griptape/engines/extraction/base_extraction_engine.py +++ b/griptape/engines/extraction/base_extraction_engine.py @@ -5,11 +5,11 @@ from attrs import Attribute, Factory, define, field +from griptape.artifacts import ListArtifact, TextArtifact from griptape.chunkers import BaseChunker, TextChunker from griptape.configs import Defaults if TYPE_CHECKING: - from griptape.artifacts import ErrorArtifact, ListArtifact from griptape.drivers import BasePromptDriver from griptape.rules import Ruleset @@ -47,11 +47,20 @@ def min_response_tokens(self) -> int: - self.prompt_driver.tokenizer.max_input_tokens * self.max_token_multiplier, ) + def extract_text( + self, + text: str, + *, + rulesets: Optional[list[Ruleset]] = None, + **kwargs, + ) -> ListArtifact: + return self.extract_artifacts(ListArtifact([TextArtifact(text)]), rulesets=rulesets, **kwargs) + @abstractmethod - def extract( + def extract_artifacts( self, - text: str | ListArtifact, + artifacts: ListArtifact[TextArtifact], *, rulesets: Optional[list[Ruleset]] = None, **kwargs, - ) -> ListArtifact | ErrorArtifact: ... + ) -> ListArtifact: ... 
diff --git a/griptape/engines/extraction/csv_extraction_engine.py b/griptape/engines/extraction/csv_extraction_engine.py index c9c040f65..40c9058ec 100644 --- a/griptape/engines/extraction/csv_extraction_engine.py +++ b/griptape/engines/extraction/csv_extraction_engine.py @@ -6,7 +6,7 @@ from attrs import Factory, define, field -from griptape.artifacts import CsvRowArtifact, ErrorArtifact, ListArtifact, TextArtifact +from griptape.artifacts import CsvRowArtifact, ListArtifact, TextArtifact from griptape.common import Message, PromptStack from griptape.engines import BaseExtractionEngine from griptape.utils import J2 @@ -21,23 +21,21 @@ class CsvExtractionEngine(BaseExtractionEngine): system_template_generator: J2 = field(default=Factory(lambda: J2("engines/extraction/csv/system.j2")), kw_only=True) user_template_generator: J2 = field(default=Factory(lambda: J2("engines/extraction/csv/user.j2")), kw_only=True) - def extract( + def extract_artifacts( self, - text: str | ListArtifact, + artifacts: ListArtifact[TextArtifact], *, rulesets: Optional[list[Ruleset]] = None, **kwargs, - ) -> ListArtifact | ErrorArtifact: - try: - return ListArtifact( - self._extract_rec( - cast(list[TextArtifact], text.value) if isinstance(text, ListArtifact) else [TextArtifact(text)], - [], - ), - item_separator="\n", - ) - except Exception as e: - return ErrorArtifact(f"error extracting CSV rows: {e}") + ) -> ListArtifact[CsvRowArtifact]: + return ListArtifact( + self._extract_rec( + cast(list[TextArtifact], artifacts.value), + [], + rulesets=rulesets, + ), + item_separator="\n", + ) def text_to_csv_rows(self, text: str, column_names: list[str]) -> list[CsvRowArtifact]: rows = [] diff --git a/griptape/engines/extraction/json_extraction_engine.py b/griptape/engines/extraction/json_extraction_engine.py index 8f2f4a3fe..d3928c3ea 100644 --- a/griptape/engines/extraction/json_extraction_engine.py +++ b/griptape/engines/extraction/json_extraction_engine.py @@ -6,7 +6,7 @@ from attrs import 
Factory, define, field -from griptape.artifacts import ErrorArtifact, ListArtifact, TextArtifact +from griptape.artifacts import JsonArtifact, ListArtifact, TextArtifact from griptape.common import PromptStack from griptape.common.prompt_stack.messages.message import Message from griptape.engines import BaseExtractionEngine @@ -18,7 +18,7 @@ @define class JsonExtractionEngine(BaseExtractionEngine): - JSON_PATTERN = r"(?s)[^\[]*(\[.*\])" + JSON_PATTERN = r"(?s)(\{.*\}|\[.*\])" template_schema: dict = field(default=Factory(dict), kw_only=True) system_template_generator: J2 = field( @@ -26,40 +26,33 @@ class JsonExtractionEngine(BaseExtractionEngine): ) user_template_generator: J2 = field(default=Factory(lambda: J2("engines/extraction/json/user.j2")), kw_only=True) - def extract( + def extract_artifacts( self, - text: str | ListArtifact, + artifacts: ListArtifact[TextArtifact], *, rulesets: Optional[list[Ruleset]] = None, **kwargs, - ) -> ListArtifact | ErrorArtifact: - try: - return ListArtifact( - self._extract_rec( - cast(list[TextArtifact], text.value) if isinstance(text, ListArtifact) else [TextArtifact(text)], - [], - rulesets=rulesets, - ), - item_separator="\n", - ) - except Exception as e: - return ErrorArtifact(f"error extracting JSON: {e}") + ) -> ListArtifact[JsonArtifact]: + return ListArtifact( + self._extract_rec(cast(list[TextArtifact], artifacts.value), [], rulesets=rulesets), + item_separator="\n", + ) - def json_to_text_artifacts(self, json_input: str) -> list[TextArtifact]: + def json_to_text_artifacts(self, json_input: str) -> list[JsonArtifact]: json_matches = re.findall(self.JSON_PATTERN, json_input, re.DOTALL) if json_matches: - return [TextArtifact(json.dumps(e)) for e in json.loads(json_matches[-1])] + return [JsonArtifact(json.loads(e)) for e in json_matches] else: return [] def _extract_rec( self, artifacts: list[TextArtifact], - extractions: list[TextArtifact], + extractions: list[JsonArtifact], *, rulesets: Optional[list[Ruleset]] = None, 
- ) -> list[TextArtifact]: + ) -> list[JsonArtifact]: artifacts_text = self.chunk_joiner.join([a.value for a in artifacts]) system_prompt = self.system_template_generator.render( json_template_schema=json.dumps(self.template_schema), diff --git a/griptape/tasks/extraction_task.py b/griptape/tasks/extraction_task.py index c74c3ac49..234840401 100644 --- a/griptape/tasks/extraction_task.py +++ b/griptape/tasks/extraction_task.py @@ -4,10 +4,11 @@ from attrs import define, field +from griptape.artifacts import ListArtifact from griptape.tasks import BaseTextInputTask if TYPE_CHECKING: - from griptape.artifacts import ErrorArtifact, ListArtifact + from griptape.artifacts import ErrorArtifact from griptape.engines import BaseExtractionEngine @@ -17,4 +18,6 @@ class ExtractionTask(BaseTextInputTask): args: dict = field(kw_only=True, factory=dict) def run(self) -> ListArtifact | ErrorArtifact: - return self.extraction_engine.extract(self.input.to_text(), rulesets=self.all_rulesets, **self.args) + return self.extraction_engine.extract_artifacts( + ListArtifact([self.input]), rulesets=self.all_rulesets, **self.args + ) diff --git a/griptape/templates/engines/extraction/json/system.j2 b/griptape/templates/engines/extraction/json/system.j2 index 987ff19a9..568914dd5 100644 --- a/griptape/templates/engines/extraction/json/system.j2 +++ b/griptape/templates/engines/extraction/json/system.j2 @@ -1,3 +1,5 @@ +Your output must exactly match the Extraction Template JSON Schema. + Extraction Template JSON Schema: """{{ json_template_schema }}""" {% if rulesets %} diff --git a/griptape/templates/engines/extraction/json/user.j2 b/griptape/templates/engines/extraction/json/user.j2 index 984977d9a..9b0a611f0 100644 --- a/griptape/templates/engines/extraction/json/user.j2 +++ b/griptape/templates/engines/extraction/json/user.j2 @@ -1,4 +1,4 @@ -Extract information from the Text based on the Extraction Template JSON Schema into an array of JSON objects. +Extract information from the Text. 
Text: """{{ text }}""" -JSON array: +JSON: diff --git a/griptape/tools/extraction/tool.py b/griptape/tools/extraction/tool.py index 1f6d06b80..279ca3376 100644 --- a/griptape/tools/extraction/tool.py +++ b/griptape/tools/extraction/tool.py @@ -57,4 +57,4 @@ def extract(self, params: dict) -> ListArtifact | InfoArtifact | ErrorArtifact: else: return ErrorArtifact("memory not found") - return self.extraction_engine.extract(artifacts) + return self.extraction_engine.extract_artifacts(artifacts) diff --git a/tests/unit/engines/extraction/test_csv_extraction_engine.py b/tests/unit/engines/extraction/test_csv_extraction_engine.py index 893c21d60..36f70c6ec 100644 --- a/tests/unit/engines/extraction/test_csv_extraction_engine.py +++ b/tests/unit/engines/extraction/test_csv_extraction_engine.py @@ -8,8 +8,8 @@ class TestCsvExtractionEngine: def engine(self): return CsvExtractionEngine(column_names=["test1"]) - def test_extract(self, engine): - result = engine.extract("foo") + def test_extract_text(self, engine): + result = engine.extract_text("foo") assert len(result.value) == 1 assert result.value[0].value == {"test1": "mock output"} diff --git a/tests/unit/engines/extraction/test_json_extraction_engine.py b/tests/unit/engines/extraction/test_json_extraction_engine.py index 48430f1e5..938438af4 100644 --- a/tests/unit/engines/extraction/test_json_extraction_engine.py +++ b/tests/unit/engines/extraction/test_json_extraction_engine.py @@ -1,7 +1,9 @@ +from os.path import dirname, join, normpath +from pathlib import Path + import pytest from schema import Schema -from griptape.artifacts import ErrorArtifact from griptape.engines import JsonExtractionEngine from tests.mocks.mock_prompt_driver import MockPromptDriver @@ -16,22 +18,27 @@ def engine(self): template_schema=Schema({"foo": "bar"}).json_schema("TemplateSchema"), ) - def test_extract(self, engine): - result = engine.extract("foo") + def test_extract_text(self, engine): + result = engine.extract_text("foo") + + assert 
len(result.value) == 1 + assert result.value[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] + + def test_chunked_extract_text(self, engine): + large_text = Path(normpath(join(dirname(__file__), "../../../resources", "test.txt"))).read_text() - assert len(result.value) == 2 - assert result.value[0].value == '{"test_key_1": "test_value_1"}' - assert result.value[1].value == '{"test_key_2": "test_value_2"}' + extracted = engine.extract_text(large_text * 50) + assert len(extracted) == 177 + assert extracted[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] def test_extract_error(self, engine): engine.template_schema = lambda: "non serializable" - assert isinstance(engine.extract("foo"), ErrorArtifact) + with pytest.raises(TypeError): + engine.extract_text("foo") def test_json_to_text_artifacts(self, engine): - assert [ - a.value - for a in engine.json_to_text_artifacts('[{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}]') - ] == ['{"test_key_1": "test_value_1"}', '{"test_key_2": "test_value_2"}'] + extracted = engine.json_to_text_artifacts('[{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}]') + assert extracted[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] def test_json_to_text_artifacts_no_matches(self, engine): assert engine.json_to_text_artifacts("asdfasdfasdf") == [] diff --git a/tests/unit/memory/tool/test_task_memory.py b/tests/unit/memory/tool/test_task_memory.py index 2f6ffe1c9..d2575959a 100644 --- a/tests/unit/memory/tool/test_task_memory.py +++ b/tests/unit/memory/tool/test_task_memory.py @@ -1,6 +1,6 @@ import pytest -from griptape.artifacts import BlobArtifact, CsvRowArtifact, ErrorArtifact, InfoArtifact, ListArtifact, TextArtifact +from griptape.artifacts import BlobArtifact, ErrorArtifact, InfoArtifact, ListArtifact, TextArtifact from griptape.memory import TaskMemory from griptape.memory.task.storage import BlobArtifactStorage, 
TextArtifactStorage from griptape.structures import Agent @@ -10,10 +10,6 @@ class TestTaskMemory: - @pytest.fixture(autouse=True) - def _mock_griptape(self, mocker): - mocker.patch("griptape.engines.CsvExtractionEngine.extract", return_value=[CsvRowArtifact({"foo": "bar"})]) - @pytest.fixture() def memory(self): return defaults.text_task_memory("MyMemory") diff --git a/tests/unit/tools/test_extraction_tool.py b/tests/unit/tools/test_extraction_tool.py index 1219da373..edf1663d2 100644 --- a/tests/unit/tools/test_extraction_tool.py +++ b/tests/unit/tools/test_extraction_tool.py @@ -39,16 +39,14 @@ def test_json_extract_artifacts(self, json_tool): {"values": {"data": {"memory_name": json_tool.input_memory[0].name, "artifact_namespace": "foo"}}} ) - assert len(result.value) == 2 - assert result.value[0].value == '{"test_key_1": "test_value_1"}' - assert result.value[1].value == '{"test_key_2": "test_value_2"}' + assert len(result.value) == 1 + assert result.value[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] def test_json_extract_content(self, json_tool): result = json_tool.extract({"values": {"data": "foo"}}) - assert len(result.value) == 2 - assert result.value[0].value == '{"test_key_1": "test_value_1"}' - assert result.value[1].value == '{"test_key_2": "test_value_2"}' + assert len(result.value) == 1 + assert result.value[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] def test_csv_extract_artifacts(self, csv_tool): csv_tool.input_memory[0].store_artifact("foo", TextArtifact("foo,bar\nbaz,maz")) From e92e0850502fb7aacb12c7d72bc525bea82a7115 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Wed, 21 Aug 2024 13:37:35 -0700 Subject: [PATCH 03/10] Update system template --- griptape/templates/engines/extraction/json/system.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/griptape/templates/engines/extraction/json/system.j2 b/griptape/templates/engines/extraction/json/system.j2 index 
568914dd5..b6bac028a 100644 --- a/griptape/templates/engines/extraction/json/system.j2 +++ b/griptape/templates/engines/extraction/json/system.j2 @@ -1,4 +1,4 @@ -Your output must exactly match the Extraction Template JSON Schema. +Your answer MUST be JSON that successfully validates against the Extraction Template JSON Schema. Extraction Template JSON Schema: """{{ json_template_schema }}""" From 004e36a427d6e0e03a203ca48fc629a4cca3133c Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Wed, 21 Aug 2024 14:47:51 -0700 Subject: [PATCH 04/10] Revert some sillyness, update docs --- CHANGELOG.md | 3 +- .../engines/extraction-engines.md | 27 +++++++++++------ .../engines/src/extraction_engines_2.py | 29 ++++++++++++------- .../structures/src/tasks_6.py | 7 ++--- .../structures/src/tasks_7.py | 4 +-- griptape/artifacts/json_artifact.py | 4 +-- .../extraction/csv_extraction_engine.py | 2 +- .../extraction/json_extraction_engine.py | 6 ++-- .../engines/extraction/json/system.j2 | 2 -- .../templates/engines/extraction/json/user.j2 | 4 +-- .../extraction/test_json_extraction_engine.py | 15 ++++++---- tests/unit/tools/test_extraction_tool.py | 10 ++++--- 12 files changed, 66 insertions(+), 47 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 09e82ef68..0b9ccbe59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,7 +13,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - **BREAKING**: Split `BaseExtractionEngine.extract` into `extract` and `extract_artifacts` for consistency with `BaseSummaryEngine`. - **BREAKING**: `BaseExtractionEngine` no longer catches exceptions and returns `ErrorArtifact`s. -- `JsonExtractionEngine` to extract either a JSON object or array depending on the provided schema. +- **BREAKING**: `JsonExtractionEngine.template_schema` is now required. +- **BREAKING**: `CsvExtractionEngine.column_names` is now required. 
- `JsonExtractionEngine.extract_artifacts` now returns a `ListArtifact[JsonArtifact]`. - `CsvExtractionEngine.extract_artifacts` now returns a `ListArtifact[CsvRowArtifact]`. diff --git a/docs/griptape-framework/engines/extraction-engines.md b/docs/griptape-framework/engines/extraction-engines.md index b971e63cc..43333e4b0 100644 --- a/docs/griptape-framework/engines/extraction-engines.md +++ b/docs/griptape-framework/engines/extraction-engines.md @@ -10,10 +10,7 @@ As of now, Griptape supports two types of Extraction Engines: the CSV Extraction ## CSV -The CSV Extraction Engine is designed specifically for extracting data from CSV-formatted content. - -!!! info - The CSV Extraction Engine requires the `column_names` parameter for specifying the columns to be extracted. +The CSV Extraction Engine is designed for extracting CSV-formatted content from unstructured data. ```python --8<-- "docs/griptape-framework/engines/src/extraction_engines_1.py" @@ -27,15 +24,27 @@ Charlie,40,Texas ## JSON -The JSON Extraction Engine is tailored for extracting data from JSON-formatted content. +The JSON Extraction Engine is designed for extracting JSON-formatted content from unstructured data. -!!! info - The JSON Extraction Engine requires the `template_schema` parameter for specifying the structure to be extracted. ```python --8<-- "docs/griptape-framework/engines/src/extraction_engines_2.py" ``` ``` -{'name': 'Alice', 'age': 28, 'location': 'New York'} -{'name': 'Bob', 'age': 35, 'location': 'California'} +{ + "model": "GPT-3.5", + "notes": [ + "Part of OpenAI's GPT series.", + "Used in ChatGPT and Microsoft Copilot." + ] +} +{ + "model": "GPT-4", + "notes": [ + "Part of OpenAI's GPT series.", + "Praised for increased accuracy and multimodal capabilities.", + "Architecture and number of parameters not revealed." + ] +} +...Output truncated for brevity...
``` diff --git a/docs/griptape-framework/engines/src/extraction_engines_2.py b/docs/griptape-framework/engines/src/extraction_engines_2.py index 2fb8cd8b0..35bbe53cd 100644 --- a/docs/griptape-framework/engines/src/extraction_engines_2.py +++ b/docs/griptape-framework/engines/src/extraction_engines_2.py @@ -1,24 +1,31 @@ -from schema import Schema +import json +from schema import Literal, Schema + +from griptape.artifacts import ErrorArtifact, ListArtifact from griptape.drivers import OpenAiChatPromptDriver from griptape.engines import JsonExtractionEngine +from griptape.loaders import WebLoader # Define a schema for extraction -user_schema = Schema([{"name": str, "age": int, "location": str}]).json_schema("UserSchema") - json_engine = JsonExtractionEngine( - prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), template_schema=user_schema + prompt_driver=OpenAiChatPromptDriver(model="gpt-4o"), + template_schema=Schema( + { + Literal("model", description="Name of an LLM model."): str, + Literal("notes", description="Any notes of substance about the model."): Schema([str]), + } + ).json_schema("ProductSchema"), ) -# Define some unstructured data -sample_json_text = """ -Alice (Age 28) lives in New York. -Bob (Age 35) lives in California. 
-""" +# Load data from the web +web_data = WebLoader().load("https://en.wikipedia.org/wiki/Large_language_model") +if isinstance(web_data, ErrorArtifact): + raise Exception(web_data.value) # Extract data using the engine -result = json_engine.extract_text(sample_json_text) +result = json_engine.extract_artifacts(ListArtifact(web_data)) for artifact in result: - print(artifact.value) + print(json.dumps(artifact.value, indent=2)) diff --git a/docs/griptape-framework/structures/src/tasks_6.py b/docs/griptape-framework/structures/src/tasks_6.py index a1b84e44d..ecd6f354f 100644 --- a/docs/griptape-framework/structures/src/tasks_6.py +++ b/docs/griptape-framework/structures/src/tasks_6.py @@ -4,7 +4,9 @@ from griptape.tasks import ExtractionTask # Instantiate the CSV extraction engine -csv_extraction_engine = CsvExtractionEngine(prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo")) +csv_extraction_engine = CsvExtractionEngine( + prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), column_names=["Name", "Age", "Address"] +) # Define some unstructured data and columns csv_data = """ @@ -13,15 +15,12 @@ Charlie is 40 and lives in Texas. 
""" -columns = ["Name", "Age", "Address"] - # Create an agent and add the ExtractionTask to it agent = Agent() agent.add_task( ExtractionTask( extraction_engine=csv_extraction_engine, - args={"column_names": columns}, ) ) diff --git a/docs/griptape-framework/structures/src/tasks_7.py b/docs/griptape-framework/structures/src/tasks_7.py index 909d00084..1a3a32b29 100644 --- a/docs/griptape-framework/structures/src/tasks_7.py +++ b/docs/griptape-framework/structures/src/tasks_7.py @@ -8,6 +8,7 @@ # Instantiate the json extraction engine json_extraction_engine = JsonExtractionEngine( prompt_driver=OpenAiChatPromptDriver(model="gpt-3.5-turbo"), + template_schema=Schema({"users": [{"name": str, "age": int, "location": str}]}).json_schema("UserSchema"), ) # Define some unstructured data and a schema @@ -15,13 +16,12 @@ Alice (Age 28) lives in New York. Bob (Age 35) lives in California. """ -user_schema = Schema({"users": [{"name": str, "age": int, "location": str}]}).json_schema("UserSchema") + agent = Agent() agent.add_task( ExtractionTask( extraction_engine=json_extraction_engine, - args={"template_schema": user_schema}, ) ) diff --git a/griptape/artifacts/json_artifact.py b/griptape/artifacts/json_artifact.py index b292879a9..26f69da7f 100644 --- a/griptape/artifacts/json_artifact.py +++ b/griptape/artifacts/json_artifact.py @@ -7,11 +7,11 @@ from griptape.artifacts import BaseArtifact -Json = Union[dict[str, "Json"], list["Json"], str, int, float, bool, None] - @define class JsonArtifact(BaseArtifact): + Json = Union[dict[str, "Json"], list["Json"], str, int, float, bool, None] + value: Json = field(converter=lambda v: json.loads(json.dumps(v)), metadata={"serializable": True}) def to_text(self) -> str: diff --git a/griptape/engines/extraction/csv_extraction_engine.py b/griptape/engines/extraction/csv_extraction_engine.py index 40c9058ec..70977f1f8 100644 --- a/griptape/engines/extraction/csv_extraction_engine.py +++ 
b/griptape/engines/extraction/csv_extraction_engine.py @@ -17,7 +17,7 @@ @define class CsvExtractionEngine(BaseExtractionEngine): - column_names: list[str] = field(default=Factory(list), kw_only=True) + column_names: list[str] = field(kw_only=True) system_template_generator: J2 = field(default=Factory(lambda: J2("engines/extraction/csv/system.j2")), kw_only=True) user_template_generator: J2 = field(default=Factory(lambda: J2("engines/extraction/csv/user.j2")), kw_only=True) diff --git a/griptape/engines/extraction/json_extraction_engine.py b/griptape/engines/extraction/json_extraction_engine.py index d3928c3ea..c817efd5f 100644 --- a/griptape/engines/extraction/json_extraction_engine.py +++ b/griptape/engines/extraction/json_extraction_engine.py @@ -18,9 +18,9 @@ @define class JsonExtractionEngine(BaseExtractionEngine): - JSON_PATTERN = r"(?s)(\{.*\}|\[.*\])" + JSON_PATTERN = r"(?s)[^\[]*(\[.*\])" - template_schema: dict = field(default=Factory(dict), kw_only=True) + template_schema: dict = field(kw_only=True) system_template_generator: J2 = field( default=Factory(lambda: J2("engines/extraction/json/system.j2")), kw_only=True ) @@ -42,7 +42,7 @@ def json_to_text_artifacts(self, json_input: str) -> list[JsonArtifact]: json_matches = re.findall(self.JSON_PATTERN, json_input, re.DOTALL) if json_matches: - return [JsonArtifact(json.loads(e)) for e in json_matches] + return [JsonArtifact(e) for e in json.loads(json_matches[-1])] else: return [] diff --git a/griptape/templates/engines/extraction/json/system.j2 b/griptape/templates/engines/extraction/json/system.j2 index b6bac028a..987ff19a9 100644 --- a/griptape/templates/engines/extraction/json/system.j2 +++ b/griptape/templates/engines/extraction/json/system.j2 @@ -1,5 +1,3 @@ -Your answer MUST be JSON that successfully validates against the Extraction Template JSON Schema. 
- Extraction Template JSON Schema: """{{ json_template_schema }}""" {% if rulesets %} diff --git a/griptape/templates/engines/extraction/json/user.j2 b/griptape/templates/engines/extraction/json/user.j2 index 9b0a611f0..984977d9a 100644 --- a/griptape/templates/engines/extraction/json/user.j2 +++ b/griptape/templates/engines/extraction/json/user.j2 @@ -1,4 +1,4 @@ -Extract information from the Text. +Extract information from the Text based on the Extraction Template JSON Schema into an array of JSON objects. Text: """{{ text }}""" -JSON: +JSON array: diff --git a/tests/unit/engines/extraction/test_json_extraction_engine.py b/tests/unit/engines/extraction/test_json_extraction_engine.py index 938438af4..b4aa58b75 100644 --- a/tests/unit/engines/extraction/test_json_extraction_engine.py +++ b/tests/unit/engines/extraction/test_json_extraction_engine.py @@ -21,15 +21,16 @@ def engine(self): def test_extract_text(self, engine): result = engine.extract_text("foo") - assert len(result.value) == 1 - assert result.value[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] + assert len(result.value) == 2 + assert result.value[0].value == {"test_key_1": "test_value_1"} + assert result.value[1].value == {"test_key_2": "test_value_2"} def test_chunked_extract_text(self, engine): large_text = Path(normpath(join(dirname(__file__), "../../../resources", "test.txt"))).read_text() extracted = engine.extract_text(large_text * 50) - assert len(extracted) == 177 - assert extracted[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] + assert len(extracted) == 354 + assert extracted[0].value == {"test_key_1": "test_value_1"} def test_extract_error(self, engine): engine.template_schema = lambda: "non serializable" @@ -37,8 +38,10 @@ def test_extract_error(self, engine): engine.extract_text("foo") def test_json_to_text_artifacts(self, engine): - extracted = engine.json_to_text_artifacts('[{"test_key_1": "test_value_1"}, {"test_key_2": 
"test_value_2"}]') - assert extracted[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] + assert [ + a.value + for a in engine.json_to_text_artifacts('[{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}]') + ] == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] def test_json_to_text_artifacts_no_matches(self, engine): assert engine.json_to_text_artifacts("asdfasdfasdf") == [] diff --git a/tests/unit/tools/test_extraction_tool.py b/tests/unit/tools/test_extraction_tool.py index edf1663d2..1dbb9def2 100644 --- a/tests/unit/tools/test_extraction_tool.py +++ b/tests/unit/tools/test_extraction_tool.py @@ -39,14 +39,16 @@ def test_json_extract_artifacts(self, json_tool): {"values": {"data": {"memory_name": json_tool.input_memory[0].name, "artifact_namespace": "foo"}}} ) - assert len(result.value) == 1 - assert result.value[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] + assert len(result.value) == 2 + assert result.value[0].value == {"test_key_1": "test_value_1"} + assert result.value[1].value == {"test_key_2": "test_value_2"} def test_json_extract_content(self, json_tool): result = json_tool.extract({"values": {"data": "foo"}}) - assert len(result.value) == 1 - assert result.value[0].value == [{"test_key_1": "test_value_1"}, {"test_key_2": "test_value_2"}] + assert len(result.value) == 2 + assert result.value[0].value == {"test_key_1": "test_value_1"} + assert result.value[1].value == {"test_key_2": "test_value_2"} def test_csv_extract_artifacts(self, csv_tool): csv_tool.input_memory[0].store_artifact("foo", TextArtifact("foo,bar\nbaz,maz")) From dca34985688c819e60889da75bb85005bcc90629 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Wed, 21 Aug 2024 09:23:51 -0700 Subject: [PATCH 05/10] Improve ListArtifact --- CHANGELOG.md | 2 ++ griptape/artifacts/list_artifact.py | 23 +++++++++++++--------- griptape/schemas/base_schema.py | 6 +++++- griptape/tasks/tool_task.py | 6 +++++- 
tests/unit/artifacts/test_list_artifact.py | 6 ++++++ 5 files changed, 32 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 59947dc5f..2288c9fc2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased ### Added - `BaseConversationMemory.prompt_driver` for use with autopruning. +- Generic type support to `ListArtifact`. +- Iteration support to `ListArtifact`. ### Fixed - Parsing streaming response with some OpenAi compatible services. diff --git a/griptape/artifacts/list_artifact.py b/griptape/artifacts/list_artifact.py index 298f29c6a..9ebec394c 100644 --- a/griptape/artifacts/list_artifact.py +++ b/griptape/artifacts/list_artifact.py @@ -1,23 +1,25 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Generic, Optional, TypeVar from attrs import Attribute, define, field from griptape.artifacts import BaseArtifact if TYPE_CHECKING: - from collections.abc import Sequence + from collections.abc import Iterator, Sequence + +T = TypeVar("T", bound=BaseArtifact) @define -class ListArtifact(BaseArtifact): - value: Sequence[BaseArtifact] = field(factory=list, metadata={"serializable": True}) +class ListArtifact(BaseArtifact, Generic[T]): + value: Sequence[T] = field(factory=list, metadata={"serializable": True}) item_separator: str = field(default="\n\n", kw_only=True, metadata={"serializable": True}) validate_uniform_types: bool = field(default=False, kw_only=True, metadata={"serializable": True}) @value.validator # pyright: ignore[reportAttributeAccessIssue] - def validate_value(self, _: Attribute, value: list[BaseArtifact]) -> None: + def validate_value(self, _: Attribute, value: list[T]) -> None: if self.validate_uniform_types and len(value) > 0: first_type = type(value[0]) @@ -31,18 +33,21 @@ def child_type(self) -> Optional[type]: else: return None - def __getitem__(self, key: 
int) -> BaseArtifact: + def __getitem__(self, key: int) -> T: return self.value[key] def __bool__(self) -> bool: return len(self) > 0 + def __add__(self, other: BaseArtifact) -> ListArtifact[T]: + return ListArtifact(self.value + other.value) + + def __iter__(self) -> Iterator[T]: + return iter(self.value) + def to_text(self) -> str: return self.item_separator.join([v.to_text() for v in self.value]) - def __add__(self, other: BaseArtifact) -> BaseArtifact: - return ListArtifact(self.value + other.value) - def is_type(self, target_type: type) -> bool: if self.value: return isinstance(self.value[0], target_type) diff --git a/griptape/schemas/base_schema.py b/griptape/schemas/base_schema.py index f25e8870b..b285d1476 100644 --- a/griptape/schemas/base_schema.py +++ b/griptape/schemas/base_schema.py @@ -2,7 +2,7 @@ from abc import ABC from collections.abc import Sequence -from typing import Any, Literal, Union, _SpecialForm, get_args, get_origin +from typing import Any, Literal, TypeVar, Union, _SpecialForm, get_args, get_origin import attrs from marshmallow import INCLUDE, Schema, fields @@ -56,6 +56,10 @@ def _get_field_for_type(cls, field_type: type) -> fields.Field | fields.Nested: field_class, args, optional = cls._get_field_type_info(field_type) + # Resolve TypeVars to their bound type + if isinstance(field_class, TypeVar): + field_class = field_class.__bound__ + if attrs.has(field_class): if ABC in field_class.__bases__: return fields.Nested(PolymorphicSchema(inner_class=field_class), allow_none=optional) diff --git a/griptape/tasks/tool_task.py b/griptape/tasks/tool_task.py index 6dd5000b3..68260ea91 100644 --- a/griptape/tasks/tool_task.py +++ b/griptape/tasks/tool_task.py @@ -84,7 +84,11 @@ def run(self) -> BaseArtifact: subtask.after_run() if isinstance(subtask.output, ListArtifact): - self.output = subtask.output[0] + first_artifact = subtask.output[0] + if isinstance(first_artifact, BaseArtifact): + self.output = first_artifact + else: + self.output = 
ErrorArtifact(f"Output is not an Artifact: {type(subtask.output[0])}") else: self.output = InfoArtifact("No tool output") except Exception as e: diff --git a/tests/unit/artifacts/test_list_artifact.py b/tests/unit/artifacts/test_list_artifact.py index 06d234645..37769e1b8 100644 --- a/tests/unit/artifacts/test_list_artifact.py +++ b/tests/unit/artifacts/test_list_artifact.py @@ -23,6 +23,12 @@ def test___add__(self): assert artifact.value[0].value == "foo" assert artifact.value[1].value == "bar" + def test___iter__(self): + assert [a.value for a in ListArtifact([TextArtifact("foo"), TextArtifact("bar")])] == ["foo", "bar"] + + def test_type_var(self): + assert ListArtifact[TextArtifact]([TextArtifact("foo")]).value[0].value == "foo" + def test_validate_value(self): with pytest.raises(ValueError): ListArtifact([TextArtifact("foo"), BlobArtifact(b"bar")], validate_uniform_types=True) From a7f10d4231ddc01769cc1b5128b632657087804d Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Thu, 22 Aug 2024 09:30:37 -0700 Subject: [PATCH 06/10] Make type covariant --- griptape/artifacts/list_artifact.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/griptape/artifacts/list_artifact.py b/griptape/artifacts/list_artifact.py index 9ebec394c..02dd295cd 100644 --- a/griptape/artifacts/list_artifact.py +++ b/griptape/artifacts/list_artifact.py @@ -9,7 +9,7 @@ if TYPE_CHECKING: from collections.abc import Iterator, Sequence -T = TypeVar("T", bound=BaseArtifact) +T = TypeVar("T", bound=BaseArtifact, covariant=True) @define From b1ded7b2980c679f107a253b52ffd958c21fa906 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Thu, 22 Aug 2024 09:46:55 -0700 Subject: [PATCH 07/10] Wording improvements --- CHANGELOG.md | 2 +- docs/griptape-framework/engines/extraction-engines.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 498499d73..f78aa0751 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ and this 
project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Iteration support to `ListArtifact`. ### Changed -- **BREAKING**: Split `BaseExtractionEngine.extract` into `extract` and `extract_artifacts` for consistency with `BaseSummaryEngine`. +- **BREAKING**: Split `BaseExtractionEngine.extract` into `extract_text` and `extract_artifacts` for consistency with `BaseSummaryEngine`. - **BREAKING**: `BaseExtractionEngine` no longer catches exceptions and returns `ErrorArtifact`s. - **BREAKING**: `JsonExtractionEngine.template_schema` is now required. - **BREAKING**: `CsvExtractionEngine.column_names` is now required. diff --git a/docs/griptape-framework/engines/extraction-engines.md b/docs/griptape-framework/engines/extraction-engines.md index 43333e4b0..c9be21dde 100644 --- a/docs/griptape-framework/engines/extraction-engines.md +++ b/docs/griptape-framework/engines/extraction-engines.md @@ -10,7 +10,7 @@ As of now, Griptape supports two types of Extraction Engines: the CSV Extraction ## CSV -The CSV Extraction Engine is designed for extracting CSV-formatted content from unstructured data. +The CSV Extraction Engine extracts tabular content from unstructured data. ```python --8<-- "docs/griptape-framework/engines/src/extraction_engines_1.py" @@ -24,7 +24,7 @@ Charlie,40,Texas ## JSON -The JSON Extraction Engine is designed for extracting JSON-formatted content from unstructed data. +The JSON Extraction Engine extracts JSON-formatted content from unstructed data. 
```python From e15ba3fae8d7cce7d5e517a865c3307d35011009 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Thu, 22 Aug 2024 11:58:50 -0700 Subject: [PATCH 08/10] Better words --- docs/griptape-framework/engines/extraction-engines.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/griptape-framework/engines/extraction-engines.md b/docs/griptape-framework/engines/extraction-engines.md index c9be21dde..cd06d22bc 100644 --- a/docs/griptape-framework/engines/extraction-engines.md +++ b/docs/griptape-framework/engines/extraction-engines.md @@ -10,7 +10,7 @@ As of now, Griptape supports two types of Extraction Engines: the CSV Extraction ## CSV -The CSV Extraction Engine extracts tabular content from unstructured data. +The CSV Extraction Engine extracts tabular content from unstructured text. ```python --8<-- "docs/griptape-framework/engines/src/extraction_engines_1.py" @@ -24,7 +24,7 @@ Charlie,40,Texas ## JSON -The JSON Extraction Engine extracts JSON-formatted content from unstructed data. +The JSON Extraction Engine extracts JSON-formatted content from unstructed text. ```python From f66925ca3e8308bd369530927e73c9419666f20f Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Thu, 3 Oct 2024 11:37:47 -0700 Subject: [PATCH 09/10] Fix typo --- docs/griptape-framework/engines/extraction-engines.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/griptape-framework/engines/extraction-engines.md b/docs/griptape-framework/engines/extraction-engines.md index cd06d22bc..c00352691 100644 --- a/docs/griptape-framework/engines/extraction-engines.md +++ b/docs/griptape-framework/engines/extraction-engines.md @@ -24,7 +24,7 @@ Charlie,40,Texas ## JSON -The JSON Extraction Engine extracts JSON-formatted content from unstructed text. +The JSON Extraction Engine extracts JSON-formatted content from unstructured text. 
```python From d1250094caf57d439909d137303428f0d23ab9f8 Mon Sep 17 00:00:00 2001 From: Collin Dutter Date: Thu, 3 Oct 2024 11:39:44 -0700 Subject: [PATCH 10/10] Fix bad merge --- griptape/artifacts/list_artifact.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/griptape/artifacts/list_artifact.py b/griptape/artifacts/list_artifact.py index 16fc323df..02dd295cd 100644 --- a/griptape/artifacts/list_artifact.py +++ b/griptape/artifacts/list_artifact.py @@ -18,18 +18,6 @@ class ListArtifact(BaseArtifact, Generic[T]): item_separator: str = field(default="\n\n", kw_only=True, metadata={"serializable": True}) validate_uniform_types: bool = field(default=False, kw_only=True, metadata={"serializable": True}) - def __getitem__(self, key: int) -> T: - return self.value[key] - - def __bool__(self) -> bool: - return len(self) > 0 - - def __add__(self, other: BaseArtifact) -> ListArtifact[T]: - return ListArtifact(self.value + other.value) - - def __iter__(self) -> Iterator[T]: - return iter(self.value) - @value.validator # pyright: ignore[reportAttributeAccessIssue] def validate_value(self, _: Attribute, value: list[T]) -> None: if self.validate_uniform_types and len(value) > 0: