From ce4ed3225a26ee549db68c12072fa782a5f9af7b Mon Sep 17 00:00:00 2001 From: Eugene Yurtsev Date: Sat, 25 Mar 2023 11:19:22 -0400 Subject: [PATCH] Update extraction interface to require an Object node (#90) The schema should always be defined with respect to an Object rather than other types of nodes; otherwise there is some ambiguity about how to encode the object (at least when using CSV encoding). --- kor/extraction.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/kor/extraction.py b/kor/extraction.py index 427fef3..83905b1 100644 --- a/kor/extraction.py +++ b/kor/extraction.py @@ -4,7 +4,7 @@ from langchain.schema import BaseLanguageModel from kor.encoders import Encoder, initialize_encoder -from kor.nodes import AbstractSchemaNode +from kor.nodes import Object from kor.prompts import create_langchain_prompt from kor.type_descriptors import TypeDescriptor, initialize_type_descriptors from kor.validators import Validator @@ -14,7 +14,7 @@ def create_extraction_chain( llm: BaseLanguageModel, - node: AbstractSchemaNode, + node: Object, *, encoder_or_encoder_class: Union[Type[Encoder], Encoder, str] = "csv", type_descriptor: Union[TypeDescriptor, str] = "typescript", @@ -36,6 +36,8 @@ def create_extraction_chain( Returns: A langchain chain """ + if not isinstance(node, Object): + raise ValueError(f"node must be an Object got {type(node)}") encoder = initialize_encoder(encoder_or_encoder_class, node, **encoder_kwargs) type_descriptor_to_use = initialize_type_descriptors(type_descriptor) return LLMChain(