From 2313d5c0977e4ea17d94cf8bb704e1b67bd03597 Mon Sep 17 00:00:00 2001 From: lpm0073 Date: Wed, 29 Nov 2023 07:07:05 -0600 Subject: [PATCH 1/4] refactor: move all validation logic to Pydantic in langchain.py --- grader/grader.py | 163 ++------------------------------- grader/langchain.py | 12 +-- grader/tests/test_responses.py | 8 +- 3 files changed, 16 insertions(+), 167 deletions(-) diff --git a/grader/grader.py b/grader/grader.py index 98ef549..de38e91 100644 --- a/grader/grader.py +++ b/grader/grader.py @@ -1,9 +1,6 @@ # -*- coding: utf-8 -*- """Provide a class for grading a submission against an assignment.""" "" -import json -import os - from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator from .exceptions import ( @@ -24,13 +21,6 @@ ResponseFailedError.__name__, ] -HERE = os.path.abspath(os.path.dirname(__file__)) -REQUIRED_KEYS_SPEC = "required-keys.json" -REQUIRED_KEYS_PATH = os.path.join(HERE, "data", REQUIRED_KEYS_SPEC) - -HUMAN_PROMPT = {"content": "a prompt from a human", "additional_kwargs": {}, "type": "human", "example": False} -AI_RESPONSE = {"content": "a response from the AI", "additional_kwargs": {}, "type": "ai", "example": False} - class Grade(BaseModel): """ @@ -69,8 +59,6 @@ class AutomatedGrader: def __init__(self, assignment, potential_points=100): self._assignment = assignment self._potential_points = potential_points - with open(REQUIRED_KEYS_PATH, "r", encoding="utf-8") as f: # pylint: disable=invalid-name - self.required_keys = json.load(f) @property def assignment(self): @@ -82,131 +70,6 @@ def potential_points(self): """Return the potential points for the assignment.""" return self._potential_points - def validate_keys(self, subject, control): - """Validate that the subject has all the keys in the control dict.""" - assignment_keys = set(subject.keys()) - required_keys = set(control.keys()) - - if not required_keys.issubset(assignment_keys): - missing_keys = required_keys.difference(assignment_keys) - raise InvalidResponseStructureError( - f"The assignment is missing one or more required keys. missing: {missing_keys}" - ) - return True - - def validate_statuscode(self): - """Validate that the assignment's statusCode is 200.""" - if "statusCode" not in self.assignment: - raise InvalidResponseStructureError(f"The assignment must have a statusCode. assignment: {self.assignment}") - if not isinstance(self.assignment.get("statusCode"), int): - status_code_type = type(self.assignment.get("statusCode")) - raise IncorrectResponseTypeError( - f"The assignment's statusCode must be an integer. received: {status_code_type}" - ) - status_code = self.assignment["statusCode"] - if not status_code == 200: - raise ResponseFailedError(f"The assignment's statusCode must be 200. received: {status_code}") - return True - - def validate_base64encoded(self): - """Validate that the assignment's isBase64Encoded is False.""" - if "isBase64Encoded" not in self.assignment: - raise InvalidResponseStructureError( - f"The assignment must have a isBase64Encoded. assignment: {self.assignment}" - ) - is_base64_encoded = self.assignment.get("isBase64Encoded") - if not isinstance(is_base64_encoded, bool): - is_base64_encoded_type = type(is_base64_encoded) - raise IncorrectResponseTypeError( - f"The assignment's base64Encoded must be a boolean. received: {is_base64_encoded_type}" - ) - if self.assignment["isBase64Encoded"]: - raise IncorrectResponseValueError("The assignment's isBase64Encoded must be False.") - - def validate_body(self): - """Validate that the assignment's body is a dict with the correct keys.""" - if "body" not in self.assignment: - raise InvalidResponseStructureError(f"The assignment must have a body. assignment: {self.assignment}") - - body = self.assignment.get("body") - if not isinstance(body, dict): - body_type = type(body) - raise IncorrectResponseTypeError(f"The assignment's body must be a dict. Received {body_type}.") - if not "chat_memory" in body: - raise InvalidResponseStructureError( - f"The assignment's body must have a key named chat_memory. body: {body}" - ) - if not "messages" in body["chat_memory"]: - raise InvalidResponseStructureError( - f"The assignment's body.chat_memory must has a key named messages. body: {body}" - ) - messages = body["chat_memory"]["messages"] - if not isinstance(messages, list): - messages_type = type(messages) - raise IncorrectResponseTypeError( - f"The assignment's body.chat_memory.messages must be a list. Received {messages_type}." - ) - if len(messages) < 2: - raise InvalidResponseStructureError( - f"The messages list must contain at least two elements. messages: {messages}" - ) - - for message in messages: - if not isinstance(message, dict): - raise InvalidResponseStructureError( - f"All elements in the messages list must be dictionaries. messages: {messages}" - ) - - human_prompt = messages[0] - ai_response = messages[1] - - self.validate_keys(human_prompt, HUMAN_PROMPT) - self.validate_keys(ai_response, AI_RESPONSE) - - if not human_prompt["type"] == "human": - raise IncorrectResponseValueError(f"The first message must be a human prompt. first prompt: {human_prompt}") - if not ai_response["type"] == "ai": - raise IncorrectResponseValueError(f"The second message must be an AI response. response: {ai_response}") - - def validate_metadata(self): - """Validate that the assignment's metadata is a dict with the correct keys.""" - body = self.assignment.get("body") - request_meta_data = body["request_meta_data"] - if not isinstance(request_meta_data, dict): - meta_data_type = type(request_meta_data) - raise InvalidResponseStructureError( - f"The assignment must has a dict named request_meta_data. received: {meta_data_type}" - ) - if request_meta_data.get("lambda") is None: - raise InvalidResponseStructureError( - f"The request_meta_data key lambda_langchain must exist. request_meta_data: {request_meta_data}" - ) - if request_meta_data.get("model") is None: - raise InvalidResponseStructureError( - f"The request_meta_data key model must exist. request_meta_data: {request_meta_data}" - ) - if request_meta_data.get("end_point") is None: - raise InvalidResponseStructureError( - f"The request_meta_data end_point must exist. request_meta_data: {request_meta_data}" - ) - - if not request_meta_data.get("lambda") == "lambda_langchain": - raise IncorrectResponseValueError(f"The request_meta_data.lambda must be lambda_langchain. body: {body}") - if not request_meta_data.get("model") == "gpt-3.5-turbo": - raise IncorrectResponseValueError(f"The request_meta_data.model must be gpt-3.5-turbo. body: {body}") - if not request_meta_data.get("end_point") == "ChatCompletion": - raise IncorrectResponseValueError(f"The request_meta_data.end_point must be ChatCompletion. body: {body}") - - def validate(self): - """Validate the assignment data structure.""" - if not isinstance(self.assignment, dict): - raise InvalidResponseStructureError("The assignment must be a dictionary.") - self.validate_keys(self.assignment, self.required_keys) - self.validate_statuscode() - self.validate_base64encoded() - self.validate_body() - self.validate_metadata() - def grade_response(self, message: AGException = None): """Create a grade dict from the assignment.""" grade = self.potential_points * (1 - (message.penalty_pct if message else 0)) @@ -217,29 +80,16 @@ def grade_response(self, message: AGException = None): return grade.model_dump() def grade(self): - """Grade the assignment. - This is an experimental usage of pydantic to validate the assignment. - Only two tests should pass, the rest should raise exceptions and then - be processed with the legacy code below. - """ + """Grade the assignment.""" try: lc_response = LCResponse(**self.assignment) lc_response.validate_response() return self.grade_response() - except (ValidationError, TypeError): - # FIX NOTE: need to map these to an applicable exception from grader.exceptions. - print("warning: assignment failed an un-mapped pydantic validation.") - except ( - ResponseFailedError, - InvalidResponseStructureError, - ResponseFailedError, - IncorrectResponseValueError, - IncorrectResponseTypeError, - ) as e: - return self.grade_response(e) - - try: - self.validate() + except (ValidationError, TypeError) as e: + try: + raise InvalidResponseStructureError("The assignment failed pydantic validation.") from e + except InvalidResponseStructureError as reraised_e: + return self.grade_response(reraised_e) except ( ResponseFailedError, InvalidResponseStructureError, @@ -248,4 +98,3 @@ def grade(self): IncorrectResponseTypeError, ) as e: return self.grade_response(e) - return self.grade_response() diff --git a/grader/langchain.py b/grader/langchain.py index 846b557..27e7357 100644 --- a/grader/langchain.py +++ b/grader/langchain.py @@ -17,8 +17,8 @@ class LCRequestMetaData(BaseModel): """LangChain request meta data""" lambda_name: str = Field(..., alias="lambda") - model: str - end_point: str + model: str = Field(...) + end_point: str = Field(...) temperature: float = Field(..., ge=0, le=1) max_tokens: int = Field(..., gt=0) @@ -52,10 +52,10 @@ class LCBody(BaseModel): chat_memory: LCChatMemory output_key: Optional[str] = Field(None) input_key: Optional[str] = Field(None) - return_messages: bool - human_prefix: str = Field("Human") - ai_prefix: str = Field("AI") - memory_key: str = Field("chat_history") + return_messages: Optional[bool] = Field(True) + human_prefix: Optional[str] = Field("Human") + ai_prefix: Optional[str] = Field("AI") + memory_key: Optional[str] = Field("chat_history") request_meta_data: LCRequestMetaData diff --git a/grader/tests/test_responses.py b/grader/tests/test_responses.py index 3382153..3419a99 100644 --- a/grader/tests/test_responses.py +++ b/grader/tests/test_responses.py @@ -81,8 +81,8 @@ def test_incorrect_messages(self): automated_grader = AutomatedGrader(assignment=assignment) grade = automated_grader.grade() - assert grade["message_type"] == "IncorrectResponseValueError" - assert grade["grade"] == 85, "The grade is not 85" + assert grade["message_type"] == "InvalidResponseStructureError" + assert grade["grade"] == 70, "The grade is not 70" def test_incorrect_data_type(self): """Test an assignment with an incorrect data type.""" @@ -90,8 +90,8 @@ def test_incorrect_data_type(self): automated_grader = AutomatedGrader(assignment=assignment) grade = automated_grader.grade() - assert grade["message_type"] == "IncorrectResponseTypeError" - assert grade["grade"] == 90, "The grade is not 85" + assert grade["message_type"] == "InvalidResponseStructureError" + assert grade["grade"] == 70, "The grade is not 70" def test_bad_message_01(self): """Test an assignment with an incorrect message.""" From b327ca70c8c5b8d22719b9848b2112ae1cdf8423 Mon Sep 17 00:00:00 2001 From: lpm0073 Date: Wed, 29 Nov 2023 07:42:20 -0600 Subject: [PATCH 2/4] feat: all grading logic moved to pydantic --- grader/batch.py | 8 ++----- grader/grader.py | 41 +++++++++++++++++++-------------- grader/tests/init.py | 8 +------ grader/tests/test_responses.py | 42 +++++++++++++++++++++++++--------- 4 files changed, 58 insertions(+), 41 deletions(-) diff --git a/grader/batch.py b/grader/batch.py index c3015e6..47707cd 100644 --- a/grader/batch.py +++ b/grader/batch.py @@ -21,12 +21,8 @@ def main(filepath: str = None, output_folder: str = "out", potential_points: int assignments = glob.glob(os.path.join(filepath, "*.json")) for assignment_filename in assignments: with open(assignment_filename, "r", encoding="utf-8") as f: - try: - assignment = json.load(f) - except json.JSONDecodeError: - print(f"warning: invalid JSON in assignment_filename: {assignment_filename}") - assignment = f.read() - grader = AutomatedGrader(assignment, potential_points=potential_points) + assignment = f.read() + grader = AutomatedGrader(assignment=assignment, potential_points=potential_points) grade = grader.grade() with open( os.path.join(OUTPUT_FILE_PATH, f"{os.path.basename(assignment_filename)}"), "w", encoding="utf-8" diff --git a/grader/grader.py b/grader/grader.py index de38e91..b1e9cb6 100644 --- a/grader/grader.py +++ b/grader/grader.py @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- """Provide a class for grading a submission against an assignment.""" "" +import json + from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator from .exceptions import ( @@ -53,22 +55,11 @@ def message_type_is_valid(cls, message_type): # flake8: noqa: E701 -class AutomatedGrader: +class AutomatedGrader(BaseModel): """Grade a submission against an assignment.""" - def __init__(self, assignment, potential_points=100): - self._assignment = assignment - self._potential_points = potential_points - - @property - def assignment(self): - """Return the assignment.""" - return self._assignment - - @property - def potential_points(self): - """Return the potential points for the assignment.""" - return self._potential_points + assignment: str = Field(..., description="The assignment to grade.") + potential_points: float = Field(100, description="The maximum number of points that can be awarded.", ge=0) def grade_response(self, message: AGException = None): """Create a grade dict from the assignment.""" @@ -81,15 +72,31 @@ def grade_response(self, message: AGException = None): def grade(self): """Grade the assignment.""" + assignment_json: dict + lc_response: LCResponse + + # 1.) attempt to load the assignment as JSON try: - lc_response = LCResponse(**self.assignment) - lc_response.validate_response() - return self.grade_response() + assignment_json = json.loads(self.assignment) + except json.JSONDecodeError as e: + try: + raise InvalidResponseStructureError("The assignment is not valid JSON") from e + except InvalidResponseStructureError as reraised_e: + return self.grade_response(reraised_e) + + # 2.) attempt to validate the assignment using Pydantic + try: + lc_response = LCResponse(**assignment_json) except (ValidationError, TypeError) as e: try: raise InvalidResponseStructureError("The assignment failed pydantic validation.") from e except InvalidResponseStructureError as reraised_e: return self.grade_response(reraised_e) + + # 3.) validate the assignment + try: + lc_response.validate_response() + return self.grade_response() except ( ResponseFailedError, InvalidResponseStructureError, diff --git a/grader/tests/init.py b/grader/tests/init.py index 74858ff..f5f93e5 100644 --- a/grader/tests/init.py +++ b/grader/tests/init.py @@ -1,14 +1,8 @@ # -*- coding: utf-8 -*- """Provide common functions for testing.""" -import json def get_event(filespec): """Reads a JSON file and returns the event""" with open(filespec, "r", encoding="utf-8") as f: # pylint: disable=invalid-name - try: - event = json.load(f) - return event - except json.JSONDecodeError: - print(f"warning: invalid JSON in file: {filespec}") - return f.read() + return f.read() diff --git a/grader/tests/test_responses.py b/grader/tests/test_responses.py index 3419a99..924be8b 100644 --- a/grader/tests/test_responses.py +++ b/grader/tests/test_responses.py @@ -10,6 +10,9 @@ from .init import get_event +POTENTIAL_PONTS = 100 + + # pylint: disable=too-few-public-methods class TestGrader: """Test the OpenAI API via Langchain using the Lambda Layer, 'genai'.""" @@ -17,8 +20,9 @@ class TestGrader: def test_success(self): """Test a valid successful submission.""" assignment = get_event("tests/events/correct.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) assert isinstance(grade, dict), "The grade is not a dictionary" assert grade["message_type"] == "Success" @@ -33,8 +37,9 @@ def test_success(self): def test_success_verbose(self): """Test a valid successful submission.""" assignment = get_event("tests/events/correct-verbose.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) assert isinstance(grade, dict), "The grade is not a dictionary" assert grade["message_type"] == "Success" @@ -49,7 +54,8 @@ def test_success_verbose(self): def test_bad_data(self): """Test an assignment with bad data.""" assignment = get_event("tests/events/bad-data.txt") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) + grade = automated_grader.grade() print(grade) @@ -59,7 +65,7 @@ def test_bad_data(self): def test_incorrect_response_type(self): """Test an assignment with an incorrect response type.""" assignment = get_event("tests/events/incorrect-response-type.txt") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() print(grade) @@ -69,62 +75,76 @@ def test_incorrect_response_type(self): def test_incorrect_response_statuscode(self): """Test an assignment with an incorrect response status code.""" assignment = get_event("tests/events/incorrect-response-status.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "ResponseFailedError" assert grade["grade"] == 80, "The grade is not 80" def test_incorrect_messages(self): """Test an assignment with an incorrect message.""" assignment = get_event("tests/events/wrong-messages.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "InvalidResponseStructureError" assert grade["grade"] == 70, "The grade is not 70" def test_incorrect_data_type(self): """Test an assignment with an incorrect data type.""" assignment = get_event("tests/events/type-error.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "InvalidResponseStructureError" assert grade["grade"] == 70, "The grade is not 70" def test_bad_message_01(self): """Test an assignment with an incorrect message.""" assignment = get_event("tests/events/bad-message-1.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "InvalidResponseStructureError" assert grade["grade"] == 70, "The grade is not 70" def test_bad_message_02(self): """Test an assignment with an incorrect message.""" assignment = get_event("tests/events/bad-message-2.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "InvalidResponseStructureError" assert grade["grade"] == 70, "The grade is not 70" def test_bad_message_03(self): """Test an assignment with an incorrect message.""" assignment = get_event("tests/events/bad-message-3.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "InvalidResponseStructureError" assert grade["grade"] == 70, "The grade is not 70" def test_bad_message_04(self): """Test an assignment with an incorrect message.""" assignment = get_event("tests/events/bad-message-4.json") - automated_grader = AutomatedGrader(assignment=assignment) + automated_grader = AutomatedGrader(assignment=assignment, potential_points=POTENTIAL_PONTS) grade = automated_grader.grade() + print(grade) + assert grade["message_type"] == "InvalidResponseStructureError" assert grade["grade"] == 70, "The grade is not 70" From 4186189a728aa91668a61d26cda85fc6d4f78ef4 Mon Sep 17 00:00:00 2001 From: GitHub Action Date: Wed, 29 Nov 2023 13:43:48 +0000 Subject: [PATCH 3/4] chore: [gh] Update __version__.py to 1.5.0 [skip ci] --- grader/__version__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/grader/__version__.py b/grader/__version__.py index a6b0b8f..28dfc5b 100644 --- a/grader/__version__.py +++ b/grader/__version__.py @@ -1,2 +1,2 @@ # -*- coding: utf-8 -*- -__version__ = "1.4.0" +__version__ = "1.5.0" From 10ddab5a84bedc01679e69b40c22fcc6b4067b37 Mon Sep 17 00:00:00 2001 From: lpm0073 Date: Wed, 29 Nov 2023 07:45:44 -0600 Subject: [PATCH 4/4] refactor: move VALID_MESSAGE_TYPES to exceptions.py --- grader/exceptions.py | 9 +++++++++ grader/grader.py | 10 +--------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/grader/exceptions.py b/grader/exceptions.py index 08d008c..ae44eb9 100644 --- a/grader/exceptions.py +++ b/grader/exceptions.py @@ -54,3 +54,12 @@ class ResponseFailedError(AGException): def __init__(self, message): self.message = message super().__init__(self.message, penalty_pct=AGRubric.RESPONSE_FAILED_PENALTY_PCT) + + +VALID_MESSAGE_TYPES = [ + "Success", + IncorrectResponseTypeError.__name__, + IncorrectResponseValueError.__name__, + InvalidResponseStructureError.__name__, + ResponseFailedError.__name__, +] diff --git a/grader/grader.py b/grader/grader.py index b1e9cb6..88965ca 100644 --- a/grader/grader.py +++ b/grader/grader.py @@ -6,6 +6,7 @@ from pydantic import BaseModel, Field, ValidationError, field_validator, model_validator from .exceptions import ( + VALID_MESSAGE_TYPES, AGException, IncorrectResponseTypeError, IncorrectResponseValueError, @@ -15,15 +16,6 @@ from .langchain import LCResponse -VALID_MESSAGE_TYPES = [ - "Success", - IncorrectResponseTypeError.__name__, - IncorrectResponseValueError.__name__, - InvalidResponseStructureError.__name__, - ResponseFailedError.__name__, -] - - class Grade(BaseModel): """ This is the base class for all Grader types. It provides the common interface and