From a314351560f6814a32b6ac6bbe27b58e3ad2a67c Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 26 Oct 2025 04:47:25 +0500 Subject: [PATCH 1/5] chore: Output Parser Updated to Support Reasoning Model for Perplexity --- .../langchain_perplexity/chat_models.py | 10 +- .../langchain_perplexity/output_parsers.py | 92 +++++++++++++++++++ 2 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 libs/partners/perplexity/langchain_perplexity/output_parsers.py diff --git a/libs/partners/perplexity/langchain_perplexity/chat_models.py b/libs/partners/perplexity/langchain_perplexity/chat_models.py index 4321c05755225..fd8b716165675 100644 --- a/libs/partners/perplexity/langchain_perplexity/chat_models.py +++ b/libs/partners/perplexity/langchain_perplexity/chat_models.py @@ -33,7 +33,6 @@ UsageMetadata, subtract_usage, ) -from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.outputs import ChatGeneration, ChatGenerationChunk, ChatResult from langchain_core.runnables import Runnable, RunnableMap, RunnablePassthrough from langchain_core.utils import get_pydantic_field_names, secret_from_env @@ -42,6 +41,11 @@ from pydantic import BaseModel, ConfigDict, Field, SecretStr, model_validator from typing_extensions import Self +from langchain_perplexity.output_parsers import ( + ReasoningJsonOutputParser, + ReasoningStructuredOutputParser, +) + _DictOrPydanticClass: TypeAlias = dict[str, Any] | type[BaseModel] _DictOrPydantic: TypeAlias = dict | BaseModel @@ -510,9 +514,9 @@ def with_structured_output( }, ) output_parser = ( - PydanticOutputParser(pydantic_object=schema) # type: ignore[arg-type] + ReasoningStructuredOutputParser(pydantic_object=schema) # type: ignore[arg-type] if is_pydantic_schema - else JsonOutputParser() + else ReasoningJsonOutputParser() ) else: raise ValueError( diff --git a/libs/partners/perplexity/langchain_perplexity/output_parsers.py 
b/libs/partners/perplexity/langchain_perplexity/output_parsers.py new file mode 100644 index 0000000000000..324f4dbb86810 --- /dev/null +++ b/libs/partners/perplexity/langchain_perplexity/output_parsers.py @@ -0,0 +1,92 @@ +from typing import Any + +from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser +from langchain_core.outputs import Generation + + +def strip_think_tags(text: str) -> str: + """Removes ... tags from text. + + Args: + text: The input text that may contain think tags. + """ + + def remove_think_tags(text: str) -> str: + """Remove content between and tags more safely.""" + result = [] + i = 0 + while i < len(text): + # Look for opening tag + open_tag_pos = text.find("", i) + if open_tag_pos == -1: + # No more opening tags, add the rest and break + result.append(text[i:]) + break + + # Add text before the opening tag + result.append(text[i:open_tag_pos]) + + # Look for closing tag + close_tag_pos = text.find("", open_tag_pos + 7) + if close_tag_pos == -1: + # No closing tag found, treat opening tag as literal text + result.append("") + i = open_tag_pos + 7 + else: + # Skip the content between tags and move past closing tag + i = close_tag_pos + 8 # "" is 8 characters + + return "".join(result).strip() + + return remove_think_tags(text) + + +class ReasoningJsonOutputParser(JsonOutputParser): + """A JSON output parser that strips reasoning tags before parsing. + + This parser removes any content enclosed in tags from the input text + before delegating to the parent JsonOutputParser for JSON parsing. + + """ + + def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any: + """Parse the result of an LLM call to a JSON object. + + Args: + result: The result of the LLM call. + partial: Whether to parse partial JSON objects. + If `True`, the output will be a JSON object containing + all the keys that have been returned so far. + If `False`, the output will be the full JSON object. 
+ + Returns: + The parsed JSON object. + + Raises: + OutputParserException: If the output is not valid JSON. + """ + text = result[0].text + text = strip_think_tags(text) + return super().parse_result([Generation(text=text)], partial=partial) + + +class ReasoningStructuredOutputParser(PydanticOutputParser): + """A structured output parser that strips reasoning tags before parsing. + + This parser removes any content enclosed in tags from the input text + before delegating to the parent PydanticOutputParser for structured parsing. + """ + + def parse_result(self, result: list[Generation], *, partial: bool = False) -> Any: + """Parse the result of an LLM call to a Pydantic object. + + Args: + result: The result of the LLM call. + partial: Whether to parse partial JSON objects. + If `True`, the output will be a JSON object containing + all the keys that have been returned so far. + If `False`, the output will be the full JSON object. + """ + text = result[0].text + text = strip_think_tags(text) + return super().parse_result([Generation(text=text)], partial=partial) From c93fafe62f8e9f97efba40b1784d1a85c3f0f4a1 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 26 Oct 2025 04:47:42 +0500 Subject: [PATCH 2/5] tests: Updated Tests --- .../tests/unit_tests/test_output_parsers.py | 332 ++++++++++++++++++ 1 file changed, 332 insertions(+) create mode 100644 libs/partners/perplexity/tests/unit_tests/test_output_parsers.py diff --git a/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py b/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py new file mode 100644 index 0000000000000..cd59ea2573d40 --- /dev/null +++ b/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py @@ -0,0 +1,332 @@ +"""Unit tests for output parsers.""" + +import pytest +from langchain_core.exceptions import OutputParserException +from langchain_core.outputs import Generation +from pydantic import BaseModel, Field + +from 
langchain_perplexity.output_parsers import ( + ReasoningJsonOutputParser, + ReasoningStructuredOutputParser, + strip_think_tags, +) + + +class TestStripThinkTags: + """Tests for the strip_think_tags function.""" + + def test_strip_simple_think_tags(self) -> None: + """Test stripping simple think tags.""" + text = "Hello some reasoning world" + result = strip_think_tags(text) + assert result == "Hello world" + + def test_strip_multiple_think_tags(self) -> None: + """Test stripping multiple think tags.""" + text = "first Hello second world\ + third" + result = strip_think_tags(text) + assert result == "Hello world" + + def test_strip_nested_like_think_tags(self) -> None: + """Test stripping think tags that might appear nested.""" + text = "outer inner still outer result" + result = strip_think_tags(text) + # The function removes from first to first + # then continues from after that + assert result == "still outer result" + + def test_strip_think_tags_no_closing_tag(self) -> None: + """Test handling of think tags without closing tag.""" + text = "Hello unclosed reasoning world" + result = strip_think_tags(text) + # Treats unclosed tag as literal text + assert result == "Hello unclosed reasoning world" + + def test_strip_think_tags_empty_content(self) -> None: + """Test stripping empty think tags.""" + text = "Hello world" + result = strip_think_tags(text) + assert result == "Hello world" + + def test_strip_think_tags_no_tags(self) -> None: + """Test text without any think tags.""" + text = "Hello world" + result = strip_think_tags(text) + assert result == "Hello world" + + def test_strip_think_tags_only_tags(self) -> None: + """Test text containing only think tags.""" + text = "reasoning" + result = strip_think_tags(text) + assert result == "" + + def test_strip_think_tags_multiline(self) -> None: + """Test stripping think tags across multiple lines.""" + text = """Hello + +reasoning line 1 +reasoning line 2 + +world""" + result = strip_think_tags(text) + assert 
result == "Hello\n\nworld" + + def test_strip_think_tags_with_special_chars(self) -> None: + """Test think tags containing special characters.""" + text = 'Before {"key": "value"} After' + result = strip_think_tags(text) + assert result == "Before After" + + +class TestReasoningJsonOutputParser: + """Tests for ReasoningJsonOutputParser.""" + + def test_parse_json_without_think_tags(self) -> None: + """Test parsing JSON without think tags.""" + parser = ReasoningJsonOutputParser() + text = '{"name": "John", "age": 30}' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert result == {"name": "John", "age": 30} + + def test_parse_json_with_think_tags(self) -> None: + """Test parsing JSON with think tags.""" + parser = ReasoningJsonOutputParser() + text = 'Let me construct the JSON{"name": "John", "age": 30}' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert result == {"name": "John", "age": 30} + + def test_parse_json_with_multiple_think_tags(self) -> None: + """Test parsing JSON with multiple think tags.""" + parser = ReasoningJsonOutputParser() + text = 'Step 1{"name": thinking"John", "age": 30}' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert result == {"name": "John", "age": 30} + + def test_parse_markdown_json_with_think_tags(self) -> None: + """Test parsing markdown-wrapped JSON with think tags.""" + parser = ReasoningJsonOutputParser() + text = """Building response +```json +{"name": "John", "age": 30} +```""" + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert result == {"name": "John", "age": 30} + + def test_parse_complex_json_with_think_tags(self) -> None: + """Test parsing complex nested JSON with think tags.""" + parser = ReasoningJsonOutputParser() + text = """Creating nested structure +{ + "user": { + "name": "John", + "address": { + "city": "NYC", + "zip": "10001" + } + }, + "items": [1, 2, 3] 
+}""" + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert result == { + "user": {"name": "John", "address": {"city": "NYC", "zip": "10001"}}, + "items": [1, 2, 3], + } + + def test_parse_invalid_json_with_think_tags(self) -> None: + """Test that invalid JSON raises an exception even with think tags.""" + parser = ReasoningJsonOutputParser() + text = "This will fail{invalid json}" + generation = Generation(text=text) + with pytest.raises(OutputParserException): + parser.parse_result([generation]) + + def test_parse_empty_string_after_stripping(self) -> None: + """Test parsing when only think tags remain.""" + parser = ReasoningJsonOutputParser() + text = "Only reasoning, no output" + generation = Generation(text=text) + with pytest.raises(OutputParserException): + parser.parse_result([generation]) + + def test_parse_json_array_with_think_tags(self) -> None: + """Test parsing JSON array with think tags.""" + parser = ReasoningJsonOutputParser() + text = 'Creating array[{"id": 1}, {"id": 2}]' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert result == [{"id": 1}, {"id": 2}] + + def test_partial_json_parsing_with_think_tags(self) -> None: + """Test partial JSON parsing with think tags.""" + parser = ReasoningJsonOutputParser() + text = 'Starting{"name": "John", "age":' + generation = Generation(text=text) + # Partial parsing should handle incomplete JSON + result = parser.parse_result([generation], partial=True) + assert result == {"name": "John"} + + +class MockPerson(BaseModel): + """Mock Pydantic model for testing.""" + + name: str = Field(description="The person's name") + age: int = Field(description="The person's age") + email: str | None = Field(default=None, description="The person's email") + + +class MockCompany(BaseModel): + """Mock nested Pydantic model for testing.""" + + company_name: str = Field(description="Company name") + employees: list[MockPerson] = 
Field(description="List of employees") + founded_year: int = Field(description="Year founded") + + +class TestReasoningStructuredOutputParser: + """Tests for ReasoningStructuredOutputParser.""" + + def test_parse_structured_output_without_think_tags(self) -> None: + """Test parsing structured output without think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = '{"name": "John Doe", "age": 30, "email": "john@example.com"}' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockPerson) + assert result.name == "John Doe" + assert result.age == 30 + assert result.email == "john@example.com" + + def test_parse_structured_output_with_think_tags(self) -> None: + """Test parsing structured output with think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = 'Let me create a person\ + object{"name": "John Doe", "age": 30}' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockPerson) + assert result.name == "John Doe" + assert result.age == 30 + assert result.email is None + + def test_parse_structured_output_with_multiple_think_tags(self) -> None: + """Test parsing with multiple think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = """Step 1: Determine name +Step 2: Determine age +{"name": "Jane Smith", "age": 25}""" + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockPerson) + assert result.name == "Jane Smith" + assert result.age == 25 + + def test_parse_structured_output_markdown_with_think_tags(self) -> None: + """Test parsing markdown-wrapped structured output with think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = """Building person object +```json +{"name": "Alice Brown", "age": 35, "email": "alice@example.com"} +```""" + 
generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockPerson) + assert result.name == "Alice Brown" + assert result.age == 35 + assert result.email == "alice@example.com" + + def test_parse_nested_structured_output_with_think_tags(self) -> None: + """Test parsing nested Pydantic models with think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockCompany) + text = """Creating company with employees +{ + "company_name": "Tech Corp", + "founded_year": 2020, + "employees": [ + {"name": "John", "age": 30}, + {"name": "Jane", "age": 28} + ] +}""" + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockCompany) + assert result.company_name == "Tech Corp" + assert result.founded_year == 2020 + assert len(result.employees) == 2 + assert result.employees[0].name == "John" + assert result.employees[1].name == "Jane" + + def test_parse_invalid_structured_output_with_think_tags(self) -> None: + """Test that invalid structured output raises exception.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + # Missing required field 'age' + text = 'Creating person{"name": "John"}' + generation = Generation(text=text) + with pytest.raises(OutputParserException): + parser.parse_result([generation]) + + def test_parse_structured_wrong_type_with_think_tags(self) -> None: + """Test that wrong types raise validation errors.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + # Age should be int, not string + text = 'Creating person{"name": "John", "age": "thirty"}' + generation = Generation(text=text) + with pytest.raises(OutputParserException): + parser.parse_result([generation]) + + def test_parse_empty_after_stripping_think_tags(self) -> None: + """Test handling when only think tags remain.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = "Only reasoning here" + 
generation = Generation(text=text) + with pytest.raises(OutputParserException): + parser.parse_result([generation]) + + def test_get_format_instructions(self) -> None: + """Test that format instructions work correctly.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + instructions = parser.get_format_instructions() + assert "MockPerson" in instructions or "name" in instructions + assert isinstance(instructions, str) + + def test_partial_structured_parsing_with_think_tags(self) -> None: + """Test partial parsing of structured output with think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = 'Starting{"name": "John", "age": 30' + generation = Generation(text=text) + # Partial parsing should handle incomplete JSON + result = parser.parse_result([generation], partial=True) + # With partial=True, it should return what it can parse + assert "name" in result or isinstance(result, MockPerson) + + def test_parser_with_think_tags_in_json_values(self) -> None: + """Test that think tags in JSON string values don't cause issues.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + # Think tags should be stripped before JSON parsing, so they won't be in values + text = 'reasoning{"name": "John ", "age": 30}' + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockPerson) + assert result.name == "John " + assert result.age == 30 + + def test_multiline_think_tags_with_structured_output(self) -> None: + """Test parsing structured output with multiline think tags.""" + parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + text = """ +Step 1: Consider the requirements +Step 2: Structure the data +Step 3: Format as JSON + +{"name": "Bob Wilson", "age": 40, "email": "bob@example.com"}""" + generation = Generation(text=text) + result = parser.parse_result([generation]) + assert isinstance(result, MockPerson) + assert 
result.name == "Bob Wilson" + assert result.age == 40 + assert result.email == "bob@example.com" From 6bbd7ff2ea6bf0b1b5a7f344a8692c0d857618ae Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Sun, 26 Oct 2025 05:01:22 +0500 Subject: [PATCH 3/5] chore: Output Parser Updated to Support Reasoning Model for Perplexity --- .../langchain_perplexity/output_parsers.py | 7 ++- .../tests/unit_tests/test_output_parsers.py | 48 ++++++++++++++----- libs/partners/perplexity/uv.lock | 2 +- 3 files changed, 42 insertions(+), 15 deletions(-) diff --git a/libs/partners/perplexity/langchain_perplexity/output_parsers.py b/libs/partners/perplexity/langchain_perplexity/output_parsers.py index 324f4dbb86810..28a08478c69c4 100644 --- a/libs/partners/perplexity/langchain_perplexity/output_parsers.py +++ b/libs/partners/perplexity/langchain_perplexity/output_parsers.py @@ -1,7 +1,8 @@ -from typing import Any +from typing import Any, Generic from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser from langchain_core.outputs import Generation +from langchain_core.utils.pydantic import TBaseModel def strip_think_tags(text: str) -> str: @@ -70,7 +71,9 @@ def parse_result(self, result: list[Generation], *, partial: bool = False) -> An return super().parse_result([Generation(text=text)], partial=partial) -class ReasoningStructuredOutputParser(PydanticOutputParser): +class ReasoningStructuredOutputParser( + PydanticOutputParser[TBaseModel], Generic[TBaseModel] +): """A structured output parser that strips reasoning tags before parsing. 
This parser removes any content enclosed in tags from the input text diff --git a/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py b/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py index cd59ea2573d40..369e028f82151 100644 --- a/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py +++ b/libs/partners/perplexity/tests/unit_tests/test_output_parsers.py @@ -193,7 +193,9 @@ class TestReasoningStructuredOutputParser: def test_parse_structured_output_without_think_tags(self) -> None: """Test parsing structured output without think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = '{"name": "John Doe", "age": 30, "email": "john@example.com"}' generation = Generation(text=text) result = parser.parse_result([generation]) @@ -204,7 +206,9 @@ def test_parse_structured_output_without_think_tags(self) -> None: def test_parse_structured_output_with_think_tags(self) -> None: """Test parsing structured output with think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = 'Let me create a person\ object{"name": "John Doe", "age": 30}' generation = Generation(text=text) @@ -216,7 +220,9 @@ def test_parse_structured_output_with_think_tags(self) -> None: def test_parse_structured_output_with_multiple_think_tags(self) -> None: """Test parsing with multiple think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = """Step 1: Determine name Step 2: Determine age {"name": "Jane Smith", "age": 25}""" @@ -228,7 +234,9 @@ def 
test_parse_structured_output_with_multiple_think_tags(self) -> None: def test_parse_structured_output_markdown_with_think_tags(self) -> None: """Test parsing markdown-wrapped structured output with think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = """Building person object ```json {"name": "Alice Brown", "age": 35, "email": "alice@example.com"} @@ -242,7 +250,9 @@ def test_parse_structured_output_markdown_with_think_tags(self) -> None: def test_parse_nested_structured_output_with_think_tags(self) -> None: """Test parsing nested Pydantic models with think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockCompany) + parser: ReasoningStructuredOutputParser[MockCompany] = ( + ReasoningStructuredOutputParser(pydantic_object=MockCompany) + ) text = """Creating company with employees { "company_name": "Tech Corp", @@ -263,7 +273,9 @@ def test_parse_nested_structured_output_with_think_tags(self) -> None: def test_parse_invalid_structured_output_with_think_tags(self) -> None: """Test that invalid structured output raises exception.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) # Missing required field 'age' text = 'Creating person{"name": "John"}' generation = Generation(text=text) @@ -272,7 +284,9 @@ def test_parse_invalid_structured_output_with_think_tags(self) -> None: def test_parse_structured_wrong_type_with_think_tags(self) -> None: """Test that wrong types raise validation errors.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) # Age should be int, not string text = 'Creating 
person{"name": "John", "age": "thirty"}' generation = Generation(text=text) @@ -281,7 +295,9 @@ def test_parse_structured_wrong_type_with_think_tags(self) -> None: def test_parse_empty_after_stripping_think_tags(self) -> None: """Test handling when only think tags remain.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = "Only reasoning here" generation = Generation(text=text) with pytest.raises(OutputParserException): @@ -289,14 +305,18 @@ def test_parse_empty_after_stripping_think_tags(self) -> None: def test_get_format_instructions(self) -> None: """Test that format instructions work correctly.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) instructions = parser.get_format_instructions() assert "MockPerson" in instructions or "name" in instructions assert isinstance(instructions, str) def test_partial_structured_parsing_with_think_tags(self) -> None: """Test partial parsing of structured output with think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = 'Starting{"name": "John", "age": 30' generation = Generation(text=text) # Partial parsing should handle incomplete JSON @@ -306,7 +326,9 @@ def test_partial_structured_parsing_with_think_tags(self) -> None: def test_parser_with_think_tags_in_json_values(self) -> None: """Test that think tags in JSON string values don't cause issues.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) # Think tags should be 
stripped before JSON parsing, so they won't be in values text = 'reasoning{"name": "John ", "age": 30}' generation = Generation(text=text) @@ -317,7 +339,9 @@ def test_parser_with_think_tags_in_json_values(self) -> None: def test_multiline_think_tags_with_structured_output(self) -> None: """Test parsing structured output with multiline think tags.""" - parser = ReasoningStructuredOutputParser(pydantic_object=MockPerson) + parser: ReasoningStructuredOutputParser[MockPerson] = ( + ReasoningStructuredOutputParser(pydantic_object=MockPerson) + ) text = """ Step 1: Consider the requirements Step 2: Structure the data diff --git a/libs/partners/perplexity/uv.lock b/libs/partners/perplexity/uv.lock index 2bc8de6b9c934..f1aee45497607 100644 --- a/libs/partners/perplexity/uv.lock +++ b/libs/partners/perplexity/uv.lock @@ -495,7 +495,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.0.0" +version = "1.0.1" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" }, From c6b1e7d76dbe4d37c3081288ffec7bc5fa6f4535 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Tue, 4 Nov 2025 23:55:00 +0500 Subject: [PATCH 4/5] chore: no loop in strip_think_tags --- .../langchain_perplexity/output_parsers.py | 51 ++++++++----------- 1 file changed, 22 insertions(+), 29 deletions(-) diff --git a/libs/partners/perplexity/langchain_perplexity/output_parsers.py b/libs/partners/perplexity/langchain_perplexity/output_parsers.py index 28a08478c69c4..917a8dc6e088d 100644 --- a/libs/partners/perplexity/langchain_perplexity/output_parsers.py +++ b/libs/partners/perplexity/langchain_perplexity/output_parsers.py @@ -1,3 +1,4 @@ +import re from typing import Any, Generic from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser @@ -6,40 +7,32 @@ def strip_think_tags(text: str) -> str: - """Removes ... tags from text. + """Removes all ... tags and their content from text. 
+ + This function removes all occurrences of think tags, preserving text + before, between, and after the tags. It also handles markdown code fences. Args: text: The input text that may contain think tags. + + Returns: + The text with all `...` blocks removed. """ + # Remove all ... blocks using regex + # The pattern matches followed by any content (non-greedy) until + result = re.sub(r".*?", "", text, flags=re.DOTALL) + + # Remove markdown code fence markers if present + result = result.strip() + if result.startswith("```json"): + result = result[len("```json") :].strip() + elif result.startswith("```"): + result = result[3:].strip() + + if result.endswith("```"): + result = result[:-3].strip() - def remove_think_tags(text: str) -> str: - """Remove content between and tags more safely.""" - result = [] - i = 0 - while i < len(text): - # Look for opening tag - open_tag_pos = text.find("", i) - if open_tag_pos == -1: - # No more opening tags, add the rest and break - result.append(text[i:]) - break - - # Add text before the opening tag - result.append(text[i:open_tag_pos]) - - # Look for closing tag - close_tag_pos = text.find("", open_tag_pos + 7) - if close_tag_pos == -1: - # No closing tag found, treat opening tag as literal text - result.append("") - i = open_tag_pos + 7 - else: - # Skip the content between tags and move past closing tag - i = close_tag_pos + 8 # "" is 8 characters - - return "".join(result).strip() - - return remove_think_tags(text) + return result class ReasoningJsonOutputParser(JsonOutputParser): From ce2e3105da97c938a66a00a1a15be31f6de42e25 Mon Sep 17 00:00:00 2001 From: keenborder786 <21110290@lums.edu.pk> Date: Wed, 5 Nov 2025 00:00:47 +0500 Subject: [PATCH 5/5] chore: revert uv.lock --- libs/partners/perplexity/uv.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/partners/perplexity/uv.lock b/libs/partners/perplexity/uv.lock index f1aee45497607..2bc8de6b9c934 100644 --- a/libs/partners/perplexity/uv.lock +++ 
b/libs/partners/perplexity/uv.lock @@ -495,7 +495,7 @@ wheels = [ [[package]] name = "langchain-core" -version = "1.0.1" +version = "1.0.0" source = { editable = "../../core" } dependencies = [ { name = "jsonpatch" },