⚡️ Speed up function _format_plain_text by 5%
#612
📄 5% (0.05x) speedup for _format_plain_text in marimo/_server/ai/prompts.py
⏱️ Runtime: 32.1 microseconds → 30.5 microseconds (best of 250 runs)
📝 Explanation and details
The optimization improves performance by 5% through two key changes:
1. Smarter whitespace detection:
Replacing plain_text.strip() with not plain_text or plain_text.isspace(). strip() creates a new string object and scans the entire string twice (once forward, once backward), while isspace() only scans once and doesn't allocate new memory; the not plain_text check handles empty strings immediately without any method calls.
2. More efficient string concatenation:
The + operator is faster than the f-string interpolation overhead.
Performance benefits vary by input type: for whitespace-only strings, isspace() still needs to scan the full string, but this is the uncommon case. The optimization is most effective for the common cases of empty strings and strings with surrounding whitespace, while maintaining identical behavior. The slight regression on large whitespace-only strings is acceptable given the overall performance gain and the fact that such inputs are likely rare in typical AI prompt formatting scenarios.
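To make the two changes concrete, here is a minimal before/after sketch of _format_plain_text. It is reconstructed from the explanation above and the expected strings in the tests below, not copied from the marimo source, so treat the prefix constant and function names as illustrative.

```python
# Hypothetical reconstruction of the before/after described above; the real
# function lives in marimo/_server/ai/prompts.py and may differ in detail.
_CONTEXT_PREFIX = (
    "If the prompt mentions @kind://name, use the following context "
    "to help you answer the question:\n\n"
)

def _format_plain_text_before(plain_text: str) -> str:
    # Original approach: strip() allocates a new string and scans the
    # input from both ends just to decide whether it is empty.
    if not plain_text.strip():
        return ""
    return f"{_CONTEXT_PREFIX}{plain_text}"

def _format_plain_text_after(plain_text: str) -> str:
    # Optimized approach: bail out immediately on the empty string, use
    # isspace() (single scan, no allocation) for whitespace-only input,
    # and build the result with plain + concatenation.
    if not plain_text or plain_text.isspace():
        return ""
    return _CONTEXT_PREFIX + plain_text
```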
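If you want to sanity-check the emptiness-check claim independently of codeflash's best-of-250 harness, a rough timeit comparison like the following (a sketch, not the methodology behind the numbers above) shows the relative cost of the two checks on different inputs:

```python
import timeit

# Rough micro-benchmark of the two emptiness checks discussed above.
cases = {
    "empty": "",
    "whitespace": "   \n\t  ",
    "short text": "Some context here.",
    "long whitespace": " " * 1000,
}

for name, s in cases.items():
    t_strip = timeit.timeit(lambda: not s.strip(), number=200_000)
    t_isspace = timeit.timeit(lambda: not s or s.isspace(), number=200_000)
    print(f"{name:>15}: strip {t_strip:.4f}s  isspace {t_isspace:.4f}s")
```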
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
from __future__ import annotations
# imports
import pytest # used for our unit tests
from marimo._server.ai.prompts import _format_plain_text
# unit tests
# -------- Basic Test Cases --------
def test_basic_nonempty_string():
# Basic non-empty string should be formatted correctly
input_text = "This is some context."
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"This is some context.")
codeflash_output = _format_plain_text(input_text) # 565ns -> 553ns (2.17% faster)
def test_basic_with_leading_and_trailing_spaces():
# Leading/trailing spaces should not affect output except for stripping for empty detection
input_text = " Some context here. "
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
" Some context here. ")
codeflash_output = _format_plain_text(input_text) # 699ns -> 601ns (16.3% faster)
def test_basic_with_newlines_inside():
# Newlines inside the text should be preserved
input_text = "Line one.\nLine two."
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Line one.\nLine two.")
codeflash_output = _format_plain_text(input_text) # 535ns -> 574ns (6.79% slower)
def test_basic_with_special_characters():
# Special characters should be preserved
input_text = "Context with symbols: @#$%^&()"
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Context with symbols: @#$%^&()")
codeflash_output = _format_plain_text(input_text) # 536ns -> 540ns (0.741% slower)
# -------- Edge Test Cases --------
def test_edge_empty_string():
# Empty string should return empty string
codeflash_output = _format_plain_text("") # 404ns -> 300ns (34.7% faster)
def test_edge_whitespace_only_string():
# String with only whitespace should return empty string
codeflash_output = _format_plain_text(" ") # 498ns -> 550ns (9.45% slower)
codeflash_output = _format_plain_text("\n\t ") # 223ns -> 237ns (5.91% slower)
def test_edge_string_with_only_newlines():
# String with only newlines (or tabs) should return empty string
codeflash_output = _format_plain_text("\n\n") # 433ns -> 457ns (5.25% slower)
codeflash_output = _format_plain_text("\t\t") # 221ns -> 221ns (0.000% faster)
def test_edge_string_with_spaces_and_newlines():
# String with spaces and newlines only should return empty string
codeflash_output = _format_plain_text(" \n\t ") # 398ns -> 412ns (3.40% slower)
def test_edge_string_with_one_nonspace_character():
# String with one non-space character should be formatted
input_text = "a"
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"a")
codeflash_output = _format_plain_text(input_text) # 579ns -> 550ns (5.27% faster)
def test_edge_string_with_unicode_characters():
# Unicode should be preserved
input_text = "Café 😊 你好"
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Café 😊 你好")
codeflash_output = _format_plain_text(input_text) # 995ns -> 899ns (10.7% faster)
def test_edge_string_with_only_unicode_whitespace():
# Unicode whitespace (e.g. non-breaking space) should be treated as whitespace
input_text = "\u00A0" # non-breaking space
codeflash_output = _format_plain_text(input_text) # 499ns -> 493ns (1.22% faster)
def test_edge_string_with_mixed_whitespace_and_text():
# Mixed whitespace and text should be formatted
input_text = "\n Text\n"
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"\n Text\n")
codeflash_output = _format_plain_text(input_text) # 693ns -> 651ns (6.45% faster)
# -------- Large Scale Test Cases --------
def test_large_scale_long_string():
# Very long string should be formatted correctly
input_text = "a" * 1000
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ "a" * 1000)
codeflash_output = _format_plain_text(input_text) # 812ns -> 799ns (1.63% faster)
def test_large_scale_long_string_with_whitespace():
# Long string with leading/trailing whitespace should be formatted
input_text = " " * 100 + "context" + " " * 100
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ " " * 100 + "context" + " " * 100)
codeflash_output = _format_plain_text(input_text) # 809ns -> 639ns (26.6% faster)
def test_large_scale_multiline_string():
# Multiline string with 1000 lines
input_text = "\n".join([f"Line {i}" for i in range(1000)])
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ input_text)
codeflash_output = _format_plain_text(input_text) # 980ns -> 889ns (10.2% faster)
def test_large_scale_all_whitespace():
# Large string with only whitespace should return empty string
input_text = " " * 1000
codeflash_output = _format_plain_text(input_text) # 1.01μs -> 1.05μs (3.99% slower)
def test_large_scale_mixed_empty_and_nonempty_lines():
# String with many empty lines and some non-empty lines
input_text = "\n" * 500 + "context" + "\n" * 499
expected = ("If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ "\n" * 500 + "context" + "\n" * 499)
codeflash_output = _format_plain_text(input_text) # 1.30μs -> 1.07μs (22.3% faster)
# -------- Determinism Test --------
def test_determinism():
# Multiple calls with same input should produce same output
input_text = "Deterministic context"
codeflash_output = _format_plain_text(input_text); result1 = codeflash_output # 545ns -> 560ns (2.68% slower)
codeflash_output = _format_plain_text(input_text); result2 = codeflash_output # 301ns -> 292ns (3.08% faster)
# -------- Test for prompt string presence --------
def test_output_contains_prompt_string_when_nonempty():
# The output must contain the prompt string if input is nonempty
input_text = "Some context"
codeflash_output = _format_plain_text(input_text); result = codeflash_output # 533ns -> 517ns (3.09% faster)
def test_output_is_exactly_empty_when_input_is_empty():
# Output should be exactly empty string for empty input
codeflash_output = _format_plain_text("") # 404ns -> 259ns (56.0% faster)
codeflash_output = _format_plain_text(" ") # 330ns -> 455ns (27.5% slower)
codeflash_output = _format_plain_text("\n") # 169ns -> 190ns (11.1% slower)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import pytest # used for our unit tests
from marimo._server.ai.prompts import _format_plain_text
# unit tests
# 1. Basic Test Cases
def test_basic_nonempty_string():
# Basic: A normal, non-empty string should be formatted correctly.
input_text = "This is a test context."
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"This is a test context."
)
codeflash_output = _format_plain_text(input_text) # 571ns -> 568ns (0.528% faster)
def test_basic_leading_trailing_whitespace():
# Basic: Leading/trailing whitespace should not affect output (should not be stripped from context).
input_text = " This is a test context. "
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
" This is a test context. "
)
codeflash_output = _format_plain_text(input_text) # 712ns -> 647ns (10.0% faster)
def test_basic_multiline_string():
# Basic: Multiline input should be preserved.
input_text = "Line 1.\nLine 2.\nLine 3."
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Line 1.\nLine 2.\nLine 3."
)
codeflash_output = _format_plain_text(input_text) # 581ns -> 556ns (4.50% faster)
def test_basic_unicode_characters():
# Basic: Unicode characters should be preserved.
input_text = "Café 漢字 😊"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Café 漢字 😊"
)
codeflash_output = _format_plain_text(input_text) # 1.00μs -> 891ns (12.2% faster)
# 2. Edge Test Cases
def test_edge_empty_string():
# Edge: Empty string should return empty string.
codeflash_output = _format_plain_text("") # 430ns -> 257ns (67.3% faster)
def test_edge_whitespace_only():
# Edge: String with only whitespace should return empty string.
codeflash_output = _format_plain_text(" ") # 514ns -> 539ns (4.64% slower)
codeflash_output = _format_plain_text("\t\n\r") # 229ns -> 269ns (14.9% slower)
def test_edge_newlines_only():
# Edge: String with only newlines should return empty string.
codeflash_output = _format_plain_text("\n\n\n") # 420ns -> 449ns (6.46% slower)
def test_edge_tab_and_space_mix():
# Edge: String with tabs and spaces only should return empty string.
codeflash_output = _format_plain_text(" \t \t ") # 434ns -> 428ns (1.40% faster)
def test_edge_leading_trailing_newlines():
# Edge: Leading/trailing newlines should not be stripped from context.
input_text = "\n\nThis is context.\n\n"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"\n\nThis is context.\n\n"
)
codeflash_output = _format_plain_text(input_text) # 661ns -> 587ns (12.6% faster)
def test_edge_only_special_characters():
# Edge: Only special (non-whitespace) characters should be preserved.
input_text = "!@#$%^&()"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"!@#$%^&()"
)
codeflash_output = _format_plain_text(input_text) # 574ns -> 527ns (8.92% faster)
def test_edge_long_whitespace_then_text():
# Edge: Long whitespace before actual text should not affect output.
input_text = " Actual text"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
" Actual text"
)
codeflash_output = _format_plain_text(input_text) # 681ns -> 619ns (10.0% faster)
def test_edge_text_with_internal_whitespace():
# Edge: Internal whitespace should be preserved.
input_text = "Line1\n\n\nLine2"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Line1\n\n\nLine2"
)
codeflash_output = _format_plain_text(input_text) # 563ns -> 517ns (8.90% faster)
def test_edge_text_with_only_tab_characters():
# Edge: Only tab characters should return empty string.
codeflash_output = _format_plain_text("\t\t\t") # 488ns -> 492ns (0.813% slower)
def test_edge_text_with_mixed_whitespace_and_text():
# Edge: Mixed whitespace before and after text should be preserved.
input_text = " \n\t Hello world! \t\n "
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
" \n\t Hello world! \t\n "
)
codeflash_output = _format_plain_text(input_text) # 650ns -> 625ns (4.00% faster)
# 3. Large Scale Test Cases
def test_large_scale_long_string():
# Large: Very long string should be handled and preserved.
input_text = "a" * 1000
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ "a" * 1000
)
codeflash_output = _format_plain_text(input_text) # 861ns -> 773ns (11.4% faster)
def test_large_scale_multiline():
# Large: Multiline string with 1000 lines.
input_text = "\n".join([f"Line {i}" for i in range(1000)])
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ input_text
)
codeflash_output = _format_plain_text(input_text) # 985ns -> 939ns (4.90% faster)
def test_large_scale_mostly_whitespace_but_one_char():
# Large: 999 whitespace chars and 1 non-whitespace char.
input_text = " " * 999 + "X"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ " " * 999 + "X"
)
codeflash_output = _format_plain_text(input_text) # 1.40μs -> 1.35μs (2.95% faster)
def test_large_scale_all_whitespace():
# Large: 1000 whitespace characters should return empty string.
input_text = " \t\n" * 333 + " " # 999 + 1 = 1000 chars
codeflash_output = _format_plain_text(input_text) # 914ns -> 1.05μs (13.2% slower)
def test_large_scale_unicode():
# Large: 1000 unicode emoji characters.
input_text = "😊" * 1000
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
+ "😊" * 1000
)
codeflash_output = _format_plain_text(input_text) # 1.37μs -> 1.20μs (14.7% faster)
# 4. Additional Robustness Tests
def test_text_with_only_newline_and_tab_and_space():
# Edge: Input with only whitespace of various kinds should return empty string.
input_text = " \n\t\r " * 50
codeflash_output = _format_plain_text(input_text) # 517ns -> 652ns (20.7% slower)
def test_text_with_control_characters():
# Edge: Control characters (other than whitespace) should be preserved.
input_text = "abc\x00\x01\x02def"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"abc\x00\x01\x02def"
)
codeflash_output = _format_plain_text(input_text) # 559ns -> 577ns (3.12% slower)
def test_text_with_at_kind_url():
# Basic: Input that itself mentions @kind://name should be preserved.
input_text = "Reference: @kind://name"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"Reference: @kind://name"
)
codeflash_output = _format_plain_text(input_text) # 545ns -> 551ns (1.09% slower)
def test_text_with_formatting_characters():
# Edge: Input with formatting characters (e.g., form feed, vertical tab) should be preserved.
input_text = "foo\fbar\vqux"
expected = (
"If the prompt mentions @kind://name, use the following context to help you answer the question:\n\n"
"foo\fbar\vqux"
)
codeflash_output = _format_plain_text(input_text) # 546ns -> 495ns (10.3% faster)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from marimo._server.ai.prompts import _format_plain_text
def test__format_plain_text():
_format_plain_text('ᚁ、')
def test__format_plain_text_2():
_format_plain_text('')
🔎 Concolic Coverage Tests and Runtime
codeflash_concolic_bps3n5s8/tmphevgxa28/test_concolic_coverage.py::test__format_plain_text
codeflash_concolic_bps3n5s8/tmphevgxa28/test_concolic_coverage.py::test__format_plain_text_2

To edit these changes, run
git checkout codeflash/optimize-_format_plain_text-mhvj24u7
and push.