diff --git a/documentation/functions.md b/documentation/functions.md
index 2b681eec..b4adcae5 100644
--- a/documentation/functions.md
+++ b/documentation/functions.md
@@ -266,16 +266,16 @@ For instance, `JOIN_STRINGS("; ", "Alpha", "Beta", "Gamma)` returns `"Alpha; Bet
### LPAD
-The `LPAD` function pads an expression on the left side with a specified padding character and returns the padded expression till the desired length. It takes three arguments:
+The `LPAD` function pads an expression on the left side with a specified padding character until it is the desired length. It takes three arguments:
-1. The input expression to pad.
-2. The desired final length of the expression.
-3. The single character to use for padding.
+1. The input string to pad.
+2. The desired final length of the expression. It should be a non-negative integer literal.
+3. The single character literal to use for padding.
The function behaves as follows:
- If the input expression is shorter than the desired length, it adds padding characters on the left until reaching the desired length.
-- If the input expression is longer than the desired length, it truncates the expression from the left to match the desired length.
+- If the input expression is longer than the desired length, it truncates the expression by removing characters from the right side until it matches the desired length.
- If the desired length is 0, it returns an empty string.
- If the desired length is negative, it raises an error.
- If the padding argument is not a single character, it raises an error.
@@ -284,19 +284,28 @@ The function behaves as follows:
Customers(left_padded_name = LPAD(name, 30, "*"))
```
+The following examples show how `LPAD` pads and truncates its input. String literals are used as the first argument purely for illustration; in actual usage the first argument cannot be a string literal:
+| Input | Output |
+|-------|--------|
+| `LPAD("123", 6, "0")` | `"000123"` |
+| `LPAD("123", 5, "#")` | `"##123"` |
+| `LPAD("123", 3, "0")` | `"123"` |
+| `LPAD("123", 2, "0")` | `"12"` |
+| `LPAD("123", 0, "0")` | `""` |
+
### RPAD
-The `RPAD` function pads an expression on the right side with a specified padding character and returns the padded expression till the desired length. It takes three arguments:
+The `RPAD` function pads an expression on the right side with a specified padding character until it is the desired length. It takes three arguments:
-1. The input expression to pad.
-2. The desired final length of the expression.
-3. The single character to use for padding.
+1. The input string to pad.
+2. The desired final length of the expression. It should be a non-negative integer literal.
+3. The single character literal to use for padding.
The function behaves as follows:
- If the input expression is shorter than the desired length, it adds padding characters on the right until reaching the desired length.
-- If the input expression is longer than the desired length, it truncates the expression from the left to match the desired length.
+- If the input expression is longer than the desired length, it truncates the expression by removing characters from the right side until it matches the desired length.
- If the desired length is 0, it returns an empty string
- If the desired length is negative, it raises an error
- If the padding argument is not a single character, it raises an error
@@ -305,6 +314,15 @@ The function behaves as follows:
Customers(right_padded_name = RPAD(name, 30, "*"))
```
+The following examples show how `RPAD` pads and truncates its input. String literals are used as the first argument purely for illustration; in actual usage the first argument cannot be a string literal:
+| Input | Output |
+|-------|--------|
+| `RPAD("123", 6, "0")` | `"123000"` |
+| `RPAD("123", 5, "#")` | `"123##"` |
+| `RPAD("123", 3, "0")` | `"123"` |
+| `RPAD("123", 2, "0")` | `"12"` |
+| `RPAD("123", 0, "0")` | `""` |
+
## Datetime Functions
diff --git a/pydough/sqlglot/transform_bindings.py b/pydough/sqlglot/transform_bindings.py
index c14476a3..70d84ab5 100644
--- a/pydough/sqlglot/transform_bindings.py
+++ b/pydough/sqlglot/transform_bindings.py
@@ -729,15 +729,13 @@ def convert_contains(
return convert_like(None, [column, pattern])
-def convert_lpad(
+def pad_helper(
raw_args: Sequence[RelationalExpression] | None,
sql_glot_args: Sequence[SQLGlotExpression],
+ pad_func: str,
) -> SQLGlotExpression:
"""
- Converts and pads the string to the left till the string is the specified length.
- If length is 0, return an empty string.
- If length is negative, raise an error.
- If length is positive, pad the string on the left to the specified length.
+ Helper function for LPAD and RPAD.
Expects sqlglot_args[0] to be the column to pad.
Expects sqlglot_args[1] and sqlglot_args[2] to be literals.
Expects sqlglot_args[1] to be the returned length of the padded string.
@@ -748,45 +746,80 @@ def convert_lpad(
SQLGlot expressions. (Not actively used in this implementation.)
`sql_glot_args`: The operands passed to the function after they were converted
to SQLGlot expressions. The first operand is expected to be a string.
+ `pad_func`: The name of the padding function to use.
Returns:
- The SQLGlot expression matching the functionality of
- `LPAD(string, length, padding)`. With the caveat that if length is 0,
- it will return an empty string.
+ A tuple of sqlglot expressions for the column to pad, the length of the column,
+ the required length, padding string and the integer literal of the required length.
"""
+ assert pad_func in ["LPAD", "RPAD"]
assert len(sql_glot_args) == 3
if (
- not isinstance(sql_glot_args[1], sqlglot_expressions.Literal)
- or sql_glot_args[1].is_string
+ isinstance(sql_glot_args[1], sqlglot_expressions.Literal)
+ and not sql_glot_args[1].is_string
):
- raise ValueError("LPAD function requires the length argument to be an integer.")
+ try:
+ required_len = int(sql_glot_args[1].this)
+ if required_len < 0:
+ raise ValueError()
+ except ValueError:
+ raise ValueError(
+ f"{pad_func} function requires the length argument to be a non-negative integer literal."
+ )
+ else:
+ raise ValueError(
+ f"{pad_func} function requires the length argument to be a non-negative integer literal."
+ )
if (
not isinstance(sql_glot_args[2], sqlglot_expressions.Literal)
or not sql_glot_args[2].is_string
):
- raise ValueError("LPAD function requires the padding argument to be a string.")
+ raise ValueError(
+ f"{pad_func} function requires the padding argument to be a string literal of length 1."
+ )
if len(str(sql_glot_args[2].this)) != 1:
raise ValueError(
- "LPAD function requires the padding argument to be of length 1."
+ f"{pad_func} function requires the padding argument to be a string literal of length 1."
)
- try:
- required_len = int(sql_glot_args[1].this)
- except ValueError:
- raise ValueError("LPAD function requires the length argument to be an integer.")
- if required_len < 0:
- raise ValueError("LPAD function requires a non-negative length.")
- if required_len == 0:
- return sqlglot_expressions.convert("")
-
col_glot = sql_glot_args[0]
col_len_glot = sqlglot_expressions.Length(this=sql_glot_args[0])
required_len_glot = sqlglot_expressions.convert(required_len)
pad_string_glot = sqlglot_expressions.convert(
str(sql_glot_args[2].this) * required_len
)
+ return col_glot, col_len_glot, required_len_glot, pad_string_glot, required_len
+
+
+def convert_lpad(
+ raw_args: Sequence[RelationalExpression] | None,
+ sql_glot_args: Sequence[SQLGlotExpression],
+) -> SQLGlotExpression:
+ """
+ Converts and pads the string to the left till the string is the specified length.
+ If length is 0, return an empty string.
+ If length is negative, raise an error.
+ If length is positive, pad the string on the left to the specified length.
+
+ Args:
+ `raw_args`: The operands passed to the function before they were converted to
+ SQLGlot expressions. (Not actively used in this implementation.)
+ `sql_glot_args`: The operands passed to the function after they were converted
+ to SQLGlot expressions. The first operand is expected to be a string.
+
+ Returns:
+ The SQLGlot expression matching the functionality of
+ `LPAD(string, length, padding)`. With the caveat that if length is 0,
+ it will return an empty string.
+ """
+ col_glot, col_len_glot, required_len_glot, pad_string_glot, required_len = (
+ pad_helper(raw_args, sql_glot_args, "LPAD")
+ )
+ if required_len == 0:
+ return sqlglot_expressions.convert("")
+
answer = convert_iff_case(
None,
[
@@ -819,10 +852,6 @@ def convert_rpad(
If length is 0, return an empty string.
If length is negative, raise an error.
If length is positive, pad the string on the right to the specified length.
- Expects sqlglot_args[0] to be the column to pad.
- Expects sqlglot_args[1] and sqlglot_args[2] to be literals.
- Expects sqlglot_args[1] to be the returned length of the padded string.
- Expects sqlglot_args[2] to be the string to pad with.
Args:
`raw_args`: The operands passed to the function before they were converted to
@@ -835,38 +864,12 @@ def convert_rpad(
`RPAD(string, length, padding)`. With the caveat that if length is 0,
it will return an empty string.
"""
- assert len(sql_glot_args) == 3
-
- if (
- not isinstance(sql_glot_args[1], sqlglot_expressions.Literal)
- or sql_glot_args[1].is_string
- ):
- raise ValueError("RPAD function requires the length argument to be an integer.")
-
- if (
- not isinstance(sql_glot_args[2], sqlglot_expressions.Literal)
- or not sql_glot_args[2].is_string
- ):
- raise ValueError("RPAD function requires the padding argument to be a string")
- if len(str(sql_glot_args[2].this)) != 1:
- raise ValueError(
- "RPAD function requires the padding argument to be of length 1."
- )
-
- try:
- required_len = int(sql_glot_args[1].this)
- except ValueError:
- raise ValueError("RPAD function requires the length argument to be an integer.")
- if required_len < 0:
- raise ValueError("RPAD function requires a non-negative length")
+ col_glot, _, required_len_glot, pad_string_glot, required_len = pad_helper(
+ raw_args, sql_glot_args, "RPAD"
+ )
if required_len == 0:
return sqlglot_expressions.convert("")
- col_glot = sql_glot_args[0]
- required_len_glot = sqlglot_expressions.convert(required_len)
- pad_string_glot = sqlglot_expressions.convert(
- str(sql_glot_args[2].this) * required_len
- )
answer = sqlglot_expressions.Substring(
this=convert_concat(None, [col_glot, pad_string_glot]),
start=sqlglot_expressions.convert(1),
diff --git a/tests/bad_pydough_functions.py b/tests/bad_pydough_functions.py
index a42332bf..7443d197 100644
--- a/tests/bad_pydough_functions.py
+++ b/tests/bad_pydough_functions.py
@@ -108,6 +108,16 @@ def bad_lpad_6():
return Customers(padded_name=LPAD(name, datetime.datetime.now(), "*"))
+def bad_lpad_7():
+ # Non-literal length
+ return Customers(padded_name=LPAD(name, LENGTH(phone), "*"))
+
+
+def bad_lpad_8():
+ # Non-literal padding string
+ return Customers(padded_name=LPAD(name, 20, LENGTH(phone)))
+
+
def bad_rpad_1():
# String length argument
return Customers(padded_name=RPAD(name, "20", "*"))
@@ -138,6 +148,16 @@ def bad_rpad_6():
return Customers(padded_name=RPAD(name, datetime.datetime.now(), "*"))
+def bad_rpad_7():
+ # Non-literal length
+ return Customers(padded_name=RPAD(name, LENGTH(phone), "*"))
+
+
+def bad_rpad_8():
+ # Non-literal padding string
+ return Customers(padded_name=RPAD(name, 20, LENGTH(phone)))
+
+
def bad_floor():
# Using `math.floor` (calls __floor__)
return Customer(age=math.floor(order.total_price))
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 81f1ccaf..a763c102 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -13,12 +13,16 @@
bad_lpad_4,
bad_lpad_5,
bad_lpad_6,
+ bad_lpad_7,
+ bad_lpad_8,
bad_rpad_1,
bad_rpad_2,
bad_rpad_3,
bad_rpad_4,
bad_rpad_5,
bad_rpad_6,
+ bad_rpad_7,
+ bad_rpad_8,
bad_slice_1,
bad_slice_2,
bad_slice_3,
@@ -127,7 +131,7 @@
impl_tpch_q22,
)
-from pydough import init_pydough_context, to_df
+from pydough import init_pydough_context, to_df, to_sql
from pydough.configs import PyDoughConfigs
from pydough.conversion.relational_converter import convert_ast_to_relational
from pydough.database_connectors import DatabaseContext
@@ -1628,8 +1632,8 @@ def test_pipeline_e2e_errors(
"David Ki",
"Emily Da",
],
- zero_pad_right=["", "", "", "", ""],
- zero_pad_left=["", "", "", "", ""],
+ zero_pad_right=[""] * 5,
+ zero_pad_left=[""] * 5,
right_padded_space=lambda x: x.original_name.apply(
lambda s: (s + " " * 30)[:30]
),
@@ -1674,74 +1678,110 @@ def test_defog_e2e_with_custom_data(
pd.testing.assert_frame_equal(result, answer_impl())
-@pytest.mark.execute
@pytest.mark.parametrize(
- "impl, error_msg",
+ "impl, graph_name, error_msg",
[
pytest.param(
bad_lpad_1,
- "LPAD function requires the length argument to be an integer.",
+ "Broker",
+ "LPAD function requires the length argument to be a non-negative integer literal.",
id="bad_lpad_1",
),
pytest.param(
bad_lpad_2,
- "LPAD function requires the padding argument to be of length 1.",
+ "Broker",
+ "LPAD function requires the padding argument to be a string literal of length 1.",
id="bad_lpad_2",
),
pytest.param(
bad_lpad_3,
- "LPAD function requires a non-negative length",
+ "Broker",
+ "LPAD function requires the length argument to be a non-negative integer literal.",
id="bad_lpad_3",
),
pytest.param(
bad_lpad_4,
- "LPAD function requires the padding argument to be of length 1",
+ "Broker",
+ "LPAD function requires the padding argument to be a string literal of length 1.",
id="bad_lpad_4",
),
pytest.param(
bad_lpad_5,
- "LPAD function requires the length argument to be an integer.",
+ "Broker",
+ "LPAD function requires the length argument to be a non-negative integer literal.",
id="bad_lpad_5",
),
pytest.param(
bad_lpad_6,
- "LPAD function requires the length argument to be an integer.",
+ "Broker",
+ "LPAD function requires the length argument to be a non-negative integer literal.",
id="bad_lpad_6",
),
+ pytest.param(
+ bad_lpad_7,
+ "Broker",
+ "LPAD function requires the length argument to be a non-negative integer literal.",
+ id="bad_lpad_7",
+ ),
+ pytest.param(
+ bad_lpad_8,
+ "Broker",
+ "LPAD function requires the padding argument to be a string literal of length 1.",
+ id="bad_lpad_8",
+ ),
pytest.param(
bad_rpad_1,
- "RPAD function requires the length argument to be an integer.",
+ "Broker",
+ "RPAD function requires the length argument to be a non-negative integer literal.",
id="bad_rpad_1",
),
pytest.param(
bad_rpad_2,
- "RPAD function requires the padding argument to be of length 1.",
+ "Broker",
+ "RPAD function requires the padding argument to be a string literal of length 1.",
id="bad_rpad_2",
),
pytest.param(
bad_rpad_3,
- "RPAD function requires a non-negative length",
+ "Broker",
+ "RPAD function requires the length argument to be a non-negative integer literal.",
id="bad_rpad_3",
),
pytest.param(
bad_rpad_4,
- "RPAD function requires the padding argument to be of length 1",
+ "Broker",
+ "RPAD function requires the padding argument to be a string literal of length 1.",
id="bad_rpad_4",
),
pytest.param(
bad_rpad_5,
- "RPAD function requires the length argument to be an integer.",
+ "Broker",
+ "RPAD function requires the length argument to be a non-negative integer literal.",
id="bad_rpad_5",
),
pytest.param(
bad_rpad_6,
- "RPAD function requires the length argument to be an integer.",
+ "Broker",
+ "RPAD function requires the length argument to be a non-negative integer literal.",
id="bad_rpad_6",
),
+ pytest.param(
+ bad_rpad_7,
+ "Broker",
+ "RPAD function requires the length argument to be a non-negative integer literal.",
+ id="bad_rpad_7",
+ ),
+ pytest.param(
+ bad_rpad_8,
+ "Broker",
+ "RPAD function requires the padding argument to be a string literal of length 1.",
+ id="bad_rpad_8",
+ ),
],
)
def test_defog_e2e_errors(
impl: Callable[[], UnqualifiedNode],
+ graph_name: str,
error_msg: str,
defog_graphs: graph_fetcher,
sqlite_defog_connection: DatabaseContext,
@@ -1750,7 +1790,7 @@ def test_defog_e2e_errors(
Tests running bad PyDough code through the entire pipeline to verify that
a certain error is raised for defog database.
"""
- graph: GraphMetadata = defog_graphs("Broker")
+ graph: GraphMetadata = defog_graphs(graph_name)
with pytest.raises(Exception, match=error_msg):
root: UnqualifiedNode = init_pydough_context(graph)(impl)()
- to_df(root, metadata=graph, database=sqlite_defog_connection)
+ to_sql(root, metadata=graph, database=sqlite_defog_connection)