diff --git a/documentation/functions.md b/documentation/functions.md index 2b681eec..b4adcae5 100644 --- a/documentation/functions.md +++ b/documentation/functions.md @@ -266,16 +266,16 @@ For instance, `JOIN_STRINGS("; ", "Alpha", "Beta", "Gamma)` returns `"Alpha; Bet ### LPAD -The `LPAD` function pads an expression on the left side with a specified padding character and returns the padded expression till the desired length. It takes three arguments: +The `LPAD` function pads an expression on the left side with a specified padding character until it is the desired length. It takes three arguments: -1. The input expression to pad. -2. The desired final length of the expression. -3. The single character to use for padding. +1. The input string to pad. +2. The desired final length of the expression. It should be a non-negative integer literal. +3. The single character literal to use for padding. The function behaves as follows: - If the input expression is shorter than the desired length, it adds padding characters on the left until reaching the desired length. -- If the input expression is longer than the desired length, it truncates the expression from the left to match the desired length. +- If the input expression is longer than the desired length, it truncates the expression by removing characters from the right side until it matches the desired length. - If the desired length is 0, it returns an empty string. - If the desired length is negative, it raises an error. - If the padding argument is not a single character, it raises an error. @@ -284,19 +284,28 @@ The function behaves as follows: Customers(left_padded_name = LPAD(name, 30, "*")) ``` +For demonstration purposes, here are examples of how it pads string literals. 
Note that the first argument cannot be a string literal; literals are used here only for demonstration purposes: +| Input | Output | +|-------|--------| +| `LPAD("123", 6, "0")` | `"000123"` | +| `LPAD("123", 5, "#")` | `"##123"` | +| `LPAD("123", 3, "0")` | `"123"` | +| `LPAD("123", 2, "0")` | `"12"` | +| `LPAD("123", 0, "0")` | `""` | + ### RPAD -The `RPAD` function pads an expression on the right side with a specified padding character and returns the padded expression till the desired length. It takes three arguments: +The `RPAD` function pads an expression on the right side with a specified padding character until it is the desired length. It takes three arguments: -1. The input expression to pad. -2. The desired final length of the expression. -3. The single character to use for padding. +1. The input string to pad. +2. The desired final length of the expression. It should be a non-negative integer literal. +3. The single character literal to use for padding. The function behaves as follows: - If the input expression is shorter than the desired length, it adds padding characters on the right until reaching the desired length. -- If the input expression is longer than the desired length, it truncates the expression from the left to match the desired length. +- If the input expression is longer than the desired length, it truncates the expression by removing characters from the right side until it matches the desired length. - If the desired length is 0, it returns an empty string - If the desired length is negative, it raises an error - If the padding argument is not a single character, it raises an error @@ -305,6 +314,15 @@ The function behaves as follows: Customers(right_padded_name = RPAD(name, 30, "*")) ``` +For demonstration purposes, here are examples of how it pads string literals. 
Please note the first argument cannot be a string literal and this is only for demonstration purposes: +| Input | Output | +|-------|--------| +| `RPAD("123", 6, "0")` | `"123000"` | +| `RPAD("123", 5, "#")` | `"123##"` | +| `RPAD("123", 3, "0")` | `"123"` | +| `RPAD("123", 2, "0")` | `"12"` | +| `RPAD("123", 0, "0")` | `""` | + ## Datetime Functions diff --git a/pydough/sqlglot/transform_bindings.py b/pydough/sqlglot/transform_bindings.py index c14476a3..70d84ab5 100644 --- a/pydough/sqlglot/transform_bindings.py +++ b/pydough/sqlglot/transform_bindings.py @@ -729,15 +729,13 @@ def convert_contains( return convert_like(None, [column, pattern]) -def convert_lpad( +def pad_helper( raw_args: Sequence[RelationalExpression] | None, sql_glot_args: Sequence[SQLGlotExpression], + pad_func: str, ) -> SQLGlotExpression: """ - Converts and pads the string to the left till the string is the specified length. - If length is 0, return an empty string. - If length is negative, raise an error. - If length is positive, pad the string on the left to the specified length. + Helper function for LPAD and RPAD. Expects sqlglot_args[0] to be the column to pad. Expects sqlglot_args[1] and sqlglot_args[2] to be literals. Expects sqlglot_args[1] to be the returned length of the padded string. @@ -748,45 +746,80 @@ def convert_lpad( SQLGlot expressions. (Not actively used in this implementation.) `sql_glot_args`: The operands passed to the function after they were converted to SQLGlot expressions. The first operand is expected to be a string. + `pad_func`: The name of the padding function to use. Returns: - The SQLGlot expression matching the functionality of - `LPAD(string, length, padding)`. With the caveat that if length is 0, - it will return an empty string. + A tuple of sqlglot expressions for the column to pad, the length of the column, + the required length, padding string and the integer literal of the required length. 
""" + assert pad_func in ["LPAD", "RPAD"] assert len(sql_glot_args) == 3 if ( - not isinstance(sql_glot_args[1], sqlglot_expressions.Literal) - or sql_glot_args[1].is_string + isinstance(sql_glot_args[1], sqlglot_expressions.Literal) + and not sql_glot_args[1].is_string ): - raise ValueError("LPAD function requires the length argument to be an integer.") + try: + required_len = int(sql_glot_args[1].this) + if required_len < 0: + raise ValueError() + except ValueError: + raise ValueError( + f"{pad_func} function requires the length argument to be a non-negative integer literal." + ) + else: + raise ValueError( + f"{pad_func} function requires the length argument to be a non-negative integer literal." + ) if ( not isinstance(sql_glot_args[2], sqlglot_expressions.Literal) or not sql_glot_args[2].is_string ): - raise ValueError("LPAD function requires the padding argument to be a string.") + raise ValueError( + f"{pad_func} function requires the padding argument to be a string literal of length 1." + ) if len(str(sql_glot_args[2].this)) != 1: raise ValueError( - "LPAD function requires the padding argument to be of length 1." + f"{pad_func} function requires the padding argument to be a string literal of length 1." 
) - try: - required_len = int(sql_glot_args[1].this) - except ValueError: - raise ValueError("LPAD function requires the length argument to be an integer.") - if required_len < 0: - raise ValueError("LPAD function requires a non-negative length.") - if required_len == 0: - return sqlglot_expressions.convert("") - col_glot = sql_glot_args[0] col_len_glot = sqlglot_expressions.Length(this=sql_glot_args[0]) required_len_glot = sqlglot_expressions.convert(required_len) pad_string_glot = sqlglot_expressions.convert( str(sql_glot_args[2].this) * required_len ) + return col_glot, col_len_glot, required_len_glot, pad_string_glot, required_len + + +def convert_lpad( + raw_args: Sequence[RelationalExpression] | None, + sql_glot_args: Sequence[SQLGlotExpression], +) -> SQLGlotExpression: + """ + Converts and pads the string to the left till the string is the specified length. + If length is 0, return an empty string. + If length is negative, raise an error. + If length is positive, pad the string on the left to the specified length. + + Args: + `raw_args`: The operands passed to the function before they were converted to + SQLGlot expressions. (Not actively used in this implementation.) + `sql_glot_args`: The operands passed to the function after they were converted + to SQLGlot expressions. The first operand is expected to be a string. + + Returns: + The SQLGlot expression matching the functionality of + `LPAD(string, length, padding)`. With the caveat that if length is 0, + it will return an empty string. + """ + col_glot, col_len_glot, required_len_glot, pad_string_glot, required_len = ( + pad_helper(raw_args, sql_glot_args, "LPAD") + ) + if required_len == 0: + return sqlglot_expressions.convert("") + answer = convert_iff_case( None, [ @@ -819,10 +852,6 @@ def convert_rpad( If length is 0, return an empty string. If length is negative, raise an error. If length is positive, pad the string on the right to the specified length. 
- Expects sqlglot_args[0] to be the column to pad. - Expects sqlglot_args[1] and sqlglot_args[2] to be literals. - Expects sqlglot_args[1] to be the returned length of the padded string. - Expects sqlglot_args[2] to be the string to pad with. Args: `raw_args`: The operands passed to the function before they were converted to @@ -835,38 +864,12 @@ def convert_rpad( `RPAD(string, length, padding)`. With the caveat that if length is 0, it will return an empty string. """ - assert len(sql_glot_args) == 3 - - if ( - not isinstance(sql_glot_args[1], sqlglot_expressions.Literal) - or sql_glot_args[1].is_string - ): - raise ValueError("RPAD function requires the length argument to be an integer.") - - if ( - not isinstance(sql_glot_args[2], sqlglot_expressions.Literal) - or not sql_glot_args[2].is_string - ): - raise ValueError("RPAD function requires the padding argument to be a string") - if len(str(sql_glot_args[2].this)) != 1: - raise ValueError( - "RPAD function requires the padding argument to be of length 1." 
- ) - - try: - required_len = int(sql_glot_args[1].this) - except ValueError: - raise ValueError("RPAD function requires the length argument to be an integer.") - if required_len < 0: - raise ValueError("RPAD function requires a non-negative length") + col_glot, _, required_len_glot, pad_string_glot, required_len = pad_helper( + raw_args, sql_glot_args, "RPAD" + ) if required_len == 0: return sqlglot_expressions.convert("") - col_glot = sql_glot_args[0] - required_len_glot = sqlglot_expressions.convert(required_len) - pad_string_glot = sqlglot_expressions.convert( - str(sql_glot_args[2].this) * required_len - ) answer = sqlglot_expressions.Substring( this=convert_concat(None, [col_glot, pad_string_glot]), start=sqlglot_expressions.convert(1), diff --git a/tests/bad_pydough_functions.py b/tests/bad_pydough_functions.py index a42332bf..7443d197 100644 --- a/tests/bad_pydough_functions.py +++ b/tests/bad_pydough_functions.py @@ -108,6 +108,16 @@ def bad_lpad_6(): return Customers(padded_name=LPAD(name, datetime.datetime.now(), "*")) +def bad_lpad_7(): + # Non-literal length + return Customers(padded_name=LPAD(name, LENGTH(phone), "*")) + + +def bad_lpad_8(): + # Non-literal padding string + return Customers(padded_name=LPAD(name, 20, LENGTH(phone))) + + def bad_rpad_1(): # String length argument return Customers(padded_name=RPAD(name, "20", "*")) @@ -138,6 +148,16 @@ def bad_rpad_6(): return Customers(padded_name=RPAD(name, datetime.datetime.now(), "*")) +def bad_rpad_7(): + # Non-literal length + return Customers(padded_name=RPAD(name, LENGTH(phone), "*")) + + +def bad_rpad_8(): + # Non-literal padding string + return Customers(padded_name=RPAD(name, 20, LENGTH(phone))) + + def bad_floor(): # Using `math.floor` (calls __floor__) return Customer(age=math.floor(order.total_price)) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 81f1ccaf..a763c102 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -13,12 +13,16 @@ bad_lpad_4, 
bad_lpad_5, bad_lpad_6, + bad_lpad_7, + bad_lpad_8, bad_rpad_1, bad_rpad_2, bad_rpad_3, bad_rpad_4, bad_rpad_5, bad_rpad_6, + bad_rpad_7, + bad_rpad_8, bad_slice_1, bad_slice_2, bad_slice_3, @@ -127,7 +131,7 @@ impl_tpch_q22, ) -from pydough import init_pydough_context, to_df +from pydough import init_pydough_context, to_df, to_sql from pydough.configs import PyDoughConfigs from pydough.conversion.relational_converter import convert_ast_to_relational from pydough.database_connectors import DatabaseContext @@ -1628,8 +1632,8 @@ def test_pipeline_e2e_errors( "David Ki", "Emily Da", ], - zero_pad_right=["", "", "", "", ""], - zero_pad_left=["", "", "", "", ""], + zero_pad_right=[""] * 5, + zero_pad_left=[""] * 5, right_padded_space=lambda x: x.original_name.apply( lambda s: (s + " " * 30)[:30] ), @@ -1674,74 +1678,110 @@ def test_defog_e2e_with_custom_data( pd.testing.assert_frame_equal(result, answer_impl()) -@pytest.mark.execute @pytest.mark.parametrize( - "impl, error_msg", + "impl, graph_name, error_msg", [ pytest.param( bad_lpad_1, - "LPAD function requires the length argument to be an integer.", + "Broker", + "LPAD function requires the length argument to be a non-negative integer literal.", id="bad_lpad_1", ), pytest.param( bad_lpad_2, - "LPAD function requires the padding argument to be of length 1.", + "Broker", + "LPAD function requires the padding argument to be a string literal of length 1.", id="bad_lpad_2", ), pytest.param( bad_lpad_3, - "LPAD function requires a non-negative length", + "Broker", + "LPAD function requires the length argument to be a non-negative integer literal.", id="bad_lpad_3", ), pytest.param( bad_lpad_4, - "LPAD function requires the padding argument to be of length 1", + "Broker", + "LPAD function requires the padding argument to be a string literal of length 1.", id="bad_lpad_4", ), pytest.param( bad_lpad_5, - "LPAD function requires the length argument to be an integer.", + "Broker", + "LPAD function requires the length argument 
to be a non-negative integer literal.", id="bad_lpad_5", ), pytest.param( bad_lpad_6, - "LPAD function requires the length argument to be an integer.", + "Broker", + "LPAD function requires the length argument to be a non-negative integer literal.", id="bad_lpad_6", ), + pytest.param( + bad_lpad_7, + "Broker", + "LPAD function requires the length argument to be a non-negative integer literal.", + id="bad_lpad_7", + ), + pytest.param( + bad_lpad_8, + "Broker", + "LPAD function requires the padding argument to be a string literal of length 1.", + id="bad_lpad_8", + ), pytest.param( bad_rpad_1, - "RPAD function requires the length argument to be an integer.", + "Broker", + "RPAD function requires the length argument to be a non-negative integer literal.", id="bad_rpad_1", ), pytest.param( bad_rpad_2, - "RPAD function requires the padding argument to be of length 1.", + "Broker", + "RPAD function requires the padding argument to be a string literal of length 1.", id="bad_rpad_2", ), pytest.param( bad_rpad_3, - "RPAD function requires a non-negative length", + "Broker", + "RPAD function requires the length argument to be a non-negative integer literal.", id="bad_rpad_3", ), pytest.param( bad_rpad_4, - "RPAD function requires the padding argument to be of length 1", + "Broker", + "RPAD function requires the padding argument to be a string literal of length 1.", id="bad_rpad_4", ), pytest.param( bad_rpad_5, - "RPAD function requires the length argument to be an integer.", + "Broker", + "RPAD function requires the length argument to be a non-negative integer literal.", id="bad_rpad_5", ), pytest.param( bad_rpad_6, - "RPAD function requires the length argument to be an integer.", + "Broker", + "RPAD function requires the length argument to be a non-negative integer literal.", id="bad_rpad_6", ), + pytest.param( + bad_rpad_7, + "Broker", + "RPAD function requires the length argument to be a non-negative integer literal.", + id="bad_rpad_7", + ), + pytest.param( + bad_rpad_8, 
+ "Broker", + "RPAD function requires the padding argument to be a string literal of length 1.", + id="bad_rpad_8", + ), ], ) def test_defog_e2e_errors( impl: Callable[[], UnqualifiedNode], + graph_name: str, error_msg: str, defog_graphs: graph_fetcher, sqlite_defog_connection: DatabaseContext, @@ -1750,7 +1790,7 @@ def test_defog_e2e_errors( Tests running bad PyDough code through the entire pipeline to verify that a certain error is raised for defog database. """ - graph: GraphMetadata = defog_graphs("Broker") + graph: GraphMetadata = defog_graphs(graph_name) with pytest.raises(Exception, match=error_msg): root: UnqualifiedNode = init_pydough_context(graph)(impl)() - to_df(root, metadata=graph, database=sqlite_defog_connection) + to_sql(root, metadata=graph, database=sqlite_defog_connection)