diff --git a/python/datafusion/__init__.py b/python/datafusion/__init__.py index 60d0d61b4..15ceefbdb 100644 --- a/python/datafusion/__init__.py +++ b/python/datafusion/__init__.py @@ -26,6 +26,8 @@ except ImportError: import importlib_metadata +from datafusion.col import col, column + from . import functions, object_store, substrait, unparser # The following imports are okay to remain as opaque to the user. @@ -95,16 +97,6 @@ ] -def column(value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - -def col(value: str) -> Expr: - """Create a column expression.""" - return Expr.column(value) - - def literal(value) -> Expr: """Create a literal expression.""" return Expr.literal(value) diff --git a/python/datafusion/col.py b/python/datafusion/col.py new file mode 100644 index 000000000..1141dc092 --- /dev/null +++ b/python/datafusion/col.py @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Col class.""" + +from datafusion.expr import Expr + + +class Col: + """Create a column expression. + + This helper class allows an extra syntax of creating columns using the __getattr__ + method. + """ + + def __call__(self, value: str) -> Expr: + """Create a column expression.""" + return Expr.column(value) + + def __getattr__(self, value: str) -> Expr: + """Create a column using attribute syntax.""" + # For autocomplete to work with IPython + if value.startswith("__wrapped__"): + return getattr(type(self), value) + + return Expr.column(value) + + +col: Col = Col() +column: Col = Col() +__all__ = ["col", "column"] diff --git a/python/tests/test_expr.py b/python/tests/test_expr.py index dcf75f021..3651b60d6 100644 --- a/python/tests/test_expr.py +++ b/python/tests/test_expr.py @@ -249,6 +249,29 @@ def test_fill_null(df): assert result.column(2) == pa.array([1234, 1234, 8]) +def test_col_getattr(): + ctx = SessionContext() + data = { + "array_values": [[1, 2, 3], [4, 5], [6], []], + "struct_values": [ + {"name": "Alice", "age": 15}, + {"name": "Bob", "age": 14}, + {"name": "Charlie", "age": 13}, + {"name": None, "age": 12}, + ], + } + df = ctx.from_pydict(data, name="table1") + + names = df.select(col.struct_values["name"].alias("name")).collect() + names = [r.as_py() for rs in names for r in rs["name"]] + + array_values = df.select(col.array_values[1].alias("value")).collect() + array_values = [r.as_py() for rs in array_values for r in rs["value"]] + + assert names == ["Alice", "Bob", "Charlie", None] + assert array_values == [2, 5, None, None] + + def test_alias_with_metadata(df): df = df.select(col("a").alias("b", {"key": "value"})) assert df.schema().field("b").metadata == {b"key": b"value"}