Skip to content

Commit

Permalink
Update API
Browse files Browse the repository at this point in the history
  • Loading branch information
zhipengmao-db committed Jul 22, 2024
1 parent 6978bca commit 6866bbb
Show file tree
Hide file tree
Showing 2 changed files with 83 additions and 41 deletions.
76 changes: 44 additions & 32 deletions python/delta/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1062,17 +1062,14 @@ def __getNotMatchedBySourceBuilder(


@dataclass
class IdentityColumnSpec:
class IdentityGenerator:
"""
:param generatedAlwaysAsIdentity: whether the column is generated always as an identity
column or if users can provide values.
:type generatedAlwaysAsIdentity: bool
Identity generator specifications for the identity column in the Delta table.
:param start: the start for the identity column. Default is 1.
:type start: int
:param step: the step for the identity column. Default is 1.
:type step: int
"""
generatedAlways: bool
start: int = 1
step: int = 1

Expand Down Expand Up @@ -1181,8 +1178,8 @@ def addColumn(
colName: str,
dataType: Union[str, DataType],
nullable: bool = True,
generatedAlwaysAs: Optional[str] = None,
identityColumnSpec: Optional[IdentityColumnSpec] = None,
generatedAlwaysAs: Optional[Union[str, IdentityGenerator]] = None,
generatedByDefaultAs: Optional[IdentityGenerator] = None,
comment: Optional[str] = None,
) -> "DeltaTableBuilder":
"""
Expand All @@ -1195,12 +1192,15 @@ def addColumn(
:param nullable: whether column is nullable
:type nullable: bool
:param generatedAlwaysAs: a SQL expression if the column is always generated
as a function of other columns.
as a function of other columns;
an IdentityGenerator object if the column is always
generated using an identity generator.
See online documentation for details on Generated Columns.
:type generatedAlwaysAs: str
:param identityColumnSpec: specification to generate an identity column
:type generatedAlwaysAs: str or delta.tables.IdentityGenerator
:param generatedByDefaultAs: an IdentityGenerator object to generate identity values
if the user does not provide values for the column.
See online documentation for details on Generated Columns.
:type identityColumnSpec: IdentityColumnSpec
:type generatedByDefaultAs: delta.tables.IdentityGenerator
:param comment: the column comment
:type comment: str
Expand All @@ -1224,39 +1224,51 @@ def addColumn(
if type(nullable) is not bool:
self._raise_type_error("Column nullable must be bool.", [nullable])
_col_jbuilder = _col_jbuilder.nullable(nullable)

if generatedAlwaysAs is not None and generatedByDefaultAs is not None:
raise ValueError(
"generatedByDefaultAs cannot be set with generatedAlwaysAs.",
[generatedByDefaultAs, generatedAlwaysAs]
)
if generatedAlwaysAs is not None:
if identityColumnSpec is not None:
raise ValueError(
"identityColumnSpec cannot be set with generatedAlwaysAs.",
[identityColumnSpec, generatedAlwaysAs],
if type(generatedAlwaysAs) is str:
_col_jbuilder = _col_jbuilder.generatedAlwaysAs(generatedAlwaysAs)
elif type(generatedAlwaysAs) is IdentityGenerator:
if dataType != LongType():
self._raise_type_error(
"Column identity generation requires the column to be integer.",
[dataType],
)
if generatedAlwaysAs.step == 0:
raise ValueError(
"Column identity generation requires step to be non-zero."
)
_col_jbuilder = _col_jbuilder.generatedAlwaysAsIdentity(
generatedAlwaysAs.start, generatedAlwaysAs.step
)

if type(generatedAlwaysAs) is not str:
else:
self._raise_type_error(
"Column generation expression must be str.", [generatedAlwaysAs]
"Column generation expression must be str or IdentityGenerator.",
[generatedAlwaysAs]
)
elif generatedByDefaultAs is not None:
if type(generatedByDefaultAs) is not IdentityGenerator:
self._raise_type_error(
"Column generation by default expression must be IdentityGenerator.",
[generatedByDefaultAs]
)
_col_jbuilder = _col_jbuilder.generatedAlwaysAs(generatedAlwaysAs)
if identityColumnSpec is not None:
if dataType != LongType():
self._raise_type_error(
"Column identity generation requires the column to be integer.",
[dataType],
)
if identityColumnSpec.step == 0:
if generatedByDefaultAs.step == 0:
raise ValueError(
"Column identity generation requires step to be non-zero."
)

_id_col_start = identityColumnSpec.start
_id_col_step = identityColumnSpec.step
if identityColumnSpec.generatedAlwaysAsIdentity:
_col_jbuilder = _col_jbuilder.generatedAlwaysAsIdentity(
_id_col_start, _id_col_step
)
else:
_col_jbuilder = _col_jbuilder.generatedByDefaultAsIdentity(
_id_col_start, _id_col_step
)
_col_jbuilder = _col_jbuilder.generatedByDefaultAsIdentity(
generatedByDefaultAs.start, generatedByDefaultAs.step
)

if comment is not None:
if type(comment) is not str:
Expand Down
48 changes: 39 additions & 9 deletions python/delta/tests/test_deltatable.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from pyspark.sql.types import StructType, StructField, StringType, IntegerType, LongType, DataType
from pyspark.sql.utils import AnalysisException, ParseException

from delta.tables import DeltaTable, DeltaTableBuilder, DeltaOptimizeBuilder, IdentityColumnSpec
from delta.tables import DeltaTable, DeltaTableBuilder, DeltaOptimizeBuilder, IdentityGenerator
from delta.testing.utils import DeltaTestCase


Expand Down Expand Up @@ -977,23 +977,53 @@ def test_delta_table_builder_with_bad_args(self) -> None:
with self.assertRaises(TypeError):
builder.addColumn("a", "int", generatedAlwaysAs=1) # type: ignore[arg-type]

# bad identityColumnSpec - can't be set with generatedAlwaysAs
with self.assertRaises(ValueError):
# bad generatedAlwaysAs - column data type must be Long
with self.assertRaises(TypeError):
builder.addColumn(
"a",
"int",
generatedAlwaysAs=1,
identityColumnSpec=IdentityColumnSpec(generatedAlways=True),
generatedAlwaysAs=IdentityGenerator()
) # type: ignore[arg-type]

# bad generatedAlwaysAs - step can't be 0
with self.assertRaises(ValueError):
builder.addColumn(
"a",
LongType,
generatedAlwaysAs=IdentityGenerator(step=0)
) # type: ignore[arg-type]

# bad identityColumnSpec - identityStep can't be 0
# bad generatedByDefaultAs - can't be set with generatedAlwaysAs
with self.assertRaises(ValueError):
builder.addColumn(
"a",
LongType,
generatedAlwaysAs="",
generatedByDefaultAs=IdentityGenerator()
) # type: ignore[arg-type]

# bad generatedByDefaultAs - argument type must be IdentityGenerator
with self.assertRaises(TypeError):
builder.addColumn(
"a",
LongType,
generatedByDefaultAs=""
) # type: ignore[arg-type]

# bad generatedByDefaultAs - column data type must be Long
with self.assertRaises(TypeError):
builder.addColumn(
"a",
"int",
identityColumnSpec=IdentityColumnSpec(
generatedAlways=True, step=0
)
generatedByDefaultAs=IdentityGenerator()
) # type: ignore[arg-type]

# bad generatedByDefaultAs - step can't be 0
with self.assertRaises(ValueError):
builder.addColumn(
"a",
LongType,
generatedByDefaultAs=IdentityGenerator(step=0)
) # type: ignore[arg-type]

# bad nullable
Expand Down

0 comments on commit 6866bbb

Please sign in to comment.