From 25eace806d85cfe8f1ba086760ce93da12a3c12c Mon Sep 17 00:00:00 2001 From: Arash Maymandi <27716912+am357@users.noreply.github.com> Date: Tue, 11 Apr 2023 14:02:25 -0700 Subject: [PATCH] Model experimental `Schema` (#1016) Adds an experimental model for PartiQL Schema; as part of working on this PR, the following spec issues were created as prerequisites for finalizing the SchemaType: - Define PartiQL Type semantics: https://github.com/partiql/partiql-spec/issues/49 - Create PartiQLSchema specification: https://github.com/partiql/partiql-docs/issues/37 With this change we can model the `SQL` and `NoSQL` schemas on collections with collection constraints. In addition, with this model, we're able to represent other schemas like nested, array, scalar, and possibly Graph schemas as collections of Graph types: ``` // Example of nested schema <<{'a': INT, 'b': [{'x': DECIMAL, 'y': DECIMAL}]}>> // Example of Scalar schema <<INT>> // Example of Array schema <<[INT, DECIMAL]>> ``` StaticType currently does not model the orderedness of Bag and List as constraints, leaving the orderedness to the BAG and LIST types themselves. Why is the experimental model a collection? We model the Schema (e.g. an SQL `Table`) as a collection to solve the orthogonality problem that SQL has with Table (as entities that define the shape of data), which implicitly defines them as a collection. 
--- CHANGELOG.md | 3 + .../lang/planner/transforms/plan/PlanTyper.kt | 13 +++- .../PartiQLSchemaInferencerTests.kt | 55 +++++++++----- .../kotlin/org/partiql/types/StaticType.kt | 75 +++++++++++++++++-- .../org/partiql/types/StaticTypeTest.kt | 58 ++++++++++++++ .../test/resources/junit-platform.properties | 3 + .../plugins/mockdb/LocalConnectorObject.kt | 4 +- .../mockdb/LocalConnectorMetadataTests.kt | 7 +- 8 files changed, 189 insertions(+), 29 deletions(-) create mode 100644 partiql-types/src/test/kotlin/org/partiql/types/StaticTypeTest.kt create mode 100644 partiql-types/src/test/resources/junit-platform.properties diff --git a/CHANGELOG.md b/CHANGELOG.md index cec4d9650..f77f18d9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 but brings it in conformance with the specification. - Added `partiql-plan` package which contains experimental PartiQL Plan data structures. - Initializes SPI Framework under `partiql-spi`. +- Models experimental `Schema` with constraints. + With this change, we're introducing `Tuple` and `Collection` constraints to be able to model the shape of data as + constraints. - Introduces the PartiQLSchemaInferencer and PlannerSession - The PlannerSession describes the current session and is used by the PartiQLSchemaInferencer. 
- The PartiQLSchemaInferencer provides a function, `infer`, to aid in inferring the output `StaticType` of a diff --git a/partiql-lang/src/main/kotlin/org/partiql/lang/planner/transforms/plan/PlanTyper.kt b/partiql-lang/src/main/kotlin/org/partiql/lang/planner/transforms/plan/PlanTyper.kt index 0353a405f..211b0f1ee 100644 --- a/partiql-lang/src/main/kotlin/org/partiql/lang/planner/transforms/plan/PlanTyper.kt +++ b/partiql-lang/src/main/kotlin/org/partiql/lang/planner/transforms/plan/PlanTyper.kt @@ -66,6 +66,7 @@ import org.partiql.types.StaticType import org.partiql.types.StringType import org.partiql.types.StructType import org.partiql.types.SymbolType +import org.partiql.types.TupleConstraint /** * Types a given logical plan. @@ -438,7 +439,8 @@ internal object PlanTyper : PlanRewriter() { fields = input.getTypeEnv().associate { attribute -> attribute.name to attribute.type }, - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ) @@ -648,7 +650,14 @@ internal object PlanTyper : PlanRewriter() { TODO("Duplicate keys in struct is not yet handled") } - return node.copy(type = StructType(structFields.toMap(), contentClosed = closedContent), fields = fields) + return node.copy( + type = StructType( + structFields.toMap(), + contentClosed = closedContent, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) + ), + fields = fields + ) } override fun visitArgValue(node: Arg.Value, ctx: Context): PlanNode { diff --git a/partiql-lang/src/test/kotlin/org/partiql/lang/planner/transforms/PartiQLSchemaInferencerTests.kt b/partiql-lang/src/test/kotlin/org/partiql/lang/planner/transforms/PartiQLSchemaInferencerTests.kt index 8adb12d64..c61640836 100644 --- a/partiql-lang/src/test/kotlin/org/partiql/lang/planner/transforms/PartiQLSchemaInferencerTests.kt +++ b/partiql-lang/src/test/kotlin/org/partiql/lang/planner/transforms/PartiQLSchemaInferencerTests.kt @@ 
-24,6 +24,7 @@ import org.partiql.types.ListType import org.partiql.types.StaticType import org.partiql.types.StaticType.Companion.unionOf import org.partiql.types.StructType +import org.partiql.types.TupleConstraint import java.net.URL import java.time.Instant import java.util.stream.Stream @@ -58,23 +59,30 @@ class PartiQLSchemaInferencerTests { "id" to TYPE_AWS_DDB_PETS_ID, "breed" to TYPE_AWS_DDB_PETS_BREED ), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) val TABLE_AWS_DDB_B = BagType( StructType( fields = mapOf("identifier" to StaticType.STRING), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) val TABLE_AWS_B_B = BagType( StructType( fields = mapOf("identifier" to StaticType.INT), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) val TYPE_B_B_B_B_B = StaticType.INT - private val TYPE_B_B_B_B = StructType(mapOf("b" to TYPE_B_B_B_B_B), contentClosed = true) + private val TYPE_B_B_B_B = StructType( + mapOf("b" to TYPE_B_B_B_B_B), + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) + ) val TYPE_B_B_B_C = StaticType.INT val TYPE_B_B_C = StaticType.INT val TYPE_B_B_B = @@ -83,7 +91,8 @@ class PartiQLSchemaInferencerTests { "b" to TYPE_B_B_B_B, "c" to TYPE_B_B_B_C ), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) } @@ -127,7 +136,8 @@ class PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("pets" to StaticType.ANY), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ), problemHandler = assertProblemExists { @@ -144,7 +154,8 @@ class 
PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("pets" to StaticType.ANY), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ), problemHandler = assertProblemExists { @@ -195,7 +206,8 @@ class PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("pets" to StaticType.ANY), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ), problemHandler = assertProblemExists { @@ -539,7 +551,8 @@ class PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("unknown_col" to AnyType()), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ), problemHandler = assertProblemExists { @@ -618,7 +631,8 @@ class PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("cast_breed" to unionOf(StaticType.INT, StaticType.MISSING)), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -630,7 +644,8 @@ class PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("upper_breed" to StaticType.STRING), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -640,7 +655,8 @@ class PartiQLSchemaInferencerTests { expected = BagType( StructType( fields = mapOf("a" to ListType(unionOf(StaticType.INT, StaticType.DECIMAL))), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -672,7 +688,8 @@ class PartiQLSchemaInferencerTests { "a" to StaticType.INT, "b" to StaticType.DECIMAL, ), - contentClosed = true + contentClosed = true, + constraints = 
setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -685,7 +702,8 @@ class PartiQLSchemaInferencerTests { "a" to StaticType.INT, "b" to StaticType.DECIMAL, ), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -698,7 +716,8 @@ class PartiQLSchemaInferencerTests { "b" to StaticType.DECIMAL, "a" to StaticType.INT, ), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -713,7 +732,8 @@ class PartiQLSchemaInferencerTests { "s" to StaticType.INT, "m" to StaticType.INT, ), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), @@ -728,7 +748,8 @@ class PartiQLSchemaInferencerTests { "s" to StaticType.DECIMAL, "m" to StaticType.DECIMAL, ), - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) ) ), diff --git a/partiql-types/src/main/kotlin/org/partiql/types/StaticType.kt b/partiql-types/src/main/kotlin/org/partiql/types/StaticType.kt index 403217252..71a9f1a36 100644 --- a/partiql-types/src/main/kotlin/org/partiql/types/StaticType.kt +++ b/partiql-types/src/main/kotlin/org/partiql/types/StaticType.kt @@ -81,7 +81,7 @@ sealed class StaticType { LIST, SEXP, STRUCT, - BAG + BAG, ) } @@ -209,8 +209,20 @@ class UnsupportedTypeCheckException(message: String) : RuntimeException(message) */ sealed class CollectionType : SingleType() { abstract val elementType: StaticType + abstract val constraints: Set + + internal fun validateCollectionConstraints() { + if (elementType !is StructType && constraints.any { it is TupleCollectionConstraint }) { + throw UnsupportedTypeConstraint("Only collection of tuples can have tuple constraints") + } + } } +/** + * Exception thrown when a [StaticType] is initialized with an 
unsupported type constraint. + */ +class UnsupportedTypeConstraint(message: String) : Exception(message) + // Single types from ExprValueType. /** @@ -396,8 +408,13 @@ data class ClobType(override val metas: Map = mapOf()) : SingleType */ data class ListType( override val elementType: StaticType = ANY, - override val metas: Map = mapOf() + override val metas: Map = mapOf(), + override val constraints: Set = setOf() ) : CollectionType() { + + init { + validateCollectionConstraints() + } override fun flatten(): StaticType = this override val allTypes: List @@ -411,8 +428,12 @@ data class ListType( */ data class SexpType( override val elementType: StaticType = ANY, - override val metas: Map = mapOf() + override val metas: Map = mapOf(), + override val constraints: Set = setOf(), ) : CollectionType() { + init { + validateCollectionConstraints() + } override fun flatten(): StaticType = this override val allTypes: List @@ -426,8 +447,12 @@ data class SexpType( */ data class BagType( override val elementType: StaticType = ANY, - override val metas: Map = mapOf() + override val metas: Map = mapOf(), + override val constraints: Set = setOf(), ) : CollectionType() { + init { + this.validateCollectionConstraints() + } override fun flatten(): StaticType = this override val allTypes: List @@ -438,9 +463,18 @@ data class BagType( data class StructType( val fields: Map = mapOf(), + // `TupleConstraint` already has `Open` constraint which overlaps with `contentClosed`. + // In addition, `primaryKeyFields` must not exist on the `StructType` as `PrimaryKey` + // is a property of collection of tuples. As we have plans to define PartiQL types in + // more details it's foreseeable to have an refactor of our types in future and have a + // new definition of this type as `Tuple`. 
See the following issue for more details: + // https://github.com/partiql/partiql-spec/issues/49 + // TODO remove `contentClosed` and `primaryKeyFields` if after finalizing our type specification we're + // still going with `StructType`. val contentClosed: Boolean = false, val primaryKeyFields: List = listOf(), - override val metas: Map = mapOf() + val constraints: Set = setOf(), + override val metas: Map = mapOf(), ) : SingleType() { override fun flatten(): StaticType = this @@ -451,8 +485,8 @@ data class StructType( val entries = fields.entries val firstSeveral = entries.toList().take(3).joinToString { "${it.key}: ${it.value}" } return when { - entries.size <= 3 -> "struct($firstSeveral)" - else -> "struct($firstSeveral, ... and ${entries.size - 3} other field(s))" + entries.size <= 3 -> "struct($firstSeveral, $constraints)" + else -> "struct($firstSeveral, ... and ${entries.size - 3} other field(s), $constraints)" } } } @@ -518,6 +552,33 @@ sealed class NumberConstraint { } } +/** + * Represents Tuple constraints; this is still experimental. + * and subject to change upon finalization of the following: + * - https://github.com/partiql/partiql-spec/issues/49 + * - https://github.com/partiql/partiql-docs/issues/37 + */ +sealed class TupleConstraint { + data class UniqueAttrs(val value: Boolean) : TupleConstraint() + data class Open(val value: Boolean) : TupleConstraint() +} + +/** + * An Interface for constraints that are only applicable to collection of tuples, e.g. `PrimaryKey`. + */ +interface TupleCollectionConstraint + +/** + * Represents Collection constraints; this is still experimental. 
+ * and subject to change upon finalization of the following: + * - https://github.com/partiql/partiql-spec/issues/49 + * - https://github.com/partiql/partiql-docs/issues/37 + */ +sealed class CollectionConstraint { + data class PrimaryKey(val keys: Set) : TupleCollectionConstraint, CollectionConstraint() + data class PartitionKey(val keys: Set) : TupleCollectionConstraint, CollectionConstraint() +} + internal fun StaticType.isNullOrMissing(): Boolean = (this is NullType || this is MissingType) internal fun StaticType.isNumeric(): Boolean = (this is IntType || this is FloatType || this is DecimalType) internal fun StaticType.isText(): Boolean = (this is SymbolType || this is StringType) diff --git a/partiql-types/src/test/kotlin/org/partiql/types/StaticTypeTest.kt b/partiql-types/src/test/kotlin/org/partiql/types/StaticTypeTest.kt new file mode 100644 index 000000000..83c689fb7 --- /dev/null +++ b/partiql-types/src/test/kotlin/org/partiql/types/StaticTypeTest.kt @@ -0,0 +1,58 @@ +package org.partiql.types + +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.assertThrows +import kotlin.test.assertEquals + +class StaticTypeTest { + @Test + fun collectionWithConstraintInitTest() { + val struct = StructType( + fields = mapOf( + "a" to StaticType.STRING, + ), + constraints = + setOf( + TupleConstraint.Open(false), + TupleConstraint.UniqueAttrs(false), + ) + ) + val constraint = setOf(CollectionConstraint.PrimaryKey(setOf("a"))) + val types = listOf( + BagType( + elementType = struct, + metas = mapOf(), + constraints = constraint + ), + ListType( + elementType = struct, + metas = mapOf(), + constraints = constraint + ), + SexpType( + elementType = struct, + metas = mapOf(), + constraints = constraint + ) + ) + + types.forEach { + assertEquals(it.elementType.toString(), "struct(a: string, [Open(value=false), UniqueAttrs(value=false)])") + } + + assertEquals( + BagType(elementType = StaticType.INT, metas = mapOf(), constraints = setOf()).toString(), + 
"bag(int)" + ) + + assertThrows { + BagType( + StaticType.INT, + metas = mapOf(), + constraints = setOf( + CollectionConstraint.PrimaryKey(setOf("a")) + ) + ) + } + } +} diff --git a/partiql-types/src/test/resources/junit-platform.properties b/partiql-types/src/test/resources/junit-platform.properties new file mode 100644 index 000000000..ad19ea833 --- /dev/null +++ b/partiql-types/src/test/resources/junit-platform.properties @@ -0,0 +1,3 @@ +junit.jupiter.execution.parallel.enabled = true +junit.jupiter.execution.parallel.mode.default = concurrent +junit.jupiter.execution.parallel.mode.classes.default = concurrent \ No newline at end of file diff --git a/plugins/partiql-mockdb/src/main/kotlin/org/partiql/plugins/mockdb/LocalConnectorObject.kt b/plugins/partiql-mockdb/src/main/kotlin/org/partiql/plugins/mockdb/LocalConnectorObject.kt index 491cc5f33..03472cc54 100644 --- a/plugins/partiql-mockdb/src/main/kotlin/org/partiql/plugins/mockdb/LocalConnectorObject.kt +++ b/plugins/partiql-mockdb/src/main/kotlin/org/partiql/plugins/mockdb/LocalConnectorObject.kt @@ -22,6 +22,7 @@ import org.partiql.types.NumberConstraint import org.partiql.types.StaticType import org.partiql.types.StringType import org.partiql.types.StructType +import org.partiql.types.TupleConstraint /** * This mock implementation of [ConnectorObject] is used to parse the [schema] into a [StaticType]. 
Currently, @@ -98,7 +99,8 @@ internal class LocalConnectorObject( fields = this.attributes.associate { it.getName() to it.getValueDesc() }, - contentClosed = true + contentClosed = true, + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)), ) private fun LocalSchema.TableSchema.getDesc(): StaticType { diff --git a/plugins/partiql-mockdb/src/test/kotlin/org/partiql/plugins/mockdb/LocalConnectorMetadataTests.kt b/plugins/partiql-mockdb/src/test/kotlin/org/partiql/plugins/mockdb/LocalConnectorMetadataTests.kt index 3d68fe6f7..c75be8cdb 100644 --- a/plugins/partiql-mockdb/src/test/kotlin/org/partiql/plugins/mockdb/LocalConnectorMetadataTests.kt +++ b/plugins/partiql-mockdb/src/test/kotlin/org/partiql/plugins/mockdb/LocalConnectorMetadataTests.kt @@ -10,6 +10,7 @@ import org.partiql.types.BagType import org.partiql.types.IntType import org.partiql.types.StaticType import org.partiql.types.StructType +import org.partiql.types.TupleConstraint import java.nio.file.Paths import kotlin.test.assertEquals @@ -73,9 +74,11 @@ class LocalConnectorMetadataTests { contentClosed = true, fields = mapOf( "nested_id" to IntType() - ) + ), + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) - ) + ), + constraints = setOf(TupleConstraint.Open(false), TupleConstraint.UniqueAttrs(true)) ) // Act