From f88641dfd494d9c08313d162f869e99dae89db8b Mon Sep 17 00:00:00 2001 From: James Gilles Date: Fri, 30 Aug 2024 17:05:22 -0400 Subject: [PATCH] Add schema::type_for_generate, update validation to use it, fixing a minor bug in the tests in the process Add utilities to ModuleDef for easier use for codegen (#1678) Update crates/sats/src/proptest.rs Co-authored-by: Mazdak Farrokhzad Signed-off-by: james gilles Move constraints around for easier future ABI evolution WIP: Allow cyclic AlgebraicTypes. Final fixes, address review comments Final comments addressed & copy editing Remove outdated comment --- Cargo.lock | 1 + crates/lib/src/db/raw_def/v9.rs | 43 +- crates/sats/src/algebraic_type.rs | 16 + crates/sats/src/proptest.rs | 21 +- crates/sats/src/typespace.rs | 8 + crates/schema/Cargo.toml | 1 + .../type_for_generate.txt | 7 + crates/schema/src/def.rs | 201 +++- crates/schema/src/def/validate/v8.rs | 107 +- crates/schema/src/def/validate/v9.rs | 371 +++---- crates/schema/src/error.rs | 17 +- crates/schema/src/lib.rs | 1 + crates/schema/src/schema.rs | 35 +- crates/schema/src/type_for_generate.rs | 930 ++++++++++++++++++ crates/table/src/layout.rs | 44 +- 15 files changed, 1375 insertions(+), 428 deletions(-) create mode 100644 crates/schema/proptest-regressions/type_for_generate.txt create mode 100644 crates/schema/src/type_for_generate.rs diff --git a/Cargo.lock b/Cargo.lock index 09c2325748..052105a51e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4514,6 +4514,7 @@ dependencies = [ name = "spacetimedb-schema" version = "0.12.0" dependencies = [ + "enum-as-inner", "hashbrown 0.14.1", "itertools 0.12.0", "lazy_static", diff --git a/crates/lib/src/db/raw_def/v9.rs b/crates/lib/src/db/raw_def/v9.rs index 99ffcf05db..a2ad359424 100644 --- a/crates/lib/src/db/raw_def/v9.rs +++ b/crates/lib/src/db/raw_def/v9.rs @@ -120,7 +120,7 @@ pub struct RawTableDefV9 { pub indexes: Vec, /// Any unique constraints on the table. 
- pub unique_constraints: Vec, + pub constraints: Vec, /// The sequences for the table. pub sequences: Vec, @@ -296,6 +296,37 @@ pub struct RawScheduleDefV9 { pub reducer_name: RawIdentifier, } +/// A constraint definition attached to a table. +#[derive(Debug, Clone, SpacetimeType)] +#[sats(crate = crate)] +#[cfg_attr(feature = "test", derive(PartialEq, Eq, PartialOrd, Ord))] +pub struct RawConstraintDefV9 { + /// The name of the constraint. Must be unique within the containing `RawDatabaseDef`. + pub name: RawIdentifier, + + /// The data for the constraint. + pub data: RawConstraintDataV9, +} + +#[derive(Debug, Clone, SpacetimeType)] +#[sats(crate = crate)] +#[cfg_attr(feature = "test", derive(PartialEq, Eq, PartialOrd, Ord))] +#[non_exhaustive] +pub enum RawConstraintDataV9 { + Unique(RawUniqueConstraintDataV9), +} + +/// Requires that the projection of the table onto these `columns` is a bijection. +/// +/// That is, there must be a one-to-one relationship between a row and the `columns` of that row. +#[derive(Debug, Clone, SpacetimeType)] +#[sats(crate = crate)] +#[cfg_attr(feature = "test", derive(PartialEq, Eq, PartialOrd, Ord))] +pub struct RawUniqueConstraintDataV9 { + /// The columns that must be unique. + pub columns: ColList, +} + /// A miscellaneous module export. #[derive(Debug, Clone, SpacetimeType)] #[sats(crate = crate)] @@ -314,6 +345,7 @@ pub struct RawTypeDefV9 { pub name: RawScopedTypeNameV9, /// The type to which the declaration refers. + /// This must point to an `AlgebraicType::Product` or an `AlgebraicType::Sum` in the module's typespace. pub ty: AlgebraicTypeRef, /// Whether this type has a custom ordering. @@ -411,7 +443,7 @@ impl RawModuleDefV9Builder { name, product_type_ref, indexes: vec![], - unique_constraints: vec![], + constraints: vec![], sequences: vec![], schedule: None, primary_key: None, @@ -583,9 +615,10 @@ impl<'a> RawTableDefBuilder<'a> { /// Generates a [UniqueConstraintDef] using the supplied `columns`. 
pub fn with_unique_constraint(mut self, columns: ColList, name: Option) -> Self { let name = name.unwrap_or_else(|| self.generate_unique_constraint_name(&columns)); - self.table - .unique_constraints - .push(RawUniqueConstraintDefV9 { name, columns }); + self.table.constraints.push(RawConstraintDefV9 { + name, + data: RawConstraintDataV9::Unique(RawUniqueConstraintDataV9 { columns }), + }); self } diff --git a/crates/sats/src/algebraic_type.rs b/crates/sats/src/algebraic_type.rs index ec69c741d6..23d3450501 100644 --- a/crates/sats/src/algebraic_type.rs +++ b/crates/sats/src/algebraic_type.rs @@ -182,6 +182,22 @@ impl AlgebraicType { matches!(self, Self::Sum(p) if p.is_schedule_at()) } + /// Returns whether this type is a unit type. + pub fn is_unit(&self) -> bool { + matches!(self, Self::Product(p) if p.is_unit()) + } + + /// Returns whether this type is a never type. + pub fn is_never(&self) -> bool { + matches!(self, Self::Sum(p) if p.is_empty()) + } + + /// If this type is the standard option type, returns the type of the `some` variant. + /// Otherwise, returns `None`. + pub fn as_option(&self) -> Option<&AlgebraicType> { + self.as_sum().and_then(SumType::as_option) + } + /// Returns whether this type is scalar or a string type. pub fn is_scalar_or_string(&self) -> bool { self.is_scalar() || self.is_string() diff --git a/crates/sats/src/proptest.rs b/crates/sats/src/proptest.rs index 01616aa365..6d9558c00d 100644 --- a/crates/sats/src/proptest.rs +++ b/crates/sats/src/proptest.rs @@ -2,7 +2,7 @@ //! //! This notably excludes `Ref` types. 
-use crate::{i256, u256}; +use crate::{i256, u256, ProductTypeElement, SumTypeVariant}; use crate::{ AlgebraicType, AlgebraicTypeRef, AlgebraicValue, ArrayValue, MapType, MapValue, ProductType, ProductValue, SumType, SumValue, Typespace, F32, F64, @@ -54,15 +54,30 @@ fn generate_algebraic_type_from_leaves( prop_oneof![ gen_element.clone().prop_map(AlgebraicType::array), (gen_element.clone(), gen_element.clone()).prop_map(|(key, val)| AlgebraicType::map(key, val)), - // No need for field or variant names. - // No need to generate units here; // we already generate them in `generate_non_compound_algebraic_type`. vec(gen_element.clone().prop_map_into(), 1..=SIZE) + .prop_map(|vec| vec + .into_iter() + .enumerate() + .map(|(i, ty)| ProductTypeElement { + // Generate names because the validation code in the `schema` crate requires them. + name: Some(format!("field_{i}").into()), + algebraic_type: ty + }) + .collect()) .prop_map(Vec::into_boxed_slice) .prop_map(AlgebraicType::product), // Do not generate nevers here; we can't store never in a page. vec(gen_element.clone().prop_map_into(), 1..=SIZE) + .prop_map(|vec| vec + .into_iter() + .enumerate() + .map(|(i, ty)| SumTypeVariant { + name: Some(format!("variant_{i}").into()), + algebraic_type: ty + }) + .collect::>()) .prop_map(Vec::into_boxed_slice) .prop_map(AlgebraicType::sum), ] diff --git a/crates/sats/src/typespace.rs b/crates/sats/src/typespace.rs index b7802a8f44..81994a89f1 100644 --- a/crates/sats/src/typespace.rs +++ b/crates/sats/src/typespace.rs @@ -185,6 +185,14 @@ impl Typespace { Ok(()) } + /// Iterate over types in the typespace with their references. + pub fn refs_with_types(&self) -> impl Iterator { + self.types + .iter() + .enumerate() + .map(|(idx, ty)| (AlgebraicTypeRef(idx as _), ty)) + } + /// Check that the entire typespace is valid for generating a `SpacetimeDB` client module. /// See also the `spacetimedb_schema` crate, which layers additional validation on top /// of these checks. 
diff --git a/crates/schema/Cargo.toml b/crates/schema/Cargo.toml index 8739c56d21..eba30cc0dc 100644 --- a/crates/schema/Cargo.toml +++ b/crates/schema/Cargo.toml @@ -20,6 +20,7 @@ unicode-normalization.workspace = true serde_json.workspace = true smallvec.workspace = true hashbrown.workspace = true +enum-as-inner.workspace = true [dev-dependencies] spacetimedb-lib = { workspace = true, features = ["test"] } diff --git a/crates/schema/proptest-regressions/type_for_generate.txt b/crates/schema/proptest-regressions/type_for_generate.txt new file mode 100644 index 0000000000..a27ad4bcfd --- /dev/null +++ b/crates/schema/proptest-regressions/type_for_generate.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc 50cf163ac81228385b27f96ba1801355e39bc722a937a0cf6ec0d4b27d23ef14 # shrinks to t = Typespace { types: [Bool, Bool, Bool, Product(ProductType { elements: [ProductTypeElement { name: None, algebraic_type: Bool }] }), Bool] } diff --git a/crates/schema/src/def.rs b/crates/schema/src/def.rs index 6bcd3626de..e8584b4256 100644 --- a/crates/schema/src/def.rs +++ b/crates/schema/src/def.rs @@ -20,15 +20,16 @@ use std::hash::Hash; use crate::error::{IdentifierError, ValidationErrors}; use crate::identifier::Identifier; +use crate::type_for_generate::{AlgebraicTypeUse, ProductTypeDef, TypespaceForGenerate}; use hashbrown::Equivalent; use itertools::Itertools; use spacetimedb_data_structures::error_stream::{CollectAllErrors, CombineErrors, ErrorStream}; use spacetimedb_data_structures::map::HashMap; use spacetimedb_lib::db::raw_def; use spacetimedb_lib::db::raw_def::v9::{ - Lifecycle, RawIdentifier, RawIndexAlgorithm, RawIndexDefV9, RawModuleDefV9, RawReducerDefV9, RawScheduleDefV9, - RawScopedTypeNameV9, 
RawSequenceDefV9, RawTableDefV9, RawTypeDefV9, RawUniqueConstraintDefV9, TableAccess, - TableType, + Lifecycle, RawConstraintDataV9, RawConstraintDefV9, RawIdentifier, RawIndexAlgorithm, RawIndexDefV9, + RawModuleDefV9, RawReducerDefV9, RawScheduleDefV9, RawScopedTypeNameV9, RawSequenceDefV9, RawTableDefV9, + RawTypeDefV9, RawUniqueConstraintDataV9, TableAccess, TableType, }; use spacetimedb_lib::{ProductType, RawModuleDef}; use spacetimedb_primitives::{ColId, ColList}; @@ -88,30 +89,42 @@ pub struct ModuleDef { /// The typespace of the module definition. typespace: Typespace, + /// The typespace, restructured to be useful for client codegen. + typespace_for_generate: TypespaceForGenerate, + /// The global namespace of the module: /// tables, indexes, constraints, schedules, and sequences live in the global namespace. /// Concretely, though, they're stored in the `TableDef` data structures. /// This map allows looking up which `TableDef` stores the `Def` you're looking for. stored_in_table_def: IdentifierMap, + + /// A map from type refs to their names, if they have any. + refmap: HashMap, } impl ModuleDef { - /// Construct a `ModuleDef` by validating a `RawModuleDef`. - /// This is the only way to construct a `ModuleDef`. - /// (The `TryFrom` impls for this type just call this method.) - pub fn validate(raw: RawModuleDef) -> Result { - match raw { - RawModuleDef::V8BackCompat(v8_mod) => validate::v8::validate(v8_mod), - RawModuleDef::V9(v9_mod) => validate::v9::validate(v9_mod), - _ => unimplemented!(), - } - } - /// The tables of the module definition. pub fn tables(&self) -> impl Iterator { self.tables.values() } + /// The reducers of the module definition. + pub fn reducers(&self) -> impl Iterator { + self.reducers.values() + } + + /// The named types of the module definition. + pub fn types(&self) -> impl Iterator { + self.types.values() + } + + /// The named types of the module definition that aren't table row type definitions. 
+ pub fn types_not_tables(&self) -> impl Iterator { + self.types + .values() + .filter(|typ| !typ.name.as_identifier().is_some_and(|name| self.table(name).is_some())) + } + /// The `Typespace` used by the module. /// /// `AlgebraicTypeRef`s in the table, reducer, and type alias declarations refer to this typespace. @@ -128,6 +141,11 @@ impl ModuleDef { &self.typespace } + /// The typespace of the module from a different perspective, one useful for client code generation. + pub fn typespace_for_generate(&self) -> &TypespaceForGenerate { + &self.typespace_for_generate + } + /// The `TableDef` an entity in the global namespace is stored in, if any. /// /// Generally, you will want to use the `lookup` method on the entity type instead. @@ -148,35 +166,39 @@ impl ModuleDef { self.tables.get(name) } + /// Lookup a type's name from its `AlgebraicTypeRef`. + pub fn type_name_from_ref(&self, r: AlgebraicTypeRef) -> Option<&RefPointee> { + self.refmap.get(&r) + } + /// Generate indexes for the module definition. /// We guarantee that all `unique` constraints have an index generated for them. /// This will be removed once another enforcement mechanism is implemented. /// This is a noop if there are already usable indexes present. fn generate_indexes(&mut self) { for table in self.tables.values_mut() { - for constraint in table.unique_constraints.values() { + for constraint in table.constraints.values() { + let ConstraintData::Unique(UniqueConstraintData { columns }) = &constraint.data; + // if we have a constraint for the index, we're fine. if table.indexes.values().any(|index| { let IndexDef { - algorithm: IndexAlgorithm::BTree { columns }, + algorithm: IndexAlgorithm::BTree { columns: index_columns }, .. 
} = index; - columns == &constraint.columns + index_columns == columns }) { continue; } - let column_names = constraint - .columns - .iter() - .map(|col_id| &*table.get_column(col_id).expect("validated unique constraint").name) - .join("_"); - - // TODO(1.0): ensure generated index names are identical when upgrading the Rust module bindings. - let mut index_name = - Identifier::new(format!("idx_{}_{}_{}_unique", table.name, column_names, constraint.name).into()) - .expect("validated identifier parts"); + // This replicates the logic from `RawIndexDefV8::for_column`. + let constraint_name = &constraint + .name + .trim_start_matches(&format!("ct_{}_", table.name)) + .trim_end_matches("_unique"); + let mut index_name = Identifier::new(format!("idx_{}_{}_unique", table.name, constraint_name).into()) + .expect("validated identifier parts"); // incredibly janky loop to avoid name collisions. // hey, somebody could be being malicious. @@ -190,7 +212,7 @@ impl ModuleDef { IndexDef { name: index_name.clone(), algorithm: IndexAlgorithm::BTree { - columns: constraint.columns.clone(), + columns: columns.clone(), }, accessor_name: None, // this is a generated index. }, @@ -260,6 +282,8 @@ impl From for RawModuleDefV9 { types, typespace, stored_in_table_def: _, + typespace_for_generate: _, + refmap: _, } = val; RawModuleDefV9 { @@ -330,7 +354,7 @@ pub struct TableDef { pub indexes: IdentifierMap, /// The unique constraints on the table, indexed by name. - pub unique_constraints: IdentifierMap, + pub constraints: IdentifierMap, /// The sequences for the table, indexed by name. pub sequences: IdentifierMap, @@ -364,7 +388,7 @@ impl From for RawTableDefV9 { primary_key, columns: _, // will be reconstructed from the product type. 
indexes, - unique_constraints, + constraints, sequences, schedule, table_type, @@ -376,9 +400,7 @@ impl From for RawTableDefV9 { product_type_ref, primary_key, indexes: to_raw(indexes, |index: &RawIndexDefV9| &index.name), - unique_constraints: to_raw(unique_constraints, |constraint: &RawUniqueConstraintDefV9| { - &constraint.name - }), + constraints: to_raw(constraints, |constraint: &RawConstraintDefV9| &constraint.name), sequences: to_raw(sequences, |sequence: &RawSequenceDefV9| &sequence.name), schedule: schedule.map(Into::into), table_type, @@ -514,29 +536,59 @@ pub struct ColumnDef { /// with name `Some(name.as_str())`. pub ty: AlgebraicType, + /// The type of the column, formatted for client code generation. + pub ty_for_generate: AlgebraicTypeUse, + /// The table this `ColumnDef` is stored in. pub table_name: Identifier, } +/// A constraint definition attached to a table. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ConstraintDef { + /// The name of the constraint. Unique within the containing `RawDatabaseDef`. + pub name: Identifier, + + /// The data for the constraint. + pub data: ConstraintData, +} + +impl From for RawConstraintDefV9 { + fn from(val: ConstraintDef) -> Self { + RawConstraintDefV9 { + name: val.name.into(), + data: val.data.into(), + } + } +} + +/// Data for a constraint attached to a table. +#[derive(Debug, Clone, Eq, PartialEq)] +#[non_exhaustive] +pub enum ConstraintData { + Unique(UniqueConstraintData), +} + +impl From for RawConstraintDataV9 { + fn from(val: ConstraintData) -> Self { + match val { + ConstraintData::Unique(unique) => RawConstraintDataV9::Unique(unique.into()), + } + } +} + /// Requires that the projection of the table onto these columns is an bijection. /// /// That is, there must be a one-to-one relationship between a row and the `columns` of that row. #[derive(Debug, Clone, Eq, PartialEq)] -#[non_exhaustive] -pub struct UniqueConstraintDef { - /// The name of the unique constraint. 
Must be unique within the containing `RawDatabaseDef`. - pub name: Identifier, - +pub struct UniqueConstraintData { /// The columns on the containing `TableDef` pub columns: ColList, } -impl From for RawUniqueConstraintDefV9 { - fn from(val: UniqueConstraintDef) -> Self { - RawUniqueConstraintDefV9 { - name: val.name.into(), - columns: val.columns, - } +impl From for RawUniqueConstraintDataV9 { + fn from(val: UniqueConstraintData) -> Self { + RawUniqueConstraintDataV9 { columns: val.columns } } } @@ -579,6 +631,8 @@ pub struct TypeDef { pub name: ScopedTypeName, /// The type to which the alias refers. + /// Look in `ModuleDef.typespace` for the actual type, + /// or in `ModuleDef.typespace_for_generate` for the client codegen version. pub ty: AlgebraicTypeRef, /// Whether this type has a custom ordering. @@ -594,6 +648,41 @@ impl From for RawTypeDefV9 { } } +/// The return value from [`ModuleDef::type_name_from_ref`]. +#[derive(Debug, Clone)] +#[non_exhaustive] +pub enum RefPointee { + Table(Identifier), + Type(ScopedTypeName), +} + +impl RefPointee { + /// Iterate over the segments of this name. + pub fn name_segments(&self) -> impl Iterator { + let (scope, name) = match self { + RefPointee::Table(id) => (None, id), + RefPointee::Type(ScopedTypeName { scope, name }) => (Some(&**scope), name), + }; + scope.into_iter().flatten().chain(std::iter::once(name)) + } + + /// Get the least-significant segment of this name. + pub fn name(&self) -> &Identifier { + match self { + RefPointee::Table(id) => id, + RefPointee::Type(scoped) => &scoped.name, + } + } + + /// Retrieve the `ScopedTypeName`, if this name is scoped. + pub fn scoped_name(&self) -> Option<&ScopedTypeName> { + match self { + RefPointee::Table(_) => None, + RefPointee::Type(scoped) => Some(scoped), + } + } +} + /// A scoped type name, in the form `scope0::scope1::...::scopeN::name`. 
/// /// These are the names that will be used *in client code generation*, NOT the names used for types @@ -639,6 +728,21 @@ impl ScopedTypeName { name, } } + + /// Retrieve the name of this type. + pub fn name(&self) -> &Identifier { + &self.name + } + + /// Retrieve the name of this type, if the scope is empty. + pub fn as_identifier(&self) -> Option<&Identifier> { + self.scope.is_empty().then_some(&self.name) + } + + /// Iterate over the segments of this name. + pub fn name_segments(&self) -> impl Iterator { + self.scope.iter().chain(std::iter::once(&self.name)) + } } impl fmt::Debug for ScopedTypeName { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { @@ -687,6 +791,11 @@ pub struct ReducerDef { /// This `ProductType` need not be registered in the module's `Typespace`. pub params: ProductType, + /// The parameters of the reducer, formatted for client codegen. + /// + /// This `ProductType` need not be registered in the module's `TypespaceForGenerate`. + pub params_for_generate: ProductTypeDef, + /// The special role of this reducer in the module lifecycle, if any. pub lifecycle: Option, } @@ -755,7 +864,7 @@ impl ModuleDefLookup for ColumnDef { } } -impl ModuleDefLookup for UniqueConstraintDef { +impl ModuleDefLookup for ConstraintDef { type Key<'a> = &'a Identifier; fn key(&self) -> Self::Key<'_> { @@ -763,7 +872,7 @@ impl ModuleDefLookup for UniqueConstraintDef { } fn lookup<'a>(module_def: &'a ModuleDef, key: Self::Key<'_>) -> Option<&'a Self> { - module_def.stored_in_table_def(key)?.unique_constraints.get(key) + module_def.stored_in_table_def(key)?.constraints.get(key) } } diff --git a/crates/schema/src/def/validate/v8.rs b/crates/schema/src/def/validate/v8.rs index 2ad9d061ec..1e2b8f962a 100644 --- a/crates/schema/src/def/validate/v8.rs +++ b/crates/schema/src/def/validate/v8.rs @@ -73,7 +73,6 @@ fn upgrade_table( ) -> RawTableDefV9 { // First, generate all the various things that are needed. // This is the hairiest part of v8. 
- let generated_indexes = table.schema.generated_indexes().collect::>(); let generated_constraints = table.schema.generated_constraints().collect::>(); let generated_sequences = table.schema.generated_sequences().collect::>(); @@ -96,13 +95,7 @@ fn upgrade_table( check_all_column_defs(product_type_ref, columns, &table_name, typespace, extra_errors); // Now we're ready to go through the various definitions and upgrade them. - let indexes = convert_all( - indexes - .into_iter() - .map(|idx| (idx, false)) - .chain(generated_indexes.into_iter().map(|idx| (idx, true))), - |(idx, is_generated)| upgrade_index(idx, is_generated), - ); + let indexes = convert_all(indexes, upgrade_index); let sequences = convert_all(sequences.into_iter().chain(generated_sequences), upgrade_sequence); let schedule = upgrade_schedule(scheduled, &table_name); @@ -111,9 +104,7 @@ fn upgrade_table( let unique_constraints = constraints .into_iter() .chain(generated_constraints) - .filter_map(|constraint| { - upgrade_constraint_to_unique_constraint(constraint, &table_name, &mut primary_key, extra_errors) - }) + .filter_map(|constraint| upgrade_constraint(constraint, &table_name, &mut primary_key, extra_errors)) .collect(); let table_type = table_type.into(); @@ -124,7 +115,7 @@ fn upgrade_table( product_type_ref, primary_key, indexes, - unique_constraints, + constraints: unique_constraints, sequences, schedule, table_type, @@ -225,7 +216,7 @@ fn check_column( } /// Upgrade an index. -fn upgrade_index(index: RawIndexDefV8, is_generated: bool) -> RawIndexDefV9 { +fn upgrade_index(index: RawIndexDefV8) -> RawIndexDefV9 { let RawIndexDefV8 { index_name, is_unique: _, // handled by generated_constraints @@ -239,7 +230,7 @@ fn upgrade_index(index: RawIndexDefV8, is_generated: bool) -> RawIndexDefV9 { }; // The updated bindings macros will correctly distinguish between accessor name and index name as specified in the // ABI stability proposal. 
The old macros don't make this distinction, so we just reuse the name for them. - let accessor_name = if is_generated { None } else { Some(index_name.clone()) }; + let accessor_name = Some(index_name.clone()); RawIndexDefV9 { name: index_name.clone(), // Set the accessor name to be the same as the index name. @@ -252,12 +243,12 @@ fn upgrade_index(index: RawIndexDefV8, is_generated: bool) -> RawIndexDefV9 { /// /// `primary_key` is mutable and will be set to `Some(constraint.columns.as_singleton())` if the constraint is a primary key. /// If it has already been set, an error will be pushed to `extra_errors`. -fn upgrade_constraint_to_unique_constraint( +fn upgrade_constraint( constraint: RawConstraintDefV8, table_name: &RawIdentifier, primary_key: &mut Option, extra_errors: &mut Vec, -) -> Option { +) -> Option { let RawConstraintDefV8 { constraint_name, constraints, @@ -282,12 +273,12 @@ fn upgrade_constraint_to_unique_constraint( } if constraints.has_unique() { - Some(RawUniqueConstraintDefV9 { + Some(RawConstraintDefV9 { name: constraint_name, - columns, + data: RawConstraintDataV9::Unique(RawUniqueConstraintDataV9 { columns }), }) } else { - // other constraints are implemented by `generated_indexes` and `generated_sequences`. + // other constraints are implemented by `generated_sequences`. // Note that `Constraints::unset` will not trigger any of the preceding branches, so will be ignored. // This is consistent with the original `TableSchema::from_(raw_)def`, which also ignored `Constraints::unset`. 
None @@ -362,12 +353,12 @@ mod tests { use crate::def::validate::v8::{IDENTITY_CONNECTED_NAME, IDENTITY_DISCONNECTED_NAME, INIT_NAME}; use crate::def::{validate::Result, ModuleDef}; use crate::error::*; + use crate::type_for_generate::ClientCodegenError; use spacetimedb_data_structures::expect_error_matching; use spacetimedb_lib::db::raw_def::*; use spacetimedb_lib::{ScheduleAt, TableDesc}; use spacetimedb_primitives::{ColId, ColList, Constraints}; - use spacetimedb_sats::typespace::TypeRefError; use spacetimedb_sats::{AlgebraicType, AlgebraicTypeRef, ProductType}; use v8::RawModuleDefV8Builder; use v9::Lifecycle; @@ -699,79 +690,49 @@ mod tests { } #[test] - fn recursive_type_ref() { + fn recursive_ref() { let recursive_type = AlgebraicType::product([("a", AlgebraicTypeRef(0).into())]); let mut builder = RawModuleDefV8Builder::default(); - builder.add_type_for_tests("Recursive", recursive_type.clone()); - builder.add_reducer_for_tests("silly", ProductType::from([("a", recursive_type.clone())])); - let result: Result = builder.finish().try_into(); + let ref_ = builder.add_type_for_tests("Recursive", recursive_type.clone()); + builder.add_reducer_for_tests("silly", ProductType::from([("a", ref_.into())])); + let result: ModuleDef = builder.finish().try_into().unwrap(); - // If you use a recursive type as a reducer argument, you get two errors. - // One for the reducer argument, and one for the type itself. - // This seems fine... 
- expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } && - ty.0 == recursive_type && - error == &TypeRefError::RecursiveTypeRef(AlgebraicTypeRef(0)) - }); - expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::ReducerArg { - reducer_name: "silly".into(), - position: 0, - arg_name: Some("a".into()) - } && - ty.0 == recursive_type && - error == &TypeRefError::RecursiveTypeRef(AlgebraicTypeRef(0)) - }); + assert!(result.typespace_for_generate[ref_].is_recursive()); } #[test] - fn invalid_type_ref() { + fn out_of_bounds_ref() { let invalid_type_1 = AlgebraicType::product([("a", AlgebraicTypeRef(31).into())]); - let invalid_type_2 = AlgebraicType::option(AlgebraicTypeRef(55).into()); let mut builder = RawModuleDefV8Builder::default(); - builder.add_type_for_tests("Invalid", invalid_type_1.clone()); - builder.add_reducer_for_tests("silly", ProductType::from([("a", invalid_type_2.clone())])); + let ref_ = builder.add_type_for_tests("Invalid", invalid_type_1.clone()); + builder.add_reducer_for_tests("silly", ProductType::from([("a", ref_.into())])); let result: Result = builder.finish().try_into(); - expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } && - ty.0 == invalid_type_1 && - error == &TypeRefError::InvalidTypeRef(AlgebraicTypeRef(31)) - }); - expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::ReducerArg { - reducer_name: "silly".into(), - position: 0, - arg_name: Some("a".into()) - } && - ty.0 == invalid_type_2 && - error == &TypeRefError::InvalidTypeRef(AlgebraicTypeRef(55)) + expect_error_matching!(result, ValidationError::ClientCodegenError { location, error: ClientCodegenError::TypeRefError(_) } 
=> { + location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } }); } #[test] - fn type_invalid() { + fn invalid_use() { let inner_type_invalid_for_use = AlgebraicType::product([("b", AlgebraicType::U32)]); let invalid_type = AlgebraicType::product([("a", inner_type_invalid_for_use.clone())]); let mut builder = RawModuleDefV8Builder::default(); - builder.add_type_for_tests("Invalid", invalid_type.clone()); - builder.add_reducer_for_tests("silly", ProductType::from([("a", invalid_type.clone())])); + let ref_ = builder.add_type_for_tests("Invalid", invalid_type.clone()); + builder.add_reducer_for_tests("silly", ProductType::from([("a", ref_.into())])); let result: Result = builder.finish().try_into(); - expect_error_matching!(result, ValidationError::NotValidForTypeDefinition { ref_, ty } => { - ref_ == &AlgebraicTypeRef(0) && - ty == &invalid_type - }); - expect_error_matching!(result, ValidationError::NotValidForTypeUse { location, ty } => { - location == &TypeLocation::ReducerArg { - reducer_name: "silly".into(), - position: 0, - arg_name: Some("a".into()) - } && - ty.0 == invalid_type - }); + expect_error_matching!( + result, + ValidationError::ClientCodegenError { + location, + error: ClientCodegenError::NonSpecialTypeNotAUse { ty } + } => { + location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } && + ty.0 == inner_type_invalid_for_use + } + ); } #[test] diff --git a/crates/schema/src/def/validate/v9.rs b/crates/schema/src/def/validate/v9.rs index 7c22e0aaf9..c9dee0d9db 100644 --- a/crates/schema/src/def/validate/v9.rs +++ b/crates/schema/src/def/validate/v9.rs @@ -1,11 +1,11 @@ use crate::def::*; use crate::error::{RawColumnName, ValidationError}; +use crate::type_for_generate::{ClientCodegenError, ProductTypeDef, TypespaceForGenerateBuilder}; use crate::{def::validate::Result, error::TypeLocation}; use spacetimedb_data_structures::error_stream::{CollectAllErrors, CombineErrors}; use spacetimedb_data_structures::map::HashSet; use 
spacetimedb_lib::db::default_element_ordering::{product_type_has_default_ordering, sum_type_has_default_ordering}; use spacetimedb_lib::ProductType; -use spacetimedb_sats::WithTypespace; /// Validate a `RawModuleDefV9` and convert it into a `ModuleDef`, /// or return a stream of errors if the definition is invalid. @@ -18,11 +18,14 @@ pub fn validate(def: RawModuleDefV9) -> Result { misc_exports, } = def; + let known_type_definitions = types.iter().map(|def| def.ty); + let mut validator = ModuleValidator { typespace: &typespace, stored_in_table_def: Default::default(), type_namespace: Default::default(), lifecycle_reducers: Default::default(), + typespace_for_generate: TypespaceForGenerate::builder(&typespace, known_type_definitions), }; // Important general note: @@ -44,29 +47,27 @@ pub fn validate(def: RawModuleDefV9) -> Result { }) .collect_all_errors(); + let mut refmap = HashMap::default(); + let tables = tables .into_iter() .map(|table| { - validator - .validate_table_def(table) - .map(|table_def| (table_def.name.clone(), table_def)) + validator.validate_table_def(table).map(|table_def| { + refmap.insert(table_def.product_type_ref, RefPointee::Table(table_def.name.clone())); + (table_def.name.clone(), table_def) + }) }) .collect_all_errors(); let types = types .into_iter() .map(|ty| { - validator - .validate_type_def(ty) - .map(|type_def| (type_def.name.clone(), type_def)) + validator.validate_type_def(ty).map(|type_def| { + refmap.insert(type_def.ty, RefPointee::Type(type_def.name.clone())); + (type_def.name.clone(), type_def) + }) }) - .collect_all_errors::>() - .and_then(|types| { - // We need to validate the typespace *after* we have all the type definitions. - // Types in the typespace need to look stuff up in the type definitions. - validator.validate_typespace(&types)?; - Ok(types) - }); + .collect_all_errors::>(); // It's statically impossible for this assert to fire until `RawMiscModuleExportV9` grows some variants. 
assert_eq!( @@ -83,17 +84,23 @@ pub fn validate(def: RawModuleDefV9) -> Result { }); let ModuleValidator { - stored_in_table_def, .. + stored_in_table_def, + typespace_for_generate, + .. } = validator; let (tables, types, reducers) = (tables_types_reducers).map_err(|errors| errors.sort_deduplicate())?; + let typespace_for_generate = typespace_for_generate.finish(); + let mut result = ModuleDef { tables, reducers, types, typespace, + typespace_for_generate, stored_in_table_def, + refmap, }; result.generate_indexes(); @@ -108,6 +115,9 @@ struct ModuleValidator<'a> { /// Behind a reference to ensure we don't accidentally mutate it. typespace: &'a Typespace, + /// The in-progress typespace used to generate client types. + typespace_for_generate: TypespaceForGenerateBuilder<'a>, + /// Names we have seen so far. /// /// It would be nice if we could have span information here, but currently it isn't passed @@ -129,7 +139,7 @@ impl ModuleValidator<'_> { product_type_ref, primary_key, indexes, - unique_constraints, + constraints, sequences, schedule, table_type, @@ -171,15 +181,15 @@ impl ModuleValidator<'_> { .collect_all_errors(); // We can't validate the primary key without validating the unique constraints first. 
- let unique_constraints_primary_key = unique_constraints + let constraints_primary_key = constraints .into_iter() .map(|constraint| { table_in_progress - .validate_unique_constraint_def(constraint) + .validate_constraint_def(constraint) .map(|constraint| (constraint.name.clone(), constraint)) }) .collect_all_errors() - .and_then(|constraints: IdentifierMap| { + .and_then(|constraints: IdentifierMap| { table_in_progress.validate_primary_key(constraints, primary_key) }); @@ -198,15 +208,8 @@ impl ModuleValidator<'_> { let name = table_in_progress.add_to_global_namespace(raw_table_name.clone()); - let (name, columns, indexes, (unique_constraints, primary_key), sequences, schedule) = ( - name, - columns, - indexes, - unique_constraints_primary_key, - sequences, - schedule, - ) - .combine_errors()?; + let (name, columns, indexes, (constraints, primary_key), sequences, schedule) = + (name, columns, indexes, constraints_primary_key, sequences, schedule).combine_errors()?; Ok(TableDef { name, @@ -214,7 +217,7 @@ impl ModuleValidator<'_> { primary_key, columns, indexes, - unique_constraints, + constraints, sequences, schedule, table_type, @@ -230,7 +233,7 @@ impl ModuleValidator<'_> { lifecycle, } = reducer_def; - let params_valid: Result<()> = params + let params_for_generate: Result<_> = params .elements .iter() .enumerate() @@ -242,10 +245,18 @@ impl ModuleValidator<'_> { position, arg_name: param.name().map(Into::into), }; - let valid_for_use = self.validate_for_type_use(&location, ¶m.algebraic_type); - let resolves = self.validate_resolves(&location, ¶m.algebraic_type).map(|_| ()); - let ((), ()) = (valid_for_use, resolves).combine_errors()?; - Ok(()) + let param_name = param + .name() + .ok_or_else(|| { + ValidationError::ClientCodegenError { + location: location.clone().make_static(), + error: ClientCodegenError::NamelessReducerParam, + } + .into() + }) + .and_then(|s| identifier(s.into())); + let ty_use = self.validate_for_type_use(&location, ¶m.algebraic_type); + 
(param_name, ty_use).combine_errors() }) .collect_all_errors(); @@ -259,11 +270,15 @@ impl ModuleValidator<'_> { }) .transpose(); - let (name, (), lifecycle) = (name, params_valid, lifecycle).combine_errors()?; + let (name, params_for_generate, lifecycle) = (name, params_for_generate, lifecycle).combine_errors()?; Ok(ReducerDef { name, - params, + params: params.clone(), + params_for_generate: ProductTypeDef { + elements: params_for_generate, + recursive: false, // A ProductTypeDef not stored in a Typespace cannot be recursive. + }, lifecycle, }) } @@ -288,21 +303,30 @@ impl ModuleValidator<'_> { .into() }) .and_then(|pointed_to| { - if !custom_ordering { + let ordering_ok = if custom_ordering { + Ok(()) + } else { let correct = match pointed_to { AlgebraicType::Sum(sum) => sum_type_has_default_ordering(sum), AlgebraicType::Product(product) => product_type_has_default_ordering(product), _ => true, }; - if !correct { - return Err(ValidationError::TypeHasIncorrectOrdering { + if correct { + Ok(()) + } else { + Err(ValidationError::TypeHasIncorrectOrdering { type_name: name.clone(), ref_: ty, bad_type: pointed_to.clone().into(), } - .into()); + .into()) } - } + }; + + // Now check the definition is valid + let def_ok = self.validate_for_type_definition(ty); + + let ((), ()) = (ordering_ok, def_ok).combine_errors()?; // note: we return the reference `ty`, not the pointed-to type `pointed_to`. // The reference is semantically important. @@ -337,111 +361,24 @@ impl ModuleValidator<'_> { }) } - /// Validates that a type can be used to generate a client type definition or use. - /// - /// This reimplements `AlgebraicType::is_valid_for_client_type_definition` with more errors. 
- fn validate_for_type_definition_or_use( - &mut self, - ref_: AlgebraicTypeRef, - ty: &AlgebraicType, - ) -> Result { - if ty.is_valid_for_client_type_use() { - return Ok(TypeDefOrUse::Use); - } - let location = TypeLocation::InTypespace { ref_ }; - match ty { - AlgebraicType::Sum(sum) => sum - .variants - .iter() - .map(|variant| self.validate_for_type_use(&location, &variant.algebraic_type)) - .collect_all_errors::<()>() - .map_err(|_| { - ValidationErrors::from(ValidationError::NotValidForTypeDefinition { ref_, ty: ty.clone() }) - })?, - AlgebraicType::Product(product) => product - .elements - .iter() - .map(|element| self.validate_for_type_use(&location, &element.algebraic_type)) - .collect_all_errors::<()>() - .map_err(|_| { - ValidationErrors::from(ValidationError::NotValidForTypeDefinition { ref_, ty: ty.clone() }) - })?, - - // it's not a *valid* type use, but it isn't a valid type definition either. - // so, get some errors from the type use validation. - _ => self.validate_for_type_use(&location, ty)?, - } - Ok(TypeDefOrUse::Def) - } - /// Validates that a type can be used to generate a client type use. - fn validate_for_type_use(&mut self, location: &TypeLocation, ty: &AlgebraicType) -> Result<()> { - if ty.is_valid_for_client_type_use() { - Ok(()) - } else { - Err(ValidationError::NotValidForTypeUse { + fn validate_for_type_use(&mut self, location: &TypeLocation, ty: &AlgebraicType) -> Result { + self.typespace_for_generate.parse_use(ty).map_err(|err| { + ErrorStream::expect_nonempty(err.into_iter().map(|error| ValidationError::ClientCodegenError { location: location.clone().make_static(), - ty: ty.clone().into(), - } - .into()) - } - } - - /// Validate that a type resolves correctly, returning the resolved type if successful. - /// The resolved type will not contain any `Ref`s. - fn validate_resolves(&self, location: &TypeLocation, ty: &AlgebraicType) -> Result { - // This repeats some work for nested types. 
- // TODO: implement a reentrant, cached version of `resolve_refs`. - WithTypespace::new(self.typespace, ty).resolve_refs().map_err(|error| { - ValidationError::ResolutionFailure { - location: location.clone().make_static(), - ty: ty.clone().into(), error, - } - .into() + })) }) } - /// Validate the typespace. - /// This checks that every `Product`, `Sum`, and `Ref` in the typespace has a corresponding - /// `TypeDef`. - fn validate_typespace(&mut self, validated_type_defs: &HashMap) -> Result<()> { - let id_to_name = validated_type_defs - .values() - .map(|def| (&def.ty, &def.name)) - .collect::>(); - - self.typespace - .types - .iter() - .enumerate() - .map(|(pos, ty)| { - let ref_ = AlgebraicTypeRef(pos as u32); - let location = TypeLocation::InTypespace { ref_ }; - - let is_valid = - self.validate_for_type_definition_or_use(ref_, ty) - .and_then(|def_or_use| match def_or_use { - TypeDefOrUse::Def => { - if id_to_name.contains_key(&ref_) { - Ok(()) - } else { - Err(ValidationError::MissingTypeDef { - ref_, - ty: ty.clone().into(), - } - .into()) - } - } - TypeDefOrUse::Use => Ok(()), - }); - // Discard the resolved type, we only want to check that it DOES resolve. - let resolves = self.validate_resolves(&location, ty).map(|_| ()); - - let ((), ()) = (is_valid, resolves).combine_errors()?; - Ok(()) - }) - .collect_all_errors() + /// Validates that a type can be used to generate a client type definition. 
+ fn validate_for_type_definition(&mut self, ref_: AlgebraicTypeRef) -> Result<()> { + self.typespace_for_generate.add_definition(ref_).map_err(|err| { + ErrorStream::expect_nonempty(err.into_iter().map(|error| ValidationError::ClientCodegenError { + location: TypeLocation::InTypespace { ref_ }, + error, + })) + }) } } @@ -476,15 +413,12 @@ impl TableValidator<'_, '_> { }) .and_then(|name| identifier(name.into())); - let ty = self - .module_validator - .validate_resolves( - &TypeLocation::InTypespace { - ref_: self.product_type_ref, - }, - &column.algebraic_type, - ) - .map(|_resolved| column.algebraic_type.clone()); // We don't need the resolved type. + let ty_for_generate = self.module_validator.validate_for_type_use( + &TypeLocation::InTypespace { + ref_: self.product_type_ref, + }, + &column.algebraic_type, + ); // This error will be created multiple times if the table name is invalid, // but we sort and deduplicate the error stream afterwards, @@ -494,11 +428,12 @@ impl TableValidator<'_, '_> { // nonempty. We need to put something in there if the table name is invalid. 
let table_name = identifier(self.raw_name.clone()); - let (name, ty, table_name) = (name, ty, table_name).combine_errors()?; + let (name, ty_for_generate, table_name) = (name, ty_for_generate, table_name).combine_errors()?; Ok(ColumnDef { name, - ty, + ty: column.algebraic_type.clone(), + ty_for_generate, col_id, table_name, }) @@ -506,17 +441,17 @@ impl TableValidator<'_, '_> { fn validate_primary_key( &mut self, - validated_unique_constraints: IdentifierMap, + validated_constraints: IdentifierMap, primary_key: Option, - ) -> Result<(IdentifierMap, Option)> { + ) -> Result<(IdentifierMap, Option)> { let pk = primary_key .map(|pk| -> Result { let pk = self.validate_col_id(&self.raw_name, pk)?; let pk_col_list = ColList::from(pk); - if validated_unique_constraints - .values() - .any(|constraint| constraint.columns == pk_col_list) - { + if validated_constraints.values().any(|constraint| { + let ConstraintData::Unique(UniqueConstraintData { columns }) = &constraint.data; + columns == &pk_col_list + }) { Ok(pk) } else { Err(ValidationError::MissingPrimaryKeyUniqueConstraint { @@ -526,7 +461,7 @@ impl TableValidator<'_, '_> { } }) .transpose()?; - Ok((validated_unique_constraints, pk)) + Ok((validated_constraints, pk)) } fn validate_sequence_def(&mut self, sequence: RawSequenceDefV9) -> Result { @@ -623,14 +558,21 @@ impl TableValidator<'_, '_> { } /// Validate a unique constraint definition. 
- fn validate_unique_constraint_def(&mut self, constraint: RawUniqueConstraintDefV9) -> Result { - let RawUniqueConstraintDefV9 { name, columns } = constraint; + fn validate_constraint_def(&mut self, constraint: RawConstraintDefV9) -> Result { + let RawConstraintDefV9 { name, data } = constraint; - let columns = self.validate_col_ids(&name, columns); - let name = self.add_to_global_namespace(name); + if let RawConstraintDataV9::Unique(RawUniqueConstraintDataV9 { columns }) = data { + let columns = self.validate_col_ids(&name, columns); + let name = self.add_to_global_namespace(name); - let (name, columns) = (name, columns).combine_errors()?; - Ok(UniqueConstraintDef { name, columns }) + let (name, columns) = (name, columns).combine_errors()?; + Ok(ConstraintDef { + name, + data: ConstraintData::Unique(UniqueConstraintData { columns }), + }) + } else { + unimplemented!("Unknown constraint type") + } } /// Validate a schedule definition. @@ -759,12 +701,6 @@ fn identifier(name: Box) -> Result { Identifier::new(name).map_err(|error| ValidationError::IdentifierError { error }.into()) } -/// Stores whether a type can be used to generate a definition or a use. 
-enum TypeDefOrUse { - Def, - Use, -} - fn check_scheduled_reducers_exist( tables: &IdentifierMap, reducers: &IdentifierMap, @@ -806,15 +742,15 @@ mod tests { use crate::def::validate::tests::{ check_product_type, expect_identifier, expect_raw_type_name, expect_resolve, expect_type_name, }; - use crate::def::IndexAlgorithm; use crate::def::{validate::Result, ModuleDef}; + use crate::def::{ConstraintData, IndexAlgorithm, UniqueConstraintData}; use crate::error::*; + use crate::type_for_generate::ClientCodegenError; use spacetimedb_data_structures::expect_error_matching; use spacetimedb_lib::db::raw_def::*; use spacetimedb_lib::ScheduleAt; use spacetimedb_primitives::ColList; - use spacetimedb_sats::typespace::TypeRefError; use spacetimedb_sats::{AlgebraicType, AlgebraicTypeRef, ProductType}; use v9::{Lifecycle, RawIndexAlgorithm, RawModuleDefV9Builder, TableAccess, TableType}; @@ -940,14 +876,14 @@ mod tests { assert_eq!(apples_def.primary_key, None); - assert_eq!(apples_def.unique_constraints.len(), 1); + assert_eq!(apples_def.constraints.len(), 1); let apples_unique_constraint = expect_identifier("Apples_unique_constraint"); assert_eq!( - apples_def.unique_constraints[&apples_unique_constraint].columns, - 3.into() + apples_def.constraints[&apples_unique_constraint].data, + ConstraintData::Unique(UniqueConstraintData { columns: 3.into() }) ); assert_eq!( - apples_def.unique_constraints[&apples_unique_constraint].name, + apples_def.constraints[&apples_unique_constraint].name, apples_unique_constraint ); @@ -994,10 +930,13 @@ mod tests { ); assert_eq!(bananas_def.primary_key, Some(0.into())); assert_eq!(bananas_def.indexes.len(), 2); - assert_eq!(bananas_def.unique_constraints.len(), 1); - let (bananas_constraint_name, bananas_constraint) = bananas_def.unique_constraints.iter().next().unwrap(); + assert_eq!(bananas_def.constraints.len(), 1); + let (bananas_constraint_name, bananas_constraint) = bananas_def.constraints.iter().next().unwrap(); 
assert_eq!(bananas_constraint_name, &bananas_constraint.name); - assert_eq!(bananas_constraint.columns, 0.into()); + assert_eq!( + bananas_constraint.data, + ConstraintData::Unique(UniqueConstraintData { columns: 0.into() }) + ); let delivery_def = &def.tables[&deliveries]; assert_eq!(delivery_def.name, deliveries); @@ -1260,55 +1199,27 @@ mod tests { } #[test] - fn recursive_type_ref() { + fn recursive_ref() { let recursive_type = AlgebraicType::product([("a", AlgebraicTypeRef(0).into())]); let mut builder = RawModuleDefV9Builder::new(); - builder.add_algebraic_type([], "Recursive", recursive_type.clone(), false); - builder.add_reducer("silly", ProductType::from([("a", recursive_type.clone())]), None); - let result: Result = builder.finish().try_into(); + let ref_ = builder.add_algebraic_type([], "Recursive", recursive_type.clone(), false); + builder.add_reducer("silly", ProductType::from([("a", ref_.into())]), None); + let result: ModuleDef = builder.finish().try_into().unwrap(); - // If you use a recursive type as a reducer argument, you get two errors. - // One for the reducer argument, and one for the type itself. - // This seems fine... 
- expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } && - ty.0 == recursive_type && - error == &TypeRefError::RecursiveTypeRef(AlgebraicTypeRef(0)) - }); - expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::ReducerArg { - reducer_name: "silly".into(), - position: 0, - arg_name: Some("a".into()) - } && - ty.0 == recursive_type && - error == &TypeRefError::RecursiveTypeRef(AlgebraicTypeRef(0)) - }); + assert!(result.typespace_for_generate[ref_].is_recursive()); } #[test] - fn invalid_type_ref() { + fn out_of_bounds_ref() { let invalid_type_1 = AlgebraicType::product([("a", AlgebraicTypeRef(31).into())]); - let invalid_type_2 = AlgebraicType::option(AlgebraicTypeRef(55).into()); let mut builder = RawModuleDefV9Builder::new(); - builder.add_algebraic_type([], "Invalid", invalid_type_1.clone(), false); - builder.add_reducer("silly", ProductType::from([("a", invalid_type_2.clone())]), None); + let ref_ = builder.add_algebraic_type([], "Invalid", invalid_type_1.clone(), false); + builder.add_reducer("silly", ProductType::from([("a", ref_.into())]), None); let result: Result = builder.finish().try_into(); - expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } && - ty.0 == invalid_type_1 && - error == &TypeRefError::InvalidTypeRef(AlgebraicTypeRef(31)) - }); - expect_error_matching!(result, ValidationError::ResolutionFailure { location, ty, error } => { - location == &TypeLocation::ReducerArg { - reducer_name: "silly".into(), - position: 0, - arg_name: Some("a".into()) - } && - ty.0 == invalid_type_2 && - error == &TypeRefError::InvalidTypeRef(AlgebraicTypeRef(55)) + expect_error_matching!(result, ValidationError::ClientCodegenError { location, error: 
ClientCodegenError::TypeRefError(_) } => { + location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } }); } @@ -1317,22 +1228,20 @@ mod tests { let inner_type_invalid_for_use = AlgebraicType::product([("b", AlgebraicType::U32)]); let invalid_type = AlgebraicType::product([("a", inner_type_invalid_for_use.clone())]); let mut builder = RawModuleDefV9Builder::new(); - builder.add_algebraic_type([], "Invalid", invalid_type.clone(), false); - builder.add_reducer("silly", ProductType::from([("a", invalid_type.clone())]), None); + let ref_ = builder.add_algebraic_type([], "Invalid", invalid_type.clone(), false); + builder.add_reducer("silly", ProductType::from([("a", ref_.into())]), None); let result: Result = builder.finish().try_into(); - expect_error_matching!(result, ValidationError::NotValidForTypeDefinition { ref_, ty } => { - ref_ == &AlgebraicTypeRef(0) && - ty == &invalid_type - }); - expect_error_matching!(result, ValidationError::NotValidForTypeUse { location, ty } => { - location == &TypeLocation::ReducerArg { - reducer_name: "silly".into(), - position: 0, - arg_name: Some("a".into()) - } && - ty.0 == invalid_type - }); + expect_error_matching!( + result, + ValidationError::ClientCodegenError { + location, + error: ClientCodegenError::NonSpecialTypeNotAUse { ty } + } => { + location == &TypeLocation::InTypespace { ref_: AlgebraicTypeRef(0) } && + ty.0 == inner_type_invalid_for_use + } + ); } #[test] diff --git a/crates/schema/src/error.rs b/crates/schema/src/error.rs index f46591deaf..ce91d080ef 100644 --- a/crates/schema/src/error.rs +++ b/crates/schema/src/error.rs @@ -3,12 +3,13 @@ use spacetimedb_lib::db::raw_def::v9::{Lifecycle, RawIdentifier, RawScopedTypeNa use spacetimedb_lib::{ProductType, SumType}; use spacetimedb_primitives::{ColId, ColList}; use spacetimedb_sats::algebraic_type::fmt::fmt_algebraic_type; -use spacetimedb_sats::{typespace::TypeRefError, AlgebraicType, AlgebraicTypeRef}; +use spacetimedb_sats::{AlgebraicType, 
AlgebraicTypeRef}; use std::borrow::Cow; use std::fmt; use crate::def::ScopedTypeName; use crate::identifier::Identifier; +use crate::type_for_generate::ClientCodegenError; /// A stream of validation errors, defined using the `ErrorStream` type. pub type ValidationErrors = ErrorStream; @@ -85,18 +86,10 @@ pub enum ValidationError { }, #[error("A scheduled table must have columns `scheduled_id: u64` and `scheduled_at: ScheduledAt`, but table `{table}` has columns {columns:?}")] ScheduledIncorrectColumns { table: RawIdentifier, columns: ProductType }, - #[error("{location} has type {ty:?} which cannot be used to generate a type use")] - NotValidForTypeUse { + #[error("error at {location}: {error}")] + ClientCodegenError { location: TypeLocation<'static>, - ty: PrettyAlgebraicType, - }, - #[error("{ref_} stores type {ty:?} which cannot be used to generate a type definition")] - NotValidForTypeDefinition { ref_: AlgebraicTypeRef, ty: AlgebraicType }, - #[error("Type {ty} failed to resolve")] - ResolutionFailure { - location: TypeLocation<'static>, - ty: PrettyAlgebraicType, - error: TypeRefError, + error: ClientCodegenError, }, #[error("Missing type definition for ref: {ref_}, holds type: {ty}")] MissingTypeDef { diff --git a/crates/schema/src/lib.rs b/crates/schema/src/lib.rs index 35f1e8e405..e960625ac8 100644 --- a/crates/schema/src/lib.rs +++ b/crates/schema/src/lib.rs @@ -6,3 +6,4 @@ pub mod def; pub mod error; pub mod identifier; pub mod schema; +pub mod type_for_generate; diff --git a/crates/schema/src/schema.rs b/crates/schema/src/schema.rs index faab6abc38..56e5d90a89 100644 --- a/crates/schema/src/schema.rs +++ b/crates/schema/src/schema.rs @@ -15,7 +15,8 @@ use spacetimedb_sats::product_value::InvalidFieldError; use std::sync::Arc; use crate::def::{ - ColumnDef, IndexAlgorithm, IndexDef, ModuleDef, ModuleDefLookup, SequenceDef, TableDef, UniqueConstraintDef, + ColumnDef, ConstraintData, ConstraintDef, IndexAlgorithm, IndexDef, ModuleDef, ModuleDefLookup, 
SequenceDef, + TableDef, UniqueConstraintData, }; use crate::identifier::Identifier; @@ -520,7 +521,7 @@ impl TableSchema { primary_key, columns, indexes, - unique_constraints, + constraints, sequences, schedule, table_type, @@ -533,12 +534,14 @@ impl TableSchema { .map(|(col_pos, def)| ColumnSchema::from_module_def(def, (), (table_id, col_pos.into()))) .collect(); - let unique_col_lists = unique_constraints + let unique_col_lists = constraints .values() - .map(|x| x.columns.clone()) + .map(|x| match &x.data { + ConstraintData::Unique(UniqueConstraintData { columns }) => columns.clone(), + }) .collect::>(); - let mut constraints: Vec = vec![]; + let mut constraint_schemas: Vec = vec![]; // note: these Ids are fixed up somewhere else, so we can just use 0 here... // but it would be nice to pass the correct values into this method. @@ -552,7 +555,7 @@ impl TableSchema { } else { let cols_name = generate_cols_name(&result.columns, |x| columns.get(x.idx()).map(|x| &*x.col_name)); #[allow(deprecated)] - constraints.push(ConstraintSchema::from_def( + constraint_schemas.push(ConstraintSchema::from_def( table_id, RawConstraintDefV8::for_column( name, @@ -575,7 +578,7 @@ impl TableSchema { let pk_col_list = primary_key.map(ColList::from).unwrap_or(ColList::empty()); - constraints.extend(unique_constraints.values().map(|def| { + constraint_schemas.extend(constraints.values().map(|def| { let mut result = ConstraintSchema::from_module_def(def, table_id, ConstraintId(0)); if result.columns == pk_col_list { result.constraints = result.constraints.push(Constraints::primary_key()); @@ -593,7 +596,7 @@ impl TableSchema { (*name).clone().into(), columns, indexes, - constraints, + constraint_schemas, sequences, (*table_type).into(), (*table_access).into(), @@ -998,17 +1001,19 @@ impl ConstraintSchema { } impl Schema for ConstraintSchema { - type Def = UniqueConstraintDef; + type Def = ConstraintDef; type Id = ConstraintId; type ParentId = TableId; fn from_module_def(def: &Self::Def, 
parent_id: Self::ParentId, id: Self::Id) -> Self { - ConstraintSchema { - constraint_id: id, - constraint_name: (*def.name).into(), - constraints: Constraints::unique(), - table_id: parent_id, - columns: def.columns.clone(), + match &def.data { + ConstraintData::Unique(UniqueConstraintData { columns }) => ConstraintSchema { + constraint_id: id, + constraint_name: (*def.name).into(), + constraints: Constraints::unique(), + table_id: parent_id, + columns: columns.clone(), + }, } } } diff --git a/crates/schema/src/type_for_generate.rs b/crates/schema/src/type_for_generate.rs new file mode 100644 index 0000000000..e5786c1452 --- /dev/null +++ b/crates/schema/src/type_for_generate.rs @@ -0,0 +1,930 @@ +//! `AlgebraicType` extensions for generating client code. + +use enum_as_inner::EnumAsInner; +use smallvec::SmallVec; +use spacetimedb_data_structures::{ + error_stream::{CollectAllErrors, CombineErrors, ErrorStream}, + map::{HashMap, HashSet}, +}; +use spacetimedb_lib::{AlgebraicType, ProductTypeElement}; +use spacetimedb_sats::{typespace::TypeRefError, AlgebraicTypeRef, ArrayType, SumTypeVariant, Typespace}; +use std::{ops::Index, sync::Arc}; + +use crate::{ + error::{IdentifierError, PrettyAlgebraicType}, + identifier::Identifier, +}; + +/// Errors that can occur when rearranging types for client codegen. 
+#[derive(thiserror::Error, Debug, PartialOrd, Ord, PartialEq, Eq)] +#[non_exhaustive] +pub enum ClientCodegenError { + #[error( + "internal codegen error: non-special product or sum type {ty} cannot be used to generate a client type use" + )] + NonSpecialTypeNotAUse { ty: PrettyAlgebraicType }, + + #[error("internal codegen error: invalid AlgebraicTypeRef")] + TypeRefError(#[from] TypeRefError), + + #[error("internal codegen error: type ref {ref_} was not pre-declared as a definition")] + NonDeclaredTypeDef { ref_: AlgebraicTypeRef }, + + #[error("internal codegen error: all type elements require names: {ty}")] + NamelessTypeDefElement { ty: PrettyAlgebraicType }, + + #[error("internal codegen error: all reducer parameters require names")] + NamelessReducerParam, + + #[error("internal codegen error: type {ty} is not valid for generating a definition")] + NotValidForDefinition { ty: PrettyAlgebraicType }, + + #[error("type {ty} contains identifier error {err}")] + NotValidIdentifier { + ty: PrettyAlgebraicType, + err: IdentifierError, + }, +} + +type Result = std::result::Result>; + +/// A typespace for generating client code. +/// +/// The key difference is that this typespace stores only `AlgebraicTypeDef`s, not `AlgebraicType`s. +/// We use the same `AlgebraicTypeRef`s from the original typespace. +/// The difference is that `AlgebraicTypeRef`s ONLY point to `AlgebraicTypeDef`s. +/// Chains of `AlgebraicTypeRef`s in the original `Typespace` are contracted to point to their ending `AlgebraicTypeDef`. 
+/// +/// For example, the input: +/// ```txt +/// [ +/// 0 -> AlgebraicType::Product { a: Ref(1) } +/// 1 -> AlgebraicType::Array(Ref(2)) +/// 2 -> AlgebraicType::Product { b: U32 } +/// ] +/// ``` +/// Results in the output: +/// ```txt +/// [ +/// 0 -> AlgebraicTypeDef::Product { a: Array(Ref(2)) } +/// 2 -> AlgebraicTypeDef::Product { b: U32 } +/// ] +/// ``` +/// +/// Cycles passing through a definition, such as: +/// ```txt +/// [ +/// 0 -> Product { a: Ref(1) } +/// 1 -> Sum { a: U32, b: Ref(0) } +/// ] +/// ``` +/// are permitted. +/// +/// Cycles NOT passing through a definition, such as: +/// ```txt +/// [ +/// 0 -> Ref(1) +/// 1 -> Array(Ref(0)) +/// ] +/// ``` +/// are forbidden. (Because most languages do not support anonymous recursive types.) +/// +/// The input must satisfy `AlgebraicType::is_valid_for_client_type_use`. +#[derive(Debug, Clone)] +pub struct TypespaceForGenerate { + defs: HashMap, +} + +impl TypespaceForGenerate { + /// Build a `TypespaceForGenerate`. + /// + /// We're required to declare known definitions up front. + /// This is required for distinguishing between a use of the unit type, and a reference to a type declaration of a product type with no elements. + pub fn builder( + typespace: &Typespace, + is_def: impl IntoIterator, + ) -> TypespaceForGenerateBuilder<'_> { + TypespaceForGenerateBuilder { + typespace, + result: TypespaceForGenerate { defs: HashMap::new() }, + is_def: is_def.into_iter().collect(), + uses: HashSet::new(), + known_uses: HashMap::new(), + currently_touching: HashSet::new(), + } + } + + /// Get the definitions of the typespace. + pub fn defs(&self) -> &HashMap { + &self.defs + } + + /// Get a definition in the typespace. 
+ pub fn get(&self, ref_: AlgebraicTypeRef) -> Option<&AlgebraicTypeDef> { + self.defs.get(&ref_) + } +} + +impl Index for TypespaceForGenerate { + type Output = AlgebraicTypeDef; + + fn index(&self, index: AlgebraicTypeRef) -> &Self::Output { + &self.defs[&index] + } +} +impl Index<&'_ AlgebraicTypeRef> for TypespaceForGenerate { + type Output = AlgebraicTypeDef; + + fn index(&self, index: &'_ AlgebraicTypeRef) -> &Self::Output { + &self.defs[index] + } +} + +/// An algebraic type definition. +#[derive(Debug, Clone, EnumAsInner)] +pub enum AlgebraicTypeDef { + /// A product type declaration. + Product(ProductTypeDef), + /// A sum type declaration. + Sum(SumTypeDef), + /// A plain enum definition. + PlainEnum(PlainEnumTypeDef), +} + +impl AlgebraicTypeDef { + /// Check if a def is recursive. + pub fn is_recursive(&self) -> bool { + match self { + AlgebraicTypeDef::Product(ProductTypeDef { recursive, .. }) => *recursive, + AlgebraicTypeDef::Sum(SumTypeDef { recursive, .. }) => *recursive, + AlgebraicTypeDef::PlainEnum(_) => false, + } + } + + /// Extract all `AlgebraicTypeRef`s that are used in this type into the buffer. + fn extract_refs(&self, buf: &mut HashSet) { + match self { + AlgebraicTypeDef::Product(ProductTypeDef { elements, .. }) => { + for (_, ty) in elements.iter() { + ty.extract_refs(buf); + } + } + AlgebraicTypeDef::Sum(SumTypeDef { variants, .. }) => { + for (_, ty) in variants.iter() { + ty.extract_refs(buf); + } + } + AlgebraicTypeDef::PlainEnum(_) => {} + } + } + + /// Mark a def recursive. + /// Panics if the def is a `PlainEnum`, because how would that be recursive? + fn mark_recursive(&mut self) { + match self { + AlgebraicTypeDef::Product(ProductTypeDef { recursive, .. }) => { + *recursive = true; + } + AlgebraicTypeDef::Sum(SumTypeDef { recursive, .. }) => { + *recursive = true; + } + AlgebraicTypeDef::PlainEnum(def) => { + panic!("mark_recursive called on a PlainEnumTypeDef: {def:?}"); + } + } + } +} + +/// A product type definition. 
+#[derive(Debug, Clone, Eq, PartialEq)] +pub struct ProductTypeDef { + /// The elements of the product type, in order. + pub elements: Box<[(Identifier, AlgebraicTypeUse)]>, + /// If the type is recursive, that is, contains a use of itself. + pub recursive: bool, +} + +impl<'a> IntoIterator for &'a ProductTypeDef { + type Item = &'a (Identifier, AlgebraicTypeUse); + type IntoIter = std::slice::Iter<'a, (Identifier, AlgebraicTypeUse)>; + fn into_iter(self) -> Self::IntoIter { + self.elements.iter() + } +} + +/// A sum type definition. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct SumTypeDef { + /// The variants of the sum type, in order. + pub variants: Box<[(Identifier, AlgebraicTypeUse)]>, + /// If the type is recursive, that is, contains a use of itself. + pub recursive: bool, +} + +/// A sum type, all of whose variants contain (). +#[derive(Debug, Clone)] +pub struct PlainEnumTypeDef { + pub variants: Box<[Identifier]>, +} + +/// Scalar types, i.e. bools, integers and floats. +/// These types do not require a `VarLenRef` indirection when stored in a `spacetimedb_table::table::Table`. 
+#[derive(Debug, PartialEq, Eq, Clone, Copy, Hash)]
+pub enum PrimitiveType {
+    Bool,
+    I8,
+    U8,
+    I16,
+    U16,
+    I32,
+    U32,
+    I64,
+    U64,
+    I128,
+    U128,
+    I256,
+    U256,
+    F32,
+    F64,
+}
+
+impl PrimitiveType {
+    pub fn algebraic_type(&self) -> AlgebraicType {
+        match self {
+            PrimitiveType::Bool => AlgebraicType::Bool,
+            PrimitiveType::I8 => AlgebraicType::I8,
+            PrimitiveType::U8 => AlgebraicType::U8,
+            PrimitiveType::I16 => AlgebraicType::I16,
+            PrimitiveType::U16 => AlgebraicType::U16,
+            PrimitiveType::I32 => AlgebraicType::I32,
+            PrimitiveType::U32 => AlgebraicType::U32,
+            PrimitiveType::I64 => AlgebraicType::I64,
+            PrimitiveType::U64 => AlgebraicType::U64,
+            PrimitiveType::I128 => AlgebraicType::I128,
+            PrimitiveType::U128 => AlgebraicType::U128,
+            PrimitiveType::I256 => AlgebraicType::I256,
+            PrimitiveType::U256 => AlgebraicType::U256,
+            PrimitiveType::F32 => AlgebraicType::F32,
+            PrimitiveType::F64 => AlgebraicType::F64,
+        }
+    }
+}
+
+impl<'a> IntoIterator for &'a SumTypeDef {
+    type Item = &'a (Identifier, AlgebraicTypeUse);
+    type IntoIter = std::slice::Iter<'a, (Identifier, AlgebraicTypeUse)>;
+    fn into_iter(self) -> Self::IntoIter {
+        self.variants.iter()
+    }
+}
+
+/// A use of an algebraic type.
+///
+/// This type uses `Arc`s to make cloning cheap.
+/// These `Arc`s are interned/hash-consed in the `TypespaceForGenerateBuilder`.
+/// They are not semantically meaningful and are guaranteed to be acyclic.
+#[derive(Debug, Clone, Eq, PartialEq, Hash)]
+pub enum AlgebraicTypeUse {
+    /// A type where the definition is given by the typing context (`Typespace`).
+    /// In other words, this is defined by a pointer to another `AlgebraicType`.
+    /// An `AlgebraicTypeUse::Ref` must point to an `AlgebraicTypeDef`.
+    Ref(AlgebraicTypeRef),
+
+    /// The type of array values where elements are of a base type `elem_ty`.
+    /// Values [`AlgebraicValue::Array(array)`](crate::AlgebraicValue::Array) will have this type.
+    Array(Arc<AlgebraicTypeUse>),
+
+    /// The type of map values consisting of a key type `key` and a value type `value`.
+    /// Values [`AlgebraicValue::Map(map)`](crate::AlgebraicValue::Map) will have this type.
+    /// The order of entries in a map value is observable.
+    Map {
+        key: Arc<AlgebraicTypeUse>,
+        value: Arc<AlgebraicTypeUse>,
+    },
+
+    /// A standard structural option type.
+    Option(Arc<AlgebraicTypeUse>),
+
+    /// The special `ScheduleAt` type.
+    ScheduleAt,
+
+    /// The special `Identity` type.
+    Identity,
+
+    /// The special `Address` type.
+    Address,
+
+    /// The unit type (empty product).
+    /// This is *distinct* from a use of a definition of a product type with no elements.
+    Unit,
+
+    /// The never type (empty sum).
+    /// This is *distinct* from a use of a definition of a sum type with no variants.
+    Never,
+
+    /// The UTF-8 encoded `String` type.
+    String,
+
+    /// A primitive type.
+    Primitive(PrimitiveType),
+}
+
+impl AlgebraicTypeUse {
+    /// Extract all `AlgebraicTypeRef`s that are used in this type and add them to `buf`.
+    fn extract_refs(&self, buf: &mut HashSet<AlgebraicTypeRef>) {
+        match self {
+            AlgebraicTypeUse::Ref(ref_) => {
+                buf.insert(*ref_);
+            }
+            AlgebraicTypeUse::Array(elem_ty) => elem_ty.extract_refs(buf),
+            AlgebraicTypeUse::Map { key, value } => {
+                key.extract_refs(buf);
+                value.extract_refs(buf);
+            }
+            AlgebraicTypeUse::Option(elem_ty) => elem_ty.extract_refs(buf),
+            _ => {}
+        }
+    }
+}
+
+/// A builder for a `TypespaceForGenerate`.
+///
+/// This is complicated by the fact that a typespace can store both *uses* and *definitions* of types.
+pub struct TypespaceForGenerateBuilder<'a> {
+    /// The original typespace.
+    typespace: &'a Typespace,
+
+    /// The result we are building.
+    /// Invariant: every `Def` in here has had all of its elements parsed; `recursive` flags are only set by `finish`.
+    /// Not all `Def`s may have been processed yet.
+    result: TypespaceForGenerate,
+
+    /// The AlgebraicTypeRefs that we know point to definitions. Must be declared at the start of building.
+    /// This is necessary to disambiguate between a use of the unit type, and a reference to a type declaration of a product type with no elements.
+    is_def: HashSet<AlgebraicTypeRef>,
+
+    /// Interning data structure, no semantic meaning.
+    /// We only intern `AlgebraicTypeUse`s that are nested inside other `AlgebraicTypeUse`s.
+    uses: HashSet<Arc<AlgebraicTypeUse>>,
+
+    /// Cache of `AlgebraicTypeRef`s that point to uses, mapped to the use each one resolved to.
+    known_uses: HashMap<AlgebraicTypeRef, AlgebraicTypeUse>,
+
+    /// Stores all `AlgebraicTypeRef`s that are currently being operated on.
+    currently_touching: HashSet<AlgebraicTypeRef>,
+}
+
+impl TypespaceForGenerateBuilder<'_> {
+    /// Finish building the `TypespaceForGenerate`.
+    /// Panics if `add_definition` has not been called for all of `is_def`.
+    pub fn finish(mut self) -> TypespaceForGenerate {
+        // Every ref declared in `is_def` must have been passed to `add_definition` by now.
+        for type_ in self.is_def.iter() {
+            assert!(
+                self.result.defs.contains_key(type_),
+                "internal codegen error: not all definitions were processed. \
+                 Did you call `add_definition` for all types in `is_def`?"
+            );
+        }
+
+        self.mark_allowed_cycles();
+
+        self.result
+    }
+
+    /// Convert an `AlgebraicType` found in use position into an `AlgebraicTypeUse`, validating it on the way.
+    /// Does not add anything to the `TypespaceForGenerate`; only the interning and `known_uses` caches change.
+    pub fn parse_use(&mut self, ty: &AlgebraicType) -> Result<AlgebraicTypeUse> {
+        if ty.is_address() {
+            Ok(AlgebraicTypeUse::Address)
+        } else if ty.is_identity() {
+            Ok(AlgebraicTypeUse::Identity)
+        } else if ty.is_unit() {
+            Ok(AlgebraicTypeUse::Unit)
+        } else if ty.is_never() {
+            Ok(AlgebraicTypeUse::Never)
+        } else if let Some(elem_ty) = ty.as_option() {
+            let elem_ty = self.parse_use(elem_ty)?;
+            let interned = self.intern_use(elem_ty);
+            Ok(AlgebraicTypeUse::Option(interned))
+        } else if ty.is_schedule_at() {
+            Ok(AlgebraicTypeUse::ScheduleAt)
+        } else {
+            match ty {
+                AlgebraicType::Ref(ref_) => {
+                    // Indirectly recurse.
+                    self.parse_ref(*ref_)
+                }
+                AlgebraicType::Array(ArrayType { elem_ty }) => {
+                    let elem_ty = self.parse_use(elem_ty)?;
+                    let interned = self.intern_use(elem_ty);
+                    Ok(AlgebraicTypeUse::Array(interned))
+                }
+                AlgebraicType::Map(map) => {
+                    let key_ty = self.parse_use(&map.key_ty);
+                    let value_ty = self.parse_use(&map.ty);
+                    let (key_ty, value_ty) = (key_ty, value_ty).combine_errors()?;
+                    let interned_key = self.intern_use(key_ty);
+                    let interned_value = self.intern_use(value_ty);
+                    Ok(AlgebraicTypeUse::Map {
+                        key: interned_key,
+                        value: interned_value,
+                    })
+                }
+
+                AlgebraicType::String => Ok(AlgebraicTypeUse::String),
+                AlgebraicType::Bool => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::Bool)),
+                AlgebraicType::I8 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::I8)),
+                AlgebraicType::U8 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::U8)),
+                AlgebraicType::I16 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::I16)),
+                AlgebraicType::U16 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::U16)),
+                AlgebraicType::I32 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::I32)),
+                AlgebraicType::U32 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::U32)),
+                AlgebraicType::I64 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::I64)),
+                AlgebraicType::U64 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::U64)),
+                AlgebraicType::I128 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::I128)),
+                AlgebraicType::U128 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::U128)),
+                AlgebraicType::I256 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::I256)),
+                AlgebraicType::U256 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::U256)),
+                AlgebraicType::F32 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::F32)),
+                AlgebraicType::F64 => Ok(AlgebraicTypeUse::Primitive(PrimitiveType::F64)),
+                ty @ (AlgebraicType::Product(_) | AlgebraicType::Sum(_)) => {
+                    Err(ErrorStream::from(ClientCodegenError::NonSpecialTypeNotAUse {
+                        ty: PrettyAlgebraicType(ty.clone()),
+                    }))
+                }
+            }
+        }
+    }
+
+    /// This is the only seriously complicated case of `parse_use`, which has to deal with cycle detection.
+    /// So it gets its own function.
+    /// Mutually recursive with `parse_use`.
+    fn parse_ref(&mut self, ref_: AlgebraicTypeRef) -> Result<AlgebraicTypeUse> {
+        if self.is_def.contains(&ref_) {
+            // We know this type is going to be a definition.
+            // So, we can just return a ref to it.
+            Ok(AlgebraicTypeUse::Ref(ref_))
+        } else if let Some(use_) = self.known_uses.get(&ref_) {
+            // The ref is to a use which we have already seen.
+            Ok(use_.clone())
+        } else {
+            // We haven't processed it yet. It's either a ref to a valid use, or invalid.
+            let def = self
+                .typespace
+                .get(ref_)
+                .ok_or(TypeRefError::InvalidTypeRef(ref_))
+                .and_then(|def| {
+                    if def == &AlgebraicType::Ref(ref_) {
+                        // Self-reference.
+                        Err(TypeRefError::RecursiveTypeRef(ref_))
+                    } else {
+                        Ok(def)
+                    }
+                })
+                .map_err(|e| ErrorStream::from(ClientCodegenError::TypeRefError(e)))?;
+
+            if self.currently_touching.contains(&ref_) {
+                return Err(ClientCodegenError::TypeRefError(TypeRefError::RecursiveTypeRef(ref_)).into());
+            }
+
+            // Mark this ref.
+            self.currently_touching.insert(ref_);
+            // Recurse.
+            let result = self.parse_use(def);
+            // Unmark this ref before dealing with possible errors.
+            self.currently_touching.remove(&ref_);
+
+            let use_ = result?;
+
+            self.known_uses.insert(ref_, use_.clone());
+
+            Ok(use_)
+        }
+    }
+
+    /// Add a definition.
+    /// Not mutually recursive with anything.
+    /// Does not detect cycles, those are left for `mark_allowed_cycles`, which is called after all definitions are processed.
+    ///
+    /// Why not invoke this for all definitions ourselves, since we know which refs are definitions?
+    /// It's so that the caller can wrap errors with better context information.
+    pub fn add_definition(&mut self, ref_: AlgebraicTypeRef) -> Result<()> {
+        assert!(
+            self.is_def.contains(&ref_),
+            "internal codegen error: any AlgebraicTypeRef passed to `add_definition` must refer to a declared definition, {ref_} does not"
+        );
+
+        let def = self
+            .typespace
+            .get(ref_)
+            .ok_or_else(|| ErrorStream::from(ClientCodegenError::TypeRefError(TypeRefError::InvalidTypeRef(ref_))))?;
+
+        let result = match def {
+            AlgebraicType::Product(product) => product
+                .elements
+                .iter()
+                .map(|ProductTypeElement { name, algebraic_type }| self.process_element(def, name, algebraic_type))
+                .collect_all_errors()
+                .map(|elements| {
+                    // Every element parsed successfully; whether the def is recursive is decided later.
+                    self.result.defs.insert(
+                        ref_,
+                        AlgebraicTypeDef::Product(ProductTypeDef {
+                            elements,
+                            recursive: false, // set in `mark_allowed_cycles`
+                        }),
+                    );
+                }),
+            AlgebraicType::Sum(sum) => sum
+                .variants
+                .iter()
+                .map(|SumTypeVariant { name, algebraic_type }| self.process_element(def, name, algebraic_type))
+                .collect_all_errors::<Vec<_>>()
+                .map(|variants| {
+                    if variants.iter().all(|(_, ty)| ty == &AlgebraicTypeUse::Unit) {
+                        // No variant carries a payload, so store a plain enum and keep only the variant names.
+                        let variants = variants.into_iter().map(|(name, _)| name).collect();
+                        self.result
+                            .defs
+                            .insert(ref_, AlgebraicTypeDef::PlainEnum(PlainEnumTypeDef { variants }));
+                    } else {
+                        let variants = variants.into_boxed_slice();
+
+                        self.result.defs.insert(
+                            ref_,
+                            AlgebraicTypeDef::Sum(SumTypeDef {
+                                variants,
+                                recursive: false, // set in `mark_allowed_cycles`
+                            }),
+                        );
+                    }
+                }),
+            _ => Err(ClientCodegenError::NotValidForDefinition {
+                ty: PrettyAlgebraicType(def.clone()),
+            }
+            .into()),
+        };
+
+        result
+    }
+
+    /// Process an element/variant of a product/sum type.
+    ///
+    /// `def` is the *containing* type that corresponds to a `Def`,
+    /// `element_type` is the type of the element/variant inside `def` and corresponds to a `Use`.
+    fn process_element(
+        &mut self,
+        def: &AlgebraicType,
+        element_name: &Option<Box<str>>,
+        element_type: &AlgebraicType,
+    ) -> Result<(Identifier, AlgebraicTypeUse)> {
+        let element_name = element_name
+            .as_ref()
+            .ok_or_else(|| ErrorStream::from(ClientCodegenError::NamelessTypeDefElement { ty: def.clone().into() }))
+            .and_then(|element_name| {
+                Identifier::new(element_name.clone()).map_err(|err| {
+                    ErrorStream::from(ClientCodegenError::NotValidIdentifier {
+                        ty: def.clone().into(),
+                        err,
+                    })
+                })
+            });
+        let ty = self.parse_use(element_type);
+        (element_name, ty).combine_errors()
+    }
+
+    /// Intern a use, returning the shared `Arc` for it.
+    /// This is only used on types *inside* Map, Array, and Option types.
+    fn intern_use(&mut self, use_: AlgebraicTypeUse) -> Arc<AlgebraicTypeUse> {
+        if let Some(ty) = self.uses.get(&use_) {
+            return ty.clone();
+        }
+        let ty = Arc::new(use_);
+        self.uses.insert(ty.clone());
+        ty
+    }
+
+    /// Cycles passing through definitions are allowed.
+    /// This function is called after all definitions have been processed.
+    fn mark_allowed_cycles(&mut self) {
+        let mut to_process = self.is_def.clone();
+        let mut scratch = HashSet::new();
+        // `currently_touching` also serves `parse_ref`; clear it and reuse it for this traversal.
+        self.currently_touching.clear();
+
+        while let Some(ref_) = to_process.iter().next().cloned() {
+            self.mark_allowed_cycles_rec(None, ref_, &mut to_process, &mut scratch);
+        }
+    }
+
+    /// Recursively mark allowed cycles.
+    fn mark_allowed_cycles_rec(
+        &mut self,
+        parent: Option<&ParentChain>,
+        def: AlgebraicTypeRef,
+        to_process: &mut HashSet<AlgebraicTypeRef>,
+        scratch: &mut HashSet<AlgebraicTypeRef>,
+    ) {
+        // Mark who we're touching right now.
+        let correct = self.currently_touching.insert(def);
+        assert!(
+            correct,
+            "mark_allowed_cycles_rec should never be called on a ref that is already being touched"
+        );
+
+        // Figure out who to look at.
+        // Note: this skips over refs in the original typespace that
+        // didn't point to definitions; those have already been removed.
+        scratch.clear();
+        let to_examine = scratch;
+        self.result.defs[&def].extract_refs(to_examine);
+
+        // Update the parent chain with the current def, for passing to children.
+        let chain = ParentChain { parent, ref_: def };
+
+        // First, check for finished cycles.
+        for element in to_examine.iter() {
+            if self.currently_touching.contains(element) {
+                // We have a cycle.
+                for parent_ref in chain.iter() {
+                    // For each def participating in the cycle, mark it as recursive.
+                    self.result
+                        .defs
+                        .get_mut(&parent_ref)
+                        .expect("all defs should have been processed by now")
+                        .mark_recursive();
+                    // It's tempting to also remove `parent_ref` from `to_process` here,
+                    // but that's wrong, because it might participate in other cycles.
+
+                    // We want to mark the start of the cycle as recursive too.
+                    // If we've just done that, break.
+                    if parent_ref == *element {
+                        break;
+                    }
+                }
+            }
+        }
+
+        // Now that we've marked everything possible, we need to recurse.
+        // Need a buffer to iterate from because we reuse `to_examine` in children.
+        // This will usually not allocate. Most defs have fewer than 16 refs.
+        let to_recurse = to_examine
+            .iter()
+            .cloned()
+            .filter(|element| to_process.contains(element) && !self.currently_touching.contains(element))
+            .collect::<SmallVec<[AlgebraicTypeRef; 16]>>();
+
+        // Recurse.
+        let scratch = to_examine;
+        for element in to_recurse {
+            self.mark_allowed_cycles_rec(Some(&chain), element, to_process, scratch);
+        }
+
+        // We're done with this def.
+        // Clean up our state.
+        let correct = self.currently_touching.remove(&def);
+        assert!(
+            correct,
+            "mark_allowed_cycles_rec is finishing, we should be touching that ref."
+        );
+        // Only remove a def from `to_process` once we've explored all the paths leaving it.
+        to_process.remove(&def);
+    }
+}
+
+/// A chain of parent type definitions.
+/// If type T uses type U, then T is a parent of U.
+struct ParentChain<'a> {
+    parent: Option<&'a ParentChain<'a>>,
+    ref_: AlgebraicTypeRef,
+}
+impl<'a> ParentChain<'a> {
+    fn iter(&'a self) -> ParentChainIter<'a> {
+        ParentChainIter { current: Some(self) }
+    }
+}
+
+/// An iterator over a chain of parent type definitions, starting at the innermost link.
+struct ParentChainIter<'a> {
+    current: Option<&'a ParentChain<'a>>,
+}
+impl Iterator for ParentChainIter<'_> {
+    type Item = AlgebraicTypeRef;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let current = self.current?;
+        self.current = current.parent;
+        Some(current.ref_)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use proptest::prelude::*;
+    use spacetimedb_data_structures::expect_error_matching;
+    use spacetimedb_lib::AlgebraicType;
+    use spacetimedb_sats::proptest::generate_typespace_valid_for_codegen;
+
+    fn is_def(typespace: &Typespace) -> HashSet<AlgebraicTypeRef> {
+        typespace
+            .refs_with_types()
+            .filter_map(|(ref_, ty)| {
+                if ty.is_valid_for_client_type_definition() {
+                    Some(ref_)
+                } else {
+                    None
+                }
+            })
+            .collect()
+    }
+
+    proptest! {
+        #[test]
+        fn test_valid_typespace(t in generate_typespace_valid_for_codegen(5)) {
+            let is_def = is_def(&t);
+            let mut builder = TypespaceForGenerate::builder(&t, is_def.clone());
+
+            for (ref_, ty) in t.refs_with_types() {
+                if is_def.contains(&ref_) {
+                    builder.add_definition(ref_).unwrap();
+                } else {
+                    builder.parse_use(ty).unwrap();
+                }
+            }
+        }
+    }
+
+    #[test]
+    fn test_collapses_chains() {
+        let mut t = Typespace::default();
+        let def = t.add(AlgebraicType::product([("a", AlgebraicType::U32)]));
+        let ref0 = t.add(AlgebraicType::Ref(def));
+        let ref1 = t.add(AlgebraicType::array(AlgebraicType::Ref(def)));
+        let ref2 = t.add(AlgebraicType::option(AlgebraicType::Ref(ref1)));
+        let ref3 = t.add(AlgebraicType::map(AlgebraicType::U64, AlgebraicType::Ref(ref2)));
+        let ref4 = t.add(AlgebraicType::Ref(ref3));
+
+        let expected_0 = AlgebraicTypeUse::Ref(def);
+        let expected_1 = AlgebraicTypeUse::Array(Arc::new(expected_0.clone()));
+        let expected_2 = AlgebraicTypeUse::Option(Arc::new(expected_1.clone()));
+        let expected_3 = AlgebraicTypeUse::Map {
+            key: Arc::new(AlgebraicTypeUse::Primitive(PrimitiveType::U64)),
+            value: Arc::new(expected_2.clone()),
+        };
+        let expected_4 = expected_3.clone();
+
+        let mut for_generate_forward = TypespaceForGenerate::builder(&t, [def]);
+        for_generate_forward.add_definition(def).unwrap();
+        let use0 = for_generate_forward.parse_use(&ref0.into()).unwrap();
+        let use1 = for_generate_forward.parse_use(&ref1.into()).unwrap();
+        let use2 = for_generate_forward.parse_use(&ref2.into()).unwrap();
+        let use3 = for_generate_forward.parse_use(&ref3.into()).unwrap();
+        let use4 = for_generate_forward.parse_use(&ref4.into()).unwrap();
+
+        assert_eq!(use0, expected_0);
+        assert_eq!(use1, expected_1);
+        assert_eq!(use2, expected_2);
+        assert_eq!(use3, expected_3);
+        assert_eq!(use4, expected_4);
+
+        let mut for_generate_backward = TypespaceForGenerate::builder(&t, [def]);
+        let use4 = for_generate_backward.parse_use(&ref4.into()).unwrap();
+        let use3 = for_generate_backward.parse_use(&ref3.into()).unwrap();
+        let use2 = for_generate_backward.parse_use(&ref2.into()).unwrap();
+        let use1 = for_generate_backward.parse_use(&ref1.into()).unwrap();
+        let use0 = for_generate_backward.parse_use(&ref0.into()).unwrap();
+        for_generate_backward.add_definition(def).unwrap();
+
+        assert_eq!(use0, expected_0);
+        assert_eq!(use1, expected_1);
+        assert_eq!(use2, expected_2);
+        assert_eq!(use3, expected_3);
+        assert_eq!(use4, expected_4);
+    }
+
+    #[test]
+    fn test_detects_cycles() {
+        let cyclic_1 = Typespace::new(vec![AlgebraicType::Ref(AlgebraicTypeRef(0))]);
+        let mut for_generate = TypespaceForGenerate::builder(&cyclic_1, []);
+        let err1 = for_generate.parse_use(&AlgebraicType::Ref(AlgebraicTypeRef(0)));
+
+        expect_error_matching!(
+            err1,
+            ClientCodegenError::TypeRefError(TypeRefError::RecursiveTypeRef(AlgebraicTypeRef(0)))
+        );
+
+        let cyclic_2 = Typespace::new(vec![
+            AlgebraicType::Ref(AlgebraicTypeRef(1)),
+            AlgebraicType::Ref(AlgebraicTypeRef(0)),
+        ]);
+        let mut for_generate = TypespaceForGenerate::builder(&cyclic_2, []);
+        let err2 = for_generate.parse_use(&AlgebraicType::Ref(AlgebraicTypeRef(0)));
+
+        expect_error_matching!(
+            err2,
+            ClientCodegenError::TypeRefError(TypeRefError::RecursiveTypeRef(AlgebraicTypeRef(0)))
+        );
+
+        let cyclic_3 = Typespace::new(vec![
+            AlgebraicType::Ref(AlgebraicTypeRef(1)),
+            AlgebraicType::product([("field", AlgebraicType::Ref(AlgebraicTypeRef(0)))]),
+        ]);
+        let mut for_generate = TypespaceForGenerate::builder(&cyclic_3, [AlgebraicTypeRef(1)]);
+        for_generate
+            .parse_use(&AlgebraicType::Ref(AlgebraicTypeRef(0)))
+            .expect("should be allowed");
+        for_generate
+            .add_definition(AlgebraicTypeRef(1))
+            .expect("should be allowed");
+        let result = for_generate.finish();
+        let table = result.defs().get(&AlgebraicTypeRef(1)).expect("should be defined");
+
+        assert!(table.is_recursive(), "recursion not detected? table: {table:?}");
+
+        let cyclic_4 = Typespace::new(vec![
+            AlgebraicType::product([("field", AlgebraicTypeRef(1).into())]),
+            AlgebraicType::product([("field", AlgebraicTypeRef(2).into())]),
+            AlgebraicType::product([("field", AlgebraicTypeRef(3).into())]),
+            AlgebraicType::product([("field", AlgebraicTypeRef(0).into())]),
+            AlgebraicType::product([("field", AlgebraicTypeRef(0).into())]),
+        ]);
+        let mut for_generate = TypespaceForGenerate::builder(
+            &cyclic_4,
+            [
+                AlgebraicTypeRef(0),
+                AlgebraicTypeRef(1),
+                AlgebraicTypeRef(2),
+                AlgebraicTypeRef(3),
+                AlgebraicTypeRef(4),
+            ],
+        );
+
+        for i in 0..5 {
+            for_generate
+                .parse_use(&AlgebraicType::Ref(AlgebraicTypeRef(i)))
+                .expect("should be allowed");
+            for_generate
+                .add_definition(AlgebraicTypeRef(i))
+                .expect("should be allowed");
+        }
+        let result = for_generate.finish();
+
+        for i in 0..4 {
+            assert!(result[AlgebraicTypeRef(i)].is_recursive(), "recursion not detected");
+        }
+        assert!(
+            !result[AlgebraicTypeRef(4)].is_recursive(),
+            "recursion detected incorrectly"
+        );
+
+        // Branching cycles.
+        let cyclic_5 = Typespace::new(vec![
+            // cyclic component.
+            AlgebraicType::product([("field", AlgebraicTypeRef(1).into())]),
+            AlgebraicType::product([
+                ("cyclic_1", AlgebraicTypeRef(2).into()),
+                ("cyclic_2", AlgebraicTypeRef(3).into()),
+                ("acyclic", AlgebraicTypeRef(5).into()),
+            ]),
+            AlgebraicType::product([("field", AlgebraicTypeRef(0).into())]),
+            AlgebraicType::product([("field", AlgebraicTypeRef(0).into())]),
+            // points into cyclic component, but is not cyclic.
+            AlgebraicType::product([("field", AlgebraicTypeRef(2).into())]),
+            // referred to by cyclic component, but is not cyclic.
+            AlgebraicType::product([("field", AlgebraicType::U32)]),
+        ]);
+        let mut for_generate = TypespaceForGenerate::builder(
+            &cyclic_5,
+            [
+                AlgebraicTypeRef(0),
+                AlgebraicTypeRef(1),
+                AlgebraicTypeRef(2),
+                AlgebraicTypeRef(3),
+                AlgebraicTypeRef(4),
+                AlgebraicTypeRef(5),
+            ],
+        );
+
+        for i in 0..6 {
+            for_generate
+                .parse_use(&AlgebraicType::Ref(AlgebraicTypeRef(i)))
+                .expect("should be allowed");
+            for_generate
+                .add_definition(AlgebraicTypeRef(i))
+                .expect("should be allowed");
+        }
+        let result = for_generate.finish();
+
+        for i in 0..4 {
+            assert!(result[AlgebraicTypeRef(i)].is_recursive(), "recursion not detected");
+        }
+        for i in 4..6 {
+            assert!(
+                !result[AlgebraicTypeRef(i)].is_recursive(),
+                "recursion detected incorrectly"
+            );
+        }
+    }
+}
diff --git a/crates/table/src/layout.rs b/crates/table/src/layout.rs
index aaef699b53..2628cd4688 100644
--- a/crates/table/src/layout.rs
+++ b/crates/table/src/layout.rs
@@ -16,6 +16,7 @@ use enum_as_inner::EnumAsInner;
 use spacetimedb_sats::{
     bsatn, AlgebraicType, AlgebraicValue, ProductType, ProductTypeElement, ProductValue, SumType, SumTypeVariant,
 };
+pub use spacetimedb_schema::type_for_generate::PrimitiveType;
 
 /// Aligns a `base` offset to the `required_alignment` (in the positive direction) and returns it.
 ///
@@ -269,27 +270,6 @@ pub struct SumTypeVariantLayout {
     pub name: Option<Box<str>>,
 }
 
-/// Scalar types, i.e. bools, integers and floats.
-/// These types do not require a `VarLenRef` indirection.
-#[derive(Debug, PartialEq, Eq, Clone, Copy)] -pub enum PrimitiveType { - Bool, - I8, - U8, - I16, - U16, - I32, - U32, - I64, - U64, - I128, - U128, - I256, - U256, - F32, - F64, -} - impl HasLayout for PrimitiveType { fn layout(&self) -> &'static Layout { match self { @@ -467,28 +447,6 @@ impl AlgebraicTypeLayout { } } -impl PrimitiveType { - fn algebraic_type(&self) -> AlgebraicType { - match self { - PrimitiveType::Bool => AlgebraicType::Bool, - PrimitiveType::I8 => AlgebraicType::I8, - PrimitiveType::U8 => AlgebraicType::U8, - PrimitiveType::I16 => AlgebraicType::I16, - PrimitiveType::U16 => AlgebraicType::U16, - PrimitiveType::I32 => AlgebraicType::I32, - PrimitiveType::U32 => AlgebraicType::U32, - PrimitiveType::I64 => AlgebraicType::I64, - PrimitiveType::U64 => AlgebraicType::U64, - PrimitiveType::I128 => AlgebraicType::I128, - PrimitiveType::U128 => AlgebraicType::U128, - PrimitiveType::I256 => AlgebraicType::I256, - PrimitiveType::U256 => AlgebraicType::U256, - PrimitiveType::F32 => AlgebraicType::F32, - PrimitiveType::F64 => AlgebraicType::F64, - } - } -} - impl VarLenType { fn algebraic_type(&self) -> AlgebraicType { match self {