Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

websocket: send/receive reducer & table ids instead of names #1883

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
  •  
  •  
  •  
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

74 changes: 33 additions & 41 deletions crates/cli/src/subcommands/generate/csharp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::fmt::{self, Write};
use std::ops::Deref;

use convert_case::{Case, Casing};
use itertools::Itertools;
use spacetimedb_lib::sats::{AlgebraicType, AlgebraicTypeRef, ArrayType, ProductType, SumType};
use spacetimedb_lib::ReducerDef;
use spacetimedb_primitives::ColList;
Expand Down Expand Up @@ -505,7 +506,7 @@ fn autogen_csharp_access_funcs_for_struct(
}
}

pub fn autogen_csharp_reducer(ctx: &GenCtx, reducer: &ReducerDef, namespace: &str) -> String {
pub fn autogen_csharp_reducer(ctx: &GenCtx, reducer_idx: usize, reducer: &ReducerDef, namespace: &str) -> String {
let func_name = &*reducer.name;
let func_name_pascal_case = func_name.to_case(Case::Pascal);

Expand All @@ -515,7 +516,7 @@ pub fn autogen_csharp_reducer(ctx: &GenCtx, reducer: &ReducerDef, namespace: &st
writeln!(output, "[SpacetimeDB.Type]");
writeln!(output, "public partial class {func_name_pascal_case} : IReducerArgs");
indented_block(&mut output, |output| {
writeln!(output, "string IReducerArgs.ReducerName => \"{func_name}\";");
writeln!(output, "uint IReducerArgs.ReducerIndex => {reducer_idx};");
if !reducer.args.is_empty() {
writeln!(output);
}
Expand All @@ -542,13 +543,17 @@ pub fn autogen_csharp_reducer(ctx: &GenCtx, reducer: &ReducerDef, namespace: &st
pub fn autogen_csharp_globals(ctx: &GenCtx, items: &[GenItem], namespace: &str) -> Vec<(String, String)> {
let mut results = Vec::new();

let tables = items.iter().filter_map(|i| {
if let GenItem::Table(table) = i {
Some(table)
} else {
None
}
});
let tables: Vec<_> = items
.iter()
.filter_map(|i| {
if let GenItem::Table(table) = i {
Some(table)
} else {
None
}
})
.sorted_by_key(|t| &t.schema.table_name)
.collect();

let reducers: Vec<&ReducerDef> = items
.iter()
Expand All @@ -559,6 +564,7 @@ pub fn autogen_csharp_globals(ctx: &GenCtx, items: &[GenItem], namespace: &str)
None
}
})
.sorted_by_key(|i| &i.name)
.collect();
let reducer_names: Vec<String> = reducers
.iter()
Expand All @@ -569,7 +575,7 @@ pub fn autogen_csharp_globals(ctx: &GenCtx, items: &[GenItem], namespace: &str)

writeln!(output, "public sealed class RemoteTables");
indented_block(&mut output, |output| {
for table in tables {
for &table in &tables {
let schema = &table.schema;
let name = &schema.table_name;
let csharp_name = name.as_ref().to_case(Case::Pascal);
Expand Down Expand Up @@ -760,9 +766,7 @@ pub fn autogen_csharp_globals(ctx: &GenCtx, items: &[GenItem], namespace: &str)
for reducer_name in &reducer_names {
writeln!(output, "{reducer_name} {reducer_name},");
}
writeln!(output, "Unit StdbNone,");
writeln!(output, "Unit StdbIdentityConnected,");
writeln!(output, "Unit StdbIdentityDisconnected");
writeln!(output, "Unit StdbNone");
}
writeln!(output, ")>;");

Expand All @@ -782,43 +786,33 @@ pub fn autogen_csharp_globals(ctx: &GenCtx, items: &[GenItem], namespace: &str)
writeln!(output, "Reducers = new(this, this.SetReducerFlags);");
writeln!(output);

for item in items {
if let GenItem::Table(table) = item {
writeln!(
output,
"clientDB.AddTable<{table_type}>(\"{table_name}\", Db.{csharp_table_name});",
table_type = csharp_typename(ctx, table.data),
table_name = table.schema.table_name,
csharp_table_name = table.schema.table_name.as_ref().to_case(Case::Pascal)
);
}
for (table_idx, &table) in tables.iter().enumerate() {
writeln!(
output,
"clientDB.AddTable<{table_type}>({table_idx}, Db.{csharp_table_name});",
table_type = csharp_typename(ctx, table.data),
csharp_table_name = table.schema.table_name.as_ref().to_case(Case::Pascal)
);
}
});
writeln!(output);

writeln!(output, "protected override Reducer ToReducer(TransactionUpdate update)");
writeln!(
output,
"protected override Reducer ToReducer(uint reducerIdx, TransactionUpdate update)"
);
indented_block(output, |output| {
writeln!(output, "var encodedArgs = update.ReducerCall.Args;");
writeln!(output, "return update.ReducerCall.ReducerName switch {{");
writeln!(output, "return reducerIdx switch {{");
{
indent_scope!(output);
for (reducer, reducer_name) in std::iter::zip(&reducers, &reducer_names) {
let reducer_str_name = &reducer.name;
for (reducer_idx, reducer_name) in reducer_names.iter().enumerate() {
writeln!(
output,
"\"{reducer_str_name}\" => new Reducer.{reducer_name}(BSATNHelpers.Decode<{reducer_name}>(encodedArgs)),"
"{reducer_idx} => new Reducer.{reducer_name}(BSATNHelpers.Decode<{reducer_name}>(encodedArgs)),"
);
}
writeln!(output, "\"<none>\" => new Reducer.StdbNone(default),");
writeln!(
output,
"\"__identity_connected__\" => new Reducer.StdbIdentityConnected(default),"
);
writeln!(
output,
"\"__identity_disconnected__\" => new Reducer.StdbIdentityDisconnected(default),"
);
writeln!(output, "\"\" => new Reducer.StdbNone(default),"); //Transaction from CLI command
writeln!(output, "4294967295 => new Reducer.StdbNone(default),");
writeln!(
output,
r#"var reducer => throw new ArgumentOutOfRangeException("Reducer", $"Unknown reducer {{reducer}}")"#
Expand Down Expand Up @@ -850,9 +844,7 @@ pub fn autogen_csharp_globals(ctx: &GenCtx, items: &[GenItem], namespace: &str)
"Reducer.{reducer_name}(var args) => Reducers.Invoke{reducer_name}(eventContext, args),"
);
}
writeln!(output, "Reducer.StdbNone or");
writeln!(output, "Reducer.StdbIdentityConnected or");
writeln!(output, "Reducer.StdbIdentityDisconnected => true,");
writeln!(output, "Reducer.StdbNone => true,");
writeln!(
output,
r#"_ => throw new ArgumentOutOfRangeException("Reducer", $"Unknown reducer {{reducer}}")"#
Expand Down
35 changes: 21 additions & 14 deletions crates/cli/src/subcommands/generate/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use spacetimedb_schema::schema::{Schema, TableSchema};
use std::fs;
use std::ops::Deref;
use std::path::{Path, PathBuf};
use util::{iter_reducers, iter_tables};
use wasmtime::{Caller, StoreContextMut};

use crate::util::y_or_n;
Expand Down Expand Up @@ -266,15 +267,14 @@ pub fn generate(module: RawModuleDef, lang: Language, namespace: &str) -> anyhow
let items = itertools::chain!(
types,
tables.into_iter().map(GenItem::Table),
reducers
.filter(|r| !(r.name.starts_with("__") && r.name.ends_with("__")))
.map(GenItem::Reducer),
reducers.map(GenItem::Reducer),
);

let items: Vec<GenItem> = items.collect();
let reducer_idx = &mut 0;
let mut files: Vec<(String, String)> = items
.iter()
.filter_map(|item| item.generate(&ctx, lang, namespace))
.filter_map(|item| item.generate(&ctx, reducer_idx, lang, namespace))
.collect();
files.extend(generate_globals(&ctx, lang, namespace, &items));
files
Expand All @@ -284,10 +284,10 @@ pub fn generate(module: RawModuleDef, lang: Language, namespace: &str) -> anyhow

fn generate_lang(module: &ModuleDef, lang: impl Lang, namespace: &str) -> Vec<(String, String)> {
itertools::chain!(
module.tables().map(|tbl| {
iter_tables(module).enumerate().map(|(idx, tbl)| {
(
lang.table_filename(module, tbl),
lang.generate_table(module, namespace, tbl),
lang.generate_table(idx as u32, module, namespace, tbl),
)
}),
module.types().map(|typ| {
Expand All @@ -296,10 +296,10 @@ fn generate_lang(module: &ModuleDef, lang: impl Lang, namespace: &str) -> Vec<(S
lang.generate_type(module, namespace, typ),
)
}),
module.reducers().map(|reducer| {
iter_reducers(module).enumerate().map(|(idx, reducer)| {
(
lang.reducer_filename(&reducer.name),
lang.generate_reducer(module, namespace, reducer),
lang.generate_reducer(idx as u32, module, namespace, reducer),
)
}),
lang.generate_globals(module, namespace),
Expand All @@ -312,9 +312,9 @@ trait Lang {
fn type_filename(&self, type_name: &ScopedTypeName) -> String;
fn reducer_filename(&self, reducer_name: &Identifier) -> String;

fn generate_table(&self, module: &ModuleDef, namespace: &str, tbl: &TableDef) -> String;
fn generate_table(&self, idx: u32, module: &ModuleDef, namespace: &str, tbl: &TableDef) -> String;
fn generate_type(&self, module: &ModuleDef, namespace: &str, typ: &TypeDef) -> String;
fn generate_reducer(&self, module: &ModuleDef, namespace: &str, reducer: &ReducerDef) -> String;
fn generate_reducer(&self, idx: u32, module: &ModuleDef, namespace: &str, reducer: &ReducerDef) -> String;
Comment on lines -315 to +317
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change to add indices to the client codegen? I believe, but would like it confirmed, that these are not table IDs which could be sent over a WebSocket; rather, they appear to be an unrelated optimization to the SDK internals.

If that is the case, I would strongly prefer this be broken out into a separate PR, and frankly I would see very little reason to approve such a PR without measurements showing that HashMap lookups of table and reducer name strings are a significant performance overhead in the SDKs. I would also question why we'+ prefer to introduce a new ID to this interface, rather than a change purely internal to the codegen, possibly involving either generating these indices within the codegen, or possibly involving a perfect hash function.

If that is not the case, and this change is necessary or useful towards the actual goal of this PR, namely transmitting integer IDs rather than name strings over the WebSocket API, then I would like to see documentation on this trait's methods describing what these indices mean, where they come from and how they're used. I would also like the PR description amended to describe this change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why this change to add indices to the client codegen? I believe, but would like it confirmed, that these are not table IDs which could be sent over a WebSocket; rather, they appear to be an unrelated optimization to the SDK internals.

Indeed, these are indices into the 2 arrays, one for reducer names and one for reducer ids and the same for table names <-> table ids. This seemed like the most efficient way to represent things, avoiding hash maps in many cases in favor of just Vec<_>s.

If that is the case, I would strongly prefer this be broken out into a separate PR, [...]

Sure, I can do that, but then it will likely miss the train...

[...] and frankly I would see very little reason to approve such a PR without measurements showing that HashMap lookups of table and reducer name strings are a significant performance overhead in the SDKs. I would also question why we'+ prefer to introduce a new ID to this interface, rather than a change purely internal to the codegen, possibly involving either generating these indices within the codegen, or possibly involving a perfect hash function.

Why it would need to be a significant perf overhead. It seems clear to me that it is a perf win and more predictable to identity hash an u32 and using it as an index into a vector, rather than hashing strings. I don't understand what you mean regarding a change internal to the codegen. Whatever we do, we have to keep mappings K -> ID and sometimes ID -> K in the incoming-message-loop and DbContextImpl, as the values of ID are determined at handshake. K could then be &'static str or an index into the list of reducers/tables. This PR opted for the latter for efficiency reasons.

If that is not the case, and this change is necessary or useful towards the actual goal of this PR, namely transmitting integer IDs rather than name strings over the WebSocket API, then I would like to see documentation on this trait's methods describing what these indices mean, where they come from and how they're used. I would also like the PR description amended to describe this change.

Sounds good, I will definitely add that documentation/amend the PR description.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not dispute that this is obviously better for runtime performance, but there are non-technical costs to making changes like this. For example:

  • I have a stack of outstanding PRs, ending in an actual P1 ticket with actual performance and usability implications for the SDK, which will significantly conflict with this change.
  • Reviewing this PR was made artificially and unnecessarily more difficult because it included this change, and I had to sort through which parts of the diff related to the stated goal of the PR versus which parts related to this change.
  • Introducing a third identifier for database objects, in addition to the string names and the runtime IDs, adds significant potential for confusion. It's already not great that tables have both names and IDs, with different semantics, but at least the names are of an obviously distinct type from the IDs, so there's less risk of confusing the two. I believe there to be a very real cost in code complexity and maintenance to adding another integer identifier for database objects which is not interchangeable with IDs.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is the additional cost that making a larger change may introduce more bugs. As the person who has to deal with all the bugs and their downstream ramifications, I can tell you that the cost is very very high. We just dealt with it most recently with the change to u256 for Identity. The nature of these things is that it always feels like it will probably be fine, but it rarely is and then we pay the price downstream.

I agree with @gefjon, please separate the PR. I don't think this one can go in, but we'll get it in eventually. Either that or we can just have this be a different version of the API and maintain both and eventually deprecate the one Lightfox is using currently.

fn generate_globals(&self, module: &ModuleDef, namespace: &str) -> Vec<(String, String)>;
}

Expand All @@ -339,15 +339,21 @@ fn generate_globals(ctx: &GenCtx, lang: Language, namespace: &str, items: &[GenI
}

impl GenItem {
fn generate(&self, ctx: &GenCtx, lang: Language, namespace: &str) -> Option<(String, String)> {
fn generate(
&self,
ctx: &GenCtx,
reducer_idx: &mut usize,
lang: Language,
namespace: &str,
) -> Option<(String, String)> {
match lang {
Language::Csharp => self.generate_csharp(ctx, namespace),
Language::Csharp => self.generate_csharp(ctx, reducer_idx, namespace),
Language::TypeScript => unreachable!(),
Language::Rust => unreachable!(),
}
}

fn generate_csharp(&self, ctx: &GenCtx, namespace: &str) -> Option<(String, String)> {
fn generate_csharp(&self, ctx: &GenCtx, reducer_idx: &mut usize, namespace: &str) -> Option<(String, String)> {
match self {
GenItem::Table(table) => {
let code = csharp::autogen_csharp_table(ctx, table, namespace);
Expand All @@ -366,7 +372,8 @@ impl GenItem {
_ => todo!(),
},
GenItem::Reducer(reducer) => {
let code = csharp::autogen_csharp_reducer(ctx, reducer, namespace);
let code = csharp::autogen_csharp_reducer(ctx, *reducer_idx, reducer, namespace);
*reducer_idx += 1;
Centril marked this conversation as resolved.
Show resolved Hide resolved
let pascalcase = reducer.name.deref().to_case(Case::Pascal);
Some((pascalcase + "Reducer.cs", code))
}
Expand Down
Loading
Loading