From 16f7c3f39d473df96292ed262b4294d2e3bb7a41 Mon Sep 17 00:00:00 2001 From: Griffin Berlstein Date: Tue, 14 May 2024 14:36:49 -0400 Subject: [PATCH] [Cider 2] Memory loading & dumps (#2041) * partial checkpoint * partial checkpoint * external tool checkpoint * reorder the main construction for cider * minor tweaks * update the main method to not error out * simple tidying * another partial checkpoint * Update the construction of the main method and serialize the memory data dumps * make it possible to load in values to memory --- Cargo.lock | 23 +- Cargo.toml | 5 +- interp/src/errors.rs | 5 + interp/src/flatten/flat_ir/cell_prototype.rs | 190 +++++++++++----- interp/src/flatten/flat_ir/component.rs | 2 +- interp/src/flatten/flat_ir/mod.rs | 2 + interp/src/flatten/mod.rs | 16 +- interp/src/flatten/primitives/builder.rs | 139 +++--------- interp/src/flatten/primitives/prim_trait.rs | 4 + .../flatten/primitives/stateful/memories.rs | 8 + interp/src/flatten/structures/context.rs | 4 + .../src/flatten/structures/environment/env.rs | 75 ++++-- interp/src/flatten/structures/indexed_map.rs | 2 +- interp/src/main.rs | 120 +++++----- interp/src/serialization/data_dump.rs | 175 +++++++++++--- interp/src/serialization/old.rs | 39 +++- interp/src/structures/values.rs | 2 +- tools/cider-data-converter/Cargo.toml | 20 ++ tools/cider-data-converter/src/converter.rs | 200 ++++++++++++++++ tools/cider-data-converter/src/json_data.rs | 213 ++++++++++++++++++ tools/cider-data-converter/src/lib.rs | 2 + tools/cider-data-converter/src/main.rs | 131 +++++++++++ 22 files changed, 1091 insertions(+), 286 deletions(-) create mode 100644 tools/cider-data-converter/Cargo.toml create mode 100644 tools/cider-data-converter/src/converter.rs create mode 100644 tools/cider-data-converter/src/json_data.rs create mode 100644 tools/cider-data-converter/src/lib.rs create mode 100644 tools/cider-data-converter/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index 7bbf496ac1..271275bfea 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -562,6 +562,19 @@ dependencies = [ "thiserror", ] +[[package]] +name = "cider-data-converter" +version = "0.1.0" +dependencies = [ + "argh", + "interp", + "itertools 0.11.0", + "proptest", + "serde", + "serde_json", + "thiserror", +] + [[package]] name = "clang-sys" version = "1.7.0" @@ -1512,7 +1525,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.48.5", + "windows-targets 0.52.4", ] [[package]] @@ -2624,18 +2637,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.57" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "f0126ad08bff79f29fc3ae6a55cc72352056dfff61e3ff8bb7129476d44b23aa" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "d1cd413b5d558b4c5bf3680e324a6fa5014e7b7c067a51e69dbdf47eb7148b66" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index da58e677ae..fe30f5cb16 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -13,9 +13,10 @@ members = [ "tools/data_gen", "cider-dap", "fud2", - "fud2/fud-core", + "fud2/fud-core", "data-conversion", - "tools/btor2/btor2i" + "tools/btor2/btor2i", + "tools/cider-data-converter", ] exclude = ["site"] diff --git a/interp/src/errors.rs b/interp/src/errors.rs index 7e2f8a051d..27e443b80b 100644 --- a/interp/src/errors.rs +++ b/interp/src/errors.rs @@ -180,6 +180,11 @@ pub enum InterpreterError { // TODO Griffin: Make this more descriptive #[error("Attempted to read an undefined memory address")] UndefinedReadAddr, + + #[error(transparent)] + SerializationError( + #[from] crate::serialization::data_dump::SerializationError, + ), } impl InterpreterError { diff --git a/interp/src/flatten/flat_ir/cell_prototype.rs b/interp/src/flatten/flat_ir/cell_prototype.rs index b51a63c428..b62b5ceaa6 100644 --- a/interp/src/flatten/flat_ir/cell_prototype.rs +++ b/interp/src/flatten/flat_ir/cell_prototype.rs @@ -1,7 +1,9 @@ use calyx_ir::{self as cir}; use smallvec::SmallVec; -use crate::primitives::prim_utils::get_params; +use crate::{ + primitives::prim_utils::get_params, serialization::data_dump::Dimensions, +}; use super::prelude::ComponentIdx; @@ -82,6 +84,101 @@ pub enum MemType { Std, } +#[derive(Debug, Clone)] +pub enum MemoryDimensions { + D1 { + d0_size: Width, + d0_idx_size: Width, + }, + D2 { + d0_size: Width, + d0_idx_size: Width, + d1_size: Width, + d1_idx_size: Width, + }, + D3 { + d0_size: Width, + d0_idx_size: Width, + d1_size: Width, + d1_idx_size: Width, + d2_size: Width, + d2_idx_size: Width, + }, + D4 { + d0_size: Width, + d0_idx_size: Width, + d1_size: Width, + d1_idx_size: Width, + d2_size: Width, + d2_idx_size: Width, + d3_size: Width, + d3_idx_size: Width, + }, +} + +impl MemoryDimensions { + pub fn size(&self) -> usize { + match self { + Self::D1 { d0_size, .. } => *d0_size as usize, + Self::D2 { + d0_size, d1_size, .. + } => *d0_size as usize * *d1_size as usize, + Self::D3 { + d0_size, + d1_size, + d2_size, + .. + } => *d0_size as usize * *d1_size as usize * *d2_size as usize, + Self::D4 { + d0_size, + d1_size, + d2_size, + d3_size, + .. + } => { + *d0_size as usize + * *d1_size as usize + * *d2_size as usize + * *d3_size as usize + } + } + } + + /// Returns a Dimensions object + pub fn as_serializing_dim(&self) -> Dimensions { + match self { + MemoryDimensions::D1 { d0_size, .. } => { + Dimensions::D1(*d0_size as usize) + } + MemoryDimensions::D2 { + d0_size, d1_size, .. + } => Dimensions::D2(*d0_size as usize, *d1_size as usize), + MemoryDimensions::D3 { + d0_size, + d1_size, + d2_size, + .. + } => Dimensions::D3( + *d0_size as usize, + *d1_size as usize, + *d2_size as usize, + ), + MemoryDimensions::D4 { + d0_size, + d1_size, + d2_size, + d3_size, + .. + } => Dimensions::D4( + *d0_size as usize, + *d1_size as usize, + *d2_size as usize, + *d3_size as usize, + ), + } + } +} + /// A type alias to allow potential space hacks pub type Width = u32; @@ -118,41 +215,10 @@ pub enum CellPrototype { out: Width, }, // Memories - MemD1 { - mem_type: MemType, - width: Width, - size: Width, - idx_size: Width, - }, - MemD2 { + Memory { mem_type: MemType, width: Width, - d0_size: Width, - d1_size: Width, - d0_idx_size: Width, - d1_idx_size: Width, - }, - MemD3 { - mem_type: MemType, - width: Width, - d0_size: Width, - d1_size: Width, - d2_size: Width, - d0_idx_size: Width, - d1_idx_size: Width, - d2_idx_size: Width, - }, - MemD4 { - mem_type: MemType, - width: Width, - d0_size: Width, - d1_size: Width, - d2_size: Width, - d3_size: Width, - d0_idx_size: Width, - d1_idx_size: Width, - d2_idx_size: Width, - d3_idx_size: Width, + dims: MemoryDimensions, }, // TODO Griffin: lots more @@ -463,15 +529,17 @@ impl CellPrototype { size: "SIZE", idx_size: "IDX_SIZE" ]; - Self::MemD1 { + Self::Memory { mem_type: if n == "comb_mem_d1" { MemType::Std } else { MemType::Seq }, width: width.try_into().unwrap(), - size: size.try_into().unwrap(), - idx_size: idx_size.try_into().unwrap(), + dims: MemoryDimensions::D1 { + d0_size: size.try_into().unwrap(), + d0_idx_size: idx_size.try_into().unwrap(), + }, } } n @ ("comb_mem_d2" | "seq_mem_d2") => { @@ -482,17 +550,19 @@ impl CellPrototype { d0_idx_size: "D0_IDX_SIZE", d1_idx_size: "D1_IDX_SIZE" ]; - Self::MemD2 { + Self::Memory { mem_type: if n == "comb_mem_d2" { MemType::Std } else { MemType::Seq }, width: width.try_into().unwrap(), - d0_size: d0_size.try_into().unwrap(), - d1_size: d1_size.try_into().unwrap(), - d0_idx_size: d0_idx_size.try_into().unwrap(), - d1_idx_size: d1_idx_size.try_into().unwrap(), + dims: MemoryDimensions::D2 { + d0_size: d0_size.try_into().unwrap(), + d1_size: d1_size.try_into().unwrap(), + d0_idx_size: d0_idx_size.try_into().unwrap(), + d1_idx_size: d1_idx_size.try_into().unwrap(), + }, } } n @ ("comb_mem_d3" | "seq_mem_d3") => { @@ -505,19 +575,21 @@ impl CellPrototype { d1_idx_size: "D1_IDX_SIZE", d2_idx_size: "D2_IDX_SIZE" ]; - Self::MemD3 { + Self::Memory { mem_type: if n == "comb_mem_d3" { MemType::Std } else { MemType::Seq }, width: width.try_into().unwrap(), - d0_size: d0_size.try_into().unwrap(), - d1_size: d1_size.try_into().unwrap(), - d2_size: d2_size.try_into().unwrap(), - d0_idx_size: d0_idx_size.try_into().unwrap(), - d1_idx_size: d1_idx_size.try_into().unwrap(), - d2_idx_size: d2_idx_size.try_into().unwrap(), + dims: MemoryDimensions::D3 { + d0_size: d0_size.try_into().unwrap(), + d1_size: d1_size.try_into().unwrap(), + d2_size: d2_size.try_into().unwrap(), + d0_idx_size: d0_idx_size.try_into().unwrap(), + d1_idx_size: d1_idx_size.try_into().unwrap(), + d2_idx_size: d2_idx_size.try_into().unwrap(), + }, } } n @ ("comb_mem_d4" | "seq_mem_d4") => { @@ -533,21 +605,23 @@ impl CellPrototype { d3_idx_size: "D3_IDX_SIZE" ]; - Self::MemD4 { + Self::Memory { mem_type: if n == "comb_mem_d4" { MemType::Std } else { MemType::Seq }, width: width.try_into().unwrap(), - d0_size: d0_size.try_into().unwrap(), - d1_size: d1_size.try_into().unwrap(), - d2_size: d2_size.try_into().unwrap(), - d3_size: d3_size.try_into().unwrap(), - d0_idx_size: d0_idx_size.try_into().unwrap(), - d1_idx_size: d1_idx_size.try_into().unwrap(), - d2_idx_size: d2_idx_size.try_into().unwrap(), - d3_idx_size: d3_idx_size.try_into().unwrap(), + dims: MemoryDimensions::D4 { + d0_size: d0_size.try_into().unwrap(), + d1_size: d1_size.try_into().unwrap(), + d2_size: d2_size.try_into().unwrap(), + d3_size: d3_size.try_into().unwrap(), + d0_idx_size: d0_idx_size.try_into().unwrap(), + d1_idx_size: d1_idx_size.try_into().unwrap(), + d2_idx_size: d2_idx_size.try_into().unwrap(), + d3_idx_size: d3_idx_size.try_into().unwrap(), + }, } } n @ ("std_unsyn_mult" | "std_unsyn_div" | "std_unsyn_smult" diff --git a/interp/src/flatten/flat_ir/component.rs b/interp/src/flatten/flat_ir/component.rs index f112fdee97..4e703d4d8f 100644 --- a/interp/src/flatten/flat_ir/component.rs +++ b/interp/src/flatten/flat_ir/component.rs @@ -75,7 +75,7 @@ pub struct AuxillaryComponentInfo { pub signature: IndexRange, /// the definitions created by this component pub definitions: DefinitionRanges, - // ------------------- + pub port_offset_map: SparseMap, pub ref_port_offset_map: SparseMap, diff --git a/interp/src/flatten/flat_ir/mod.rs b/interp/src/flatten/flat_ir/mod.rs index 7b22863bd3..e3b3b5a95a 100644 --- a/interp/src/flatten/flat_ir/mod.rs +++ b/interp/src/flatten/flat_ir/mod.rs @@ -14,3 +14,5 @@ pub(crate) mod prelude { pub use super::identifier::Identifier; pub use super::wires::core::*; } + +pub use control::translator::translate; diff --git a/interp/src/flatten/mod.rs b/interp/src/flatten/mod.rs index 9f209cbdeb..74f1fb1f2c 100644 --- a/interp/src/flatten/mod.rs +++ b/interp/src/flatten/mod.rs @@ -1,16 +1,4 @@ -pub(crate) mod flat_ir; +pub mod flat_ir; pub mod primitives; -pub(crate) mod structures; +pub mod structures; pub(crate) mod text_utils; - -use structures::environment::{Environment, Simulator}; - -pub fn flat_main(ctx: &calyx_ir::Context) { - let i_ctx = flat_ir::control::translator::translate(ctx); - - i_ctx.printer().print_program(); - - let env = Environment::new(&i_ctx); - let mut sim = Simulator::new(env); - sim._main_test() -} diff --git a/interp/src/flatten/primitives/builder.rs b/interp/src/flatten/primitives/builder.rs index bb8ae418fc..976288a422 100644 --- a/interp/src/flatten/primitives/builder.rs +++ b/interp/src/flatten/primitives/builder.rs @@ -1,17 +1,24 @@ +use super::{ + combinational::*, prim_trait::DummyPrimitive, stateful::*, Primitive, +}; use crate::{ - flatten::flat_ir::{ - cell_prototype::{CellPrototype, MemType, PrimType1}, - prelude::{CellInfo, GlobalPortIdx}, + flatten::{ + flat_ir::{ + cell_prototype::{CellPrototype, MemType, PrimType1}, + prelude::{CellInfo, GlobalPortIdx}, + }, + structures::context::Context, }, + serialization::data_dump::DataDump, values::Value, }; -use super::{combinational::*, Primitive}; -use super::{prim_trait::DummyPrimitive, stateful::*}; - pub fn build_primitive( prim: &CellInfo, base_port: GlobalPortIdx, + // extras for memory initialization + ctx: &Context, + dump: &Option, ) -> Box { match &prim.prototype { CellPrototype::Constant { @@ -20,8 +27,6 @@ pub fn build_primitive( c_type: _, } => { let v = Value::from(*val, *width); - // TODO griffin: see if it is worth putting the initialization back - // env.ports[base_port] = v.clone(); Box::new(StdConst::new(v, base_port)) } @@ -101,104 +106,30 @@ pub fn build_primitive( right: _, out: _, } => Box::new(StdCat::new(base_port)), - CellPrototype::MemD1 { - mem_type, - width, - size, - idx_size: _, - } => match mem_type { - MemType::Seq => Box::new(SeqMemD1::new( - base_port, - *width, - false, - *size as usize, - )), - MemType::Std => Box::new(CombMemD1::new( - base_port, - *width, - false, - *size as usize, - )), - }, - CellPrototype::MemD2 { - mem_type, - width, - d0_size, - d1_size, - d0_idx_size: _, - d1_idx_size: _, - } => match mem_type { - MemType::Seq => Box::new(SeqMemD2::new( - base_port, - *width, - false, - (*d0_size as usize, *d1_size as usize), - )), - MemType::Std => Box::new(CombMemD2::new( - base_port, - *width, - false, - (*d0_size as usize, *d1_size as usize), - )), - }, - CellPrototype::MemD3 { + CellPrototype::Memory { mem_type, width, - d0_size, - d1_size, - d2_size, - d0_idx_size: _, - d1_idx_size: _, - d2_idx_size: _, - } => match mem_type { - MemType::Seq => Box::new(SeqMemD3::new( - base_port, - *width, - false, - (*d0_size as usize, *d1_size as usize, *d2_size as usize), - )), - MemType::Std => Box::new(CombMemD3::new( - base_port, - *width, - false, - (*d0_size as usize, *d1_size as usize, *d2_size as usize), - )), - }, - CellPrototype::MemD4 { - mem_type, - width, - d0_size, - d1_size, - d2_size, - d3_size, - d0_idx_size: _, - d1_idx_size: _, - d2_idx_size: _, - d3_idx_size: _, - } => match mem_type { - MemType::Seq => Box::new(SeqMemD4::new( - base_port, - *width, - false, - ( - *d0_size as usize, - *d1_size as usize, - *d2_size as usize, - *d3_size as usize, - ), - )), - MemType::Std => Box::new(CombMemD4::new( - base_port, - *width, - false, - ( - *d0_size as usize, - *d1_size as usize, - *d2_size as usize, - *d3_size as usize, - ), - )), - }, + dims, + } => { + let data = dump.as_ref().and_then(|data| { + let string = ctx.lookup_string(prim.name); + data.get_data(string) + }); + + match mem_type { + MemType::Seq => Box::new(if let Some(data) = data { + SeqMem::new_with_init(base_port, *width, false, dims, data) + } else { + SeqMemD1::new(base_port, *width, false, dims) + }), + MemType::Std => Box::new(if let Some(data) = data { + CombMem::new_with_init(base_port, *width, false, dims, data) + } else { + CombMem::new(base_port, *width, false, dims) + }), + } + } + CellPrototype::Unknown(_, _) => { todo!() } diff --git a/interp/src/flatten/primitives/prim_trait.rs b/interp/src/flatten/primitives/prim_trait.rs index c6f712a6ae..405d2cc210 100644 --- a/interp/src/flatten/primitives/prim_trait.rs +++ b/interp/src/flatten/primitives/prim_trait.rs @@ -124,6 +124,10 @@ pub trait Primitive { fn has_serializable_state(&self) -> bool { self.serialize(None).has_state() } + + fn dump_memory_state(&self) -> Option> { + None + } } /// An empty primitive implementation used for testing. It does not do anything diff --git a/interp/src/flatten/primitives/stateful/memories.rs b/interp/src/flatten/primitives/stateful/memories.rs index 2828246a33..3d91dab260 100644 --- a/interp/src/flatten/primitives/stateful/memories.rs +++ b/interp/src/flatten/primitives/stateful/memories.rs @@ -386,6 +386,10 @@ impl Primitive for CombMem { fn has_serializable_state(&self) -> bool { true } + + fn dump_memory_state(&self) -> Option> { + Some(self.dump_data()) + } } pub struct SeqMem { @@ -587,6 +591,10 @@ impl Primitive for SeqMem { fn has_serializable_state(&self) -> bool { true } + + fn dump_memory_state(&self) -> Option> { + Some(self.dump_data()) + } } // type aliases, this is kinda stupid and should probably be changed. or maybe // it's fine, I really don't know. diff --git a/interp/src/flatten/structures/context.rs b/interp/src/flatten/structures/context.rs index 14c8e6efef..924bfd6d0a 100644 --- a/interp/src/flatten/structures/context.rs +++ b/interp/src/flatten/structures/context.rs @@ -368,4 +368,8 @@ impl Context { }, } } + + pub fn lookup_string(&self, id: Identifier) -> &String { + self.secondary.string_table.lookup_string(&id).unwrap() + } } diff --git a/interp/src/flatten/structures/environment/env.rs b/interp/src/flatten/structures/environment/env.rs index 4f44cca1cf..b0e5b82e97 100644 --- a/interp/src/flatten/structures/environment/env.rs +++ b/interp/src/flatten/structures/environment/env.rs @@ -12,6 +12,7 @@ use crate::{ errors::{InterpreterError, InterpreterResult}, flatten::{ flat_ir::{ + cell_prototype::CellPrototype, prelude::{ AssignedValue, AssignmentIdx, BaseIndices, ComponentIdx, ControlNode, GlobalCellIdx, GlobalPortIdx, GlobalPortRef, @@ -25,6 +26,7 @@ use crate::{ environment::program_counter::ControlPoint, index_trait::IndexRef, }, }, + serialization::data_dump::DataDump, values::Value, }; use std::fmt::Debug; @@ -200,7 +202,7 @@ pub struct Environment<'a> { } impl<'a> Environment<'a> { - pub fn new(ctx: &'a Context) -> Self { + pub fn new(ctx: &'a Context, data_map: Option) -> Self { let root = ctx.entry_point; let aux = &ctx.secondary[root]; @@ -219,7 +221,7 @@ impl<'a> Environment<'a> { let root_node = CellLedger::new_comp(root, &env); let root = env.cells.push(root_node); - env.layout_component(root); + env.layout_component(root, data_map); env } @@ -232,7 +234,11 @@ impl<'a> Environment<'a> { /// 3. cells + ports, primitive /// 4. sub-components /// 5. ref-cells & ports - fn layout_component(&mut self, comp: GlobalCellIdx) { + fn layout_component( + &mut self, + comp: GlobalCellIdx, + data_map: Option, + ) { let ComponentLedger { index_bases, comp_id, @@ -284,7 +290,9 @@ impl<'a> Environment<'a> { idx ); } - let cell_dyn = primitives::build_primitive(info, port_base); + let cell_dyn = primitives::build_primitive( + info, port_base, self.ctx, &data_map, + ); let cell = self.cells.push(CellLedger::Primitive { cell_dyn }); debug_assert_eq!( @@ -301,7 +309,8 @@ impl<'a> Environment<'a> { cell ); - self.layout_component(cell); + // layout sub-component but don't include the data map + self.layout_component(cell, None); } } @@ -326,13 +335,13 @@ impl<'a> Environment<'a> { // ===================== Environment print implementations ===================== impl<'a> Environment<'a> { - pub fn print_env(&self) { + pub fn _print_env(&self) { let root_idx = GlobalCellIdx::new(0); let mut hierarchy = Vec::new(); - self.print_component(root_idx, &mut hierarchy) + self._print_component(root_idx, &mut hierarchy) } - fn print_component( + fn _print_component( &self, target: GlobalCellIdx, hierarchy: &mut Vec, @@ -387,7 +396,7 @@ impl<'a> Environment<'a> { let cell_idx = &info.index_bases + cell_off; if definition.prototype.is_component() { - self.print_component(cell_idx, hierarchy); + self._print_component(cell_idx, hierarchy); } else if self.cells[cell_idx] .as_primitive() .unwrap() @@ -417,7 +426,7 @@ impl<'a> Environment<'a> { println!(" Ref Ports: {}", self.ref_ports.len()); } - pub fn print_pc(&self) { + pub fn _print_pc(&self) { println!("{:?}", self.pc) } } @@ -434,13 +443,18 @@ impl<'a> Simulator<'a> { Self { env } } - pub fn print_env(&self) { - self.env.print_env() + pub fn _print_env(&self) { + self.env._print_env() } + #[inline] pub fn ctx(&self) -> &Context { self.env.ctx } + + pub fn _unpack_env(self) -> Environment<'a> { + self.env + } } // =========================== simulation functions =========================== @@ -889,9 +903,38 @@ impl<'a> Simulator<'a> { Ok(()) } - pub fn _main_test(&mut self) { - self.env.print_pc(); - let _ = self.run_program(); - self.print_env(); + /// Dump the current state of the environment as a DataDump + pub fn dump_memories(&self) -> DataDump { + let ctx = self.ctx(); + let entrypoint_secondary = &ctx.secondary[ctx.entry_point]; + + let mut dump = DataDump::new_empty_with_top_level( + ctx.secondary[entrypoint_secondary.name].clone(), + ); + + let root = self.env.cells.first().unwrap().as_comp().unwrap(); + + for (offset, idx) in entrypoint_secondary.cell_offset_map.iter() { + let cell_info = &ctx.secondary[*idx]; + let cell_index = &root.index_bases + offset; + let name = ctx.secondary[cell_info.name].clone(); + if let CellPrototype::Memory { width, dims, .. } = + &cell_info.prototype + { + dump.push_memory( + name, + *width as usize, + dims.size(), + dims.as_serializing_dim(), + self.env.cells[cell_index] + .as_primitive() + .unwrap() + .dump_memory_state() + .unwrap(), + ) + } + } + + dump } } diff --git a/interp/src/flatten/structures/indexed_map.rs b/interp/src/flatten/structures/indexed_map.rs index 78abde87a3..9a66a90319 100644 --- a/interp/src/flatten/structures/indexed_map.rs +++ b/interp/src/flatten/structures/indexed_map.rs @@ -30,7 +30,7 @@ where K: IndexRef, { /// Special case for empty tuple to enable a key generator. - pub fn next(&mut self) -> K { + pub fn next_key(&mut self) -> K { self.push(()) } } diff --git a/interp/src/main.rs b/interp/src/main.rs index c76857af74..c19ae5b322 100644 --- a/interp/src/main.rs +++ b/interp/src/main.rs @@ -8,12 +8,15 @@ use interp::{ debugger::{source::SourceMap, Debugger}, environment::InterpreterState, errors::{InterpreterError, InterpreterResult}, + flatten::structures::environment::{Environment, Simulator}, interpreter::ComponentInterpreter, interpreter_ir as iir, + serialization::data_dump::DataDump, }; use rustyline::error::ReadlineError; use slog::warn; use std::{ + io::stdout, path::{Path, PathBuf}, rc::Rc, }; @@ -22,7 +25,7 @@ use std::{ /// The Calyx Interpreter pub struct Opts { /// input file - #[argh(positional, from_str_fn(read_path))] + #[argh(positional)] pub file: Option, /// output file, default is stdout @@ -40,7 +43,7 @@ pub struct Opts { /// path to optional datafile used to initialze memories. If it is not /// provided memories will be initialzed with zeros - #[argh(option, long = "data", short = 'd', from_str_fn(read_path))] + #[argh(option, long = "data", short = 'd')] pub data_file: Option, #[argh(switch, long = "no-verify")] @@ -71,9 +74,6 @@ pub struct Opts { comm: Option, } -fn read_path(path: &str) -> Result { - Ok(Path::new(path).into()) -} #[derive(FromArgs)] #[argh(subcommand)] enum Command { @@ -126,9 +126,7 @@ fn print_res( fn main() -> InterpreterResult<()> { let opts: Opts = argh::from_env(); - let builder = configuration::ConfigBuilder::new(); - - let config = builder + let config = configuration::ConfigBuilder::new() .quiet(opts.quiet) .allow_invalid_memory_access(opts.allow_invalid_memory_access) .error_on_overflow(opts.error_on_overflow) @@ -154,57 +152,69 @@ fn main() -> InterpreterResult<()> { let command = opts.comm.unwrap_or(Command::Interpret(CommandInterpret {})); - // up here temporarily - if let Command::Flat(_) = &command { - // this is stupid but will work for testing purposes. This should be - // fixed later - interp::flatten::flat_main(&ctx); - todo!("The flat interpreter cannot yet interpret programs") - } - - let entry_point = ctx.entrypoint; - - let metadata = ctx.metadata; + match &command { + comm @ (Command::Interpret(_) | Command::Debug(_)) => { + let entry_point = ctx.entrypoint; + let metadata = ctx.metadata; + + let components: iir::ComponentCtx = Rc::new( + ctx.components + .into_iter() + .map(|x| Rc::new(x.into())) + .collect(), + ); + + let main_component = components + .iter() + .find(|&cm| cm.name == entry_point) + .ok_or(InterpreterError::MissingMainComponent)?; + + let mut mems = interp::MemoryMap::inflate_map(&opts.data_file)?; + + let env = InterpreterState::init_top_level( + &components, + main_component, + &mut mems, + &config, + )?; + + let res = if matches!(comm, Command::Interpret(_)) { + ComponentInterpreter::interpret_program(env, main_component) + } else { + let map = if let Some(map_res) = + metadata.map(SourceMap::from_string) + { + Some(map_res?) + } else { + None + }; + + let cidb = + Debugger::new(&components, main_component, map, env)?; + cidb.main_loop() + }; - let components: iir::ComponentCtx = Rc::new( - ctx.components - .into_iter() - .map(|x| Rc::new(x.into())) - .collect(), - ); + print_res(res, opts.raw) + } + Command::Flat(_) => { + let i_ctx = interp::flatten::flat_ir::translate(&ctx); + let data_dump = opts + .data_file + .map(|path| { + let mut file = std::fs::File::open(path)?; + DataDump::deserialize(&mut file) + }) + // flip to a result of an option + .map_or(Ok(None), |res| res.map(Some))?; - let main_component = components - .iter() - .find(|&cm| cm.name == entry_point) - .ok_or(InterpreterError::MissingMainComponent)?; + let mut sim = Simulator::new(Environment::new(&i_ctx, data_dump)); - let mut mems = interp::MemoryMap::inflate_map(&opts.data_file)?; + sim.run_program()?; - let env = InterpreterState::init_top_level( - &components, - main_component, - &mut mems, - &config, - )?; + let output = sim.dump_memories(); - let res = match &command { - Command::Interpret(_) => { - ComponentInterpreter::interpret_program(env, main_component) - } - Command::Debug(CommandDebug {}) => { - let map = metadata.map(SourceMap::from_string); - let map = if let Some(map_res) = map { - Some(map_res?) - } else { - None - }; - let cidb = Debugger::new(&components, main_component, map, env)?; - cidb.main_loop() - } - Command::Flat(_) => { - todo!("The flat interpreter cannot yet interpret programs") + output.serialize(&mut stdout())?; + Ok(()) } - }; - - print_res(res, opts.raw) + } } diff --git a/interp/src/serialization/data_dump.rs b/interp/src/serialization/data_dump.rs index 0256233e5f..d05667c0bd 100644 --- a/interp/src/serialization/data_dump.rs +++ b/interp/src/serialization/data_dump.rs @@ -1,20 +1,75 @@ use std::num::NonZeroUsize; use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Serialize, Debug, Deserialize, PartialEq, Clone)] +pub enum Dimensions { + D1(usize), + D2(usize, usize), + D3(usize, usize, usize), + D4(usize, usize, usize, usize), +} + +impl Dimensions { + pub fn size(&self) -> usize { + match self { + Dimensions::D1(d0) => *d0, + Dimensions::D2(d0, d1) => d0 * d1, + Dimensions::D3(d0, d1, d2) => d0 * d1 * d2, + Dimensions::D4(d0, d1, d2, d3) => d0 * d1 * d2 * d3, + } + } +} + +impl From for Dimensions { + fn from(v: usize) -> Self { + Self::D1(v) + } +} + +impl From<(usize, usize)> for Dimensions { + fn from(v: (usize, usize)) -> Self { + Self::D2(v.0, v.1) + } +} + +impl From<(usize, usize, usize)> for Dimensions { + fn from(v: (usize, usize, usize)) -> Self { + Self::D3(v.0, v.1, v.2) + } +} + +impl From<(usize, usize, usize, usize)> for Dimensions { + fn from(v: (usize, usize, usize, usize)) -> Self { + Self::D4(v.0, v.1, v.2, v.3) + } +} #[derive(Serialize, Debug, Deserialize, PartialEq, Clone)] pub struct MemoryDeclaration { pub name: String, pub width: NonZeroUsize, pub size: NonZeroUsize, + pub dimensions: Dimensions, } impl MemoryDeclaration { - pub fn new(name: String, width: usize, size: usize) -> Self { + pub fn new( + name: String, + width: usize, + size: usize, + dimensions: Dimensions, + ) -> Self { + assert!( + size == dimensions.size(), + "mismatch between stated size and the dimensions" + ); Self { name, width: NonZeroUsize::new(width).expect("width must be non-zero"), size: NonZeroUsize::new(size).expect("size must be non-zero"), + dimensions, } } @@ -51,67 +106,132 @@ pub struct DataDump { } impl DataDump { + /// returns an empty data dump with a top level name + pub fn new_empty_with_top_level(top_level: String) -> Self { + Self { + header: DataHeader { + top_level, + memories: vec![], + }, + data: vec![], + } + } + + /// returns an empty data dump + pub fn new_empty() -> Self { + Self::new_empty_with_top_level("".to_string()) + } + + /// pushes a new memory into the data dump. This does not do any fancy + /// conversion so the data must already be configured into a byte iterator. + pub fn push_memory>( + &mut self, + name: String, + width: usize, + size: usize, + dimensions: Dimensions, + data: T, + ) { + let declaration = MemoryDeclaration::new(name, width, size, dimensions); + self.header.memories.push(declaration); + self.data.extend(data); + } + // TODO Griffin: handle the errors properly - pub fn serialize(&self, writer: &mut dyn std::io::Write) { + pub fn serialize( + &self, + writer: &mut dyn std::io::Write, + ) -> std::io::Result<()> { let header_str = serde_json::to_string(&self.header).unwrap(); let len_bytes = header_str.len(); let written = writer.write(&len_bytes.to_le_bytes()).unwrap(); assert_eq!(written, 8); write!(writer, "{}", header_str).unwrap(); - let written = writer.write(&self.data).unwrap(); + let written = writer.write(&self.data)?; assert_eq!(written, self.data.len()); + Ok(()) } - /// TODO Griffin: handle the errors properly - pub fn deserialize(reader: &mut dyn std::io::Read) -> Self { + // TODO Griffin: handle the errors properly + pub fn deserialize( + reader: &mut dyn std::io::Read, + ) -> Result { let mut raw_header_len = [0u8; 8]; reader.read_exact(&mut raw_header_len).unwrap(); let header_len = usize::from_le_bytes(raw_header_len); let mut raw_header = vec![0u8; header_len]; - reader.read_exact(&mut raw_header).unwrap(); - let header_str = String::from_utf8(raw_header).unwrap(); - let header: DataHeader = serde_json::from_str(&header_str).unwrap(); + reader.read_exact(&mut raw_header)?; + let header_str = String::from_utf8(raw_header)?; + let header: DataHeader = serde_json::from_str(&header_str)?; let mut data: Vec = Vec::with_capacity(header.data_size()); // we could do a read_exact here instead but I opted for read_to_end // instead to avoid allowing incorrect/malformed data files - let amount_read = reader.read_to_end(&mut data).unwrap(); + let amount_read = reader.read_to_end(&mut data)?; assert_eq!(amount_read, header.data_size()); - DataDump { header, data } + Ok(DataDump { header, data }) } // TODO Griffin: Replace the panic with a proper error and the standard // handling - pub fn get_data(&self, mem_name: &str) -> &[u8] { + pub fn get_data(&self, mem_name: &str) -> Option<&[u8]> { let mut current_base = 0_usize; for mem in &self.header.memories { if mem.name == mem_name { let end = current_base + mem.byte_count(); - return &self.data[current_base..end]; + return Some(&self.data[current_base..end]); } else { current_base += mem.byte_count(); } } - panic!("Memory not found") + None } } +/// An error struct to handle any errors generated during the deserialization process +#[derive(Debug, Error)] +pub enum SerializationError { + #[error(transparent)] + SerdeError(#[from] serde_json::Error), + + #[error(transparent)] + IoError(#[from] std::io::Error), + + #[error(transparent)] + FromUtf8Error(#[from] std::string::FromUtf8Error), +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_data_dump() { + fn test_data_dump() -> Result<(), SerializationError> { let header = DataHeader { top_level: "test".to_string(), memories: vec![ - MemoryDeclaration::new("mem0".to_string(), 32, 16), // 64 bytes - MemoryDeclaration::new("mem1".to_string(), 4, 17), // 17 bytes - MemoryDeclaration::new("mem2".to_string(), 3, 2), // 2 bytes - // 83 bytes + MemoryDeclaration::new( + "mem0".to_string(), + 32, + 16, + Dimensions::D1(16), + ), // 64 bytes + MemoryDeclaration::new( + "mem1".to_string(), + 4, + 17, + Dimensions::D1(17), + ), // 17 bytes + MemoryDeclaration::new( + "mem2".to_string(), + 3, + 2, + Dimensions::D1(2), + ), // 2 bytes + // 83 bytes ], }; @@ -129,16 +249,17 @@ mod tests { let mut buf = Vec::new(); - dump.serialize(&mut buf); - let reparsed_dump = DataDump::deserialize(&mut buf.as_slice()); + dump.serialize(&mut buf)?; + let reparsed_dump = DataDump::deserialize(&mut buf.as_slice())?; assert_eq!(reparsed_dump, dump); + Ok(()) } use proptest::prelude::*; prop_compose! { fn arb_memory_declaration()(name in any::(), width in 1_usize..=256, size in 1_usize..=500) -> MemoryDeclaration { - MemoryDeclaration::new(name.to_string(), width, size) + MemoryDeclaration::new(name.to_string(), width, size, Dimensions::D1(size)) } } @@ -205,9 +326,9 @@ mod tests { #[test] fn prop_roundtrip(dump in arb_data_dump()) { let mut buf = Vec::new(); - dump.serialize(&mut buf); + dump.serialize(&mut buf)?; - let reparsed_dump = DataDump::deserialize(&mut buf.as_slice()); + let reparsed_dump = DataDump::deserialize(&mut buf.as_slice())?; prop_assert_eq!(dump, reparsed_dump) } @@ -223,18 +344,18 @@ mod tests { #[test] fn comb_roundtrip(dump in arb_data_dump()) { for mem in &dump.header.memories { - let memory_prim = CombMemD1::new_with_init(GlobalPortIdx::new(0), mem.width.get() as u32, false, mem.size.get(), dbg!(dump.get_data(&mem.name))); + let memory_prim = CombMemD1::new_with_init(GlobalPortIdx::new(0), mem.width.get() as u32, false, mem.size.get(), dump.get_data(&mem.name).unwrap()); let data = memory_prim.dump_data(); - prop_assert_eq!(dump.get_data(&mem.name), data); + prop_assert_eq!(dump.get_data(&mem.name).unwrap(), data); } } #[test] fn seq_roundtrip(dump in arb_data_dump()) { for mem in &dump.header.memories { - let memory_prim = SeqMemD1::new_with_init(GlobalPortIdx::new(0), mem.width.get() as u32, false, mem.size.get(), dbg!(dump.get_data(&mem.name))); + let memory_prim = SeqMemD1::new_with_init(GlobalPortIdx::new(0), mem.width.get() as u32, false, mem.size.get(), dump.get_data(&mem.name).unwrap()); let data = memory_prim.dump_data(); - prop_assert_eq!(dump.get_data(&mem.name), data); + prop_assert_eq!(dump.get_data(&mem.name).unwrap(), data); } } } diff --git a/interp/src/serialization/old.rs b/interp/src/serialization/old.rs index 6e49278dc3..498d37d478 100644 --- a/interp/src/serialization/old.rs +++ b/interp/src/serialization/old.rs @@ -4,8 +4,8 @@ use serde::Serialize; use std::fmt::{Debug, Display}; use crate::{ - primitives::Primitive, structures::state_views::FullySerialize, - utils::PrintCode, values::Value, + flatten::flat_ir::cell_prototype::MemoryDimensions, primitives::Primitive, + structures::state_views::FullySerialize, utils::PrintCode, values::Value, }; /// An enum wrapping over a tuple representing the shape of a multi-dimensional @@ -71,6 +71,41 @@ impl From<(usize, usize, usize, usize)> for Shape { } } +impl From<&MemoryDimensions> for Shape { + fn from(value: &MemoryDimensions) -> Self { + match value { + MemoryDimensions::D1 { d0_size, .. } => { + Shape::D1(*d0_size as usize) + } + MemoryDimensions::D2 { + d0_size, d1_size, .. + } => Shape::D2(*d0_size as usize, *d1_size as usize), + MemoryDimensions::D3 { + d0_size, + d1_size, + d2_size, + .. + } => Shape::D3( + *d0_size as usize, + *d1_size as usize, + *d2_size as usize, + ), + MemoryDimensions::D4 { + d0_size, + d1_size, + d2_size, + d3_size, + .. + } => Shape::D4( + *d0_size as usize, + *d1_size as usize, + *d2_size as usize, + *d3_size as usize, + ), + } + } +} + /// A wrapper enum used during serialization. It represents either an unsigned integer, /// or a signed integer and is serialized as the underlying integer. This also allows /// mixed serialization of signed and unsigned values diff --git a/interp/src/structures/values.rs b/interp/src/structures/values.rs index 7401aea560..0c046e89c4 100644 --- a/interp/src/structures/values.rs +++ b/interp/src/structures/values.rs @@ -666,7 +666,7 @@ impl Value { assert!(!bytes.is_empty()); assert!(width <= bytes.len() * 8); // TODO griffin: Make this sanity check even mildly comprehensible - let overhead = dbg!(width.div_ceil(8) * 8) - width; + let overhead = (width.div_ceil(8) * 8) - width; assert!( bytes.last().unwrap().leading_zeros() >= overhead as u32, "The upper byte of the provided value has non-zero values in the padding. Given byte is {} but the upper {} bit(s) should be zero", diff --git a/tools/cider-data-converter/Cargo.toml b/tools/cider-data-converter/Cargo.toml new file mode 100644 index 0000000000..e189c34052 --- /dev/null +++ b/tools/cider-data-converter/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "cider-data-converter" +authors.workspace = true +rust-version.workspace = true +edition.workspace = true +version = "0.1.0" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +interp = { path = "../../interp" } +serde = { workspace = true } +serde_json = { workspace = true } +itertools = { workspace = true } +argh = { workspace = true } +thiserror = "1.0.59" + + +[dev-dependencies] +proptest = "1.0.0" diff --git a/tools/cider-data-converter/src/converter.rs b/tools/cider-data-converter/src/converter.rs new file mode 100644 index 0000000000..9e06aa4965 --- /dev/null +++ b/tools/cider-data-converter/src/converter.rs @@ -0,0 +1,200 @@ +use itertools::Itertools; +use std::collections::HashMap; + +use super::json_data::*; +use interp::serialization::data_dump::*; + +pub fn convert_to_data_dump(json: &JsonData) -> DataDump { + let mut data_dump = DataDump::new_empty(); + + for (name, entry) in json.0.iter() { + let width = &entry.format.width; + let data: Box> = match &entry.data { + DataVec::Id1(v1) => Box::new(v1.iter().flat_map(|val| { + // chopping off the upper bits + val.to_le_bytes() + .into_iter() + .take(width.div_ceil(8) as usize) + })), + DataVec::Id2(v1) => Box::new(v1.iter().flat_map(|v2| { + v2.iter().flat_map(|val| { + val.to_le_bytes() + .into_iter() + .take(width.div_ceil(8) as usize) + }) + })), + DataVec::Id3(v1) => Box::new(v1.iter().flat_map(|v2| { + v2.iter().flat_map(|v3| { + v3.iter().flat_map(|val| { + val.to_le_bytes() + .into_iter() + .take(width.div_ceil(8) as usize) + }) + }) + })), + DataVec::Id4(v1) => Box::new(v1.iter().flat_map(|v2| { + v2.iter().flat_map(|v3| { + v3.iter().flat_map(|v4| { + v4.iter().flat_map(|val| { + val.to_le_bytes() + .into_iter() + .take(width.div_ceil(8) as usize) + }) + }) + }) + })), + DataVec::Fd1(_) => todo!("implement fixed-point"), + DataVec::Fd2(_) => todo!("implement fixed-point"), + DataVec::Fd3(_) => todo!("implement fixed-point"), + DataVec::Fd4(_) => todo!("implement fixed-point"), + }; + + data_dump.push_memory( + name.clone(), + *width as usize, + entry.data.size(), + entry.data.dimensions(), + data, + ) + } + + data_dump +} + +fn format_data(dimension: &Dimensions, data: &[u8], width: usize) -> DataVec { + assert!(width.div_ceil(8) <= 8, "cannot fit in u64"); + let u64_stream = data.chunks_exact(width.div_ceil(8)).map(|chunk| { + let mut array = [0u8; 8]; + array[0..chunk.len()].copy_from_slice(chunk); + u64::from_le_bytes(array) + }); + // sanity check + assert!(data.len() % width.div_ceil(8) == 0); + + match dimension { + Dimensions::D1(_) => u64_stream.collect_vec().into(), + Dimensions::D2(_d0, d1) => u64_stream + .chunks(*d1) + .into_iter() + .map(|v| v.collect_vec()) + .collect_vec() + .into(), + Dimensions::D3(_d0, d1, d2) => u64_stream + .chunks(d1 * d2) + .into_iter() + .map(|v1| { + v1.chunks(*d2) + .into_iter() + .map(|v2| v2.collect_vec()) + .collect_vec() + }) + .collect_vec() + .into(), + Dimensions::D4(_d0, d1, d2, d3) => u64_stream + .chunks(d1 * d2 * d3) + .into_iter() + .map(|v1| { + v1.chunks(d2 * d3) + .into_iter() + .map(|v2| { + v2.chunks(*d3) + .into_iter() + .map(|v3| v3.collect_vec()) + .collect_vec() + }) + .collect_vec() + }) + .collect_vec() + .into(), + } +} + +pub fn convert_from_data_dump(dump: &DataDump) -> JsonPrintDump { + let mut map = HashMap::new(); + for declaration in &dump.header.memories { + let data = dump.get_data(&declaration.name).unwrap(); + let formatted_data = format_data( + &declaration.dimensions, + data, + declaration.width.into(), + ); + + map.insert(declaration.name.clone(), formatted_data); + } + + JsonPrintDump(map) +} + +#[cfg(test)] +mod tests { + use super::*; + use proptest::prelude::*; + + prop_compose! { + fn arb_format_info()(width in 1_u64..=64) -> FormatInfo { + FormatInfo { + width, + is_signed: false, + numeric_type: NumericType::Bitnum, + int_width: None, + } + } + } + + fn max_val(width: u64) -> u64 { + assert!(width <= 64); + 2u64.saturating_pow(width.try_into().unwrap()) - 1 + } + + fn dim_generator() -> impl Strategy { + prop_oneof![ + (1_usize..=32).prop_map(Dimensions::D1), + (1_usize..=32, 1_usize..=32) + .prop_map(|(d1, d2)| Dimensions::D2(d1, d2)), + (1_usize..=32, 1_usize..=32, 1_usize..=32) + .prop_map(|(d1, d2, d3)| Dimensions::D3(d1, d2, d3)), + (1_usize..=32, 1_usize..=32, 1_usize..=32, 1_usize..=32) + .prop_map(|(d1, d2, d3, d4)| Dimensions::D4(d1, d2, d3, d4)), + ] + } + + prop_compose! { + fn arb_data(width: u64, dimensions: Dimensions)(data in prop::collection::vec(0u64..max_val(width), dimensions.size())) -> DataVec { + match dimensions { + Dimensions::D1(_) => data.into(), + Dimensions::D2(_d0, d1) => data.into_iter().chunks(d1).into_iter().map(|v| v.collect_vec()).collect_vec().into(), + Dimensions::D3(_d0, d1, d2) => data.into_iter().chunks(d1 * d2).into_iter().map(|v1| v1.chunks(d2).into_iter().map(|v2| v2.collect_vec()).collect_vec()).collect_vec().into(), + Dimensions::D4(_d0, d1, d2, d3) => data.into_iter().chunks(d1 * d2 * d3).into_iter().map(|v1| v1.chunks(d2 * d3).into_iter().map(|v2| v2.chunks(d3).into_iter().map(|v3| v3.collect_vec()).collect_vec()).collect_vec()).collect_vec().into(), + } + } + } + + fn arb_json_entry() -> impl Strategy { + let arb_format_info = arb_format_info(); + let dim = dim_generator(); + (arb_format_info, dim).prop_flat_map(|(format, dimensions)| { + arb_data(format.width, dimensions).prop_map(move |x| { + JsonDataEntry { + data: x, + format: format.clone(), + } + }) + }) + } + + proptest! { + #[test] + fn test_json_roundtrip(map in prop::collection::hash_map(any::(), arb_json_entry(), 1..4)) { + let json_data = JsonData(map); + + let dump = convert_to_data_dump(&json_data); + + let json_print_dump = convert_from_data_dump(&dump); + + for (name, entry) in &json_data.0 { + prop_assert_eq!(&entry.data, json_print_dump.0.get(name).unwrap()) + } + } + + } +} diff --git a/tools/cider-data-converter/src/json_data.rs b/tools/cider-data-converter/src/json_data.rs new file mode 100644 index 0000000000..6b1afeab2f --- /dev/null +++ b/tools/cider-data-converter/src/json_data.rs @@ -0,0 +1,213 @@ +use std::collections::HashMap; + +use interp::serialization::data_dump::Dimensions; +use serde::{self, Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, Clone, Copy)] +#[serde(rename_all = "lowercase")] +pub enum NumericType { + Bitnum, + Fixed, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct FormatInfo { + pub numeric_type: NumericType, + pub is_signed: bool, + pub width: u64, + #[serde(default)] + #[serde(skip_serializing_if = "Option::is_none")] + pub int_width: Option, +} + +// this is stupid +#[derive(Debug, Serialize, Deserialize, PartialEq)] +#[serde(untagged)] +pub enum DataVec { + // integers for the bitnum values + Id1(Vec), + Id2(Vec>), + Id3(Vec>>), + Id4(Vec>>>), + // the float initialization of fixed point values + Fd1(Vec), + Fd2(Vec>), + Fd3(Vec>>), + Fd4(Vec>>>), +} + +impl From>>>> for DataVec { + fn from(v: Vec>>>) -> Self { + Self::Fd4(v) + } +} + +impl From>>> for DataVec { + fn from(v: Vec>>) -> Self { + Self::Fd3(v) + } +} + +impl From>> for DataVec { + fn from(v: Vec>) -> Self { + Self::Fd2(v) + } +} + +impl From> for DataVec { + fn from(v: Vec) -> Self { + Self::Fd1(v) + } +} + +impl From>>>> for DataVec { + fn from(v: Vec>>>) -> Self { + Self::Id4(v) + } +} + +impl From>>> for DataVec { + fn from(v: Vec>>) -> Self { + Self::Id3(v) + } +} + +impl From>> for DataVec { + fn from(v: Vec>) -> Self { + Self::Id2(v) + } +} + +impl From> for DataVec { + fn from(v: Vec) -> Self { + Self::Id1(v) + } +} + +impl DataVec { + /// Returns the number of elements in the memory. Will panic if the vectors + /// do not all have the same length within a given dimension. + pub fn size(&self) -> usize { + // TODO griffin: make the variable names more reasonable + match self { + DataVec::Id1(v1) => v1.len(), + DataVec::Id2(v1) => { + let v0_size = v1[0].len(); + + // Check that sizes are the same across each dimension + assert!(v1.iter().all(|v2| v2.len() == v0_size)); + v1.len() * v0_size + } + DataVec::Id3(v1) => { + let v1_0_size = v1[0].len(); + let v1_0_0_size = v1[0][0].len(); + + // Check that sizes are the same across each dimension + assert!(v1.iter().all(|v2| { v2.len() == v1_0_size })); + assert!(v1 + .iter() + .all(|v2| v2.iter().all(|v3| v3.len() == v1_0_0_size))); + v1.len() * v1_0_size * v1_0_0_size + } + DataVec::Id4(v1) => { + let v1_0_size = v1[0].len(); + let v1_0_0_size = v1[0][0].len(); + let v1_0_0_0_size = v1[0][0][0].len(); + // Check that sizes are the same across each dimension + assert!(v1.iter().all(|v2| { v2.len() == v1_0_size })); + assert!(v1 + .iter() + .all(|v2| { v2.iter().all(|v3| v3.len() == v1_0_0_size) })); + assert!(v1.iter().all(|v2| v2 + .iter() + .all(|v3| v3.iter().all(|v4| v4.len() == v1_0_0_0_size)))); + + v1.len() * v1_0_size * v1_0_0_size * v1_0_0_0_size + } + DataVec::Fd1(_) => todo!("implement fixed-point"), + DataVec::Fd2(_) => todo!("implement fixed-point"), + DataVec::Fd3(_) => todo!("implement fixed-point"), + DataVec::Fd4(_) => todo!("implement fixed-point"), + } + } + + pub fn dimensions(&self) -> Dimensions { + match self { + DataVec::Id1(v) => Dimensions::D1(v.len()), + DataVec::Id2(v) => Dimensions::D2(v.len(), v[0].len()), + DataVec::Id3(v) => { + Dimensions::D3(v.len(), v[0].len(), v[0][0].len()) + } + DataVec::Id4(v) => Dimensions::D4( + v.len(), + v[0].len(), + v[0][0].len(), + v[0][0][0].len(), + ), + DataVec::Fd1(v) => Dimensions::D1(v.len()), + DataVec::Fd2(v) => Dimensions::D2(v.len(), v[0].len()), + DataVec::Fd3(v) => { + Dimensions::D3(v.len(), v[0].len(), v[0][0].len()) + } + DataVec::Fd4(v) => Dimensions::D4( + v.len(), + v[0].len(), + v[0][0].len(), + v[0][0][0].len(), + ), + } + } +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct JsonDataEntry { + pub data: DataVec, + pub format: FormatInfo, +} + +// using a hashmap here means that the serialization is non-deterministic but +// for now that's probably fine +#[derive(Debug, Serialize, Deserialize)] +#[serde(transparent)] +pub struct JsonData(pub HashMap); + +#[derive(Debug, Serialize)] +#[serde(transparent)] +/// A structure meant to mimic the old style of data dump printing. +pub struct JsonPrintDump(pub HashMap); + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_json_data() { + let data = r#" +{ + "in": { + "data": [[ + 4.0 + ]], + "format": { + "numeric_type": "bitnum", + "is_signed": false, + "width": 32 + } + }, + "out": { + "data": [ + 6 + ], + "format": { + "numeric_type": "bitnum", + "is_signed": false, + "width": 32 + } + } +}"#; + + let json_data: JsonData = serde_json::from_str(data).unwrap(); + println!("{:?}", json_data); + println!("{}", serde_json::to_string_pretty(&json_data).unwrap()); + } +} diff --git a/tools/cider-data-converter/src/lib.rs b/tools/cider-data-converter/src/lib.rs new file mode 100644 index 0000000000..7e1dadc1bd --- /dev/null +++ b/tools/cider-data-converter/src/lib.rs @@ -0,0 +1,2 @@ +pub mod converter; +pub mod json_data; diff --git a/tools/cider-data-converter/src/main.rs b/tools/cider-data-converter/src/main.rs new file mode 100644 index 0000000000..5c8a50233d --- /dev/null +++ b/tools/cider-data-converter/src/main.rs @@ -0,0 +1,131 @@ +use argh::FromArgs; +use cider_data_converter::{converter, json_data::JsonData}; +use interp::serialization::data_dump::{self, SerializationError}; +use std::{ + fs::File, + io::{self, Read, Write}, + path::PathBuf, + str::FromStr, +}; +use thiserror::Error; + +const JSON_EXTENSION: &str = "data"; +const CIDER_EXTENSION: &str = "dump"; + +#[derive(Error)] +enum CiderDataConverterError { + #[error("Failed to read file: {0}")] + IoError(#[from] std::io::Error), + + #[error("Failed to parse \"to\" argument: {0}")] + BadToArgument(String), + + #[error("Unable to guess the conversion target. Please specify the target using the \"--to\" argument")] + UnknownTarget, + + #[error(transparent)] + SerdeError(#[from] serde_json::Error), + + #[error(transparent)] + DataDumpError(#[from] SerializationError), +} + +impl std::fmt::Debug for CiderDataConverterError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self) + } +} + +enum Action { + ToDataDump, + ToJson, +} + +impl FromStr for Action { + type Err = CiderDataConverterError; + + fn from_str(s: &str) -> Result { + match s.to_lowercase().as_str() { + "json" => Ok(Action::ToJson), + "cider" | "dump" | "data-dump" => Ok(Action::ToDataDump), + _ => Err(CiderDataConverterError::BadToArgument(s.to_string())), + } + } +} + +#[derive(FromArgs)] +/// Convert json data files to Cider DataDumps and vice-versa +struct Opts { + /// the input file to be converted. If not provided, it will read from stdin + #[argh(positional)] + input_path: Option, + /// the output file to be written. If not provided, it will write to stdout + #[argh(option, short = 'o')] + output_path: Option, + + /// optional specification of what action to perform. Can be "cider" or + /// "json". If not provided, the converter will try to guess based on file names + #[argh(option, short = 't', long = "to")] + action: Option, +} + +fn main() -> Result<(), CiderDataConverterError> { + let mut opts: Opts = argh::from_env(); + + let mut input: Box = opts + .input_path + .as_ref() + .map(|path| File::open(path).map(|x| Box::new(x) as Box)) + .unwrap_or(Ok(Box::new(io::stdin())))?; + + let mut output: Box = opts + .output_path + .as_ref() + .map(|path| File::create(path).map(|x| Box::new(x) as Box)) + .unwrap_or(Ok(Box::new(io::stdout())))?; + + // if no action is specified, try to guess based on file extensions + if opts.action.is_none() + && (opts.input_path.as_ref().is_some_and(|x| { + x.extension().map_or(false, |y| y == JSON_EXTENSION) + }) || opts.output_path.as_ref().is_some_and(|x| { + x.extension().map_or(false, |y| y == CIDER_EXTENSION) + })) + { + opts.action = Some(Action::ToDataDump); + } else if opts.action.is_none() + && (opts.output_path.as_ref().is_some_and(|x| { + x.extension().map_or(false, |x| x == JSON_EXTENSION) + }) || opts.input_path.as_ref().is_some_and(|x| { + x.extension().map_or(false, |x| x == CIDER_EXTENSION) + })) + { + opts.action = Some(Action::ToJson); + } + + if let Some(action) = opts.action { + match action { + Action::ToDataDump => { + let parsed_json: JsonData = + serde_json::from_reader(&mut input)?; + converter::convert_to_data_dump(&parsed_json) + .serialize(&mut output)?; + } + Action::ToJson => { + let data_dump = data_dump::DataDump::deserialize(&mut input)?; + let json_data = converter::convert_from_data_dump(&data_dump); + writeln!( + &mut output, + "{}", + serde_json::to_string_pretty(&json_data)? + )?; + } + } + } else { + // Since we can't guess based on input/output file names and no target + // was specified, we just error out. + return Err(CiderDataConverterError::UnknownTarget); + } + + Ok(()) +}