From 4c38c075d3cf4ed5e9517261cf47c44630b8c4e0 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:42:05 -0400 Subject: [PATCH 01/18] fix(codegen): ensure callee results are pushed on stack in correct order --- codegen/masm/src/codegen/emit/primop.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/codegen/masm/src/codegen/emit/primop.rs b/codegen/masm/src/codegen/emit/primop.rs index 8012c6f0a..df70f09f0 100644 --- a/codegen/masm/src/codegen/emit/primop.rs +++ b/codegen/masm/src/codegen/emit/primop.rs @@ -327,7 +327,7 @@ impl<'a> OpEmitter<'a> { } } - for result in signature.results.iter() { + for result in signature.results.iter().rev() { self.stack.push(result.ty.clone()); } From 77a63e4f39a721804540a9837dc9f5c7dc0101b2 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:44:38 -0400 Subject: [PATCH 02/18] fix(codegen): ensure global initializers are set during rodata init --- codegen/masm/src/codegen/emit/mem.rs | 2 - codegen/masm/src/codegen/mod.rs | 1 - codegen/masm/src/masm/program.rs | 113 ++++++++++++-------------- codegen/masm/src/packaging/package.rs | 4 +- hir-analysis/src/data.rs | 4 +- 5 files changed, 57 insertions(+), 67 deletions(-) diff --git a/codegen/masm/src/codegen/emit/mem.rs b/codegen/masm/src/codegen/emit/mem.rs index 874023271..afb2ed890 100644 --- a/codegen/masm/src/codegen/emit/mem.rs +++ b/codegen/masm/src/codegen/emit/mem.rs @@ -3,8 +3,6 @@ use midenc_hir::{self as hir, Felt, FieldElement, SourceSpan, StructType, Type}; use super::OpEmitter; use crate::masm::{NativePtr, Op}; -pub(crate) const PAGE_SIZE: u32 = 64 * 1024; - /// Allocation impl<'a> OpEmitter<'a> { /// Allocate a procedure-local memory slot of sufficient size to store a value diff --git a/codegen/masm/src/codegen/mod.rs b/codegen/masm/src/codegen/mod.rs index 117623f1a..2f025858e 100644 --- a/codegen/masm/src/codegen/mod.rs +++ b/codegen/masm/src/codegen/mod.rs @@ -4,7 +4,6 @@ mod opt; mod scheduler; mod stack; 
-pub(crate) use self::emit::mem::PAGE_SIZE; pub use self::{ emitter::FunctionEmitter, scheduler::Scheduler, diff --git a/codegen/masm/src/masm/program.rs b/codegen/masm/src/masm/program.rs index 36e06f00a..fd02f02ce 100644 --- a/codegen/masm/src/masm/program.rs +++ b/codegen/masm/src/masm/program.rs @@ -7,8 +7,8 @@ use miden_assembly::{ }; use miden_core::crypto::hash::Rpo256; use midenc_hir::{ - self as hir, diagnostics::Report, DataSegmentTable, Felt, FieldElement, FunctionIdent, Ident, - SourceSpan, + self as hir, diagnostics::Report, DataSegmentTable, Felt, FieldElement, FunctionIdent, + GlobalVariableTable, Ident, SourceSpan, }; use midenc_hir_analysis::GlobalVariableAnalysis; use midenc_session::{Emit, Session}; @@ -40,29 +40,6 @@ pub struct Program { heap_base: u32, } impl Program { - /// Create a new [Program] initialized from a [DataSegmentTable], a set of [Module]s, and an - /// optional entrypoint function. - /// - /// A `main.masm` module will be generated which invokes the given entrypoint on startup, after - /// initializing the global heap of the root context, based on the provided data segment table. - /// - /// You should generally prefer to use [Program::from_hir], but this constructor allows you to - /// manually produce a MASM program from its constituent parts. - pub fn new(entrypoint: FunctionIdent, segments: &DataSegmentTable, modules: M) -> Self - where - M: IntoIterator>, - { - use crate::codegen::PAGE_SIZE; - - let library = Library::new(segments, modules); - Self { - library, - entrypoint, - // By default, we assume the first two pages are reserved for shadow stack and globals - heap_base: 2 * PAGE_SIZE, - } - } - /// Create a new [Program] initialized from an [hir::Program]. /// /// The resulting [Program] will have the following: @@ -381,29 +358,6 @@ impl Library { Self::default() } - /// Create a new [Library] initialized from a [DataSegmentTable] and a set of [Module]s. 
- /// - /// You should generally prefer to use [Library::from_hir], but this constructor allows you to - /// manually produce a MASM program from its constituent parts. - pub fn new(segments: &DataSegmentTable, modules: M) -> Self - where - M: IntoIterator>, - { - let mut module_tree = ModuleTree::default(); - for module in modules { - module_tree.insert(module); - } - let modules = Modules::Open(module_tree); - let rodata = compute_rodata(segments); - Self { - modules, - libraries: vec![], - kernel: None, - rodata, - stack_pointer: None, - } - } - /// Create a new [Library] initialized from an [hir::Program]. /// /// The resulting [Library] will have the following: @@ -422,7 +376,11 @@ impl Library { } else { None }; - let rodata = compute_rodata(program.segments()); + let rodata = compute_rodata( + globals.layout().global_table_offset(), + program.globals(), + program.segments(), + ); Self { modules: Modules::default(), libraries: vec![], @@ -597,18 +555,53 @@ impl Emit for Library { /// /// This consists of the data itself, as well as a content digest, which will be used to place /// that data in the advice map when the program starts. -fn compute_rodata(segments: &DataSegmentTable) -> Vec { - let mut rodatas = Vec::with_capacity(segments.iter().count()); - - for segment in segments.iter() { +fn compute_rodata( + global_table_offset: u32, + globals: &GlobalVariableTable, + segments: &DataSegmentTable, +) -> Vec { + let mut rodatas = Vec::with_capacity(segments.iter().count() + 1); + + // Convert global variable initializers to a data segment, and place it at the computed + // global table offset in linear memory. 
+ let extra = if globals.len() > 0 { + let size = globals.size_in_bytes(); + let offset = global_table_offset; + let mut data = Vec::::with_capacity(size); + data.resize(size, 0); + for gv in globals.iter() { + if let Some(init) = gv.initializer() { + let offset = unsafe { globals.offset_of(gv.id()) } as usize; + let init = globals.get_constant(init); + let init_bytes = init.as_slice(); + assert!(offset + init_bytes.len() <= data.len()); + let dst = &mut data[offset..(offset + init_bytes.len())]; + dst.copy_from_slice(init_bytes); + } + } // Don't bother emitting anything for zeroed segments - if segment.is_zeroed() { - continue; + if data.iter().any(|&b| b != 0) { + Some((size as u32, offset, Arc::new(midenc_hir::ConstantData::from(data)))) + } else { + None } - let size = segment.size(); - let offset = segment.offset(); + } else { + None + }; + + // Process all segments, ignoring zeroed segments (as Miden's memory is always zeroed) + for (size, offset, segment_data) in segments + .iter() + .filter_map(|segment| { + if segment.is_zeroed() { + None + } else { + Some((segment.size(), segment.offset(), segment.init())) + } + }) + .chain(extra) + { let base = NativePtr::from_ptr(offset); - let segment_data = segment.init(); // TODO(pauls): Do we ever have a need for data segments which are not aligned // to an word boundary? 
If so, we need to implement that @@ -636,13 +629,13 @@ fn compute_rodata(segments: &DataSegmentTable) -> Vec { // are mixed together, so that the data is preserved, and // the commitment is correct let mut iter = segment_data.as_slice().iter().copied().array_chunks::<4>(); - elements.extend(iter.by_ref().map(|bytes| Felt::new(u32::from_be_bytes(bytes) as u64))); + elements.extend(iter.by_ref().map(|bytes| Felt::new(u32::from_le_bytes(bytes) as u64))); if let Some(remainder) = iter.into_remainder() { let mut chunk = [0u8; 4]; for (i, byte) in remainder.into_iter().enumerate() { chunk[i] = byte; } - elements.push(Felt::new(u32::from_be_bytes(chunk) as u64)); + elements.push(Felt::new(u32::from_le_bytes(chunk) as u64)); } elements.resize(num_elements + padding, Felt::ZERO); let digest = Rpo256::hash_elements(&elements); diff --git a/codegen/masm/src/packaging/package.rs b/codegen/masm/src/packaging/package.rs index 10dfa773d..973c79b37 100644 --- a/codegen/masm/src/packaging/package.rs +++ b/codegen/masm/src/packaging/package.rs @@ -168,13 +168,13 @@ impl Rodata { let data = self.data.as_slice(); let mut felts = Vec::with_capacity(data.len() / 4); let mut iter = data.iter().copied().array_chunks::<4>(); - felts.extend(iter.by_ref().map(|bytes| Felt::new(u32::from_be_bytes(bytes) as u64))); + felts.extend(iter.by_ref().map(|bytes| Felt::new(u32::from_le_bytes(bytes) as u64))); if let Some(remainder) = iter.into_remainder() { let mut chunk = [0u8; 4]; for (i, byte) in remainder.into_iter().enumerate() { chunk[i] = byte; } - felts.push(Felt::new(u32::from_be_bytes(chunk) as u64)); + felts.push(Felt::new(u32::from_le_bytes(chunk) as u64)); } let padding = (self.size_in_words() * 4).abs_diff(felts.len()); diff --git a/hir-analysis/src/data.rs b/hir-analysis/src/data.rs index 7c0cb319e..ffa0ea8d2 100644 --- a/hir-analysis/src/data.rs +++ b/hir-analysis/src/data.rs @@ -35,7 +35,7 @@ impl Analysis for GlobalVariableAnalysis { ) -> AnalysisResult { let mut layout = 
GlobalVariableLayout { global_table_offset: core::cmp::max( - program.reserved_memory_bytes(), + program.reserved_memory_bytes().next_multiple_of(32), program.segments().next_available_offset(), ), ..GlobalVariableLayout::default() @@ -73,7 +73,7 @@ impl Analysis for GlobalVariableAnalysis { ) -> AnalysisResult { let mut layout = GlobalVariableLayout { global_table_offset: core::cmp::max( - module.reserved_memory_bytes(), + module.reserved_memory_bytes().next_multiple_of(32), module.segments().next_available_offset(), ), ..GlobalVariableLayout::default() From 6a6f774ab39956cfb5812321554634a7b12ae636 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:45:29 -0400 Subject: [PATCH 03/18] fix(sdk): force all allocations to be minimally word-aligned --- sdk/alloc/src/lib.rs | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/sdk/alloc/src/lib.rs b/sdk/alloc/src/lib.rs index cbd5b29c6..045312d64 100644 --- a/sdk/alloc/src/lib.rs +++ b/sdk/alloc/src/lib.rs @@ -12,6 +12,9 @@ use core::{ #[cfg(target_family = "wasm")] const PAGE_SIZE: usize = 2usize.pow(16); +/// We require all allocations to be minimally word-aligned, i.e. 32-byte alignment +const MIN_ALIGN: usize = 32; + /// The linear memory heap must not spill over into the region reserved for procedure /// locals, which begins at 2^30 in Miden's address space. const HEAP_END: *mut u8 = (2usize.pow(30) / 4) as *mut u8; @@ -71,7 +74,19 @@ impl BumpAlloc { unsafe impl GlobalAlloc for BumpAlloc { unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - let layout = layout.pad_to_align(); + // Force allocations to be minimally word-aligned. This is wasteful of memory, but + // we don't need to be particularly conservative with memory anyway, as most, if not all, + // Miden programs will be relatively short-lived.
This makes interop at the Rust/Miden + // call boundary less expensive, as we can typically pass pointers directly to Miden, + // whereas without this alignment guarantee, we would have to set up temporary buffers for + // Miden code to write to, and then copy out of that buffer to whatever Rust type, e.g. + // `Vec`, we actually want. + // + // NOTE: This cannot fail, because we're always meeting minimum alignment requirements + let layout = layout + .align_to(core::cmp::max(layout.align(), MIN_ALIGN)) + .unwrap() + .pad_to_align(); let size = layout.size(); let align = layout.align(); From 7b52f31492094c14f636c01bdee45b54a2f2e87e Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:46:08 -0400 Subject: [PATCH 04/18] fix(sdk): ensure we pass a miden-native ptr value to get_inputs --- sdk/base-sys/src/bindings/tx/mod.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sdk/base-sys/src/bindings/tx/mod.rs b/sdk/base-sys/src/bindings/tx/mod.rs index 9402b9d06..853ca0e07 100644 --- a/sdk/base-sys/src/bindings/tx/mod.rs +++ b/sdk/base-sys/src/bindings/tx/mod.rs @@ -20,6 +20,14 @@ pub fn get_inputs() -> Vec { const MAX_INPUTS: usize = 256; let mut inputs: Vec = Vec::with_capacity(MAX_INPUTS); let num_inputs = unsafe { + // Ensure the pointer is a valid Miden pointer + // + // NOTE: This relies on the fact that BumpAlloc makes all allocations + // minimally word-aligned. Each word consists of 4 elements of 4 bytes, + // so to get a Miden address from a Rust address, we divide by 16 to get + // the address in words (dividing by 4 gets us an address in elements, + // and by 4 again we get the word address). + let ptr = (inputs.as_mut_ptr() as usize) / 16; // The MASM for this function is here: // https://github.com/0xPolygonMiden/miden-base/blob/3cbe8d59dcf4ccc9c380b7c8417ac6178fc6b86a/miden-lib/asm/miden/note.masm#L69-L102 // #! 
Writes the inputs of the currently execute note into memory starting at the specified @@ -30,7 +38,7 @@ pub fn get_inputs() -> Vec { // #! - dest_ptr is the memory address to write the inputs. // Compiler generated adapter code at call site will drop the returned dest_ptr // and return the number of inputs - extern_note_get_inputs(inputs.as_mut_ptr()) + extern_note_get_inputs(ptr as *mut Felt) }; unsafe { inputs.set_len(num_inputs); From affb2001cb5d5b5049ad11858a9daf51c11608e9 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:48:07 -0400 Subject: [PATCH 05/18] fix(frontend-wasm): incorrect types applied to certain primops This commit ensures the default result type for ops that return u32 is bitcasted to i32, as that is the default integral type on Wasm. --- frontend-wasm/src/code_translator/mod.rs | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/frontend-wasm/src/code_translator/mod.rs b/frontend-wasm/src/code_translator/mod.rs index 16540cc28..64dda6ee8 100644 --- a/frontend-wasm/src/code_translator/mod.rs +++ b/frontend-wasm/src/code_translator/mod.rs @@ -259,15 +259,21 @@ pub fn translate_operator( /******************************* Unary Operators *************************************/ Operator::I32Clz | Operator::I64Clz => { let val = state.pop1(); - state.push1(builder.ins().clz(val, span)); + let count = builder.ins().clz(val, span); + // To ensure we match the Wasm semantics, treat the output of clz as an i32 + state.push1(builder.ins().bitcast(count, Type::I32, span)); } Operator::I32Ctz | Operator::I64Ctz => { let val = state.pop1(); - state.push1(builder.ins().ctz(val, span)); + let count = builder.ins().ctz(val, span); + // To ensure we match the Wasm semantics, treat the output of ctz as an i32 + state.push1(builder.ins().bitcast(count, Type::I32, span)); } Operator::I32Popcnt | Operator::I64Popcnt => { let val = state.pop1(); - state.push1(builder.ins().popcnt(val, span)); + let count = 
builder.ins().popcnt(val, span); + // To ensure we match the Wasm semantics, treat the output of popcnt as an i32 + state.push1(builder.ins().bitcast(count, Type::I32, span)); } Operator::I32Extend8S | Operator::I32Extend16S => { let val = state.pop1(); @@ -749,7 +755,7 @@ fn translate_br_if( state: &mut FuncTranslationState, span: SourceSpan, ) -> WasmResult<()> { - let cond = state.pop1(); + let cond = state.pop1_bitcasted(Type::I32, builder, span); let (br_destination, inputs) = translate_br_if_args(relative_depth, state); let next_block = builder.create_block(); let then_dest = br_destination; From a2d6bc708ef022bade9481ecd185ae82d4604f9f Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:49:40 -0400 Subject: [PATCH 06/18] fix(frontend-wasm): do not apply redundant casts --- .../src/module/func_translation_state.rs | 40 +++++++++++++++---- 1 file changed, 32 insertions(+), 8 deletions(-) diff --git a/frontend-wasm/src/module/func_translation_state.rs b/frontend-wasm/src/module/func_translation_state.rs index 1ad78bdab..a4b39c9e4 100644 --- a/frontend-wasm/src/module/func_translation_state.rs +++ b/frontend-wasm/src/module/func_translation_state.rs @@ -279,7 +279,11 @@ impl FuncTranslationState { span: SourceSpan, ) -> Value { let val = self.stack.pop().expect("attempted to pop a value from an empty stack"); - builder.ins().cast(val, ty.clone(), span) + if builder.data_flow_graph().value_type(val) != &ty { + builder.ins().cast(val, ty, span) + } else { + val + } } /// Pop one value and bitcast it to the specified type. @@ -290,7 +294,11 @@ impl FuncTranslationState { span: SourceSpan, ) -> Value { let val = self.stack.pop().expect("attempted to pop a value from an empty stack"); - builder.ins().bitcast(val, ty.clone(), span) + if builder.data_flow_graph().value_type(val) != &ty { + builder.ins().bitcast(val, ty, span) + } else { + val + } } /// Peek at the top of the stack without popping it. 
@@ -314,9 +322,17 @@ impl FuncTranslationState { ) -> (Value, Value) { let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); - let v1_casted = builder.ins().cast(v1, ty.clone(), span); - let v2_casted = builder.ins().cast(v2, ty, span); - (v1_casted, v2_casted) + let v1 = if builder.data_flow_graph().value_type(v1) != &ty { + builder.ins().cast(v1, ty.clone(), span) + } else { + v1 + }; + let v2 = if builder.data_flow_graph().value_type(v2) != &ty { + builder.ins().cast(v2, ty, span) + } else { + v2 + }; + (v1, v2) } /// Pop two values. Bitcast them to the specified type. Return them in the order they were @@ -329,9 +345,17 @@ impl FuncTranslationState { ) -> (Value, Value) { let v2 = self.stack.pop().unwrap(); let v1 = self.stack.pop().unwrap(); - let v1_casted = builder.ins().bitcast(v1, ty.clone(), span); - let v2_casted = builder.ins().bitcast(v2, ty, span); - (v1_casted, v2_casted) + let v1 = if builder.data_flow_graph().value_type(v1) != &ty { + builder.ins().bitcast(v1, ty.clone(), span) + } else { + v1 + }; + let v2 = if builder.data_flow_graph().value_type(v2) != &ty { + builder.ins().bitcast(v2, ty, span) + } else { + v2 + }; + (v1, v2) } /// Pop three values. Return them in the order they were pushed. 
From 89cb0d68badc6214008ed53b8a11476bec7addd0 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 04:50:10 -0400 Subject: [PATCH 07/18] fix(debugger): infinite loop in breakpoint id computation --- midenc-debug/src/ui/state.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/midenc-debug/src/ui/state.rs b/midenc-debug/src/ui/state.rs index 83f0b0497..06e5c4c85 100644 --- a/midenc-debug/src/ui/state.rs +++ b/midenc-debug/src/ui/state.rs @@ -151,6 +151,7 @@ impl State { .any(|bp| bp.id == candidate) { candidate = next; + next = candidate.wrapping_add(1); continue; } self.next_breakpoint_id = next; From fc9ff3adb2d649d7fcd543f1e7e2934e111de1e4 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 16:59:31 -0400 Subject: [PATCH 08/18] chore(codegen): be consistent about the way in which we push to stack --- codegen/masm/src/codegen/emit/mem.rs | 40 ++++++++++++------------- codegen/masm/src/codegen/emit/primop.rs | 4 +-- codegen/masm/src/codegen/emit/unary.rs | 38 +++++++++++------------ 3 files changed, 41 insertions(+), 41 deletions(-) diff --git a/codegen/masm/src/codegen/emit/mem.rs b/codegen/masm/src/codegen/emit/mem.rs index afb2ed890..450b42d9c 100644 --- a/codegen/masm/src/codegen/emit/mem.rs +++ b/codegen/masm/src/codegen/emit/mem.rs @@ -15,7 +15,7 @@ impl<'a> OpEmitter<'a> { Type::Ptr(pointee) => { let local = self.function.alloc_local(pointee.as_ref().clone()); self.emit(Op::LocAddr(local), span); - self.stack.push(ptr.clone()); + self.push(ptr.clone()); } ty => panic!("expected a pointer type, got {ty}"), } @@ -25,14 +25,14 @@ impl<'a> OpEmitter<'a> { #[allow(unused)] pub fn heap_base(&mut self, span: SourceSpan) { self.emit(Op::Exec("intrinsics::mem::heap_base".parse().unwrap()), span); - self.stack.push(Type::Ptr(Box::new(Type::U8))); + self.push(Type::Ptr(Box::new(Type::U8))); } /// Return the address of the top of the heap #[allow(unused)] pub fn heap_top(&mut self, span: SourceSpan) { 
self.emit(Op::Exec("intrinsics::mem::heap_top".parse().unwrap()), span); - self.stack.push(Type::Ptr(Box::new(Type::U8))); + self.push(Type::Ptr(Box::new(Type::U8))); } /// Grow the heap (from the perspective of Wasm programs) by N pages, returning the previous @@ -40,13 +40,13 @@ impl<'a> OpEmitter<'a> { pub fn mem_grow(&mut self, span: SourceSpan) { let _num_pages = self.stack.pop().expect("operand stack is empty"); self.emit(Op::Exec("intrinsics::mem::memory_grow".parse().unwrap()), span); - self.stack.push(Type::I32); + self.push(Type::I32); } /// Returns the size (in pages) of the heap (from the perspective of Wasm programs) pub fn mem_size(&mut self, span: SourceSpan) { self.emit(Op::Exec("intrinsics::mem::memory_size".parse().unwrap()), span); - self.stack.push(Type::U32); + self.push(Type::U32); } } @@ -60,7 +60,7 @@ impl<'a> OpEmitter<'a> { pub fn load_local(&mut self, local: hir::LocalId, span: SourceSpan) { let ty = self.function.local(local).ty.clone(); self.emit(Op::LocAddr(local), span); - self.stack.push(Type::Ptr(Box::new(ty.clone()))); + self.push(Type::Ptr(Box::new(ty.clone()))); self.load(ty, span) } @@ -86,7 +86,7 @@ impl<'a> OpEmitter<'a> { } ty => todo!("support for loading {ty} is not yet implemented"), } - self.stack.push(ty); + self.push(ty); } ty if !ty.is_pointer() => { panic!("invalid operand to load: expected pointer, got {ty}") @@ -111,7 +111,7 @@ impl<'a> OpEmitter<'a> { } ty => todo!("support for loading {ty} is not yet implemented"), } - self.stack.push(ty); + self.push(ty); } /// Emit a sequence of instructions to translate a raw pointer value to @@ -942,7 +942,7 @@ impl<'a> OpEmitter<'a> { pub fn store_local(&mut self, local: hir::LocalId, span: SourceSpan) { let ty = self.function.local(local).ty.clone(); self.emit(Op::LocAddr(local), span); - self.stack.push(Type::Ptr(Box::new(ty))); + self.push(Type::Ptr(Box::new(ty))); self.store(span) } @@ -1046,11 +1046,11 @@ impl<'a> OpEmitter<'a> { ); // Loop body - move value to top of 
stack, swap with pointer - self.stack.push(value); - self.stack.push(count); - self.stack.push(dst.clone()); - self.stack.push(dst.ty()); - self.stack.push(dst.ty()); + self.push(value); + self.push(count); + self.push(dst.clone()); + self.push(dst.ty()); + self.push(dst.ty()); self.dup(4, span); // [value, aligned_dst, i, dst, count, value] self.swap(1, span); // [aligned_dst, value, i, dst, count, value] @@ -1166,12 +1166,12 @@ impl<'a> OpEmitter<'a> { ); // Load the source value - self.stack.push(count.clone()); - self.stack.push(dst.clone()); - self.stack.push(src.clone()); - self.stack.push(Type::U32); - self.stack.push(dst.clone()); - self.stack.push(src.clone()); + self.push(count.clone()); + self.push(dst.clone()); + self.push(src.clone()); + self.push(Type::U32); + self.push(dst.clone()); + self.push(src.clone()); self.load(value_ty.clone(), span); // [value, new_dst, i, src, dst, count] // Write to the destination diff --git a/codegen/masm/src/codegen/emit/primop.rs b/codegen/masm/src/codegen/emit/primop.rs index df70f09f0..f6cb2a181 100644 --- a/codegen/masm/src/codegen/emit/primop.rs +++ b/codegen/masm/src/codegen/emit/primop.rs @@ -211,7 +211,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("support for assert_eq on {ty} is not implemented"), } - self.stack.push(ty); + self.push(ty); } /// Execute the given procedure. 
@@ -328,7 +328,7 @@ impl<'a> OpEmitter<'a> { } for result in signature.results.iter().rev() { - self.stack.push(result.ty.clone()); + self.push(result.ty.clone()); } self.emit(Op::Exec(callee), span); diff --git a/codegen/masm/src/codegen/emit/unary.rs b/codegen/masm/src/codegen/emit/unary.rs index 737c158bb..89d709cef 100644 --- a/codegen/masm/src/codegen/emit/unary.rs +++ b/codegen/masm/src/codegen/emit/unary.rs @@ -64,7 +64,7 @@ impl<'a> OpEmitter<'a> { (Type::I8 | Type::U8, _) if n <= 8 => self.trunc_int32(n, span), (src, dst) => unimplemented!("unsupported truncation of {src} to {dst}"), } - self.stack.push(dst.clone()); + self.push(dst.clone()); } /// Zero-extend an unsigned integral value of type `src` to type `dst` @@ -124,7 +124,7 @@ impl<'a> OpEmitter<'a> { ), (src, dst) => panic!("unsupported zero-extension from {src} to {dst}"), } - self.stack.push(dst.clone()); + self.push(dst.clone()); } /// Sign-extend an integral value of type `src` to type `dst` @@ -179,7 +179,7 @@ impl<'a> OpEmitter<'a> { ) => self.sext_smallint(src_bits, dst_bits, span), (src, dst) => panic!("unsupported sign-extension from {src} to {dst}"), } - self.stack.push(dst.clone()); + self.push(dst.clone()); } pub fn bitcast(&mut self, dst: &Type, _span: SourceSpan) { @@ -189,7 +189,7 @@ impl<'a> OpEmitter<'a> { src.is_integer() && dst.is_integer(), "invalid cast of {src} to {dst}: only integer-to-integer bitcasts are supported" ); - self.stack.push(dst.clone()); + self.push(dst.clone()); } /// Convert between two integral types, given as `src` and `dst`, @@ -338,7 +338,7 @@ impl<'a> OpEmitter<'a> { (Type::I1, _) => self.zext_smallint(src_bits, dst_bits, span), (src, dst) => unimplemented!("unsupported cast from {src} to {dst}"), } - self.stack.push(dst.clone()); + self.push(dst.clone()); } /// Cast `arg` to a pointer value @@ -349,11 +349,11 @@ impl<'a> OpEmitter<'a> { match arg.ty() { // We allow i32 here because Wasm uses it Type::U32 | Type::I32 => { - self.stack.push(ty.clone()); + 
self.push(ty.clone()); } Type::Felt => { self.emit(Op::U32Assert, span); - self.stack.push(ty.clone()); + self.push(ty.clone()); } int => panic!("invalid inttoptr cast: cannot cast value of type {int} to {ty}"), } @@ -397,7 +397,7 @@ impl<'a> OpEmitter<'a> { } ty => panic!("expected integral type for is_odd opcode, got {ty}"), } - self.stack.push(Type::I1); + self.push(Type::I1); } /// Compute the integral base-2 logarithm of the value on top of the operand stack, and @@ -441,13 +441,13 @@ impl<'a> OpEmitter<'a> { ], span, ); - self.stack.push(Type::U32); + self.push(Type::U32); } Type::I1 => { // 2^0 == 1 let _ = self.stack.pop(); self.emit_all(&[Op::Drop, Op::PushU8(0)], span); - self.stack.push(Type::U32); + self.push(Type::U32); } ty if !ty.is_integer() => { panic!("invalid ilog2 on {ty}: only integral types are supported") @@ -512,7 +512,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("popcnt for {ty} is not supported"), } - self.stack.push(Type::U32); + self.push(Type::U32); } /// Count the number of leading zero bits in the integral value on top of the operand stack, @@ -589,7 +589,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("clz for {ty} is not supported"), } - self.stack.push(Type::U32); + self.push(Type::U32); } /// Count the number of leading one bits in the integral value on top of the operand stack, @@ -676,7 +676,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("clo for {ty} is not supported"), } - self.stack.push(Type::U32); + self.push(Type::U32); } /// Count the number of trailing zero bits in the integral value on top of the operand stack, @@ -764,7 +764,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("ctz for {ty} is not supported"), } - self.stack.push(Type::U32); + self.push(Type::U32); } /// Count the number of trailing one bits in the integral value on top of the operand stack, @@ -817,7 +817,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("cto for {ty} is not supported"), } - self.stack.push(Type::U32); + 
self.push(Type::U32); } /// Invert the bitwise representation of the integral value on top of the operand stack. @@ -863,7 +863,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("bnot for {ty} is not supported"), } - self.stack.push(ty); + self.push(ty); } /// Invert the boolean value on top of the operand stack. @@ -873,7 +873,7 @@ impl<'a> OpEmitter<'a> { let arg = self.stack.pop().expect("operand stack is empty"); assert_eq!(arg.ty(), Type::I1, "logical NOT requires a boolean value"); self.emit(Op::Not, span); - self.stack.push(Type::I1); + self.push(Type::I1); } /// Compute 2^N, where N is the integral value on top of the operand stack, as @@ -923,7 +923,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("pow2 for {ty} is not supported"), } - self.stack.push(ty); + self.push(ty); } /// Increment the operand on top of the stack by 1. @@ -958,7 +958,7 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("incr for {ty} is not supported"), } - self.stack.push(ty); + self.push(ty); } /// Compute the modular multiplicative inverse of the operand on top of the stack, `n`, i.e. 
From cb52312eb8fc3a300349c9046dd7556159673648 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 17:00:49 -0400 Subject: [PATCH 09/18] fix(codegen): incorrect handling of multi-result instructions --- codegen/masm/src/codegen/emit/binary.rs | 12 ++++++------ codegen/masm/src/codegen/emit/mod.rs | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/codegen/masm/src/codegen/emit/binary.rs b/codegen/masm/src/codegen/emit/binary.rs index eea74de6c..40b46f205 100644 --- a/codegen/masm/src/codegen/emit/binary.rs +++ b/codegen/masm/src/codegen/emit/binary.rs @@ -360,10 +360,10 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("add is not yet implemented for {ty}"), } - self.push(ty); if overflow.is_overflowing() { self.push(Type::I1); } + self.push(ty); } pub fn add_imm(&mut self, imm: Immediate, overflow: Overflow, span: SourceSpan) { @@ -393,10 +393,10 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("add is not yet implemented for {ty}"), } - self.push(ty); if overflow.is_overflowing() { self.push(Type::I1); } + self.push(ty); } pub fn sub(&mut self, overflow: Overflow, span: SourceSpan) { @@ -425,10 +425,10 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("sub is not yet implemented for {ty}"), } - self.push(ty); if overflow.is_overflowing() { self.push(Type::I1); } + self.push(ty); } pub fn sub_imm(&mut self, imm: Immediate, overflow: Overflow, span: SourceSpan) { @@ -457,10 +457,10 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("sub is not yet implemented for {ty}"), } - self.push(ty); if overflow.is_overflowing() { self.push(Type::I1); } + self.push(ty); } pub fn mul(&mut self, overflow: Overflow, span: SourceSpan) { @@ -505,10 +505,10 @@ impl<'a> OpEmitter<'a> { } ty => unimplemented!("mul for {ty} is not supported"), } - self.push(ty); if overflow.is_overflowing() { self.push(Type::I1); } + self.push(ty); } pub fn mul_imm(&mut self, imm: Immediate, overflow: Overflow, span: SourceSpan) { @@ -539,10 +539,10 @@ impl<'a> 
OpEmitter<'a> { } ty => unimplemented!("mul for {ty} is not supported"), } - self.push(ty); if overflow.is_overflowing() { self.push(Type::I1); } + self.push(ty); } pub fn checked_div(&mut self, span: SourceSpan) { diff --git a/codegen/masm/src/codegen/emit/mod.rs b/codegen/masm/src/codegen/emit/mod.rs index 8d932786e..e572f66a4 100644 --- a/codegen/masm/src/codegen/emit/mod.rs +++ b/codegen/masm/src/codegen/emit/mod.rs @@ -157,7 +157,7 @@ impl<'a> DerefMut for InstOpEmitter<'a> { impl<'a> Drop for InstOpEmitter<'a> { fn drop(&mut self) { let results = self.dfg.inst_results(self.inst); - for (i, result) in results.iter().copied().rev().enumerate() { + for (i, result) in results.iter().copied().enumerate() { self.emitter.stack.rename(i, result); } } From 9d091c0d59aace1cfa360548e318d6a631a73b04 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 17:02:01 -0400 Subject: [PATCH 10/18] fix(codegen): incorrect lowering of global.{load,iadd,symbol} --- codegen/masm/src/codegen/emitter.rs | 32 ++++++++++++++++++++++++----- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/codegen/masm/src/codegen/emitter.rs b/codegen/masm/src/codegen/emitter.rs index e8181530e..4f4262277 100644 --- a/codegen/masm/src/codegen/emitter.rs +++ b/codegen/masm/src/codegen/emitter.rs @@ -6,7 +6,6 @@ use midenc_hir::{ adt::SparseMap, assert_matches, diagnostics::{SourceSpan, Span}, - Type, }; use midenc_hir_analysis::{ DominatorTree, GlobalVariableLayout, LivenessAnalysis, Loop, LoopAnalysis, @@ -461,6 +460,8 @@ impl<'b, 'f: 'b> BlockEmitter<'b, 'f> { } fn emit_global_value(&mut self, inst_info: &InstInfo, op: &hir::GlobalValueOp) { + use midenc_hir::Immediate; + assert_eq!(op.op, hir::Opcode::GlobalValue); let addr = self .function @@ -475,14 +476,35 @@ impl<'b, 'f: 'b> BlockEmitter<'b, 'f> { }); let span = self.function.f.dfg.inst_span(inst_info.inst); match self.function.f.dfg.global_value(op.global) { - hir::GlobalValueData::Load { ref ty, .. 
} => { + hir::GlobalValueData::Load { ref ty, offset, .. } => { let mut emitter = self.inst_emitter(inst_info.inst); + let offset = *offset; + let addr = if offset >= 0 { + addr + (offset as u32) + } else { + addr - offset.unsigned_abs() + }; emitter.load_imm(addr, ty.clone(), span); } - hir::GlobalValueData::IAddImm { .. } | hir::GlobalValueData::Symbol { .. } => { + global @ (hir::GlobalValueData::IAddImm { .. } + | hir::GlobalValueData::Symbol { .. }) => { + let ty = self + .function + .f + .dfg + .value_type(self.function.f.dfg.first_result(inst_info.inst)) + .clone(); let mut emitter = self.inst_emitter(inst_info.inst); - emitter.stack_mut().push(addr); - emitter.inttoptr(&Type::Ptr(Type::U8.into()), span); + let offset = global.offset(); + let addr = if offset >= 0 { + addr + (offset as u32) + } else { + addr - offset.unsigned_abs() + }; + emitter.literal(Immediate::U32(addr), span); + // "cast" the immediate to the expected type + emitter.stack_mut().pop(); + emitter.stack_mut().push(ty); } } } From fe8aa700f662385f0edf5f15e98d9373980f073e Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 17:02:47 -0400 Subject: [PATCH 11/18] fix(codegen): make sure we always drop unused instruction results --- codegen/masm/src/codegen/scheduler.rs | 32 ++++++++++++--------------- 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/codegen/masm/src/codegen/scheduler.rs b/codegen/masm/src/codegen/scheduler.rs index 02f0b4a1a..6a658d8c8 100644 --- a/codegen/masm/src/codegen/scheduler.rs +++ b/codegen/masm/src/codegen/scheduler.rs @@ -423,10 +423,9 @@ impl<'a> BlockScheduler<'a> { self.worklist.push(Plan::Drop(value)); } // We will only ever observe the instruction node type as a treegraph root - // when it has no results (and thus no dependents/predecessors in the graph), - // because in all other cases it will always have a predecessor of Result type. - // - // In practice, we only observe these nodes when handling block terminators. 
+ // when it has no results (and thus no dependents/predecessors in the graph), or + // multiple results, because in all other cases it will always have a single + // predecessor of Result type. Node::Inst { id: inst, .. } => { let inst_info = self.get_or_analyze_inst_info(inst, node_id); self.plan_inst(inst_info); @@ -510,7 +509,7 @@ impl<'a> BlockScheduler<'a> { // definitely on us. Node::Inst { id: inst, .. } => { let inst_info = self.get_or_analyze_inst_info(inst, dependency_id); - self.materialize_inst_results(inst_info); + self.plan_inst(inst_info); } // This node type is never added as a pre-requisite Node::Argument(_) => unreachable!(), @@ -520,7 +519,15 @@ impl<'a> BlockScheduler<'a> { /// Schedule execution of a given instruction, see [Plan::Inst] docs for specific semantics. fn schedule_inst(&mut self, inst_info: Rc, scheduled_ops: &mut Vec) { - scheduled_ops.push(ScheduleOp::Inst(inst_info)); + scheduled_ops.push(ScheduleOp::Inst(inst_info.clone())); + // Ensure that any unused results are dropped immediately + let inst_results = self.f.dfg.inst_results(inst_info.inst); + for result in inst_results.iter().copied() { + let is_used = inst_info.results.iter().any(|v| v.value == result && v.is_used()); + if !is_used { + scheduled_ops.push(ScheduleOp::Drop(result)); + } + } } /// Schedule instructions which were deferred until after an instruction executes. @@ -573,7 +580,7 @@ impl<'a> BlockScheduler<'a> { .any(|p| p.dependent.is_instruction()); if is_used || (has_side_effects && !has_dependent_insts) { - self.materialize_inst_results(inst_info); + self.plan_inst(inst_info); } } @@ -613,17 +620,6 @@ impl<'a> BlockScheduler<'a> { // We're the first use of the referenced instruction, so materialize its // results, and drop any that have no uses. 
- self.materialize_inst_results(inst_info); - } - - fn materialize_inst_results(&mut self, inst_info: Rc) { - let inst_results = self.f.dfg.inst_results(inst_info.inst); - for result in inst_results.iter().copied() { - let is_used = inst_info.results.iter().any(|v| v.value == result && v.is_used()); - if !is_used { - self.worklist.push(Plan::Drop(result)); - } - } self.plan_inst(inst_info); } From e77f23578578e1227372e8cb25a1d49684610904 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Wed, 28 Aug 2024 17:04:56 -0400 Subject: [PATCH 12/18] chore(codegen): clippy suggested some improvements --- codegen/masm/src/masm/program.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/codegen/masm/src/masm/program.rs b/codegen/masm/src/masm/program.rs index fd02f02ce..1d3096b22 100644 --- a/codegen/masm/src/masm/program.rs +++ b/codegen/masm/src/masm/program.rs @@ -564,11 +564,10 @@ fn compute_rodata( // Convert global variable initializers to a data segment, and place it at the computed // global table offset in linear memory. 
- let extra = if globals.len() > 0 { + let extra = if !globals.is_empty() { let size = globals.size_in_bytes(); let offset = global_table_offset; - let mut data = Vec::::with_capacity(size); - data.resize(size, 0); + let mut data = vec![0; size]; for gv in globals.iter() { if let Some(init) = gv.initializer() { let offset = unsafe { globals.offset_of(gv.id()) } as usize; From e6a3e95803ed4f8a78524245430d5b49fbc9a263 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Thu, 29 Aug 2024 04:05:52 -0400 Subject: [PATCH 13/18] fix(codegen): incorrect order of elements for word-oriented loads/stores --- codegen/masm/intrinsics/mem.masm | 416 ++++++++++---------- codegen/masm/src/codegen/emit/mem.rs | 547 ++++++++++++++------------- codegen/masm/src/codegen/emit/mod.rs | 27 +- codegen/masm/src/convert.rs | 5 + codegen/masm/src/emulator/mod.rs | 14 +- codegen/masm/src/tests.rs | 30 +- 6 files changed, 548 insertions(+), 491 deletions(-) diff --git a/codegen/masm/intrinsics/mem.masm b/codegen/masm/intrinsics/mem.masm index 11a137e4c..a279e2a85 100644 --- a/codegen/masm/intrinsics/mem.masm +++ b/codegen/masm/intrinsics/mem.masm @@ -23,7 +23,7 @@ end # This must be called before any other heap intrinsics are called. 
This is checked # by each intrinsic export.heap_init # [heap_base] - push.MAGIC swap.1 push.0 dup.1 # [heap_top, heap_size, heap_base, MAGIC] + dup.0 push.0 swap.1 push.MAGIC # [MAGIC, heap_base, heap_size, heap_top] mem_storew.HEAP_INFO_ADDR dropw end @@ -31,7 +31,7 @@ end # Get the (byte) address where the base of the heap starts export.heap_base padw mem_loadw.HEAP_INFO_ADDR - drop drop swap.1 exec.verify_heap_magic + exec.verify_heap_magic movdn.2 drop drop end # Get the (byte) address of the top of the heap @@ -42,32 +42,33 @@ end # Get the (byte) address of the top of the heap export.heap_top padw mem_loadw.HEAP_INFO_ADDR - movdn.3 drop drop exec.verify_heap_magic + exec.verify_heap_magic drop drop end # Intrinsic corresponding to the `memory_size` instruction export.memory_size padw mem_loadw.HEAP_INFO_ADDR - drop movdn.2 drop exec.verify_heap_magic + exec.verify_heap_magic drop swap.1 drop end # Intrinsic corresponding to the `memory_grow` instruction export.memory_grow # [num_pages] - padw mem_loadw.HEAP_INFO_ADDR # [heap_top, heap_size, heap_base, MAGIC, num_pages] - dup.3 exec.verify_heap_magic - drop # [heap_size, heap_base, MAGIC, num_pages] - dup.0 movdn.4 # [heap_size, heap_base, MAGIC, num_pages, heap_size] - movup.3 # [num_pages, heap_size, heap_base, MAGIC0, heap_size] - u32overflowing_add # [overflowed, heap_size + num_pages, heap_base, MAGIC0, heap_size] - if.true # [new_heap_size, heap_base, MAGIC0, heap_size] + padw mem_loadw.HEAP_INFO_ADDR # [MAGIC, heap_base, heap_size, heap_top, num_pages] + dup.0 exec.verify_heap_magic # [MAGIC, heap_base, heap_size, heap_top, num_pages] + swap.3 drop # [heap_base, heap_size, MAGIC, num_pages] + dup.1 movdn.4 # [heap_base, heap_size, MAGIC, num_pages, heap_size] + swap.1 # [heap_size, heap_base, MAGIC, num_pages, heap_size] + movup.3 # [num_pages, heap_size, heap_base, MAGIC, heap_size] + u32overflowing_add # [overflowed, heap_size + num_pages, heap_base, MAGIC, heap_size] + if.true # [new_heap_size, 
heap_base, MAGIC, heap_size] # Cannot grow the memory, return -1 dropw # [] push.NEG1 else # Success, recompute the heap_top, and make sure it doesn't exceed HEAP_END - dup.0 # [new_heap_size, new_heap_size, heap_base, MAGIC0, heap_size] - push.PAGE_SIZE # [PAGE_SIZE, new_heap_size, new_heap_size, heap_base, MAGIC0, heap_size] - dup.3 # [heap_base, PAGE_SIZE, new_heap_size, new_heap_size, heap_base, MAGIC0, heap_size] + dup.0 # [new_heap_size, new_heap_size, heap_base, MAGIC, heap_size] + push.PAGE_SIZE # [PAGE_SIZE, new_heap_size, new_heap_size, heap_base, MAGIC, heap_size] + dup.3 # [heap_base, PAGE_SIZE, new_heap_size, new_heap_size, heap_base, MAGIC, heap_size] movdn.2 # [PAGE_SIZE, new_heap_size, heap_base, ..] u32overflowing_madd # [overflow, PAGE_SIZE * new_heap_size + heap_base, ..] if.true # [new_heap_top, new_heap_size, heap_base, MAGIC, heap_size] @@ -79,6 +80,8 @@ export.memory_grow # [num_pages] dup.0 u32lte.HEAP_END if.true # Write updated heap information, and return the old heap size (in pages) + swap.2 # [heap_base, new_heap_size, new_heap_top, MAGIC, heap_size] + movup.3 # [MAGIC, heap_base, new_heap_size, new_heap_top, heap_size] mem_storew.HEAP_INFO_ADDR dropw else @@ -94,20 +97,20 @@ end # word other than the at the specified index. # # The element index must be in the range 0..=3. -export.extract_element # [element_index, w0, w1, w2, w3] +export.extract_element # [element_index, w3, w2, w1, w0] # assert the index given is valid dup.0 push.3 lte assert # compute a set of three booleans which used in conjunction with cdrop will # extract the desired element of the given word - dup.0 push.3 lt movdn.5 # [element_index, w0, ..w3, element_index < 3] - dup.0 push.2 lt movdn.5 # [element_index, w0, ..w3, element_index < 2, ..] - push.1 lt # [element_index < 1, w0, ..w3, ..] + dup.0 push.1 gte movdn.5 # [element_index, w3, ..w0, element_index >= 1] + dup.0 push.2 gte movdn.5 # [element_index, w3, ..w0, element_index >= 2, ..] 
+ push.3 eq # [element_index == 3, w3, ..w0, ..] - # drop w1 if the element index is zero; or drop w0 if the element index is non-zero + # if element index == 3, drop w2, else drop w3 cdrop - # drop w2 if the element index is one; or drop w0 and w1 if the element index is > 1 + # if element index >= 2, drop w1, else drop w2+ movup.3 cdrop - # drop w3 if the element index is two; or drop w0, w1, and w2 if the element index is 3 + # if element index >= 1, drop w0, else drop w1+ # # after this point, the only value on the operand stack remaining will be # the element of the word indicated by the index that was on the top of the @@ -122,7 +125,7 @@ proc.load_felt_unchecked # [waddr, index] # [waddr, 0, 0, 0, 0, index] padw movup.4 # load the word which contains the desired element - mem_loadw # [w0, w1, w2, w3, index] + mem_loadw # [w3, w2, w1, w0, index] # select the desired element movup.4 @@ -166,17 +169,18 @@ export.load_sw # [waddr, index, offset] # drop the element index swap.1 drop # load - padw movup.4 mem_loadw # [w0, w1, w2, w3, offset] + padw movup.4 mem_loadw # [w3, w2, w1, w0, offset] # drop the unused elements - movup.3 movup.3 drop drop + drop drop + # shift low bits + push.32 dup.3 # [offset, 32, w1, w0, offset] + u32overflowing_sub assertz # [32 - offset, w1, w0, offset] + u32shr # [lo, w0, offset] # shift high bits left by the offset - dup.2 u32shl # [hi, w1, offset] - # move the low bits to the top and shift them as well - swap.1 push.32 movup.3 # [offset, 32, w1, hi] - u32overflowing_sub assertz # [32 - offset, w1, hi] - u32shr # [lo, hi] + swap.2 # [offset, w0, lo] + u32shl # [hi, lo] # combine the two halves - u32or # [result] + u32or # [result] else # check if the load starts in the second element dup.1 eq.1 @@ -185,50 +189,52 @@ export.load_sw # [waddr, index, offset] # drop the element idnex swap.1 drop # load - padw movup.4 mem_loadw # [w0, w1, w2, w3, offset] + padw movup.4 mem_loadw # [w3, w2, w1, w0, offset] # drop the unused elements - 
drop movdn.2 movdn.2 drop # [w1, w2, offset] - # shift the high bits - dup.2 u32shl # [hi, w2, offset] + drop movup.2 drop # [w2, w1, offset] # shift the low bits - swap.1 push.32 movup.3 # [offset, 32, w2, hi] - u32overflowing_sub assertz # [32 - offset, w2, hi] - u32shr # [lo, hi] + push.32 dup.3 # [offset, 32, w2, w1, offset] + u32overflowing_sub assertz # [32 - offset, w2, w1, offset] + u32shr # [lo, w1, offset] + # shift high bits left by the offset + swap.2 # [offset, w1, lo] + u32shl # [hi, lo] # combine the two halves - u32or # [result] + u32or # [result] else # check if the load starts in the third element swap.1 eq.2 if.true # the load is across both the third and fourth elements - padw movup.4 mem_loadw # [w0, w1, w2, w3, offset] - # drop first two unused - drop drop # [w2, w3, offset] - # shift the high bits - dup.2 u32shl # [hi, w3, offset] + padw movup.4 mem_loadw # [w3, w2, w1, w0, offset] + # drop the unused elements + movup.3 movup.3 drop drop # [w3, w2, offset] # shift the low bits - swap.1 push.32 movup.3 # [offset, 32, w3, hi] - u32overflowing_sub assertz # [32 - offset, w3, hi] - u32shr # [lo, hi] + push.32 dup.3 # [offset, 32, w3, w2, offset] + u32overflowing_sub assertz # [32 - offset, w3, w2, offset] + u32shr # [lo, w2, offset] + # shift the high bits left by the offset + swap.2 # [offset, w2, lo] + u32shl # [hi, lo] # combine the two halves u32or # [result] else # the load crosses a word boundary # start with the word containing the low bits - dup.0 # [waddr, waddr, offset] + dup.0 # [waddr, waddr, offset] u32overflowing_add.1 assertz # [waddr + 1, waddr, offset] - # load the word and drop the unused elements - padw movup.4 mem_loadw movdn.3 drop drop drop # [w0, waddr, offset] + # load the low bits + mem_load # [w0, waddr, offset] # shift the low bits - push.32 dup.3 # [offset, 32, w0, waddr, offset] + push.32 dup.3 # [offset, 32, w0, waddr, offset] u32overflowing_sub assertz # [32 - offset, w0, waddr, offset] - u32shr # [lo, waddr, 
offset] + u32shr # [lo, waddr, offset] # load the word with the high bits, drop unused elements - swap.1 padw movup.4 mem_loadw drop drop drop # [w3, lo, offset] + swap.1 padw movup.4 mem_loadw movdn.3 drop drop drop # [w3, lo, offset] # shift high bits - movup.2 u32shl # [hi, lo] + movup.2 u32shl # [hi, lo] # combine the two halves - u32or # [result] + u32or # [result] end end end @@ -257,9 +263,9 @@ end # The data, on the stack, is shown below: # # If we visualize which bytes are contained in each 32-bit chunk on the stack, -# we get: +# when loaded by `mem_loadw`, we get: # -# [0..=4, 5..=8, 9..=12] +# [, 9..=12, 5..=8, 0..=4] # # These byte indices are relative to the nearest word-aligned address, in the # same order as they would occur in a byte-addressable address space. The @@ -270,7 +276,7 @@ end # If we visualize the layout of the bits of our u64 value spread across the # three chunks, we get: # -# [00000000111111111111111111111111, 111111111111111111111111111111, 11111111111111111111111100000000] +# [, 00000000111111111111111111111111, 111111111111111111111111111111, 11111111111111111111111100000000] # # As illustrated above, what should be a double-word value is occupying three words. # To "realign" the value, i.e. 
ensure that it is naturally aligned and fits in two @@ -336,31 +342,29 @@ export.realign_dw # [chunk_hi, chunk_mid, chunk_lo, offset] swap.1 # [x_hi, x_lo] end -# Shift a double-word (64-bit, in two 32-bit chunks) value by the given offset -# Returns three 32-bit chunks [chunk_hi, chunk_mid, chunk_lo] +# Shift a double-word (64-bit, in two 32-bit chunks) value by the given offset +# Returns three 32-bit chunks [chunk_lo, chunk_mid, chunk_hi] export.offset_dw # [value_hi, value_lo, offset] dup.0 dup.3 u32shr # [chunk_hi, value_hi, value_lo, offset] - movdn.3 # [value_hi, value_lo, offset, chunk_hi] + movdn.3 # [value_hi, value_lo, offset, chunk_hi] push.32 dup.3 u32wrapping_sub # [32 - offset, value_hi, value_lo, offset, chunk_hi] - u32shl # [ chunk_mid_hi, value_lo, offset, chunk_hi] - dup.1 # [ value_lo, chunk_mid_hi, value_lo, offset, chunk_hi] - dup.3 # [ offset, value_lo, chunk_mid_hi, value_lo, offset, chunk_hi] - u32shr # [ chunk_mid_lo, chunk_mid_hi, value_lo, offset, chunk_hi] - u32or # [ chunk_mid, value_lo, offset, chunk_hi] - movdn.2 # [ value_lo, offset, chunk_mid, chunk_hi] + u32shl # [ chunk_mid_hi, value_lo, offset, chunk_hi] + dup.1 # [ value_lo, chunk_mid_hi, value_lo, offset, chunk_hi] + dup.3 # [ offset, value_lo, chunk_mid_hi, value_lo, offset, chunk_hi] + u32shr # [ chunk_mid_lo, chunk_mid_hi, value_lo, offset, chunk_hi] + u32or # [ chunk_mid, value_lo, offset, chunk_hi] + movdn.2 # [ value_lo, offset, chunk_mid, chunk_hi] push.32 movup.2 u32wrapping_sub # [32 - offset, value_lo, offset, chunk_mid, chunk_hi] - u32shl # [ chunk_lo, chunk_mid, chunk_hi] - swap.2 # [ chunk_hi, chunk_mid, chunk_lo] + u32shl # [ chunk_lo, chunk_mid, chunk_hi] end - # Load a pair of machine words (32-bit elements) to the operand stack export.load_dw # [waddr, index, offset] # check for alignment and offset validity dup.2 eq.0 dup.3 push.8 u32lt assert # offset must be < 8 - # convert offset from bytes to bits + # convert offset from bytes to bits movup.3 push.8 
u32wrapping_mul movdn.3 # [waddr, index, offset, value_hi, value_lo] # if the pointer is naturally aligned.. if.true @@ -372,32 +376,31 @@ export.load_dw # [waddr, index, offset] # drop index swap.1 drop # [waddr] # load first two elements - padw movup.4 mem_loadw # [w0, w1, w2, w3] + padw movup.4 mem_loadw # [w3, w2, w1, w0] # drop last two elements, and we're done - movup.3 movup.3 drop drop # [w0, w1] + drop drop swap.1 # [w0, w1] else dup.1 eq.1 if.true # drop index swap.1 drop # [waddr] # load second and third elements - padw movup.4 mem_loadw # [w0, w1, w2, w3] + padw movup.4 mem_loadw # [w3, w2, w1, w0] # drop unused elements, and we're done - drop movup.2 drop # [w1, w2] + movup.3 drop drop swap.1 # [w1, w2] else swap.1 eq.2 if.true # load third and fourth elements, drop unused, and we're done - padw movup.4 mem_loadw drop drop + padw movup.4 mem_loadw # [w3, w2, w1, w0] + movup.3 movup.3 drop drop swap.1 # [w2, w3] else - # load first element of next word, drop the rest - dup.0 u32overflowing_add.1 assertz padw movup.4 # [waddr + 1, 0, 0, 0, 0, waddr] - mem_loadw # [w0, w1, w2, w3, waddr] - movup.3 movup.3 movup.3 # [w1, w2, w3, w0, waddr] - drop drop drop # [w0, waddr] + # load first element of next word + dup.0 u32overflowing_add.1 assertz # [waddr + 1, waddr] + mem_load # [w0, waddr] # load fourth element, and we're done - swap.1 padw movup.4 # [waddr, 0, 0, 0, 0, w0] - mem_loadw drop drop drop + swap.1 padw movup.4 mem_loadw # [w3, w2, w1, w0, lo] + movdn.3 drop drop drop # [hi, lo] end end end @@ -405,50 +408,54 @@ export.load_dw # [waddr, index, offset] # check if we start in the first element dup.1 eq.0 if.true - # memory layout: [hi, mid, lo, 0] + # memory layout: [, lo, mid, hi] # drop the index - swap.1 drop # [waddr, offset] + swap.1 drop # [waddr, offset] # load three elements containing the double-word on the stack - padw movup.4 # [waddr, 0, 0, 0, 0, offset] - mem_loadw movup.3 drop + padw movup.4 mem_loadw # [w3, w2, w1, w0, offset] + drop # 
[w2, w1, w0, offset] + # move into stack order (hi bytes first) + swap.2 # [w0, w1, w2, offset] # re-align it, and we're done; realign_dw gets [w0, w1, w2, offset] exec.realign_dw else # check if we start in the second element dup.1 eq.1 if.true - # memory layout: [0, hi, mid, lo] + # memory layout: [lo, mid, hi, ] # drop the index swap.1 drop # load three elements containing the double-word on the stack - padw movup.4 mem_loadw drop + padw movup.4 mem_loadw # [w3, w2, w1, w0, offset] + movup.3 drop # [w3, w2, w1, offset] + # move into stack order + swap.2 # [w1, w2, w3, offset] # re-align it, and we're done; realign_dw gets [w1, w2, w3, offset] exec.realign_dw else # check if we start in the third element swap.1 eq.2 # [waddr, offset] if.true - # memory layout: [0, 0, 0, hi], [ mid, lo, 0, 0] + # memory layout: [mid, hi, ..] [, , , lo] # load one element from the next word dup.0 u32overflowing_add.1 assertz # [waddr + 1, waddr, offset] - padw movup.4 # [waddr + 1, 0, 0, 0, 0, waddr, offset] - mem_loadw # [chunk_lo, 0, 0, 0, waddr, offset] - movup.3 movup.3 movup.3 drop drop drop # [chunk_lo, waddr, offset] + mem_load # [chunk_lo, waddr, offset] # load two elements from the first word - swap.1 padw movup.4 # [waddr, 0, 0, 0, 0, chunk_lo, offset] - mem_loadw drop drop # [chunk_hi, chunk_mid, chunk_lo, offset] - # re-align it, and we're done + padw movup.5 # [waddr, 0, 0, 0, 0, chunk_lo, offset] + mem_loadw # [chunk_mid, chunk_hi, ?, ?, chunk_lo, offset] + swap.3 drop # [chunk_hi, ?, chunk_mid, chunk_lo, offset] + swap.1 drop # [chunk_hi, chunk_mid, chunk_lo, offset] + # re-align it, and we're done exec.realign_dw else - # memory layout: [0, 0, hi, mid], [lo, 0, 0, 0] + # memory layout: [hi, ..], [, , mid, lo] # load the two least-significant elements from the next word first dup.0 u32overflowing_add.1 assertz # [waddr + 1, waddr, offset] - padw movup.4 # [waddr + 1, 0, 0, 0, 0, waddr, offset] - mem_loadw # [chunk_mid, chunk_lo, 0, 0, waddr, offset] - movup.3 movup.3 
drop drop # [chunk_mid, chunk_lo, waddr, offset] + padw movup.4 # [waddr + 1, 0, 0, 0, 0, waddr, offset] + mem_loadw drop drop # [mid, lo, waddr, offset] # load the most significant element from the first word - movup.2 padw movup.4 # [waddr, 0, 0, 0, 0, chunk_mid, chunk_lo, offset] - mem_loadw drop drop drop # [chunk_hi, chunk_mid, chunk_lo, offset] + padw movup.6 # [waddr, 0, 0, 0, 0, mid, lo, offset] + mem_loadw movdn.3 drop drop drop # [hi, mid, lo, offset] # re-align it, and we're done exec.realign_dw end @@ -461,23 +468,23 @@ end # at the specified index, leaving the modified word on top of the stack # # The element index must be in the range 0..=3. -export.replace_element # [element_index, value, w0, w1, w2, w3] +export.replace_element # [element_index, value, w3, w2, w1, w0] # assert the index given is valid dup.0 push.3 lte assert # compute a set of three booleans which used in conjunction with cdrop will # extract the desired value for each element of the given word - movup.2 dup.2 # [value, w0, element_index, value, w1, ..w3] - dup.2 push.0 eq cdrop # [w0', element_index, value, w1, ..w3] - movdn.6 # [element_index, value, w1, ..w3, w0'] + movup.2 dup.2 # [value, w3, element_index, value, w2, ..w0] + dup.2 push.3 eq cdrop # [w3', element_index, value, w2, ..w0] + movdn.5 # [element_index, value, w2, ..w0, w3'] movup.2 dup.2 - dup.2 push.1 eq cdrop - movdn.6 # [element_index, value, w2, w3, w0', w1'] + dup.2 push.2 eq cdrop # [w2', element_index, value, w1, w0, w3'] + movdn.5 # [element_index, value, w1, w0, w3', w2'] movup.2 dup.2 - dup.2 push.2 eq cdrop - movdn.6 # [element_index, value, w3, w0', w1', w2'] + dup.2 push.1 eq cdrop + movdn.5 # [element_index, value, w0, w3', w2', w1'] # on the last element, consume the element index and replacement value - push.3 eq cdrop # [w3', w0', w1', w2'] - movdn.4 + push.0 eq cdrop # [w0', w3', w2', w1'] + movdn.3 # [w3', w2', w1', w0'] end # See `store_felt` for safe usage @@ -486,15 +493,15 @@ 
proc.store_felt_unchecked # [waddr, index, value] # [waddr, 0, 0, 0, 0, waddr, index, value] padw dup.4 # load the original word - mem_loadw # [w0, w1, w2, w3, waddr, index, value] + mem_loadw # [w3, w2, w1, w0, waddr, index, value] # rewrite the desired element - movup.6 # [value, w0, w1, w2, w3, waddr, index] - movup.6 # [index, value, w0, w1, w2, w3, waddr] - exec.replace_element # [w0', w1', w2', w3', waddr] + movup.6 # [value, w3, w2, w1, w0, waddr, index] + movup.6 # [index, value, w3, w2, w1, w0, waddr] + exec.replace_element # [w3', w2', w1', w0', waddr] # store the updated word - mem_storew + movup.4 mem_storew dropw end @@ -535,32 +542,32 @@ export.store_sw # [waddr, index, offset, value] # drop the element index swap.1 drop # load current value - padw dup.4 mem_loadw # [w0, w1, w2, w3, waddr, offset, value] + padw dup.4 mem_loadw # [w3, w2, w1, w0, waddr, offset, value] # compute the bit shift - push.32 dup.6 sub # [rshift, w0..w3, waddr, offset, value] + push.32 dup.6 sub # [rshift, w3..w0, waddr, offset, value] # compute the masks - push.4294967295 dup.1 u32shl # [mask_hi, rshift, w0..w3, waddr, offset, value] - dup.0 u32not # [mask_lo, mask_hi, rshift, w0, w1, w2, w3, waddr, offset, value] + push.4294967295 dup.1 u32shl # [mask_hi, rshift, w3..w0, waddr, offset, value] + dup.0 u32not # [mask_lo, mask_hi, rshift, w3, w2, w1, w0, waddr, offset, value] # manipulate the bits of the two target elements, such that the 32-bit word # we're storing is placed at the correct offset from the start of the memory # cell when viewing the cell as a set of 4 32-bit chunks - movup.4 u32and # [w1_masked, mask_hi, rshift, w0, w2, w3, waddr, offset, value] - movup.3 movup.2 u32and # [w0_masked, w1_masked, rshift, w2, w3, waddr, offset, value] + movup.5 u32and # [w1_masked, mask_hi, rshift, w3, w2, w0, waddr, offset, value] + movup.5 movup.2 u32and # [w0_masked, w1_masked, rshift, w3, w2, waddr, offset, value] # now, we need to shift/mask/split the 32-bit value into two 
elements, then # combine them with the preserved bits of the original contents of the cell # # first, the contents of w0 - dup.7 movup.7 u32shr u32or # [w0', w1_masked, rshift, w2..w3, waddr, value] + dup.7 movup.7 u32shr u32or # [w0', w1_masked, rshift, w3..w2, waddr, value] # then the contents of w1 swap.1 - movup.6 movup.3 u32shl u32or # [w1', w0', w2, w3, waddr] + movup.6 movup.3 u32shl u32or # [w1', w0', w3, w2, waddr] # ensure word is in order - swap.1 + movup.3 movup.3 # [w3, w2, w1', w0', waddr] # finally, write back the updated word, and clean up the operand stack movup.4 mem_storew dropw @@ -569,36 +576,36 @@ export.store_sw # [waddr, index, offset, value] dup.1 eq.1 if.true # the load is across both the second and third elements - # drop the element idnex + # drop the element index swap.1 drop # load current value - padw dup.4 mem_loadw # [w0, w1, w2, w3, waddr, offset, value] + padw dup.4 mem_loadw # [w3, w2, w1, w0, waddr, offset, value] # compute the bit shift - push.32 dup.6 sub # [rshift, w0..w3, waddr, offset, value] + push.32 dup.6 sub # [rshift, w3..w0, waddr, offset, value] # compute the masks - push.4294967295 dup.1 u32shl # [mask_hi, rshift, w0..w3, waddr, offset, value] - dup.0 u32not # [mask_lo, mask_hi, rshift, w0, w1, w2, w3, waddr, offset, value] + push.4294967295 dup.1 u32shl # [mask_hi, rshift, w3..w0, waddr, offset, value] + dup.0 u32not # [mask_lo, mask_hi, rshift, w3, w2, w1, w0, waddr, offset, value] # manipulate the bits of the two target elements, such that the 32-bit word # we're storing is placed at the correct offset from the start of the memory # cell when viewing the cell as a set of 4 32-bit chunks - movup.5 u32and # [w2_masked, mask_hi, rshift, w0, w1, w3, waddr, offset, value] - movup.4 movup.2 u32and # [w1_masked, w2_masked, rshift, w0, w3, waddr, offset, value] + movup.4 u32and # [w2_masked, mask_hi, rshift, w3, w1, w0, waddr, offset, value] + movup.4 movup.2 u32and # [w1_masked, w2_masked, rshift, w3, w0, waddr, offset, 
value] # now, we need to shift/mask/split the 32-bit value into two elements, then # combine them with the preserved bits of the original contents of the cell # # first, the contents of w1 - dup.7 movup.7 u32shr u32or # [w1', w2_masked, rshift, w0, w3, waddr, value] + dup.7 movup.7 u32shr u32or # [w1', w2_masked, rshift, w3, w0, waddr, value] # then the contents of w2 swap.1 - movup.6 movup.3 u32shl u32or # [w2', w1', w0, w3, waddr] + movup.6 movup.3 u32shl u32or # [w2', w1', w3, w0, waddr] # ensure the elements are in order - swap.2 + movup.3 swap.3 # [w3, w2', w1', w0, waddr] # finally, write back the updated word, and clean up the operand stack movup.4 mem_storew dropw @@ -608,32 +615,29 @@ export.store_sw # [waddr, index, offset, value] if.true # the load is across both the third and fourth elements # load current value - padw dup.4 mem_loadw # [w0, w1, w2, w3, waddr, offset, value] + padw dup.4 mem_loadw # [w3, w2, w1, w0, waddr, offset, value] # compute the bit shift - push.32 dup.6 sub # [rshift, w0..w3, waddr, offset, value] + push.32 dup.6 sub # [rshift, w3..w0, waddr, offset, value] # compute the masks - push.4294967295 dup.1 u32shl # [mask_hi, rshift, w0..w3, waddr, offset, value] - dup.0 u32not # [mask_lo, mask_hi, rshift, w0, w1, w2, w3, waddr, offset, value] + push.4294967295 dup.1 u32shl # [mask_hi, rshift, w3..w0, waddr, offset, value] + dup.0 u32not # [mask_lo, mask_hi, rshift, w3, w2, w1, w0, waddr, offset, value] # manipulate the bits of the two target elements, such that the 32-bit word # we're storing is placed at the correct offset from the start of the memory # cell when viewing the cell as a set of 4 32-bit chunks - movup.6 u32and # [w3_masked, mask_hi, rshift, w0, w1, w2, waddr, offset, value] - movup.5 movup.2 u32and # [w2_masked, w3_masked, rshift, w0, w1, waddr, offset, value] + movup.3 u32and # [w3_masked, mask_hi, rshift, w2, w1, w0, waddr, offset, value] + movup.3 movup.2 u32and # [w2_masked, w3_masked, rshift, w1, w0, waddr, offset, 
value] # now, we need to shift/mask/split the 32-bit value into two elements, then # combine them with the preserved bits of the original contents of the cell # # first, the contents of w2 - dup.7 movup.7 u32shr u32or # [w2', w3_masked, rshift, w0, w1, waddr, value] + dup.7 movup.7 u32shr u32or # [w2', w3_masked, rshift, w1, w0, waddr, value] # then the contents of w3 swap.1 - movup.6 movup.3 u32shl u32or # [w3', w2', w0, w1, waddr] - - # ensure the elements are in order - swap.3 movup.2 + movup.6 movup.3 u32shl u32or # [w3', w2', w1, w0, waddr] # finally, write back the updated word, and clean up the operand stack movup.4 mem_storew dropw @@ -665,16 +669,13 @@ export.store_sw # [waddr, index, offset, value] mem_store # [mask_hi, waddr, offset, value] # next, update the last element of the lowest addressed word - padw dup.5 mem_loadw # [w0, w1, w2, w3, mask_hi, waddr, offset, value] + padw dup.5 mem_loadw # [w3, w2, w1, w0, mask_hi, waddr, offset, value] # mask out the bits of the value that are being overwritten - movup.3 movup.4 u32and # [w3_masked, w0, w1, w2, waddr, offset, value] + movup.4 u32and # [w3_masked, w2, w1, w0, waddr, offset, value] # extract the bits to be stored in this word and combine them - movup.6 movup.6 u32shr u32or # [w3', w0, w1, w2, waddr] - - # ensure elements of word are in order - movdn.3 + movup.6 movup.6 u32shr u32or # [w3', w2, w1, w0, waddr] # write updated word movup.4 mem_storew @@ -697,7 +698,7 @@ export.store_dw # [waddr, index, offset, value_hi, value_lo] # check for alignment and offset validity dup.2 eq.0 dup.3 push.8 u32lt assert # offset must be < 8 - # convert offset from bytes to bits + # convert offset from bytes to bits movup.3 push.8 u32wrapping_mul movdn.3 # [offset == 0, waddr, index, offset, value_hi, value_lo] # if the pointer is naturally aligned.. 
if.true @@ -707,36 +708,47 @@ export.store_dw # [waddr, index, offset, value_hi, value_lo] dup.1 eq.0 if.true # drop index - swap.1 drop # [waddr, value_hi, value_lo] - push.0 movdn.3 push.0 movdn.3 # [waddr, value_hi, value_lo, 0, 0] - mem_storew + swap.1 drop # [waddr, value_hi, value_lo] + swap.2 # [value_lo, value_hi, waddr] + padw dup.6 mem_loadw # [w3, w2, w1, w0, value_lo, value_hi, waddr] + swap.2 drop # [w2, w3, w0, value_lo, value_hi, waddr] + swap.2 drop # [w3, w2, value_lo, value_hi, waddr] + movup.4 # [waddr, w3, w2, value_lo, value_hi] + mem_storew # cleanup the operand stack dropw else dup.1 eq.1 if.true # drop index - swap.1 drop # [waddr, value_hi, value_lo] + swap.1 drop # [waddr, value_hi, value_lo] # store as the second and third elements of the word - push.0 swap.1 push.0 movdn.4 # [waddr, 0, value_hi, value_lo, 0] - mem_storew + swap.2 # [value_lo, value_hi, waddr] + padw dup.6 mem_loadw # [w3, w2, w1, w0, value_lo, value_hi, waddr] + movup.4 swap.2 drop # [w3, value_lo, w1, w0, value_hi, waddr] + movup.4 swap.3 drop # [w3, value_lo, value_hi, w0, waddr] + movup.4 mem_storew # cleanup the operand stack dropw else swap.1 eq.2 if.true # store as the third and fourth elements of the word - push.0 swap.1 push.0 swap.1 # [waddr, 0, 0, value_hi, value_lo] - mem_storew + swap.2 # [value_lo, value_hi, waddr] + padw dup.6 mem_loadw # [w3, w2, w1, w0, value_lo, value_hi, waddr] + movup.5 swap.2 drop # [w3, value_hi, w1, w0, value_lo, waddr] + drop movup.3 # [value_lo, value_hi, w1, w0, waddr] + movup.4 mem_storew # cleanup the operand stack dropw else # store the first element of the next word - dup.0 u32overflowing_add.1 assertz # [waddr + 1, waddr, value_hi, value_lo] - padw drop movup.6 movup.4 # [waddr + 1, value_lo, 0, 0, 0, waddr, value_hi] - mem_storew dropw # [waddr, value_hi] - # store the forth element - padw drop movup.3 # [waddr, 0, 0, 0, value_hi] + swap.2 # [value_lo, value_hi, waddr] + dup.2 u32overflowing_add.1 assertz # [waddr + 1, 
value_lo, value_hi, waddr] + mem_store # [value_hi, waddr] + # store the fourth element + padw dup.5 mem_loadw # [w3, w2, w1, w0, value_hi, waddr] + drop movup.3 movup.4 # [waddr, value_hi, w2, w1, w0] mem_storew dropw end end @@ -746,54 +758,54 @@ export.store_dw # [waddr, index, offset, value_hi, value_lo] movup.2 # [offset, waddr, index, value_hi, value_lo] movup.4 # [value_lo, offset, waddr, index, value_hi] movup.4 # [value_hi, value_lo, offset, waddr, index] - exec.offset_dw # [chunk_hi, chunk_mid, chunk_lo, waddr, index] - movup.4 # [index, chunk_hi, chunk_mid, chunk_lo, waddr] + exec.offset_dw # [chunk_lo, chunk_mid, chunk_hi, waddr, index] + movup.4 # [index, chunk_lo, chunk_mid, chunk_hi, waddr] # check if we start in the first element dup.0 eq.0 if.true - # target memory layout: [hi, mid, lo, 0] + # target memory layout: [0, lo, mid, hi] # drop the index - drop - push.0 swap.4 # [waddr, chunk_hi, chunk_mid, chunk_lo, 0] - mem_storew + drop # [lo, mid, hi, waddr] + padw dup.7 mem_loadw # [w3, w2, w1, w0, lo, mid, hi, waddr] + movdn.3 # [w2, w1, w0, w3, lo, mid, hi, waddr] + drop drop drop # [w3, lo, mid, hi, waddr] + movup.4 mem_storew dropw else # check if we start in the second element dup.0 eq.1 if.true - # target memory layout: [0, hi, mid, lo] + # target memory layout: [lo, mid, hi, 0] # drop the index - drop # [chunk_hi, chunk_mid, chunk_lo, waddr] - push.0 movup.4 # [waddr, 0, chunk_hi, chunk_mid, chunk_lo] - mem_storew + drop # [lo, mid, hi, waddr] + padw dup.7 mem_loadw # [w3, w2, w1, w0, lo, mid, hi, waddr] + drop drop drop # [w0, lo, mid, hi, waddr] + movdn.3 # [lo, mid, hi, w0, waddr] + movup.4 mem_storew dropw - else + else # check if we start in the third element - eq.2 # [chunk_hi, chunk_mid, chunk_lo, waddr] - if.true - # target memory layout: [0, 0, hi, mid], [lo, 0, 0, 0] - push.0 push.0 dup.5 # [waddr, 0, 0, chunk_hi, chunk_mid, chunk_lo, waddr] - mem_storew - dropw # [chunk_lo, waddr] - swap.1 # [waddr, chunk_lo] - push.1 
u32overflowing_add assertz # [waddr + 1, chunk_lo] - padw movup.5 movup.5 # [waddr + 1, chunk_lo, 0, 0, 0, 0] - mem_storew - dropw # [0] - drop - else - # target memory layout: [0, 0, 0, hi], [mid, lo, 0, 0] - push.0 push.0 push.0 dup.6 # [waddr, 0, 0, 0, chunk_hi, chunk_mid, chunk_lo, waddr] - mem_storew - dropw # [chunk_mid, chunk_lo, waddr] - movup.2 # [waddr, chunk_mid, chunk_lo] - push.1 u32overflowing_add assertz # [waddr + 1, chunk_mid, chunk_lo] - padw movup.6 movup.6 movup.6 # [waddr + 1, chunk_mid, chunk_lo, 0, 0, 0, 0] - mem_storew - dropw # [0, 0] - drop drop + eq.2 # [lo, mid, hi, waddr] + if.true + # target memory layout: [mid, hi, ..], [..lo] + padw dup.7 mem_loadw # [w3, w2, w1, w0, lo, mid, hi, waddr] + drop drop movup.4 movup.4 # [mid, hi, w1, w0, lo, waddr] + dup.5 mem_storew dropw # [lo, waddr] + swap.1 u32overflowing_add.1 assertz # [waddr + 1, lo] + mem_store + else + # target memory layout: [hi, ..], [..lo, mid] + padw dup.7 mem_loadw # [w3, w2, w1, w0, lo, mid, hi, waddr] + drop movup.5 # [hi, w2, w1, w0, lo, mid, waddr] + dup.6 mem_storew dropw # [lo, mid, waddr] + movup.2 u32overflowing_add.1 assertz # [waddr + 1, lo, mid] + dup.0 movdn.3 # [waddr + 1, lo, mid, waddr + 1] + padw movup.4 mem_loadw # [w3, w2, w1, w0, lo, mid, waddr + 1] + movup.5 swap.4 drop # [w3, w2, w1, mid, lo, waddr + 1] + movup.4 swap.3 drop # [w3, w2, lo, mid, waddr + 1] + movup.4 mem_storew dropw end end - end - end + end + end end diff --git a/codegen/masm/src/codegen/emit/mem.rs b/codegen/masm/src/codegen/emit/mem.rs index 450b42d9c..320fbe156 100644 --- a/codegen/masm/src/codegen/emit/mem.rs +++ b/codegen/masm/src/codegen/emit/mem.rs @@ -175,10 +175,9 @@ impl<'a> OpEmitter<'a> { &[ Op::Padw, Op::MemLoadwImm(ptr.waddr), - Op::Movup(4), - Op::Movup(4), Op::Drop, Op::Drop, + Op::Swap(1), Op::Drop, ], span, @@ -190,8 +189,8 @@ impl<'a> OpEmitter<'a> { Op::Padw, Op::MemLoadwImm(ptr.waddr), Op::Drop, + Op::Movdn(2), Op::Drop, - Op::Swap(1), Op::Drop, ], span, @@ -199,7 
+198,14 @@ impl<'a> OpEmitter<'a> { } 3 => { self.emit_all( - &[Op::Padw, Op::MemLoadwImm(ptr.waddr), Op::Drop, Op::Drop, Op::Drop], + &[ + Op::Padw, + Op::MemLoadwImm(ptr.waddr), + Op::Movdn(3), + Op::Drop, + Op::Drop, + Op::Drop, + ], span, ); } @@ -231,18 +237,13 @@ impl<'a> OpEmitter<'a> { // Load a quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Move the two elements across which the desired machine word spans - // to the bottom of the stack temporarily - Op::Movdn(4), - Op::Movdn(4), - // Drop the unused elements Op::Drop, Op::Drop, - // Shift the high bits left by the offset - Op::U32ShlImm(ptr.offset as u32), - // Move the low bits to the top and shift them right - Op::Swap(1), + // shift low bits Op::U32ShrImm(rshift), + // shift high bits left by the offset + Op::Swap(1), + Op::U32ShlImm(ptr.offset as u32), // OR the high and low bits together Op::U32Or, ], @@ -254,12 +255,11 @@ impl<'a> OpEmitter<'a> { // Load a quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Drop the first unused element + // Drop w3, w2 Op::Drop, - // Move the desired element past the last two unused - Op::Movdn(3), - // Drop the remaining unused elements Op::Drop, + // Drop w1 + Op::Swap(1), Op::Drop, ], span, @@ -270,19 +270,15 @@ impl<'a> OpEmitter<'a> { // Load a quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Drop the first unused element + // Drop unused elements Op::Drop, - // Move the two elements across which the desired machine word spans - // to the bottom of the stack temporarily - Op::Movdn(3), - Op::Movdn(3), - // Drop the remaining unused element + Op::Movup(2), Op::Drop, - // Shift the high bits left by the offset - Op::U32ShlImm(ptr.offset as u32), - // Move the low bits to the top and shift them right - Op::Swap(1), + // Shift the low bits Op::U32ShrImm(rshift), + // Shift the high bits + Op::Swap(1), + Op::U32ShlImm(ptr.offset as u32), // OR the high and low bits together Op::U32Or, ], @@ -294,11 +290,12 @@ impl<'a> OpEmitter<'a> { // Load a quad-word 
Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Drop the first two unused elements + // Drop w3 Op::Drop, + // Move w2 to bottom + Op::Movdn(2), + // Drop w1, w0 Op::Drop, - // Swap the last remaining unused element to the top and drop it - Op::Swap(1), Op::Drop, ], span, @@ -309,14 +306,15 @@ impl<'a> OpEmitter<'a> { // Load a quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Drop the first two unused elements + // Drop unused elements + Op::Movup(3), + Op::Movup(3), Op::Drop, Op::Drop, - // Shift the high bits left by the offset - Op::U32ShlImm(ptr.offset as u32), - // Move the low bits to the top and shift them right - Op::Swap(1), + // Shift low bits Op::U32ShrImm(rshift), + // Shift high bits + Op::U32ShlImm(ptr.offset as u32), // OR the high and low bits together Op::U32Or, ], @@ -328,6 +326,8 @@ impl<'a> OpEmitter<'a> { // Load a quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr), + // Move w3 to bottom + Op::Movdn(3), // Drop the three unused elements Op::Drop, Op::Drop, @@ -339,24 +339,18 @@ impl<'a> OpEmitter<'a> { self.emit_all( &[ // Load the quad-word containing the low bits - Op::Padw, - Op::MemLoadwImm(ptr.waddr + 1), - // Move the element we need to the bottom temporarily - Op::Movdn(4), - // Drop the unused elements - Op::Drop, - Op::Drop, - Op::Drop, - // Shift the low bits right by the offset + Op::MemLoadImm(ptr.waddr + 1), + // Shift the low bits Op::U32ShrImm(rshift), // Load the quad-word containing the high bits Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Drop the unused elements + // Drop unused elements + Op::Movdn(3), Op::Drop, Op::Drop, Op::Drop, - // Shift the high bits left by the offset + // Shift the high bits Op::U32ShlImm(ptr.offset as u32), // OR the high and low bits together Op::U32Or, @@ -406,6 +400,8 @@ impl<'a> OpEmitter<'a> { // Move the unused element to the top and drop it Op::Movup(4), Op::Drop, + // Move into stack order for realign_dw + Op::Swap(2), ], span, ); @@ -435,6 +431,8 @@ impl<'a> OpEmitter<'a> { 
Op::MemLoadwImm(ptr.waddr), // Drop the unused element Op::Drop, + // Move into stack order for realign_dw + Op::Swap(2), ], span, ); @@ -474,6 +472,8 @@ impl<'a> OpEmitter<'a> { // Drop the two unused elements Op::Drop, Op::Drop, + // Move into stack order for realign_dw + Op::Swap(2), ], span, ); @@ -513,6 +513,8 @@ impl<'a> OpEmitter<'a> { Op::Drop, Op::Drop, Op::Drop, + // Move into stack order for realign_dw + Op::Swap(2), ], span, ); @@ -535,22 +537,41 @@ impl<'a> OpEmitter<'a> { let aligned = ptr.is_element_aligned(); match ptr.index { // Naturally-aligned - 0 if aligned => self.emit_all(&[Op::Padw, Op::MemLoadwImm(ptr.waddr)], span), + 0 if aligned => self.emit_all( + &[ + // Load the word + Op::Padw, + // [w3, w2, w1, w0] + Op::MemLoadwImm(ptr.waddr), + // Swap the element order to lowest-address-first + // [w2, w3, w1, w0] + Op::Swap(1), + // [w1, w3, w2, w0] + Op::Swap(2), + // [w3, w1, w2, w0] + Op::Swap(1), + // [w0, w1, w2, w3] + Op::Swap(3), + ], + span, + ), 0 => { // An unaligned quad-word load spans five elements self.emit_all( &[ - // Load second quad-word - Op::Padw, - Op::MemLoadwImm(ptr.waddr + 1), - // Drop all but the first element - Op::Movdn(4), - Op::Drop, - Op::Drop, - Op::Drop, + // Load first element of second quad-word + // [e] + Op::MemLoadImm(ptr.waddr + 1), // Load first quad-word Op::Padw, + // [d, c, b, a, e] Op::MemLoadwImm(ptr.waddr), + // [a, c, b, d, e] + Op::Swap(3), + // [c, a, b, d, e] + Op::Swap(1), + // [a, b, c, d, e] + Op::Movdn(2), ], span, ); @@ -559,17 +580,18 @@ impl<'a> OpEmitter<'a> { 1 if aligned => { self.emit_all( &[ - // Load second quad-word - Op::Padw, - Op::MemLoadwImm(ptr.waddr + 1), - // Drop last element - Op::Movup(4), - Op::Drop, + // Load first element of second quad-word + // [d] + Op::MemLoadImm(ptr.waddr + 1), // Load first quad-word Op::Padw, + // [c, b, a, _, d] Op::MemLoadwImm(ptr.waddr), - // Drop first element + // [_, b, a, c, d] + Op::Swap(3), Op::Drop, + // [a, b, c, d] + Op::Swap(1), ], 
span, ); @@ -578,19 +600,28 @@ impl<'a> OpEmitter<'a> { // An unaligned double-word load spans five elements self.emit_all( &[ - // Load second quad-word + // Load first two elements of second quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr + 1), - // Drop all but the first two elements - Op::Movdn(4), - Op::Movdn(4), Op::Drop, + // [e, d] Op::Drop, - // Load first quad-word + // Load last three elements of first quad-word Op::Padw, + // [c, b, a, _, e, d] Op::MemLoadwImm(ptr.waddr), - // Drop the first word + // [_, b, a, c, e, d] + Op::Swap(3), + // [b, a, c, e, d] Op::Drop, + // [e, a, c, b, d] + Op::Swap(3), + // [d, a, c, b, e] + Op::Swap(4), + // [b, a, c, d, e] + Op::Swap(3), + // [a, b, c, d, e] + Op::Swap(1), ], span, ); @@ -599,18 +630,24 @@ impl<'a> OpEmitter<'a> { 2 if aligned => { self.emit_all( &[ - // Load second quad-word + // Load first two elements of second quad-word Op::Padw, + // [_, _, d, c] Op::MemLoadwImm(ptr.waddr), // Drop last two elements - Op::Movup(4), - Op::Movup(4), Op::Drop, + // [d, c] Op::Drop, - // Load first quad-word + // Load last two elements of first quad-word Op::Padw, + // [b, a, _, _, d, c] Op::MemLoadwImm(ptr.waddr), - // Drop first two elements + // [d, a, _, _, b, c] + Op::Swap(4), + // [a, _, _, b, c, d] + Op::Movdn(5), + // [_, _, a, b, c, d] + Op::Swap(2), Op::Drop, Op::Drop, ], @@ -621,17 +658,27 @@ impl<'a> OpEmitter<'a> { // An unaligned double-word load spans five elements self.emit_all( &[ - // Load the second quad-word + // Load the first three elements of the second quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr + 1), - // Drop the last element - Op::Movup(4), + // [e, d, c] Op::Drop, - // Load the first quad-word + // Load the last two elements of the first quad-word Op::Padw, + // [b, a, _, _, e, d, c] Op::MemLoadwImm(ptr.waddr), - // Drop the two unused elements + // [a, _, _, b, e, d, c] + Op::Movdn(3), + // [_, _, a, b, e, d, c] + Op::Movdn(2), + // [c, _, a, b, e, d, _] + Op::Swap(6), + // [e, _, a, b, c, 
d, _] + Op::Swap(4), + // [_, _, a, b, c, d, e] + Op::Swap(6), Op::Drop, + // [a, b, c, d, e] Op::Drop, ], span, @@ -641,15 +688,14 @@ impl<'a> OpEmitter<'a> { 3 if aligned => { self.emit_all( &[ - // Load second word, drop last element + // Load first three elements of second quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr + 1), - Op::Movup(4), Op::Drop, - // Load first word + // Load last element of first quad-word Op::Padw, Op::MemLoadwImm(ptr.waddr), - // Drop first three elements + Op::Movdn(3), Op::Drop, Op::Drop, Op::Drop, @@ -661,16 +707,28 @@ impl<'a> OpEmitter<'a> { // An unaligned quad-word load spans five elements, self.emit_all( &[ - // Load second word + // Load second quad-word Op::Padw, + // [e, d, c, b] Op::MemLoadwImm(ptr.waddr + 1), - // Load first word + // Load last element of first quad-word Op::Padw, + // [a, _, _, _, e, d, c, b] Op::MemLoadwImm(ptr.waddr), - // Drop unused elements + // [_, _, _, a, e, d, c, b] + Op::Movdn(3), Op::Drop, Op::Drop, + // [a, e, d, c, b] Op::Drop, + // [e, a, d, c, b] + Op::Swap(1), + // [b, a, d, c, e] + Op::Swap(4), + // [d, a, b, c, e] + Op::Swap(2), + // [a, b, c, d, e] + Op::Movdn(3), ], span, ); @@ -719,72 +777,8 @@ impl<'a> OpEmitter<'a> { /// have to perform a sequence of shifts and masks to get the bits where they belong. This /// function performs those steps, with the assumption that the caller has three values on /// the operand stack representing any unaligned double-word value - fn realign_double_word(&mut self, ptr: NativePtr, span: SourceSpan) { - // The stack starts as: [chunk_hi, chunk_mid, chunk_lo] - // - // We will refer to the parts of our desired double-word value - // as two parts, `x_hi` and `x_lo`. - self.emit_all( - &[ - // Re-align the high bits by shifting out the offset - // - // This gives us the first half of the first word. 
- // - // [x_hi_hi, chunk_mid, chunk__lo] - Op::U32ShlImm(ptr.offset as u32), - // Move the value below the other chunks temporarily - // - // [chunk_mid, chunk_lo, x_hi_hi] - Op::Movdn(3), - // We must split the middle chunk into two parts, - // one containing the bits to be combined with the - // first machine word; the other to be combined with - // the second machine word. - // - // First, we duplicate the chunk, since we need two - // copies of it: - // - // [chunk_mid, chunk_mid, chunk_lo, x_hi_hi] - Op::Dup(0), - // Then, we shift the chunk right by 32 - offset bits, - // re-aligning the low bits of the first word, and - // isolating them. - // - // [x_hi_lo, chunk_mid, chunk_lo, x_hi_hi] - Op::U32ShrImm(32 - ptr.offset as u32), - // Move the high bits back to the top - // - // [x_hi_hi, x_hi_lo, chunk_mid, chunk_lo] - Op::Movup(3), - // OR the two parts of the `x_hi` chunk together - // - // [x_hi, chunk_mid, chunk_lo] - Op::U32Or, - // Move `x_hi` to the bottom for later - Op::Movdn(2), - // Now, we need to re-align the high bits of the second word - // by shifting the remaining copy of the middle chunk, similar - // to what we did at the very beginning. - // - // This gives us the first half of the second word. 
- // - // [x_lo_hi, chunk_lo, x_hi] - Op::U32ShlImm(ptr.offset as u32), - // Next, swap the low bit chunk to the top temporarily - Op::Swap(1), - // Shift the value right, as done previously for the middle chunk - Op::U32ShrImm(32 - ptr.offset as u32), - // OR the two halves together, giving us our second word, `x_lo` - // - // [x_lo, x_hi] - Op::U32Or, - // Swap the words so they are in the correct order - // - // [x_hi, x_lo] - Op::Swap(1), - ], - span, - ); + fn realign_double_word(&mut self, _ptr: NativePtr, span: SourceSpan) { + self.emit(Op::Exec("intrinsics::mem::realign_dw".parse().unwrap()), span); } /// This handles emitting code that handles aligning an unaligned quad machine-word value @@ -1207,9 +1201,24 @@ impl<'a> OpEmitter<'a> { let aligned = ptr.is_element_aligned(); match ptr.index { // Naturally-aligned - 0 if aligned => self.emit_all(&[Op::Padw, Op::MemLoadwImm(ptr.waddr)], span), + 0 if aligned => self.emit_all( + &[ + // Stack: [a, b, c, d] + // Swap to highest-address-first order + // [d, b, c, a] + Op::Swap(3), + // [c, d, b, a] + Op::Movup(2), + // [d, c, b, a] + Op::Swap(1), + // Write to heap + Op::MemStorewImm(ptr.waddr), + Op::Dropw, + ], + span, + ), _ => { - todo!() + todo!("quad-word stores currently require 32-byte alignment") } } } @@ -1228,9 +1237,35 @@ impl<'a> OpEmitter<'a> { let aligned = ptr.is_element_aligned(); match ptr.index { // Naturally-aligned - 0 if aligned => self.emit_all(&[Op::Padw, Op::MemLoadwImm(ptr.waddr)], span), + 0 if aligned => self.emit_all( + &[ + // Swap value to highest-address-first order + Op::Swap(1), + // Load existing word + Op::Padw, + // [d, c, b, a, v_lo, v_hi] + Op::MemLoadwImm(ptr.waddr), + // Replace bottom two elements with value + // [b, c, d, a, v_lo, v_hi] + Op::Swap(2), + // [c, d, a, v_lo, v_hi] + Op::Drop, + // [a, d, c, v_lo, v_hi] + Op::Swap(2), + // [d, c, v_lo, v_hi] + Op::Drop, + Op::MemStorewImm(ptr.waddr), + Op::Dropw, + ], + span, + ), _ => { - todo!() + // TODO: Optimize 
double-word stores when pointer is contant + self.emit_all( + &[Op::PushU8(ptr.offset), Op::PushU8(ptr.index), Op::PushU32(ptr.waddr)], + span, + ); + self.emit(Op::Exec("intrinsics::mem::store_dw".parse().unwrap()), span); } } } @@ -1258,39 +1293,36 @@ impl<'a> OpEmitter<'a> { let mask_lo = u32::MAX >> (ptr.offset as u32); self.emit_all( &[ - // Load the full quad-word on to the operand stack + // Load the word Op::Padw, + // [w3, w2, w1, w0, value] Op::MemLoadwImm(ptr.waddr), - // Manipulate the bits of the first two elements, such that the 32-bit - // word we're storing is placed at the correct offset from the start - // of the memory cell when viewing the cell as a set of 4 32-bit chunks - // - // First, mask out the bits we plan to overwrite with the store op from the - // first two elements - Op::Swap(1), + // [w1, w3, w2, w0, value] + Op::Movup(2), Op::PushU32(mask_lo), + // [w1_masked, w3, w2, w0, value] Op::U32And, - Op::Swap(1), + // [w0, w1_masked, w3, w2, value] + Op::Movup(3), Op::PushU32(mask_hi), + // [w0_masked, w1_masked, w3, w2, value] Op::U32And, - // Now, we need to shift/mask/split the 32-bit value into two elements, - // then combine them with the preserved bits of the - // original contents of the cell - // - // We start with the bits belonging to the first element in the cell + // [value, w0_masked, w1_masked, w3, w2, value] Op::Dup(4), + // [value, w0_masked, w1_masked, w3, w2, value] Op::U32ShrImm(ptr.offset as u32), + // [w0', w1_masked, w3, w2, value] Op::U32Or, - // Then the bits belonging to the second element in the cell + // [w1_masked, w0', w3, w2, value] + Op::Swap(1), Op::Movup(4), Op::U32ShlImm(rshift), - Op::Movup(2), + // [w1', w0', w3, w2] Op::U32Or, - // Make sure the elements of the cell are in order - Op::Swap(1), - // Write the word back to the cell + Op::Movup(3), + // [w3, w2, w1', w0'] + Op::Movup(3), Op::MemStorewImm(ptr.waddr), - // Clean up the operand stack Op::Dropw, ], span, @@ -1300,10 +1332,13 @@ impl<'a> 
OpEmitter<'a> { &[ // Load a quad-word Op::Padw, + // [d, c, _, a, value] Op::MemLoadwImm(ptr.waddr), - // Replace the stored element + // [value, d, c, _, a] Op::Movup(4), - Op::Swap(2), + // [_, d, c, value, a] + Op::Swap(3), + // [d, c, value, a] Op::Drop, // Write the word back to the cell Op::MemStorewImm(ptr.waddr), @@ -1317,40 +1352,37 @@ impl<'a> OpEmitter<'a> { let mask_lo = u32::MAX >> (ptr.offset as u32); self.emit_all( &[ - // Load the full quad-word on to the operand stack Op::Padw, + // the load is across both the second and third elements + // [w3, w2, w1, w0, value] Op::MemLoadwImm(ptr.waddr), - // Manipulate the bits of the middle two elements, such that the 32-bit - // word we're storing is placed at the correct offset from the start - // of the memory cell when viewing the cell as a set of 4 32-bit chunks - // - // First, mask out the bits we plan to overwrite with the store op from the - // first two elements - Op::Swap(2), // [elem3, elem2, elem1, elem4, value] + // [w2, w3, w1, w0, value] + Op::Swap(1), Op::PushU32(mask_lo), + // [w2_masked, w3, w1, w0, value] Op::U32And, - Op::Swap(1), // [elem2, elem3, elem1, elem4, value] + // [w1, w2_masked, w3, w0, value] + Op::Movup(2), Op::PushU32(mask_hi), + // [w1_masked, w2_masked, w3, w0, value] Op::U32And, - // Now, we need to shift/mask/split the 32-bit value into two elements, - // then combine them with the preserved bits of the - // original contents of the cell - // - // We start with the bits belonging to the second element in the cell - Op::Dup(4), // [value, elem2, elem3, elem1, elem4, value] + // [value, w1_masked, w2_masked, w3, w0, value] + Op::Dup(4), Op::U32ShrImm(ptr.offset as u32), + // [w1', w2_masked, w3, w0, value] Op::U32Or, - // Then the bits belonging to the third element in the cell + // [w2_masked, w1', w3, w0, value] + Op::Swap(1), + // [value, w2_masked, w1', w3, w0] Op::Movup(4), Op::U32ShlImm(rshift), - Op::Movup(2), - Op::U32Or, // [elem3, elem2, elem1, elem4] - // Make 
sure the elements of the cell are in order - Op::Swap(1), - Op::Movup(2), // [elem1, elem2, elem3, elem4] - // Write the word back to the cell + // [w2', w1', w3, w0, value] + Op::U32Or, + // [w0, w2', w1', w3, value] + Op::Movup(3), + // [w3, w2', w1', w0, value] + Op::Swap(3), Op::MemStorewImm(ptr.waddr), - // Clean up the operand stack Op::Dropw, ], span, @@ -1360,10 +1392,12 @@ impl<'a> OpEmitter<'a> { &[ // Load a quad-word Op::Padw, + // [d, _, b, a, value] Op::MemLoadwImm(ptr.waddr), - // Replace the stored element - Op::Movup(5), - Op::Swap(3), + // [value, d, _, b, a] + Op::Movup(4), + // [_, d, value, b, a] + Op::Swap(2), Op::Drop, // Write the word back to the cell Op::MemStorewImm(ptr.waddr), @@ -1377,40 +1411,31 @@ impl<'a> OpEmitter<'a> { let mask_lo = u32::MAX >> (ptr.offset as u32); self.emit_all( &[ - // Load the full quad-word on to the operand stack + // the load is across both the third and fourth elements Op::Padw, + // [w3, w2, w1, w0, value] Op::MemLoadwImm(ptr.waddr), - // Manipulate the bits of the last two elements, such that the 32-bit - // word we're storing is placed at the correct offset from the start - // of the memory cell when viewing the cell as a set of 4 32-bit chunks - // - // First, mask out the bits we plan to overwrite with the store op from the - // first two elements - Op::Swap(3), // [elem4, elem2, elem3, elem1, value] Op::PushU32(mask_lo), + // [w3_masked, w2, w1, w0, value] Op::U32And, - Op::Movup(2), // [elem3, elem4, elem2, elem1, value] + // [w2, w3_masked, w1, w0, value] + Op::Swap(1), Op::PushU32(mask_hi), + // [w2_masked, w3_masked, w1, w0, value] Op::U32And, - // Now, we need to shift/mask/split the 32-bit value into two elements, - // then combine them with the preserved bits of the - // original contents of the cell - // - // We start with the bits belonging to the third element in the cell - Op::Dup(4), // [value, elem3, elem4, elem2, elem1, value] + // [value, w2_masked, w3_masked, w1, w0, value] + 
Op::Dup(4), Op::U32ShrImm(ptr.offset as u32), + // [w2', w3_masked, w1, w0, value] Op::U32Or, - // Then the bits belonging to the fourth element in the cell + // [w3_masked, w2', w1, w0, value] + Op::Swap(1), + // [value, w3_masked, w2', w1, w0] Op::Movup(4), Op::U32ShlImm(rshift), - Op::Movup(2), - Op::U32Or, // [elem4, elem3, elem2, elem1] - // Make sure the elements of the cell are in order - Op::Swap(2), // [elem2, elem3, elem4, elem1] - Op::Movup(3), // [elem1, elem2, elem3, elem4] - // Write the word back to the cell + // [w3', w2', w1, w0] + Op::U32Or, Op::MemStorewImm(ptr.waddr), - // Clean up the operand stack Op::Dropw, ], span, @@ -1420,10 +1445,12 @@ impl<'a> OpEmitter<'a> { &[ // Load a quad-word Op::Padw, + // [_, c, b, a, value] Op::MemLoadwImm(ptr.waddr), - // Replace the stored element - Op::Movup(4), + // [c, b, a, value] Op::Drop, + // [value, c, b, a] + Op::Movup(3), // Write the word back to the cell Op::MemStorewImm(ptr.waddr), // Clean up the operand stack @@ -1440,39 +1467,35 @@ impl<'a> OpEmitter<'a> { let mask_lo = u32::MAX >> (ptr.offset as u32); self.emit_all( &[ - // Load the full quad-word on to the operand stack + // the load crosses a word boundary, start with the element containing + // the highest-addressed bits + // [w0, value] + Op::MemLoadImm(ptr.waddr + 1), + Op::PushU32(mask_lo), + // [w0_masked, value] + Op::U32And, + // [value, w0_masked, value] + Op::Dup(1), + // [w0', value] + Op::U32ShlImm(rshift), + Op::U32Or, + // Store it + // [value] + Op::MemStoreImm(ptr.waddr + 1), + // Load the first word Op::Padw, + // [w3, w2, w1, w0, value] Op::MemLoadwImm(ptr.waddr), - // Manipulate the bits of the last element, such that the "high" bits - // of the 32-bit word we're storing is placed at the correct offset from - // the start of the memory cell when viewing the - // cell as a set of 4 32-bit chunks - // - // First, mask out the bits we plan to overwrite with the store op from the - // last element - Op::Swap(3), // [elem4, 
elem2, elem3, elem1, value] - Op::PushU32(mask_lo), + Op::PushU32(mask_hi), + // [w3_masked, w2, w1, w0, value] Op::U32And, - // Now, we need to shift/mask/split the 32-bit value into the bits that - // will be merged with this word - Op::Dup(4), // [value, elem4, elem2, elem3, elem1, value] + // [value, w3_masked, w2, w1, w0] + Op::Movup(4), Op::U32ShrImm(ptr.offset as u32), + // [w3', w2, w1, w0] Op::U32Or, - // Move the fourth element back into place - Op::Swap(3), // [elem1, elem2, elem3, elem4, value] - // Write the first word and clear the operand stack Op::MemStorewImm(ptr.waddr), Op::Dropw, - // Compute the bits of the value that we'll merge into the second word - Op::U32ShlImm(rshift), - // Load the first element of the second word - Op::MemLoadImm(ptr.waddr + 1), - // Mask out the bits we plan to overwrite - Op::PushU32(mask_hi), - Op::U32And, - // Merge the bits and write back the second word - Op::U32Or, - Op::MemStoreImm(ptr.waddr + 1), ], span, ); @@ -1500,9 +1523,13 @@ impl<'a> OpEmitter<'a> { self.emit_all( &[ Op::Padw, + // [d, c, _, a, value] Op::MemLoadwImm(ptr.waddr), + // [value, d, c, _, a] Op::Movup(4), - Op::Swap(2), + // [_, d, c, value, a] + Op::Swap(3), + // [d, c, value, a] Op::Drop, Op::MemStorewImm(ptr.waddr), Op::Dropw, @@ -1514,9 +1541,12 @@ impl<'a> OpEmitter<'a> { self.emit_all( &[ Op::Padw, + // [d, _, b, a, value] Op::MemLoadwImm(ptr.waddr), + // [value, d, _, b, a] Op::Movup(4), - Op::Swap(3), + // [_, d, value, b, a] + Op::Swap(2), Op::Drop, Op::MemStorewImm(ptr.waddr), Op::Dropw, @@ -1528,9 +1558,12 @@ impl<'a> OpEmitter<'a> { self.emit_all( &[ Op::Padw, + // [_, c, b, a, value] Op::MemLoadwImm(ptr.waddr), - Op::Movup(3), + // [c, b, a, value] Op::Drop, + // [value, c, b, a] + Op::Movup(3), Op::MemStorewImm(ptr.waddr), Op::Dropw, ], diff --git a/codegen/masm/src/codegen/emit/mod.rs b/codegen/masm/src/codegen/emit/mod.rs index e572f66a4..6f8eb0760 100644 --- a/codegen/masm/src/codegen/emit/mod.rs +++ 
b/codegen/masm/src/codegen/emit/mod.rs @@ -894,15 +894,16 @@ mod tests { emitter.add_imm(one, Overflow::Overflowing, SourceSpan::default()); assert_eq!(emitter.stack_len(), 2); - assert_eq!(emitter.stack()[0], Type::I1); - assert_eq!(emitter.stack()[1], Type::U32); + assert_eq!(emitter.stack()[0], Type::U32); + assert_eq!(emitter.stack()[1], Type::I1); + emitter.swap(1, SourceSpan::default()); emitter.drop(SourceSpan::default()); emitter.dup(0, SourceSpan::default()); emitter.add(Overflow::Overflowing, SourceSpan::default()); assert_eq!(emitter.stack_len(), 2); - assert_eq!(emitter.stack()[0], Type::I1); - assert_eq!(emitter.stack()[1], Type::U32); + assert_eq!(emitter.stack()[0], Type::U32); + assert_eq!(emitter.stack()[1], Type::I1); } #[test] @@ -929,15 +930,16 @@ mod tests { emitter.sub_imm(one, Overflow::Overflowing, SourceSpan::default()); assert_eq!(emitter.stack_len(), 2); - assert_eq!(emitter.stack()[0], Type::I1); - assert_eq!(emitter.stack()[1], Type::U32); + assert_eq!(emitter.stack()[0], Type::U32); + assert_eq!(emitter.stack()[1], Type::I1); + emitter.swap(1, SourceSpan::default()); emitter.drop(SourceSpan::default()); emitter.dup(0, SourceSpan::default()); emitter.sub(Overflow::Overflowing, SourceSpan::default()); assert_eq!(emitter.stack_len(), 2); - assert_eq!(emitter.stack()[0], Type::I1); - assert_eq!(emitter.stack()[1], Type::U32); + assert_eq!(emitter.stack()[0], Type::U32); + assert_eq!(emitter.stack()[1], Type::I1); } #[test] @@ -964,15 +966,16 @@ mod tests { emitter.mul_imm(one, Overflow::Overflowing, SourceSpan::default()); assert_eq!(emitter.stack_len(), 2); - assert_eq!(emitter.stack()[0], Type::I1); - assert_eq!(emitter.stack()[1], Type::U32); + assert_eq!(emitter.stack()[0], Type::U32); + assert_eq!(emitter.stack()[1], Type::I1); + emitter.swap(1, SourceSpan::default()); emitter.drop(SourceSpan::default()); emitter.dup(0, SourceSpan::default()); emitter.mul(Overflow::Overflowing, SourceSpan::default()); assert_eq!(emitter.stack_len(), 
2); - assert_eq!(emitter.stack()[0], Type::I1); - assert_eq!(emitter.stack()[1], Type::U32); + assert_eq!(emitter.stack()[0], Type::U32); + assert_eq!(emitter.stack()[1], Type::I1); } #[test] diff --git a/codegen/masm/src/convert.rs b/codegen/masm/src/convert.rs index affcbac03..c54390a78 100644 --- a/codegen/masm/src/convert.rs +++ b/codegen/masm/src/convert.rs @@ -178,6 +178,11 @@ impl<'a> ConversionPass for ConvertHirToMasm<&'a hir::Function> { let scheduler = Scheduler::new(f, &mut f_prime, &domtree, &loops, &liveness); let schedule = scheduler.build(); + /* + if f.id.function.as_str().contains("get_inputs") { + dbg!(&schedule); + } + */ let emitter = FunctionEmitter::new(f, &mut f_prime, &domtree, &loops, &liveness, &globals); emitter.emit(schedule, stack); diff --git a/codegen/masm/src/emulator/mod.rs b/codegen/masm/src/emulator/mod.rs index 9a85a809c..701c18c73 100644 --- a/codegen/masm/src/emulator/mod.rs +++ b/codegen/masm/src/emulator/mod.rs @@ -1488,13 +1488,17 @@ impl Emulator { Op::MemLoadw => { let addr = pop_addr!(self); self.stack.dropw(); - self.stack.pushw(self.memory[addr]); + let mut word = self.memory[addr]; + word.reverse(); + self.stack.pushw(word); } Op::MemLoadwImm(addr) => { let addr = addr as usize; assert!(addr < self.memory.len() - 4, "out of bounds memory access"); self.stack.dropw(); - self.stack.pushw(self.memory[addr]); + let mut word = self.memory[addr]; + word.reverse(); + self.stack.pushw(word); } Op::MemStore => { let addr = pop_addr!(self); @@ -1519,8 +1523,9 @@ impl Emulator { } Op::MemStorew => { let addr = pop_addr!(self); - let word = + let mut word = self.stack.peekw().expect("operand stack does not contain a full word"); + word.reverse(); self.memory[addr] = word; self.callstack.push(state); return Ok(EmulatorEvent::MemoryWrite { @@ -1531,8 +1536,9 @@ impl Emulator { Op::MemStorewImm(addr) => { let addr = addr as usize; assert!(addr < self.memory.len() - 4, "out of bounds memory access"); - let word = + let mut word = 
self.stack.peekw().expect("operand stack does not contain a full word"); + word.reverse(); self.memory[addr] = word; self.callstack.push(state); return Ok(EmulatorEvent::MemoryWrite { diff --git a/codegen/masm/src/tests.rs b/codegen/masm/src/tests.rs index e6bb7ce4c..f5a52edbc 100644 --- a/codegen/masm/src/tests.rs +++ b/codegen/masm/src/tests.rs @@ -573,7 +573,7 @@ fn codegen_mem_store_dw_load_dw() { "store_load_dw", Signature::new( [AbiParam::new(Type::U32), AbiParam::new(Type::U64)], - [AbiParam::new(Type::U32)], + [AbiParam::new(Type::U64)], ), ) .expect("unexpected symbol conflict"); @@ -835,16 +835,15 @@ impl ToCanonicalRepr for u64 { } fn canonicalize(self) -> SmallVec<[Felt; 4]> { - let bytes = self.to_be_bytes(); - let a = Felt::new(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as u64); - let b = Felt::new(u32::from_be_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]) as u64); - smallvec![a, b] + let lo = self.rem_euclid(2u64.pow(32)); + let hi = self.div_euclid(2u64.pow(32)); + smallvec![Felt::new(hi), Felt::new(lo)] } fn from_stack(stack: &mut OperandStack) -> Self { - let hi = ::from_stack(stack) as u64; - let lo = ::from_stack(stack) as u64; - (hi << 32) | lo + let hi = stack.pop().unwrap().as_int() * 2u64.pow(32); + let lo = stack.pop().unwrap().as_int(); + hi + lo } } @@ -868,18 +867,17 @@ impl ToCanonicalRepr for i128 { } fn canonicalize(self) -> SmallVec<[Felt; 4]> { - let bytes = self.to_be_bytes(); - let a = Felt::new(u32::from_be_bytes([bytes[0], bytes[1], bytes[2], bytes[3]]) as u64); - let b = Felt::new(u32::from_be_bytes([bytes[4], bytes[5], bytes[6], bytes[7]]) as u64); - let c = Felt::new(u32::from_be_bytes([bytes[8], bytes[9], bytes[10], bytes[11]]) as u64); - let d = Felt::new(u32::from_be_bytes([bytes[12], bytes[13], bytes[14], bytes[15]]) as u64); - smallvec![a, b, c, d] + let lo = self.rem_euclid(2i128.pow(64)); + let hi = self.div_euclid(2i128.pow(64)); + let mut out = (hi as u64).canonicalize(); + 
out.extend_from_slice(&(lo as u64).canonicalize()); + out } fn from_stack(stack: &mut OperandStack) -> Self { - let hi = ::from_stack(stack) as i128; + let hi = (::from_stack(stack) as i128) * 2i128.pow(64); let lo = ::from_stack(stack) as i128; - (hi << 64) | lo + hi + lo } } From c010baeb6433c406055917ba45bdc8236e2444ab Mon Sep 17 00:00:00 2001 From: Denys Zadorozhnyi Date: Thu, 29 Aug 2024 18:54:27 +0300 Subject: [PATCH 14/18] fix: swap the lo and mid parts in the most shifted case in `load_dw` memory intrinsics. --- codegen/masm/intrinsics/mem.masm | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/codegen/masm/intrinsics/mem.masm b/codegen/masm/intrinsics/mem.masm index a279e2a85..b52903213 100644 --- a/codegen/masm/intrinsics/mem.masm +++ b/codegen/masm/intrinsics/mem.masm @@ -448,11 +448,12 @@ export.load_dw # [waddr, index, offset] # re-align it, and we're done exec.realign_dw else - # memory layout: [hi, ..], [, , mid, lo] + # memory layout: [hi, ..], [, , lo, mid] # load the two least-significant elements from the next word first dup.0 u32overflowing_add.1 assertz # [waddr + 1, waddr, offset] padw movup.4 # [waddr + 1, 0, 0, 0, 0, waddr, offset] - mem_loadw drop drop # [mid, lo, waddr, offset] + mem_loadw drop drop # [lo, mid, waddr, offset] + swap.1 # [mid, lo, waddr, offset] # load the most significant element from the first word padw movup.6 # [waddr, 0, 0, 0, 0, mid, lo, offset] mem_loadw movdn.3 drop drop drop # [hi, mid, lo, offset] From 7747dcd18052dd22d51fc926ab9fe3fd27636f22 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Thu, 29 Aug 2024 12:29:01 -0400 Subject: [PATCH 15/18] test: re-enable get_inputs test --- .../abi_transform_tx_kernel_get_inputs_4.hir | 812 +----- .../abi_transform_tx_kernel_get_inputs_4.masm | 2365 +---------------- .../abi_transform_tx_kernel_get_inputs_4.wat | 509 +--- .../abi_transform/tx_kernel.rs | 31 +- 4 files changed, 311 insertions(+), 3406 deletions(-) diff --git 
a/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.hir b/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.hir index cc32d5f97..64e441a26 100644 --- a/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.hir +++ b/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.hir @@ -10,7 +10,7 @@ ;; Functions (func (export #entrypoint) (param i32) (block 0 (param v0 i32) - (call #miden_tx_kernel_sys::get_inputs v0) + (call #miden_base_sys::bindings::tx::get_inputs v0) (br (block 1))) (block 1 @@ -20,7 +20,7 @@ (func (export #__rust_alloc) (param i32) (param i32) (result i32) (block 0 (param v0 i32) (param v1 i32) (let (v3 i32) (const.i32 1048576)) - (let (v4 i32) (call #::alloc v3 v1 v0)) + (let (v4 i32) (call #::alloc v3 v1 v0)) (br (block 1 v4))) (block 1 (param v2 i32) @@ -30,7 +30,7 @@ (func (export #__rust_alloc_zeroed) (param i32) (param i32) (result i32) (block 0 (param v0 i32) (param v1 i32) (let (v3 i32) (const.i32 1048576)) - (let (v4 i32) (call #::alloc v3 v1 v0)) + (let (v4 i32) (call #::alloc v3 v1 v0)) (let (v5 i1) (eq v4 0)) (let (v6 i32) (zext v5)) (let (v7 i1) (neq v6 0)) @@ -52,713 +52,106 @@ (br (block 2 v4))) ) - (func (export #wee_alloc::neighbors::Neighbors::remove) - (param i32) - (block 0 (param v0 i32) - (let (v1 i32) (const.i32 0)) - (let (v2 u32) (bitcast v0)) - (let (v3 u32) (mod.unchecked v2 4)) - (assertz 250 v3) - (let (v4 (ptr i32)) (inttoptr v2)) - (let (v5 i32) (load v4)) - (let (v6 i32) (const.i32 2)) - (let (v7 i32) (band v5 v6)) - (let (v8 i1) (neq v7 0)) - (condbr v8 (block 2 v0 v5) (block 3))) + (func (export #::alloc) + (param i32) (param i32) (param i32) (result i32) + (block 0 (param v0 i32) (param v1 i32) (param v2 i32) + (let (v4 i32) (const.i32 0)) + (let (v5 i32) (const.i32 32)) + (let (v6 i32) (const.i32 32)) + (let (v7 u32) (bitcast v1)) + (let (v8 u32) (bitcast v6)) + (let (v9 i1) (gt v7 v8)) + (let (v10 i32) (sext v9)) + (let (v11 i1) (neq v10 0)) + (let (v12 i32) (select 
v11 v1 v5)) + (let (v13 u32) (popcnt v12)) + (let (v14 i32) (bitcast v13)) + (let (v15 i32) (const.i32 1)) + (let (v16 i1) (neq v14 v15)) + (let (v17 i32) (zext v16)) + (let (v18 i1) (neq v17 0)) + (condbr v18 (block 2) (block 3))) - (block 1 - (ret)) + (block 1 (param v3 i32)) - (block 2 (param v37 i32) (param v54 i32) - (let (v38 u32) (bitcast v37)) - (let (v39 u32) (add.checked v38 4)) - (let (v40 u32) (mod.unchecked v39 4)) - (assertz 250 v40) - (let (v41 (ptr i32)) (inttoptr v39)) - (let (v42 i32) (load v41)) - (let (v43 i32) (const.i32 -4)) - (let (v44 i32) (band v42 v43)) - (let (v45 i1) (eq v44 0)) - (let (v46 i32) (zext v45)) - (let (v47 i1) (neq v46 0)) - (condbr v47 (block 5 v37 v42 v54) (block 6))) + (block 2 + (unreachable)) (block 3 - (let (v9 i32) (const.i32 -4)) - (let (v10 i32) (band v5 v9)) - (let (v11 i1) (eq v10 0)) - (let (v12 i32) (zext v11)) - (let (v13 i1) (neq v12 0)) - (condbr v13 (block 2 v0 v5) (block 4))) + (let (v19 i32) (const.i32 -2147483648)) + (let (v20 i32) (sub.wrapping v19 v12)) + (let (v21 u32) (bitcast v20)) + (let (v22 u32) (bitcast v2)) + (let (v23 i1) (lt v21 v22)) + (let (v24 i32) (sext v23)) + (let (v25 i1) (neq v24 0)) + (condbr v25 (block 2) (block 4))) (block 4 - (let (v14 u32) (bitcast v10)) - (let (v15 u32) (add.checked v14 4)) - (let (v16 u32) (mod.unchecked v15 4)) - (assertz 250 v16) - (let (v17 (ptr i32)) (inttoptr v15)) - (let (v18 i32) (load v17)) - (let (v19 i32) (const.i32 3)) - (let (v20 i32) (band v18 v19)) - (let (v21 u32) (bitcast v0)) - (let (v22 u32) (add.checked v21 4)) - (let (v23 u32) (mod.unchecked v22 4)) - (assertz 250 v23) - (let (v24 (ptr i32)) (inttoptr v22)) - (let (v25 i32) (load v24)) - (let (v26 i32) (const.i32 -4)) - (let (v27 i32) (band v25 v26)) - (let (v28 i32) (bor v20 v27)) - (let (v29 u32) (bitcast v10)) - (let (v30 u32) (add.checked v29 4)) - (let (v31 u32) (mod.unchecked v30 4)) - (assertz 250 v31) - (let (v32 (ptr i32)) (inttoptr v30)) - (store v32 v28) + (let (v26 i32) (const.i32 
0)) + (let (v27 i32) (add.wrapping v12 v2)) + (let (v28 i32) (const.i32 -1)) + (let (v29 i32) (add.wrapping v27 v28)) + (let (v30 i32) (const.i32 0)) + (let (v31 i32) (sub.wrapping v30 v12)) + (let (v32 i32) (band v29 v31)) (let (v33 u32) (bitcast v0)) (let (v34 u32) (mod.unchecked v33 4)) (assertz 250 v34) (let (v35 (ptr i32)) (inttoptr v33)) (let (v36 i32) (load v35)) - (br (block 2 v0 v36))) - - (block 5 (param v70 i32) (param v71 i32) (param v78 i32) - (let (v72 i32) (const.i32 3)) - (let (v73 i32) (band v71 v72)) - (let (v74 u32) (bitcast v70)) - (let (v75 u32) (add.checked v74 4)) - (let (v76 u32) (mod.unchecked v75 4)) - (assertz 250 v76) - (let (v77 (ptr i32)) (inttoptr v75)) - (store v77 v73) - (let (v79 i32) (const.i32 3)) - (let (v80 i32) (band v78 v79)) - (let (v81 u32) (bitcast v70)) - (let (v82 u32) (mod.unchecked v81 4)) - (assertz 250 v82) - (let (v83 (ptr i32)) (inttoptr v81)) - (store v83 v80) - (br (block 1))) - - (block 6 - (let (v48 u32) (bitcast v44)) - (let (v49 u32) (mod.unchecked v48 4)) - (assertz 250 v49) - (let (v50 (ptr i32)) (inttoptr v48)) - (let (v51 i32) (load v50)) - (let (v52 i32) (const.i32 3)) - (let (v53 i32) (band v51 v52)) - (let (v55 i32) (const.i32 -4)) - (let (v56 i32) (band v54 v55)) - (let (v57 i32) (bor v53 v56)) - (let (v58 u32) (bitcast v44)) - (let (v59 u32) (mod.unchecked v58 4)) - (assertz 250 v59) - (let (v60 (ptr i32)) (inttoptr v58)) - (store v60 v57) - (let (v61 u32) (bitcast v37)) - (let (v62 u32) (add.checked v61 4)) - (let (v63 u32) (mod.unchecked v62 4)) - (assertz 250 v63) - (let (v64 (ptr i32)) (inttoptr v62)) - (let (v65 i32) (load v64)) - (let (v66 u32) (bitcast v37)) - (let (v67 u32) (mod.unchecked v66 4)) - (assertz 250 v67) - (let (v68 (ptr i32)) (inttoptr v66)) - (let (v69 i32) (load v68)) - (br (block 5 v37 v65 v69))) - ) - - (func (export #::new_cell_for_free_list) - (param i32) (param i32) (param i32) (param i32) - (block 0 (param v0 i32) (param v1 i32) (param v2 i32) (param v3 i32) - (let (v4 
i32) (const.i32 2)) - (let (v5 u32) (bitcast v4)) - (let (v6 i32) (shl.wrapping v2 v5)) - (let (v7 i32) (const.i32 3)) - (let (v8 u32) (bitcast v7)) - (let (v9 i32) (shl.wrapping v3 v8)) - (let (v10 i32) (const.i32 512)) - (let (v11 i32) (add.wrapping v9 v10)) - (let (v12 u32) (bitcast v6)) - (let (v13 u32) (bitcast v11)) - (let (v14 i1) (gt v12 v13)) - (let (v15 i32) (sext v14)) - (let (v16 i1) (neq v15 0)) - (let (v17 i32) (select v16 v6 v11)) - (let (v18 i32) (const.i32 65543)) - (let (v19 i32) (add.wrapping v17 v18)) - (let (v20 i32) (const.i32 16)) - (let (v21 u32) (bitcast v19)) - (let (v22 u32) (bitcast v20)) - (let (v23 u32) (shr.wrapping v21 v22)) - (let (v24 i32) (bitcast v23)) - (let (v25 u32) (bitcast v24)) - (let (v26 i32) (memory.grow v25)) - (let (v27 i32) (const.i32 -1)) - (let (v28 i1) (neq v26 v27)) - (let (v29 i32) (zext v28)) - (let (v30 i1) (neq v29 0)) - (condbr v30 (block 3) (block 4))) - - (block 1 - (ret)) - - (block 2 (param v50 i32) (param v51 i32) (param v56 i32) - (let (v52 u32) (bitcast v50)) - (let (v53 u32) (add.checked v52 4)) - (let (v54 u32) (mod.unchecked v53 4)) - (assertz 250 v54) - (let (v55 (ptr i32)) (inttoptr v53)) - (store v55 v51) - (let (v57 u32) (bitcast v50)) - (let (v58 u32) (mod.unchecked v57 4)) - (assertz 250 v58) - (let (v59 (ptr i32)) (inttoptr v57)) - (store v59 v56) - (br (block 1))) - - (block 3 - (let (v33 i32) (const.i32 16)) - (let (v34 u32) (bitcast v33)) - (let (v35 i32) (shl.wrapping v26 v34)) - (let (v36 i64) (const.i64 0)) - (let (v37 u32) (bitcast v35)) - (let (v38 u32) (add.checked v37 4)) - (let (v39 u32) (mod.unchecked v38 4)) - (assertz 250 v39) - (let (v40 (ptr i64)) (inttoptr v38)) - (store v40 v36) - (let (v41 i32) (const.i32 -65536)) - (let (v42 i32) (band v19 v41)) - (let (v43 i32) (add.wrapping v35 v42)) - (let (v44 i32) (const.i32 2)) - (let (v45 i32) (bor v43 v44)) - (let (v46 u32) (bitcast v35)) - (let (v47 u32) (mod.unchecked v46 4)) - (assertz 250 v47) - (let (v48 (ptr i32)) (inttoptr 
v46)) - (store v48 v45) - (let (v49 i32) (const.i32 0)) - (br (block 2 v0 v35 v49))) - - (block 4 - (let (v31 i32) (const.i32 1)) - (let (v32 i32) (const.i32 0)) - (br (block 2 v0 v32 v31))) - ) - - (func (export #wee_alloc::alloc_first_fit) - (param i32) (param i32) (param i32) (result i32) - (block 0 (param v0 i32) (param v1 i32) (param v2 i32) - (let (v4 i32) (const.i32 0)) - (let (v5 i32) (const.i32 -1)) - (let (v6 i32) (add.wrapping v1 v5)) - (let (v7 i32) (const.i32 0)) - (let (v8 i32) (const.i32 0)) - (let (v9 i32) (sub.wrapping v8 v1)) - (let (v10 i32) (const.i32 2)) - (let (v11 u32) (bitcast v10)) - (let (v12 i32) (shl.wrapping v0 v11)) - (let (v13 u32) (bitcast v2)) - (let (v14 u32) (mod.unchecked v13 4)) - (assertz 250 v14) - (let (v15 (ptr i32)) (inttoptr v13)) - (let (v16 i32) (load v15)) - (br (block 2 v16 v12 v9 v6 v2 v7))) - - (block 1 (param v3 i32)) + (let (v37 i1) (neq v36 0)) + (condbr v37 (block 5 v0 v32 v12 v26) (block 6))) - (block 2 - (param v18 i32) - (param v206 i32) - (param v209 i32) - (param v212 i32) - (param v215 i32) - (param v237 i32) - (let (v19 i1) (eq v18 0)) - (let (v20 i32) (zext v19)) - (let (v21 i1) (neq v20 0)) - (condbr v21 (block 5 v237) (block 6))) - - (block 3 (param v17 i32)) - - (block 4 - (param v238 i32) - (param v239 i32) - (param v243 i32) - (param v244 i32) - (param v245 i32) - (param v246 i32) - (let (v240 u32) (bitcast v238)) - (let (v241 u32) (mod.unchecked v240 4)) - (assertz 250 v241) - (let (v242 (ptr i32)) (inttoptr v240)) - (store v242 v239) - (br (block 2 v239 v243 v244 v245 v238 v246))) - - (block 5 (param v236 i32) - (ret v236)) + (block 5 + (param v49 i32) + (param v55 i32) + (param v65 i32) + (param v68 i32) + (let (v48 i32) (const.i32 268435456)) + (let (v50 u32) (bitcast v49)) + (let (v51 u32) (mod.unchecked v50 4)) + (assertz 250 v51) + (let (v52 (ptr i32)) (inttoptr v50)) + (let (v53 i32) (load v52)) + (let (v54 i32) (sub.wrapping v48 v53)) + (let (v56 u32) (bitcast v54)) + (let (v57 u32) (bitcast 
v55)) + (let (v58 i1) (lt v56 v57)) + (let (v59 i32) (sext v58)) + (let (v60 i1) (neq v59 0)) + (condbr v60 (block 7 v68) (block 8))) (block 6 - (br (block 9 v18 v206 v209 v212 v215 v237))) - - (block 7 (param v227 i32) (param v228 i32) - (let (v229 i32) (const.i32 1)) - (let (v230 i32) (bor v228 v229)) - (let (v231 u32) (bitcast v227)) - (let (v232 u32) (mod.unchecked v231 4)) - (assertz 250 v232) - (let (v233 (ptr i32)) (inttoptr v231)) - (store v233 v230) - (let (v234 i32) (const.i32 8)) - (let (v235 i32) (add.wrapping v227 v234)) - (br (block 5 v235))) + (let (v38 u32) (call (#intrinsics::mem #heap_base))) + (let (v39 u32) (memory.size)) + (let (v40 i32) (const.i32 16)) + (let (v41 u32) (bitcast v40)) + (let (v42 u32) (shl.wrapping v39 v41)) + (let (v43 u32) (add.wrapping v38 v42)) + (let (v44 i32) (bitcast v43)) + (let (v45 u32) (bitcast v0)) + (let (v46 u32) (mod.unchecked v45 4)) + (assertz 250 v46) + (let (v47 (ptr i32)) (inttoptr v45)) + (store v47 v44) + (br (block 5 v0 v32 v12 v26))) + + (block 7 (param v67 i32) + (ret v67)) (block 8 - (let (v216 i32) (const.i32 -3)) - (let (v217 i32) (band v145 v216)) - (let (v218 u32) (bitcast v122)) - (let (v219 u32) (mod.unchecked v218 4)) - (assertz 250 v219) - (let (v220 (ptr i32)) (inttoptr v218)) - (store v220 v217) - (let (v221 u32) (bitcast v120)) - (let (v222 u32) (mod.unchecked v221 4)) - (assertz 250 v222) - (let (v223 (ptr i32)) (inttoptr v221)) - (let (v224 i32) (load v223)) - (let (v225 i32) (const.i32 2)) - (let (v226 i32) (bor v224 v225)) - (br (block 7 v120 v226))) - - (block 9 - (param v22 i32) - (param v40 i32) - (param v49 i32) - (param v56 i32) - (param v59 i32) - (param v247 i32) - (let (v23 u32) (bitcast v22)) - (let (v24 u32) (add.checked v23 8)) - (let (v25 u32) (mod.unchecked v24 4)) - (assertz 250 v25) - (let (v26 (ptr i32)) (inttoptr v24)) - (let (v27 i32) (load v26)) - (let (v28 i32) (const.i32 1)) - (let (v29 i32) (band v27 v28)) - (let (v30 i1) (neq v29 0)) - (condbr v30 (block 11) (block 
12))) - - (block 10) - - (block 11 - (let (v156 i32) (const.i32 -2)) - (let (v157 i32) (band v27 v156)) - (let (v158 u32) (bitcast v22)) - (let (v159 u32) (add.checked v158 8)) - (let (v160 u32) (mod.unchecked v159 4)) - (assertz 250 v160) - (let (v161 (ptr i32)) (inttoptr v159)) - (store v161 v157) - (let (v162 u32) (bitcast v22)) - (let (v163 u32) (add.checked v162 4)) - (let (v164 u32) (mod.unchecked v163 4)) - (assertz 250 v164) - (let (v165 (ptr i32)) (inttoptr v163)) - (let (v166 i32) (load v165)) - (let (v167 i32) (const.i32 -4)) - (let (v168 i32) (band v166 v167)) - (let (v169 i1) (neq v168 0)) - (condbr v169 (block 22) (block 23))) - - (block 12 - (let (v31 u32) (bitcast v22)) - (let (v32 u32) (mod.unchecked v31 4)) - (assertz 250 v32) - (let (v33 (ptr i32)) (inttoptr v31)) - (let (v34 i32) (load v33)) - (let (v35 i32) (const.i32 -4)) - (let (v36 i32) (band v34 v35)) - (let (v37 i32) (const.i32 8)) - (let (v38 i32) (add.wrapping v22 v37)) - (let (v39 i32) (sub.wrapping v36 v38)) - (let (v41 u32) (bitcast v39)) - (let (v42 u32) (bitcast v40)) - (let (v43 i1) (lt v41 v42)) - (let (v44 i32) (sext v43)) - (let (v45 i1) (neq v44 0)) - (condbr v45 (block 4 v59 v27 v40 v49 v56 v247) (block 13))) - - (block 13 - (let (v46 i32) (const.i32 72)) - (let (v47 i32) (add.wrapping v38 v46)) - (let (v48 i32) (sub.wrapping v36 v40)) - (let (v50 i32) (band v48 v49)) - (let (v51 u32) (bitcast v47)) - (let (v52 u32) (bitcast v50)) - (let (v53 i1) (lte v51 v52)) - (let (v54 i32) (sext v53)) - (let (v55 i1) (neq v54 0)) - (condbr v55 (block 14) (block 15))) - - (block 14 - (let (v69 i32) (const.i32 0)) - (let (v70 i32) (const.i32 0)) - (let (v71 u32) (bitcast v50)) - (let (v72 u32) (mod.unchecked v71 4)) - (assertz 250 v72) - (let (v73 (ptr i32)) (inttoptr v71)) - (store v73 v70) - (let (v74 i32) (const.i32 -8)) - (let (v75 i32) (add.wrapping v50 v74)) - (let (v76 i64) (const.i64 0)) - (let (v77 u32) (bitcast v75)) - (let (v78 u32) (mod.unchecked v77 4)) - (assertz 250 v78) - 
(let (v79 (ptr i64)) (inttoptr v77)) - (store v79 v76) - (let (v80 u32) (bitcast v22)) - (let (v81 u32) (mod.unchecked v80 4)) - (assertz 250 v81) - (let (v82 (ptr i32)) (inttoptr v80)) - (let (v83 i32) (load v82)) - (let (v84 i32) (const.i32 -4)) - (let (v85 i32) (band v83 v84)) - (let (v86 u32) (bitcast v75)) - (let (v87 u32) (mod.unchecked v86 4)) - (assertz 250 v87) - (let (v88 (ptr i32)) (inttoptr v86)) - (store v88 v85) - (let (v89 u32) (bitcast v22)) - (let (v90 u32) (mod.unchecked v89 4)) - (assertz 250 v90) - (let (v91 (ptr i32)) (inttoptr v89)) - (let (v92 i32) (load v91)) - (let (v93 i32) (const.i32 2)) - (let (v94 i32) (band v92 v93)) - (let (v95 i1) (neq v94 0)) - (condbr v95 (block 17 v75 v69 v22) (block 18))) - - (block 15 - (let (v57 i32) (band v56 v38)) - (let (v58 i1) (neq v57 0)) - (condbr v58 (block 4 v59 v27 v40 v49 v56 v247) (block 16))) - - (block 16 - (let (v60 i32) (const.i32 -4)) - (let (v61 i32) (band v27 v60)) - (let (v62 u32) (bitcast v59)) + (let (v61 i32) (add.wrapping v53 v55)) + (let (v62 u32) (bitcast v49)) (let (v63 u32) (mod.unchecked v62 4)) (assertz 250 v63) (let (v64 (ptr i32)) (inttoptr v62)) (store v64 v61) - (let (v65 u32) (bitcast v22)) - (let (v66 u32) (mod.unchecked v65 4)) - (assertz 250 v66) - (let (v67 (ptr i32)) (inttoptr v65)) - (let (v68 i32) (load v67)) - (br (block 7 v22 v68))) - - (block 17 (param v120 i32) (param v121 i32) (param v122 i32) - (let (v123 i32) (bor v121 v122)) - (let (v124 u32) (bitcast v120)) - (let (v125 u32) (add.checked v124 4)) - (let (v126 u32) (mod.unchecked v125 4)) - (assertz 250 v126) - (let (v127 (ptr i32)) (inttoptr v125)) - (store v127 v123) - (let (v128 u32) (bitcast v122)) - (let (v129 u32) (add.checked v128 8)) - (let (v130 u32) (mod.unchecked v129 4)) - (assertz 250 v130) - (let (v131 (ptr i32)) (inttoptr v129)) - (let (v132 i32) (load v131)) - (let (v133 i32) (const.i32 -2)) - (let (v134 i32) (band v132 v133)) - (let (v135 u32) (bitcast v122)) - (let (v136 u32) (add.checked v135 
8)) - (let (v137 u32) (mod.unchecked v136 4)) - (assertz 250 v137) - (let (v138 (ptr i32)) (inttoptr v136)) - (store v138 v134) - (let (v139 u32) (bitcast v122)) - (let (v140 u32) (mod.unchecked v139 4)) - (assertz 250 v140) - (let (v141 (ptr i32)) (inttoptr v139)) - (let (v142 i32) (load v141)) - (let (v143 i32) (const.i32 3)) - (let (v144 i32) (band v142 v143)) - (let (v145 i32) (bor v144 v120)) - (let (v146 u32) (bitcast v122)) - (let (v147 u32) (mod.unchecked v146 4)) - (assertz 250 v147) - (let (v148 (ptr i32)) (inttoptr v146)) - (store v148 v145) - (let (v149 i32) (const.i32 2)) - (let (v150 i32) (band v142 v149)) - (let (v151 i1) (neq v150 0)) - (condbr v151 (block 8) (block 20))) - - (block 18 - (let (v96 i32) (const.i32 -4)) - (let (v97 i32) (band v92 v96)) - (let (v98 i1) (eq v97 0)) - (let (v99 i32) (zext v98)) - (let (v100 i1) (neq v99 0)) - (condbr v100 (block 17 v75 v69 v22) (block 19))) - - (block 19 - (let (v101 u32) (bitcast v97)) - (let (v102 u32) (add.checked v101 4)) - (let (v103 u32) (mod.unchecked v102 4)) - (assertz 250 v103) - (let (v104 (ptr i32)) (inttoptr v102)) - (let (v105 i32) (load v104)) - (let (v106 i32) (const.i32 3)) - (let (v107 i32) (band v105 v106)) - (let (v108 i32) (bor v107 v75)) - (let (v109 u32) (bitcast v97)) - (let (v110 u32) (add.checked v109 4)) - (let (v111 u32) (mod.unchecked v110 4)) - (assertz 250 v111) - (let (v112 (ptr i32)) (inttoptr v110)) - (store v112 v108) - (let (v113 u32) (bitcast v75)) - (let (v114 u32) (add.checked v113 4)) - (let (v115 u32) (mod.unchecked v114 4)) - (assertz 250 v115) - (let (v116 (ptr i32)) (inttoptr v114)) - (let (v117 i32) (load v116)) - (let (v118 i32) (const.i32 3)) - (let (v119 i32) (band v117 v118)) - (br (block 17 v75 v119 v22))) - - (block 20 - (let (v152 u32) (bitcast v120)) - (let (v153 u32) (mod.unchecked v152 4)) - (assertz 250 v153) - (let (v154 (ptr i32)) (inttoptr v152)) - (let (v155 i32) (load v154)) - (br (block 7 v120 v155))) - - (block 21 - (param v180 i32) - (param 
v190 i32) - (param v201 i32) - (param v208 i32) - (param v211 i32) - (param v214 i32) - (param v249 i32) - (call #wee_alloc::neighbors::Neighbors::remove v180) - (let (v181 u32) (bitcast v180)) - (let (v182 (ptr u8)) (inttoptr v181)) - (let (v183 u8) (load v182)) - (let (v184 i32) (zext v183)) - (let (v185 i32) (const.i32 2)) - (let (v186 i32) (band v184 v185)) - (let (v187 i1) (eq v186 0)) - (let (v188 i32) (zext v187)) - (let (v189 i1) (neq v188 0)) - (condbr v189 (block 24 v201 v190 v208 v211 v214 v249) (block 25))) - - (block 22 - (let (v171 i32) (const.i32 0)) - (let (v172 u32) (bitcast v168)) - (let (v173 (ptr u8)) (inttoptr v172)) - (let (v174 u8) (load v173)) - (let (v175 i32) (zext v174)) - (let (v176 i32) (const.i32 1)) - (let (v177 i32) (band v175 v176)) - (let (v178 i1) (neq v177 0)) - (let (v179 i32) (select v178 v171 v168)) - (br (block 21 v22 v179 v59 v40 v49 v56 v247))) - - (block 23 - (let (v170 i32) (const.i32 0)) - (br (block 21 v22 v170 v59 v40 v49 v56 v247))) - - (block 24 - (param v200 i32) - (param v202 i32) - (param v207 i32) - (param v210 i32) - (param v213 i32) - (param v248 i32) - (let (v203 u32) (bitcast v200)) - (let (v204 u32) (mod.unchecked v203 4)) - (assertz 250 v204) - (let (v205 (ptr i32)) (inttoptr v203)) - (store v205 v202) - (br (block 9 v202 v207 v210 v213 v200 v248))) - - (block 25 - (let (v191 u32) (bitcast v190)) - (let (v192 u32) (mod.unchecked v191 4)) - (assertz 250 v192) - (let (v193 (ptr i32)) (inttoptr v191)) - (let (v194 i32) (load v193)) - (let (v195 i32) (const.i32 2)) - (let (v196 i32) (bor v194 v195)) - (let (v197 u32) (bitcast v190)) - (let (v198 u32) (mod.unchecked v197 4)) - (assertz 250 v198) - (let (v199 (ptr i32)) (inttoptr v197)) - (store v199 v196) - (br (block 24 v201 v190 v208 v211 v214 v249))) + (let (v66 i32) (add.wrapping v53 v65)) + (br (block 7 v66))) ) - (func (export #::alloc) - (param i32) (param i32) (param i32) (result i32) - (block 0 (param v0 i32) (param v1 i32) (param v2 i32) - (let (v4 
i32) (const.i32 0)) - (let (v5 i32) (global.load i32 (global.symbol #__stack_pointer))) - (let (v6 i32) (const.i32 16)) - (let (v7 i32) (sub.wrapping v5 v6)) - (let (v8 (ptr i32)) (global.symbol #__stack_pointer)) - (store v8 v7) - (let (v9 i32) (const.i32 1)) - (let (v10 i32) (const.i32 1)) - (let (v11 u32) (bitcast v1)) - (let (v12 u32) (bitcast v10)) - (let (v13 i1) (gt v11 v12)) - (let (v14 i32) (sext v13)) - (let (v15 i1) (neq v14 0)) - (let (v16 i32) (select v15 v1 v9)) - (let (v17 i1) (eq v2 0)) - (let (v18 i32) (zext v17)) - (let (v19 i1) (neq v18 0)) - (condbr v19 (block 2 v7 v16) (block 3))) - - (block 1 (param v3 i32) - (ret v3)) - - (block 2 (param v95 i32) (param v100 i32) - (let (v97 i32) (const.i32 16)) - (let (v98 i32) (add.wrapping v95 v97)) - (let (v99 (ptr i32)) (global.symbol #__stack_pointer)) - (store v99 v98) - (br (block 1 v100))) - - (block 3 - (let (v20 u32) (bitcast v0)) - (let (v21 u32) (mod.unchecked v20 4)) - (assertz 250 v21) - (let (v22 (ptr i32)) (inttoptr v20)) - (let (v23 i32) (load v22)) - (let (v24 u32) (bitcast v7)) - (let (v25 u32) (add.checked v24 12)) - (let (v26 u32) (mod.unchecked v25 4)) - (assertz 250 v26) - (let (v27 (ptr i32)) (inttoptr v25)) - (store v27 v23) - (let (v28 i32) (const.i32 3)) - (let (v29 i32) (add.wrapping v2 v28)) - (let (v30 i32) (const.i32 2)) - (let (v31 u32) (bitcast v29)) - (let (v32 u32) (bitcast v30)) - (let (v33 u32) (shr.wrapping v31 v32)) - (let (v34 i32) (bitcast v33)) - (let (v35 i32) (const.i32 12)) - (let (v36 i32) (add.wrapping v7 v35)) - (let (v37 i32) (call #wee_alloc::alloc_first_fit v34 v16 v36)) - (let (v38 i1) (eq v37 0)) - (let (v39 i32) (zext v38)) - (let (v40 i1) (neq v39 0)) - (condbr v40 (block 4) (block 5))) - - (block 4 - (call #::new_cell_for_free_list v7 v7 v34 v16) - (let (v49 u32) (bitcast v7)) - (let (v50 u32) (mod.unchecked v49 4)) - (assertz 250 v50) - (let (v51 (ptr i32)) (inttoptr v49)) - (let (v52 i32) (load v51)) - (let (v53 i1) (eq v52 0)) - (let (v54 i32) (zext 
v53)) - (let (v55 i1) (neq v54 0)) - (condbr v55 (block 7) (block 8))) - - (block 5 - (let (v41 u32) (bitcast v7)) - (let (v42 u32) (add.checked v41 12)) - (let (v43 u32) (mod.unchecked v42 4)) - (assertz 250 v43) - (let (v44 (ptr i32)) (inttoptr v42)) - (let (v45 i32) (load v44)) - (let (v46 u32) (bitcast v0)) - (let (v47 u32) (mod.unchecked v46 4)) - (assertz 250 v47) - (let (v48 (ptr i32)) (inttoptr v46)) - (store v48 v45) - (br (block 2 v7 v37))) - - (block 6 (param v96 i32) - (let (v94 i32) (const.i32 0)) - (br (block 2 v96 v94))) - - (block 7 - (let (v64 u32) (bitcast v7)) - (let (v65 u32) (add.checked v64 4)) - (let (v66 u32) (mod.unchecked v65 4)) - (assertz 250 v66) - (let (v67 (ptr i32)) (inttoptr v65)) - (let (v68 i32) (load v67)) - (let (v69 u32) (bitcast v7)) - (let (v70 u32) (add.checked v69 12)) - (let (v71 u32) (mod.unchecked v70 4)) - (assertz 250 v71) - (let (v72 (ptr i32)) (inttoptr v70)) - (let (v73 i32) (load v72)) - (let (v74 u32) (bitcast v68)) - (let (v75 u32) (add.checked v74 8)) - (let (v76 u32) (mod.unchecked v75 4)) - (assertz 250 v76) - (let (v77 (ptr i32)) (inttoptr v75)) - (store v77 v73) - (let (v78 u32) (bitcast v7)) - (let (v79 u32) (add.checked v78 12)) - (let (v80 u32) (mod.unchecked v79 4)) - (assertz 250 v80) - (let (v81 (ptr i32)) (inttoptr v79)) - (store v81 v68) - (let (v82 i32) (const.i32 12)) - (let (v83 i32) (add.wrapping v7 v82)) - (let (v84 i32) (call #wee_alloc::alloc_first_fit v34 v16 v83)) - (let (v85 u32) (bitcast v7)) - (let (v86 u32) (add.checked v85 12)) - (let (v87 u32) (mod.unchecked v86 4)) - (assertz 250 v87) - (let (v88 (ptr i32)) (inttoptr v86)) - (let (v89 i32) (load v88)) - (let (v90 u32) (bitcast v0)) - (let (v91 u32) (mod.unchecked v90 4)) - (assertz 250 v91) - (let (v92 (ptr i32)) (inttoptr v90)) - (store v92 v89) - (let (v93 i1) (neq v84 0)) - (condbr v93 (block 2 v7 v84) (block 9))) - - (block 8 - (let (v56 u32) (bitcast v7)) - (let (v57 u32) (add.checked v56 12)) - (let (v58 u32) (mod.unchecked v57 
4)) - (assertz 250 v58) - (let (v59 (ptr i32)) (inttoptr v57)) - (let (v60 i32) (load v59)) - (let (v61 u32) (bitcast v0)) - (let (v62 u32) (mod.unchecked v61 4)) - (assertz 250 v62) - (let (v63 (ptr i32)) (inttoptr v61)) - (store v63 v60) - (br (block 6 v7))) - - (block 9 - (br (block 6 v7))) - ) - - (func (export #miden_tx_kernel_sys::get_inputs) + (func (export #miden_base_sys::bindings::tx::get_inputs) (param i32) (block 0 (param v0 i32) (let (v1 i32) (const.i32 0)) @@ -799,29 +192,33 @@ (assertz 250 v30) (let (v31 (ptr i32)) (inttoptr v29)) (let (v32 i32) (load v31)) - (let [(v33 i32) (v34 felt)] (call (#miden::note #get_inputs) v32)) - (let (v35 i32) (const.i32 0)) - (let (v36 u32) (bitcast v0)) - (let (v37 u32) (add.checked v36 8)) - (let (v38 u32) (mod.unchecked v37 4)) - (assertz 250 v38) - (let (v39 (ptr i32)) (inttoptr v37)) - (store v39 v35) + (let (v33 i32) (const.i32 4)) + (let (v34 u32) (bitcast v32)) + (let (v35 u32) (bitcast v33)) + (let (v36 u32) (shr.wrapping v34 v35)) + (let (v37 i32) (bitcast v36)) + (let [(v38 i32) (v39 i32)] (call (#miden::note #get_inputs) v37)) (let (v40 u32) (bitcast v0)) - (let (v41 u32) (add.checked v40 4)) + (let (v41 u32) (add.checked v40 8)) (let (v42 u32) (mod.unchecked v41 4)) (assertz 250 v42) (let (v43 (ptr i32)) (inttoptr v41)) - (store v43 v32) + (store v43 v38) (let (v44 u32) (bitcast v0)) - (let (v45 u32) (mod.unchecked v44 4)) - (assertz 250 v45) - (let (v46 (ptr i32)) (inttoptr v44)) - (store v46 v14) - (let (v47 i32) (const.i32 16)) - (let (v48 i32) (add.wrapping v4 v47)) - (let (v49 (ptr i32)) (global.symbol #__stack_pointer)) - (store v49 v48) + (let (v45 u32) (add.checked v44 4)) + (let (v46 u32) (mod.unchecked v45 4)) + (assertz 250 v46) + (let (v47 (ptr i32)) (inttoptr v45)) + (store v47 v32) + (let (v48 u32) (bitcast v0)) + (let (v49 u32) (mod.unchecked v48 4)) + (assertz 250 v49) + (let (v50 (ptr i32)) (inttoptr v48)) + (store v50 v14) + (let (v51 i32) (const.i32 16)) + (let (v52 i32) (add.wrapping 
v4 v51)) + (let (v53 (ptr i32)) (global.symbol #__stack_pointer)) + (store v53 v52) (br (block 1))) (block 3 @@ -962,7 +359,8 @@ ) ;; Imports - (func (import #miden::note #get_inputs) (param i32) (result i32 felt)) + (func (import #intrinsics::mem #heap_base) (result u32)) + (func (import #miden::note #get_inputs) (param i32) (result i32 i32)) ) ) diff --git a/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.masm b/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.masm index 434f43cec..ed2b96ea5 100644 --- a/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.masm +++ b/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.masm @@ -1,16 +1,18 @@ # mod abi_transform_tx_kernel_get_inputs_4 use.miden::note +use.intrinsics::mem export.entrypoint - exec."miden_tx_kernel_sys::get_inputs" + exec."miden_base_sys::bindings::tx::get_inputs" end -export."miden_tx_kernel_sys::get_inputs" - mem_load.0x00001000 +export."miden_base_sys::bindings::tx::get_inputs" + mem_load.0x00011000 push.16 u32wrapping_sub + push.1114112 dup.1 swap.1 dup.0 @@ -81,12 +83,17 @@ export."miden_tx_kernel_sys::get_inputs" add.8 u32assert dup.1 + push.4 + u32shr exec.::miden::note::get_inputs - dup.7 + swap.1 + drop + dup.6 add.4 u32assert - push.0 - dup.4 + dup.2 + movup.2 + swap.1 dup.0 u32mod.16 dup.0 @@ -96,9 +103,9 @@ export."miden_tx_kernel_sys::get_inputs" movup.2 u32div.16 exec.::intrinsics::mem::store_sw - movup.8 - dup.1 movup.6 + dup.1 + movup.4 swap.1 dup.0 u32mod.16 @@ -110,7 +117,7 @@ export."miden_tx_kernel_sys::get_inputs" u32div.16 exec.::intrinsics::mem::store_sw dup.0 - movup.7 + movup.5 swap.1 dup.0 u32mod.16 @@ -122,9 +129,10 @@ export."miden_tx_kernel_sys::get_inputs" u32div.16 exec.::intrinsics::mem::store_sw push.16 - movup.7 + movup.5 swap.1 u32wrapping_add + push.1114112 dup.0 u32mod.16 dup.0 @@ -138,14 +146,10 @@ export."miden_tx_kernel_sys::get_inputs" assertz.err=250 u32mod.4 assertz.err=250 - movup.2 u32mod.4 
assertz.err=250 - movup.2 u32mod.4 assertz.err=250 - dropw - dropw else movup.2 drop @@ -172,11 +176,6 @@ export."miden_tx_kernel_sys::get_inputs" end -export."alloc::raw_vec::handle_error" - push.0 assert -end - - export."alloc::raw_vec::RawVec::try_allocate_in" dup.1 neq.0 @@ -498,7 +497,7 @@ export."__rust_alloc" push.1048576 movup.2 swap.1 - exec."::alloc" + exec."::alloc" end @@ -508,7 +507,7 @@ export."__rust_alloc_zeroed" swap.2 swap.3 swap.1 - exec."::alloc" + exec."::alloc" dup.0 eq.0 neq.0 @@ -559,10 +558,10 @@ export."__rust_alloc_zeroed" end -export."::alloc" - push.1 +export."::alloc" + push.32 dup.2 - push.1 + push.32 u32gt push.0 push.0 @@ -574,102 +573,45 @@ export."::alloc" movup.3 swap.1 cdrop - mem_load.0x00001000 - push.16 - u32wrapping_sub - dup.1 - swap.1 dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.3 - eq.0 + u32popcnt + push.1 + neq neq.0 if.true - swap.3 - movup.2 - drop - drop - push.16 - movup.2 - swap.1 - u32wrapping_add - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw + push.0 assert else - dup.0 - add.12 - u32assert - dup.3 - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.12 + push.2147483648 dup.1 - swap.1 - u32wrapping_add - push.3 - movup.5 - swap.1 - u32wrapping_add - push.2 - u32shr - dup.0 - dup.4 + u32wrapping_sub + dup.3 + u32lt + push.0 + push.0 + push.4294967294 movup.2 - swap.3 - movdn.2 - swap.1 - exec."wee_alloc::alloc_first_fit" - dup.0 - eq.0 + cdrop + u32or neq.0 if.true - drop + push.0 assert + else dup.1 - dup.3 - dup.2 - dup.4 - dup.5 - exec."::new_cell_for_free_list" dup.0 u32mod.4 assertz.err=250 + dup.1 + 
swap.1 + swap.4 + u32wrapping_add + push.4294967295 + u32wrapping_add + push.0 + dup.2 + u32wrapping_sub + u32and + push.0 + movup.4 dup.0 u32mod.16 dup.0 @@ -679,78 +621,14 @@ export."::alloc" movup.2 u32div.16 exec.::intrinsics::mem::load_sw - eq.0 neq.0 if.true - dup.1 - add.4 - u32assert - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - dup.0 - add.8 - u32assert - dup.4 - add.12 - u32assert - dup.5 - add.12 - u32assert - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw dup.3 dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - movup.4 - swap.1 - dup.0 - u32mod.16 - dup.0 u32mod.4 + assertz.err=250 + push.268435456 swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - movup.7 - dup.6 - add.12 - u32assert - push.12 - dup.8 - swap.1 - u32wrapping_add - swap.1 - swap.9 - swap.1 - swap.2 - swap.7 - exec."wee_alloc::alloc_first_fit" - dup.7 dup.0 u32mod.16 dup.0 @@ -760,41 +638,35 @@ export."::alloc" movup.2 u32div.16 exec.::intrinsics::mem::load_sw - dup.6 - dup.0 - u32mod.16 dup.0 - u32mod.4 swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - movup.5 - u32mod.4 - assertz.err=250 - movup.6 - u32mod.4 - assertz.err=250 + swap.2 swap.1 - u32mod.4 - assertz.err=250 + u32wrapping_sub + dup.3 + u32lt + push.0 + push.0 + push.4294967294 movup.2 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - dup.0 + cdrop + u32or neq.0 if.true - push.16 - movup.2 + drop + movdn.3 + drop + drop + drop + else + swap.1 + drop + movup.3 + dup.1 swap.1 + swap.3 u32wrapping_add + dup.2 dup.0 u32mod.16 dup.0 @@ -804,31 +676,23 @@ export."::alloc" movup.2 u32div.16 exec.::intrinsics::mem::store_sw - else - drop - push.16 - u32wrapping_add - dup.0 - u32mod.16 - dup.0 + swap.1 
u32mod.4 + assertz.err=250 swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.0 + u32wrapping_add end else + dup.3 + exec.::intrinsics::mem::heap_base + dup.5 + exec.::intrinsics::mem::memory_size + push.16 + u32shl movup.2 swap.1 - drop - drop - swap.1 - dup.1 - add.12 - u32assert - dup.0 + u32wrapping_add + dup.2 dup.0 u32mod.16 dup.0 @@ -837,21 +701,15 @@ export."::alloc" u32div.4 movup.2 u32div.16 - exec.::intrinsics::mem::load_sw - dup.2 - dup.0 - u32mod.16 + exec.::intrinsics::mem::store_sw dup.0 u32mod.4 + assertz.err=250 swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.16 - movup.3 + u32mod.4 + assertz.err=250 + push.268435456 swap.1 - u32wrapping_add dup.0 u32mod.16 dup.0 @@ -860,137 +718,35 @@ export."::alloc" u32div.4 movup.2 u32div.16 - exec.::intrinsics::mem::store_sw + exec.::intrinsics::mem::load_sw + dup.0 swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 + swap.2 + swap.1 + u32wrapping_sub + dup.3 + u32lt push.0 - end - else - swap.3 - swap.1 - drop - drop - movup.2 - dup.1 - add.12 - u32assert - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.16 - movup.3 - swap.1 - u32wrapping_add - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - end - end -end - - -export."wee_alloc::alloc_first_fit" - dup.2 - dup.0 - u32mod.4 - assertz.err=250 - push.0 - push.4294967295 - dup.4 - swap.1 - u32wrapping_add - push.0 - movup.5 - u32wrapping_sub - push.2 - movup.5 - swap.1 - u32shl - movup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - movup.4 - swap.5 - movdn.4 - dup.0 - eq.0 - neq.0 
- push.1 - while.true - if.true - dropw drop push.0 - else - dup.0 - add.8 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - dup.1 - swap.1 - u32and - neq.0 - push.1 - while.true + push.0 + push.4294967294 + movup.2 + cdrop + u32or + neq.0 if.true - dup.1 - add.8 - u32assert - dup.2 - add.4 - u32assert - push.4294967294 + drop + movdn.3 + drop + drop + drop + else + swap.1 + drop movup.3 + dup.1 swap.1 - u32and + swap.3 + u32wrapping_add dup.2 dup.0 u32mod.16 @@ -1001,1177 +757,11 @@ export."wee_alloc::alloc_first_fit" movup.2 u32div.16 exec.::intrinsics::mem::store_sw - dup.0 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - u32and - dup.0 - neq.0 - if.true - dup.1 - exec."wee_alloc::neighbors::Neighbors::remove" - push.0 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.128 - u32and - push.1 - u32and - neq.0 - cdrop - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.128 - u32and - push.2 - u32and - eq.0 - neq.0 - if.true - dup.4 - dup.0 - dup.2 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - dup.0 - add.8 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - dup.1 - swap.1 - u32and - neq.0 - push.1 - else - dup.0 - dup.1 - dup.6 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - u32or - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 
- swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.4 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - dup.0 - add.8 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - dup.1 - swap.1 - u32and - neq.0 - push.1 - end - else - drop - dup.0 - exec."wee_alloc::neighbors::Neighbors::remove" - push.0 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.128 - u32and - push.2 - u32and - eq.0 - neq.0 - if.true - dup.4 - dup.0 - dup.2 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - dup.0 - add.8 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - dup.1 - swap.1 - u32and - neq.0 - push.1 - else - dup.0 - dup.1 - dup.6 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - u32or - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.4 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - dup.0 - add.8 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - dup.1 - swap.1 - u32and - neq.0 - push.1 - end 
- end - else - dup.1 - dup.0 u32mod.4 assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - u32and - push.8 - dup.3 swap.1 u32wrapping_add - dup.1 - dup.1 - u32wrapping_sub - dup.5 - u32lt - push.0 - push.0 - push.4294967294 - movup.2 - cdrop - u32or - neq.0 - if.true - movup.2 - swap.3 - movdn.2 - drop - drop - drop - dup.4 - dup.0 - dup.2 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - dup.0 - eq.0 - neq.0 - push.1 - push.0 - else - push.72 - dup.1 - swap.1 - u32wrapping_add - dup.5 - swap.1 - swap.3 - swap.1 - u32wrapping_sub - dup.6 - u32and - dup.0 - movup.3 - swap.1 - u32lte - push.0 - push.0 - push.4294967294 - movup.2 - cdrop - u32or - neq.0 - if.true - movdn.2 - drop - drop - movup.2 - drop - movup.2 - drop - movup.2 - drop - movup.2 - drop - movup.2 - drop - push.4294967288 - dup.1 - swap.1 - u32wrapping_add - swap.1 - dup.1 - push.0 - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.2 - dup.4 - push.0.0 - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_dw - dup.5 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - u32and - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - push.0 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - dup.1 - swap.1 - u32and - neq.0 - if.true - 
drop - dup.1 - add.4 - u32assert - dup.3 - add.8 - u32assert - dup.4 - add.8 - u32assert - dup.5 - swap.1 - swap.4 - swap.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.4 - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967294 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.5 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - dup.1 - swap.1 - u32and - dup.7 - u32or - dup.2 - dup.1 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - movup.2 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.3 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - push.2 - movup.2 - swap.1 - u32and - neq.0 - if.true - movup.2 - dup.2 - dup.3 - push.4294967293 - movup.4 - swap.1 - u32and - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - u32or - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.8 - u32wrapping_add - else - drop - swap.1 - drop - dup.0 - dup.1 - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - 
exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.8 - u32wrapping_add - end - else - push.4294967292 - u32and - dup.0 - eq.0 - neq.0 - if.true - drop - dup.1 - add.4 - u32assert - dup.3 - add.8 - u32assert - dup.4 - add.8 - u32assert - dup.5 - swap.1 - swap.4 - swap.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.4 - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967294 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.5 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - dup.1 - swap.1 - u32and - dup.7 - u32or - dup.2 - dup.1 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - movup.2 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.3 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - push.2 - movup.2 - swap.1 - u32and - neq.0 - if.true - movup.2 - dup.2 - dup.3 - push.4294967293 - movup.4 - swap.1 - u32and - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - u32or - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.8 - u32wrapping_add - else - drop - swap.1 - drop - dup.0 - dup.1 - dup.0 - dup.0 - u32mod.16 - dup.0 - 
u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.8 - u32wrapping_add - end - else - swap.1 - drop - dup.0 - add.4 - u32assert - swap.1 - add.4 - u32assert - dup.2 - add.4 - u32assert - dup.3 - add.4 - u32assert - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.5 - u32or - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.5 - add.8 - u32assert - dup.6 - add.8 - u32assert - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.8 - u32or - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.7 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967294 - u32and - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.8 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - dup.1 - swap.1 - u32and - dup.10 - u32or - dup.2 - dup.1 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - movup.2 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.3 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.3 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - movup.3 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - 
push.2 - movup.2 - swap.1 - u32and - neq.0 - if.true - movup.2 - dup.2 - dup.3 - push.4294967293 - movup.4 - swap.1 - u32and - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - u32or - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.8 - u32wrapping_add - else - drop - swap.1 - drop - dup.0 - dup.1 - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - push.8 - u32wrapping_add - end - end - end - else - drop - dup.5 - swap.1 - u32and - neq.0 - if.true - swap.1 - drop - dup.4 - dup.0 - dup.2 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - dup.0 - eq.0 - neq.0 - push.1 - push.0 - else - movup.2 - drop - movup.2 - drop - movup.2 - drop - movup.3 - drop - movup.2 - dup.2 - dup.3 - push.4294967292 - movup.4 - swap.1 - u32and - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.1 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 
- push.8 - u32wrapping_add - push.0 - push.0 - end - end - end end end end @@ -2179,733 +769,8 @@ export."wee_alloc::alloc_first_fit" end -export."wee_alloc::neighbors::Neighbors::remove" - dup.0 - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.2 - dup.1 - swap.1 - u32and - neq.0 - if.true - dup.1 - add.4 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - dup.1 - swap.1 - u32and - dup.0 - eq.0 - neq.0 - if.true - drop - dup.2 - add.4 - u32assert - movup.3 - push.3 - movup.3 - swap.1 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.3 - movup.3 - swap.1 - u32and - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - else - swap.1 - drop - dup.0 - swap.1 - dup.3 - add.4 - u32assert - dup.4 - add.4 - u32assert - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - push.4294967292 - movup.6 - swap.1 - u32and - u32or - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.4 - movup.5 - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 
- u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - end - else - push.4294967292 - dup.1 - swap.1 - u32and - dup.0 - eq.0 - neq.0 - if.true - drop - dup.1 - add.4 - u32assert - dup.0 - u32mod.4 - assertz.err=250 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - dup.1 - swap.1 - u32and - dup.0 - eq.0 - neq.0 - if.true - drop - dup.2 - add.4 - u32assert - movup.3 - push.3 - movup.3 - swap.1 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.3 - movup.3 - swap.1 - u32and - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - else - swap.1 - drop - dup.0 - swap.1 - dup.3 - add.4 - u32assert - dup.4 - add.4 - u32assert - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - push.4294967292 - movup.6 - swap.1 - u32and - u32or - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.4 - movup.5 - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 
- u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - end - else - swap.1 - drop - dup.0 - add.4 - u32assert - dup.2 - add.4 - u32assert - movup.2 - add.4 - u32assert - dup.3 - add.4 - u32assert - dup.4 - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - u32and - u32or - dup.5 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.1 - u32mod.4 - assertz.err=250 - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - swap.1 - u32mod.4 - assertz.err=250 - movup.4 - u32mod.4 - assertz.err=250 - movup.3 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.4294967292 - dup.1 - swap.1 - u32and - dup.0 - eq.0 - neq.0 - if.true - drop - dup.2 - add.4 - u32assert - movup.3 - push.3 - movup.3 - swap.1 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.3 - movup.3 - swap.1 - u32and - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - else - swap.1 - drop - dup.0 - swap.1 - dup.3 - add.4 - u32assert - dup.4 - add.4 - u32assert - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - push.4294967292 - movup.6 - swap.1 - u32and - u32or 
- dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.4 - movup.5 - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.4 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - dup.0 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::load_sw - push.3 - u32and - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - swap.1 - u32mod.4 - assertz.err=250 - movup.2 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - swap.1 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - end - end - end -end - - -export."::new_cell_for_free_list" - swap.1 - drop - push.3 - movup.3 - swap.1 - u32shl - push.512 - u32wrapping_add - push.2 - movup.3 - swap.1 - u32shl - dup.0 - dup.2 - u32gt - push.0 - push.0 - push.4294967294 - movup.2 - cdrop - u32or - neq.0 - cdrop - push.65543 - u32wrapping_add - dup.0 - push.16 - u32shr - exec.::intrinsics::mem::memory_grow - push.4294967295 - dup.1 - swap.1 - neq - neq.0 - if.true - push.16 - u32shl - dup.0 - add.4 - u32assert - dup.1 - push.0.0 - dup.3 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_dw - dup.4 - add.4 - u32assert - push.4294901760 - movup.5 - swap.1 - u32and - dup.4 - swap.1 - u32wrapping_add - push.2 - u32or - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - movup.4 - dup.1 - movup.5 - swap.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.0 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - 
u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - else - drop - drop - dup.0 - add.4 - u32assert - swap.1 - push.0 - dup.2 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - push.1 - dup.1 - dup.0 - u32mod.16 - dup.0 - u32mod.4 - swap.1 - u32div.4 - movup.2 - u32div.16 - exec.::intrinsics::mem::store_sw - u32mod.4 - assertz.err=250 - u32mod.4 - assertz.err=250 - end +export."alloc::raw_vec::handle_error" + push.0 assert end diff --git a/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.wat b/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.wat index 2231ccad3..23e004675 100644 --- a/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.wat +++ b/tests/integration/expected/abi_transform_tx_kernel_get_inputs_4.wat @@ -1,28 +1,29 @@ (module $abi_transform_tx_kernel_get_inputs_4.wasm - (type (;0;) (func (param i32) (result i32))) - (type (;1;) (func (param i32))) - (type (;2;) (func (param i32 i32) (result i32))) - (type (;3;) (func (param i32 i32 i32 i32))) + (type (;0;) (func (result i32))) + (type (;1;) (func (param i32) (result i32))) + (type (;2;) (func (param i32))) + (type (;3;) (func (param i32 i32) (result i32))) (type (;4;) (func (param i32 i32 i32) (result i32))) (type (;5;) (func (param i32 i32 i32))) (type (;6;) (func (param i32 i32))) - (import "miden::note" "get_inputs<0x0000000000000000000000000000000000000000000000000000000000000000>" (func $miden_tx_kernel_sys::externs::extern_note_get_inputs (;0;) (type 0))) - (func $entrypoint (;1;) (type 1) (param i32) + (import "intrinsics::mem" "heap_base" (func $miden_sdk_alloc::heap_base (;0;) (type 0))) + (import "miden::note" "get_inputs<0x0000000000000000000000000000000000000000000000000000000000000000>" (func $miden_base_sys::bindings::tx::externs::extern_note_get_inputs (;1;) (type 1))) + 
(func $entrypoint (;2;) (type 2) (param i32) local.get 0 - call $miden_tx_kernel_sys::get_inputs + call $miden_base_sys::bindings::tx::get_inputs ) - (func $__rust_alloc (;2;) (type 2) (param i32 i32) (result i32) + (func $__rust_alloc (;3;) (type 3) (param i32 i32) (result i32) i32.const 1048576 local.get 1 local.get 0 - call $::alloc + call $::alloc ) - (func $__rust_alloc_zeroed (;3;) (type 2) (param i32 i32) (result i32) + (func $__rust_alloc_zeroed (;4;) (type 3) (param i32 i32) (result i32) block ;; label = @1 i32.const 1048576 local.get 1 local.get 0 - call $::alloc + call $::alloc local.tee 1 i32.eqz br_if 0 (;@1;) @@ -33,458 +34,76 @@ end local.get 1 ) - (func $wee_alloc::neighbors::Neighbors::remove (;4;) (type 1) (param i32) - (local i32 i32 i32) + (func $::alloc (;5;) (type 4) (param i32 i32 i32) (result i32) + (local i32 i32) block ;; label = @1 - local.get 0 - i32.load + local.get 1 + i32.const 32 + local.get 1 + i32.const 32 + i32.gt_u + select local.tee 1 - i32.const 2 - i32.and + i32.popcnt + i32.const 1 + i32.ne br_if 0 (;@1;) + i32.const -2147483648 local.get 1 - i32.const -4 - i32.and - local.tee 2 - i32.eqz - br_if 0 (;@1;) - local.get 2 + i32.sub local.get 2 - i32.load offset=4 - i32.const 3 - i32.and - local.get 0 - i32.load offset=4 - i32.const -4 - i32.and - i32.or - i32.store offset=4 - local.get 0 - i32.load - local.set 1 - end - block ;; label = @1 - local.get 0 - i32.load offset=4 - local.tee 2 - i32.const -4 - i32.and - local.tee 3 - i32.eqz + i32.lt_u br_if 0 (;@1;) - local.get 3 - local.get 3 - i32.load - i32.const 3 - i32.and + i32.const 0 + local.set 3 local.get 1 - i32.const -4 - i32.and - i32.or - i32.store - local.get 0 - i32.load offset=4 - local.set 2 - local.get 0 - i32.load - local.set 1 - end - local.get 0 - local.get 2 - i32.const 3 - i32.and - i32.store offset=4 - local.get 0 - local.get 1 - i32.const 3 - i32.and - i32.store - ) - (func $::new_cell_for_free_list (;5;) (type 3) (param i32 i32 i32 i32) - block ;; label = @1 
- block ;; label = @2 - local.get 2 - i32.const 2 - i32.shl - local.tee 2 - local.get 3 - i32.const 3 - i32.shl - i32.const 512 - i32.add - local.tee 3 - local.get 2 - local.get 3 - i32.gt_u - select - i32.const 65543 - i32.add - local.tee 3 - i32.const 16 - i32.shr_u - memory.grow - local.tee 2 - i32.const -1 - i32.ne - br_if 0 (;@2;) - i32.const 1 - local.set 3 - i32.const 0 - local.set 2 - br 1 (;@1;) - end - local.get 2 - i32.const 16 - i32.shl - local.tee 2 - i64.const 0 - i64.store offset=4 align=4 local.get 2 - local.get 2 - local.get 3 - i32.const -65536 - i32.and i32.add - i32.const 2 - i32.or - i32.store + i32.const -1 + i32.add i32.const 0 - local.set 3 - end - local.get 0 - local.get 2 - i32.store offset=4 - local.get 0 - local.get 3 - i32.store - ) - (func $wee_alloc::alloc_first_fit (;6;) (type 4) (param i32 i32 i32) (result i32) - (local i32 i32 i32 i32 i32 i32) - local.get 1 - i32.const -1 - i32.add - local.set 3 - i32.const 0 - local.set 4 - i32.const 0 - local.get 1 - i32.sub - local.set 5 - local.get 0 - i32.const 2 - i32.shl - local.set 6 - local.get 2 - i32.load - local.set 0 - loop (result i32) ;; label = @1 - block ;; label = @2 - block ;; label = @3 - local.get 0 - i32.eqz - br_if 0 (;@3;) - local.get 0 - local.set 1 - block ;; label = @4 - block ;; label = @5 - loop ;; label = @6 - block ;; label = @7 - local.get 1 - i32.load offset=8 - local.tee 0 - i32.const 1 - i32.and - br_if 0 (;@7;) - local.get 1 - i32.load - i32.const -4 - i32.and - local.tee 7 - local.get 1 - i32.const 8 - i32.add - local.tee 8 - i32.sub - local.get 6 - i32.lt_u - br_if 5 (;@2;) - block ;; label = @8 - local.get 8 - i32.const 72 - i32.add - local.get 7 - local.get 6 - i32.sub - local.get 5 - i32.and - local.tee 7 - i32.le_u - br_if 0 (;@8;) - local.get 3 - local.get 8 - i32.and - br_if 6 (;@2;) - local.get 2 - local.get 0 - i32.const -4 - i32.and - i32.store - local.get 1 - i32.load - local.set 2 - local.get 1 - local.set 0 - br 4 (;@4;) - end - i32.const 0 - 
local.set 2 - local.get 7 - i32.const 0 - i32.store - local.get 7 - i32.const -8 - i32.add - local.tee 0 - i64.const 0 - i64.store align=4 - local.get 0 - local.get 1 - i32.load - i32.const -4 - i32.and - i32.store - block ;; label = @8 - local.get 1 - i32.load - local.tee 8 - i32.const 2 - i32.and - br_if 0 (;@8;) - local.get 8 - i32.const -4 - i32.and - local.tee 8 - i32.eqz - br_if 0 (;@8;) - local.get 8 - local.get 8 - i32.load offset=4 - i32.const 3 - i32.and - local.get 0 - i32.or - i32.store offset=4 - local.get 0 - i32.load offset=4 - i32.const 3 - i32.and - local.set 2 - end - local.get 0 - local.get 2 - local.get 1 - i32.or - i32.store offset=4 - local.get 1 - local.get 1 - i32.load offset=8 - i32.const -2 - i32.and - i32.store offset=8 - local.get 1 - local.get 1 - i32.load - local.tee 2 - i32.const 3 - i32.and - local.get 0 - i32.or - local.tee 8 - i32.store - local.get 2 - i32.const 2 - i32.and - br_if 2 (;@5;) - local.get 0 - i32.load - local.set 2 - br 3 (;@4;) - end - local.get 1 - local.get 0 - i32.const -2 - i32.and - i32.store offset=8 - block ;; label = @7 - block ;; label = @8 - local.get 1 - i32.load offset=4 - i32.const -4 - i32.and - local.tee 0 - br_if 0 (;@8;) - i32.const 0 - local.set 0 - br 1 (;@7;) - end - i32.const 0 - local.get 0 - local.get 0 - i32.load8_u - i32.const 1 - i32.and - select - local.set 0 - end - local.get 1 - call $wee_alloc::neighbors::Neighbors::remove - block ;; label = @7 - local.get 1 - i32.load8_u - i32.const 2 - i32.and - i32.eqz - br_if 0 (;@7;) - local.get 0 - local.get 0 - i32.load - i32.const 2 - i32.or - i32.store - end - local.get 2 - local.get 0 - i32.store - local.get 0 - local.set 1 - br 0 (;@6;) - end - end - local.get 1 - local.get 8 - i32.const -3 - i32.and - i32.store - local.get 0 - i32.load - i32.const 2 - i32.or - local.set 2 - end - local.get 0 - local.get 2 - i32.const 1 - i32.or - i32.store - local.get 0 - i32.const 8 - i32.add - local.set 4 - end - local.get 4 - return - end - local.get 2 - 
local.get 0 - i32.store - br 0 (;@1;) - end - ) - (func $::alloc (;7;) (type 4) (param i32 i32 i32) (result i32) - (local i32 i32) - global.get $__stack_pointer - i32.const 16 - i32.sub - local.tee 3 - global.set $__stack_pointer - local.get 1 - i32.const 1 - local.get 1 - i32.const 1 - i32.gt_u - select - local.set 1 - block ;; label = @1 - local.get 2 - i32.eqz - br_if 0 (;@1;) - local.get 3 - local.get 0 - i32.load - i32.store offset=12 + local.get 1 + i32.sub + i32.and + local.set 2 block ;; label = @2 - local.get 2 - i32.const 3 - i32.add - i32.const 2 - i32.shr_u - local.tee 4 - local.get 1 - local.get 3 - i32.const 12 - i32.add - call $wee_alloc::alloc_first_fit - local.tee 2 - i32.eqz + local.get 0 + i32.load br_if 0 (;@2;) local.get 0 - local.get 3 - i32.load offset=12 + call $miden_sdk_alloc::heap_base + memory.size + i32.const 16 + i32.shl + i32.add i32.store - local.get 2 - local.set 1 - br 1 (;@1;) end - local.get 3 - local.get 3 - local.get 4 - local.get 1 - call $::new_cell_for_free_list block ;; label = @2 - block ;; label = @3 - local.get 3 - i32.load - i32.eqz - br_if 0 (;@3;) - local.get 0 - local.get 3 - i32.load offset=12 - i32.store - br 1 (;@2;) - end - local.get 3 - i32.load offset=4 - local.tee 2 - local.get 3 - i32.load offset=12 - i32.store offset=8 - local.get 3 + i32.const 268435456 + local.get 0 + i32.load + local.tee 4 + i32.sub local.get 2 - i32.store offset=12 + i32.lt_u + br_if 0 (;@2;) + local.get 0 local.get 4 - local.get 1 - local.get 3 - i32.const 12 + local.get 2 i32.add - call $wee_alloc::alloc_first_fit - local.set 1 - local.get 0 - local.get 3 - i32.load offset=12 i32.store + local.get 4 local.get 1 - br_if 1 (;@1;) + i32.add + local.set 3 end - i32.const 0 - local.set 1 + local.get 3 + return end - local.get 3 - i32.const 16 - i32.add - global.set $__stack_pointer - local.get 1 + unreachable + unreachable ) - (func $miden_tx_kernel_sys::get_inputs (;8;) (type 1) (param i32) + (func $miden_base_sys::bindings::tx::get_inputs 
(;6;) (type 2) (param i32) (local i32 i32 i32) global.get $__stack_pointer i32.const 16 @@ -511,13 +130,13 @@ call $alloc::raw_vec::handle_error unreachable end + local.get 0 local.get 1 i32.load offset=12 local.tee 3 - call $miden_tx_kernel_sys::externs::extern_note_get_inputs - drop - local.get 0 - i32.const 0 + i32.const 4 + i32.shr_u + call $miden_base_sys::bindings::tx::externs::extern_note_get_inputs i32.store offset=8 local.get 0 local.get 3 @@ -530,7 +149,7 @@ i32.add global.set $__stack_pointer ) - (func $alloc::raw_vec::RawVec::try_allocate_in (;9;) (type 5) (param i32 i32 i32) + (func $alloc::raw_vec::RawVec::try_allocate_in (;7;) (type 5) (param i32 i32 i32) (local i32) block ;; label = @1 block ;; label = @2 @@ -604,7 +223,7 @@ local.get 1 i32.store ) - (func $alloc::raw_vec::handle_error (;10;) (type 6) (param i32 i32) + (func $alloc::raw_vec::handle_error (;8;) (type 6) (param i32 i32) unreachable unreachable ) diff --git a/tests/integration/src/rust_masm_tests/abi_transform/tx_kernel.rs b/tests/integration/src/rust_masm_tests/abi_transform/tx_kernel.rs index e38798769..8443c2a8f 100644 --- a/tests/integration/src/rust_masm_tests/abi_transform/tx_kernel.rs +++ b/tests/integration/src/rust_masm_tests/abi_transform/tx_kernel.rs @@ -20,7 +20,6 @@ fn setup_log() { } #[test] -#[ignore = "pending rodata fixes"] fn test_get_inputs_4() -> Result<(), Report> { test_get_inputs("4", vec![u32::MAX.into(), Felt::ONE, Felt::ZERO, u32::MAX.into()]) } @@ -56,10 +55,34 @@ end let package = test.compiled_package(); - let exec = Executor::for_package(&package, vec![], &test.session)?; + // Provide a place in memory where the vector returned by `get_inputs` should be stored + let out_addr = 18u32 * 65536; + let exec = Executor::for_package(&package, vec![Felt::new(out_addr as u64)], &test.session)?; let trace = exec.execute(&package.unwrap_program(), &test.session); - let vm_out = trace.into_outputs(); - dbg!(&vm_out); + // Verify that the vector contains the expected 
elements: + // + // Rust lays out the vector struct as follows (lowest addressed bytes first): + // + // [capacity, buf_ptr, len] + // + // 1. Extract the data pointer and length from the vector written to out_addr + let data_ptr = trace.read_memory_element(out_addr / 16, 1).unwrap().as_int() as u32; + assert_ne!(data_ptr, 0, "expected non-null data pointer"); + dbg!(data_ptr); + let len = trace.read_memory_element(out_addr / 16, 2).unwrap().as_int() as usize; + assert_eq!( + len, + expected_inputs.len(), + "expected vector to contain all of the expected inputs" + ); + // 2. Read the vector elements via data_ptr and ensure they match the inputs + dbg!(len); + let word = trace.read_memory_word(data_ptr / 16).unwrap(); + assert_eq!( + word.as_slice(), + expected_inputs.as_slice(), + "expected vector contents to match inputs" + ); // let ir_program = test.ir_masm_program(); // let emul_out = execute_emulator(ir_program.clone(), &[]); From ee46fe39c68b272c8d5fdb69f321ee30f876658b Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Thu, 29 Aug 2024 12:35:03 -0400 Subject: [PATCH 16/18] chore: update expect tests due to codegen changes --- frontend-wasm/src/code_translator/tests.rs | 5 + .../miden_sdk_account_test.hir | 192 +++++++++++------- .../miden_sdk_account_test.wat | 92 +++++---- .../rust-sdk/account-test/Cargo.lock | 2 +- 4 files changed, 177 insertions(+), 114 deletions(-) diff --git a/frontend-wasm/src/code_translator/tests.rs b/frontend-wasm/src/code_translator/tests.rs index 1a982ca51..8fa4ff9ca 100644 --- a/frontend-wasm/src/code_translator/tests.rs +++ b/frontend-wasm/src/code_translator/tests.rs @@ -467,6 +467,7 @@ fn i32_popcnt() { expect![[r#" (let (v0 i32) (const.i32 1)) (let (v1 u32) (popcnt v0)) + (let (v2 i32) (bitcast v1)) "#]], ) } @@ -482,6 +483,7 @@ fn i32_clz() { expect![[r#" (let (v0 i32) (const.i32 1)) (let (v1 u32) (clz v0)) + (let (v2 i32) (bitcast v1)) "#]], ) } @@ -497,6 +499,7 @@ fn i64_clz() { expect![[r#" (let (v0 i64) (const.i64 
1)) (let (v1 u32) (clz v0)) + (let (v2 i32) (bitcast v1)) "#]], ) } @@ -512,6 +515,7 @@ fn i32_ctz() { expect![[r#" (let (v0 i32) (const.i32 1)) (let (v1 u32) (ctz v0)) + (let (v2 i32) (bitcast v1)) "#]], ) } @@ -527,6 +531,7 @@ fn i64_ctz() { expect![[r#" (let (v0 i64) (const.i64 1)) (let (v1 u32) (ctz v0)) + (let (v2 i32) (bitcast v1)) "#]], ) } diff --git a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir index afa26368b..f3be3b26f 100644 --- a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir +++ b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir @@ -566,70 +566,99 @@ (param i32) (param i32) (param i32) (result i32) (block 0 (param v0 i32) (param v1 i32) (param v2 i32) (let (v4 i32) (const.i32 0)) - (let (v5 i32) (const.i32 0)) - (let (v6 i32) (add.wrapping v1 v2)) - (let (v7 i32) (const.i32 -1)) - (let (v8 i32) (add.wrapping v6 v7)) - (let (v9 i32) (const.i32 0)) - (let (v10 i32) (sub.wrapping v9 v1)) - (let (v11 i32) (band v8 v10)) - (let (v12 u32) (bitcast v0)) - (let (v13 u32) (mod.unchecked v12 4)) - (assertz 250 v13) - (let (v14 (ptr i32)) (inttoptr v12)) - (let (v15 i32) (load v14)) - (let (v16 i1) (neq v15 0)) - (condbr v16 (block 2 v0 v11 v1 v5) (block 3))) - - (block 1 (param v3 i32) - (ret v3)) + (let (v5 i32) (const.i32 32)) + (let (v6 i32) (const.i32 32)) + (let (v7 u32) (bitcast v1)) + (let (v8 u32) (bitcast v6)) + (let (v9 i1) (gt v7 v8)) + (let (v10 i32) (sext v9)) + (let (v11 i1) (neq v10 0)) + (let (v12 i32) (select v11 v1 v5)) + (let (v13 u32) (popcnt v12)) + (let (v14 i32) (bitcast v13)) + (let (v15 i32) (const.i32 1)) + (let (v16 i1) (neq v14 v15)) + (let (v17 i32) (zext v16)) + (let (v18 i1) (neq v17 0)) + (condbr v18 (block 2) (block 3))) + + (block 1 (param v3 i32)) (block 2 - (param v28 i32) - (param v34 i32) - (param v44 i32) - (param v47 i32) - (let (v27 i32) (const.i32 
268435456)) - (let (v29 u32) (bitcast v28)) - (let (v30 u32) (mod.unchecked v29 4)) - (assertz 250 v30) - (let (v31 (ptr i32)) (inttoptr v29)) - (let (v32 i32) (load v31)) - (let (v33 i32) (sub.wrapping v27 v32)) - (let (v35 u32) (bitcast v33)) - (let (v36 u32) (bitcast v34)) - (let (v37 i1) (lt v35 v36)) - (let (v38 i32) (sext v37)) - (let (v39 i1) (neq v38 0)) - (condbr v39 (block 4 v47) (block 5))) + (unreachable)) (block 3 - (let (v17 u32) (call (#intrinsics::mem #heap_base))) - (let (v18 u32) (memory.size)) - (let (v19 i32) (const.i32 16)) - (let (v20 u32) (bitcast v19)) - (let (v21 u32) (shl.wrapping v18 v20)) - (let (v22 u32) (add.wrapping v17 v21)) - (let (v23 i32) (bitcast v22)) - (let (v24 u32) (bitcast v0)) - (let (v25 u32) (mod.unchecked v24 4)) - (assertz 250 v25) - (let (v26 (ptr i32)) (inttoptr v24)) - (store v26 v23) - (br (block 2 v0 v11 v1 v5))) + (let (v19 i32) (const.i32 -2147483648)) + (let (v20 i32) (sub.wrapping v19 v12)) + (let (v21 u32) (bitcast v20)) + (let (v22 u32) (bitcast v2)) + (let (v23 i1) (lt v21 v22)) + (let (v24 i32) (sext v23)) + (let (v25 i1) (neq v24 0)) + (condbr v25 (block 2) (block 4))) - (block 4 (param v46 i32) - (br (block 1 v46))) + (block 4 + (let (v26 i32) (const.i32 0)) + (let (v27 i32) (add.wrapping v12 v2)) + (let (v28 i32) (const.i32 -1)) + (let (v29 i32) (add.wrapping v27 v28)) + (let (v30 i32) (const.i32 0)) + (let (v31 i32) (sub.wrapping v30 v12)) + (let (v32 i32) (band v29 v31)) + (let (v33 u32) (bitcast v0)) + (let (v34 u32) (mod.unchecked v33 4)) + (assertz 250 v34) + (let (v35 (ptr i32)) (inttoptr v33)) + (let (v36 i32) (load v35)) + (let (v37 i1) (neq v36 0)) + (condbr v37 (block 5 v0 v32 v12 v26) (block 6))) (block 5 - (let (v40 i32) (add.wrapping v32 v34)) - (let (v41 u32) (bitcast v28)) - (let (v42 u32) (mod.unchecked v41 4)) - (assertz 250 v42) - (let (v43 (ptr i32)) (inttoptr v41)) - (store v43 v40) - (let (v45 i32) (add.wrapping v32 v44)) - (br (block 4 v45))) + (param v49 i32) + (param v55 i32) + 
(param v65 i32) + (param v68 i32) + (let (v48 i32) (const.i32 268435456)) + (let (v50 u32) (bitcast v49)) + (let (v51 u32) (mod.unchecked v50 4)) + (assertz 250 v51) + (let (v52 (ptr i32)) (inttoptr v50)) + (let (v53 i32) (load v52)) + (let (v54 i32) (sub.wrapping v48 v53)) + (let (v56 u32) (bitcast v54)) + (let (v57 u32) (bitcast v55)) + (let (v58 i1) (lt v56 v57)) + (let (v59 i32) (sext v58)) + (let (v60 i1) (neq v59 0)) + (condbr v60 (block 7 v68) (block 8))) + + (block 6 + (let (v38 u32) (call (#intrinsics::mem #heap_base))) + (let (v39 u32) (memory.size)) + (let (v40 i32) (const.i32 16)) + (let (v41 u32) (bitcast v40)) + (let (v42 u32) (shl.wrapping v39 v41)) + (let (v43 u32) (add.wrapping v38 v42)) + (let (v44 i32) (bitcast v43)) + (let (v45 u32) (bitcast v0)) + (let (v46 u32) (mod.unchecked v45 4)) + (assertz 250 v46) + (let (v47 (ptr i32)) (inttoptr v45)) + (store v47 v44) + (br (block 5 v0 v32 v12 v26))) + + (block 7 (param v67 i32) + (ret v67)) + + (block 8 + (let (v61 i32) (add.wrapping v53 v55)) + (let (v62 u32) (bitcast v49)) + (let (v63 u32) (mod.unchecked v62 4)) + (assertz 250 v63) + (let (v64 (ptr i32)) (inttoptr v62)) + (store v64 v61) + (let (v66 i32) (add.wrapping v53 v65)) + (br (block 7 v66))) ) (func (export #miden_base_sys::bindings::tx::get_id) @@ -683,28 +712,33 @@ (assertz 250 v30) (let (v31 (ptr i32)) (inttoptr v29)) (let (v32 i32) (load v31)) - (let [(v33 i32) (v34 i32)] (call (#miden::note #get_inputs) v32)) - (let (v35 u32) (bitcast v0)) - (let (v36 u32) (add.checked v35 8)) - (let (v37 u32) (mod.unchecked v36 4)) - (assertz 250 v37) - (let (v38 (ptr i32)) (inttoptr v36)) - (store v38 v33) - (let (v39 u32) (bitcast v0)) - (let (v40 u32) (add.checked v39 4)) - (let (v41 u32) (mod.unchecked v40 4)) - (assertz 250 v41) - (let (v42 (ptr i32)) (inttoptr v40)) - (store v42 v32) - (let (v43 u32) (bitcast v0)) - (let (v44 u32) (mod.unchecked v43 4)) - (assertz 250 v44) - (let (v45 (ptr i32)) (inttoptr v43)) - (store v45 v14) - (let (v46 i32) 
(const.i32 16)) - (let (v47 i32) (add.wrapping v4 v46)) - (let (v48 (ptr i32)) (global.symbol #__stack_pointer)) - (store v48 v47) + (let (v33 i32) (const.i32 4)) + (let (v34 u32) (bitcast v32)) + (let (v35 u32) (bitcast v33)) + (let (v36 u32) (shr.wrapping v34 v35)) + (let (v37 i32) (bitcast v36)) + (let [(v38 i32) (v39 i32)] (call (#miden::note #get_inputs) v37)) + (let (v40 u32) (bitcast v0)) + (let (v41 u32) (add.checked v40 8)) + (let (v42 u32) (mod.unchecked v41 4)) + (assertz 250 v42) + (let (v43 (ptr i32)) (inttoptr v41)) + (store v43 v38) + (let (v44 u32) (bitcast v0)) + (let (v45 u32) (add.checked v44 4)) + (let (v46 u32) (mod.unchecked v45 4)) + (assertz 250 v46) + (let (v47 (ptr i32)) (inttoptr v45)) + (store v47 v32) + (let (v48 u32) (bitcast v0)) + (let (v49 u32) (mod.unchecked v48 4)) + (assertz 250 v49) + (let (v50 (ptr i32)) (inttoptr v48)) + (store v50 v14) + (let (v51 i32) (const.i32 16)) + (let (v52 i32) (add.wrapping v4 v51)) + (let (v53 (ptr i32)) (global.symbol #__stack_pointer)) + (store v53 v52) (br (block 1))) (block 3 diff --git a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat index ffdd67f90..5c8a428e1 100644 --- a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat +++ b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat @@ -389,50 +389,72 @@ ) (func $::alloc (;44;) (type 18) (param i32 i32 i32) (result i32) (local i32 i32) - i32.const 0 - local.set 3 - local.get 1 - local.get 2 - i32.add - i32.const -1 - i32.add - i32.const 0 - local.get 1 - i32.sub - i32.and - local.set 2 block ;; label = @1 - local.get 0 - i32.load + local.get 1 + i32.const 32 + local.get 1 + i32.const 32 + i32.gt_u + select + local.tee 1 + i32.popcnt + i32.const 1 + i32.ne br_if 0 (;@1;) - local.get 0 - call $miden_sdk_alloc::heap_base - memory.size - i32.const 16 - i32.shl - i32.add - i32.store - end - block ;; 
label = @1 - i32.const 268435456 - local.get 0 - i32.load - local.tee 4 + i32.const -2147483648 + local.get 1 i32.sub local.get 2 i32.lt_u br_if 0 (;@1;) - local.get 0 - local.get 4 + i32.const 0 + local.set 3 + local.get 1 local.get 2 i32.add - i32.store - local.get 4 - local.get 1 + i32.const -1 i32.add - local.set 3 + i32.const 0 + local.get 1 + i32.sub + i32.and + local.set 2 + block ;; label = @2 + local.get 0 + i32.load + br_if 0 (;@2;) + local.get 0 + call $miden_sdk_alloc::heap_base + memory.size + i32.const 16 + i32.shl + i32.add + i32.store + end + block ;; label = @2 + i32.const 268435456 + local.get 0 + i32.load + local.tee 4 + i32.sub + local.get 2 + i32.lt_u + br_if 0 (;@2;) + local.get 0 + local.get 4 + local.get 2 + i32.add + i32.store + local.get 4 + local.get 1 + i32.add + local.set 3 + end + local.get 3 + return end - local.get 3 + unreachable + unreachable ) (func $miden_base_sys::bindings::tx::get_id (;45;) (type 12) (result f32) call $miden_base_sys::bindings::tx::externs::extern_account_get_id @@ -468,6 +490,8 @@ local.get 1 i32.load offset=12 local.tee 3 + i32.const 4 + i32.shr_u call $miden_base_sys::bindings::tx::externs::extern_note_get_inputs i32.store offset=8 local.get 0 diff --git a/tests/rust-apps-wasm/rust-sdk/account-test/Cargo.lock b/tests/rust-apps-wasm/rust-sdk/account-test/Cargo.lock index 574c6cfbe..ec5c3ca0c 100644 --- a/tests/rust-apps-wasm/rust-sdk/account-test/Cargo.lock +++ b/tests/rust-apps-wasm/rust-sdk/account-test/Cargo.lock @@ -552,7 +552,7 @@ dependencies = [ [[package]] name = "miden-base-sys" -version = "0.0.0" +version = "0.0.1" dependencies = [ "miden-assembly", "miden-stdlib-sys", From 65365a033936e042edd8be745af070d9dce6cf6e Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Fri, 30 Aug 2024 01:56:06 -0400 Subject: [PATCH 17/18] fix(codegen): broken return via pointer transformation --- Cargo.lock | 20 +- Cargo.toml | 2 +- codegen/masm/src/masm/program.rs | 20 +- frontend-wasm/src/miden_abi/transform.rs | 
13 +- hir-type/src/lib.rs | 2 +- midenc-debug/src/exec/trace.rs | 13 +- midenc-debug/src/felt.rs | 163 ++++++++++++---- midenc-debug/src/lib.rs | 2 +- sdk/stdlib-sys/src/intrinsics/mod.rs | 48 +++++ sdk/stdlib-sys/src/lib.rs | 2 +- sdk/stdlib-sys/src/stdlib/crypto/hashes.rs | 6 +- .../abi_transform_stdlib_blake3_hash.hir | 179 +++++++++--------- .../abi_transform_stdlib_blake3_hash.masm | 163 +++++----------- .../abi_transform_stdlib_blake3_hash.wat | 33 ++-- .../rust_masm_tests/abi_transform/stdlib.rs | 35 ++-- 15 files changed, 386 insertions(+), 315 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fca32ad7b..39481a2df 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2850,9 +2850,9 @@ dependencies = [ [[package]] name = "miden-air" -version = "0.10.2" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05e396af383ee2a38ca41f188173672cd70e221985eec4afcb854fe37d18fd1b" +checksum = "2702f8adb96844e3521f49149f6c3d4773ecdd2a96a3169e3c025a2e3ee32b5e" dependencies = [ "miden-core", "miden-thiserror", @@ -2862,9 +2862,9 @@ dependencies = [ [[package]] name = "miden-assembly" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3aefae8d99d66c3f8666e917cb3ef976edb39247099311f695e5ba57305616d" +checksum = "eae9cef4fbafb4fe26da18574bcdbd78815857cfe1099760782701ababb076c2" dependencies = [ "aho-corasick", "lalrpop", @@ -2888,9 +2888,9 @@ dependencies = [ [[package]] name = "miden-core" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e46df4105dc2ec15aa14182ce6de299720991bfb83a9b6aa9293c6ee2b12b18" +checksum = "fc3f6db878d6b56c1566cd5b832908675566d3919b9a3523d630dfb5e2f7422d" dependencies = [ "lock_api", "loom", @@ -3053,9 +3053,9 @@ dependencies = [ [[package]] name = "miden-processor" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "6afc2ccde2a0d6dc1580a6515b130700484042bee810d84fd3cf3e4616cac632" +checksum = "01e7b212b152b69373e89b069a18cb01742ef2c3f9c328e7b24c44e44f022e52" dependencies = [ "miden-air", "miden-core", @@ -3078,9 +3078,9 @@ version = "0.0.1" [[package]] name = "miden-stdlib" -version = "0.10.3" +version = "0.10.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9314d34d66a677f819aaf9912b2110b29a24a28db66e9bf1be687b6f29389d9e" +checksum = "41623ad4f4ea6449760f70ab8928c682c3824d735d3e330f07e3d24d1ad20bfa" dependencies = [ "miden-assembly", ] diff --git a/Cargo.toml b/Cargo.toml index 099cba5cf..e33ab80c9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -85,7 +85,7 @@ indexmap = "2.2" miden-assembly = { version = "0.10.3" } miden-core = { version = "0.10.3" } miden-processor = { version = "0.10.3" } -miden-stdlib = { version = "0.10.3" } +miden-stdlib = { version = "0.10.3", features = ["with-debug-info"] } #miden-assembly = { git = "https://github.com/0xPolygonMiden/miden-vm", rev = "828557c28ca1d159bfe42195e7ea73256ce4aa06" } #miden-core = { git = "https://github.com/0xPolygonMiden/miden-vm", rev = "828557c28ca1d159bfe42195e7ea73256ce4aa06" } #miden-processor = { git = "https://github.com/0xPolygonMiden/miden-vm", rev = "828557c28ca1d159bfe42195e7ea73256ce4aa06" } diff --git a/codegen/masm/src/masm/program.rs b/codegen/masm/src/masm/program.rs index 1d3096b22..5a989c7ce 100644 --- a/codegen/masm/src/masm/program.rs +++ b/codegen/masm/src/masm/program.rs @@ -131,24 +131,10 @@ impl Program { // Advice Stack: [dest_ptr, num_words, ...] 
block.push(Op::AdvPush(2), span); // => [num_words, dest_ptr] on operand stack block.push(Op::Exec("std::mem::pipe_words_to_memory".parse().unwrap()), span); - // Drop the commitment + // Drop HASH + block.push(Op::Dropw, span); + // Drop dest_ptr block.push(Op::Drop, span); - // If we know the stack pointer address, update it to the value of `'write_ptr`, but cast - // into the Rust address space (multiplying it by 16). So a word address of 1, is equal to - // a byte address of 16, because each field element holds 4 bytes, and there are 4 elements - // in a word. - // - // If we don't know the stack pointer, just drop the `'write_ptr` value - if let Some(sp) = self.stack_pointer() { - block.push(Op::U32OverflowingMulImm(16), span); - block.push(Op::Assertz, span); - // Align the stack pointer to a word boundary - let elem_addr = (sp / 4) + (sp % 4 > 0) as u32; - let word_addr = (elem_addr / 4) + (elem_addr % 4 > 0) as u32; - block.push(Op::MemStoreImm(word_addr), span); - } else { - block.push(Op::Drop, span); - } } /// Emit the sequence of instructions necessary to consume rodata from the advice stack and diff --git a/frontend-wasm/src/miden_abi/transform.rs b/frontend-wasm/src/miden_abi/transform.rs index 6b67e2dce..d8f4dfd9d 100644 --- a/frontend-wasm/src/miden_abi/transform.rs +++ b/frontend-wasm/src/miden_abi/transform.rs @@ -115,20 +115,17 @@ pub fn return_via_pointer( let ptr_arg = *args.last().unwrap(); let ptr_arg_ty = builder.data_flow_graph().value_type(ptr_arg).clone(); assert_eq!(ptr_arg_ty, I32); - let ptr_u32 = builder.ins().cast(ptr_arg, U32, span); + let ptr_u32 = builder.ins().bitcast(ptr_arg, U32, span); + let result_ty = midenc_hir::StructType::new( + results.iter().map(|v| builder.data_flow_graph().value_type(*v).clone()), + ); for (idx, value) in results.iter().enumerate() { let value_ty = builder.data_flow_graph().value_type(*value).clone(); - let value_size = value_ty.aligned_size_in_bytes(); let eff_ptr = if idx == 0 { // We're assuming 
here that the base pointer is of the correct alignment ptr_u32 } else { - // We're computing the offset from Rust's perspective, so multiply the index by the - // aligned size in bytes to get the next aligned address. Note that this presumes - // that the pointer we have been given has the required minimum alignment, if it does - // not, then we'll be writing to the wrong locations in memory. - let offset = u32::try_from(idx * value_size).expect("offset overflow"); - let imm = Immediate::U32(offset); + let imm = Immediate::U32(result_ty.get(idx).offset); builder.ins().add_imm_checked(ptr_u32, imm, span) }; let addr = builder.ins().inttoptr(eff_ptr, Ptr(value_ty.into()), span); diff --git a/hir-type/src/lib.rs b/hir-type/src/lib.rs index db042ce88..c19c93c2d 100644 --- a/hir-type/src/lib.rs +++ b/hir-type/src/lib.rs @@ -402,7 +402,7 @@ pub struct StructField { pub index: u8, /// The specified alignment for this field pub align: u16, - /// The offset of this field relative to the previous field, or from the base of the struct + /// The offset of this field relative to the base of the struct pub offset: u32, /// The type of this field pub ty: Type, diff --git a/midenc-debug/src/exec/trace.rs b/midenc-debug/src/exec/trace.rs index fe707c948..3bce2b111 100644 --- a/midenc-debug/src/exec/trace.rs +++ b/midenc-debug/src/exec/trace.rs @@ -244,17 +244,18 @@ impl ExecutionTrace { } n => { let mut buf = VecDeque::default(); - let chunks_needed = n / 4; + let chunks_needed = ((n / 4) as u32) + ((n % 4) > 0) as u32; if ptr.offset > 0 { todo!() - } else if ptr.index > 0 { - todo!() } else { for i in 0..chunks_needed { - let word = self - .read_memory_word_in_context(ptr.waddr + i as u32, ctx, clk) + let abs_i = i + ptr.index as u32; + let word = ptr.waddr + (abs_i / 4); + let index = (abs_i % 4) as u8; + let elem = self + .read_memory_element_in_context(word, index, ctx, clk) .expect("invalid memory access"); - buf.extend(word.into_iter().map(TestFelt)); + 
buf.push_back(TestFelt(elem)); } } Some(T::try_pop(&mut buf).unwrap_or_else(|| { diff --git a/midenc-debug/src/felt.rs b/midenc-debug/src/felt.rs index 93df7ffcb..ac77d4d97 100644 --- a/midenc-debug/src/felt.rs +++ b/midenc-debug/src/felt.rs @@ -195,6 +195,23 @@ impl PopFromStack for RawFelt { } } +impl PushToStack for [RawFelt; 4] { + #[inline(always)] + fn try_push(&self, stack: &mut Vec) { + stack.extend(self.iter().copied().rev()); + } +} +impl PopFromStack for [RawFelt; 4] { + #[inline(always)] + fn try_pop(stack: &mut VecDeque) -> Option { + let a = stack.pop_front()?; + let b = stack.pop_front()?; + let c = stack.pop_front()?; + let d = stack.pop_front()?; + Some([a.0, b.0, c.0, d.0]) + } +} + impl PushToStack for Felt { #[inline(always)] fn try_push(&self, stack: &mut Vec) { @@ -208,50 +225,52 @@ impl PopFromStack for Felt { } } -impl PushToStack for [u8; N] { +impl PushToStack for [Felt; 4] { + #[inline(always)] fn try_push(&self, stack: &mut Vec) { - let mut iter = self.iter().array_chunks::<4>(); - let buf_size = (self.len() / 4) + (self.len() % 4 == 0) as usize; - let mut buf = vec![0u32; buf_size]; - let mut i = 0; - for chunk in iter.by_ref() { - let n = u32::from_be_bytes([*chunk[0], *chunk[1], *chunk[2], *chunk[3]]); - buf[i] = n; - i += 1; - } - if let Some(rest) = iter.into_remainder() { - let mut n_buf = [0u8; 4]; - for (i, byte) in rest.into_iter().enumerate() { - n_buf[i] = *byte; - } - buf[i] = u32::from_be_bytes(n_buf); - } - for chunk in buf.into_iter().rev() { - PushToStack::try_push(&chunk, stack); - } + stack.extend(self.iter().map(|f| f.0).rev()); + } +} +impl PopFromStack for [Felt; 4] { + #[inline(always)] + fn try_pop(stack: &mut VecDeque) -> Option { + let a = stack.pop_front()?; + let b = stack.pop_front()?; + let c = stack.pop_front()?; + let d = stack.pop_front()?; + Some([a, b, c, d]) } } impl PopFromStack for [u8; N] { fn try_pop(stack: &mut VecDeque) -> Option { + use midenc_hir::FieldElement; let mut out = [0u8; N]; - let 
byte_size = out.len(); - let mut i = 0; - while i < byte_size { - let chunk: u32 = PopFromStack::try_pop(stack).expect("invalid u32"); - let bytes = chunk.to_be_bytes(); - if i + 4 > byte_size { - for byte in bytes[..(byte_size - i)].iter().copied() { - out[i] = byte; - i += 1; - } + let chunk_size = (out.len() / 4) + (out.len() % 4 > 0) as usize; + for i in 0..chunk_size { + let elem: u32 = PopFromStack::try_pop(stack)?; + let bytes = elem.to_le_bytes(); + let offset = i * 4; + if offset + 3 < N { + out[offset] = bytes[0]; + out[offset + 1] = bytes[1]; + out[offset + 2] = bytes[2]; + out[offset + 3] = bytes[3]; + } else if offset + 2 < N { + out[offset] = bytes[0]; + out[offset + 1] = bytes[1]; + out[offset + 2] = bytes[2]; + break; + } else if offset + 1 < N { + out[offset] = bytes[0]; + out[offset + 1] = bytes[1]; + break; + } else if offset < N { + out[offset] = bytes[0]; break; } else { - for byte in bytes.iter().copied() { - out[i] = byte; - i += 1; - } + break; } } @@ -259,6 +278,49 @@ impl PopFromStack for [u8; N] { } } +/// Convert a byte array to an equivalent vector of words +/// +/// Given a byte slice laid out like so: +/// +/// [b0, b1, b2, b3, b4, b5, b6, b7, .., b31] +/// +/// This will produce a vector of words laid out like so: +/// +/// [[{b0, ..b3}, {b4, ..b7}, {b8..b11}, {b12, ..b15}], ..] +/// +/// In other words, it produces words that when placed on the stack and written to memory +/// word-by-word, that memory will be laid out in the correct byte order. +pub fn bytes_to_words(bytes: &[u8]) -> Vec<[RawFelt; 4]> { + // 1. 
Chunk bytes up into felts + let mut iter = bytes.iter().array_chunks::<4>(); + let buf_size = (bytes.len() / 4) + (bytes.len() % 4 > 0) as usize; + let padding = buf_size % 8; + let mut buf = Vec::with_capacity(buf_size + padding); + for chunk in iter.by_ref() { + let n = u32::from_le_bytes([*chunk[0], *chunk[1], *chunk[2], *chunk[3]]); + buf.push(n); + } + // Zero-pad the buffer to nearest whole element + if let Some(rest) = iter.into_remainder() { + let mut n_buf = [0u8; 4]; + for (i, byte) in rest.into_iter().enumerate() { + n_buf[i] = *byte; + } + buf.push(u32::from_le_bytes(n_buf)); + } + // Zero-pad the buffer to nearest whole word + let padded_buf_size = buf_size + padding; + buf.resize(padded_buf_size, 0); + // Chunk into words, and push them in largest-address first order + let word_size = (padded_buf_size / 4) + (padded_buf_size % 4 > 0) as usize; + let mut words = Vec::with_capacity(word_size); + for mut word_chunk in buf.into_iter().map(|elem| RawFelt::new(elem as u64)).array_chunks::<4>() + { + words.push(word_chunk); + } + words +} + /// Wrapper around `miden_processor::Felt` that implements useful traits that are not implemented /// for that type. 
#[derive(Debug, Copy, Clone, PartialEq, Eq)] @@ -460,3 +522,36 @@ impl Arbitrary for Felt { (0u64..RawFelt::MODULUS).prop_map(|v| Felt(RawFelt::new(v))).boxed() } } + +#[cfg(test)] +mod tests { + use std::collections::VecDeque; + + use super::{bytes_to_words, PopFromStack}; + + #[test] + fn bytes_to_words_test() { + let bytes = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let words = bytes_to_words(&bytes); + assert_eq!(words.len(), 2); + assert_eq!(words[0][0].as_int() as u32, u32::from_le_bytes([1, 2, 3, 4])); + assert_eq!(words[0][1].as_int() as u32, u32::from_le_bytes([5, 6, 7, 8])); + assert_eq!(words[0][2].as_int() as u32, u32::from_le_bytes([9, 10, 11, 12])); + assert_eq!(words[0][3].as_int() as u32, u32::from_le_bytes([13, 14, 15, 16])); + } + + #[test] + fn bytes_from_words_test() { + let bytes = [ + 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30, 31, 32, + ]; + let words = bytes_to_words(&bytes); + let mut stack = VecDeque::from_iter(words.into_iter().flatten().map(super::Felt)); + let out: [u8; 32] = PopFromStack::try_pop(&mut stack).unwrap(); + assert_eq!(&out, &bytes); + } +} diff --git a/midenc-debug/src/lib.rs b/midenc-debug/src/lib.rs index f3bf7cc32..ac50cffd2 100644 --- a/midenc-debug/src/lib.rs +++ b/midenc-debug/src/lib.rs @@ -22,7 +22,7 @@ pub use self::{ config::DebuggerConfig, debug::*, exec::*, - felt::{Felt, Felt as TestFelt, PopFromStack, PushToStack}, + felt::{bytes_to_words, Felt, Felt as TestFelt, PopFromStack, PushToStack}, }; pub type ExecutionResult = Result; diff --git a/sdk/stdlib-sys/src/intrinsics/mod.rs b/sdk/stdlib-sys/src/intrinsics/mod.rs index 345283410..f27a9d615 100644 --- a/sdk/stdlib-sys/src/intrinsics/mod.rs +++ b/sdk/stdlib-sys/src/intrinsics/mod.rs @@ -1,2 +1,50 @@ +use core::ops::{Deref, DerefMut}; + pub(crate) mod felt; pub(crate) mod word; + +#[repr(C, align(32))] 
+pub struct WordAligned(T); +impl WordAligned { + #[inline(always)] + pub const fn new(t: T) -> Self { + Self(t) + } + + #[inline(always)] + pub fn into_inner(self) -> T { + self.0 + } +} +impl From for WordAligned { + #[inline(always)] + fn from(t: T) -> Self { + Self(t) + } +} +impl AsRef for WordAligned { + #[inline(always)] + fn as_ref(&self) -> &T { + &self.0 + } +} +impl AsMut for WordAligned { + #[inline(always)] + fn as_mut(&mut self) -> &mut T { + &mut self.0 + } +} +impl Deref for WordAligned { + type Target = T; + + #[inline(always)] + fn deref(&self) -> &Self::Target { + &self.0 + } +} +impl DerefMut for WordAligned { + #[inline(always)] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} diff --git a/sdk/stdlib-sys/src/lib.rs b/sdk/stdlib-sys/src/lib.rs index 1d5f2f833..55185e101 100644 --- a/sdk/stdlib-sys/src/lib.rs +++ b/sdk/stdlib-sys/src/lib.rs @@ -3,5 +3,5 @@ mod intrinsics; mod stdlib; -pub use intrinsics::{felt::*, word::*}; +pub use intrinsics::{felt::*, word::*, WordAligned}; pub use stdlib::*; diff --git a/sdk/stdlib-sys/src/stdlib/crypto/hashes.rs b/sdk/stdlib-sys/src/stdlib/crypto/hashes.rs index e223a62c5..4994cc9b9 100644 --- a/sdk/stdlib-sys/src/stdlib/crypto/hashes.rs +++ b/sdk/stdlib-sys/src/stdlib/crypto/hashes.rs @@ -102,14 +102,16 @@ fn hash_1to1( input: [u8; 32], extern_hash_1to1: unsafe extern "C" fn(u32, u32, u32, u32, u32, u32, u32, u32, *mut u8), ) -> [u8; 32] { + use crate::WordAligned; + let input = unsafe { core::mem::transmute::<[u8; 32], [u32; 8]>(input) }; unsafe { - let mut ret_area = ::core::mem::MaybeUninit::<[u8; 32]>::uninit(); + let mut ret_area = ::core::mem::MaybeUninit::>::uninit(); let ptr = ret_area.as_mut_ptr() as *mut u8; extern_hash_1to1( input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7], ptr, ); - ret_area.assume_init() + ret_area.assume_init().into_inner() } } diff --git a/tests/integration/expected/abi_transform_stdlib_blake3_hash.hir 
b/tests/integration/expected/abi_transform_stdlib_blake3_hash.hir index e1a2d5dfd..e299c818b 100644 --- a/tests/integration/expected/abi_transform_stdlib_blake3_hash.hir +++ b/tests/integration/expected/abi_transform_stdlib_blake3_hash.hir @@ -14,104 +14,109 @@ (let (v3 i32) (global.load i32 (global.symbol #__stack_pointer))) (let (v4 i32) (const.i32 32)) (let (v5 i32) (sub.wrapping v3 v4)) - (let (v6 (ptr i32)) (global.symbol #__stack_pointer)) - (store v6 v5) - (let (v7 u32) (cast v1)) - (let (v8 (ptr i32)) (inttoptr v7)) - (let (v9 i32) (load v8)) - (let (v10 u32) (cast v1)) - (let (v11 u32) (add.checked v10 4)) - (let (v12 (ptr i32)) (inttoptr v11)) - (let (v13 i32) (load v12)) - (let (v14 u32) (cast v1)) - (let (v15 u32) (add.checked v14 8)) - (let (v16 (ptr i32)) (inttoptr v15)) - (let (v17 i32) (load v16)) - (let (v18 u32) (cast v1)) - (let (v19 u32) (add.checked v18 12)) - (let (v20 (ptr i32)) (inttoptr v19)) - (let (v21 i32) (load v20)) - (let (v22 u32) (cast v1)) - (let (v23 u32) (add.checked v22 16)) - (let (v24 (ptr i32)) (inttoptr v23)) - (let (v25 i32) (load v24)) - (let (v26 u32) (cast v1)) - (let (v27 u32) (add.checked v26 20)) - (let (v28 (ptr i32)) (inttoptr v27)) - (let (v29 i32) (load v28)) - (let (v30 u32) (cast v1)) - (let (v31 u32) (add.checked v30 24)) - (let (v32 (ptr i32)) (inttoptr v31)) - (let (v33 i32) (load v32)) - (let (v34 u32) (cast v1)) - (let (v35 u32) (add.checked v34 28)) - (let (v36 (ptr i32)) (inttoptr v35)) - (let (v37 i32) (load v36)) - (let [(v38 i32) (v39 i32) (v40 i32) (v41 i32) (v42 i32) (v43 i32) (v44 i32) (v45 i32)] (call (#std::crypto::hashes::blake3 #hash_1to1) v9 v13 v17 v21 v25 v29 v33 v37)) - (let (v46 u32) (cast v5)) - (let (v47 (ptr i32)) (inttoptr v46)) - (store v47 v38) - (let (v48 u32) (add.checked v46 8)) + (let (v6 i32) (const.i32 -32)) + (let (v7 i32) (band v5 v6)) + (let (v8 (ptr i32)) (global.symbol #__stack_pointer)) + (store v8 v7) + (let (v9 u32) (bitcast v1)) + (let (v10 (ptr i32)) (inttoptr v9)) + 
(let (v11 i32) (load v10)) + (let (v12 u32) (bitcast v1)) + (let (v13 u32) (add.checked v12 4)) + (let (v14 (ptr i32)) (inttoptr v13)) + (let (v15 i32) (load v14)) + (let (v16 u32) (bitcast v1)) + (let (v17 u32) (add.checked v16 8)) + (let (v18 (ptr i32)) (inttoptr v17)) + (let (v19 i32) (load v18)) + (let (v20 u32) (bitcast v1)) + (let (v21 u32) (add.checked v20 12)) + (let (v22 (ptr i32)) (inttoptr v21)) + (let (v23 i32) (load v22)) + (let (v24 u32) (bitcast v1)) + (let (v25 u32) (add.checked v24 16)) + (let (v26 (ptr i32)) (inttoptr v25)) + (let (v27 i32) (load v26)) + (let (v28 u32) (bitcast v1)) + (let (v29 u32) (add.checked v28 20)) + (let (v30 (ptr i32)) (inttoptr v29)) + (let (v31 i32) (load v30)) + (let (v32 u32) (bitcast v1)) + (let (v33 u32) (add.checked v32 24)) + (let (v34 (ptr i32)) (inttoptr v33)) + (let (v35 i32) (load v34)) + (let (v36 u32) (bitcast v1)) + (let (v37 u32) (add.checked v36 28)) + (let (v38 (ptr i32)) (inttoptr v37)) + (let (v39 i32) (load v38)) + (let [(v40 i32) (v41 i32) (v42 i32) (v43 i32) (v44 i32) (v45 i32) (v46 i32) (v47 i32)] (call (#std::crypto::hashes::blake3 #hash_1to1) v11 v15 v19 v23 v27 v31 v35 v39)) + (let (v48 u32) (bitcast v7)) (let (v49 (ptr i32)) (inttoptr v48)) - (store v49 v39) - (let (v50 u32) (add.checked v46 16)) + (store v49 v40) + (let (v50 u32) (add.checked v48 4)) (let (v51 (ptr i32)) (inttoptr v50)) - (store v51 v40) - (let (v52 u32) (add.checked v46 24)) + (store v51 v41) + (let (v52 u32) (add.checked v48 8)) (let (v53 (ptr i32)) (inttoptr v52)) - (store v53 v41) - (let (v54 u32) (add.checked v46 32)) + (store v53 v42) + (let (v54 u32) (add.checked v48 12)) (let (v55 (ptr i32)) (inttoptr v54)) - (store v55 v42) - (let (v56 u32) (add.checked v46 40)) + (store v55 v43) + (let (v56 u32) (add.checked v48 16)) (let (v57 (ptr i32)) (inttoptr v56)) - (store v57 v43) - (let (v58 u32) (add.checked v46 48)) + (store v57 v44) + (let (v58 u32) (add.checked v48 20)) (let (v59 (ptr i32)) (inttoptr v58)) - (store v59 
v44) - (let (v60 u32) (add.checked v46 56)) + (store v59 v45) + (let (v60 u32) (add.checked v48 24)) (let (v61 (ptr i32)) (inttoptr v60)) - (store v61 v45) - (let (v62 i32) (const.i32 24)) - (let (v63 i32) (add.wrapping v0 v62)) + (store v61 v46) + (let (v62 u32) (add.checked v48 28)) + (let (v63 (ptr i32)) (inttoptr v62)) + (store v63 v47) (let (v64 i32) (const.i32 24)) - (let (v65 i32) (add.wrapping v5 v64)) - (let (v66 u32) (cast v65)) - (let (v67 (ptr i64)) (inttoptr v66)) - (let (v68 i64) (load v67)) - (let (v69 u32) (cast v63)) - (let (v70 (ptr i64)) (inttoptr v69)) - (store v70 v68) - (let (v71 i32) (const.i32 16)) - (let (v72 i32) (add.wrapping v0 v71)) + (let (v65 i32) (add.wrapping v0 v64)) + (let (v66 u32) (bitcast v7)) + (let (v67 u32) (add.checked v66 24)) + (let (v68 u32) (mod.unchecked v67 8)) + (assertz 250 v68) + (let (v69 (ptr i64)) (inttoptr v67)) + (let (v70 i64) (load v69)) + (let (v71 u32) (bitcast v65)) + (let (v72 (ptr i64)) (inttoptr v71)) + (store v72 v70) (let (v73 i32) (const.i32 16)) - (let (v74 i32) (add.wrapping v5 v73)) - (let (v75 u32) (cast v74)) - (let (v76 (ptr i64)) (inttoptr v75)) - (let (v77 i64) (load v76)) - (let (v78 u32) (cast v72)) - (let (v79 (ptr i64)) (inttoptr v78)) - (store v79 v77) - (let (v80 i32) (const.i32 8)) - (let (v81 i32) (add.wrapping v0 v80)) + (let (v74 i32) (add.wrapping v0 v73)) + (let (v75 u32) (bitcast v7)) + (let (v76 u32) (add.checked v75 16)) + (let (v77 u32) (mod.unchecked v76 8)) + (assertz 250 v77) + (let (v78 (ptr i64)) (inttoptr v76)) + (let (v79 i64) (load v78)) + (let (v80 u32) (bitcast v74)) + (let (v81 (ptr i64)) (inttoptr v80)) + (store v81 v79) (let (v82 i32) (const.i32 8)) - (let (v83 i32) (add.wrapping v5 v82)) - (let (v84 u32) (cast v83)) - (let (v85 (ptr i64)) (inttoptr v84)) - (let (v86 i64) (load v85)) - (let (v87 u32) (cast v81)) - (let (v88 (ptr i64)) (inttoptr v87)) - (store v88 v86) - (let (v89 u32) (cast v5)) + (let (v83 i32) (add.wrapping v0 v82)) + (let (v84 u32) (bitcast 
v7)) + (let (v85 u32) (add.checked v84 8)) + (let (v86 u32) (mod.unchecked v85 8)) + (assertz 250 v86) + (let (v87 (ptr i64)) (inttoptr v85)) + (let (v88 i64) (load v87)) + (let (v89 u32) (bitcast v83)) (let (v90 (ptr i64)) (inttoptr v89)) - (let (v91 i64) (load v90)) - (let (v92 u32) (cast v0)) - (let (v93 (ptr i64)) (inttoptr v92)) - (store v93 v91) - (let (v94 i32) (const.i32 32)) - (let (v95 i32) (add.wrapping v5 v94)) - (let (v96 (ptr i32)) (global.symbol #__stack_pointer)) - (store v96 v95) + (store v90 v88) + (let (v91 u32) (bitcast v7)) + (let (v92 u32) (mod.unchecked v91 8)) + (assertz 250 v92) + (let (v93 (ptr i64)) (inttoptr v91)) + (let (v94 i64) (load v93)) + (let (v95 u32) (bitcast v0)) + (let (v96 (ptr i64)) (inttoptr v95)) + (store v96 v94) + (let (v97 (ptr i32)) (global.symbol #__stack_pointer)) + (store v97 v3) (br (block 1))) (block 1 diff --git a/tests/integration/expected/abi_transform_stdlib_blake3_hash.masm b/tests/integration/expected/abi_transform_stdlib_blake3_hash.masm index 2c0281d8b..b8ad0bce5 100644 --- a/tests/integration/expected/abi_transform_stdlib_blake3_hash.masm +++ b/tests/integration/expected/abi_transform_stdlib_blake3_hash.masm @@ -3,9 +3,14 @@ use.std::crypto::hashes::blake3 export.entrypoint - mem_load.0x00001000 + mem_load.0x00010000 push.32 + dup.1 + swap.1 u32wrapping_sub + push.4294967264 + u32and + push.1048576 dup.1 swap.1 dup.0 @@ -18,15 +23,7 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.0 - dup.0 - push.2147483648 - u32lte - assert - dup.3 - dup.0 - push.2147483648 - u32lte - assert + dup.4 add.28 u32assert dup.0 @@ -38,11 +35,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_sw - dup.4 - dup.0 - push.2147483648 - u32lte - assert + dup.5 add.24 u32assert dup.0 @@ -54,11 +47,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_sw - dup.5 - dup.0 - push.2147483648 - u32lte - assert + dup.6 add.20 u32assert dup.0 @@ -70,11 +59,7 @@ export.entrypoint movup.2 
u32div.16 exec.::intrinsics::mem::load_sw - dup.6 - dup.0 - push.2147483648 - u32lte - assert + dup.7 add.16 u32assert dup.0 @@ -86,11 +71,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_sw - dup.7 - dup.0 - push.2147483648 - u32lte - assert + dup.8 add.12 u32assert dup.0 @@ -102,11 +83,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_sw - dup.8 - dup.0 - push.2147483648 - u32lte - assert + dup.9 add.8 u32assert dup.0 @@ -118,11 +95,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_sw - dup.9 - dup.0 - push.2147483648 - u32lte - assert + dup.10 add.4 u32assert dup.0 @@ -134,11 +107,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_sw - movup.10 - dup.0 - push.2147483648 - u32lte - assert + movup.11 dup.0 u32mod.16 dup.0 @@ -150,8 +119,6 @@ export.entrypoint exec.::intrinsics::mem::load_sw exec.::std::crypto::hashes::blake3::hash_1to1 dup.8 - movup.8 - swap.1 dup.0 u32mod.16 dup.0 @@ -162,10 +129,8 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.7 - add.8 + add.4 u32assert - movup.7 - swap.1 dup.0 u32mod.16 dup.0 @@ -176,10 +141,8 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.6 - add.16 + add.8 u32assert - movup.6 - swap.1 dup.0 u32mod.16 dup.0 @@ -190,10 +153,8 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.5 - add.24 + add.12 u32assert - movup.5 - swap.1 dup.0 u32mod.16 dup.0 @@ -204,10 +165,8 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.4 - add.32 + add.16 u32assert - movup.4 - swap.1 dup.0 u32mod.16 dup.0 @@ -218,10 +177,8 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.3 - add.40 + add.20 u32assert - movup.3 - swap.1 dup.0 u32mod.16 dup.0 @@ -232,10 +189,8 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::store_sw dup.2 - add.48 + add.24 u32assert - movup.2 - swap.1 dup.0 u32mod.16 dup.0 @@ -245,9 +200,14 @@ export.entrypoint movup.2 u32div.16 
exec.::intrinsics::mem::store_sw - swap.1 - add.56 + dup.2 + add.24 + u32assert + movup.2 + add.28 u32assert + movup.2 + swap.1 dup.0 u32mod.16 dup.0 @@ -257,14 +217,10 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::store_sw - push.24 dup.1 - swap.1 - u32wrapping_add - dup.0 - push.2147483648 - u32lte - assert + add.16 + u32assert + dup.1 dup.0 u32mod.16 dup.0 @@ -275,14 +231,10 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::load_dw push.24 - dup.4 + dup.7 swap.1 u32wrapping_add dup.0 - push.2147483648 - u32lte - assert - dup.0 u32mod.16 dup.0 u32mod.4 @@ -291,14 +243,10 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::store_dw - push.16 + dup.2 + add.8 + u32assert dup.1 - swap.1 - u32wrapping_add - dup.0 - push.2147483648 - u32lte - assert dup.0 u32mod.16 dup.0 @@ -309,14 +257,10 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::load_dw push.16 - dup.4 + dup.8 swap.1 u32wrapping_add dup.0 - push.2147483648 - u32lte - assert - dup.0 u32mod.16 dup.0 u32mod.4 @@ -325,14 +269,8 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::store_dw - push.8 + movup.3 dup.1 - swap.1 - u32wrapping_add - dup.0 - push.2147483648 - u32lte - assert dup.0 u32mod.16 dup.0 @@ -343,14 +281,10 @@ export.entrypoint u32div.16 exec.::intrinsics::mem::load_dw push.8 - dup.4 + dup.8 swap.1 u32wrapping_add dup.0 - push.2147483648 - u32lte - assert - dup.0 u32mod.16 dup.0 u32mod.4 @@ -361,10 +295,6 @@ export.entrypoint exec.::intrinsics::mem::store_dw dup.0 dup.0 - push.2147483648 - u32lte - assert - dup.0 u32mod.16 dup.0 u32mod.4 @@ -373,11 +303,7 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::load_dw - movup.3 - dup.0 - push.2147483648 - u32lte - assert + movup.7 dup.0 u32mod.16 dup.0 @@ -387,8 +313,9 @@ export.entrypoint movup.2 u32div.16 exec.::intrinsics::mem::store_dw - push.32 - u32wrapping_add + push.1048576 + movup.5 + swap.1 dup.0 u32mod.16 dup.0 @@ -398,6 +325,14 @@ export.entrypoint movup.2 u32div.16 
exec.::intrinsics::mem::store_sw + u32mod.8 + assertz.err=250 + u32mod.8 + assertz.err=250 + u32mod.8 + assertz.err=250 + u32mod.8 + assertz.err=250 end diff --git a/tests/integration/expected/abi_transform_stdlib_blake3_hash.wat b/tests/integration/expected/abi_transform_stdlib_blake3_hash.wat index 651e02779..03b47714c 100644 --- a/tests/integration/expected/abi_transform_stdlib_blake3_hash.wat +++ b/tests/integration/expected/abi_transform_stdlib_blake3_hash.wat @@ -3,11 +3,14 @@ (type (;1;) (func (param i32 i32))) (import "std::crypto::hashes::blake3" "hash_1to1<0x0000000000000000000000000000000000000000000000000000000000000000>" (func $miden_stdlib_sys::stdlib::crypto::hashes::extern_blake3_hash_1to1 (;0;) (type 0))) (func $entrypoint (;1;) (type 1) (param i32 i32) - (local i32) + (local i32 i32) global.get $__stack_pointer + local.tee 2 i32.const 32 i32.sub - local.tee 2 + i32.const -32 + i32.and + local.tee 3 global.set $__stack_pointer local.get 1 i32.load align=1 @@ -25,39 +28,31 @@ i32.load offset=24 align=1 local.get 1 i32.load offset=28 align=1 - local.get 2 + local.get 3 call $miden_stdlib_sys::stdlib::crypto::hashes::extern_blake3_hash_1to1 local.get 0 i32.const 24 i32.add - local.get 2 - i32.const 24 - i32.add - i64.load align=1 + local.get 3 + i64.load offset=24 i64.store align=1 local.get 0 i32.const 16 i32.add - local.get 2 - i32.const 16 - i32.add - i64.load align=1 + local.get 3 + i64.load offset=16 i64.store align=1 local.get 0 i32.const 8 i32.add - local.get 2 - i32.const 8 - i32.add - i64.load align=1 + local.get 3 + i64.load offset=8 i64.store align=1 local.get 0 - local.get 2 - i64.load align=1 + local.get 3 + i64.load i64.store align=1 local.get 2 - i32.const 32 - i32.add global.set $__stack_pointer ) (table (;0;) 1 1 funcref) diff --git a/tests/integration/src/rust_masm_tests/abi_transform/stdlib.rs b/tests/integration/src/rust_masm_tests/abi_transform/stdlib.rs index 149bf0658..8f87df273 100644 --- 
a/tests/integration/src/rust_masm_tests/abi_transform/stdlib.rs +++ b/tests/integration/src/rust_masm_tests/abi_transform/stdlib.rs @@ -6,6 +6,7 @@ use miden_core::utils::group_slice_elements; use miden_processor::AdviceInputs; use midenc_debug::{Executor, PopFromStack, PushToStack, TestFelt}; use midenc_hir::Felt; +use midenc_session::Emit; use proptest::{ arbitrary::any, prelude::TestCaseError, @@ -16,7 +17,6 @@ use proptest::{ use crate::CompilerTest; #[test] -#[ignore = "pending rodata fixes"] fn test_blake3_hash() { let main_fn = "(a: [u8; 32]) -> [u8; 32] { miden_stdlib_sys::blake3_hash_1to1(a) }".to_string(); @@ -31,37 +31,44 @@ fn test_blake3_hash() { test.expect_wasm(expect_file![format!("../../../expected/{artifact_name}.wat")]); test.expect_ir(expect_file![format!("../../../expected/{artifact_name}.hir")]); test.expect_masm(expect_file![format!("../../../expected/{artifact_name}.masm")]); + let package = test.compiled_package(); // Run the Rust and compiled MASM code against a bunch of random inputs and compare the results let res = TestRunner::default().run(&any::<[u8; 32]>(), move |ibytes| { let hash_bytes = blake3::hash(&ibytes); let rs_out = hash_bytes.as_bytes(); + let in_addr = 21u32 * 65536; + let out_addr = 20u32 * 65536; let mut frame = Vec::::default(); - PushToStack::try_push(&ibytes, &mut frame); // words + // Convert input bytes to words + let words = midenc_debug::bytes_to_words(&ibytes); + for word in words.into_iter().rev() { + PushToStack::try_push(&word, &mut frame); + } PushToStack::try_push(&2u32, &mut frame); // num_words - PushToStack::try_push(&0u32, &mut frame); // dest_ptr - //let rs_ofelts = group_slice_elements::(rs_out) - // .iter() - // .map(|&bytes| u32::from_le_bytes(bytes).into()) - // .collect::>(); - //let ifelts = group_slice_elements::(&ibytes) - // .iter() - // .map(|&bytes| u32::from_le_bytes(bytes).into()) - // .collect::>(); + PushToStack::try_push(&(in_addr / 16), &mut frame); // dest_ptr dbg!(&ibytes, &frame, 
rs_out); - // Arguments are: [hash_input_ptr, hash_output_ptr] + // Arguments are: [hash_output_ptr, hash_input_ptr] let mut exec = Executor::for_package( &package, - vec![Felt::new(0), Felt::new(128 * 1024)], + // Place the hash output at 20 * PAGE_SIZE, and the hash input at 21 * PAGE_SIZE + vec![Felt::new(in_addr as u64), Felt::new(out_addr as u64)], &test.session, ) .map_err(|err| TestCaseError::fail(err.to_string()))?; + // Reverse the stack contents, so that the correct order is preserved after + // MemAdviceProvider does its own reverse + frame.reverse(); let advice_inputs = AdviceInputs::default().with_stack(frame); exec.with_advice_inputs(advice_inputs); let trace = exec.execute(&package.unwrap_program(), &test.session); + let vm_in: [u8; 32] = trace + .read_from_rust_memory(in_addr) + .expect("expected memory to have been written"); + dbg!(&vm_in); let vm_out: [u8; 32] = trace - .read_from_rust_memory(128 * 1024) + .read_from_rust_memory(out_addr) .expect("expected memory to have been written"); dbg!(&vm_out); prop_assert_eq!(rs_out, &vm_out, "VM output mismatch"); From ba72ef64491eec0a09278b70e6c9dc7d58193d62 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Fri, 30 Aug 2024 02:04:19 -0400 Subject: [PATCH 18/18] chore: update expect output for sdk account/wallet tests --- .../miden_sdk_account_test.hir | 229 +++++++++++------- .../miden_sdk_account_test.wat | 35 ++- .../rust_sdk_basic_wallet.hir | 16 +- 3 files changed, 182 insertions(+), 98 deletions(-) diff --git a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir index f3be3b26f..72a2e81c9 100644 --- a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir +++ b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.hir @@ -271,62 +271,113 @@ (func (export #test_blake3_hash_1to1) (param i32) (param i32) (block 0 (param v0 i32) (param v1 i32) - (let (v2 u32) 
(bitcast v1)) - (let (v3 (ptr i32)) (inttoptr v2)) - (let (v4 i32) (load v3)) - (let (v5 u32) (bitcast v1)) - (let (v6 u32) (add.checked v5 4)) - (let (v7 (ptr i32)) (inttoptr v6)) - (let (v8 i32) (load v7)) + (let (v2 i32) (const.i32 0)) + (let (v3 i32) (global.load i32 (global.symbol #__stack_pointer))) + (let (v4 i32) (const.i32 32)) + (let (v5 i32) (sub.wrapping v3 v4)) + (let (v6 i32) (const.i32 -32)) + (let (v7 i32) (band v5 v6)) + (let (v8 (ptr i32)) (global.symbol #__stack_pointer)) + (store v8 v7) (let (v9 u32) (bitcast v1)) - (let (v10 u32) (add.checked v9 8)) - (let (v11 (ptr i32)) (inttoptr v10)) - (let (v12 i32) (load v11)) - (let (v13 u32) (bitcast v1)) - (let (v14 u32) (add.checked v13 12)) - (let (v15 (ptr i32)) (inttoptr v14)) - (let (v16 i32) (load v15)) - (let (v17 u32) (bitcast v1)) - (let (v18 u32) (add.checked v17 16)) - (let (v19 (ptr i32)) (inttoptr v18)) - (let (v20 i32) (load v19)) - (let (v21 u32) (bitcast v1)) - (let (v22 u32) (add.checked v21 20)) - (let (v23 (ptr i32)) (inttoptr v22)) - (let (v24 i32) (load v23)) - (let (v25 u32) (bitcast v1)) - (let (v26 u32) (add.checked v25 24)) - (let (v27 (ptr i32)) (inttoptr v26)) - (let (v28 i32) (load v27)) - (let (v29 u32) (bitcast v1)) - (let (v30 u32) (add.checked v29 28)) - (let (v31 (ptr i32)) (inttoptr v30)) - (let (v32 i32) (load v31)) - (let [(v33 i32) (v34 i32) (v35 i32) (v36 i32) (v37 i32) (v38 i32) (v39 i32) (v40 i32)] (call (#std::crypto::hashes::blake3 #hash_1to1) v4 v8 v12 v16 v20 v24 v28 v32)) - (let (v41 u32) (cast v0)) - (let (v42 (ptr i32)) (inttoptr v41)) - (store v42 v33) - (let (v43 u32) (add.checked v41 8)) - (let (v44 (ptr i32)) (inttoptr v43)) - (store v44 v34) - (let (v45 u32) (add.checked v41 16)) - (let (v46 (ptr i32)) (inttoptr v45)) - (store v46 v35) - (let (v47 u32) (add.checked v41 24)) - (let (v48 (ptr i32)) (inttoptr v47)) - (store v48 v36) - (let (v49 u32) (add.checked v41 32)) - (let (v50 (ptr i32)) (inttoptr v49)) - (store v50 v37) - (let (v51 u32) 
(add.checked v41 40)) - (let (v52 (ptr i32)) (inttoptr v51)) - (store v52 v38) - (let (v53 u32) (add.checked v41 48)) - (let (v54 (ptr i32)) (inttoptr v53)) - (store v54 v39) - (let (v55 u32) (add.checked v41 56)) - (let (v56 (ptr i32)) (inttoptr v55)) - (store v56 v40) + (let (v10 (ptr i32)) (inttoptr v9)) + (let (v11 i32) (load v10)) + (let (v12 u32) (bitcast v1)) + (let (v13 u32) (add.checked v12 4)) + (let (v14 (ptr i32)) (inttoptr v13)) + (let (v15 i32) (load v14)) + (let (v16 u32) (bitcast v1)) + (let (v17 u32) (add.checked v16 8)) + (let (v18 (ptr i32)) (inttoptr v17)) + (let (v19 i32) (load v18)) + (let (v20 u32) (bitcast v1)) + (let (v21 u32) (add.checked v20 12)) + (let (v22 (ptr i32)) (inttoptr v21)) + (let (v23 i32) (load v22)) + (let (v24 u32) (bitcast v1)) + (let (v25 u32) (add.checked v24 16)) + (let (v26 (ptr i32)) (inttoptr v25)) + (let (v27 i32) (load v26)) + (let (v28 u32) (bitcast v1)) + (let (v29 u32) (add.checked v28 20)) + (let (v30 (ptr i32)) (inttoptr v29)) + (let (v31 i32) (load v30)) + (let (v32 u32) (bitcast v1)) + (let (v33 u32) (add.checked v32 24)) + (let (v34 (ptr i32)) (inttoptr v33)) + (let (v35 i32) (load v34)) + (let (v36 u32) (bitcast v1)) + (let (v37 u32) (add.checked v36 28)) + (let (v38 (ptr i32)) (inttoptr v37)) + (let (v39 i32) (load v38)) + (let [(v40 i32) (v41 i32) (v42 i32) (v43 i32) (v44 i32) (v45 i32) (v46 i32) (v47 i32)] (call (#std::crypto::hashes::blake3 #hash_1to1) v11 v15 v19 v23 v27 v31 v35 v39)) + (let (v48 u32) (bitcast v7)) + (let (v49 (ptr i32)) (inttoptr v48)) + (store v49 v40) + (let (v50 u32) (add.checked v48 4)) + (let (v51 (ptr i32)) (inttoptr v50)) + (store v51 v41) + (let (v52 u32) (add.checked v48 8)) + (let (v53 (ptr i32)) (inttoptr v52)) + (store v53 v42) + (let (v54 u32) (add.checked v48 12)) + (let (v55 (ptr i32)) (inttoptr v54)) + (store v55 v43) + (let (v56 u32) (add.checked v48 16)) + (let (v57 (ptr i32)) (inttoptr v56)) + (store v57 v44) + (let (v58 u32) (add.checked v48 20)) + (let (v59 (ptr 
i32)) (inttoptr v58)) + (store v59 v45) + (let (v60 u32) (add.checked v48 24)) + (let (v61 (ptr i32)) (inttoptr v60)) + (store v61 v46) + (let (v62 u32) (add.checked v48 28)) + (let (v63 (ptr i32)) (inttoptr v62)) + (store v63 v47) + (let (v64 i32) (const.i32 24)) + (let (v65 i32) (add.wrapping v0 v64)) + (let (v66 u32) (bitcast v7)) + (let (v67 u32) (add.checked v66 24)) + (let (v68 u32) (mod.unchecked v67 8)) + (assertz 250 v68) + (let (v69 (ptr i64)) (inttoptr v67)) + (let (v70 i64) (load v69)) + (let (v71 u32) (bitcast v65)) + (let (v72 (ptr i64)) (inttoptr v71)) + (store v72 v70) + (let (v73 i32) (const.i32 16)) + (let (v74 i32) (add.wrapping v0 v73)) + (let (v75 u32) (bitcast v7)) + (let (v76 u32) (add.checked v75 16)) + (let (v77 u32) (mod.unchecked v76 8)) + (assertz 250 v77) + (let (v78 (ptr i64)) (inttoptr v76)) + (let (v79 i64) (load v78)) + (let (v80 u32) (bitcast v74)) + (let (v81 (ptr i64)) (inttoptr v80)) + (store v81 v79) + (let (v82 i32) (const.i32 8)) + (let (v83 i32) (add.wrapping v0 v82)) + (let (v84 u32) (bitcast v7)) + (let (v85 u32) (add.checked v84 8)) + (let (v86 u32) (mod.unchecked v85 8)) + (assertz 250 v86) + (let (v87 (ptr i64)) (inttoptr v85)) + (let (v88 i64) (load v87)) + (let (v89 u32) (bitcast v83)) + (let (v90 (ptr i64)) (inttoptr v89)) + (store v90 v88) + (let (v91 u32) (bitcast v7)) + (let (v92 u32) (mod.unchecked v91 8)) + (assertz 250 v92) + (let (v93 (ptr i64)) (inttoptr v91)) + (let (v94 i64) (load v93)) + (let (v95 u32) (bitcast v0)) + (let (v96 (ptr i64)) (inttoptr v95)) + (store v96 v94) + (let (v97 (ptr i32)) (global.symbol #__stack_pointer)) + (store v97 v3) (br (block 1))) (block 1 @@ -399,28 +450,28 @@ (let (v63 (ptr i32)) (inttoptr v62)) (let (v64 i32) (load v63)) (let [(v65 i32) (v66 i32) (v67 i32) (v68 i32) (v69 i32) (v70 i32) (v71 i32) (v72 i32)] (call (#std::crypto::hashes::blake3 #hash_2to1) v4 v8 v12 v16 v20 v24 v28 v32 v36 v40 v44 v48 v52 v56 v60 v64)) - (let (v73 u32) (cast v0)) + (let (v73 u32) (bitcast v0)) 
(let (v74 (ptr i32)) (inttoptr v73)) (store v74 v65) - (let (v75 u32) (add.checked v73 8)) + (let (v75 u32) (add.checked v73 4)) (let (v76 (ptr i32)) (inttoptr v75)) (store v76 v66) - (let (v77 u32) (add.checked v73 16)) + (let (v77 u32) (add.checked v73 8)) (let (v78 (ptr i32)) (inttoptr v77)) (store v78 v67) - (let (v79 u32) (add.checked v73 24)) + (let (v79 u32) (add.checked v73 12)) (let (v80 (ptr i32)) (inttoptr v79)) (store v80 v68) - (let (v81 u32) (add.checked v73 32)) + (let (v81 u32) (add.checked v73 16)) (let (v82 (ptr i32)) (inttoptr v81)) (store v82 v69) - (let (v83 u32) (add.checked v73 40)) + (let (v83 u32) (add.checked v73 20)) (let (v84 (ptr i32)) (inttoptr v83)) (store v84 v70) - (let (v85 u32) (add.checked v73 48)) + (let (v85 u32) (add.checked v73 24)) (let (v86 (ptr i32)) (inttoptr v85)) (store v86 v71) - (let (v87 u32) (add.checked v73 56)) + (let (v87 u32) (add.checked v73 28)) (let (v88 (ptr i32)) (inttoptr v87)) (store v88 v72) (br (block 1))) @@ -779,16 +830,16 @@ (let (v19 (ptr felt)) (inttoptr v17)) (let (v20 felt) (load v19)) (let [(v21 felt) (v22 felt) (v23 felt) (v24 felt)] (call (#miden::account #add_asset) v5 v10 v15 v20)) - (let (v25 u32) (cast v0)) + (let (v25 u32) (bitcast v0)) (let (v26 (ptr felt)) (inttoptr v25)) (store v26 v21) - (let (v27 u32) (add.checked v25 8)) + (let (v27 u32) (add.checked v25 4)) (let (v28 (ptr felt)) (inttoptr v27)) (store v28 v22) - (let (v29 u32) (add.checked v25 16)) + (let (v29 u32) (add.checked v25 8)) (let (v30 (ptr felt)) (inttoptr v29)) (store v30 v23) - (let (v31 u32) (add.checked v25 24)) + (let (v31 u32) (add.checked v25 12)) (let (v32 (ptr felt)) (inttoptr v31)) (store v32 v24) (br (block 1))) @@ -824,16 +875,16 @@ (let (v19 (ptr felt)) (inttoptr v17)) (let (v20 felt) (load v19)) (let [(v21 felt) (v22 felt) (v23 felt) (v24 felt)] (call (#miden::account #remove_asset) v5 v10 v15 v20)) - (let (v25 u32) (cast v0)) + (let (v25 u32) (bitcast v0)) (let (v26 (ptr felt)) (inttoptr v25)) (store v26 
v21) - (let (v27 u32) (add.checked v25 8)) + (let (v27 u32) (add.checked v25 4)) (let (v28 (ptr felt)) (inttoptr v27)) (store v28 v22) - (let (v29 u32) (add.checked v25 16)) + (let (v29 u32) (add.checked v25 8)) (let (v30 (ptr felt)) (inttoptr v29)) (store v30 v23) - (let (v31 u32) (add.checked v25 24)) + (let (v31 u32) (add.checked v25 12)) (let (v32 (ptr felt)) (inttoptr v31)) (store v32 v24) (br (block 1))) @@ -1123,19 +1174,19 @@ (let (v21 i32) (const.i32 32)) (let (v22 i32) (add.wrapping v7 v21)) (let [(v23 felt) (v24 felt) (v25 felt) (v26 felt) (v27 i32)] (call (#std::mem #pipe_words_to_memory) v1 v20)) - (let (v28 u32) (cast v22)) + (let (v28 u32) (bitcast v22)) (let (v29 (ptr felt)) (inttoptr v28)) (store v29 v23) - (let (v30 u32) (add.checked v28 8)) + (let (v30 u32) (add.checked v28 4)) (let (v31 (ptr felt)) (inttoptr v30)) (store v31 v24) - (let (v32 u32) (add.checked v28 16)) + (let (v32 u32) (add.checked v28 8)) (let (v33 (ptr felt)) (inttoptr v32)) (store v33 v25) - (let (v34 u32) (add.checked v28 24)) + (let (v34 u32) (add.checked v28 12)) (let (v35 (ptr felt)) (inttoptr v34)) (store v35 v26) - (let (v36 u32) (add.checked v28 32)) + (let (v36 u32) (add.checked v28 16)) (let (v37 (ptr i32)) (inttoptr v36)) (store v37 v27) (let (v38 i32) (const.i32 24)) @@ -1258,43 +1309,43 @@ (let (v27 i32) (const.i32 32)) (let (v28 i32) (add.wrapping v7 v27)) (let [(v29 felt) (v30 felt) (v31 felt) (v32 felt) (v33 felt) (v34 felt) (v35 felt) (v36 felt) (v37 felt) (v38 felt) (v39 felt) (v40 felt) (v41 i32)] (call (#std::mem #pipe_double_words_to_memory) v22 v22 v22 v22 v22 v22 v22 v22 v22 v22 v22 v22 v20 v26)) - (let (v42 u32) (cast v28)) + (let (v42 u32) (bitcast v28)) (let (v43 (ptr felt)) (inttoptr v42)) (store v43 v29) - (let (v44 u32) (add.checked v42 8)) + (let (v44 u32) (add.checked v42 4)) (let (v45 (ptr felt)) (inttoptr v44)) (store v45 v30) - (let (v46 u32) (add.checked v42 16)) + (let (v46 u32) (add.checked v42 8)) (let (v47 (ptr felt)) (inttoptr v46)) 
(store v47 v31) - (let (v48 u32) (add.checked v42 24)) + (let (v48 u32) (add.checked v42 12)) (let (v49 (ptr felt)) (inttoptr v48)) (store v49 v32) - (let (v50 u32) (add.checked v42 32)) + (let (v50 u32) (add.checked v42 16)) (let (v51 (ptr felt)) (inttoptr v50)) (store v51 v33) - (let (v52 u32) (add.checked v42 40)) + (let (v52 u32) (add.checked v42 20)) (let (v53 (ptr felt)) (inttoptr v52)) (store v53 v34) - (let (v54 u32) (add.checked v42 48)) + (let (v54 u32) (add.checked v42 24)) (let (v55 (ptr felt)) (inttoptr v54)) (store v55 v35) - (let (v56 u32) (add.checked v42 56)) + (let (v56 u32) (add.checked v42 28)) (let (v57 (ptr felt)) (inttoptr v56)) (store v57 v36) - (let (v58 u32) (add.checked v42 64)) + (let (v58 u32) (add.checked v42 32)) (let (v59 (ptr felt)) (inttoptr v58)) (store v59 v37) - (let (v60 u32) (add.checked v42 72)) + (let (v60 u32) (add.checked v42 36)) (let (v61 (ptr felt)) (inttoptr v60)) (store v61 v38) - (let (v62 u32) (add.checked v42 80)) + (let (v62 u32) (add.checked v42 40)) (let (v63 (ptr felt)) (inttoptr v62)) (store v63 v39) - (let (v64 u32) (add.checked v42 88)) + (let (v64 u32) (add.checked v42 44)) (let (v65 (ptr felt)) (inttoptr v64)) (store v65 v40) - (let (v66 u32) (add.checked v42 96)) + (let (v66 u32) (add.checked v42 48)) (let (v67 (ptr i32)) (inttoptr v66)) (store v67 v41) (let (v68 i32) (const.i32 24)) diff --git a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat index 5c8a428e1..5c4540ee7 100644 --- a/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat +++ b/tests/integration/expected/rust_sdk_account_test/miden_sdk_account_test.wat @@ -264,6 +264,15 @@ end ) (func $test_blake3_hash_1to1 (;35;) (type 19) (param i32 i32) + (local i32 i32) + global.get $__stack_pointer + local.tee 2 + i32.const 32 + i32.sub + i32.const -32 + i32.and + local.tee 3 + global.set $__stack_pointer local.get 1 i32.load 
align=1 local.get 1 @@ -280,8 +289,32 @@ i32.load offset=24 align=1 local.get 1 i32.load offset=28 align=1 - local.get 0 + local.get 3 call $miden_stdlib_sys::stdlib::crypto::hashes::extern_blake3_hash_1to1 + local.get 0 + i32.const 24 + i32.add + local.get 3 + i64.load offset=24 + i64.store align=1 + local.get 0 + i32.const 16 + i32.add + local.get 3 + i64.load offset=16 + i64.store align=1 + local.get 0 + i32.const 8 + i32.add + local.get 3 + i64.load offset=8 + i64.store align=1 + local.get 0 + local.get 3 + i64.load + i64.store align=1 + local.get 2 + global.set $__stack_pointer ) (func $test_blake3_hash_2to1 (;36;) (type 19) (param i32 i32) local.get 1 diff --git a/tests/integration/expected/rust_sdk_basic_wallet/rust_sdk_basic_wallet.hir b/tests/integration/expected/rust_sdk_basic_wallet/rust_sdk_basic_wallet.hir index 808dd763d..cd5e29cac 100644 --- a/tests/integration/expected/rust_sdk_basic_wallet/rust_sdk_basic_wallet.hir +++ b/tests/integration/expected/rust_sdk_basic_wallet/rust_sdk_basic_wallet.hir @@ -82,16 +82,16 @@ (let (v19 (ptr felt)) (inttoptr v17)) (let (v20 felt) (load v19)) (let [(v21 felt) (v22 felt) (v23 felt) (v24 felt)] (call (#miden::account #add_asset) v5 v10 v15 v20)) - (let (v25 u32) (cast v0)) + (let (v25 u32) (bitcast v0)) (let (v26 (ptr felt)) (inttoptr v25)) (store v26 v21) - (let (v27 u32) (add.checked v25 8)) + (let (v27 u32) (add.checked v25 4)) (let (v28 (ptr felt)) (inttoptr v27)) (store v28 v22) - (let (v29 u32) (add.checked v25 16)) + (let (v29 u32) (add.checked v25 8)) (let (v30 (ptr felt)) (inttoptr v29)) (store v30 v23) - (let (v31 u32) (add.checked v25 24)) + (let (v31 u32) (add.checked v25 12)) (let (v32 (ptr felt)) (inttoptr v31)) (store v32 v24) (br (block 1))) @@ -127,16 +127,16 @@ (let (v19 (ptr felt)) (inttoptr v17)) (let (v20 felt) (load v19)) (let [(v21 felt) (v22 felt) (v23 felt) (v24 felt)] (call (#miden::account #remove_asset) v5 v10 v15 v20)) - (let (v25 u32) (cast v0)) + (let (v25 u32) (bitcast v0)) (let 
(v26 (ptr felt)) (inttoptr v25)) (store v26 v21) - (let (v27 u32) (add.checked v25 8)) + (let (v27 u32) (add.checked v25 4)) (let (v28 (ptr felt)) (inttoptr v27)) (store v28 v22) - (let (v29 u32) (add.checked v25 16)) + (let (v29 u32) (add.checked v25 8)) (let (v30 (ptr felt)) (inttoptr v29)) (store v30 v23) - (let (v31 u32) (add.checked v25 24)) + (let (v31 u32) (add.checked v25 12)) (let (v32 (ptr felt)) (inttoptr v31)) (store v32 v24) (br (block 1)))