Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support the immediate values for u32 bitwise instructions #1362

Merged
merged 7 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- Added support for the `nop` instruction, which corresponds to the VM opcode of the same name, and has the same semantics. This is implemented for use by compilers primarily.
- Added support for the `if.false` instruction, which can be used in the same manner as `if.true`
- Relaxed the parser to allow one branch of an `if.(true|false)` to be empty
- Added support for immediate values for `u32and`, `u32or`, `u32xor` and `u32not` bitwise instructions (#1362).

#### Changed

Expand Down
27 changes: 27 additions & 0 deletions assembly/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,25 @@ impl fmt::Display for HexErrorKind {
}
}

// BINARY ERROR KIND
// ================================================================================================

/// The ways in which a binary-encoded literal can be rejected.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinErrorKind {
    /// The literal is made up of more than 32 binary digits
    TooLong,
}

impl fmt::Display for BinErrorKind {
    /// Writes the human-readable description of this error kind to `f`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Pick the message first, then emit it with a single write
        let message = match *self {
            Self::TooLong => {
                "value has too many digits, binary string can contain no more than 32 digits"
            }
        };
        f.write_str(message)
    }
}

// PARSING ERROR
// ================================================================================================

Expand Down Expand Up @@ -162,6 +181,13 @@ pub enum ParsingError {
span: SourceSpan,
kind: HexErrorKind,
},
#[error("invalid literal: {}", kind)]
#[diagnostic()]
InvalidBinaryLiteral {
#[label]
span: SourceSpan,
kind: BinErrorKind,
},
#[error("invalid MAST root literal")]
InvalidMastRoot {
#[label]
Expand Down Expand Up @@ -335,6 +361,7 @@ fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
"quoted_ident" => return Some("quoted identifier".to_string()),
"doc_comment" => return Some("doc comment".to_string()),
"hex_value" => return Some("hex-encoded literal".to_string()),
"bin_value" => return Some("bin-encoded literal".to_string()),
"uint" => return Some("integer literal".to_string()),
"EOF" => return Some("end of file".to_string()),
other => other[1..].strip_suffix('"').and_then(|t| Token::parse(t).ok()),
Expand Down
66 changes: 61 additions & 5 deletions assembly/src/parser/grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use vm_core::{Felt, FieldElement, StarkField, crypto::hash::RpoDigest};

use crate::{LibraryPath, LibraryNamespace, ast::*, diagnostics::SourceFile, SourceSpan};
use super::{
HexEncodedValue, Token, ParseError, ParsingError,
BinEncodedValue, HexEncodedValue, Token, ParseError, ParsingError,
LiteralErrorKind, HexErrorKind, Span, Spanned,
DocumentationType
};
Expand All @@ -35,6 +35,7 @@ extern {
const_ident => Token::ConstantIdent(<&'input str>),
quoted_ident => Token::QuotedIdent(<&'input str>),
hex_value => Token::HexValue(<HexEncodedValue>),
bin_value => Token::BinValue(<BinEncodedValue>),
doc_comment => Token::DocComment(<DocumentationType>),
comment => Token::Comment,
uint => Token::Int(<u64>),
Expand Down Expand Up @@ -480,10 +481,7 @@ Inst: Instruction = {
"rcomb_base" => Instruction::RCombBase,
"sdepth" => Instruction::Sdepth,
"swapdw" => Instruction::SwapDw,
"u32and" => Instruction::U32And,
"u32cast" => Instruction::U32Cast,
"u32not" => Instruction::U32Not,
"u32or" => Instruction::U32Or,
"u32overflowing_add3" => Instruction::U32OverflowingAdd3,
"u32overflowing_madd" => Instruction::U32OverflowingMadd,
"u32popcnt" => Instruction::U32Popcnt,
Expand All @@ -496,7 +494,6 @@ Inst: Instruction = {
"u32testw" => Instruction::U32TestW,
"u32wrapping_add3" => Instruction::U32WrappingAdd3,
"u32wrapping_madd" => Instruction::U32WrappingMadd,
"u32xor" => Instruction::U32Xor,
"xor" => Instruction::Xor,
}

Expand Down Expand Up @@ -715,6 +712,57 @@ FoldableInstWithU32Immediate: SmallOpsVec = {
None => Ok(smallvec![Op::Inst(Span::new(span, Instruction::U32Mod))]),
}
},
// `u32and` with an optional u32 immediate.
//
// - immediate equal to 0: emits `Drop` followed by `PushU8(0)`; no `U32And` is emitted.
// - any other immediate: pushes it (`Push` for a named constant, `PushU32` for a literal value)
//   and then emits `U32And`.
// - no immediate: emits a single `U32And`.
//
// NOTE(review): on the zero path no `U32And` is emitted, so whatever operand checks that
// instruction performs are not applied to the value being dropped - confirm this is intended.
<l:@L> "u32and" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
Some(imm) if imm == 0 => smallvec![Op::Inst(Span::new(span, Instruction::Drop)), Op::Inst(Span::new(span, Instruction::PushU8(0)))],
Some(imm) => {
match imm {
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32And))],
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32And))],
}
}
None => smallvec![Op::Inst(Span::new(span, Instruction::U32And))],
}
},
// `u32or` with an optional u32 immediate.
//
// - immediate equal to 0: emits no operations at all (empty op list).
// - any other immediate: pushes it (`Push` for a named constant, `PushU32` for a literal value)
//   and then emits `U32Or`.
// - no immediate: emits a single `U32Or`.
//
// NOTE(review): on the zero path no `U32Or` is emitted, so whatever operand checks that
// instruction performs are skipped for the value on the stack - confirm this is intended.
<l:@L> "u32or" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
Some(imm) if imm == 0 => smallvec![],
Some(imm) => {
match imm {
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32Or))],
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32Or))],
}
}
None => smallvec![Op::Inst(Span::new(span, Instruction::U32Or))],
}
},
// `u32xor` with an optional u32 immediate.
//
// - immediate equal to 0: emits no operations at all (empty op list).
// - any other immediate: pushes it (`Push` for a named constant, `PushU32` for a literal value)
//   and then emits `U32Xor`.
// - no immediate: emits a single `U32Xor`.
//
// NOTE(review): on the zero path no `U32Xor` is emitted, so whatever operand checks that
// instruction performs are skipped for the value on the stack - confirm this is intended.
<l:@L> "u32xor" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
Some(imm) if imm == 0 => smallvec![],
Some(imm) => {
match imm {
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32Xor))],
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32Xor))],
}
}
None => smallvec![Op::Inst(Span::new(span, Instruction::U32Xor))],
}
},
// `u32not` with an optional u32 immediate.
//
// - with an immediate: pushes it (`Push` for a named constant, `PushU32` for a literal value)
//   and then emits `U32Not`.
// - no immediate: emits a single `U32Not`.
//
// Unlike the `u32and`/`u32or`/`u32xor` alternatives, there is no special case for an immediate
// equal to 0.
<l:@L> "u32not" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
Some(imm) => {
match imm {
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32Not))],
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32Not))],
}
}
None => smallvec![Op::Inst(Span::new(span, Instruction::U32Not))],
}
},
<l:@L> "u32wrapping_add" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
Expand Down Expand Up @@ -1083,6 +1131,14 @@ U32: u32 = {
HexEncodedValue::U32(v) => Ok(v),
_ => Err(ParseError::User { error: ParsingError::InvalidLiteral { span: span!(l, r), kind: LiteralErrorKind::U32Overflow } }),
}
},

// Accepts a binary-encoded literal wherever a `U32` is expected, widening the `U8` and `U16`
// variants to `u32`. Every variant maps to `Ok`, so this alternative never produces an error
// itself even though it is written as a fallible (`=>?`) action.
<l:@L> <value:bin_value> <r:@R> =>? {
match value {
BinEncodedValue::U8(v) => Ok(v as u32),
BinEncodedValue::U16(v) => Ok(v as u32),
BinEncodedValue::U32(v) => Ok(v),
}
}
}

Expand Down
70 changes: 66 additions & 4 deletions assembly/src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use alloc::string::String;
use core::{num::IntErrorKind, ops::Range};

use super::{
DocumentationType, HexEncodedValue, HexErrorKind, LiteralErrorKind, ParsingError, Scanner,
SourceSpan, Token,
BinEncodedValue, BinErrorKind, DocumentationType, HexEncodedValue, HexErrorKind,
LiteralErrorKind, ParsingError, Scanner, SourceSpan, Token,
};

/// The value produced by the [Lexer] when iterated
Expand Down Expand Up @@ -293,6 +293,11 @@ impl<'input> Lexer<'input> {
self.skip();
self.lex_hex()
}
'b' => {
self.skip();
self.skip();
self.lex_bin()
}
'0'..='9' => self.lex_number(),
_ => pop!(self, Token::Int(0)),
},
Expand Down Expand Up @@ -524,6 +529,28 @@ impl<'input> Lexer<'input> {
let value = parse_hex(span, self.slice_span(digit_start..end))?;
Ok(Token::HexValue(value))
}

/// Lexes the digits of a binary literal and wraps the parsed value in [Token::BinValue].
///
/// The text handed to `parse_bin` begins two characters past the start of the current span
/// (presumably skipping the `0b` prefix consumed by the caller), while the span attached to
/// any error covers the whole literal.
fn lex_bin(&mut self) -> Result<Token<'input>, ParsingError> {
    // The caller is expected to have left us positioned on a binary digit.
    // NOTE(review): this only holds if the character after the prefix is `0` or `1`; for any
    // other character the assertion fires in debug builds - confirm that is acceptable.
    debug_assert!(is_ascii_binary(self.read()));

    // Consume characters for as long as they are binary digits
    while is_ascii_binary(self.read()) {
        self.skip();
    }

    let token_span = self.span();
    let (lo, hi) = (token_span.start() as u32, token_span.end() as u32);
    let literal_span = SourceSpan::from(lo..hi);
    let digits = self.slice_span((lo + 2)..hi);
    parse_bin(literal_span, digits).map(Token::BinValue)
}
}

impl<'input> Iterator for Lexer<'input> {
Expand Down Expand Up @@ -561,7 +588,7 @@ fn parse_hex(span: SourceSpan, hex_digits: &str) -> Result<HexEncodedValue, Pars
kind: LiteralErrorKind::FeltOverflow,
});
}
Ok(shrink_u64(value))
Ok(shrink_u64_hex(value))
}
// Word
64 => {
Expand Down Expand Up @@ -609,8 +636,32 @@ fn parse_hex(span: SourceSpan, hex_digits: &str) -> Result<HexEncodedValue, Pars
}
}

/// Parses the digits of a binary literal (prefix already removed) into a [BinEncodedValue].
///
/// `span` is only used to label errors.
///
/// # Errors
///
/// - [ParsingError::InvalidBinaryLiteral] with [BinErrorKind::TooLong] when `bin_digits` holds
///   more than 32 characters.
/// - [ParsingError::InvalidLiteral] when `u32::from_str_radix` rejects the digits (for example
///   when `bin_digits` is empty or contains a non-binary character).
fn parse_bin(span: SourceSpan, bin_digits: &str) -> Result<BinEncodedValue, ParsingError> {
    // Anything longer than 32 digits is rejected up front with a dedicated error
    if bin_digits.len() > 32 {
        return Err(ParsingError::InvalidBinaryLiteral {
            span,
            kind: BinErrorKind::TooLong,
        });
    }

    let value =
        u32::from_str_radix(bin_digits, 2).map_err(|error| ParsingError::InvalidLiteral {
            span,
            // The target type is `u32`, so an overflow must be reported as a u32 overflow
            // rather than a field element overflow.
            kind: int_error_kind_to_literal_error_kind(
                error.kind(),
                LiteralErrorKind::U32Overflow,
            ),
        })?;
    Ok(shrink_u32_bin(value))
}

/// Returns `true` if `c` is one of the two binary digits, `0` or `1`.
//
// Review note from the pull-request discussion: consider marking this `#[inline(always)]` to
// ensure that it gets inlined; otherwise it goes from a very cheap comparison to a comparatively
// expensive function call. Rust may inline functions like this automatically, but that is based
// on a heuristic which is sensitive to what is happening in the caller.
#[inline]
fn is_ascii_binary(c: char) -> bool {
    matches!(c, '0'..='1')
}

#[inline]
fn shrink_u64_hex(n: u64) -> HexEncodedValue {
if n <= (u8::MAX as u64) {
HexEncodedValue::U8(n as u8)
} else if n <= (u16::MAX as u64) {
Expand All @@ -622,6 +673,17 @@ fn shrink_u64(n: u64) -> HexEncodedValue {
}
}

/// Wraps `n` in the narrowest [BinEncodedValue] variant that can hold it.
#[inline]
fn shrink_u32_bin(n: u32) -> BinEncodedValue {
    match n {
        // Fits in a single byte
        0..=0xFF => BinEncodedValue::U8(n as u8),
        // Fits in two bytes
        0x100..=0xFFFF => BinEncodedValue::U16(n as u16),
        // Needs the full 32 bits
        _ => BinEncodedValue::U32(n),
    }
}

#[inline]
fn int_error_kind_to_literal_error_kind(
kind: &IntErrorKind,
Expand Down
4 changes: 2 additions & 2 deletions assembly/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ mod scanner;
mod span;
mod token;

pub use self::error::{HexErrorKind, LiteralErrorKind, ParsingError};
pub use self::error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError};
pub use self::lexer::Lexer;
pub use self::location::SourceLocation;
pub use self::scanner::Scanner;
pub use self::span::{SourceSpan, Span, Spanned};
pub use self::token::{DocumentationType, HexEncodedValue, Token};
pub use self::token::{BinEncodedValue, DocumentationType, HexEncodedValue, Token};

use crate::{
ast,
Expand Down
18 changes: 18 additions & 0 deletions assembly/src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ pub enum HexEncodedValue {
Word([Felt; 4]),
}

// BINARY ENCODED VALUE
// ================================================================================================

/// Represents one of the various types of values that have a binary-encoded representation in
/// Miden Assembly source files.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinEncodedValue {
/// A tiny value, stored as a `u8`
U8(u8),
/// A small value, stored as a `u16`
U16(u16),
/// A value stored as a full `u32`
U32(u32),
}

// TOKEN
// ================================================================================================

Expand Down Expand Up @@ -224,6 +239,7 @@ pub enum Token<'input> {
Rstab,
DocComment(DocumentationType),
HexValue(HexEncodedValue),
BinValue(BinEncodedValue),
Int(u64),
Ident(&'input str),
ConstantIdent(&'input str),
Expand Down Expand Up @@ -403,6 +419,7 @@ impl<'input> fmt::Display for Token<'input> {
Token::DocComment(DocumentationType::Module(_)) => f.write_str("module doc"),
Token::DocComment(DocumentationType::Form(_)) => f.write_str("doc comment"),
Token::HexValue(_) => f.write_str("hex-encoded value"),
Token::BinValue(_) => f.write_str("bin-encoded value"),
Token::Int(_) => f.write_str("integer"),
Token::Ident(_) => f.write_str("identifier"),
Token::ConstantIdent(_) => f.write_str("constant identifier"),
Expand Down Expand Up @@ -804,6 +821,7 @@ impl<'input> Token<'input> {
"doc comment" => Ok(Token::DocComment(DocumentationType::Form(String::new()))),
"comment" => Ok(Token::Comment),
"hex-encoded value" => Ok(Token::HexValue(HexEncodedValue::U8(0))),
"bin-encoded value" => Ok(Token::BinValue(BinEncodedValue::U8(0))),
"integer" => Ok(Token::Int(0)),
"identifier" => Ok(Token::Ident("")),
"constant identifier" => Ok(Token::ConstantIdent("")),
Expand Down
8 changes: 4 additions & 4 deletions docs/src/user_docs/assembly/u32_operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ If the error code is omitted, the default value of $0$ is assumed.

| Instruction | Stack input | Stack output | Notes |
| ------------------------------------------------------------------------------------- | -------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------ |
| u32and <br> - *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `AND` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32or <br> - *(6 cycle)s* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `OR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32xor <br> - *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `XOR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32not <br> - *(5 cycles)* | [a, ...] | [b, ...] | Computes $b$ as a bitwise `NOT` of binary representation of $a$. <br> Fails if $a \ge 2^{32}$ |
| u32and <br> - *(1 cycle)* <br> u32and.*b* <br> - *(2 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `AND` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32or <br> - *(6 cycles)* <br> u32or.*b* <br> - *(7 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `OR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32xor <br> - *(1 cycle)* <br> u32xor.*b* <br> - *(2 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `XOR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32not <br> - *(5 cycles)* <br> u32not.*a* <br> - *(6 cycles)* | [a, ...] | [b, ...] | Computes $b$ as a bitwise `NOT` of binary representation of $a$. <br> Fails if $a \ge 2^{32}$ |
| u32shl <br> - *(18 cycles)* <br> u32shl.*b* <br> - *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a \cdot 2^b) \mod 2^{32}$ <br> Undefined if $a \ge 2^{32}$ or $b > 31$ |
| u32shr <br> - *(18 cycles)* <br> u32shr.*b* <br> - *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a/2^b \rfloor$ <br> Undefined if $a \ge 2^{32}$ or $b > 31$ |
| u32rotl <br> - *(18 cycles)* <br> u32rotl.*b* <br> - *(3 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the left by $b$ bits. <br> Undefined if $a \ge 2^{32}$ or $b > 31$ |
Expand Down
6 changes: 3 additions & 3 deletions miden/src/examples/fibonacci.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use super::{Example, ONE, ZERO};
use miden_vm::{
math::Felt, Assembler, DefaultHost, MemAdviceProvider, Program, ProvingOptions, StackInputs,
};
use miden_vm::{math::Felt, Assembler, DefaultHost, MemAdviceProvider, Program, StackInputs};

// EXAMPLE BUILDER
// ================================================================================================
Expand Down Expand Up @@ -73,6 +71,8 @@ fn test_fib_example_fail() {

#[test]
fn test_fib_example_rpo() {
use miden_vm::ProvingOptions;

let example = get_example(16);
super::test_example_with_options(example, false, ProvingOptions::with_96_bit_security(true));
}
2 changes: 1 addition & 1 deletion miden/src/repl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use assembly::{Assembler, Library, MaslLibrary};
use miden_vm::{math::Felt, DefaultHost, Program, StackInputs, Word};
use miden_vm::{math::Felt, DefaultHost, StackInputs, Word};
use processor::ContextId;
use rustyline::{error::ReadlineError, DefaultEditor};
use std::{collections::BTreeSet, path::PathBuf};
Expand Down
2 changes: 1 addition & 1 deletion miden/src/tools/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::cli::InputFile;
use assembly::diagnostics::{IntoDiagnostic, Report, WrapErr};
use clap::Parser;
use core::fmt;
use miden_vm::{Assembler, DefaultHost, Host, Operation, Program, StackInputs};
use miden_vm::{Assembler, DefaultHost, Host, Operation, StackInputs};
use processor::{AsmOpInfo, TraceLenSummary};
use std::{fs, path::PathBuf};
use stdlib::StdLibrary;
Expand Down
Loading
Loading