Skip to content

Commit

Permalink
Support the immediate values for u32 bitwise instructions (#1362)
Browse files Browse the repository at this point in the history
* feat: implement immediate values for u32 binary instructions,
implement binary parser for lexer

* test: add for u32 binary imms

* chore: fix cargo check

* refactor: emulate immediate value on the parsing stage

* refactor: remove binary encoding for Felt, update CHANGELOG and docs

* fix: fix bug in lex_bin, add inlining for is_ascii_binary

* refactor: fix inlining, fix the bug in error kind
  • Loading branch information
Fumuran authored Jul 2, 2024
1 parent 2da11ad commit ecd26a2
Show file tree
Hide file tree
Showing 11 changed files with 304 additions and 20 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- Added support for the `nop` instruction, which corresponds to the VM opcode of the same name, and has the same semantics. This is implemented for use by compilers primarily.
- Added support for the `if.false` instruction, which can be used in the same manner as `if.true`
- Relaxed the parser to allow one branch of an `if.(true|false)` to be empty
- Added support for immediate values for `u32and`, `u32or`, `u32xor` and `u32not` bitwise instructions (#1362).

#### Changed

Expand Down
27 changes: 27 additions & 0 deletions assembly/src/parser/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,25 @@ impl fmt::Display for HexErrorKind {
}
}

// BINARY ERROR KIND
// ================================================================================================

/// The reason a binary-encoded literal was rejected; carried by
/// `ParsingError::InvalidBinaryLiteral`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinErrorKind {
/// Occurs when the bin-encoded value has more than 32 digits
TooLong,
}

impl fmt::Display for BinErrorKind {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
Self::TooLong => f.write_str(
"value has too many digits, binary string can contain no more than 32 digits",
),
}
}
}

// PARSING ERROR
// ================================================================================================

Expand Down Expand Up @@ -162,6 +181,13 @@ pub enum ParsingError {
span: SourceSpan,
kind: HexErrorKind,
},
#[error("invalid literal: {}", kind)]
#[diagnostic()]
InvalidBinaryLiteral {
#[label]
span: SourceSpan,
kind: BinErrorKind,
},
#[error("invalid MAST root literal")]
InvalidMastRoot {
#[label]
Expand Down Expand Up @@ -340,6 +366,7 @@ fn simplify_expected_tokens(expected: Vec<String>) -> Vec<String> {
"quoted_ident" => return Some("quoted identifier".to_string()),
"doc_comment" => return Some("doc comment".to_string()),
"hex_value" => return Some("hex-encoded literal".to_string()),
"bin_value" => return Some("bin-encoded literal".to_string()),
"uint" => return Some("integer literal".to_string()),
"EOF" => return Some("end of file".to_string()),
other => other[1..].strip_suffix('"').and_then(|t| Token::parse(t).ok()),
Expand Down
66 changes: 61 additions & 5 deletions assembly/src/parser/grammar.lalrpop
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ use vm_core::{Felt, FieldElement, StarkField, crypto::hash::RpoDigest};

use crate::{LibraryPath, LibraryNamespace, ast::*, diagnostics::SourceFile, SourceSpan};
use super::{
HexEncodedValue, Token, ParseError, ParsingError,
BinEncodedValue, HexEncodedValue, Token, ParseError, ParsingError,
LiteralErrorKind, HexErrorKind, Span, Spanned,
DocumentationType
};
Expand All @@ -35,6 +35,7 @@ extern {
const_ident => Token::ConstantIdent(<&'input str>),
quoted_ident => Token::QuotedIdent(<&'input str>),
hex_value => Token::HexValue(<HexEncodedValue>),
bin_value => Token::BinValue(<BinEncodedValue>),
doc_comment => Token::DocComment(<DocumentationType>),
comment => Token::Comment,
uint => Token::Int(<u64>),
Expand Down Expand Up @@ -509,10 +510,7 @@ Inst: Instruction = {
"rcomb_base" => Instruction::RCombBase,
"sdepth" => Instruction::Sdepth,
"swapdw" => Instruction::SwapDw,
"u32and" => Instruction::U32And,
"u32cast" => Instruction::U32Cast,
"u32not" => Instruction::U32Not,
"u32or" => Instruction::U32Or,
"u32overflowing_add3" => Instruction::U32OverflowingAdd3,
"u32overflowing_madd" => Instruction::U32OverflowingMadd,
"u32popcnt" => Instruction::U32Popcnt,
Expand All @@ -525,7 +523,6 @@ Inst: Instruction = {
"u32testw" => Instruction::U32TestW,
"u32wrapping_add3" => Instruction::U32WrappingAdd3,
"u32wrapping_madd" => Instruction::U32WrappingMadd,
"u32xor" => Instruction::U32Xor,
"xor" => Instruction::Xor,
}

Expand Down Expand Up @@ -718,6 +715,57 @@ FoldableInstWithU32Immediate: SmallOpsVec = {
None => Ok(smallvec![Op::Inst(Span::new(span, Instruction::U32Mod))]),
}
},
// `u32and` with an optional u32 immediate; the immediate form is lowered to plain
// instructions here, at parse time. Every emitted op carries the span of the whole form.
<l:@L> "u32and" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
// Zero immediate: emit `Drop` followed by `PushU8(0)` instead of a `U32And`.
// NOTE(review): no `U32And` is emitted on this path, so presumably the remaining operand
// is not range-checked here, while the docs state `u32and` fails for operands >= 2^32.
// Confirm this is intended.
Some(imm) if imm == 0 => smallvec![Op::Inst(Span::new(span, Instruction::Drop)), Op::Inst(Span::new(span, Instruction::PushU8(0)))],
// Any other immediate: push it, then apply the regular `U32And`.
Some(imm) => {
match imm {
// Named constant: emitted as a `Push` of the constant.
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32And))],
// Literal value: emitted as a `PushU32` of the inner value.
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32And))],
}
}
// No immediate: emit the instruction on its own.
None => smallvec![Op::Inst(Span::new(span, Instruction::U32And))],
}
},
// `u32or` with an optional u32 immediate; the immediate form is lowered to plain
// instructions here, at parse time. Every emitted op carries the span of the whole form.
<l:@L> "u32or" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
// Zero immediate: nothing is emitted at all.
// NOTE(review): with no `U32Or` emitted, presumably the operand is not range-checked on
// this path, while the docs state `u32or` fails for operands >= 2^32. Confirm this is
// intended.
Some(imm) if imm == 0 => smallvec![],
// Any other immediate: push it, then apply the regular `U32Or`.
Some(imm) => {
match imm {
// Named constant: emitted as a `Push` of the constant.
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32Or))],
// Literal value: emitted as a `PushU32` of the inner value.
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32Or))],
}
}
// No immediate: emit the instruction on its own.
None => smallvec![Op::Inst(Span::new(span, Instruction::U32Or))],
}
},
// `u32xor` with an optional u32 immediate; the immediate form is lowered to plain
// instructions here, at parse time. Every emitted op carries the span of the whole form.
<l:@L> "u32xor" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
// Zero immediate: nothing is emitted at all.
// NOTE(review): with no `U32Xor` emitted, presumably the operand is not range-checked on
// this path, while the docs state `u32xor` fails for operands >= 2^32. Confirm this is
// intended.
Some(imm) if imm == 0 => smallvec![],
// Any other immediate: push it, then apply the regular `U32Xor`.
Some(imm) => {
match imm {
// Named constant: emitted as a `Push` of the constant.
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32Xor))],
// Literal value: emitted as a `PushU32` of the inner value.
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32Xor))],
}
}
// No immediate: emit the instruction on its own.
None => smallvec![Op::Inst(Span::new(span, Instruction::U32Xor))],
}
},
// `u32not` with an optional u32 immediate; the immediate form is lowered to plain
// instructions here, at parse time. Unlike `u32and`/`u32or`/`u32xor`, there is no special
// case for a zero immediate. Every emitted op carries the span of the whole form.
<l:@L> "u32not" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
// Immediate present: push it, then apply `U32Not` to the pushed value.
Some(imm) => {
match imm {
// Named constant: emitted as a `Push` of the constant.
Immediate::Constant(name) => smallvec![Op::Inst(Span::new(span, Instruction::Push(Immediate::Constant(name)))), Op::Inst(Span::new(span, Instruction::U32Not))],
// Literal value: emitted as a `PushU32` of the inner value.
Immediate::Value(value) => smallvec![Op::Inst(Span::new(span, Instruction::PushU32(value.into_inner()))), Op::Inst(Span::new(span, Instruction::U32Not))],
}
}
// No immediate: emit the instruction on its own.
None => smallvec![Op::Inst(Span::new(span, Instruction::U32Not))],
}
},
<l:@L> "u32wrapping_add" <imm:MaybeImm<U32>> <r:@R> => {
let span = span!(l, r);
match imm {
Expand Down Expand Up @@ -1086,6 +1134,14 @@ U32: u32 = {
HexEncodedValue::U32(v) => Ok(v),
_ => Err(ParseError::User { error: ParsingError::InvalidLiteral { span: span!(l, r), kind: LiteralErrorKind::U32Overflow } }),
}
},

// A binary-encoded literal used where a u32 is expected: whichever variant the lexer
// produced is widened to `u32`. Every arm yields `Ok`; this alternative never reports an
// error itself.
<l:@L> <value:bin_value> <r:@R> =>? {
match value {
// Lossless widening casts (u8 -> u32, u16 -> u32).
BinEncodedValue::U8(v) => Ok(v as u32),
BinEncodedValue::U16(v) => Ok(v as u32),
BinEncodedValue::U32(v) => Ok(v),
}
}
}

Expand Down
70 changes: 66 additions & 4 deletions assembly/src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ use alloc::string::String;
use core::{num::IntErrorKind, ops::Range};

use super::{
DocumentationType, HexEncodedValue, HexErrorKind, LiteralErrorKind, ParsingError, Scanner,
SourceSpan, Token,
BinEncodedValue, BinErrorKind, DocumentationType, HexEncodedValue, HexErrorKind,
LiteralErrorKind, ParsingError, Scanner, SourceSpan, Token,
};

/// The value produced by the [Lexer] when iterated
Expand Down Expand Up @@ -293,6 +293,11 @@ impl<'input> Lexer<'input> {
self.skip();
self.lex_hex()
}
'b' => {
self.skip();
self.skip();
self.lex_bin()
}
'0'..='9' => self.lex_number(),
_ => pop!(self, Token::Int(0)),
},
Expand Down Expand Up @@ -524,6 +529,28 @@ impl<'input> Lexer<'input> {
let value = parse_hex(span, self.slice_span(digit_start..end))?;
Ok(Token::HexValue(value))
}

/// Lexes a binary-encoded literal into a [Token::BinValue].
///
/// The caller invokes this after skipping the two characters of the `0b` prefix, so lexing
/// resumes at the first candidate digit.
///
/// # Errors
///
/// Returns whatever error `parse_bin` produces for the collected digits: an
/// `InvalidBinaryLiteral` when there are more than 32 of them, or an `InvalidLiteral` when
/// they cannot be parsed (which includes the case where no digit follows the prefix).
fn lex_bin(&mut self) -> Result<Token<'input>, ParsingError> {
    // Consume the run of binary digits that follows the prefix. The run may be empty
    // (e.g. `0b` or `0b2`): that is malformed user input rather than a broken lexer
    // invariant, so it must not be asserted on. Asserting here would panic in debug
    // builds while release builds report an error; instead the empty digit string is
    // always handed to `parse_bin`, which rejects it.
    while is_ascii_binary(self.read()) {
        self.skip();
    }

    let span = self.span();
    let start = span.start() as u32;
    // Step over the two prefix characters (`0b`) so that only the digits are parsed;
    // the span attached to any error still covers the prefix.
    let digit_start = start + 2;
    let end = span.end() as u32;
    let span = SourceSpan::from(start..end);
    let value = parse_bin(span, self.slice_span(digit_start..end))?;
    Ok(Token::BinValue(value))
}
}

impl<'input> Iterator for Lexer<'input> {
Expand Down Expand Up @@ -561,7 +588,7 @@ fn parse_hex(span: SourceSpan, hex_digits: &str) -> Result<HexEncodedValue, Pars
kind: LiteralErrorKind::FeltOverflow,
});
}
Ok(shrink_u64(value))
Ok(shrink_u64_hex(value))
}
// Word
64 => {
Expand Down Expand Up @@ -609,8 +636,32 @@ fn parse_hex(span: SourceSpan, hex_digits: &str) -> Result<HexEncodedValue, Pars
}
}

/// Parses the digits of a binary literal (without the `0b` prefix) into the smallest
/// [BinEncodedValue] variant able to hold the value.
///
/// `span` is attached to any error produced.
///
/// # Errors
///
/// - `ParsingError::InvalidBinaryLiteral` with `BinErrorKind::TooLong` when `bin_digits` has
///   more than 32 characters.
/// - `ParsingError::InvalidLiteral` when `u32::from_str_radix` rejects the digits (for
///   example, when the string is empty).
fn parse_bin(span: SourceSpan, bin_digits: &str) -> Result<BinEncodedValue, ParsingError> {
    // Reject over-long literals up front, before attempting any numeric conversion.
    if bin_digits.len() > 32 {
        return Err(ParsingError::InvalidBinaryLiteral {
            span,
            kind: BinErrorKind::TooLong,
        });
    }

    match u32::from_str_radix(bin_digits, 2) {
        Ok(parsed) => Ok(shrink_u32_bin(parsed)),
        Err(parse_error) => Err(ParsingError::InvalidLiteral {
            span,
            kind: int_error_kind_to_literal_error_kind(
                parse_error.kind(),
                LiteralErrorKind::U32Overflow,
            ),
        }),
    }
}

/// Returns `true` if `c` is one of the two binary digits, `'0'` or `'1'`.
#[inline(always)]
fn is_ascii_binary(c: char) -> bool {
    matches!(c, '0' | '1')
}

#[inline]
fn shrink_u64(n: u64) -> HexEncodedValue {
fn shrink_u64_hex(n: u64) -> HexEncodedValue {
if n <= (u8::MAX as u64) {
HexEncodedValue::U8(n as u8)
} else if n <= (u16::MAX as u64) {
Expand All @@ -622,6 +673,17 @@ fn shrink_u64(n: u64) -> HexEncodedValue {
}
}

/// Wraps `n` in the narrowest [BinEncodedValue] variant that can represent it.
#[inline]
fn shrink_u32_bin(n: u32) -> BinEncodedValue {
    match n {
        // Fits in a single byte.
        0..=0xFF => BinEncodedValue::U8(n as u8),
        // Needs more than 8 bits but no more than 16.
        0x100..=0xFFFF => BinEncodedValue::U16(n as u16),
        // Anything larger keeps the full 32 bits.
        _ => BinEncodedValue::U32(n),
    }
}

#[inline]
fn int_error_kind_to_literal_error_kind(
kind: &IntErrorKind,
Expand Down
4 changes: 2 additions & 2 deletions assembly/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ mod scanner;
mod span;
mod token;

pub use self::error::{HexErrorKind, LiteralErrorKind, ParsingError};
pub use self::error::{BinErrorKind, HexErrorKind, LiteralErrorKind, ParsingError};
pub use self::lexer::Lexer;
pub use self::location::SourceLocation;
pub use self::scanner::Scanner;
pub use self::span::{SourceSpan, Span, Spanned};
pub use self::token::{DocumentationType, HexEncodedValue, Token};
pub use self::token::{BinEncodedValue, DocumentationType, HexEncodedValue, Token};

use crate::{
ast,
Expand Down
18 changes: 18 additions & 0 deletions assembly/src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,21 @@ pub enum HexEncodedValue {
Word([Felt; 4]),
}

// BINARY ENCODED VALUE
// ================================================================================================

/// Represents one of the various types of values that have a binary-encoded representation in
/// Miden Assembly source files.
///
/// The lexer picks the narrowest variant able to hold the parsed value.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum BinEncodedValue {
/// A tiny value, one that fits in 8 bits
U8(u8),
/// A small value, one that fits in 16 bits
U16(u16),
/// A value that needs more than 16 bits, up to the full 32
U32(u32),
}

// TOKEN
// ================================================================================================

Expand Down Expand Up @@ -224,6 +239,7 @@ pub enum Token<'input> {
Rstab,
DocComment(DocumentationType),
HexValue(HexEncodedValue),
BinValue(BinEncodedValue),
Int(u64),
Ident(&'input str),
ConstantIdent(&'input str),
Expand Down Expand Up @@ -403,6 +419,7 @@ impl<'input> fmt::Display for Token<'input> {
Token::DocComment(DocumentationType::Module(_)) => f.write_str("module doc"),
Token::DocComment(DocumentationType::Form(_)) => f.write_str("doc comment"),
Token::HexValue(_) => f.write_str("hex-encoded value"),
Token::BinValue(_) => f.write_str("bin-encoded value"),
Token::Int(_) => f.write_str("integer"),
Token::Ident(_) => f.write_str("identifier"),
Token::ConstantIdent(_) => f.write_str("constant identifier"),
Expand Down Expand Up @@ -804,6 +821,7 @@ impl<'input> Token<'input> {
"doc comment" => Ok(Token::DocComment(DocumentationType::Form(String::new()))),
"comment" => Ok(Token::Comment),
"hex-encoded value" => Ok(Token::HexValue(HexEncodedValue::U8(0))),
"bin-encoded value" => Ok(Token::BinValue(BinEncodedValue::U8(0))),
"integer" => Ok(Token::Int(0)),
"identifier" => Ok(Token::Ident("")),
"constant identifier" => Ok(Token::ConstantIdent("")),
Expand Down
8 changes: 4 additions & 4 deletions docs/src/user_docs/assembly/u32_operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,10 @@ If the error code is omitted, the default value of $0$ is assumed.

| Instruction | Stack input | Stack output | Notes |
| ------------------------------------------------------------------------------------- | -------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------------ |
| u32and <br> - *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `AND` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32or <br> - *(6 cycle)s* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `OR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32xor <br> - *(1 cycle)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `XOR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32not <br> - *(5 cycles)* | [a, ...] | [b, ...] | Computes $b$ as a bitwise `NOT` of binary representation of $a$. <br> Fails if $a \ge 2^{32}$ |
| u32and <br> - *(1 cycle)* <br> u32and.*b* <br> - *(2 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `AND` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32or <br> - *(6 cycles)* <br> u32or.*b* <br> - *(7 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `OR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32xor <br> - *(1 cycle)* <br> u32xor.*b* <br> - *(2 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ as a bitwise `XOR` of binary representations of $a$ and $b$. <br> Fails if $max(a,b) \ge 2^{32}$ |
| u32not <br> - *(5 cycles)* <br> u32not.*a* <br> - *(6 cycles)* | [a, ...] | [b, ...] | Computes $b$ as a bitwise `NOT` of binary representation of $a$. <br> Fails if $a \ge 2^{32}$ |
| u32shl <br> - *(18 cycles)* <br> u32shl.*b* <br> - *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow (a \cdot 2^b) \mod 2^{32}$ <br> Undefined if $a \ge 2^{32}$ or $b > 31$ |
| u32shr <br> - *(18 cycles)* <br> u32shr.*b* <br> - *(3 cycles)* | [b, a, ...] | [c, ...] | $c \leftarrow \lfloor a/2^b \rfloor$ <br> Undefined if $a \ge 2^{32}$ or $b > 31$ |
| u32rotl <br> - *(18 cycles)* <br> u32rotl.*b* <br> - *(3 cycles)* | [b, a, ...] | [c, ...] | Computes $c$ by rotating a 32-bit representation of $a$ to the left by $b$ bits. <br> Undefined if $a \ge 2^{32}$ or $b > 31$ |
Expand Down
6 changes: 3 additions & 3 deletions miden/src/examples/fibonacci.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
use super::{Example, ONE, ZERO};
use miden_vm::{
math::Felt, Assembler, DefaultHost, MemAdviceProvider, Program, ProvingOptions, StackInputs,
};
use miden_vm::{math::Felt, Assembler, DefaultHost, MemAdviceProvider, Program, StackInputs};

// EXAMPLE BUILDER
// ================================================================================================
Expand Down Expand Up @@ -73,6 +71,8 @@ fn test_fib_example_fail() {

#[test]
fn test_fib_example_rpo() {
use miden_vm::ProvingOptions;

let example = get_example(16);
super::test_example_with_options(example, false, ProvingOptions::with_96_bit_security(true));
}
2 changes: 1 addition & 1 deletion miden/src/repl/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use assembly::{Assembler, Library, MaslLibrary};
use miden_vm::{math::Felt, DefaultHost, Program, StackInputs, Word};
use miden_vm::{math::Felt, DefaultHost, StackInputs, Word};
use processor::ContextId;
use rustyline::{error::ReadlineError, DefaultEditor};
use std::{collections::BTreeSet, path::PathBuf};
Expand Down
2 changes: 1 addition & 1 deletion miden/src/tools/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use super::cli::InputFile;
use assembly::diagnostics::{IntoDiagnostic, Report, WrapErr};
use clap::Parser;
use core::fmt;
use miden_vm::{Assembler, DefaultHost, Host, Operation, Program, StackInputs};
use miden_vm::{Assembler, DefaultHost, Host, Operation, StackInputs};
use processor::{AsmOpInfo, TraceLenSummary};
use std::{fs, path::PathBuf};
use stdlib::StdLibrary;
Expand Down
Loading

0 comments on commit ecd26a2

Please sign in to comment.