diff --git a/.vscode/launch.json b/.vscode/launch.json index cda1c28..0ce87c5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,88 +1,83 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "type": "java", - "name": "HelloWorld", - "request": "launch", - "mainClass": "tests.units.HelloWorld", - "projectName": "java-decompiler_8d168fe2" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in library 'jvm-class-format'", - "cargo": { - "args": ["test", "--no-run", "--lib", "--package=jvm-class-format"], - "filter": { - "name": "jvm-class-format", - "kind": "lib" + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in library 'jaded'", + "cargo": { + "args": [ + "test", + "--no-run", + "--lib", + "--package=jaded" + ], + "filter": { + "name": "jaded", + "kind": "lib" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug executable 'jaded'", + "cargo": { + "args": [ + "build", + "--bin=jaded", + "--package=jaded" + ], + "filter": { + "name": "jaded", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug unit tests in executable 'jaded'", + "cargo": { + "args": [ + "test", + "--no-run", + "--bin=jaded", + "--package=jaded" + ], + "filter": { + "name": "jaded", + "kind": "bin" + } + }, + "args": [], + "cwd": "${workspaceFolder}" + }, + { + "type": "lldb", + "request": "launch", + "name": "Debug integration test 'run_units'", + "cargo": { + "args": [ + "test", + "--no-run", + "--test=run_units", + "--package=jaded" + ], + "filter": { + "name": "run_units", + "kind": "test" + } + }, + "args": [], + "cwd": "${workspaceFolder}" } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in library 'java-decompiler'", - "cargo": { - "args": [ - "test", - "--no-run", - "--lib", - "--package=java-decompiler", - "--features=\"clap\"" - ], - "filter": { - "name": "java-decompiler", - "kind": "lib" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug executable 'java_decompiler'", - "cargo": { - "args": [ - "build", - "--bin=java_decompiler", - "--package=java-decompiler", - "--features=clap" - ], - "filter": { - "name": "java_decompiler", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - }, - { - "type": "lldb", - "request": "launch", - "name": "Debug unit tests in executable 'java_decompiler'", - "cargo": { - "args": [ - "test", - "--no-run", - "--bin=java_decompiler", - "--package=java-decompiler", - "--features=clap" - ], - "filter": { - "name": "java_decompiler", - "kind": "bin" - } - }, - "args": [], - "cwd": "${workspaceFolder}" - } - ] -} + ] +} \ No newline at end of file diff --git a/class_format/src/error.rs b/class_format/src/error.rs index 47a90bc..5a1ee1e 100644 --- a/class_format/src/error.rs +++ b/class_format/src/error.rs @@ -1,6 +1,6 @@ use crate::constant::ReferenceKind; use crate::ty::JVMType; -use crate::ConstantTag; +use crate::{ConstantTag, Op}; use thiserror::Error; #[derive(Error, Debug)] @@ -14,6 +14,18 @@ pub enum ConstantPoolError { }, } +#[derive(Error, Debug)] +pub enum OpReadError { + #[error("unknown op code: 0x{0:X}")] + Unknown(u8), + #[error("op '{op}' requires {expected} arguments; found {available}")] + MissingArgs { + op: Op, + expected: usize, + available: usize, + }, +} + #[derive(Error, Debug)] pub enum ClassReadError { #[error("expected magic: 0xCAFEBABE; got 0x{found:X}")] diff --git a/class_format/src/op.rs b/class_format/src/op.rs index 7adaeda..0c52a21 100644 --- a/class_format/src/op.rs +++ b/class_format/src/op.rs @@ -4,9 +4,12 @@ use std::convert::TryFrom; use std::fmt::{Display, Formatter}; use std::io::{ErrorKind, Read}; +use crate::error::OpReadError; + macro_rules! impl_ops { [$(($op: ident, $code: literal, $argc: literal)),+] => {paste::paste!{ #[derive(Debug, Copy, Clone, Eq, PartialEq, IntoPrimitive, TryFromPrimitive)] + #[num_enum(error_type(name = OpReadError, constructor = OpReadError::Unknown))] #[non_exhaustive] #[repr(u8)] pub enum Op { @@ -251,41 +254,157 @@ impl Display for Op { } } -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Clone, Copy)] +#[repr(C)] +union OpArgs { + none: [u8; 0], + one: [u8; 1], + two: [u8; 2], + three: [u8; 3], + four: [u8; 4], +} + +impl OpArgs { + fn new(op: Op) -> OpArgs { + match op.argc() { + 0 => OpArgs { none: [0; 0] }, + 1 => OpArgs { one: [0; 1] }, + 2 => OpArgs { two: [0; 2] }, + 3 => OpArgs { three: [0; 3] }, + 4 => OpArgs { four: [0; 4] }, + _ => unreachable!("invalid argument count"), + } + } + + unsafe fn args_of(&self, op: Op) -> &[u8] { + match op.argc() { + 0 => &self.none, + 1 => &self.one, + 2 => &self.two, + 3 => &self.three, + 4 => &self.four, + argc => unreachable!("invalid argument count: {}", argc), + } + } + + unsafe fn mut_args_of(&mut self, op: Op) -> &mut [u8] { + match op.argc() { + 0 => &mut self.none, + 1 => &mut self.one, + 2 => &mut self.two, + 3 => &mut self.three, + 4 => &mut self.four, + argc => unreachable!("invalid argument count: {}", argc), + } + } +} + +// TODO: Switch to #[repr(C, u8)] union? +// - Using an union feels less cumbersome. +// - Construction from bytes is weird with repr(C, u8) +#[derive(Clone, Copy)] +#[repr(C)] pub struct Instruction { - pub op: Op, - pub args: [u8; 4], + op: Op, + args: OpArgs, +} + +impl std::fmt::Debug for Instruction { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("Instruction") + .field("op", &self.op) + .field("args", &self.args()) + .finish() + } +} + +impl PartialEq for Instruction { + fn eq(&self, other: &Self) -> bool { + self.op == other.op && self.args() == other.args() + } } +impl Eq for Instruction {} impl Instruction { pub fn read_from(r: &mut R) -> Result { let op = Op::try_from(r.read_u8()?) .map_err(|e| std::io::Error::new(ErrorKind::InvalidData, e))?; - let mut args = [0; 4]; - r.read_exact(&mut args[..op.argc()])?; + let mut instruction = Instruction { + op, + args: OpArgs::new(op), + }; + r.read_exact(instruction.args_mut())?; - Ok(Instruction { op, args }) + Ok(instruction) } - pub fn from_bytecode(code: impl AsRef<[u8]>) -> Vec { - let code = code.as_ref(); - let mut instructions = Vec::with_capacity(code.len()); + pub fn from_slice<'a>(slice: &'a [u8]) -> Result<&'a Instruction, OpReadError> { + let op = Op::try_from(*slice.get(0).ok_or(OpReadError::Unknown(0))?)?; + if slice.len() <= op.argc() { + return Err(OpReadError::MissingArgs { + op, + expected: op.argc(), + available: slice.len() - 1, + }); + } + Ok(unsafe { + // SAFETY: + // - Checked that the slice is big enough, that the Op code is valid + // and that there's enough tailing argument bytes - so the memory is + // definitely a valid Instruction. + // - Instruction is unaligned so alignment doesn't have to be checked. + // - 'a reference is passed to returned data so we're not messing up + // borrow lifetimes. - let mut pos = 0; - while pos < code.len() { - let op = Op::try_from(code[pos]).unwrap(); - pos += 1; - let mut instruction = Instruction { op, args: [0; 4] }; + (slice.as_ptr() as *const Instruction) + .as_ref() + .unwrap_unchecked() + }) + } - let argc = op.argc(); - for offset in 0..argc { - instruction.args[offset] = code[pos + offset]; - } - instructions.push(instruction); - pos += argc; + pub fn collect_instructions(code: &[u8]) -> Vec<&Instruction> { + InstructionIterator::new(code).collect() + } + + #[inline] + pub fn op(&self) -> Op { + self.op + } + + #[inline] + pub fn args(&self) -> &[u8] { + unsafe { self.args.args_of(self.op) } + } + + #[inline] + pub fn args_mut(&mut self) -> &mut [u8] { + unsafe { self.args.mut_args_of(self.op) } + } +} + +pub struct InstructionIterator<'a> { + pub bytecode: &'a [u8], + pub pos: usize, +} + +impl<'a> InstructionIterator<'a> { + pub fn new(bytecode: &'a [u8]) -> Self { + InstructionIterator { bytecode, pos: 0 } + } +} + +impl<'a> Iterator for InstructionIterator<'a> { + type Item = &'a Instruction; + + fn next(&mut self) -> Option { + if self.pos >= self.bytecode.len() { + return None; } - instructions + let i = Instruction::from_slice(&self.bytecode[self.pos..]).expect("invalid instruction"); + self.pos += 1 + i.op.argc(); + + return Some(i); } } diff --git a/src/gen/java/code.rs b/src/gen/java/code.rs index c95a598..6aff0fa 100644 --- a/src/gen/java/code.rs +++ b/src/gen/java/code.rs @@ -52,8 +52,8 @@ impl<'m, 'data, B: GeneratorBackend> GenerateCode Result { w.write(b"// asm: ")?; - w.write(input.0.op.name().as_bytes())?; - for arg in &input.0.args[..input.0.op.argc()] { + w.write(input.0.op().name().as_bytes())?; + for arg in input.0.args() { write!(w, " 0x{:X}", *arg)?; } w.write(b"\n")?; diff --git a/src/ir/expression.rs b/src/ir/expression.rs index c57f0e9..dc7cfb1 100644 --- a/src/ir/expression.rs +++ b/src/ir/expression.rs @@ -5,12 +5,12 @@ use super::frame::RuntimeFrame; pub struct OpSeq(pub [Op; LENGTH]); impl OpSeq { - pub fn test(&self, buffer: impl AsRef<[Instruction]>, offset: usize) -> bool { + pub fn test<'code>(&self, buffer: &[&'code Instruction], offset: usize) -> bool { if L > buffer.as_ref()[offset..].len() { return false; } for (i, op) in self.0.iter().enumerate() { - if buffer.as_ref()[offset + i].op != *op { + if buffer.as_ref()[offset + i].op() != *op { return false; } } @@ -31,7 +31,7 @@ macro_rules! test_many_expr { pub trait CheckExpression { fn test<'cp, 'code>( - buffer: impl AsRef<[Instruction]>, + buffer: &[&'code Instruction], offset: usize, ctx: &RuntimeFrame<'cp, 'code>, ) -> Option<(usize, Expression)>; @@ -50,16 +50,14 @@ pub struct InstructionComment(pub Instruction); impl CheckExpression for InstructionComment { fn test<'cp, 'code>( - instr: impl AsRef<[Instruction]>, + instr: &[&'code Instruction], offset: usize, _: &RuntimeFrame<'cp, 'code>, ) -> Option<(usize, Expression)> { unsafe { Some(( 1, - Expression::Comment(InstructionComment( - instr.as_ref().get_unchecked(offset).clone(), - )), + Expression::Comment(InstructionComment(**instr.as_ref().get_unchecked(offset))), )) } } @@ -70,7 +68,7 @@ pub struct EmptyConstructor; impl CheckExpression for EmptyConstructor { fn test<'cp, 'code>( - buffer: impl AsRef<[Instruction]>, + buffer: &[&'code Instruction], offset: usize, _: &RuntimeFrame<'cp, 'code>, ) -> Option<(usize, Expression)> { @@ -98,7 +96,7 @@ pub struct ReturnStatement; impl CheckExpression for ReturnStatement { fn test<'cp, 'code>( - buffer: impl AsRef<[Instruction]>, + buffer: &[&'code Instruction], offset: usize, _: &RuntimeFrame<'cp, 'code>, ) -> Option<(usize, Expression)> { @@ -120,7 +118,7 @@ pub struct EmptySuperCall; impl CheckExpression for EmptySuperCall { fn test<'cp, 'code>( - buffer: impl AsRef<[Instruction]>, + buffer: &[&'code Instruction], offset: usize, _: &RuntimeFrame<'cp, 'code>, ) -> Option<(usize, Expression)> { diff --git a/src/ir/mod.rs b/src/ir/mod.rs index c7ae8c0..f2447fd 100644 --- a/src/ir/mod.rs +++ b/src/ir/mod.rs @@ -16,14 +16,14 @@ pub fn decompile<'cp, 'code>( method: &Member, code: &CodeData, ) -> Vec { - let instructions = Instruction::from_bytecode(&code.code); + let instructions = Instruction::collect_instructions(&code.code); let frame = RuntimeFrame::new(constant_pool, code); let mut result = Vec::with_capacity(instructions.len()); if method.is_constructor() { - if let Some((_, expr)) = EmptyConstructor::test(&instructions, 0, &frame) { + if let Some((_, expr)) = EmptyConstructor::test(instructions.as_slice(), 0, &frame) { return vec![expr]; } } @@ -34,7 +34,7 @@ pub fn decompile<'cp, 'code>( let (instruction_count, expr) = test_many_expr!(&[ EmptySuperCall, InstructionComment - ], &instructions, offset, &frame) + ], instructions.as_slice(), offset, &frame) .unwrap(); offset += instruction_count;