From aa56fb800fdfa8d49952e0baa96f6efca2e72096 Mon Sep 17 00:00:00 2001 From: Kirk Strauser Date: Sat, 11 Aug 2018 09:49:48 -0700 Subject: [PATCH 1/6] Highjack OPCODES to run other common instructions --- pyjvm/Machine.py | 357 +++++++++++++++++++++++++++-------------------- 1 file changed, 206 insertions(+), 151 deletions(-) diff --git a/pyjvm/Machine.py b/pyjvm/Machine.py index 02ee6d5..ed38459 100644 --- a/pyjvm/Machine.py +++ b/pyjvm/Machine.py @@ -6,126 +6,134 @@ import io from enum import Enum + class Inst(Enum): - ICONST_M1 = 0x02 - ICONST_0 = 0x03 - ICONST_1 = 0x04 - ICONST_2 = 0x05 - ICONST_3 = 0x06 - ICONST_4 = 0x07 - ICONST_5 = 0x08 - LCONST_0 = 0x09 - LCONST_1 = 0x0A - DCONST_0 = 0x0E - DCONST_1 = 0x0F - BIPUSH = 0x10 - SIPUSH = 0x11 - LDC = 0x12 - LDC2_W = 0x14 - ILOAD = 0x15 - LLOAD = 0x16 - DLOAD = 0x18 - ILOAD_0 = 0x1A - ILOAD_1 = 0x1B - ILOAD_2 = 0x1C - ILOAD_3 = 0x1D - LLOAD_0 = 0x1E - LLOAD_1 = 0x1F - LLOAD_2 = 0x20 - LLOAD_3 = 0x21 - DLOAD_3 = 0x29 - ALOAD_0 = 0x2A - ALOAD_1 = 0x2B - ALOAD_2 = 0x2C - ISTORE = 0x36 - LSTORE = 0x37 - DSTORE = 0x39 - ISTORE_0 = 0x3B - ISTORE_1 = 0x3C - ISTORE_2 = 0x3D - ISTORE_3 = 0x3E - LSTORE_0 = 0x3F - LSTORE_1 = 0x40 - LSTORE_2 = 0x41 - LSTORE_3 = 0x42 - DSTORE_3 = 0x4A - ASTORE_0 = 0x4B - ASTORE_1 = 0x4C - ASTORE_2 = 0x4D - ASTORE_3 = 0x4E - POP = 0x57 - DUP = 0x59 - IADD = 0x60 - LADD = 0x61 - DADD = 0x63 - ISUB = 0x64 - DSUB = 0x67 - IMUL = 0x68 - DMUL = 0x6B - DDIV = 0x6F - IREM = 0x70 - IINC = 0x84 - I2D = 0x87 - I2C = 0x92 - DCMPG = 0x98 - IFNE = 0x9A - IFGE = 0x9C - IFLE = 0x9E - IF_ICMPLT = 0xA1 - IF_ICMPGE = 0xA2 - IF_ICMPGT = 0xA3 - GOTO = 0xA7 - IRET = 0xAC - LRET = 0xAD - DRETURN = 0xAF - ARETURN = 0xB0 - RETURN = 0xB1 - GETSTATIC = 0xB2 - PUTSTATIC = 0xB3 - GETFIELD = 0xB4 - PUTFIELD = 0xB5 + ICONST_M1 = 0x02 + ICONST_0 = 0x03 + ICONST_1 = 0x04 + ICONST_2 = 0x05 + ICONST_3 = 0x06 + ICONST_4 = 0x07 + ICONST_5 = 0x08 + LCONST_0 = 0x09 + LCONST_1 = 0x0A + DCONST_0 = 0x0E + DCONST_1 = 0x0F + BIPUSH = 0x10 + SIPUSH = 0x11 + LDC = 0x12 + LDC2_W = 0x14 + ILOAD = 0x15 + LLOAD = 0x16 + DLOAD = 0x18 + ILOAD_0 = 0x1A + ILOAD_1 = 0x1B + ILOAD_2 = 0x1C + ILOAD_3 = 0x1D + LLOAD_0 = 0x1E + LLOAD_1 = 0x1F + LLOAD_2 = 0x20 + LLOAD_3 = 0x21 + DLOAD_3 = 0x29 + ALOAD_0 = 0x2A + ALOAD_1 = 0x2B + ALOAD_2 = 0x2C + ISTORE = 0x36 + LSTORE = 0x37 + DSTORE = 0x39 + ISTORE_0 = 0x3B + ISTORE_1 = 0x3C + ISTORE_2 = 0x3D + ISTORE_3 = 0x3E + LSTORE_0 = 0x3F + LSTORE_1 = 0x40 + LSTORE_2 = 0x41 + LSTORE_3 = 0x42 + DSTORE_3 = 0x4A + ASTORE_0 = 0x4B + ASTORE_1 = 0x4C + ASTORE_2 = 0x4D + ASTORE_3 = 0x4E + POP = 0x57 + DUP = 0x59 + IADD = 0x60 + LADD = 0x61 + DADD = 0x63 + ISUB = 0x64 + DSUB = 0x67 + IMUL = 0x68 + DMUL = 0x6B + DDIV = 0x6F + IREM = 0x70 + IINC = 0x84 + I2D = 0x87 + I2C = 0x92 + DCMPG = 0x98 + IFNE = 0x9A + IFGE = 0x9C + IFLE = 0x9E + IF_ICMPLT = 0xA1 + IF_ICMPGE = 0xA2 + IF_ICMPGT = 0xA3 + GOTO = 0xA7 + IRET = 0xAC + LRET = 0xAD + DRETURN = 0xAF + ARETURN = 0xB0 + RETURN = 0xB1 + GETSTATIC = 0xB2 + PUTSTATIC = 0xB3 + GETFIELD = 0xB4 + PUTFIELD = 0xB5 INVOKEVIRTUAL = 0xB6 INVOKESPECIAL = 0xB7 - INVOKESTATIC = 0xB8 - NEW = 0xBB + INVOKESTATIC = 0xB8 + NEW = 0xBB + def argumentCount(desc): - arg = desc.split(')', 2)[0][1:] + arg = desc.split(")", 2)[0][1:] i = 0 parsingClass = False for c in arg: if parsingClass: - if c == ';': + if c == ";": parsingClass = False continue - if c == 'L': + if c == "L": parsingClass = True i += 1 return i + def read_unsigned_short(frame): - val = struct.unpack('!H', frame.code[frame.ip+1:frame.ip+3])[0] + val = struct.unpack("!H", frame.code[frame.ip + 1 : frame.ip + 3])[0] frame.ip += 2 return val + def read_signed_short(frame): - val = struct.unpack('!h', frame.code[frame.ip+1:frame.ip+3])[0] + val = struct.unpack("!h", frame.code[frame.ip + 1 : frame.ip + 3])[0] frame.ip += 2 return val + def read_byte(frame): frame.ip += 1 return frame.code[frame.ip] + def read_signed_byte(frame): frame.ip += 1 - signed = struct.unpack('!b', frame.code[frame.ip:frame.ip+1])[0] + signed = struct.unpack("!b", frame.code[frame.ip : frame.ip + 1])[0] return signed + OPCODES = {} + def opcode(inst): def inner(fn): OPCODES[inst] = fn @@ -133,66 +141,79 @@ def inner(fn): return inner + @opcode(Inst.ICONST_M1) def iconst_m1(frame): frame.push(-1) + @opcode(Inst.ICONST_0) @opcode(Inst.LCONST_0) def iconst_0(frame): frame.push(0) + @opcode(Inst.ICONST_1) @opcode(Inst.LCONST_1) def iconst_1(frame): frame.push(1) + @opcode(Inst.ICONST_2) def iconst_2(frame): frame.push(2) + @opcode(Inst.ICONST_3) def iconst_3(frame): frame.push(3) + @opcode(Inst.ICONST_4) def iconst_4(frame): frame.push(4) + @opcode(Inst.ICONST_5) def iconst_5(frame): frame.push(5) + @opcode(Inst.DCONST_0) def dconst_0(frame): frame.push(0.0) + @opcode(Inst.DCONST_1) def dconst_1(frame): frame.push(1.0) + @opcode(Inst.BIPUSH) def bipush(frame): val = read_byte(frame) frame.push(val) + @opcode(Inst.SIPUSH) def sipush(frame): val = read_signed_short(frame) frame.push(val) + @opcode(Inst.LDC) def ldc(frame): index = read_byte(frame) const = frame.current_class.const_pool[index - 1] - if 'integer' in const.__dict__: + if "integer" in const.__dict__: const = const.integer else: const = const.string frame.push(const) + @opcode(Inst.LDC2_W) def ldc2_w(frame): index = read_unsigned_short(frame) @@ -200,6 +221,7 @@ def ldc2_w(frame): frame.push(const) + @opcode(Inst.ILOAD) @opcode(Inst.LLOAD) @opcode(Inst.DLOAD) @@ -207,30 +229,35 @@ def iload(frame): index = read_byte(frame) frame.push(frame.get_local(index)) + @opcode(Inst.ILOAD_0) @opcode(Inst.LLOAD_0) @opcode(Inst.ALOAD_0) def iload_0(frame): frame.push(frame.get_local(0)) + @opcode(Inst.ILOAD_1) @opcode(Inst.LLOAD_1) @opcode(Inst.ALOAD_1) def iload_1(frame): frame.push(frame.get_local(1)) + @opcode(Inst.ILOAD_2) @opcode(Inst.LLOAD_2) @opcode(Inst.ALOAD_2) def iload_2(frame): frame.push(frame.get_local(2)) + @opcode(Inst.ILOAD_3) @opcode(Inst.LLOAD_3) @opcode(Inst.DLOAD_3) def iload_3(frame): frame.push(frame.get_local(3)) + @opcode(Inst.ISTORE) @opcode(Inst.LSTORE) @opcode(Inst.DSTORE) @@ -239,43 +266,51 @@ def istore(frame): val = frame.pop() frame.set_local(index, val) + @opcode(Inst.LSTORE_0) def lstore_0(frame): val = frame.pop() frame.set_local(0, val) + @opcode(Inst.LSTORE_1) def lstore_1(frame): val = frame.pop() frame.set_local(1, val) + @opcode(Inst.LSTORE_2) def lstore_2(frame): val = frame.pop() frame.set_local(1, val) + @opcode(Inst.LSTORE_3) @opcode(Inst.DSTORE_3) def lstore_3(frame): val = frame.pop() frame.set_local(3, val) + @opcode(Inst.POP) def pop(frame): frame.pop() + @opcode(Inst.DUP) def dup(frame): val = frame.pop() frame.push(val) frame.push(val) + @opcode(Inst.IADD) @opcode(Inst.LADD) @opcode(Inst.DADD) def iadd(frame): frame.push(frame.pop() + frame.pop()) + @opcode(Inst.ISUB) @opcode(Inst.DSUB) def isub(frame): @@ -290,6 +325,7 @@ def isub(frame): frame.push(val1 - val2) + @opcode(Inst.IMUL) @opcode(Inst.DMUL) def imul(frame): @@ -297,20 +333,24 @@ def imul(frame): val1 = frame.pop() frame.push(val2 * val1) + @opcode(Inst.DDIV) def ddiv(frame): val2 = frame.pop() val1 = frame.pop() frame.push(val1 / val2) + @opcode(Inst.I2D) def i2d(frame): frame.push(float(frame.pop())) + @opcode(Inst.I2C) def i2c(frame): frame.push(chr(frame.pop())) + @opcode(Inst.DCMPG) def dcmpg(frame): val2 = frame.pop() @@ -323,25 +363,63 @@ def dcmpg(frame): else: frame.push(-1) + class Machine: def __init__(self): self.class_files = {} + # This is ugly and just for demonstration. Don't really do this like this. + OPCODES.update({Inst.INVOKESTATIC: self.do_INVOKESTATIC, Inst.IINC: self.do_IINC}) def load_class_file(self, path): c = ClassFile().from_file(path) self.class_files[c.class_name] = c + def do_IINC(self, frame): + index = read_byte(frame) + const = read_signed_byte(frame) + + frame.set_local(index, frame.get_local(index) + const) + + def do_INVOKESTATIC(self, frame): + index = read_unsigned_short(frame) + + methodRef = frame.current_class.const_pool[index - 1] + cname = frame.current_class.const_pool[methodRef.class_index - 1].name + natIndex = methodRef.name_and_type_index + nat = frame.current_class.const_pool[natIndex - 1] + + # print(vars(methodRef)) + # print(vars(nat)) + # print(cname) + + if cname in self.class_files: + cl = self.class_files[cname] + if cl.canHandleMethod(nat.name, nat.desc): + ret = cl.handleStatic(nat.name, nat.desc, frame) + if not nat.desc.endswith("V"): + frame.push(ret) + def execute_code(self, frame): code = frame.code + while True: inst = Inst(code[frame.ip]) - #print(frame.ip, inst) + # print(frame.ip, inst) - #if len(frame.stack) > frame.max_stack + 1: + # if len(frame.stack) > frame.max_stack + 1: # print("MAX STACK") if inst in OPCODES: OPCODES[inst](frame) + # print(frame.stack, frame.locals) + frame.ip += 1 + continue + + if inst == Inst.GOTO: + branch = read_signed_short(frame) + + frame.ip -= 3 + frame.ip += branch elif inst == Inst.ISTORE_0: val = frame.stack.pop() frame.set_local(0, val) @@ -367,11 +445,6 @@ def execute_code(self, frame): v2 = frame.stack.pop() v1 = frame.stack.pop() frame.stack.append(v1 % v2) - elif inst == Inst.IINC: - index = read_byte(frame) - const = read_signed_byte(frame) - - frame.set_local(index, frame.get_local(index) + const) elif inst == Inst.IFNE: v1 = frame.stack.pop() @@ -441,12 +514,12 @@ def execute_code(self, frame): if v1 > v2: frame.ip -= 3 frame.ip += branch - elif inst == Inst.GOTO: - branch = read_signed_short(frame) - - frame.ip -= 3 - frame.ip += branch - elif inst == Inst.IRET or inst == Inst.LRET or inst == Inst.ARETURN or inst == Inst.DRETURN: + elif ( + inst == Inst.IRET + or inst == Inst.LRET + or inst == Inst.ARETURN + or inst == Inst.DRETURN + ): return frame.stack.pop() elif inst == Inst.RETURN: return @@ -462,12 +535,12 @@ def execute_code(self, frame): cl = self.class_files[name] if not cl.static_initialized: cl.static_initialized = True - cl.handleMethod('', '()V', frame) + cl.handleMethod("", "()V", frame) frame.stack.append(cl.get_field(nat.name)) - #print(name) - #print(vars(nat)) - #frame.stack.append(PrintStream()) + # print(name) + # print(vars(nat)) + # frame.stack.append(PrintStream()) elif inst == Inst.PUTSTATIC: index = read_unsigned_short(frame) @@ -480,7 +553,7 @@ def execute_code(self, frame): cl = self.class_files[name] if not cl.static_initialized: cl.static_initialized = True - cl.handleMethod('', '()V', frame, code, self, ip) + cl.handleMethod("", "()V", frame, code, self, ip) cl.set_field(nat.name, frame.stack.pop()) elif inst == Inst.GETFIELD: index = read_unsigned_short(frame) @@ -490,10 +563,10 @@ def execute_code(self, frame): natIndex = ref.name_and_type_index nat = frame.current_class.const_pool[natIndex - 1] - #print(vars(nat)) + # print(vars(nat)) obj = frame.stack.pop() - #print(obj) + # print(obj) frame.stack.append(obj.get_field(nat.name)) elif inst == Inst.PUTFIELD: index = read_unsigned_short(frame) @@ -503,7 +576,7 @@ def execute_code(self, frame): natIndex = ref.name_and_type_index nat = frame.current_class.const_pool[natIndex - 1] - #print(vars(nat)) + # print(vars(nat)) value = frame.stack.pop() obj = frame.stack.pop() @@ -516,14 +589,14 @@ def execute_code(self, frame): natIndex = methodRef.name_and_type_index nat = frame.current_class.const_pool[natIndex - 1] - #print(name) - #print(vars(nat)) + # print(name) + # print(vars(nat)) if name in self.class_files: cl = self.class_files[name] if cl.canHandleMethod(nat.name, nat.desc): ret = cl.handleMethod(nat.name, nat.desc, frame) - if not nat.desc.endswith('V'): + if not nat.desc.endswith("V"): frame.push(ret) else: for i in range(argumentCount(nat.desc)): @@ -537,37 +610,19 @@ def execute_code(self, frame): natIndex = methodRef.name_and_type_index nat = frame.current_class.const_pool[natIndex - 1] - #print(vars(methodRef)) - #print(vars(nat)) - #print(name) + # print(vars(methodRef)) + # print(vars(nat)) + # print(name) if name in self.class_files: cl = self.class_files[name] if cl.canHandleMethod(nat.name, nat.desc): ret = cl.handleMethod(nat.name, nat.desc, frame) - if not nat.desc.endswith('V'): - frame.push(ret) - elif inst == Inst.INVOKESTATIC: - index = read_unsigned_short(frame) - - methodRef = frame.current_class.const_pool[index - 1] - cname = frame.current_class.const_pool[methodRef.class_index - 1].name - natIndex = methodRef.name_and_type_index - nat = frame.current_class.const_pool[natIndex - 1] - - #print(vars(methodRef)) - #print(vars(nat)) - #print(cname) - - if cname in self.class_files: - cl = self.class_files[cname] - if cl.canHandleMethod(nat.name, nat.desc): - ret = cl.handleStatic(nat.name, nat.desc, frame) - if not nat.desc.endswith('V'): + if not nat.desc.endswith("V"): frame.push(ret) elif inst == Inst.NEW: index = read_unsigned_short(frame) - + methodRef = frame.current_class.const_pool[index - 1] if methodRef.name in self.class_files: @@ -581,18 +636,18 @@ def execute_code(self, frame): else: frame.stack.append(None) - #print(frame.stack, frame.locals) + # print(frame.stack, frame.locals) frame.ip += 1 def call_function(self, methodName, *args): - cname = '/'.join(methodName.split('/')[:-1]) - mname = methodName.split('/')[-1] + cname = "/".join(methodName.split("/")[:-1]) + mname = methodName.split("/")[-1] if cname in self.class_files: cf = self.class_files[cname] for m in cf.methods: if m.name == mname: - code = m.find_attr('Code').info + code = m.find_attr("Code").info code = CodeAttr().from_reader(io.BytesIO(code)) frame = Frame(code, cf, self) @@ -601,32 +656,32 @@ def call_function(self, methodName, *args): return self.execute_code(frame) def dump(self): - print('Machine Dump') + print("Machine Dump") - print('Loaded Classes') + print("Loaded Classes") for name in self.class_files: - print(' ', name) + print(" ", name) c = self.class_files[name] for method in c.methods: - print(' [METHOD]', method.name, '->', method.desc) + print(" [METHOD]", method.name, "->", method.desc) for attr in method.attributes: - print(' [ATTR] {} ({} bytes)'.format(attr.name, len(attr.info))) + print(" [ATTR] {} ({} bytes)".format(attr.name, len(attr.info))) - if attr.name == 'Code': + if attr.name == "Code": code = CodeAttr().from_reader(io.BytesIO(attr.info)) - print(' ', vars(code)) + print(" ", vars(code)) print() for field in c.fields: - print(' [FIELD]', field.name, ':', field.desc) + print(" [FIELD]", field.name, ":", field.desc) for attr in field.attributes: - print(' [ATTR] {} ({} bytes)'.format(attr.name, len(attr.info))) - if attr.name == 'ConstantValue': - index = struct.unpack('!H', attr.info)[0] - print(' ', c.const_pool[index - 1].string) + print(" [ATTR] {} ({} bytes)".format(attr.name, len(attr.info))) + if attr.name == "ConstantValue": + index = struct.unpack("!H", attr.info)[0] + print(" ", c.const_pool[index - 1].string) print() for attr in c.attributes: - print(' [ATTR] {} ({} bytes)'.format(attr.name, len(attr.info))) - print() \ No newline at end of file + print(" [ATTR] {} ({} bytes)".format(attr.name, len(attr.info))) + print() From 2391004b14c4e5a1d85f36054841663dfb630208 Mon Sep 17 00:00:00 2001 From: Kirk Strauser Date: Sat, 11 Aug 2018 12:24:30 -0700 Subject: [PATCH 2/6] Use @opcode for instance methods. Convert more of them. --- pyjvm/Machine.py | 152 ++++++++++++++++++++++++++--------------------- 1 file changed, 83 insertions(+), 69 deletions(-) diff --git a/pyjvm/Machine.py b/pyjvm/Machine.py index ed38459..9af6f13 100644 --- a/pyjvm/Machine.py +++ b/pyjvm/Machine.py @@ -143,66 +143,66 @@ def inner(fn): @opcode(Inst.ICONST_M1) -def iconst_m1(frame): +def iconst_m1(instance, frame): frame.push(-1) @opcode(Inst.ICONST_0) @opcode(Inst.LCONST_0) -def iconst_0(frame): +def iconst_0(instance, frame): frame.push(0) @opcode(Inst.ICONST_1) @opcode(Inst.LCONST_1) -def iconst_1(frame): +def iconst_1(instance, frame): frame.push(1) @opcode(Inst.ICONST_2) -def iconst_2(frame): +def iconst_2(instance, frame): frame.push(2) @opcode(Inst.ICONST_3) -def iconst_3(frame): +def iconst_3(instance, frame): frame.push(3) @opcode(Inst.ICONST_4) -def iconst_4(frame): +def iconst_4(instance, frame): frame.push(4) @opcode(Inst.ICONST_5) -def iconst_5(frame): +def iconst_5(instance, frame): frame.push(5) @opcode(Inst.DCONST_0) -def dconst_0(frame): +def dconst_0(instance, frame): frame.push(0.0) @opcode(Inst.DCONST_1) -def dconst_1(frame): +def dconst_1(instance, frame): frame.push(1.0) @opcode(Inst.BIPUSH) -def bipush(frame): +def bipush(instance, frame): val = read_byte(frame) frame.push(val) @opcode(Inst.SIPUSH) -def sipush(frame): +def sipush(instance, frame): val = read_signed_short(frame) frame.push(val) @opcode(Inst.LDC) -def ldc(frame): +def ldc(instance, frame): index = read_byte(frame) const = frame.current_class.const_pool[index - 1] @@ -215,7 +215,7 @@ def ldc(frame): @opcode(Inst.LDC2_W) -def ldc2_w(frame): +def ldc2_w(instance, frame): index = read_unsigned_short(frame) const = frame.current_class.const_pool[index - 1].double @@ -225,7 +225,7 @@ def ldc2_w(frame): @opcode(Inst.ILOAD) @opcode(Inst.LLOAD) @opcode(Inst.DLOAD) -def iload(frame): +def iload(instance, frame): index = read_byte(frame) frame.push(frame.get_local(index)) @@ -233,72 +233,72 @@ def iload(frame): @opcode(Inst.ILOAD_0) @opcode(Inst.LLOAD_0) @opcode(Inst.ALOAD_0) -def iload_0(frame): +def iload_0(instance, frame): frame.push(frame.get_local(0)) @opcode(Inst.ILOAD_1) @opcode(Inst.LLOAD_1) @opcode(Inst.ALOAD_1) -def iload_1(frame): +def iload_1(instance, frame): frame.push(frame.get_local(1)) @opcode(Inst.ILOAD_2) @opcode(Inst.LLOAD_2) @opcode(Inst.ALOAD_2) -def iload_2(frame): +def iload_2(instance, frame): frame.push(frame.get_local(2)) @opcode(Inst.ILOAD_3) @opcode(Inst.LLOAD_3) @opcode(Inst.DLOAD_3) -def iload_3(frame): +def iload_3(instance, frame): frame.push(frame.get_local(3)) @opcode(Inst.ISTORE) @opcode(Inst.LSTORE) @opcode(Inst.DSTORE) -def istore(frame): +def istore(instance, frame): index = read_byte(frame) val = frame.pop() frame.set_local(index, val) @opcode(Inst.LSTORE_0) -def lstore_0(frame): +def lstore_0(instance, frame): val = frame.pop() frame.set_local(0, val) @opcode(Inst.LSTORE_1) -def lstore_1(frame): +def lstore_1(instance, frame): val = frame.pop() frame.set_local(1, val) @opcode(Inst.LSTORE_2) -def lstore_2(frame): +def lstore_2(instance, frame): val = frame.pop() frame.set_local(1, val) @opcode(Inst.LSTORE_3) @opcode(Inst.DSTORE_3) -def lstore_3(frame): +def lstore_3(instance, frame): val = frame.pop() frame.set_local(3, val) @opcode(Inst.POP) -def pop(frame): +def pop(instance, frame): frame.pop() @opcode(Inst.DUP) -def dup(frame): +def dup(instance, frame): val = frame.pop() frame.push(val) frame.push(val) @@ -307,13 +307,13 @@ def dup(frame): @opcode(Inst.IADD) @opcode(Inst.LADD) @opcode(Inst.DADD) -def iadd(frame): +def iadd(instance, frame): frame.push(frame.pop() + frame.pop()) @opcode(Inst.ISUB) @opcode(Inst.DSUB) -def isub(frame): +def isub(instance, frame): val2 = frame.pop() val1 = frame.pop() @@ -328,31 +328,31 @@ def isub(frame): @opcode(Inst.IMUL) @opcode(Inst.DMUL) -def imul(frame): +def imul(instance, frame): val2 = frame.pop() val1 = frame.pop() frame.push(val2 * val1) @opcode(Inst.DDIV) -def ddiv(frame): +def ddiv(instance, frame): val2 = frame.pop() val1 = frame.pop() frame.push(val1 / val2) @opcode(Inst.I2D) -def i2d(frame): +def i2d(instance, frame): frame.push(float(frame.pop())) @opcode(Inst.I2C) -def i2c(frame): +def i2c(instance, frame): frame.push(chr(frame.pop())) @opcode(Inst.DCMPG) -def dcmpg(frame): +def dcmpg(instance, frame): val2 = frame.pop() val1 = frame.pop() @@ -367,19 +367,28 @@ def dcmpg(frame): class Machine: def __init__(self): self.class_files = {} - # This is ugly and just for demonstration. Don't really do this like this. - OPCODES.update({Inst.INVOKESTATIC: self.do_INVOKESTATIC, Inst.IINC: self.do_IINC}) def load_class_file(self, path): c = ClassFile().from_file(path) self.class_files[c.class_name] = c + # Instructions defined as methods because they affect the machine state + + @opcode(Inst.GOTO) + def do_GOTO(self, frame): + branch = read_signed_short(frame) + + frame.ip -= 3 + frame.ip += branch + + @opcode(Inst.IINC) def do_IINC(self, frame): index = read_byte(frame) const = read_signed_byte(frame) frame.set_local(index, frame.get_local(index) + const) + @opcode(Inst.INVOKESTATIC) def do_INVOKESTATIC(self, frame): index = read_unsigned_short(frame) @@ -399,6 +408,43 @@ def do_INVOKESTATIC(self, frame): if not nat.desc.endswith("V"): frame.push(ret) + @opcode(Inst.IF_ICMPGE) + def do_IF_ICMPGE(self, frame): + v2 = frame.stack.pop() + v1 = frame.stack.pop() + + branch = read_signed_short(frame) + + if type(v1) is str and len(v1) == 1: + v1 = ord(v1) + + if type(v2) is str and len(v2) == 1: + v2 = ord(v2) + + if v1 >= v2: + frame.ip -= 3 + frame.ip += branch + + @opcode(Inst.IFGE) + def do_IFGE(self, frame): + v1 = frame.stack.pop() + + branch = read_signed_short(frame) + + if v1 >= 0: + frame.ip -= 3 + frame.ip += branch + + @opcode(Inst.ISTORE_1) + def do_ISTORE_1(self, frame): + val = frame.stack.pop() + frame.set_local(1, val) + + @opcode(Inst.ISTORE_2) + def do_ISTORE_2(self, frame): + val = frame.stack.pop() + frame.set_local(2, val) + def execute_code(self, frame): code = frame.code @@ -410,25 +456,16 @@ def execute_code(self, frame): # print("MAX STACK") if inst in OPCODES: - OPCODES[inst](frame) + OPCODES[inst](self, frame) # print(frame.stack, frame.locals) frame.ip += 1 continue - if inst == Inst.GOTO: - branch = read_signed_short(frame) + print(f"inst: {inst}") - frame.ip -= 3 - frame.ip += branch - elif inst == Inst.ISTORE_0: + if inst == Inst.ISTORE_0: val = frame.stack.pop() frame.set_local(0, val) - elif inst == Inst.ISTORE_1: - val = frame.stack.pop() - frame.set_local(1, val) - elif inst == Inst.ISTORE_2: - val = frame.stack.pop() - frame.set_local(2, val) elif inst == Inst.ISTORE_3: val = frame.stack.pop() frame.set_local(3, val) @@ -453,14 +490,6 @@ def execute_code(self, frame): if v1 != 0: frame.ip -= 3 frame.ip += branch - elif inst == Inst.IFGE: - v1 = frame.stack.pop() - - branch = read_signed_short(frame) - - if v1 >= 0: - frame.ip -= 3 - frame.ip += branch elif inst == Inst.IFLE: v1 = frame.stack.pop() @@ -484,21 +513,6 @@ def execute_code(self, frame): if v1 < v2: frame.ip -= 3 frame.ip += branch - elif inst == Inst.IF_ICMPGE: - v2 = frame.stack.pop() - v1 = frame.stack.pop() - - branch = read_signed_short(frame) - - if type(v1) is str and len(v1) == 1: - v1 = ord(v1) - - if type(v2) is str and len(v2) == 1: - v2 = ord(v2) - - if v1 >= v2: - frame.ip -= 3 - frame.ip += branch elif inst == Inst.IF_ICMPGT: v2 = frame.stack.pop() v1 = frame.stack.pop() From 826e9d621ac98a757da3b6456dc3bb1fb40b825d Mon Sep 17 00:00:00 2001 From: Kirk Strauser Date: Sat, 11 Aug 2018 12:28:22 -0700 Subject: [PATCH 3/6] Easier to ask forgiveness than permissions. Sets are fast. --- pyjvm/Machine.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pyjvm/Machine.py b/pyjvm/Machine.py index 9af6f13..d198104 100644 --- a/pyjvm/Machine.py +++ b/pyjvm/Machine.py @@ -455,8 +455,12 @@ def execute_code(self, frame): # if len(frame.stack) > frame.max_stack + 1: # print("MAX STACK") - if inst in OPCODES: - OPCODES[inst](self, frame) + try: + func = OPCODES[inst] + except KeyError: + pass + else: + func(self, frame) # print(frame.stack, frame.locals) frame.ip += 1 continue @@ -528,12 +532,7 @@ def execute_code(self, frame): if v1 > v2: frame.ip -= 3 frame.ip += branch - elif ( - inst == Inst.IRET - or inst == Inst.LRET - or inst == Inst.ARETURN - or inst == Inst.DRETURN - ): + elif inst in {Inst.IRET, Inst.LRET, Inst.ARETURN, Inst.DRETURN}: return frame.stack.pop() elif inst == Inst.RETURN: return From e63924219f2fc4fef29803eeeba25186114180b1 Mon Sep 17 00:00:00 2001 From: Kirk Strauser Date: Sat, 11 Aug 2018 12:51:42 -0700 Subject: [PATCH 4/6] More verbose failure when functions can't be found --- pyjvm/Machine.py | 37 ++++++++++++++++++++----------------- pyjvm/exceptions.py | 6 ++++++ 2 files changed, 26 insertions(+), 17 deletions(-) create mode 100644 pyjvm/exceptions.py diff --git a/pyjvm/Machine.py b/pyjvm/Machine.py index d198104..c69fe1f 100644 --- a/pyjvm/Machine.py +++ b/pyjvm/Machine.py @@ -1,6 +1,7 @@ from .ClassFile import ClassFile from .CodeAttr import CodeAttr from .Frame import Frame +from . import exceptions import struct import io @@ -458,15 +459,13 @@ def execute_code(self, frame): try: func = OPCODES[inst] except KeyError: - pass + print(f"Searching for inst: {inst}") else: func(self, frame) # print(frame.stack, frame.locals) frame.ip += 1 continue - print(f"inst: {inst}") - if inst == Inst.ISTORE_0: val = frame.stack.pop() frame.set_local(0, val) @@ -653,20 +652,24 @@ def execute_code(self, frame): frame.ip += 1 def call_function(self, methodName, *args): - cname = "/".join(methodName.split("/")[:-1]) - mname = methodName.split("/")[-1] - - if cname in self.class_files: - cf = self.class_files[cname] - for m in cf.methods: - if m.name == mname: - code = m.find_attr("Code").info - code = CodeAttr().from_reader(io.BytesIO(code)) - - frame = Frame(code, cf, self) - for i, arg in enumerate(args): - frame.set_local(i, arg) - return self.execute_code(frame) + cname, _, mname = methodName.rpartition("/") + + try: + class_file = self.class_files[cname] + except KeyError as exc: + raise exceptions.UnknownClass(cname) from exc + + for m in class_file.methods: + if m.name == mname: + code = m.find_attr("Code").info + code = CodeAttr().from_reader(io.BytesIO(code)) + + frame = Frame(code, class_file, self) + for i, arg in enumerate(args): + frame.set_local(i, arg) + return self.execute_code(frame) + else: + raise exceptions.UnknownFunction(mname) def dump(self): print("Machine Dump") diff --git a/pyjvm/exceptions.py b/pyjvm/exceptions.py new file mode 100644 index 0000000..d8736ac --- /dev/null +++ b/pyjvm/exceptions.py @@ -0,0 +1,6 @@ +class UnknownClass(KeyError): + """The given class can't be found.""" + + +class UnknownFunction(KeyError): + """The given function can't be found in its class.""" From 66184f9b77584730b47a7801b987b1e6a1631f58 Mon Sep 17 00:00:00 2001 From: Kirk Strauser Date: Sat, 11 Aug 2018 14:46:23 -0700 Subject: [PATCH 5/6] Easier to switch between test cases --- test.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/test.py b/test.py index ed40aed..7535a50 100755 --- a/test.py +++ b/test.py @@ -2,19 +2,29 @@ from pyjvm.Machine import Machine from pyjvm.jstdlib.StdlibLoader import load_stdlib_classes +TEST = "monteCarloPi" +RUNS = 10 + +tests = { + "iterativeFibonacci": ( + "example/IntegerTest.class", + "jvmtest/IntegerTest/iterativeFibonacci", + 100, + ), + "monteCarloPi": ("example/Hello.class", "com/gkbrk/JVMTest/Hello/monteCarloPi", 5000), +} + m = Machine() # Load stdlib load_stdlib_classes(m) # Load local classes -m.load_class_file('example/Hello.class') -#m.load_class_file('example/TestImport.class') -#m.load_class_file('example/IntegerTest.class') -#m.load_class_file('example/Rot13.class') -#m.load_class_file('example/InstanceTest.class') +class_name, func, args = tests[TEST] +m.load_class_file(class_name) # Dump machine state -#m.dump() +# m.dump() -print(m.call_function('com/gkbrk/JVMTest/Hello/monteCarloPi', 5000)) \ No newline at end of file +for i in range(RUNS): + print(m.call_function(func, args)) From 9879a1c666005d7eb370443a88f5d0aa8fefa59c Mon Sep 17 00:00:00 2001 From: Kirk Strauser Date: Sat, 11 Aug 2018 14:47:15 -0700 Subject: [PATCH 6/6] More quick opcode lookups --- pyjvm/Machine.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/pyjvm/Machine.py b/pyjvm/Machine.py index c69fe1f..66cf797 100644 --- a/pyjvm/Machine.py +++ b/pyjvm/Machine.py @@ -436,6 +436,16 @@ def do_IFGE(self, frame): frame.ip -= 3 frame.ip += branch + @opcode(Inst.IFLE) + def do_IFLE(self, frame): + v1 = frame.stack.pop() + + branch = read_signed_short(frame) + + if v1 <= 0: + frame.ip -= 3 + frame.ip += branch + @opcode(Inst.ISTORE_1) def do_ISTORE_1(self, frame): val = frame.stack.pop() @@ -446,6 +456,11 @@ def do_ISTORE_2(self, frame): val = frame.stack.pop() frame.set_local(2, val) + @opcode(Inst.ISTORE_3) + def do_ISTORE_3(self, frame): + val = frame.stack.pop() + frame.set_local(3, val) + def execute_code(self, frame): code = frame.code @@ -459,19 +474,20 @@ def execute_code(self, frame): try: func = OPCODES[inst] except KeyError: - print(f"Searching for inst: {inst}") + pass else: func(self, frame) # print(frame.stack, frame.locals) frame.ip += 1 continue + # Instructions that break out of the current loop can't be converted to methods + if inst in {Inst.IRET, Inst.LRET, Inst.ARETURN, Inst.DRETURN}: + return frame.stack.pop() + if inst == Inst.ISTORE_0: val = frame.stack.pop() frame.set_local(0, val) - elif inst == Inst.ISTORE_3: - val = frame.stack.pop() - frame.set_local(3, val) elif inst == Inst.ASTORE_0: obj = frame.stack.pop() frame.set_local(0, obj) @@ -493,14 +509,6 @@ def execute_code(self, frame): if v1 != 0: frame.ip -= 3 frame.ip += branch - elif inst == Inst.IFLE: - v1 = frame.stack.pop() - - branch = read_signed_short(frame) - - if v1 <= 0: - frame.ip -= 3 - frame.ip += branch elif inst == Inst.IF_ICMPLT: v2 = frame.stack.pop() v1 = frame.stack.pop() @@ -531,8 +539,6 @@ def execute_code(self, frame): if v1 > v2: frame.ip -= 3 frame.ip += branch - elif inst in {Inst.IRET, Inst.LRET, Inst.ARETURN, Inst.DRETURN}: - return frame.stack.pop() elif inst == Inst.RETURN: return elif inst == Inst.GETSTATIC: