From c1e6cedf68c0fdfc879c6d7289edd824ba86a654 Mon Sep 17 00:00:00 2001 From: Axel Tillequin Date: Mon, 16 Feb 2015 17:59:57 +0100 Subject: [PATCH] merge z3 interface and fast backward analysis --- README.rst | 803 +++++++++++++++++++++++++--- amoco/__init__.py | 2 + amoco/arch/arm/cpu_armv7.py | 4 +- amoco/arch/arm/cpu_armv8.py | 4 +- amoco/arch/arm/v7/asm.py | 28 +- amoco/arch/arm/v7/env.py | 36 +- amoco/arch/arm/v7/formats.py | 2 + amoco/arch/arm/v7/spec_armv7.py | 4 +- amoco/arch/arm/v7/spec_thumb.py | 6 +- amoco/arch/arm/v7/spec_thumb2.py | 10 +- amoco/arch/arm/v7/utils.py | 18 +- amoco/arch/arm/v8/asm64.py | 4 +- amoco/arch/arm/v8/env64.py | 4 +- amoco/arch/arm/v8/formats.py | 2 + amoco/arch/arm/v8/spec_armv8.py | 6 +- amoco/arch/arm/v8/utils.py | 4 +- amoco/arch/core.py | 22 +- amoco/arch/gas.py | 4 +- amoco/arch/msp430/asm.py | 6 +- amoco/arch/msp430/cpu.py | 2 + amoco/arch/msp430/env.py | 4 +- amoco/arch/msp430/formats.py | 2 + amoco/arch/msp430/parsers.py | 3 +- amoco/arch/msp430/spec_msp430.py | 6 +- amoco/arch/pic/F46K22/asm.py | 4 +- amoco/arch/pic/F46K22/env.py | 4 +- amoco/arch/pic/F46K22/formats.py | 2 + amoco/arch/pic/F46K22/spec_pic18.py | 4 +- amoco/arch/pic/cpu_pic18f46k22.py | 2 + amoco/arch/sparc/asm.py | 4 +- amoco/arch/sparc/cpu_v8.py | 2 + amoco/arch/sparc/env.py | 4 +- amoco/arch/sparc/formats.py | 2 + amoco/arch/sparc/parsers.py | 3 +- amoco/arch/sparc/spec_v8.py | 4 +- amoco/arch/sparc/utils.py | 2 + amoco/arch/x64/asm.py | 57 +- amoco/arch/x64/cpu_x64.py | 2 + amoco/arch/x64/env.py | 40 +- amoco/arch/x64/formats.py | 2 + amoco/arch/x64/spec_fpu.py | 4 +- amoco/arch/x64/spec_ia32e.py | 4 +- amoco/arch/x64/spec_sse.py | 8 +- amoco/arch/x64/utils.py | 4 +- amoco/arch/x86/asm.py | 75 ++- amoco/arch/x86/cpu_x86.py | 2 + amoco/arch/x86/env.py | 36 +- amoco/arch/x86/formats.py | 4 +- amoco/arch/x86/parsers.py | 3 +- amoco/arch/x86/spec_fpu.py | 4 +- amoco/arch/x86/spec_ia32.py | 4 +- amoco/arch/x86/spec_sse.py | 6 +- amoco/arch/x86/utils.py | 4 
+- amoco/arch/z80/asm.py | 2 +- amoco/arch/z80/cpu_gb.py | 2 + amoco/arch/z80/cpu_z80.py | 2 + amoco/arch/z80/env.py | 2 +- amoco/arch/z80/formats.py | 2 + amoco/arch/z80/spec_gb.py | 2 +- amoco/arch/z80/spec_mostek.py | 2 +- amoco/cas/expressions.py | 529 +++++++++++------- amoco/cas/mapper.py | 179 +++++-- amoco/cas/parser.py | 61 +++ amoco/cas/smt.py | 120 +++++ amoco/cas/tracker.py | 4 +- amoco/cas/utils.py | 4 +- amoco/cfg.py | 90 +++- amoco/code.py | 89 ++- amoco/config.py | 6 +- amoco/logger.py | 35 +- amoco/main.py | 272 +++++++--- amoco/system/__init__.py | 2 + amoco/system/core.py | 98 ++-- amoco/system/elf.py | 8 +- amoco/system/gameboy.py | 16 +- amoco/system/leon2.py | 5 +- amoco/system/linux_arm.py | 299 +---------- amoco/system/linux_arm64.py | 4 +- amoco/system/linux_x64.py | 24 +- amoco/system/linux_x86.py | 31 +- amoco/system/loader.py | 10 +- amoco/system/msp430.py | 4 +- amoco/system/pe.py | 4 +- amoco/system/pic18.py | 16 +- amoco/system/raw.py | 2 + amoco/system/win32.py | 10 +- amoco/system/win64.py | 10 +- 87 files changed, 2182 insertions(+), 1046 deletions(-) create mode 100644 amoco/cas/parser.py create mode 100644 amoco/cas/smt.py diff --git a/README.rst b/README.rst index 88b614d..3422d73 100644 --- a/README.rst +++ b/README.rst @@ -6,9 +6,13 @@ Amoco +-----------+-----------------------------------+ | Location: | https://github.com/bdcht/amoco | +-----------+-----------------------------------+ -| Version: | 2.3 | +| Version: | 2.4 | +-----------+-----------------------------------+ +.. contents:: **Table of Contents** + :local: + :depth: 3 + :backlinks: top Description =========== @@ -43,7 +47,6 @@ It features: Amoco is still *work in progress*. See Todo_ for a list of features to be merged from develop branch or to be more thoroughly implemented. 
- History ======= @@ -69,8 +72,10 @@ More precisely: - x86 fpu and sse instructions semantics are not implemented, - arm SIMD, VFP, NEON, TrustZone, Jazelle instruction sets are not implemented, - pretty printers based on pygments package are not merged, -- interface to z3 solver (and associated analysis) is currently not merged, -- backward and solver-based disassembling strategies are not merged yet. +- solver-based disassembling strategies are not merged yet. +- persistent database (session) and idb import/export features are planned (Q2 2015). +- sphinx documentation is planned. +- MIPS, 6502 and PPC archs are planned. Contributions to fulfill uncomplete/unimplemented parts are welcome. @@ -82,8 +87,8 @@ Amoco is tested on python 2.7 and depends on the following python packages: - grandalf_ used for building CFG (and eventually rendering it) - crysp_ used by the generic intruction decoder (``arch/core.py``) -- z3_ (not in current release) -- pygments_ (not in current release) +- z3_ used to simplify expressions and solve constraints +- pygments_ (not in current release, planned for 2.4.2 release) - pyparsing_ for parsing instruction decoder formats - ply_ (optional), for parsing *GNU as* files @@ -91,7 +96,7 @@ Amoco is tested on python 2.7 and depends on the following python packages: Quickstart ========== -Below is a very simple example where basic blocks are built with linear sweep: +Below is a very simple example where basic blocks are build with linear sweep: .. sourcecode:: python @@ -115,14 +120,15 @@ creates a ``linux_x86.ELF`` object which shall represent the program task. >>> print p.mmap + + - + - > + > >>> p.mmap.read(0x0804a004,4) [] @@ -197,17 +203,17 @@ Lets look at the symbolic execution of this block: ebp <- { | [0:32]->0x0 | } esi <- { | [0:32]->M32(esp) | } ecx <- { | [0:32]->(esp+0x4) | } - eflags <- { | [0:1]->0x0 | [6:7]->((((esp+0x4)&0xfffffff0)==0x0) ? 
0x1 : 0x0) | [12:32]->eflags[12:32] | [11:12]->0x0 | [8:11]->eflags[8:11] | [1:6]->eflags[1:6] | [7:8]->((((esp+0x4)&0xfffffff0)<0x0) ? 0x1 : 0x0) | } - ((((esp+0x4)&0xfffffff0)-0x4)) <- eax - ((((esp+0x4)&0xfffffff0)-0x8)) <- (((esp+0x4)&0xfffffff0)-0x4) - ((((esp+0x4)&0xfffffff0)-0xc)) <- edx - ((((esp+0x4)&0xfffffff0)-0x10)) <- 0x8048610 - ((((esp+0x4)&0xfffffff0)-0x14)) <- 0x80485a0 - ((((esp+0x4)&0xfffffff0)-0x18)) <- (esp+0x4) - ((((esp+0x4)&0xfffffff0)-0x1c)) <- M32(esp) - ((((esp+0x4)&0xfffffff0)-0x20)) <- 0x80484fd + eflags <- { | [0:1]->0x0 | [1:2]->eflags[1:2] | [2:3]->(0x6996>>(((esp+0x4)&0xfffffff0)[0:8]^(((esp+0x4)&0xfffffff0)[0:8]>>0x4))[0:4])[0:1] | [3:6]->eflags[3:6] | [6:7]->(((esp+0x4)&0xfffffff0)==0x0) | [7:8]->(((esp+0x4)&0xfffffff0)<0x0) | [8:11]->eflags[8:11] | [11:12]->0x0 | [12:32]->eflags[12:32] | } + ((((esp+0x4)&0xfffffff0)-4)) <- eax + ((((esp+0x4)&0xfffffff0)-8)) <- (((esp+0x4)&0xfffffff0)-0x4) + ((((esp+0x4)&0xfffffff0)-12)) <- edx + ((((esp+0x4)&0xfffffff0)-16)) <- 0x8048610 + ((((esp+0x4)&0xfffffff0)-20)) <- 0x80485a0 + ((((esp+0x4)&0xfffffff0)-24)) <- (esp+0x4) + ((((esp+0x4)&0xfffffff0)-28)) <- M32(esp) + ((((esp+0x4)&0xfffffff0)-32)) <- 0x80484fd esp <- { | [0:32]->(((esp+0x4)&0xfffffff0)-0x24) | } - ((((esp+0x4)&0xfffffff0)-0x24)) <- (eip+0x21) + ((((esp+0x4)&0xfffffff0)-36)) <- (eip+0x21) eip <- { | [0:32]->(eip+-0x10) | } >>> b.map[p.cpu.esi] @@ -224,6 +230,38 @@ When a block is instanciated, a ``mapper`` object is automatically created. This function can map any input state to an output state corresponding to the interpretation of this block. +A mapper object is now also equipped with a MemoryMap to mitigate aliasing issues +and ease updating the global mmap state. + +.. 
sourcecode:: python + + >>> print b.map.memory() + + + + + + + + + + > + >>> print b.map(p.cpu.mem(p.cpu.esp,64)) + { | [0:32]->(eip+0x21) | [32:64]->0x80484fd | } + >>> print b.map(p.cpu.mem(p.cpu.ebx,32)) + M32$9(ebx) + + +As shown above, reading memory in the mapper can return a compound expression. +Note also that unmapped areas are returned as symbolic mem objects. +Since aliasing between different MemoryZones is possible, the returned +symbolic expression of fetching memory at pointer ``ebx`` is special: +the ``M32$9(ebx)`` expression says "in input state, take 32 bits found at +pointer ebx *after* applying 9 possibly aliasing memory writes to the state. +More details in mapper_. + + ----- Lets try a (little) more elaborated analysis that will not only allow to @@ -238,8 +276,8 @@ the control flow graph of the program: >>> ff.policy['branch-lazy']=False >>> ff.getcfg() amoco.cas.expressions: INFO: stub __libc_start_main called - amoco.main: INFO: fforward analysis failed at block 0x8048370 - + amoco.main: INFO: fforward analysis stopped at block 0x8048370 + >>> G=_ >>> G.C [] @@ -265,13 +303,14 @@ Let's have a look at the graph instance: # --- block 0x8048370 --- 0x8048370 'ff250ca00408' jmp [@__libc_start_main] >>> print n.data.map - eip <- { | [0:32]->M32((esp+0x4)) | } + eip <- { | [0:32]->M32(esp+4) | } esp <- { | [0:32]->(esp-0x4) | } - ((esp-0x4)) <- @exit + (esp-4) <- @exit Ok, so the program counter is correctly pointing to the ``#main`` address located at offset +4 in the stack, but since the fast-forward method only look at one block, it cannot know that this location holds this address. 
+ A little more elaborated analysis like **link-forward** would have started analysing ``#main``: @@ -280,17 +319,18 @@ A little more elaborated analysis like **link-forward** would have started analy >>> lf = amoco.lforward(p) >>> lf.getcfg() amoco.cas.expressions: INFO: stub __libc_start_main called - amoco.main: INFO: lforward analysis failed at block 0x8048483 - + amoco.main: INFO: lforward analysis stopped at block 0x80484d4 + >>> G=_ >>> print G.C - [, - , + [, + , + , ] >>> for g in G.C: ... print g.sV ... print '------' - ... + ... 0.| 1.| 2.| @@ -298,6 +338,9 @@ A little more elaborated analysis like **link-forward** would have started analy 0.| ------ 0.| + 1.| + ------ + 0.| ------ >>> print G.get_node('0x8048434').data # --- block 0x8048434 --- @@ -324,10 +367,59 @@ A little more elaborated analysis like **link-forward** would have started analy 0x804848f 'c3' ret +The **fast-backward** is another analysis that tries to evaluate the expression of +the program counter backwardly and thus reconstructs function frames in simple cases. + +.. 
sourcecode:: python + + >>> amoco.Log.loggers['amoco.main'].setLevel(15) + >>> z = amoco.fbackward(p) + >>> z.getcfg() + amoco.main: VERBOSE: root node 0x8048380 added + amoco.main: VERBOSE: block #PLT@__libc_start_main starts a new cfg component + amoco.cas.expressions: INFO: stub __libc_start_main called + amoco.main: VERBOSE: function f:#PLT@__libc_start_main{2} created + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: block 0x8048434 starts a new cfg component + amoco.main: VERBOSE: block 0x8048483 starts a new cfg component + amoco.main: VERBOSE: function fct_b:0x8048483{1} created + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: block 0x80484d4 starts a new cfg component + amoco.main: VERBOSE: function fct_e:0x80484d4{1} created + amoco.main: VERBOSE: pc is memory aliased in fct_e:0x80484d4{1} (assume_no_aliasing) + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: function fct_a:0x8048434{5} created + amoco.main: VERBOSE: pc is memory aliased in fct_a:0x8048434{5} (assume_no_aliasing) + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: function fct_b:0x8048483{1} called + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: block 0x8048490 starts a new cfg component + amoco.main: VERBOSE: block 0x80484ab starts a new cfg component + amoco.main: VERBOSE: block #PLT@malloc starts a new cfg component + amoco.cas.expressions: INFO: stub malloc called + amoco.main: VERBOSE: function f:#PLT@malloc{2} created + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: function fct_d:0x80484ab{3} created + amoco.main: VERBOSE: pc is memory aliased in fct_d:0x80484ab{3} (assume_no_aliasing) + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: function fct_c:0x8048490{3} created + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: edge -?-> added + amoco.main: VERBOSE: block #PLT@__stack_chk_fail starts a new cfg component + amoco.cas.expressions: INFO: stub __stack_chk_fail called + amoco.main: 
VERBOSE: function f:#PLT@__stack_chk_fail{2} created + amoco.main: VERBOSE: edge ---> added + amoco.main: VERBOSE: function f:0x8048380{12} created + amoco.main: VERBOSE: pc is memory aliased in f:0x8048380{12} (assume_no_aliasing) + amoco.main: INFO: fbackward analysis stopped at + amoco.main: VERBOSE: edge -?-> added + + >>> + .. ** -Overview -======== +API Overview +============ Amoco is composed of 3 packages arch_, cas_ and system_, on top of which the classes implemented in ``code.py``, ``cfg.py`` and ``main.py`` provide high-level @@ -337,6 +429,9 @@ disassembling/analysis techniques. We will now describe this architecture starting from low-level layers (arch_, cas_) up to system_ and finally to higher level classes. +A *Sphinx* generated doc will be available soon. + + arch ---- @@ -434,11 +529,10 @@ For example (in ``arch/x86/asm.py``): .. sourcecode:: python def i_CMOVcc(i,fmap): - op1 = i.operands[0] - op2 = fmap(i.operands[1]) - fmap[eip] = fmap[eip]+i.length - a = fmap(op1) - fmap[op1] = tst(fmap(i.cond[1]),op2,a) + fmap[eip] = fmap(eip)+i.length + op1 = i.operands[0] + op2 = i.operands[1] + fmap[op1] = fmap(tst(i.cond[1],op2,op1)) The function takes as input the instruction instance *i* and a ``mapper`` instance *fmap* (see cas_) and implements (an approximation of) the opcode semantics. @@ -462,9 +556,9 @@ An example follows from ``arch/x86/formats.py``: .. sourcecode:: python def mnemo(i): - mnemo = i.mnemonic.replace('cc','') - if hasattr(i,'cond'): mnemo += i.cond[0].split('/')[0] - return '{: <12}'.format(mnemo.lower()) + mnemo = i.mnemonic.replace('cc','') + if hasattr(i,'cond'): mnemo += i.cond[0].split('/')[0] + return '{: <12}'.format(mnemo.lower()) def opsize(i): s = [op.size for op in i.operands if op._is_mem] @@ -489,14 +583,14 @@ the destination register is X0/W0 : .. 
sourcecode:: python def alias_AND(i): - m = mnemo(i) - r = regs(i) - if i.setflags and i.d==0: - m = 'tst' - r.pop(0) - return m.ljust(12) + ', '.join(r) - - + m = mnemo(i) + r = regs(i) + if i.setflags and i.d==0: + m = 'tst' + r.pop(0) + return m.ljust(12) + ', '.join(r) + + cas --- @@ -507,12 +601,14 @@ in ``cas/expressions.py``: - Symbol ``sym``, a Constant equipped with a reference string (non-external symbol), - Register ``reg``, a fixed size CPU register **location**, - External ``ext``, a reference to an external location (external symbol), +- Floats ``cfp``, constant (fixed size) floating-point values, - Composite ``comp``, a bitvector composed of several elements, - Pointer ``ptr``, a memory **location** in a segment, with possible displacement, - Memory ``mem``, a Pointer to represent a value of fixed size in memory, - Slice ``slc``, a bitvector slice of any element, - Test ``tst``, a conditional expression, (see Tests_ below.) -- Operator ``op``, an operation on (1 or 2) elements. The list of supported operations is +- Operator ``uop``, an unary operator expression, +- Operator ``op``, a binary operator expression. The list of supported operations is not fixed althrough several predefined operators allow to build expressions directly from Python expressions: say, you don't need to write ``op('+',x,y)``, but can write ``x+y``. Supported operators are: @@ -531,7 +627,10 @@ Common attributes and methods for all elements are: - ``sf``, the True/False *sign-flag*. - ``length`` (size/8) - ``mask`` (1<>> c2 = _ >>> print c2.sf, c2 False 0xfd + >>> assert c2.bytes(1,2)==0 >>> e = c2+c.signextend(16)+5 >>> print e 0xff >>> c3 = e[0:8] >>> print c3==cst(-1,8) - 0x0 - >>> c3.sf=True - >>> print c3==cst(-1,8) 0x1 Here, after declaring an 8-bit constant with value 253, we can see that by default the @@ -592,7 +689,7 @@ Python boolean type: >>> print t 0x1 >>> if t==True: print 'OK' - ... + ... OK >>> t.size 1 @@ -627,82 +724,484 @@ or left-values (locations). 
More details on *locations* in mapper_. .. sourcecode:: python - >>> r1 = reg('%r1',32) - >>> print r1 - %r1 - >>> e = 2+r1 + >>> a = reg('%a',32) + >>> print a + %a + >>> e = 2+a >>> print e - (0x2+%r1) + (%a+0x2) >>> x = e-2 >>> print x - (0x0+%r1) + (%a-0x0) >>> x.simplify() >>> print _ - %r1 + %a -As shown above, elementary simplification rules are applied such that ``(2+r1)-2`` -leads to an ``op`` expression with operator ``+``, left member 0 and right member ``r1``, +As shown above, elementary simplification rules are applied such that ``(2+a)-2`` +leads to an ``op`` expression with operator ``-``, right member 0 and left member ``r1``, which eventually also simplifies further to the r1 register. -Most real simplification rules should rely on SMT solvers like z3_ [TBC]. +Most real simplification rules should rely on SMT solvers like z3_ (see smt_). Externals ~~~~~~~~~ Class ``ext`` inherit from registers as pure symbolic values but is used to represent external symbols that are equipped with a ``stub`` function. -When "called", these objects invoke their stub function. +When "called", these objects can invoke their stub function in two ways: + +- when the program counter is an ``ext`` expression, + the object invokes its __call__ method to modify the provided mapper by calling the + registered *stub* with the mapper and possibly other needed parameters. +- when used to simulate actions of *interruptions* like for example + in the semantics of ``IN/OUT`` or ``INT`` instructions which invoke the object's ``call`` + method to eventually return an expression. + (More details on ``@stub`` decorated functions are provided in system_.) Pointers and Memory objects ~~~~~~~~~~~~~~~~~~~~~~~~~~~ A ``ptr`` object is a memory **location**. These objects are generally not found -in expressions but only as mapper locations or addresses in ``mem`` objects. -These objects have a ``base`` expression, and optional ``disp`` and ``seg`` fields -to be used by MemoryZone_ objects. 
+in expressions but only as mapper_ locations or addresses in ``mem`` objects. +They have a ``base`` expression, a ``disp`` integer offset, +and an optional ``seg`` attribute to be used by MemoryZone_ objects. + +As illustrated below, simplification of ``ptr`` objects tends to extract constant +offsets found in the base expression to adjust the ``disp`` field. + +.. sourcecode:: python + + >>> a = reg('a',32) + >>> p = ptr(a) + >>> q = ptr(a,disp=17) + >>> print p,q + (a) (a+17) + >>> assert p+17 == q + >>> assert p+2 == q-15 + >>> assert (p+3).base == (q-5).base + + +A ``mem`` object is a symbolic memory value equipped with a pointer, a size, and +a special ``.mods`` attribute that will be discussed in mapper_. + +.. sourcecode:: python + + >>> x = mem(p,64,disp=2) + >>> y = mem(q-5,48,disp=-10) + >>> print x,y + M64(a+2) M48(a+2) + >>> assert x.bytes(4,6) == y[32:48] + + +Note: the segment attribute is currently not used by the core memory classes. -A ``mem`` object is a symbolic memory value equipped with a pointer and a size. -There is no direct relation between such expression and a MemoryZone_ state. It is -up to analysis methods to eventually update states according to such expressions. Operators ~~~~~~~~~ +Unary operators (``+``, ``-`` and ``~``) have elementary simplification rules: + +.. sourcecode:: python + + >>> a = reg('a',32) + >>> assert +a == -(-a) + >>> assert -a == 0-a + Most operations in Amoco involve left and right members sub-expressions. The operation will then usually proceed only if both member have the same size. If one member is not an expression but a Python integer, it will be implicitly "casted" to a constant of size required by the other expression member. Thus, it is possible to write ``r1+2`` and not ``r1+cst(2,32)``. +Binary operations have elementary simplification rules that try to arrange symbols +in lexical order and move constants to the right side of the expression. + +.. 
sourcecode:: python + + >>> a = reg('a',32) + >>> b = reg('b',32) + >>> print a+0, a*1, a^a, a*0, a&0, a|0 + a a 0x0 0x0 0x0 a + >>> print (b-a)|0 + ((-a)+b) + >>> assert b-a == (-a)+b + >>> assert -(a+b) == (-a)-b + >>> assert -(a-b) == b-a + >>> assert -(b-a) == (a-b)*1 + >>> assert -(1-a) == a-1 + >>> assert (-a+(b-1)) == b-a-1 + >>> e = -((b-1)-a) + >>> assert e == 1+(a-b) + >>> print e + ((a-b)+0x1) + >>> extract_offset(e) + (, 1) + >>> print _[0] + (a-b) + +Internal attributes and methods of ``op`` instances are: + +- ``.op``, the operator symbol (``.op.symbol``) and function (``.op.impl``), +- ``.r``, the right member sub-expression, +- ``.l``, the left member sub-expression of binary ops. +- ``.prop``, an or-ed flag indicating the kind of operators involved: + + + 1 means only arithmetic, + + 2 means only logic, + + 4 means only conditional, + + 8 means only shifts and rotations, + +- ``depth()`` returns the expression tree depth, +- ``limit(value)`` is a class method used to set a threshold parameter involved + in simplifying the expression to ``top`` when the expression's complexity is too high. + +The ``symbols_of(e)`` function returns the list of register expressions involved in ``e``. +The ``locations_of(e)`` function returns the list of *locations* used in ``e``. +The ``complexity(e)`` function computes an arbitrary complexity measure of expression ``e`` +which is linear in depth and number of symbols, and increases by a factor of ``prop``. + Composer and Slicer ~~~~~~~~~~~~~~~~~~~ +A ``comp`` object is a composite expression corresponding to a bit-vector made of +several expression parts. +A ``slc`` object is the expression obtained by extracting a bit-vector slice out +of an expression. + +The ``composer(parts)`` function, which takes as input the parts as a list of expressions in +least-to-most significant order, is the preferred method for instantiating composite objects.
+Since ``comp`` is essentially a container class for other expressions, the resulting object +is possibly of another class if some simplification occurred. + +.. sourcecode:: python + + >>> composer([cst(1,8),cst(2,8),cst(3,8)]) + + >>> c=_ + >>> assert c == 0x030201 + >>> a = reg('a',32) + >>> b = reg('b',32) + >>> c = comp(24) + >>> c[0:8] = (a+b)[24:32] + >>> c[8:24] = b[0:16] + >>> print c + { | [0:8]->(a+b)[24:32] | [8:24]->b[0:16] | } + >>> c[8:16] = cst(0xff,8) + >>> print c + { | [0:8]->(a+b)[24:32] | [8:16]->0xff | [16:24]->b[8:16] | } + >>> c[0:8] = cst(0x01,8) + >>> print c + { | [0:8]->0x1 | [8:16]->0xff | [16:24]->b[8:16] | } + >>> print c.simplify() + { | [0:16]->0xff01 | [16:24]->b[8:16] | } + +As shown above, a composite instance supports dynamic assignment of any parts defined by a python +slice object. Simplification of composite objects tends to merge contiguous constant parts. + +A ``slc`` expression is obtained by using a python slice object of the form [start:stop] +where start/stop are non-negative integers in the bit range of the sliced expression. +Simplification occurs when the sliced expression is itself of class ``slc`` or ``mem``: + +.. sourcecode:: python + + >>> a = reg('%a',32) + >>> ah = slc(a,24,8,ref='%ah') + >>> assert ah.x == a + >>> print ah.pos + 24 + >>> print ah + %ah + >>> ax = a[16:32] + >>> print ax + %a[16:32] + >>> print ax[0:8] + %a[16:24] + >>> print ax[8:16] + ah + >>> y = mem(a,64) + >>> print y[16:48] + M32(%a+2) + +Note that, as shown above, slices of registers can be instantiated with an optional +reference string that is used for printing whenever the matching register slice is involved. + +Note also that parts and slices [start:stop] bounds are limited to python integers only +(indices can't be symbolic!)
+ + Conditionals ~~~~~~~~~~~~ +The ``tst`` class is used for conditional expressions in the form ``tst(cond, eT, eF)`` +where ``cond`` is an expression, ``eT`` is the resulting expression whenever +``cond==1`` and ``eF`` is the resulting expression whenever ``cond==0``. + +.. sourcecode:: python + + >>> t = tst(a>0, c, cst(0xdeadbe,24)) + >>> print t + ((%a>0x0) ? { | [0:16]->0xff01 | [16:24]->b[8:16] | } : 0xdeadbe) + >>> t.l[16:24] = cst(0xab,8) + >>> print t.simplify() + ((%a>0x0) ? 0xabff01 : 0xdeadbe) + >>> t.tst.l = cst(-1,32) + >>> print t + ((-0x1>0x0) ? 0xabff01 : 0xdeadbe) + >>> print t.simplify() + 0xdeadbe + + mapper ~~~~~~ A ``mapper`` object captures the symbolic operations of a sequence of instructions by -mapping left-value expressions to right-value expressions. It represents the transition -function that allows to transform a memory state into another memory state that corresponds +mapping input expressions to output *locations* which are registers or pointers. +It represents the transition function from an input state to an output state corresponding to the execution of the captured instructions. -As shown in the ``i_MOVcc`` example above, the ``fmap`` object -is the mapper on which every instruction semantics operate (see asm_). +As shown in the ``i_MOVcc`` example above, the ``fmap`` argument of every instruction semantics +is a mapper on which the instruction currently operates (see asm_). .. 
sourcecode:: python + >>> from amoco.arch.x86.env import * + >>> from amoco.cas.mapper import mapper >>> m = mapper() - >>> m[esp] = cst(0x10,32) + >>> m[eax] = cst(0xabff01,32) + >>> print m + eax <- { | [0:32]->0xabff01 | } + >>> print m(eax) + 0xabff01 + >>> print m(ah) + 0xff + >>> m[eax[16:32]] = bx + >>> print m + eax <- { | [0:16]->0xff01 | [16:32]->bx | } + >>> print m(ax+cx) + (cx+0xff01) + >>> print m(eax[16:32]^ecx[16:32]) + (bx^ecx[16:32]) + >>> print m(mem(ecx+2,8)) + M8(ecx+2) + >>> print m(mem(eax+2,8)) + M8({ | [0:16]->0xff01 | [16:32]->bx | }+2) + +The mapper class defines two essential methods to set and get expressions in and out. + +- ``__setitem__`` is used for mapping any expression to a location which can be a register + (or a register slice), a pointer or a memory expression. When the location is a pointer, + the base expression refers to input state values, whereas a memory expression refers to + the output state (see example below). +- ``__call__`` is used for evaluating any expression in the mapper, by replacing every + register and memory object of the expression by their mapped expressions. + +A *push* instruction could thus be implemented using: + +.. sourcecode:: python + + >>> def push(fmap,x): + ... fmap[esp] = fmap(esp)-x.length + ... fmap[mem(esp,x.size)] = x # put x at the current (updated) esp address + ... + >>> m.clear() + >>> push(m, cst(0x41414141,32)) + >>> print m + esp <- { | [0:32]->(esp-0x4) | } + (esp-4) <- 0x41414141 + >>> push(m, ebx) >>> print m + (esp-4) <- 0x41414141 + esp <- { | [0:32]->(esp-0x8) | } + (esp-8) <- ebx + +Note that a ``__getitem__`` method is implemented as well in order to fetch items +that are locations of the mapper. So here, to get the value at the top of stack, we +can do: + +.. sourcecode:: python + + >>> print m[mem(esp-8,32)] # fetch the expression associated with ptr (esp-8) + ebx + >>> print m(mem(esp,32)) # evaluates mem(esp,32) => first evaluate ptr, then fetch. 
+ ebx + >>> print m(mem(esp+4,32)) + 0x41414141 + >>> print m[mem(esp-4,32)] + 0x41414141 + +The internal memory model of a mapper is a MemoryMap_: symbolic memory locations are related +to individual separated MemoryZone_ objects that deal with all read/write to/from location's +``ptr.base`` expression. + +.. sourcecode:: python + + >>> print m.memory() + + + > + +This model allows access to offsets that have not been explicitly written to before. +For example, if we now execute *mov ecx, [esp+2]* we still fetch the correct expression: + +.. sourcecode:: python + + >>> m[ecx] = m(mem(esp+2,32)) + >>> print m(ecx) + { | [0:16]->ebx[16:32] | [16:32]->0x4141 | } + +However, aliasing between zones is possible and must be avoided: imagine that we now +execute *mov byte ptr [eax], 0x42*, we obtain: + +.. sourcecode:: python + + >>> m[mem(eax,8)] = cst(0x42,8) + >>> print m + (esp-4) <- 0x41414141 + esp <- { | [0:32]->(esp-0x8) | } + (esp-8) <- ebx + ecx <- { | [0:16]->ebx[16:32] | [16:32]->0x4141 | } + (eax) <- 0x42 + >>> print m.memory() + + > + + > + +If we now again fetch memory at ``esp+2`` the previous answer is not valid anymore due +to a possible aliasing (overlapping) of ``eax`` and ``esp`` zones. Think of what the +memory should look like if ``eax`` value was ``esp-4`` for example. Let's try: + +.. sourcecode:: python + + >>> print m(mem(esp+2,32)) + M32$3(esp-6) + >>> mprev = mapper() + >>> mprev[eax] = esp-4 + >>> print mprev( m(mem(esp+2,32)) ) + { | [0:16]->ebx[16:32] | [16:32]->0x4142 | } + +Indeed, the mapper returns a special memory expression that embeds modifications +(saved in ``.mods`` of the mem expression) that have been applied on its memory until now, +and that must be executed in order to return a correct answer. As demonstrated above, +these mods are taken into account whenever the expression is evaluated in another mapper. + +Note that it is possible to force the mapper class to *assume no aliasing* : + +..
sourcecode:: python + + >>> print mapper.assume_no_aliasing + False + >>> mapper.assume_no_aliasing = True + >>> print m(mem(esp+2,32)) + { | [0:16]->ebx[16:32] | [16:32]->0x4141 | } + +In Amoco, a mapper instance is created for every basic block. The right +and left shift operators allow for right or left composition so that symbolic +forward or backward execution of several basic blocks is easy: + +.. sourcecode:: python + + >>> m1 = mapper() + >>> m1[eax] = ebx + >>> push(m1,eax) + >>> m2 = mapper() + >>> m2[ebx] = cst(0x33,32) + >>> push(m2,ebx) + >>> m2[eax] = m2(mem(esp,32)) + >>> print m1 + eax <- { | [0:32]->ebx | } + esp <- { | [0:32]->(esp-0x4) | } + (esp-4) <- eax + >>> print m2 + ebx <- { | [0:32]->0x33 | } + esp <- { | [0:32]->(esp-0x4) | } + (esp-4) <- ebx + eax <- { | [0:32]->ebx | } + >>> print m1>>m2 # forward execute m1 -> m2 + (esp-4) <- eax + ebx <- { | [0:32]->0x33 | } + esp <- { | [0:32]->(esp-0x8) | } + (esp-8) <- ebx + eax <- { | [0:32]->ebx | } + >>> print m2<<m1 + (esp-4) <- eax + ebx <- { | [0:32]->0x33 | } + esp <- { | [0:32]->(esp-0x8) | } + (esp-8) <- ebx + eax <- { | [0:32]->ebx | } + +TODO: mapper unions. + +smt +~~~ + +Amoco uses z3_ for constraint solving by translating its equation expressions +into z3_ equivalent objects. The interface with z3_ is implemented in ``cas/smt.py``. + +- ``cst`` expressions are translated as ``BitVecVal`` objects +- ``cfp`` expressions are translated as ``RealVal`` objects +- ``reg`` expressions are translated as ``BitVec`` objects +- ``comp`` expressions use the z3_ ``Concat`` function +- ``slc`` expressions use the z3_ ``Extract`` function +- ``mem`` expressions are converted as Concat of ``Array`` of ``BitVecSort(8)``, + with current endianness taken into account. +- ``tst`` expressions use the z3_ ``If`` function +- operators are translated by propagating translations to left & right sides. + +When the ``smt`` module is imported it replaces the ``.to_smtlib()`` method of +every expression class (which by default raises UnImplementedError).
+ +.. sourcecode:: python + + >>> from amoco.arch.x86.env import * + >>> from amoco.cas import smt + >>> z = (eax^cst(0xcafebabe,32))+(ebx+(eax>>2)) + >>> print z + ((eax^0xcafebabe)+(ebx+(eax>>0x2))) + >>> print z.to_smtlib() + (eax ^ 3405691582) + ebx + LShR(eax, 2) + >>> print z.to_smtlib().sexpr() + (bvadd (bvxor eax #xcafebabe) ebx (bvlshr eax #x00000002)) + >>> r = smt.solver([z==cst(0x0,32),al==0xa,ah==0x84]).get_model() + >>> print r + [eax = 33802, ebx = 889299018] + >>> x,y = [r[v].as_long() for v in r] + >>> ((x^0xcafebabe)+(y+(x>>2)))&0xffffffffL + 0L + >>> p = mem(esp,32) + >>> q = mem(esp+2,32) + >>> ql = q[0:16] + >>> ph = p[16:32] + >>> z = (p^cst(0xcafebabe,32))+(q+(p>>2)) + >>> m = smt.solver().get_mapper([z==cst(0,32),esp==0x0804abcd]) + >>> print m + (esp+2) <- 0x7ffc9151 + (esp) <- 0x9151babe + esp <- { | [0:32] -> 0x0804abcd | } + + +In the ``smt`` module, the ``solver`` class is typically used to verify that some +properties hold and find a set of input (concrete) values to be set for example in +an emulator or debugger to reach a chosen branch. A solver instance can be created with +a python list of expressions, or expressions can be added afterward. + +The ``.get_model()`` method will check added constraint equations and return a +z3_ ``ModelRef`` object if the z3_ solver has returned ``z3.sat`` or None otherwise. +A list of equations to be taken into account can be provided as well with ``.add()``. + +The ``.get_mapper()`` method calls ``get_model`` and returns a mapper object with +locations set to their ``cst`` values. A list of equations can be provided here too. main.py ------- This module contains *high-level* analysis techniques implemented as classes that take a program abstraction provided by the system_ package. -Currently, only 3 simple techniques are released: + +The first 3 basic techniques are: - *linear-sweep* (``lsweep`` class) disassembles instructions without taking into account any branching instruction.
@@ -724,25 +1223,170 @@ Currently, only 3 simple techniques are released: follow branch policy to avoid linear sweep and evaluates the program counter by taking into account the parent block semantics. +Other more elaborate techniques are: + +- *fast backward* (``fbackward``) inherits from ``lforward`` but evaluates the + program counter backward by taking *first-parent* block until either the + expression is a constant target or the root node of the graph component (entry of function) + is reached. The analysis proceeds then by evaluating the pc expression in every + caller block, assuming that no frame-aliasing occurred (pointer arguments did not + mess up with the caller's stack.) A ``func`` instance is created but its mapper + contains by default only the computed pc expression. + +- *link-backward* (``lbackward``) inherits from ``fbackward`` but walks back *all* + parent-paths up to the entry node, composing and assembling all mappers to end up + with an approximated mapper of the entire function. code.py ------- +The ``code`` module defines two main classes: + +- a ``block`` contains a list of instructions and computes the associated mapper object. + The arch-dependent CoreExec classes (see system_ below) can add ``tag`` indicators like + ``FUNC_START`` (if block looks like a function entry), ``FUNC_CALL`` if block makes a call, etc. +- a ``func`` contains the cfg graph component of a function once it has been fully + recovered by an analysis class. It inherits from ``block`` and contains a mapper that + captures an approximation of the entire function. + +blocks are created by the ``lsweep.iterblocks()`` iterator (or by ``.get_block()``) which +is inherited by all ``main`` analysis classes discussed above. Functions are created by +``fbackward`` and ``lbackward`` classes only. + +The ``xfunc`` class is used when an external expression is called. It contains a mapper +built by a ``stub`` function. 
Instances are present in graph nodes but have a zero length +and no address and thus do not exist in memory. + cfg.py ------ +Classes ``node``, ``link`` and ``graph`` use *grandalf* Vertex/Edge/Graph with additional +formatters or ways to compare instances by name. A node's data is a block instance, and an +edge's data is possibly a set of conditional expressions. A graph connected component is +a function's control-flow graph (a *graph_core* object). +The ``graph.add_vertex`` extends Graph.add_vertex to detect that the node to be added *cuts* +an existing node and adjusts the graph structure accordingly. +The ``graph.spool()`` method provides a list of the current leaves in the graph. +The ``graph.get_node(name)`` method returns a node object by its name. + system ------ +The system_ package is the main interface with the binary program. It contains executable +format parsers, the memory model, the execution engine, and some operating system +models responsible for mapping the binary in the memory model, setting up the environment +and taking care of system calls. + +The ``loader.py`` module is the frontend that will try to parse the input file and import the +targeted system_ and arch_ modules. If the executable format is unknown or if the input is a +bytecode python string, the binary is mapped at address 0 in a ``RawExec`` instance. + +The ``elf.py`` module implements the ``Elf32`` and ``Elf64`` classes. The ``pe.py`` module +implements the ``PE`` class which handles both PE32 and PE32+ (64-bits). + +The ``core.py`` module implements the memory model classes and the CoreExec_ generic +execution engine inherited by various system's classes like ``linux_x86.ELF``, +``linux_arm.ELF`` or ``win32.PE`` and ``win64.PE``. + MemoryZone ~~~~~~~~~~ +The memory model in Amoco is implemented by the MemoryMap class in ``system/core.py``. Instances +of MemoryMap are created by the system's CoreExec classes and by every block's mapper_ objects. 
+This model associates memory locations with raw bytes or symbolic expressions in separate *zones* +implemented by the MemoryZone_ class. +Each zone is associated with a symbolic location reference, the default ``None`` reference zone +being used for concrete (cst) locations. +In a MemoryZone_, an *address* is an integer offset to the reference location expression, and +the associated *value* is a ``mo`` memory object that stores bytes or an expression wrapped in +a ``datadiv`` object. + CoreExec ~~~~~~~~ +The execution engine core class is the user's frontend to the binary. It is responsible for +creating a MemoryMap with the binary image, reading data in memory, or reading instructions +at some address by calling ``cpu.disassemble()``. + stubs ~~~~~ +System calls and externals are emulated by implementing ``stubs`` that modify a mapper instance. A *stub* +is a Python function decorated with ``@stub``. For example, in +the *Linux* system (see ``linux_x86.py``), the *__libc_start_main* is approximated by: + +.. sourcecode:: python + + @stub + def __libc_start_main(m,**kargs): + m[cpu.eip] = m(cpu.mem(cpu.esp+4,32)) + cpu.push(m,cpu.ext('exit',size=32)) + +The default stub performs only a ``ret``-like instruction. + +Licence +======= + +Please see `LICENSE`_. + + +Changelog +========= + +- `v2.4.0`_ + + * merge Z3 solver interface, see smt.py and to_smtlib() exp method + * merge fbackward analysis and code func class. + * improve expressions: separate unary and binary ops, "normalize" expressions + * improve mapper with memory() method and aliasing-resistant composition operators + * improve MemoryZone class: return top expression parts instead of raising MemoryError. + * adding RawExec class for shellcode-like input + * support string input in ELF/PE classes. 
+ * fix various x86/x64 bugs + * protect against resizing of env registers + * add win64 loader + * adjust log levels and optional file from conf + * update README + +- `v2.3.5`_ + + * add x64 arch + full x86/64 SSE decoder + * hotfix x86/x64 inversion of {88}/{8a} mov instructions + * fix various x86 decoders and semantics + * code cosmetics + +- `v2.3.4`_ + + * merge armv7/thumb fixed semantics + * add x86 fpu decoders + * add locate function in MemoryMap + * Fix core read_instruction on map boundary + * Fix PE import parsing and TLS Table builder + * faster generic decoder + * hotfix various x86 decoders + * add some x86 SSE decoders + +- `v2.3.3`_ + + * support for MSP430 and PIC18 microcontrollers + * fix sparc rett, udiv/sdiv and formats + * fix x86 jcxz instruction decoding + +- `v2.3.2`_ + + * merge z80/GB architecture, fix sparc reported issues + * add example of SSE2 decoding (fixed) + +- `v2.3.1`_ + + * add licence file + * fix sparc architecture + * avoid ptr expression when address is not deref + * fix eqn_helpers simplifier rules + * README updated + * new PE class (tested on CoST.exe) + support for multiple entrypoints. + + .. _grandalf: https://github.com/bdcht/grandalf .. _crysp: https://github.com/bdcht/crysp .. _minisat: http://minisat.se/ @@ -751,3 +1395,10 @@ stubs .. _armv8: http://www.cs.utexas.edu/~peterson/arm/DDI0487A_a_armv8_arm_errata.pdf .. _pyparsing: http://pyparsing.wikispaces.com/ .. _ply: http://www.dabeaz.com/ply/ +.. _LICENSE: https://github.com/bdcht/amoco/blob/release/LICENSE +.. _v2.4.0: https://github.com/bdcht/amoco/releases/tag/v2.4.0 +.. _v2.3.5: https://github.com/bdcht/amoco/releases/tag/v2.3.5 +.. _v2.3.4: https://github.com/bdcht/amoco/releases/tag/v2.3.4 +.. _v2.3.3: https://github.com/bdcht/amoco/releases/tag/v2.3.3 +.. _v2.3.2: https://github.com/bdcht/amoco/releases/tag/v2.3.2 +.. 
_v2.3.1: https://github.com/bdcht/amoco/releases/tag/v2.3.1 diff --git a/amoco/__init__.py b/amoco/__init__.py index 204623d..63caa76 100644 --- a/amoco/__init__.py +++ b/amoco/__init__.py @@ -1,2 +1,4 @@ +# -*- coding: utf-8 -*- + from .config import conf from .main import * diff --git a/amoco/arch/arm/cpu_armv7.py b/amoco/arch/arm/cpu_armv7.py index 8854c76..a94001a 100644 --- a/amoco/arch/arm/cpu_armv7.py +++ b/amoco/arch/arm/cpu_armv7.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.arch.arm.v7.asm import * diff --git a/amoco/arch/arm/cpu_armv8.py b/amoco/arch/arm/cpu_armv8.py index a8b0d71..6e4d361 100644 --- a/amoco/arch/arm/cpu_armv8.py +++ b/amoco/arch/arm/cpu_armv8.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.arch.arm.v8.asm64 import * diff --git a/amoco/arch/arm/v7/asm.py b/amoco/arch/arm/v7/asm.py index 321fe6a..b51ce21 100644 --- a/amoco/arch/arm/v7/asm.py +++ b/amoco/arch/arm/v7/asm.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from .env import * @@ -32,7 +34,7 @@ def __check_state(i,fmap): if address._is_cst: raise InstructionError(i) else: - logger.warning('impossible to check isetstate (ARM/Thumb) until pc is cst') + logger.verbose('impossible to check isetstate (ARM/Thumb) until pc is cst') def __pre(i,fmap): fmap[pc] = fmap(pc+i.length) @@ -49,6 +51,7 @@ def __pre(i,fmap): return cond,dest,op1 def __setflags(fmap,cond,cout,result,overflow=None): + if cout is None: cout = fmap(C) fmap[C] = 
tst(cond,cout,fmap(C)) fmap[Z] = tst(cond,(result==0),fmap(Z)) fmap[N] = tst(cond,(result<0),fmap(N)) @@ -164,13 +167,13 @@ def i_BIC(i,fmap): __setflags(fmap,cond,cout,result) def i_CMN(i,fmap): - cond,dest,op1,op2 = __pre(i,fmap) - result,cout,overflow = AddWithCarry(fmap(op1),fmap(op2)) + cond,dest,op1 = __pre(i,fmap) + result,cout,overflow = AddWithCarry(fmap(dest),fmap(op1)) __setflags(fmap,cond,cout,result,overflow) def i_CMP(i,fmap): - cond,dest,op1,op2 = __pre(i,fmap) - result,cout,overflow = SubWithBorrow(fmap(op1),fmap(op2)) + cond,dest,op1 = __pre(i,fmap) + result,cout,overflow = SubWithBorrow(fmap(dest),fmap(op1)) __setflags(fmap,cond,cout,result,overflow) def i_EOR(i,fmap): @@ -270,15 +273,15 @@ def i_SUB(i,fmap): __setflags(fmap,cond,cout,result,overflow) def i_TEQ(i,fmap): - cond,dest,op1,op2 = __pre(i,fmap) - result = fmap(op1 ^ op2) - cout = fmap(op2.bit(31)) + cond,dest,op1 = __pre(i,fmap) + result = fmap(dest ^ op1) + cout = fmap(op1.bit(31)) __setflags(fmap,cond,cout,result) def i_TST(i,fmap): - cond,dest,op1,op2 = __pre(i,fmap) - result = fmap(op1 & op2) - cout = fmap(op2.bit(31)) + cond,dest,op1 = __pre(i,fmap) + result = fmap(dest & op1) + cout = fmap(op1.bit(31)) __setflags(fmap,cond,cout,result) # shifts (4.4.2) @@ -864,6 +867,7 @@ def i_PLI(i,fmap): def i_SETEND(i,fmap): fmap[pc] = fmap(pc+i.length) internals['endianstate'] = 1 if i.set_bigend else 0 + exp.setendian(-1 if i.set_bigend else +1) # event hint def i_SEV(i,fmap): diff --git a/amoco/arch/arm/v7/env.py b/amoco/arch/arm/v7/env.py index f1ed206..028cbe6 100644 --- a/amoco/arch/arm/v7/env.py +++ b/amoco/arch/arm/v7/env.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: @@ -11,22 +13,22 @@ #registers (application level, see B1.3.2) : #------------------------------------------- -r0 = 
reg('r0',32) # -r1 = reg('r1',32) # -r2 = reg('r2',32) # -r3 = reg('r3',32) # -r4 = reg('r4',32) # -r5 = reg('r5',32) # -r6 = reg('r6',32) # -r7 = reg('r7',32) # -r8 = reg('r8',32) # -r9 = reg('r9',32) # -r10 = reg('r10',32) # -r11 = reg('r11',32) # -r12 = reg('r12',32) # -r13 = reg('r13',32) # -r14 = reg('r14',32) # -r15 = reg('r15',32) # +r0 = reg('r0',32) # +r1 = reg('r1',32) # +r2 = reg('r2',32) # +r3 = reg('r3',32) # +r4 = reg('r4',32) # +r5 = reg('r5',32) # +r6 = reg('r6',32) # +r7 = reg('r7',32) # +r8 = reg('r8',32) # +r9 = reg('r9',32) # +r10 = reg('r10',32) # +r11 = reg('r11',32) # +r12 = reg('r12',32) # +r13 = reg('r13',32) # +r14 = reg('r14',32) # +r15 = reg('r15',32) # regs = [r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15] diff --git a/amoco/arch/arm/v7/formats.py b/amoco/arch/arm/v7/formats.py index 14cfaba..39ce1ba 100644 --- a/amoco/arch/arm/v7/formats.py +++ b/amoco/arch/arm/v7/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from .env import * from .utils import * from amoco.arch.core import Formatter diff --git a/amoco/arch/arm/v7/spec_armv7.py b/amoco/arch/arm/v7/spec_armv7.py index 81663e2..b65175b 100644 --- a/amoco/arch/arm/v7/spec_armv7.py +++ b/amoco/arch/arm/v7/spec_armv7.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. 
diff --git a/amoco/arch/arm/v7/spec_thumb.py b/amoco/arch/arm/v7/spec_thumb.py index a942f48..5621be3 100644 --- a/amoco/arch/arm/v7/spec_thumb.py +++ b/amoco/arch/arm/v7/spec_thumb.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. @@ -117,7 +117,7 @@ def A_default(obj,Rm): obj.type = type_data_processing obj.cond = env.CONDITION_AL -@ispec("16[ 1010 0 Rd(3) imm8(8) ]", mnemonic="ADR") +@ispec("16[ 1010 0 Rd(3) imm8(8) ]", mnemonic="ADR", add=True) def A_adr(obj,Rd,imm8): obj.d = env.regs[Rd] obj.imm32 = env.cst(imm8<<2,32) diff --git a/amoco/arch/arm/v7/spec_thumb2.py b/amoco/arch/arm/v7/spec_thumb2.py index ea6ff35..d9def98 100644 --- a/amoco/arch/arm/v7/spec_thumb2.py +++ b/amoco/arch/arm/v7/spec_thumb2.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. 
@@ -147,8 +149,8 @@ def A_bits(obj,Rn,imm3,Rd,imm2,msb): @ispec("32[ 11 J1 0 J2 #imm10L(10) 0 11110 S #imm10H(10) ]", mnemonic="BLX") def A_label(obj,S,imm10H,J1,J2,imm10L): - I1, I2 = str(~(J1^S)), str(~(J2^S)) - v = int(S+I1+I2+imm10H+imm10L+'00',2) + I1, I2 = str(~(J1^S)&0x1), str(~(J2^S)&0x1) + v = int(str(S)+I1+I2+imm10H+imm10L+'00',2) obj.imm32 = env.cst(v,25).signextend(32) obj.operands = [obj.imm32] obj.type = type_control_flow @@ -564,7 +566,7 @@ def A_reglist(obj,Rt): obj.cond = env.CONDITION_AL @ispec("32[ 0 #M 0 #register_list(13) 11101 00 100 1 0 1101 ]", mnemonic="PUSH") -def A_reglist(obj,P,M,register_list): +def A_reglist(obj,M,register_list): obj.registers = [env.regs[i] for i,r in enumerate(register_list[::-1]+'0'+M+'0') if r=='1'] if len(obj.registers)<2: raise InstructionError(obj) obj.operands = [obj.registers] diff --git a/amoco/arch/arm/v7/utils.py b/amoco/arch/arm/v7/utils.py index cfd7f35..aeeb5c6 100644 --- a/amoco/arch/arm/v7/utils.py +++ b/amoco/arch/arm/v7/utils.py @@ -1,12 +1,14 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.cas.expressions import * def LSL_C(x,shift): - assert shift>0 - carry_out = x.bit(-shift) + assert shift>=0 + carry_out = x.bit(-shift) if shift>0 else None return (x<0 + assert shift>=0 + if shift==0: return (x,None) carry_out = x.bit(shift-1) if shift>shift,carry_out) @@ -23,16 +26,17 @@ def LSR(x,shift): return x>>shift def ASR_C(x,shift): - assert shift>0 + assert shift>=0 n = x.size xx = x.signextend(n+shift) - return (xx[shift:shift+n-1],xx.bit(shift-1)) + carry_out = xx.bit(shift-1) if shift>0 else None + return (xx[shift:shift+n],carry_out) def ASR(x,shift): assert shift>=0 n = x.size xx = x.signextend(n+shift) - return xx[shift:shift+n-1] + return xx[shift:shift+n] def ROR_C(x,shift): assert shift != 0 diff --git 
a/amoco/arch/arm/v8/asm64.py b/amoco/arch/arm/v8/asm64.py index 3983e87..9bb83a6 100644 --- a/amoco/arch/arm/v8/asm64.py +++ b/amoco/arch/arm/v8/asm64.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.logger import Log diff --git a/amoco/arch/arm/v8/env64.py b/amoco/arch/arm/v8/env64.py index 22b37d7..07b2998 100644 --- a/amoco/arch/arm/v8/env64.py +++ b/amoco/arch/arm/v8/env64.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: diff --git a/amoco/arch/arm/v8/formats.py b/amoco/arch/arm/v8/formats.py index 7d93163..d7ed75e 100644 --- a/amoco/arch/arm/v8/formats.py +++ b/amoco/arch/arm/v8/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from .env64 import * from .utils import * from amoco.arch.core import Formatter diff --git a/amoco/arch/arm/v8/spec_armv8.py b/amoco/arch/arm/v8/spec_armv8.py index f5905e7..1065d7f 100644 --- a/amoco/arch/arm/v8/spec_armv8.py +++ b/amoco/arch/arm/v8/spec_armv8.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. @@ -41,7 +43,7 @@ def ExtendReg(r,etype,shift=0): return r[0:l].extend(signed,N)< will decode 4 bits at position [28,29,30,31] and provide this value as an integer # in 'obj.cond' instruction instance attribute. # => will decode 24 bits at positions 23..0 and provide this value as an integer as -# argument 'imm24' of the decorated function f. +# argument 'imm24' of the decorated function f. 
# => will set obj.mnemonic to 'BL' and pass argument _flag=True to f. -# => will call f(obj,...) +# => will call f(obj,...) # => will return obj # additional arguments to ispec decorator **must** be provided with symbol=value form and @@ -257,6 +256,7 @@ def __call__(self,bytestring,**kargs): # with value 'BL' when the function is called. # ----------------------------------------- class ispec(object): + __slots__ = ['format','iattr','fargs','ast','fix','mask','pfx','size','hook'] def __init__(self,format,**kargs): self.format = format diff --git a/amoco/arch/gas.py b/amoco/arch/gas.py index f24d727..2dc5486 100644 --- a/amoco/arch/gas.py +++ b/amoco/arch/gas.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license try: diff --git a/amoco/arch/msp430/asm.py b/amoco/arch/msp430/asm.py index 4fd36a8..1e02b4e 100644 --- a/amoco/arch/msp430/asm.py +++ b/amoco/arch/msp430/asm.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from .env import * @@ -13,7 +13,7 @@ def autoinc(i,fmap): sz = 2 if i.BW else 1 if rr is not None: fmap[rr] = fmap(rr+sz) -# Ref: MSP430x1xx Family Users's Guide (Rev. F) +# Ref: MSP430x1xx Family Users's Guide (Rev. 
F) #------------------------------------------------------------------------------ def i_MOV(i,fmap): diff --git a/amoco/arch/msp430/cpu.py b/amoco/arch/msp430/cpu.py index 7df0cd1..e5af80a 100644 --- a/amoco/arch/msp430/cpu.py +++ b/amoco/arch/msp430/cpu.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.msp430.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/msp430/env.py b/amoco/arch/msp430/env.py index 659342e..21d628b 100644 --- a/amoco/arch/msp430/env.py +++ b/amoco/arch/msp430/env.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: diff --git a/amoco/arch/msp430/formats.py b/amoco/arch/msp430/formats.py index 8bda531..6ebc64a 100644 --- a/amoco/arch/msp430/formats.py +++ b/amoco/arch/msp430/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from .env import * from amoco.arch.core import Formatter diff --git a/amoco/arch/msp430/parsers.py b/amoco/arch/msp430/parsers.py index f4fc054..8eeea30 100644 --- a/amoco/arch/msp430/parsers.py +++ b/amoco/arch/msp430/parsers.py @@ -1,7 +1,8 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license import pyparsing as pp diff --git a/amoco/arch/msp430/spec_msp430.py b/amoco/arch/msp430/spec_msp430.py index ff1ffc4..7732764 100644 --- a/amoco/arch/msp430/spec_msp430.py +++ b/amoco/arch/msp430/spec_msp430.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published 
under GPLv2 license # ref: MSP430x1xx User's Guide. @@ -15,7 +15,7 @@ # instruction MSP430 decoders #------------------------------------------------------- -# get operand type/value based on addressing mode: +# get operand type/value based on addressing mode: def getopd(obj,mode,reg,data,CGR=False): r = env.R[reg] size = 8 if obj.BW else 16 diff --git a/amoco/arch/pic/F46K22/asm.py b/amoco/arch/pic/F46K22/asm.py index f253a77..1f70a65 100644 --- a/amoco/arch/pic/F46K22/asm.py +++ b/amoco/arch/pic/F46K22/asm.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from .env import * diff --git a/amoco/arch/pic/F46K22/env.py b/amoco/arch/pic/F46K22/env.py index e6b29ee..a4a1717 100644 --- a/amoco/arch/pic/F46K22/env.py +++ b/amoco/arch/pic/F46K22/env.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: diff --git a/amoco/arch/pic/F46K22/formats.py b/amoco/arch/pic/F46K22/formats.py index 0076d7c..34b399a 100644 --- a/amoco/arch/pic/F46K22/formats.py +++ b/amoco/arch/pic/F46K22/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from .env import * from amoco.arch.core import Formatter diff --git a/amoco/arch/pic/F46K22/spec_pic18.py b/amoco/arch/pic/F46K22/spec_pic18.py index 744b8c1..b41f825 100644 --- a/amoco/arch/pic/F46K22/spec_pic18.py +++ b/amoco/arch/pic/F46K22/spec_pic18.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.arch.pic.F46K22 import env diff --git 
a/amoco/arch/pic/cpu_pic18f46k22.py b/amoco/arch/pic/cpu_pic18f46k22.py index 62814d0..4e5ea1f 100644 --- a/amoco/arch/pic/cpu_pic18f46k22.py +++ b/amoco/arch/pic/cpu_pic18f46k22.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.pic.F46K22.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/sparc/asm.py b/amoco/arch/sparc/asm.py index f3e55cb..e4f363c 100644 --- a/amoco/arch/sparc/asm.py +++ b/amoco/arch/sparc/asm.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from .env import * diff --git a/amoco/arch/sparc/cpu_v8.py b/amoco/arch/sparc/cpu_v8.py index 65a3004..561eb37 100644 --- a/amoco/arch/sparc/cpu_v8.py +++ b/amoco/arch/sparc/cpu_v8.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.sparc.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/sparc/env.py b/amoco/arch/sparc/env.py index fb3c782..3f6116b 100644 --- a/amoco/arch/sparc/env.py +++ b/amoco/arch/sparc/env.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: diff --git a/amoco/arch/sparc/formats.py b/amoco/arch/sparc/formats.py index aee73a6..862de7f 100644 --- a/amoco/arch/sparc/formats.py +++ b/amoco/arch/sparc/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from .env import * from .utils import * from amoco.arch.core import Formatter diff --git a/amoco/arch/sparc/parsers.py b/amoco/arch/sparc/parsers.py index 7720ad4..5090ccf 100644 --- 
a/amoco/arch/sparc/parsers.py +++ b/amoco/arch/sparc/parsers.py @@ -1,7 +1,8 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license import pyparsing as pp diff --git a/amoco/arch/sparc/spec_v8.py b/amoco/arch/sparc/spec_v8.py index a5bce60..911d39b 100644 --- a/amoco/arch/sparc/spec_v8.py +++ b/amoco/arch/sparc/spec_v8.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. diff --git a/amoco/arch/sparc/utils.py b/amoco/arch/sparc/utils.py index 738f37c..4daf4c5 100644 --- a/amoco/arch/sparc/utils.py +++ b/amoco/arch/sparc/utils.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + CONDB = { 0b1000: 'ba', 0b0000: 'bn', diff --git a/amoco/arch/x64/asm.py b/amoco/arch/x64/asm.py index b2e37a9..9c8dfdb 100644 --- a/amoco/arch/x64/asm.py +++ b/amoco/arch/x64/asm.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from .env import * @@ -13,12 +13,12 @@ #------------------------------------------------------------------------------ # utils : def push(fmap,x): - fmap[rsp] = fmap[rsp]-x.length + fmap[rsp] = fmap(rsp-x.length) fmap[mem(rsp,x.size)] = x def pop(fmap,l): fmap[l] = fmap(mem(rsp,l.size)) - fmap[rsp] = fmap[rsp]+l.length + fmap[rsp] = fmap(rsp+l.length) def parity(x): x = x.zeroextend(64) @@ -78,13 +78,13 @@ def i_RET(i,fmap): pop(fmap,rip) def i_HLT(i,fmap): - ext('halt').call(fmap) + fmap[rip] = top(64) 
#------------------------------------------------------------------------------ def _ins_(i,fmap,l): counter = cx if i.misc['adrsz'] else rcx - loc = mem(fmap(rdi),l*8) - src = ext('IN%s'%fmap(dx),l*8).call(fmap) + loc = mem(rdi,l*8) + src = ext('IN',size=l*8).call(port=fmap(dx)) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -105,13 +105,11 @@ def i_INSD(i,fmap): def _outs_(i,fmap,l): counter = cx if i.misc['adrsz'] else rcx src = fmap(mem(rsi,l*8)) - loc = ext('OUT%s'%fmap(dx),l*8).call(fmap) + ext('OUT').call(fmap,src=fmap(mem(rsi,l*8))) if i.misc['rep']: - fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 fmap[rip] = tst(fmap(counter)==0, fmap[rip]+i.length, fmap[rip]) else: - fmap[loc] = src fmap[rip] = fmap[rip]+i.length fmap[rdi] = tst(fmap(df),fmap(rdi)-l,fmap(rdi)+l) @@ -124,8 +122,7 @@ def i_OUTSD(i,fmap): #------------------------------------------------------------------------------ def i_INT3(i,fmap): - fmap[rip] = fmap[rip]+i.length - ext('INT3').call(fmap) + fmap[rip] = ext('INT3',size=64) def i_CLC(i,fmap): fmap[rip] = fmap[rip]+i.length @@ -188,8 +185,8 @@ def i_POPFQ(i,fmap): #------------------------------------------------------------------------------ def _cmps_(i,fmap,l): counter,d,s = ecx,edi,esi if i.misc['adrsz'] else rcx,rdi,rsi - dst = mem(fmap(d),l*8) - src = mem(fmap(s),l*8) + dst = fmap(mem(d,l*8)) + src = fmap(mem(s,l*8)) x, carry, overflow = SubWithBorrow(dst,src) if i.misc['rep']: fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(dst,src)) @@ -208,8 +205,8 @@ def _cmps_(i,fmap,l): fmap[cf] = carry fmap[of] = overflow fmap[rip] = fmap[rip]+i.length - fmap[d] = tst(fmap(df),fmap(d)-l,fmap(d)+l) - fmap[s] = tst(fmap(df),fmap(s)-l,fmap(s)+l) + fmap[d] = fmap(tst(df,d-l,d+l)) + fmap[s] = fmap(tst(df,s-l,s+l)) def i_CMPSB(i,fmap): _cmps_(i,fmap,1) @@ -223,8 +220,8 @@ def i_CMPSQ(i,fmap): 
#------------------------------------------------------------------------------ def _scas_(i,fmap,l): counter,d = ecx,edi if i.misc['adrsz'] else rcx,rdi - a = {1:al, 2:ax, 4:eax, 8:rax}[l] - src = mem(fmap(d),l*8) + a = fmap({1:al, 2:ax, 4:eax, 8:rax}[l]) + src = fmap(mem(d,l*8)) x, carry, overflow = SubWithBorrow(a,src) if i.misc['rep']: fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(a,src)) @@ -258,7 +255,7 @@ def i_SCASQ(i,fmap): def _lods_(i,fmap,l): counter,s = (ecx,esi) if i.misc['adrsz'] else (rcx,rsi) loc = {1:al, 2:ax, 4:eax, 8:rax}[l] - src = mem(fmap(s),l*8) + src = fmap(mem(s,l*8)) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -266,7 +263,7 @@ def _lods_(i,fmap,l): else: fmap[loc] = src fmap[rip] = fmap[rip]+i.length - fmap[s] = tst(fmap(df),fmap(s)-l,fmap(s)+l) + fmap[s] = fmap(tst(df,s-l,s+l)) def i_LODSB(i,fmap): _lods_(i,fmap,1) @@ -283,8 +280,8 @@ def _stos_(i,fmap,l): counter,d = ecx,edi else: counter,d = rcx,rdi - src = {1:al, 2:ax, 4:eax, 8:rax}[l] - loc = mem(fmap(d),l*8) + loc = mem(d,l*8) + src = fmap({1:al, 2:ax, 4:eax, 8:rax}[l]) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -309,8 +306,8 @@ def _movs_(i,fmap,l): counter,d,s = ecx,edi,esi else: counter,d,s = rcx,rdi,rsi - loc = mem(fmap(d),l*8) - src = mem(fmap(s),l*8) + loc = mem(d,l*8) + src = fmap(mem(s,l*8)) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -343,14 +340,14 @@ def i_OUT(i,fmap): op2 = fmap(i.operands[1]) ext('OUT%s'%op1).call(fmap,arg=op2) -#op1_src retreives fmap[op1] (op1 value): +#op1_src retreives fmap[op1] (op1 value): def i_PUSH(i,fmap): fmap[rip] = fmap[rip]+i.length op1 = fmap(i.operands[0]) if op1.size==8: op1 = op1.signextend(64) push(fmap,op1) -#op1_dst retreives op1 location: +#op1_dst retreives op1 location: def i_POP(i,fmap): fmap[rip] = fmap[rip]+i.length op1 = i.operands[0] @@ -362,8 
+359,7 @@ def i_CALL(i,fmap): op1 = fmap(i.operands[0]) op1 = op1.signextend(pc.size) target = pc+op1 if not i.misc['absolute'] else op1 - if target._is_ext: target.call(fmap) - else: fmap[rip] = target + fmap[rip] = target def i_CALLF(i,fmap): @@ -375,8 +371,7 @@ def i_JMP(i,fmap): op1 = fmap(i.operands[0]) op1 = op1.signextend(pc.size) target = pc+op1 if not i.misc['absolute'] else op1 - if target._is_ext: target.call(fmap) - else: fmap[rip] = target + fmap[rip] = target def i_JMPF(i,fmap): logger.verbose('%s semantic is not defined'%i.mnemonic) @@ -434,7 +429,7 @@ def i_INT(i,fmap): fmap[rip] = fmap[rip]+i.length op1 = fmap(i.operands[0]) push(fmap,fmap[rip]) - ext('INT%s'%op1).call(fmap) + fmap[eip] = ext('INT',port=op1,size=64) def i_INC(i,fmap): op1 = i.operands[0] diff --git a/amoco/arch/x64/cpu_x64.py b/amoco/arch/x64/cpu_x64.py index 5dcc056..922f6e6 100644 --- a/amoco/arch/x64/cpu_x64.py +++ b/amoco/arch/x64/cpu_x64.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.x64.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/x64/env.py b/amoco/arch/x64/env.py index 10ac0c0..b4187fd 100644 --- a/amoco/arch/x64/env.py +++ b/amoco/arch/x64/env.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: @@ -10,15 +10,15 @@ # 64bits registers : #------------------- -rax = reg('rax',64) # accumulator for operands and results data -rbx = reg('rbx',64) # pointer to data in the DS segment -rcx = reg('rcx',64) # counter for string and loop operations -rdx = reg('rdx',64) # I/O pointer -rbp = reg('rbp',64) # pointer to data in the stack (SS segment) -rsp = reg('rsp',64) # stack pointer (SS segment) -rsi = reg('rsi',64) # ptr to data in 
segment pointed by DS; src ptr for strings -rdi = reg('rdi',64) # ptr to data in segment pointed by ES; dst ptr for strings -rip = reg('rip',64) # instruction pointer in 64 bit mode +rax = reg('rax',64) # accumulator for operands and results data +rbx = reg('rbx',64) # pointer to data in the DS segment +rcx = reg('rcx',64) # counter for string and loop operations +rdx = reg('rdx',64) # I/O pointer +rbp = reg('rbp',64) # pointer to data in the stack (SS segment) +rsp = reg('rsp',64) # stack pointer (SS segment) +rsi = reg('rsi',64) # ptr to data in segment pointed by DS; src ptr for strings +rdi = reg('rdi',64) # ptr to data in segment pointed by ES; dst ptr for strings +rip = reg('rip',64) # instruction pointer in 64 bit mode rflags = reg('rflags',64) @@ -59,19 +59,19 @@ dh = slc(rdx,8,8,'dh') cf = slc(rflags,0,1,'cf') # carry/borrow flag -pf = slc(rflags,2,1,'pf') # parity flag -zf = slc(rflags,6,1,'zf') # zero flag -sf = slc(rflags,7,1,'sf') # sign flag -df = slc(rflags,10,1,'df') # direction flag -of = slc(rflags,11,1,'of') # overflow flag +pf = slc(rflags,2,1,'pf') # parity flag +zf = slc(rflags,6,1,'zf') # zero flag +sf = slc(rflags,7,1,'sf') # sign flag +df = slc(rflags,10,1,'df') # direction flag +of = slc(rflags,11,1,'of') # overflow flag # segment registers & other mappings: cs = reg('cs',16) # segment selector for the code segment -ds = reg('ds',16) # segment selector to a data segment +ds = reg('ds',16) # segment selector to a data segment ss = reg('ss',16) # segment selector to the stack segment -es = reg('es',16) # (data) -fs = reg('fs',16) # (data) -gs = reg('gs',16) # (data) +es = reg('es',16) # (data) +fs = reg('fs',16) # (data) +gs = reg('gs',16) # (data) r8 = reg('r8',64); r8d = slc(r8,0,32,'r8d'); r8w = slc(r8,0,16,'r8w'); r8l = slc(r8,0,8,'r8l') r9 = reg('r9',64); r9d = slc(r9,0,32,'r9d'); r9w = slc(r9,0,16,'r9w'); r9l = slc(r9,0,8,'r9l') diff --git a/amoco/arch/x64/formats.py b/amoco/arch/x64/formats.py index 3e4f69c..238bab3 100644 --- 
a/amoco/arch/x64/formats.py +++ b/amoco/arch/x64/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.core import Formatter def pfx(i): diff --git a/amoco/arch/x64/spec_fpu.py b/amoco/arch/x64/spec_fpu.py index f1f76f6..811524f 100644 --- a/amoco/arch/x64/spec_fpu.py +++ b/amoco/arch/x64/spec_fpu.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. diff --git a/amoco/arch/x64/spec_ia32e.py b/amoco/arch/x64/spec_ia32e.py index 88e6e6f..ee6313e 100644 --- a/amoco/arch/x64/spec_ia32e.py +++ b/amoco/arch/x64/spec_ia32e.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. diff --git a/amoco/arch/x64/spec_sse.py b/amoco/arch/x64/spec_sse.py index ca1c209..1cb91a7 100644 --- a/amoco/arch/x64/spec_sse.py +++ b/amoco/arch/x64/spec_sse.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. 
@@ -111,7 +111,7 @@ def sse_ps(obj,Mod,REG,RM,data): obj.misc['REX'] = None op2,data = getModRM(obj,Mod,RM,data) if not op2._is_reg: raise InstructionError(obj) - op1 = env.getreg(REG,op2.size) # + op1 = env.getreg(REG,op2.size) # obj.operands = [op1,op2] obj.type = type_data_processing @@ -545,7 +545,7 @@ def sse_sd(obj,Mod,REG,RM,data): obj.operands = [op1,op2] obj.type = type_data_processing -# 66 prefixed : +# 66 prefixed : # ------------- # Note that thos specs MUST APPEAR AFTER f2/f3 prefixes which have priority over 66, # so that 66-related specs will be matched after identical f2/f3 specs diff --git a/amoco/arch/x64/utils.py b/amoco/arch/x64/utils.py index be7e596..2464dca 100644 --- a/amoco/arch/x64/utils.py +++ b/amoco/arch/x64/utils.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. 
diff --git a/amoco/arch/x86/asm.py b/amoco/arch/x86/asm.py index b8be8fd..4639d6b 100644 --- a/amoco/arch/x86/asm.py +++ b/amoco/arch/x86/asm.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from .env import * @@ -13,12 +13,12 @@ #------------------------------------------------------------------------------ # utils : def push(fmap,x): - fmap[esp] = fmap[esp]-x.length + fmap[esp] = fmap(esp-x.length) fmap[mem(esp,x.size)] = x def pop(fmap,l): fmap[l] = fmap(mem(esp,l.size)) - fmap[esp] = fmap[esp]+l.length + fmap[esp] = fmap(esp+l.length) def parity(x): x = x ^ (x>>1) @@ -129,21 +129,20 @@ def i_WAIT(i,fmap): # LEAVE instruction is a shortcut for 'mov esp,ebp ; pop ebp ;' def i_LEAVE(i,fmap): fmap[eip] = fmap[eip]+i.length - fmap[esp] = fmap[ebp] + fmap[esp] = fmap(ebp) pop(fmap,ebp) def i_RET(i,fmap): pop(fmap,eip) def i_HLT(i,fmap): - fmap[eip] = fmap[eip]+i.length - ext('halt').call(fmap) + fmap[eip] = top(32) #------------------------------------------------------------------------------ def _ins_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx - loc = mem(fmap(edi),l*8) - src = ext('IN%s'%fmap(dx),l*8).call(fmap) + loc = mem(edi,l*8) + src = ext('IN',size=l*8).call(port=fmap(dx)) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -163,14 +162,11 @@ def i_INSD(i,fmap): #------------------------------------------------------------------------------ def _outs_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx - src = fmap(mem(esi,l*8)) - loc = ext('OUT%s'%fmap(dx),l*8).call(fmap) + ext('OUT').call(fmap,src=fmap(mem(esi,l*8))) if i.misc['rep']: - fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 fmap[eip] = tst(fmap(counter)==0, fmap[eip]+i.length, fmap[eip]) else: - fmap[loc] = 
src fmap[eip] = fmap[eip]+i.length fmap[edi] = tst(fmap(df),fmap(edi)-l,fmap(edi)+l) @@ -183,8 +179,7 @@ def i_OUTSD(i,fmap): #------------------------------------------------------------------------------ def i_INT3(i,fmap): - fmap[eip] = fmap[eip]+i.length - ext('INT3').call(fmap) + fmap[eip] = ext('INT3',size=32) def i_CLC(i,fmap): fmap[eip] = fmap[eip]+i.length @@ -269,8 +264,8 @@ def i_SAHF(i,fmap): #------------------------------------------------------------------------------ def _cmps_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx - dst = mem(fmap(edi),l*8) - src = mem(fmap(esi),l*8) + dst = fmap(mem(edi,l*8)) + src = fmap(mem(esi,l*8)) x, carry, overflow = SubWithBorrow(dst,src) if i.misc['rep']: fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(dst,src)) @@ -303,7 +298,7 @@ def i_CMPSD(i,fmap): def _scas_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx a = {1:al, 2:ax, 4:eax}[l] - src = mem(fmap(edi),l*8) + src = fmap(mem(edi,l*8)) x, carry, overflow = SubWithBorrow(a,src) if i.misc['rep']: fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(a,src)) @@ -335,7 +330,7 @@ def i_SCASD(i,fmap): def _lods_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx loc = {1:al, 2:ax, 4:eax}[l] - src = mem(fmap(esi),l*8) + src = fmap(mem(esi,l*8)) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -356,7 +351,7 @@ def i_LODSD(i,fmap): def _stos_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx src = {1:al, 2:ax, 4:eax}[l] - loc = mem(fmap(edi),l*8) + loc = mem(edi,l*8) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -376,8 +371,8 @@ def i_STOSD(i,fmap): #------------------------------------------------------------------------------ def _movs_(i,fmap,l): counter = cx if i.misc['adrsz'] else ecx - loc = mem(fmap(edi),l*8) - src = mem(fmap(esi),l*8) + loc = mem(edi,l*8) + src = fmap(mem(esi,l*8)) if i.misc['rep']: fmap[loc] = tst(fmap(counter)==0, 
fmap(loc), src) fmap[counter] = fmap(counter)-1 @@ -400,22 +395,22 @@ def i_IN(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = i.operands[0] op2 = fmap(i.operands[1]) - fmap[op1] = ext('IN%s'%op2,op1.size).call(fmap) + fmap[op1] = ext('IN',size=op1.size).call(port=op2) def i_OUT(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = fmap(i.operands[0]) op2 = fmap(i.operands[1]) - ext('OUT%s'%op1).call(fmap,arg=op2) + ext('OUT').call(fmap,port=op1,src=op2) -#op1_src retreives fmap[op1] (op1 value): +#op1_src retreives fmap[op1] (op1 value): def i_PUSH(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = fmap(i.operands[0]) if op1.size==8: op1 = op1.signextend(32) push(fmap,op1) -#op1_dst retreives op1 location: +#op1_dst retreives op1 location: def i_POP(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = i.operands[0] @@ -427,8 +422,7 @@ def i_CALL(i,fmap): op1 = fmap(i.operands[0]) op1 = op1.signextend(pc.size) target = pc+op1 if not i.misc['absolute'] else op1 - if target._is_ext: target.call(fmap) - else: fmap[eip] = target + fmap[eip] = target def i_CALLF(i,fmap): @@ -440,8 +434,7 @@ def i_JMP(i,fmap): op1 = fmap(i.operands[0]) op1 = op1.signextend(pc.size) target = pc+op1 if not i.misc['absolute'] else op1 - if target._is_ext: target.call(fmap) - else: fmap[eip] = target + fmap[eip] = target def i_JMPF(i,fmap): logger.verbose('%s semantic is not defined'%i.mnemonic) @@ -498,7 +491,7 @@ def i_INT(i,fmap): fmap[eip] = fmap[eip]+i.length op1 = fmap(i.operands[0]) push(fmap,fmap[eip]) - ext('INT%s'%op1).call(fmap) + fmap[eip] = ext('INT',port=op1,size=32) def i_INC(i,fmap): op1 = i.operands[0] @@ -573,15 +566,15 @@ def i_MOVBE(i,fmap): def i_MOVSX(i,fmap): op1 = i.operands[0] - op2 = fmap(i.operands[1]) + op2 = i.operands[1] fmap[eip] = fmap[eip]+i.length - fmap[op1] = op2.signextend(op1.size) + fmap[op1] = fmap(op2).signextend(op1.size) def i_MOVZX(i,fmap): op1 = i.operands[0] - op2 = fmap(i.operands[1]) + op2 = i.operands[1] fmap[eip] = fmap[eip]+i.length - fmap[op1] = 
op2.zeroextend(op1.size) + fmap[op1] = fmap(op2).zeroextend(op1.size) def i_ADC(i,fmap): op1 = i.operands[0] @@ -909,17 +902,17 @@ def i_SHLD(i,fmap): def i_IMUL(i,fmap): fmap[eip] = fmap[eip]+i.length if len(i.operands)==1: - src = fmap(i.operands[0]) + src = i.operands[0] m,d = {8:(al,ah), 16:(ax,dx), 32:(eax,edx)}[src.size] - r = m**src + r = fmap(m**src) elif len(i.operands)==2: dst,src = i.operands m = d = dst - r = dst**src + r = fmap(dst**src) else: dst,src,imm = i.operands m = d = dst - r = src**imm.signextend(src.size) + r = fmap(src)**imm.signextend(src.size) lo = r[0:src.size] hi = r[src.size:r.size] fmap[d] = hi @@ -929,9 +922,9 @@ def i_IMUL(i,fmap): def i_MUL(i,fmap): fmap[eip] = fmap[eip]+i.length - src = fmap(i.operands[0]) + src = i.operands[0] m,d = {8:(al,ah), 16:(ax,dx), 32:(eax,edx)}[src.size] - r = m**src + r = fmap(m**src) lo = r[0:src.size] hi = r[src.size:r.size] fmap[d] = hi diff --git a/amoco/arch/x86/cpu_x86.py b/amoco/arch/x86/cpu_x86.py index df1c889..659476b 100644 --- a/amoco/arch/x86/cpu_x86.py +++ b/amoco/arch/x86/cpu_x86.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.x86.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/x86/env.py b/amoco/arch/x86/env.py index 847289d..dc48486 100644 --- a/amoco/arch/x86/env.py +++ b/amoco/arch/x86/env.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # import expressions: @@ -11,11 +11,11 @@ #------------------- eax = reg('eax',32) # accumulator for operands and results data -ebx = reg('ebx',32) # pointer to data in the DS segment -ecx = reg('ecx',32) # counter for string and loop operations -edx = reg('edx',32) # I/O pointer -ebp = reg('ebp',32) # pointer to data in the 
stack (SS segment) -esp = reg('esp',32) # stack pointer (SS segment) +ebx = reg('ebx',32) # pointer to data in the DS segment +ecx = reg('ecx',32) # counter for string and loop operations +edx = reg('edx',32) # I/O pointer +ebp = reg('ebp',32) # pointer to data in the stack (SS segment) +esp = reg('esp',32) # stack pointer (SS segment) esi = reg('esi',32) # ptr to data in segment pointed by DS; src ptr for strings edi = reg('edi',32) # ptr to data in segment pointed by ES; dst ptr for strings eip = reg('eip',32) # instruction pointer in 32 bit mode @@ -41,21 +41,21 @@ dh = slc(edx,8,8,'dh') cf = slc(eflags,0,1,'cf') # carry/borrow flag -pf = slc(eflags,2,1,'pf') # parity flag -af = slc(eflags,4,1,'pf') # aux carry flag -zf = slc(eflags,6,1,'zf') # zero flag -sf = slc(eflags,7,1,'sf') # sign flag -tf = slc(eflags,8,1,'sf') # trap flag -df = slc(eflags,10,1,'df') # direction flag -of = slc(eflags,11,1,'of') # overflow flag +pf = slc(eflags,2,1,'pf') # parity flag +af = slc(eflags,4,1,'pf') # aux carry flag +zf = slc(eflags,6,1,'zf') # zero flag +sf = slc(eflags,7,1,'sf') # sign flag +tf = slc(eflags,8,1,'sf') # trap flag +df = slc(eflags,10,1,'df') # direction flag +of = slc(eflags,11,1,'of') # overflow flag # segment registers & other mappings: cs = reg('cs',16) # segment selector for the code segment -ds = reg('ds',16) # segment selector to a data segment +ds = reg('ds',16) # segment selector to a data segment ss = reg('ss',16) # segment selector to the stack segment -es = reg('es',16) # (data) -fs = reg('fs',16) # (data) -gs = reg('gs',16) # (data) +es = reg('es',16) # (data) +fs = reg('fs',16) # (data) +gs = reg('gs',16) # (data) # fpu registers (80 bits holds double extended floats see Intel Vol1--4.4.2): def st(num): diff --git a/amoco/arch/x86/formats.py b/amoco/arch/x86/formats.py index 55f624e..8d99755 100644 --- a/amoco/arch/x86/formats.py +++ b/amoco/arch/x86/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.core import Formatter def 
pfx(i): @@ -15,7 +17,7 @@ def deref(op): if not op._is_mem: return str(op) d = '%+d'%op.a.disp if op.a.disp else '' s = {8:'byte ptr ',16:'word ptr ', 64:'qword ptr ', 128:'xmmword ptr '}.get(op.size,'') - s += '%s:'%op.a.seg if op.a.seg else '' + s += '%s:'%op.a.seg if (op.a.seg is not '') else '' s += '[%s%s]'%(op.a.base,d) return s diff --git a/amoco/arch/x86/parsers.py b/amoco/arch/x86/parsers.py index 650ed48..2e2fdc6 100644 --- a/amoco/arch/x86/parsers.py +++ b/amoco/arch/x86/parsers.py @@ -1,7 +1,8 @@ #!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license import pyparsing as pp diff --git a/amoco/arch/x86/spec_fpu.py b/amoco/arch/x86/spec_fpu.py index f1f76f6..811524f 100644 --- a/amoco/arch/x86/spec_fpu.py +++ b/amoco/arch/x86/spec_fpu.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. diff --git a/amoco/arch/x86/spec_ia32.py b/amoco/arch/x86/spec_ia32.py index 9dfc2be..a11458f 100644 --- a/amoco/arch/x86/spec_ia32.py +++ b/amoco/arch/x86/spec_ia32.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. 
diff --git a/amoco/arch/x86/spec_sse.py b/amoco/arch/x86/spec_sse.py index 8427e9c..c7a019b 100644 --- a/amoco/arch/x86/spec_sse.py +++ b/amoco/arch/x86/spec_sse.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. @@ -540,7 +540,7 @@ def sse_sd(obj,Mod,REG,RM,data): obj.operands = [op1,op2] obj.type = type_data_processing -# 66 prefixed : +# 66 prefixed : # ------------- # Note that thos specs MUST APPEAR AFTER f2/f3 prefixes which have priority over 66, # so that 66-related specs will be matched after identical f2/f3 specs diff --git a/amoco/arch/x86/utils.py b/amoco/arch/x86/utils.py index 2a17b58..5877e31 100644 --- a/amoco/arch/x86/utils.py +++ b/amoco/arch/x86/utils.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # spec_xxx files are providers for instruction objects. 
diff --git a/amoco/arch/z80/asm.py b/amoco/arch/z80/asm.py index 7ecf5ea..0a72593 100644 --- a/amoco/arch/z80/asm.py +++ b/amoco/arch/z80/asm.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) diff --git a/amoco/arch/z80/cpu_gb.py b/amoco/arch/z80/cpu_gb.py index 4dee22a..9f594aa 100644 --- a/amoco/arch/z80/cpu_gb.py +++ b/amoco/arch/z80/cpu_gb.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.z80.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/z80/cpu_z80.py b/amoco/arch/z80/cpu_z80.py index fd35283..5011082 100644 --- a/amoco/arch/z80/cpu_z80.py +++ b/amoco/arch/z80/cpu_z80.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.z80.asm import * # expose "microarchitecture" (instructions semantics) uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems())) diff --git a/amoco/arch/z80/env.py b/amoco/arch/z80/env.py index a17cceb..6bdf732 100644 --- a/amoco/arch/z80/env.py +++ b/amoco/arch/z80/env.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) diff --git a/amoco/arch/z80/formats.py b/amoco/arch/z80/formats.py index 9ecc7c5..852144e 100644 --- a/amoco/arch/z80/formats.py +++ b/amoco/arch/z80/formats.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from amoco.arch.core import Formatter def mnemo(i): diff --git a/amoco/arch/z80/spec_gb.py b/amoco/arch/z80/spec_gb.py index 216bf8e..0c28207 100644 --- a/amoco/arch/z80/spec_gb.py +++ b/amoco/arch/z80/spec_gb.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) diff --git a/amoco/arch/z80/spec_mostek.py b/amoco/arch/z80/spec_mostek.py index d881271..55aab4e 100644 --- 
a/amoco/arch/z80/spec_mostek.py +++ b/amoco/arch/z80/spec_mostek.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) diff --git a/amoco/cas/expressions.py b/amoco/cas/expressions.py index 874bef1..4cdff7d 100644 --- a/amoco/cas/expressions.py +++ b/amoco/cas/expressions.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.logger import Log @@ -13,13 +15,16 @@ def checkarg1_exp(*args): if len(args)>0 and isinstance(args[0],exp): return f(*args) else: - raise TypeError('arg is not an expression') + logger.error('first arg is not an expression') + raise TypeError(args) return checkarg1_exp def _checkarg_sizes(f): def checkarg_sizes(self,n): if self.size<>n.size: - if self.size>0 and n.size>0: raise ValueError,'size mismatch' + if self.size>0 and n.size>0: + logger.error('size mismatch') + raise ValueError,n return f(self,n) return checkarg_sizes @@ -38,9 +43,13 @@ def checkarg_slice(self,*args): if isinstance(i,slice): if i.step<>None: raise ValueError,i if i.start<0 or i.stop>self.size: - raise ValueError,i - if i.stop<=i.start: raise ValueError,i + logger.error('size mismatch') + raise ValueError,i + if i.stop<=i.start: + logger.error('invalid slice') + raise ValueError,i else: + logger.error('argument should be a slice') raise TypeError,i return f(self,*args) return checkarg_slice @@ -55,6 +64,7 @@ def checkarg_slice(self,*args): #------------------------------------------------------------------------------ class exp(object): __slots__ = ['size','sf'] + _endian = 1 # defaults to little-endian _is_def = False _is_cst = False _is_reg = False @@ -72,10 +82,23 @@ def __init__(self,size=0,sf=False): def __len__(self): return self.length + @classmethod + def setendian(cls,e): + assert e in 
(-1,+1) + cls._endian = e + @property def length(self): # length value is in bytes return self.size/8 + def bytes(self,sta=0,sto=None): + s = slice(sta,sto) + l = self.length + sta,sto,stp = s.indices(l) + if self._endian==-1: + sta,sto = l-sto,l-sta + return self[sta*8:sto*8] + @property def mask(self): return (1<',self,n) + + def to_smtlib(self): + logger.warning('no SMT solver defined') + raise NotImplementedError ## class top(exp): @@ -263,6 +291,9 @@ def __int__(self): def __str__(self): return '{:#x}'.format(self.value) + def to_sym(self,ref): + return sym(ref,self.v,self.size) + # eval of cst is always itself: (sf flag conserved) def eval(self,env): return cst(self.value,self.size) @@ -373,12 +404,12 @@ def __nonzero__(self): @_checkarg_numeric @_checkarg_sizes def __eq__(self,n): - if n._is_cst: return cst(self.value==n.value) + if n._is_cst: return cst(self.v==n.v) else : return exp.__eq__(self,n) @_checkarg_numeric @_checkarg_sizes def __ne__(self,n): - if n._is_cst: return cst(self.value!=n.value) + if n._is_cst: return cst(self.v!=n.v) else : return exp.__ne__(self,n) @_checkarg_numeric @@ -417,9 +448,9 @@ def __str__(self): return "#%s"%self.ref #--------------------------------- -# flt holds float immediate values +# cfp holds float immediate values #--------------------------------- -class flt(exp): +class cfp(exp): __slots__ = ['v'] _is_def = True _is_cst = True @@ -548,6 +579,16 @@ def __setattr__(self,a,v): if a is 'size' and self.__protect is True: raise AttributeError,'protected attribute' exp.__setattr__(self,a,v) + + #howto pickle/unpickle reg objects: + def __setstate__(self,state): + v = state[1] + self.__protect = False + self.size = v['size'] + self.sf = v['sf'] + self.ref = v['ref'] + self._subrefs = v['_subrefs'] + self.__protect = v['_reg__protect'] ## #------------------------------------------------------------------------------ @@ -556,9 +597,18 @@ def __setattr__(self,a,v): class ext(reg): _is_ext = True + def 
__init__(self,refname,**kargs): + self.ref = refname + self._subrefs = kargs + self.size = kargs.get('size',None) + self.sf = False + def __str__(self): return '@%s'%self.ref + def __setattr__(self,a,v): + exp.__setattr__(self,a,v) + @classmethod def stub(cls,ref): try: @@ -569,11 +619,15 @@ def stub(cls,ref): def call(self,env,**kargs): logger.info('stub %s called'%self.ref) + if not 'size' in kargs: kargs.update(size=self.size) res = self.stub(self.ref)(env,**kargs) - if res is None: - return top(self.size) - else: - return res[0:self.size] + if res is None: return top(self.size) + return res[0:self.size] + + # used when the expression is a target used to build a block + def __call__(self,env): + logger.info('stub %s called'%self.ref) + self.stub(self.ref)(env,**self._subrefs) ## # complex expressions are build with atoms attributes: @@ -615,8 +669,11 @@ def __init__(self,s): def __str__(self): s = '{ |' - for nk,nv in self.parts.iteritems(): + cur = 0 + for nv in self: + nk = cur,cur+nv.size s += ' %s->%s |'%('[%d:%d]'%nk,str(nv)) + cur += nv.size return s+' }' def eval(self,env): @@ -716,6 +773,17 @@ def cut(self,start,stop): self.smask[start:stop] = [(start,stop)]*(stop-start) ## + def __iter__(self): + # gather cst as possible: + rcmp = lambda x,y: cmp(x[0],y[0]) + part = self.parts.keys() + part.sort(rcmp) + cur = 0 + for p in part: + assert p[0]==cur + yield self.parts[p] + cur = p[1] + # restruct will concatenate cst expressions when possible # to minimize the number of parts. def restruct(self): @@ -743,23 +811,32 @@ def restruct(self): #------------------------------------------------------------------------------ # mem holds memory fetches, ie a read operation of length size, in segment seg, # at given address expression. +# The mods list allows to handle aliasing issues detected at fetching time +# and adjust the eval result accordingly. 
#------------------------------------------------------------------------------ class mem(exp): - __slots__ = ['a'] + __slots__ = ['a', 'mods'] _is_def = True _is_mem = True - def __init__(self,a,size=32,seg='',disp=0): + def __init__(self,a,size=32,seg='',disp=0,mods=None): self.size = size self.sf = False self.a = ptr(a,seg,disp) + self.mods = mods or [] def __str__(self): - return 'M%d%s'%(self.size,self.a) + n = len(self.mods) + n = '$%d'%n if n>0 else '' + return 'M%d%s%s'%(self.size,n,self.a) def eval(self,env): a = self.a.eval(env) - return env[mem(a,self.size)] + m = env.use() + for loc,v in self.mods: + if loc._is_ptr: loc = env(loc) + m[loc] = env(v) + return m[mem(a,self.size)] def simplify(self): self.a.simplify() @@ -768,8 +845,9 @@ def simplify(self): def addr(self,env): return self.a.eval(env) + #------------------------------------------------------------------------------ -# ptr holds memory addresses with segment, base expressions and +# ptr holds memory addresses with segment, base expressions and # displacement integer (offset relative to base). 
#------------------------------------------------------------------------------ class ptr(exp): @@ -782,8 +860,8 @@ def __init__(self,base,seg='',disp=0): if seg is '': seg=base.seg disp = base.disp+disp base = base.base - self.base = base - self.disp = disp + self.base,offset = extract_offset(base) + self.disp = disp+offset self.seg = seg self.size = base.size self.sf = False @@ -793,7 +871,8 @@ def __str__(self): return '%s(%s%s)'%(self.seg,self.base,d) def simplify(self): - self.base = self.base.simplify() + self.base,offset = extract_offset(self.base) + self.disp += offset if isinstance(self.seg,exp): self.seg = self.seg.simplify() return self @@ -816,10 +895,11 @@ def eval(self,env): #------------------------------------------------------------------------------ def slicer(x,pos,size): if not isinstance(x,exp): raise TypeError,x + if not x._is_def: return top(size) if pos==0 and size==x.size: return x else: - if x._is_mem: + if x._is_mem and size%8==0: off,rst = divmod(pos,8) if rst==0: a = ptr(x.a.base,x.a.seg,x.a.disp+off) @@ -827,7 +907,7 @@ def slicer(x,pos,size): return slc(x,pos,size) #------------------------------------------------------------------------------ -# slc holds bit-slice of a non-cst (and non-slc) expressions +# slc holds bit-slice of a non-cst (and non-slc) expressions #------------------------------------------------------------------------------ class slc(exp): __slots__ = ['x','pos','ref','__protect','_is_reg'] @@ -847,6 +927,7 @@ def __init__(self,x,pos,size,ref=None): self.setref(ref) def setref(self,ref): + self._is_reg = False if self.x._is_reg: self._is_reg = True if ref is None: @@ -873,16 +954,18 @@ def eval(self,env): n = self.x.eval(env) return n[self.pos:self.pos+self.size] + # slc of mem objects are simplified by adjusting the disp offset of + # the sliced mem object. 
def simplify(self): self.x = self.x.simplify() - if self.x._is_mem: + if self.x._is_mem and self.size%8==0: off,rst = divmod(self.pos,8) if rst==0: a = ptr(self.x.a.base,self.x.a.seg,self.x.a.disp+off) return mem(a,self.size) return self - # slice of a slice: + # slice of a slice: @_checkarg_slice def __getitem__(self,i): if i.start==0 and i.stop==self.size: @@ -902,6 +985,17 @@ def addr(self,env): return self.x else: raise TypeError('this expression is not a location') + + def __setstate__(self,state): + v = state[1] + self.__protect = False + self.size = v['size'] + self.sf = v['sf'] + self.x = v['x'] + self.pos = v['pos'] + self.ref = v['ref'] + self._is_reg = v['_is_reg'] + self.__protect = v['_slc__protect'] ## #------------------------------------------------------------------------------ @@ -912,15 +1006,8 @@ class tst(exp): _is_def = True _is_tst = True - def __new__(cls,t,l,r): - if t is True or t==1: return l - if t is False or t==0: return r - obj=super(exp,cls).__new__(cls) - tst.__init__(obj,t,l,r) - return obj - def __init__(self,t,l,r): - if t in [True,False]: t=cst(t) + if t is True or t is False: t=cst(t,1) self.tst = t # the expression to test, probably a 'op' expressions. if l.size<>r.size: raise ValueError,(l,r) self.l = l # true (tst evals to val) @@ -941,6 +1028,7 @@ def eval(self,env): else : return r def simplify(self): + if self.l is self.r: return self.l self.tst = self.tst.simplify() self.l = self.l.simplify() self.r = self.r.simplify() @@ -953,22 +1041,22 @@ def simplify(self): # oper returns a possibly simplified op() object (see below) #------------------------------------------------------------------------------ def oper(opsym,l,r=None): + if r is None: return uop(opsym,l).simplify() return op(opsym,l,r).simplify() #------------------------------------------------------------------------------ -# op holds binary operations, integer arithmetic and bitwise logic -# internal representation is either in tree form or flat form (std=True). 
+# op holds binary integer arithmetic and bitwise logic expressions #------------------------------------------------------------------------------ class op(exp): __slots__ = ['op','l','r','prop'] _is_def = True _is_eqn = True - def __init__(self,op,l,r=None): + def __init__(self,op,l,r): self.op = _operator(op) self.prop = self.op.type - if r is not None and self.prop<4 and l.size <> r.size: - raise ValueError,"size mismatch" + if self.prop<4: + if l.size <> r.size: raise ValueError,"size mismatch" self.l = l self.r = r self.size = self.l.size @@ -976,10 +1064,9 @@ def __init__(self,op,l,r=None): self.size=1 elif self.op.symbol in ['**']: self.size *= 2 self.sf = l.sf + if self.prop==1: self.sf |= r.sf if self.l._is_eqn: self.prop |= self.l.prop - if self.r is not None: - if self.prop==1: self.sf |= r.sf - if self.r._is_eqn : self.prop |= self.r.prop + if self.r._is_eqn : self.prop |= self.r.prop @classmethod def limit(cls,v): @@ -988,34 +1075,89 @@ def limit(cls,v): def eval(self,env): # single-operand : l = self.l.eval(env) - r = None - if self.r is not None: - r = self.r.eval(env) + r = self.r.eval(env) res = self.op(l,r) res.sf = self.sf return res ## def __str__(self): - if self.r is None: - return '(%s%s)'%(self.op.symbol,str(self.l)) - else: - return '(%s%s%s)'%(str(self.l),self.op.symbol,str(self.r)) + return '(%s%s%s)'%(str(self.l),self.op.symbol,str(self.r)) def simplify(self): - self.l = self.l.simplify() - if self.r is not None: - self.r = self.r.simplify() - return eqn_helpers(self) - return self + minus = (self.op.symbol=='-') + l = self.l.simplify() + r = self.r.simplify() + if not l._is_def or not r._is_def: + return top(self.size) + if self.prop<4: + # arithm/logic normalisation: + # push cst to the right + if l._is_cst: + if r._is_cst: return self.op(l,r) + if minus: + l,r = (-r),l + self.op = _operator('+') + else: + l,r = r,l + # lexical ordering of symbols: + elif not r._is_cst: + lh = ''.join(map(str,symbols_of(l))) + rh = 
''.join(map(str,symbols_of(r))) + if lh>rh: + if minus: + l,r = (-r),l + self.op = _operator('+') + else: + l,r=r,l + self.l = l + self.r = r + return eqn2_helpers(self) def depth(self): - if self.r is not None: d = self.r.depth() - else: d = 0. - return self.l.depth()+d + return self.l.depth()+self.r.depth() ## +#------------------------------------------------------------------------------ +# uop holds unary operations (+x, -x, ~x) +#------------------------------------------------------------------------------ +class uop(exp): + __slots__ = ['op','r','prop'] + _is_def = True + _is_eqn = True + + def __init__(self,op,r): + self.op = _operator(op,unary=1) + self.prop = self.op.type + self.r = r + self.size = r.size + self.sf = r.sf + if self.r._is_eqn: self.prop |= self.r.prop + + def eval(self,env): + # single-operand : + r = self.r.eval(env) + res = self.op(r) + res.sf = self.sf + return res + ## + + @property + def l(self): return None + + def __str__(self): + return '(%s%s)'%(self.op.symbol,str(self.r)) + + def simplify(self): + self.r = self.r.simplify() + if not self.r._is_def: return top(self.size) + return eqn1_helpers(self) + + def depth(self): + return self.r.depth() + +## # operators: #----------- @@ -1054,34 +1196,39 @@ def rol(x,n): } class _operator(object): - def __init__(self,op): - self.symbol = op - if op in OP_ARITH: - self.type = 1 - self.impl = OP_ARITH[op] - elif op in OP_LOGIC: - self.type = 2 - self.impl = OP_LOGIC[op] - elif op in OP_CONDT: - self.type = 4 - self.impl = OP_CONDT[op] - elif op in OP_SHIFT: - self.type = 8 - self.impl = OP_SHIFT[op] - else: - raise NotImplementedError - - def __call__(self,l,r=None): - if r is None: - impl = {'+': operator.pos, '-': operator.neg}.get(self.symbol,self.impl) - return impl(l) - return self.impl(l,r) - - def __mul__(self,op): - ss = self.symbol+op.symbol - if ss in ('++','--'): return '+' - if ss in ('+-','-+'): return '-' - return None + def __init__(self,op,unary=0): + self.symbol = op + 
self.unary = unary + if op in OP_ARITH: + self.type = 1 + if self.unary: + self.impl = {'+': operator.pos, '-': operator.neg}[op] + else: + self.impl = OP_ARITH[op] + elif op in OP_LOGIC: + self.type = 2 + if self.unary: assert op == '~' + self.impl = OP_LOGIC[op] + elif op in OP_CONDT: + self.type = 4 + self.impl = OP_CONDT[op] + elif op in OP_SHIFT: + self.type = 8 + self.impl = OP_SHIFT[op] + else: + raise NotImplementedError + + def __call__(self,l,r=None): + if r is None: + assert self.unary + return self.impl(l) + return self.impl(l,r) + + def __mul__(self,op): + ss = self.symbol+op.symbol + if ss in ('++','--'): return '+' + if ss in ('+-','-+'): return '-' + return None # basic simplifier: #------------------ @@ -1089,53 +1236,76 @@ def __mul__(self,op): op.limit(30) def symbols_of(e): - if e is None: return [] - if e._is_cst: return [] - if e._is_reg: return [e] - if e._is_mem: return symbols_of(e.a.base) - if e._is_eqn: return symbols_of(e.l)+symbols_of(e.r) - # tst/slc/comp cases: - if isinstance(e,tst): return sum(map(symbols_of,(e.tst,e.l,e.r)),[]) - if isinstance(e,slc): return symbols_of(e.x) - return sum(map(symbols_of,e.parts.itervalues()),[]) + if e is None: return [] + if e._is_cst: return [] + if e._is_reg: return [e] + if e._is_mem: return symbols_of(e.a.base) + if e._is_ptr: return symbols_of(e.base) + if e._is_eqn: return symbols_of(e.l)+symbols_of(e.r) + if e._is_tst: return sum(map(symbols_of,(e.tst,e.l,e.r)),[]) + if e._is_slc: return symbols_of(e.x) + if e._is_cmp: return sum(map(symbols_of,e.parts.itervalues()),[]) + if not e._is_def: return [] + raise ValueError(e) + +def locations_of(e): + if e is None: return [] + if e._is_cst: return [] + if e._is_reg: return [e] + if e._is_mem: return [e] + if e._is_ptr: return [e] + if e._is_eqn: return locations_of(e.l)+locations_of(e.r) + if e._is_tst: return sum(map(locations_of,(e.tst,e.l,e.r)),[]) + if e._is_slc: return locations_of(e.x) + if e._is_cmp: return 
sum(map(locations_of,e.parts.itervalues()),[]) + if not e._is_def: return [] + raise ValueError(e) def complexity(e): - return e.depth()+len(symbols_of(e)) + factor = e.prop if e._is_eqn else 1 + return (e.depth()+len(symbols_of(e)))*factor + +# helpers for unary expressions: +def eqn1_helpers(e): + assert e.op.unary + if not e.r._is_def: return e.r + if e.r._is_eqn: + if e.r.op.unary: + ss = e.op*e.r.op + if ss == '+': return e.r.r + elif ss == '-': return -e.r.r + elif e.op.symbol == '-': + if e.r.op.symbol in ('-','+'): + l = -e.r.l + r = e.r.r + return OP_ARITH[e.op*e.r.op](l,r) + return e -def eqn_helpers(e): - if e.r is None: return e - if hasattr(e,'threshold'): - if e.l.depth()>e.threshold: e.l = top(e.l.size) - if e.r.depth()>e.threshold: e.r = top(e.r.size) +# helpers for binary expressions: +# reminder: be careful not to modify the internal structure of +# e.l or e.r because these objects might be used also in other +# expressions. See tests/test_cas_exp.py for details. +def eqn2_helpers(e): + if e.r.depth()>e.threshold: e.r = top(e.r.size) + if e.l.depth()>e.threshold: e.l = top(e.l.size) if False in (e.l._is_def, e.r._is_def): return top(e.size) - if e.l._is_cst and e.r._is_cst: - return e.op(e.l,e.r) - if e.l is e.r: - if e.op.symbol in ('!=','<', '>' ): return bit0 - if e.op.symbol in ('==','<=','>='): return bit1 - if e.op.symbol is '-' : return cst(0,e.size) - if e.op.symbol is '^' : return cst(0,e.size) - if e.op.symbol is '&' : return e.l - if e.op.symbol is '|' : return e.l - if e.l._is_cst: - if e.l.value==0: - if e.op.symbol in ('*','&','>>','<<','>>>','<<<'): - return cst(0,e.size) - if e.op.symbol in ('|','^','+'): - return e.r - elif e.l.value==1 and e.op.symbol=='*': - return e.r - elif e.r._is_eqn: - xop = e.op*e.r.op - if xop: - if e.r.l._is_cst: - cc = e.op(e.l,e.r.l) - return op(xop, cc, e.r.r) - elif e.r.r._is_cst: - cc = OP_ARITH[xop](e.l, e.r.r) - e.l = cc - e.r = e.r.l - return e + if e.l._is_eqn and e.l.r._is_cst: + assert 
e.l.op.unary==0 + xop = e.op*e.l.op + if xop: + e.op,lop = e.l.op,e.op + lr,e.r = e.r,e.l.r + e.l = lop(e.l.l,lr) + if e.r._is_eqn and e.r.op.unary: + if e.op.symbol == '+' and e.r.op.symbol == '-': + e.op = _operator('-') + e.r = e.r.r + if e.r._is_eqn and e.r.r._is_cst: + xop = e.op*e.r.op + if xop: + e.l = e.op(e.l,e.r.l) + e.r = e.r.r + e.op = _operator(xop) if e.r._is_cst: if e.r.value==0: if e.op.symbol in ('|','^','+','-','>>','<<','>>>','<<<'): @@ -1144,68 +1314,35 @@ def eqn_helpers(e): return cst(0,e.size) elif e.r.value==1 and e.op.symbol in ('*','/'): return e.l - elif e.l._is_eqn: + if e.l._is_eqn: xop = e.op*e.l.op if xop: - if e.l.l._is_cst: - cc = e.op(e.l.l,e.r) - e.r = e.l.r - e.op = e.l.op - e.l = cc - elif e.l.r._is_cst: + if e.l.r._is_cst: cc = OP_ARITH[xop](e.l.r,e.r) e.op = e.l.op - e.l = e.l.l + if not e.l.op.unary: e.l = e.l.l e.r = cc + return e + elif e.l._is_ptr: + if e.op.symbol in ('-','+'): + return ptr(e.l,disp=e.op(0,e.r.value)) + elif e.l._is_cst: + return e.op(e.l,e.r) + if str(e.l)==str(e.r): + if e.op.symbol in ('!=','<', '>' ): return bit0 + if e.op.symbol in ('==','<=','>='): return bit1 + if e.op.symbol is '-' : return cst(0,e.size) + if e.op.symbol is '^' : return cst(0,e.size) + if e.op.symbol is '&' : return e.l + if e.op.symbol is '|' : return e.l return e -# expression parser: -#------------------- - -import pyparsing as pp - -#terminals: -p_bottop = pp.oneOf('_ T') -p_symbol = pp.Word(pp.alphas) -p_extern = pp.Suppress('@')+p_symbol -p_cst = pp.Suppress('0x')+pp.Combine(pp.Optional('-')+pp.Regex('[0-9a-f]+')) -p_int = pp.Word(pp.nums).setParseAction(lambda r:int(r[0])) -p_slc = '['+p_int.setResultsName('start')+':'+p_int.setResultsName('stop')+']' -p_op1 = pp.oneOf('~ -') -p_op2 = pp.oneOf('+ - / // * & | ^ << >> < > == <= >= != ? 
:') -p_term = p_bottop|p_symbol|p_extern|p_cst - -#nested expressions: -p_expr = pp.Forward() - -p_csl = pp.Suppress('|')+p_slc+pp.Suppress('->') -p_comp = pp.Group(pp.Suppress('{')+pp.ZeroOrMore(p_expr)+pp.Suppress('| }')) -p_mem = 'M'+p_int+pp.Optional(p_symbol) - -operators = [(p_op1,1,pp.opAssoc.RIGHT), - (p_mem,1,pp.opAssoc.RIGHT), - (p_slc,1,pp.opAssoc.LEFT), - (p_op2,2,pp.opAssoc.LEFT), - (p_csl,1,pp.opAssoc.RIGHT), - ] - -p_expr << pp.operatorPrecedence(p_term|p_comp,operators) - -p_bottop.setParseAction(lambda r: bot if r[0]=='_' else top) -p_symbol.setParseAction(lambda r: reg(r[0])) -p_extern.setParseAction(lambda r: ext(r[0])) -p_cst.setParseAction(lambda r: int(r[0],16)) -p_slc.setParseAction(lambda r: slice(r['start'],r['stop'])) - - -def parse(s): - p_expr.parseString(s,True) - -def test_parser(): - while 1: - try: - res = raw_input('amoco[test_parser]>') - E = p_expr.parseString(res,True) - print E - except EOFError: - return +# separate expression e into (e' + C) with C cst offset. +def extract_offset(e): + x = e.simplify() + if x._is_eqn and x.r._is_cst: + if e.op.symbol == '+': + return (x.l,x.r.v) + elif e.op.symbol == '-': + return (x.l,-x.r.v) + return (x,0) diff --git a/amoco/cas/mapper.py b/amoco/cas/mapper.py index db11f10..6960c8d 100644 --- a/amoco/cas/mapper.py +++ b/amoco/cas/mapper.py @@ -1,21 +1,35 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.logger import Log logger = Log(__name__) -from .expressions import reg,cst,mem,comp,top +from .expressions import * from amoco.cas.tracker import generation - +from amoco.system.core import MemoryMap +from amoco.arch.core import Bits + +# a mapper is a symbolic functional representation of the execution +# of a set of instructions. 
+# __map : is an ordered list of mappings of expressions associated with a +# location (a register or a memory pointer). The order is relevant only +# to reflect the order of write-to-memory instructions. +# __Mem : is a memory model where symbolic memory pointers are using +# individual separated zones. class mapper(object): + assume_no_aliasing = False - __slots__ = ['__map'] + __slots__ = ['__map','__Mem'] - # a mapper is inited with a list of instructions - # provided by a disassembler (see x86) + # a mapper is inited with a list of instructions + # provided by a disassembler def __init__(self,instrlist=None): - self.__map = generation() + self.__map = generation() + self.__map.lastw = 0 + self.__Mem = MemoryMap() icache = [] # if the __map needs to be inited before executing instructions # one solution is to prepend the instrlist with a function dedicated @@ -27,27 +41,38 @@ def __init__(self,instrlist=None): for instr in icache: instr(self) + def __len__(self): + return len(self.__map) + def __str__(self): return '\n'.join(["%s <- %s"%x for x in self]) # list antecedent locations (used in the mapping) def inputs(self): - pass + return sum(map(locations_of,self.__map.itervalues()),[]) # list image locations (modified in the mapping) def outputs(self): - pass + return sum(map(locations_of,self.__map.iterkeys()),[]) + + def rw(self): + r = filter(lambda x:x._is_mem, self.inputs()) + w = filter(lambda x:x._is_ptr, self.outputs()) + sr = ''.join(("r%d"%x.size for x in r)) + sw = ''.join(("w%d"%self.__map[x].size for x in w)) + return sr+sw def clear(self): self.__map.clear() + self.__Mem = MemoryMap() + + def memory(self): + return self.__Mem # compare self with mapper m: def __cmp__(self,m): d = cmp(self.__map.lastdict(),m.__map.lastdict()) return d - #if d<>0: return d - #shall we compare also the order ? 
- #return cmp(self.__order,m.__order) # iterate over ordered correspondances: def __iter__(self): @@ -59,16 +84,54 @@ def R(self,x): return self.__map.get(x,x) # get a memory location value (fetch) : + # k must be mem expressions def M(self,k): if k.a.base._is_ext: return k.a.base - x = self.__map.get(k.a,k) - if x.size0: + f = lambda e:e[0]._is_ptr + items = filter(f,self.__map.items()[0:n]) + res = mem(k.a,k.size,mods=items) + else: + res = self._Mem_read(k.a,k.length) + res.sf = k.sf + return res + + def aliasing(self,k): + if self.assume_no_aliasing: return 0 + K = self.__map.keys() + n = self.__map.lastw + try: + i = K.index(k.a) + except ValueError: + # k has never been written to explicitly + # but it is maybe in a zone that was written to + i = -1 + for l in K[i+1:n]: + if not l._is_ptr: continue + if l.base==k.a.base: continue + return n + return 0 + + # read MemoryMap and return the result as an expression: + def _Mem_read(self,a,l): + try: + res = self.__Mem.read(a,l) + except MemoryError,e: # no zone for location a; + res = [top(l*8)] + if exp._endian==-1: res.reverse() + P = [] + cur = 0 + for p in res: + plen = len(p) + if isinstance(p,str): p = cst(Bits(p[::c._endian],bitorder=1).int(),plen*8) + elif not p._is_def: p = mem(a,p.size,disp=cur) + P.append(p) + cur += plen + return composer(P) + + def _Mem_write(self,a,v): + self.__Mem.write(a,v) # just a convenient wrapper around M/R: def __getitem__(self,k): @@ -79,47 +142,68 @@ def __getitem__(self,k): # define image v of antecedent k: def __setitem__(self,k,v): if k._is_ptr: - self.__map[k] = v - return - if k.size<>v.size: raise ValueError('size mismatch') - try: - loc = k.addr(self) - except TypeError: - logger.error('setitem ignored (invalid left-value expression)') - return + loc = k + else: + if k.size<>v.size: + raise ValueError('size mismatch') + try: + loc = k.addr(self) + except TypeError: + logger.error('setitem ignored (invalid left-value expression)') + return if k._is_slc and not 
loc._is_reg: raise ValueError('memory location slc is not supported') - elif k._is_mem: + elif k._is_ptr or k._is_mem: r = v + self.__map.lastw = len(self.__map)+1 else: r = self.R(loc) if r._is_reg: r = comp(loc.size) r[0:loc.size] = loc pos = k.pos if k._is_slc else 0 - r[pos:pos+k.size] = v + r[pos:pos+k.size] = v.simplify() + if loc._is_ptr: + oldr = self.__map.get(loc,None) + if oldr is not None and oldr.size>r.size: + r = composer([r,oldr[r.size:oldr.size]]) + self._Mem_write(loc,r) self.__map[loc] = r def update(self,instr): instr(self) # eval of x in this map: - # note the difference between a mapper[mem(x)] and mapper(mem(x)): - # in the call form, x is first evaluated so that it uses "x_out" - # whereas the item form uses "x_in". + # note the difference between a mapper[mem(p)] and mapper(mem(p)): + # in the call form, p is first evaluated so that the target address + # is the expression of p "after execution" whereas the indexing form + # uses p as an input (i.e "before execution") expression. + # example, suppose str(mapper) is: + # (esp) <- eax + # esp <- { | [0:32]->(esp-0x4) | } + # (esp-4) <- ebx + # then: + # mapper[mem(esp)] returns eax (what is pointed by "esp before execution") + # mapper(mem(esp)) returns ebx (what is pointed by "esp after execution") def __call__(self,x): return x.eval(self) def restruct(self): - pass + self.__Mem.restruct() # return a new mapper instance where all input locations have # been replaced by there corresponding values in m. # example: # in self: eax <- ebx # in m : ebx <- 4 + # edx <- (ecx+1) # => - # in mm : eax <- 4 + # result : eax <- 4 + # The compose flag indicates whether the resulting mapper contains + # all mappings of m or only mappings of self. 
For example, if + # we use compose=True we get instead: + # result : eax <- 4 + # edx <- (ecx+1) def eval(self,m,compose=False): mm = mapper() if not compose else m.use() for loc,v in self: @@ -128,24 +212,33 @@ def eval(self,m,compose=False): mm[loc] = m(v) return mm - # composition operator (°) returns a new mapper + # composition operator returns a new mapper # corresponding to function x -> self(m(x)) def rcompose(self,m): return self.eval(m,compose=True) - # self << m : composition (self°m) + # self << m : composition (self(m)) def __lshift__(self,m): return self.rcompose(m) - # self >> m : composition (m°self) + # self >> m : composition (m(self)) def __rshift__(self,m): return m.rcompose(self) def interact(self): - pass - - def use(self,**kargs): + raise NotImplementedError + + # return a mapper corresponding to the evaluation of the current mapper + # where all key symbols found in kargs are replaced by their values in + # all expressions. The kargs "size=value" allows for adjusting symbols/values + # sizes for all arguments. + # if kargs is empty, a copy of the result is just a copy of current mapper. 
+ def use(self,*args,**kargs): m = mapper() - for k,v in kargs.iteritems(): - m[reg(k)] = cst(v) + for loc,v in args: + m[loc] = v + if len(kargs)>0: + argsz = kargs.get('size',32) + for k,v in kargs.iteritems(): + m[reg(k,argsz)] = cst(v,argsz) return self.eval(m) diff --git a/amoco/cas/parser.py b/amoco/cas/parser.py new file mode 100644 index 0000000..af9f0fb --- /dev/null +++ b/amoco/cas/parser.py @@ -0,0 +1,61 @@ +# -*- coding: utf-8 -*- + +# This code is part of Amoco +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# published under GPLv2 license + +from amoco.logger import Log +logger = Log(__name__) + +from .expressions import * + +# expression parser: +#------------------- + +import pyparsing as pp + +#terminals: +p_bottop = pp.oneOf('⊥ T') +p_symbol = pp.Word(pp.alphas) +p_extern = pp.Suppress('@')+p_symbol +p_cst = pp.Suppress('0x')+pp.Combine(pp.Optional('-')+pp.Regex('[0-9a-f]+')) +p_int = pp.Word(pp.nums).setParseAction(lambda r:int(r[0])) +p_slc = '['+p_int.setResultsName('start')+':'+p_int.setResultsName('stop')+']' +p_op1 = pp.oneOf('~ -') +p_op2 = pp.oneOf('+ - / // * & | ^ << >> < > == <= >= != ? 
:') +p_term = p_bottop|p_symbol|p_extern|p_cst + +#nested expressions: +p_expr = pp.Forward() + +p_csl = pp.Suppress('|')+p_slc+pp.Suppress('->') +p_comp = pp.Group(pp.Suppress('{')+pp.ZeroOrMore(p_expr)+pp.Suppress('| }')) +p_mem = 'M'+p_int+pp.Optional(p_symbol) + +operators = [(p_op1,1,pp.opAssoc.RIGHT), + (p_mem,1,pp.opAssoc.RIGHT), + (p_slc,1,pp.opAssoc.LEFT), + (p_op2,2,pp.opAssoc.LEFT), + (p_csl,1,pp.opAssoc.RIGHT), + ] + +p_expr << pp.operatorPrecedence(p_term|p_comp,operators) + +p_bottop.setParseAction(lambda r: bot if r[0]=='_' else top) +p_symbol.setParseAction(lambda r: reg(r[0])) +p_extern.setParseAction(lambda r: ext(r[0])) +p_cst.setParseAction(lambda r: int(r[0],16)) +p_slc.setParseAction(lambda r: slice(r['start'],r['stop'])) + + +def parse(s): + return p_expr.parseString(s,True) + +def test_parser(): + while 1: + try: + res = raw_input('amoco[test_parser]>') + E = p_expr.parseString(res,True) + print E + except EOFError: + return diff --git a/amoco/cas/smt.py b/amoco/cas/smt.py new file mode 100644 index 0000000..cbc3371 --- /dev/null +++ b/amoco/cas/smt.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- + +# This code is part of Amoco +# Copyright (C) 2015 Axel Tillequin (bdcht3@gmail.com) +# published under GPLv2 license + +from amoco.logger import Log +logger = Log(__name__) + +from .expressions import * +from .mapper import mapper + +try: + import z3 +except ImportError: + logger.info('z3 package not found => solve() method is not implemented') + class solver(object): + def __init__(self,eqns=None): + raise NotImplementedError + has_solver = False +else: + logger.info('z3 package imported') + class solver(object): + def __init__(self,eqns=None): + self.eqns = [] + self.locs = [] + self.solver = z3.Solver() + if eqns: self.add(eqns) + def add(self,eqns): + for e in eqns: + assert e._is_eqn + self.eqns.append(e) + self.solver.add(e.to_smtlib()) + self.locs.extend(locations_of(e)) + def check(self): + return self.solver.check() + def 
get_model(self,eqns=None): + if eqns is not None: self.add(eqns) + if self.check() == z3.sat: + r = self.solver.model() + return r + def get_mapper(self,eqns=None): + r = self.get_model(eqns) + if r is not None: + return model_to_mapper(r,self.locs) + has_solver = True + +def cst_to_z3(e): + return z3.BitVecVal(e.v,e.size) + +def cfp_to_z3(e): + return z3.RealVal(e.v) + +def reg_to_z3(e): + return z3.BitVec(e.ref,e.size) + +def comp_to_z3(e): + e.simplify() + parts = [x.to_smtlib() for x in e] + parts.reverse() + return z3.Concat(*parts) + +def slc_to_z3(e): + x = e.x.to_smtlib() + return z3.Extract(e.pos+e.size-1,e.pos,x) + +def ptr_to_z3(e): + return e.base.to_smtlib()+e.disp + +def mem_to_z3(e): + e.simplify() + M = z3.Array('M',z3.BitVecSort(e.a.size),z3.BitVecSort(8)) + p = e.a.to_smtlib() + b = [] + for i in range(0,e.length): + b.insert(0,M[p+i]) + if e._endian==-1: b.reverse() # big-endian case + return z3.Concat(*b) + +def tst_to_z3(e): + e.simplify() + return z3.If(e.tst.to_smtlib(), e.l.to_smtlib(), e.r.to_smtlib()) + +def op_to_z3(e): + e.simplify() + l,r = e.l,e.r + op = e.op + if op.symbol == '>>' : op = z3.LShR + elif op.symbol == '//' : op = operator.rshift + elif op.symbol == '>>>': op = z3.RotateRight + elif op.symbol == '<<<': op = z3.RotateLeft + z3l = l.to_smtlib() + if r is None: return op(z3l) + z3r = r.to_smtlib() + return op(z3l,z3r) + +cst.to_smtlib = cst_to_z3 +cfp.to_smtlib = cfp_to_z3 +reg.to_smtlib = reg_to_z3 +comp.to_smtlib = comp_to_z3 +slc.to_smtlib = slc_to_z3 +ptr.to_smtlib = ptr_to_z3 +mem.to_smtlib = mem_to_z3 +tst.to_smtlib = tst_to_z3 +op.to_smtlib = op_to_z3 + +def to_smtlib(e): + return e.to_smtlib() + +def model_to_mapper(r,locs): + m = mapper() + mlocs = [] + for l in locs: + if l._is_mem: + mlocs.append(l) + else: + m[l] = cst(r.eval(l.to_smtlib()).as_long(),l.size) + for l in mlocs: + m[l] = cst(r.eval(l.to_smtlib()).as_long(),l.size) + return m diff --git a/amoco/cas/tracker.py b/amoco/cas/tracker.py index 
541f2e8..2ecb33d 100644 --- a/amoco/cas/tracker.py +++ b/amoco/cas/tracker.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from collections import OrderedDict diff --git a/amoco/cas/utils.py b/amoco/cas/utils.py index 9fb65ce..8c3bee1 100644 --- a/amoco/cas/utils.py +++ b/amoco/cas/utils.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license diff --git a/amoco/cfg.py b/amoco/cfg.py index 8188d5a..e8ca677 100644 --- a/amoco/cfg.py +++ b/amoco/cfg.py @@ -1,14 +1,21 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license # we wrap the grandalf classes here +from amoco.logger import Log +logger = Log(__name__) + from grandalf.graphs import Vertex,Edge,Graph from amoco.system.core import MemoryZone #------------------------------------------------------------------------------ +# node class is a graph vertex that embeds a block instance and inherits its +# name (default to the address of the block). class node(Vertex): # protect from None data node: def __init__(self,acode): @@ -28,18 +35,18 @@ def __len__(self): return self.data.length def __getitem__(self,i): - self.data = self.data.__getitem__(i) + res = node(self.data.__getitem__(i)) return res #------------------------------------------------------------------------------ +# link is a direct graph edge between two nodes. class link(Edge): - def __init__(self,orig,dest): - Edge.__init__(self,orig,dest) def __str__(self): n0 = repr(self.v[0]) n1 = repr(self.v[1]) - return "%s -> %s"%(n0,n1) + c = '?' 
if self.data else '-' + return "%s -%s-> %s"%(n0,c,n1) def __repr__(self): return '<%s [%s] at 0x%x>'%(self.__class__.__name__,self.name,id(self)) @@ -54,10 +61,12 @@ def __cmp__(self,e): return cmp(self.name,e.name) #------------------------------------------------------------------------------ -class func(Graph): +# graph is a Graph that represents a set of functions as individual components +class graph(Graph): def __init__(self,*args,**kargs): self.support = MemoryZone() + self.overlay = None Graph.__init__(self,*args,**kargs) def spool(self,n=None): @@ -66,38 +75,65 @@ def spool(self,n=None): if len(v.e_out())==0: L.append(v) return L - def add_vertex(self,v): + def __cut_add_vertex(self,v,mz,vaddr,mo): + oldnode = mo.data.val + if oldnode==v: return 0 + # so v cuts an existing node/block: + # repair oldblock and fix self + childs = oldnode.N(+1) + oldblock = oldnode.data + # if vaddr is aligned with an oldblock instr, cut it: + # this reduces oldblock up to vaddr if the cut is possible. + cutdone = oldblock.cut(vaddr) + if not cutdone: + if mz is self.overlay: + logger.warning("double overlay block at %s"%vaddr) + Graph.add_vertex(self,v) + v.data.misc['double-overlay'] = 1 + return 1 + overlay = self.overlay or MemoryZone() + return self.add_vertex(v,support=overlay) + else: + Graph.add_vertex(self,v) # ! 
avoid recursion for add_edge + mz.write(vaddr,v) + self.add_edge(link(oldnode,v)) + for n in childs: + self.add_edge(link(v,n)) + self.remove_edge(oldnode.e_to(n)) + return 1 + + def add_vertex(self,v,support=None): + if len(v)==0: return Graph.add_vertex(self,v) vaddr=v.data.address - i = self.support.locate(vaddr) + if support is None: + support=self.support + else: + logger.verbose("add overlay block at %s"%vaddr) + self.overlay = support + i = support.locate(vaddr) if i is not None: - mo = self.support._map[i] + mo = support._map[i] if vaddr in mo: - oldnode = mo.data.val - if oldnode==v: return 0 - # so v cuts an existing node/block: - # repair oldblock and fix self - childs = oldnode.N(+1) - oldblock = oldnode.data - oldblock.cut(vaddr) - Graph.add_vertex(self,v) # ! avoid recursion for add_edge - self.support.write(vaddr,v) - self.add_edge(link(oldnode,v)) - for n in childs: - self.add_edge(link(v,n)) - self.remove_edge(oldnode.e_to(n)) - return 1 + return self.__cut_add_vertex(v,support,vaddr,mo) else: #v does not cut an existing block, try: # but may swallow next one... - nextmo = self.support._map[i+1] + nextmo = support._map[i+1] except IndexError: # no more nodes here so back to default case: pass else: nextnode = nextmo.data.val - if vaddr+len(v)>=nextnode.data.address: - v.data.cut(nextnode.data.address) + if vaddr+len(v)>nextnode.data.address: + cutdone = v.data.cut(nextnode.data.address) + if not cutdone: + if support is self.overlay: + logger.warning("double overlay block at %s"%vaddr) + Graph.add_vertex(self,v) + v.data.misc['double-overlay'] = 1 + return 1 + support = self.overlay or MemoryZone() Graph.add_vertex(self,v) # before support write !! 
- self.support.write(vaddr,v) + support.write(vaddr,v) return 1 def get_node(self,name): diff --git a/amoco/code.py b/amoco/code.py index 7b88032..524f5d7 100644 --- a/amoco/code.py +++ b/amoco/code.py @@ -1,15 +1,18 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from collections import defaultdict from amoco.cas.mapper import mapper from amoco.config import conf +from amoco.logger import Log +logger = Log(__name__) #------------------------------------------------------------------------------ -# A block instance is a 'continuous' (atomic) set of instructions. -# It is build from a bytecode +# A block instance is a 'continuous' sequence of instructions. #------------------------------------------------------------------------------ class block(object): __slots__=['_map','instr','_name','misc'] @@ -21,13 +24,13 @@ def __init__(self, instrlist, name=None): self.instr = instrlist self._name = name self.misc = defaultdict(lambda :0) - # translate into a interpreter: - #acode.__init__(self,mapper(self.instr)) @property def map(self): if self._map is None: self._map = mapper(self.instr) + if self.misc['func']: + return self.misc['func'].map return self._map @map.setter def map(self,m): @@ -57,23 +60,32 @@ def __getitem__(self,i): pos = [0] for i in self.instr: pos.append(pos[-1]+i.length) - ista = pos.index(sta) - isto = pos.index(sto) + try: + ista = pos.index(sta) + isto = pos.index(sto) + except ValueError: + logger.warning("can't slice block: indices must match instruction boudaries") + return None I = self.instr[ista:isto] if len(I)>0: return block(self.instr[ista:isto]) + # cut the block at given address will remove instructions after this address, + # which needs to be aligned with instructions boundaries. The effect is thus to + # reduce the block size. 
The returned value is the number of instruction removed. def cut(self,address): I = [i.address for i in self.instr] try: pos = I.index(address) except ValueError: - pass + logger.warning("invalid attempt to cut block %s at %s"%(self.name,address)) + return 0 else: self.instr = self.instr[:pos] self.map.clear() for i in self.instr: i(self.map) # TODO: update misc annotations too + return len(I)-pos def __str__(self): L = [] @@ -103,19 +115,62 @@ def __cmp__(self,b): #------------------------------------------------------------------------------ -# A func instance is an acode where the map is build from a cfg by -# unions and fixpoints on (sub)maps contained in this cfg. +# func is a cfg connected component that generally represents a called function +# It appears in the other graphs whenever the function is called and provides a +# synthetic map that captures the semantics of the function. #------------------------------------------------------------------------------ -class func(object): - __slots__ = ['name','cfg'] - def __init__(self,name,cfg): - self.name = name - self.cfg = cfg +class func(block): + __slots__ = ['cfg'] + + # the init of a func takes a core_graph and creates a map of it: + def __init__(self, g=None, name=None): + self._map = None + self.cfg = g + self.instr = [] + # base/offset need to be defined before code (used in setcode) + self._name = name + self.misc = defaultdict(lambda :0) + + @property + def address(self): + return self.blocks[0].address + + @property + def blocks(self): + V = self.cfg.sV.o + return [n.data for n in V] + + @property + def support(self): + smin = self.address + smax = max((b.address+b.length for b in self.blocks)) + return (smin,smax) + + def makemap(self): + raise NotImplementedError def __str__(self): - s = '# --- func %s ---\n%s' % (self.name,str(self.cfg)) - return s + return "%s{%d}"%(self.name,len(self.blocks)) +#------------------------------------------------------------------------------ +# xfunc represents 
external functions. It is associated with an ext expression. +# The map provided by an xfunc instance is constructed by executing the stub +# defined in the ext expression. +#------------------------------------------------------------------------------ +class xfunc(object): + __slots__ = ['map','name','address','length','misc'] + + def __init__(self, x): + self.map = mapper() + x(self.map) + self.name = str(x) + self.address = x + self.length = 0 + self.misc = defaultdict(lambda :0) + + @property + def support(self): + return (self.address,self.address) #------------------------------------------------------------------------------ class tag: diff --git a/amoco/config.py b/amoco/config.py index ffff642..6a30dbb 100644 --- a/amoco/config.py +++ b/amoco/config.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + from collections import defaultdict try: @@ -15,7 +17,7 @@ conf.set('block', 'bytecode', 'True') conf.set('block', 'padding', '4') conf.add_section('log') - conf.set('log', 'level', '20') + conf.set('log', 'level', 'ERROR') conf.read([os.path.expanduser('~/.amocorc')]) else: conf = None @@ -58,6 +60,6 @@ def setdefaults(self): self.mset('block', header=True) self.mset('block', bytecode=True) self.mset('block', padding=4) - self.mset('log', level=20) + self.mset('log', level='ERROR') conf = DefaultConf() diff --git a/amoco/logger.py b/amoco/logger.py index c8b3a34..2428036 100644 --- a/amoco/logger.py +++ b/amoco/logger.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license import logging @@ -13,9 +15,25 @@ try: from amoco import conf - default_level = conf.getint('log','level') + try: + default_level = conf.getint('log','level') + if default_level is None: default_level = 0 + except ValueError: + default_level = logging._levelNames.get(conf.get('log','level'),0) + if 
conf.has_option('log','file'): + logfilename = conf.get('log','file') + else: + logfilename = None except ImportError: default_level = logging.ERROR + logfilename = None + +if logfilename: + logfile = logging.FileHandler(logfilename,mode='w') + logfile.setFormatter(default_format) + logfile.setLevel(logging.DEBUG) +else: + logfile = None class Log(logging.Logger): def __init__(self,name,handler=logging.StreamHandler()): @@ -23,11 +41,15 @@ def __init__(self,name,handler=logging.StreamHandler()): handler.setFormatter(default_format) self.addHandler(handler) self.setLevel(default_level) + if logfile: self.addHandler(logfile) self.register(name,self) def verbose(self,msg,*args,**kargs): return self.log(VERBOSE,msg,*args,**kargs) + def setLevel(self,lvl): + self.handlers[0].setLevel(lvl) + @classmethod def register(cls,name,self): if name in self.loggers: @@ -47,4 +69,13 @@ def set_log_all(level): for l in Log.loggers.itervalues(): l.setLevel(level) +def set_log_file(filename): + if logfile is not None: + logfile.close() + logfile = logging.FileHandler(logfilename,mode='w') + logfile.setFormatter(default_format) + logfile.setLevel(logging.DEBUG) + for l in Log.loggers.itervalues(): + l.addHandler(logfile) + Log.loggers = {} diff --git a/amoco/main.py b/amoco/main.py index 6672b8d..2fe3489 100644 --- a/amoco/main.py +++ b/amoco/main.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.logger import Log, set_debug,set_quiet,set_log_all @@ -11,15 +13,18 @@ from amoco.arch.core import INSTRUCTION_TYPES -# linear sweep based analysis. +# linear sweep based analysis: +# fast & dumb way of disassembling prog, +# but provides iterblocks() for all parent classes. 
class lsweep(object): __slots__ = ['prog','G'] def __init__(self,prog): self.prog = prog - self.G = {} + self.G = cfg.graph() # iterator over linearly sweeped instructions # starting at address loc (defaults to entrypoint). + # If not None, loc argument should be a cst object. def sequence(self,loc=None): p = self.prog if loc is None: @@ -34,6 +39,9 @@ def sequence(self,loc=None): loc += i.length yield i + # iterator over basic blocks using the instruction.type attribute + # to detect the end of block (type_control_flow). The returned block + # object is enhanced with plateform-specific infos (see block.misc). def iterblocks(self,loc=None): inblock = (lambda i: INSTRUCTION_TYPES[i.type]!='control_flow') l = [] @@ -60,115 +68,225 @@ def iterblocks(self,loc=None): b = code.block(l) yield self.prog.codehelper(block=b) + # getblock is a handy wrapper of iterblocks to + # return the block located at address val provided as Python Int. def getblock(self,val): p = self.prog target = p.cpu.cst(val,p.PC().size) return next(self.iterblocks(target)) + # poorman's cfg builder that only groups blocks that belong to the + # same function based on FUNC_START/FUNC_STOP tags heuristics. def getcfg(self,loc=None): - F = [] + nprev = None for b in self.iterblocks(loc): + n = cfg.node(b) if b.misc[code.tag.FUNC_START]: - f = cfg.func() - if b.misc[code.tag.FUNC_END]: - F.append(f) - try: - f.add_vertex(cfg.node(b)) - except NameError: - logger.warning('linear sweep orfan block %s'%b.name) - F.append(b) - return F + nprev = None + if nprev is None: + self.G.add_vertex(n) + else: + self.G.add_edge(cfg.link(nprev,n)) + nprev = n # ----------------------------------------------------------------------------- -# fast forward based analysis -# follows PC expression evaluated within a single block only. 
+class _target(object): + def __init__(self,cst,parent,econd=None): + self.cst = cst + self.parent = parent + self.econd = econd + + def expand(self): + if self.cst._is_ext: + return [self] + if self.cst._is_cst: + return [self] + if self.cst._is_tst: + ltrue = self.select(True).expand() + lfalse = self.select(False).expand() + return ltrue+lfalse + return [] + + def select(self,side): + assert self.cst._is_tst + x = self.cst.l if side is True else self.cst.r + econd = self.econd or [] + econd.append(self.cst.tst==side) + return _target(x,self.parent,econd) + + +# ----------------------------------------------------------------------------- +# fast forward based analysis: +# follows PC expression evaluated within a single block only. # exploration goes forward until expressions are not cst. class fforward(lsweep): policy = {'depth-first': True, 'branch-lazy': True} def init_spool(self,loc): - return [(loc,None)] + return [_target(loc,None)] + + def update_spool(self,spool,vtx,parent): + T = self.get_targets(vtx,parent) + if len(T)>0: + spool.extend(T) + return + err = '%s analysis stopped at %s'%(self.__class__.__name__,vtx) + logger.info(err) + vtx.data.misc['tbc'] = 1 - def get_target(self,blk,withmap): - # withmap unused in fforward + # compute expression of target address (PC) in node.data.map + def get_targets(self,node,parent): + blk = node.data m = code.mapper() pc = self.prog.PC() m[pc] = blk.address - target = (blk.map(pc)).eval(m) - return target.simplify() + pc = (blk.map(pc)).eval(m) + return _target(pc,node).expand() + + def add_root_node(self,vtx): + vtx.data.misc[code.tag.FUNC_START]=1 + vtx.data.misc['callers'] = [] + self.G.add_vertex(vtx) + logger.verbose('root node %s added'%vtx.name) + + def add_call_node(self,vtx,parent,econd): + b = vtx.data + b.misc[code.tag.FUNC_START]+=1 + parent.data.misc[code.tag.FUNC_CALL] += 1 + try: + b.misc['callers'] += [parent] + except TypeError: + b.misc['callers'] = [parent] + if b.misc['func']: + 
logger.verbose('function %s called'%b.misc['func']) + vtx = cfg.node(b.misc['func']) + e = cfg.link(parent,vtx,data=econd) + self.G.add_edge(e) + else: + self.G.add_vertex(vtx) + logger.verbose('block %s starts a new cfg component'%vtx.name) + return vtx + + def check_ext_target(self,t,spool): + if t.cst is None: return False + if t.cst._is_ext: + b = code.xfunc(t.cst) + vtx = cfg.node(b) + e = cfg.link(t.parent,vtx,data=t.econd) + self.G.add_edge(e) + self.update_spool(spool,vtx,t.parent) + return True + return False # generic 'forward' analysis explorer. # default explore policy is depth-first search (use policy=0 for breadth-first search.) - # return instructions are not followed (see backward analysis). + # return instructions are not followed (see lbackward analysis). def getcfg(self,loc=None): + G = self.G + # spool is the list of (target,parent) addresses to be analysed spool = self.init_spool(loc) + # order is the index to pop elements from spool order = -1 if self.policy['depth-first'] else 0 + # lazy is a flag to fallback to linear sweep lazy = self.policy['branch-lazy'] - F = cfg.func() + # proceed with exploration of every spool element: while len(spool)>0: - current,parent = spool.pop(order) - for b in self.iterblocks(loc=current): - err = '%s analysis failed at block %s'%(self.__class__.__name__,b.name) - sta,sto = b.support - vtx = cfg.node(b) - if vtx in F.V(): - e = cfg.link(parent,F.get_node(vtx.name)) - F.add_edge(e) - logger.verbose('edge %s added'%e) - break - if parent is None or (parent.data.address is None): - b.misc[code.tag.FUNC_START]=1 - F.add_vertex(vtx) - logger.verbose('root node %s added'%vtx.name) + t = spool.pop(order) + parent = t.parent + econd = t.econd + if self.check_ext_target(t,spool): continue + for b in self.iterblocks(loc=t.cst): + vtx = G.get_node(b.name) or cfg.node(b) + b = vtx.data + # if block is a FUNC_START, we add it as a new graph component (no link to parent), + # otherwise we add the new (parent,vtx) edge. 
+ if parent is None: + self.add_root_node(vtx) + elif parent.data.misc[code.tag.FUNC_CALL]: + vtx = self.add_call_node(vtx,parent,econd) else: - if b.misc[code.tag.FUNC_START] and parent.data.misc[code.tag.FUNC_CALL]: - b.misc[code.tag.FUNC_START]+=1 - F.add_vertex(vtx) - logger.verbose('function node %s added'%vtx.name) - else: - e = cfg.link(parent,vtx) - F.add_edge(e) - logger.verbose('edge %s added'%e) - # continue and update spool... - target = self.get_target(b,withmap=parent) + e = cfg.link(parent,vtx,data=econd) + G.add_edge(e) + logger.verbose('edge %s added'%e) + # if vtx was visited before targets have been added already: + if len(vtx.e_in())>1: break + # now we try to populate spool with target addresses of current block: + self.update_spool(spool,vtx,parent) + if not lazy or b.misc[code.tag.FUNC_END]: break + logger.verbose("lsweep fallback at %s"%b.name) parent = vtx - if target==sto: - continue - elif target._is_cst: - spool.append((target,parent)) - if not lazy: break - elif target._is_tst: - t1 = target.l - t2 = target.r - if t1._is_cst: - spool.append((t1,parent)) - else: - logger.info(err+' (true branch)') - if t2._is_cst: - spool.append((t2,parent)) - else: - logger.info(err+' (false branch)') - break - else: - logger.info(err) - if not lazy: break - return F + econd = None + return G # ----------------------------------------------------------------------------- -# link forward based analysis +# link forward based analysis: # follows PC expression evaluated with parent block mapping. -# exploration goes forward until expressions are not cst. +# Exploration goes forward until expressions are not cst. 
class lforward(fforward): policy = {'depth-first': True, 'branch-lazy': False} - def init_spool(self,loc): - return [(loc,cfg.node(code.block([])))] + def get_targets(self,node,parent): + blk = node.data + pc = self.prog.PC() + if parent is None: + pc = blk.map.use((pc,blk.address))(pc) + else: + m = parent.data.map.use((pc,parent.data.address)) # work on copy + m[pc] = blk.address + pc = m(blk.map(pc)) + return _target(pc,node).expand() + - def get_target(self,blk,withmap): - # use withmap for blk.map eval: - m = withmap.data.map.use() #work on copy +# ----------------------------------------------------------------------------- +# fast backward based analysis: +# a generalisation of link forward where pc is evaluated backwardly by taking +# the first-parent-node path until no parent exists (entry of a function) +# fbackward is the first class to instanciate code.func objects. +# The 'frame_aliasing' policy indicates wether memory aliasing of pc expression +# outside of the function frame can occur or if the frame is assumed to be clean. +# Default frame-aliasing is set to False (assume no aliasing) otherwise any +# function that writes in memory results in potential aliasing (say for an arch +# that uses a memory stack for storing return addresses). 
+class fbackward(lforward): + policy = {'depth-first': True, 'branch-lazy': False, 'frame-aliasing':False} + + def get_targets(self,node,parent): pc = self.prog.PC() - m[pc] = blk.address - target = (blk.map(pc)).eval(m) - return target.simplify() + n = node + mpc = pc + while True: + m = n.data.map.use((pc,n.data.address)) + mpc = m(mpc) + T = _target(mpc,node).expand() + if len(T)>0: return T + try: + n = n.N(-1)[0] # get first parent node (parent arg is unused) + except IndexError: + break # we are at function entry node + # create func nodes: + xpc = [] + if n.data.misc[code.tag.FUNC_START]: + if node.data.misc[code.tag.FUNC_END]: + n.data.misc[code.tag.FUNC_START] += 1 + try: + fsym = n.data.misc['callers'][0].data.misc['to'].ref + except (IndexError,TypeError,AttributeError): + fsym = 'f' + func = code.func(n.c,name="%s:%s"%(fsym,n.name)) + logger.verbose("function %s created"%func) + if mpc._is_mem and len(mpc.mods)>0: + pol = '(assume_no_aliasing)' if self.policy['frame-aliasing']==False else '' + logger.verbose("pc is memory aliased in %s %s"%(str(func),pol)) + if self.policy['frame-aliasing']==False: mpc.mods = [] + func.map[pc] = mpc + for cn in n.data.misc['callers']: + cnpc = cn.data.map.use((pc,cn.data.address))(mpc) + f = cfg.node(func) + cfg.link(cn,f,connect=True) + xpc.extend(_target(cnpc,f).expand()) + n.data.misc['func'] = func + else: + xpc.extend(_target(mpc,node).expand()) + return xpc + diff --git a/amoco/system/__init__.py b/amoco/system/__init__.py index 9822d53..d3f893a 100644 --- a/amoco/system/__init__.py +++ b/amoco/system/__init__.py @@ -1 +1,3 @@ +# -*- coding: utf-8 -*- + import loader diff --git a/amoco/system/core.py b/amoco/system/core.py index daac3ed..f70dc78 100644 --- a/amoco/system/core.py +++ b/amoco/system/core.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2007-2013 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2007-2013 Axel Tillequin (bdcht3@gmail.com) # published under 
GPLv2 license @@ -8,6 +10,8 @@ from bisect import bisect_left +from amoco.cas.expressions import top + #------------------------------------------------------------------------------ # datadiv provides the API for manipulating data values extracted from memory. # These values are either considered as 'raw' (byte strings) or can be any @@ -33,7 +37,9 @@ def __len__(self): def __repr__(self): s = repr(self.val) - if len(s)>32: s=s[:32]+'...' + if len(s)>32: + s=s[:32]+"..." + if isinstance(self.val,str): s+="'" return ''%s def __str__(self): @@ -43,9 +49,9 @@ def cut(self,l): if self._is_raw: self.val = self.val[l:] else: - self.val = self.val[8*l:] + self.val = self.val.bytes(l) - # returns (result, counter) where result is a part of val of length l + # returns (result, counter) where result is a part of val of length l # located at offset o, and counter is the number of bytes that still # need to be read from another div. def getpart(self,o,l): @@ -63,10 +69,8 @@ def getpart(self,o,l): res = self.val[o:o+l] return (res,l-len(res)) if o>=lv: return (None,l) - o,l = 8*o,8*l - n,r = divmod(o+l,s) - if n>0: return (self.val[o:s],r/8) - return (self.val[o:o+l],0) + res = self.val.bytes(o,o+l) + return (res,l-res.length) # returns a list of (contiguous) datadiv objects resulting from # overwriting self with data at offset o, possibly extending self. @@ -114,7 +118,9 @@ def __contains__(self,vaddr): def __repr__(self): data = str(self.data) - if len(data)>32: data=data[:32]+'...' + if len(data)>32: + data=data[:32]+"..." 
+ if self.data._is_raw: data+="'" return ''%(self.vaddr,self.end,data) # change current obj to start at provided vaddr @@ -129,7 +135,7 @@ def read(self,vaddr,l): if vaddr in self: return self.data.getpart(vaddr-self.vaddr,l) else: - logger.warning('%s read out of bound (vaddr=%08x, l=%d)',repr(self),vaddr,l) + logger.debug('%s read out of bound (vaddr=%08x, l=%d)',repr(self),vaddr,l) return (None,l) # update current obj resulting from writing datadiv at vaddr, returning the @@ -145,16 +151,17 @@ def write(self,vaddr,data): vaddr += len(p) return O else: - logger.verbose('%s write out of bound (vaddr=%08x,data=%.32s)',repr(self),vaddr,repr(data)) + logger.debug('%s write out of bound (vaddr=%08x,data=%.32s)',repr(self),vaddr,repr(data)) return [mo(vaddr,data)] #------------------------------------------------------------------------------ class MemoryZone(object): - __slot__ = ['rel','_map'] + __slot__ = ['rel','_map','__cache'] def __init__(self,rel=None,D=None): self.rel = rel self._map = [] + self.__cache = [] # speedup locate method if D != None and isinstance(D,dict): for vaddr,data in D.iteritems(): self.addtomap(mo(vaddr,data)) @@ -169,30 +176,49 @@ def __str__(self): l.append("\t %s"%str(z)) return '\n'.join(l)+'>' + def __update_cache(self): + self.__cache = [z.vaddr for z in self._map] + # locate the index that contains the given address in the mmap: def locate(self,vaddr): - p = [z.vaddr for z in self._map] + p = self.__cache if vaddr in p: return p.index(vaddr) i = bisect_left(p,vaddr) if i==0: return None else: return i-1 # read l bytes starting at vaddr. - # A MemoryError is raised if some bytes are not mapped. + # return value is a list of datadiv values, unmapped areas + # are returned as 'top' expressions. 
def read(self,vaddr,l): + res = [] i = self.locate(vaddr) - if i is None: raise MemoryError(l) + if i is None: + if len(self._map)==0: return [top(l*8)] + v0 = self._map[0].vaddr + if (vaddr+l)<=v0: return [top(l*8)] + res.append(top((v0-vaddr)*8)) + l = (vaddr+l)-v0 + vaddr = v0 + i = 0 ll = l - res = [] while ll>0: try: data,ll = self._map[i].read(vaddr,ll) except IndexError: - data=None + res.append(top(ll*8)) + ll=0 + break if data is None: - raise MemoryError(ll) - vaddr += len(data) - res.append(data) + vi = self.__cache[i] + if vaddr < vi: + l = min(vaddr+ll,vi)-vaddr + data = top(l*8) + ll -= l + i -=1 + if data is not None: + vaddr += len(data) + res.append(data) i += 1 assert ll==0 return res @@ -210,6 +236,7 @@ def addtomap(self,z): if j is None: assert i is None self._map.insert(0,z) + self.__update_cache() return if j==i: Z = self._map[i].write(z.vaddr,z.data.val) @@ -217,6 +244,7 @@ def addtomap(self,z): for newz in Z: self._map.insert(i,newz) i+=1 + self.__update_cache() return # i!=j cases: # delete & update every overwritten zones @@ -237,6 +265,7 @@ def addtomap(self,z): for newz in Z: self._map.insert(i,newz) i+=1 + self.__update_cache() def restruct(self): if len(self._map)==0: return @@ -251,6 +280,7 @@ def restruct(self): else: m.append(z) self._map = m + self.__update_cache() #------------------------------------------------------------------------------ class MemoryMap(object): @@ -264,10 +294,11 @@ def newzone(self,label): z = MemoryZone() z.rel = label self._zones[label] = z + return z def locate(self,address): r, a = self.reference(address) - idx = self._zones[r].locate(address) + idx = self._zones[r].locate(a) return self._zones[r]._map[idx] def reference(self,address): @@ -277,6 +308,8 @@ def reference(self,address): return (address,0) try: r,a = (address.base,address.disp) + if r._is_cst: + return (None,(r+a).v) return (r,a) except AttributeError: if address._is_cst: @@ -301,12 +334,20 @@ def __getitem__(self,aslc): def 
read(self,address,l): r,o = self.reference(address) - return self._zones[r].read(o,l) + if r in self._zones: + return self._zones[r].read(o,l) + else: + raise MemoryError(address) def write(self,address,expr): r,o = self.reference(address) + if not r in self._zones: + self.newzone(r) self._zones[r].write(o,expr) + def restruct(self): + for z in self._zones.itervalues(): z.restruct() + #------------------------------------------------------------------------------ class CoreExec(object): __slots__ = ['bin','cpu','mmap'] @@ -336,15 +377,12 @@ def read_instruction(self,vaddr,**kargs): try: istr = self.mmap.read(vaddr,maxlen) except MemoryError,e: - ll = e.message - l = maxlen-ll - if l == 0: + logger.verbose("vaddr %s is not mapped"%vaddr) + raise MemoryError(e) + else: + if len(istr)>1 or not isinstance(istr[0],str): + logger.verbose("failed to read instruction at %s"%vaddr) return None - logger.warning("instruction fetch error: reducing fetch size (%d)"%l) - istr = self.mmap.read(vaddr,l) - if len(istr)>1: - logger.warning("read_instruction: can't fetch vaddr %s"%vaddr) - raise MemoryError i = self.cpu.disassemble(istr[0],**kargs) if i is None: logger.warning("disassemble failed at vaddr %s"%vaddr) diff --git a/amoco/system/elf.py b/amoco/system/elf.py index bded1e3..cee6a79 100644 --- a/amoco/system/elf.py +++ b/amoco/system/elf.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license import struct @@ -51,7 +53,7 @@ class Elf32_Ehdr(Elfcore): 'e_shentsize', 'e_shnum', 'e_shstrndx') - # overload Elfcore methods to take into account the e_ident dict: + # overload Elfcore methods to take into account the e_ident dict: def __init__(self, data): S = struct.unpack('B3sBBBBBxxxxxxx',data[:16]) if S[0]!=0x7f or S[1]!='ELF': @@ -974,7 +976,7 @@ class Elf64_Ehdr(Elfcore): 'e_shentsize', 'e_shnum', 'e_shstrndx') - # 
overload Elfcore methods to take into account the e_ident dict: + # overload Elfcore methods to take into account the e_ident dict: def __init__(self, data): S = struct.unpack('B3sBBBBBxxxxxxx',data[:16]) if S[0]!=0x7f or S[1]!='ELF': diff --git a/amoco/system/gameboy.py b/amoco/system/gameboy.py index 1c83cea..3a54574 100644 --- a/amoco/system/gameboy.py +++ b/amoco/system/gameboy.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -125,16 +125,14 @@ def read_instruction(self,vaddr,**kargs): try: istr = self.mmap.read(vaddr,maxlen) except MemoryError,e: - ll = e.message - l = maxlen-ll - logger.warning("instruction fetch error: reducing fetch size (%d)"%l) - istr = self.mmap.read(vaddr,l) - if len(istr)>1: - logger.warning("read_instruction: can't fetch vaddr %s"%vaddr) - raise MemoryError + logger.warning("vaddr %s is not mapped"%vaddr) + raise MemoryError(e) i = self.cpu.disassemble(istr[0],**kargs) if i is None: logger.warning("disassemble failed at vaddr %s"%vaddr) + if len(istr)>1 and istr[1]._is_def: + logger.warning("symbol found in instruction buffer"%vaddr) + raise MemoryError(vaddr) return None else: i.address = vaddr diff --git a/amoco/system/leon2.py b/amoco/system/leon2.py index c043855..9022453 100644 --- a/amoco/system/leon2.py +++ b/amoco/system/leon2.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -11,6 +13,7 @@ class ELF(CoreExec): def __init__(self,p): CoreExec.__init__(self,p,cpu) + cpu.exp.setendian(-1) # set endianess to big-endian # load the program into virtual memory (populate the mmap dict) def 
load_binary(self): diff --git a/amoco/system/linux_arm.py b/amoco/system/linux_arm.py index 501e230..4576381 100644 --- a/amoco/system/linux_arm.py +++ b/amoco/system/linux_arm.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -30,7 +32,7 @@ def load_binary(self): # for now, the external libs are seen through the elf dynamic section: def load_shlib(self): for k,f in self.bin._Elf32__dynamic(None).iteritems(): - self.mmap.write(k,cpu.ext(f)) + self.mmap.write(k,cpu.ext(f,size=32)) def initenv(self): from amoco.cas.mapper import mapper @@ -45,299 +47,8 @@ def PC(self): # LIBC HOOKS DEFINED HERE : #---------------------------------------------------------------------------- -def __libc_start_main(): - s = code.func('__libc_start_main',cfg=None) - pass - return s -def exit(): - s = code.func('exit',cfg=None) - return s -def abort(): - s = code.func('abort',cfg=None) - return s -def __assert(): - s = code.func('__assert',cfg=None) - return s -def __assert_fail(): - s = code.func('__assert_fail',cfg=None) - return s -def _assert_perror_fail(): - s = code.func('__assert_perror_fail',cfg=None) - return s + #---------------------------------------------------------------------------- # SYSCALLS: #---------------------------------------------------------------------------- -IDT={ - 1: "exit", - 2: "fork", - 3: "read", - 4: "write", - 5: "open", - 6: "close", - 7: "waitpid", - 8: "creat", - 9: "link", - 10: "unlink", - 11: "execve", - 12: "chdir", - 13: "time", - 14: "mknod", - 15: "chmod", - 16: "lchown", - 17: "break", - 18: "oldstat", - 19: "lseek", - 20: "getpid", - 21: "mount", - 22: "umount", - 23: "setuid", - 24: "getuid", - 25: "stime", - 26: "ptrace", - 27: "alarm", - 28: "oldfstat", - 29: "pause", - 30: "utime", - 31: "stty", - 32: "gtty", - 33: "access", 
- 34: "nice", - 35: "ftime", - 36: "sync", - 37: "kill", - 38: "rename", - 39: "mkdir", - 40: "rmdir", - 41: "dup", - 42: "pipe", - 43: "times", - 44: "prof", - 45: "brk", - 46: "setgid", - 47: "getgid", - 48: "signal", - 49: "geteuid", - 50: "getegid", - 51: "acct", - 52: "umount2", - 53: "lock", - 54: "ioctl", - 55: "fcntl", - 56: "mpx", - 57: "setpgid", - 58: "ulimit", - 59: "oldolduname", - 60: "umask", - 61: "chroot", - 62: "ustat", - 63: "dup2", - 64: "getppid", - 65: "getpgrp", - 66: "setsid", - 67: "sigaction", - 68: "sgetmask", - 69: "ssetmask", - 70: "setreuid", - 71: "setregid", - 72: "sigsuspend", - 73: "sigpending", - 74: "sethostname", - 75: "setrlimit", - 76: "getrlimit", - 77: "getrusage", - 78: "gettimeofday", - 79: "settimeofday", - 80: "getgroups", - 81: "setgroups", - 82: "select", - 83: "symlink", - 84: "oldlstat", - 85: "readlink", - 86: "uselib", - 87: "swapon", - 88: "reboot", - 89: "readdir", - 90: "mmap", - 91: "munmap", - 92: "truncate", - 93: "ftruncate", - 94: "fchmod", - 95: "fchown", - 96: "getpriority", - 97: "setpriority", - 98: "profil", - 99: "statfs", -100: "fstatfs", -101: "ioperm", -102: "socketcall", -103: "syslog", -104: "setitimer", -105: "getitimer", -106: "stat", -107: "lstat", -108: "fstat", -109: "olduname", -110: "iopl", -111: "vhangup", -112: "idle", -113: "vm86old", -114: "wait4", -115: "swapoff", -116: "sysinfo", -117: "ipc", -118: "fsync", -119: "sigreturn", -120: "clone", -121: "setdomainname", -122: "uname", -123: "modify_ldt", -124: "adjtimex", -125: "mprotect", -126: "sigprocmask", -127: "create_module", -128: "init_module", -129: "delete_module", -130: "get_kernel_syms", -131: "quotactl", -132: "getpgid", -133: "fchdir", -134: "bdflush", -135: "sysfs", -136: "personality", -137: "afs_syscall", -138: "setfsuid", -139: "setfsgid", -140: "_llseek", -141: "getdents", -142: "_newselect", -143: "flock", -144: "msync", -145: "readv", -146: "writev", -147: "getsid", -148: "fdatasync", -149: "_sysctl", -150: "mlock", 
-151: "munlock", -152: "mlockall", -153: "munlockall", -154: "sched_setparam", -155: "sched_getparam", -156: "sched_setscheduler", -157: "sched_getscheduler", -158: "sched_yield", -159: "sched_get_priority_max", -160: "sched_get_priority_min", -161: "sched_rr_get_interval", -162: "nanosleep", -163: "mremap", -164: "setresuid", -165: "getresuid", -166: "vm86", -167: "query_module", -168: "poll", -169: "nfsservctl", -170: "setresgid", -171: "getresgid", -172: "prctl", -173: "rt_sigreturn", -174: "rt_sigaction", -175: "rt_sigprocmask", -176: "rt_sigpending", -177: "rt_sigtimedwait", -178: "rt_sigqueueinfo", -179: "rt_sigsuspend", -180: "pread64", -181: "pwrite64", -182: "chown", -183: "getcwd", -184: "capget", -185: "capset", -186: "sigaltstack", -187: "sendfile", -188: "getpmsg", -189: "putpmsg", -190: "vfork", -191: "ugetrlimit", -192: "mmap2", -193: "truncate64", -194: "ftruncate64", -195: "stat64", -196: "lstat64", -197: "fstat64", -198: "lchown32", -199: "getuid32", -200: "getgid32", -201: "geteuid32", -202: "getegid32", -203: "setreuid32", -204: "setregid32", -205: "getgroups32", -206: "setgroups32", -207: "fchown32", -208: "setresuid32", -209: "getresuid32", -210: "setresgid32", -211: "getresgid32", -212: "chown32", -213: "setuid32", -214: "setgid32", -215: "setfsuid32", -216: "setfsgid32", -217: "pivot_root", -218: "mincore", -219: "madvise", -219: "madvise1", -220: "getdents64", -221: "fcntl64", -224: "gettid", -225: "readahead", -226: "setxattr", -227: "lsetxattr", -228: "fsetxattr", -229: "getxattr", -230: "lgetxattr", -231: "fgetxattr", -232: "listxattr", -233: "llistxattr", -234: "flistxattr", -235: "removexattr", -236: "lremovexattr", -237: "fremovexattr", -238: "tkill", -239: "sendfile64", -240: "futex", -241: "sched_setaffinity", -242: "sched_getaffinity", -243: "set_thread_area", -244: "get_thread_area", -245: "io_setup", -246: "io_destroy", -247: "io_getevents", -248: "io_submit", -249: "io_cancel", -250: "fadvise64", -252: "exit_group", -253: 
"lookup_dcookie", -254: "epoll_create", -255: "epoll_ctl", -256: "epoll_wait", -257: "remap_file_pages", -258: "set_tid_address", -259: "timer_create", -260: "timer_settime", -261: "timer_gettime", -262: "timer_getoverrun", -263: "timer_delete", -264: "clock_settime", -265: "clock_gettime", -266: "clock_getres", -267: "clock_nanosleep", -268: "statfs64", -269: "fstatfs64", -270: "tgkill", -271: "utimes", -272: "fadvise64_64", -273: "vserver" } - diff --git a/amoco/system/linux_arm64.py b/amoco/system/linux_arm64.py index cf0a206..526aec8 100644 --- a/amoco/system/linux_arm64.py +++ b/amoco/system/linux_arm64.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * diff --git a/amoco/system/linux_x64.py b/amoco/system/linux_x64.py index bf34734..1e4e73c 100644 --- a/amoco/system/linux_x64.py +++ b/amoco/system/linux_x64.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -33,14 +35,14 @@ def load_binary(self): # for now, the external libs are seen through the elf dynamic section: def load_shlib(self): for k,f in self.bin._Elf64__dynamic(None).iteritems(): - self.mmap.write(k,cpu.ext(f)) + self.mmap.write(k,cpu.ext(f,size=64)) # lookup in bin if v is associated with a function or variable name: def check_sym(self,v): if v._is_cst: x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None) if x is not None: - if isinstance(x,str): x=cpu.ext(x) + if isinstance(x,str): x=cpu.ext(x,size=64) else: x=cpu.sym(x[0],v.value,v.size) return x return None @@ -129,28 +131,28 @@ def blockhelper(self,block): 
#---------------------------------------------------------------------------- @stub_default -def pop_rip(m): +def pop_rip(m,**kargs): cpu.pop(m,cpu.rip) @stub -def __libc_start_main(m): +def __libc_start_main(m,**kargs): m[cpu.rip] = m(cpu.mem(cpu.rsp+4,64)) - cpu.push(m,cpu.ext('exit')) + cpu.push(m,cpu.ext('exit',size=64)) @stub -def exit(m): +def exit(m,**kargs): m[cpu.rip] = top(64) @stub -def abort(m): +def abort(m,**kargs): m[cpu.rip] = top(64) @stub -def __assert(m): +def __assert(m,**kargs): m[cpu.rip] = top(64) @stub -def __assert_fail(m): +def __assert_fail(m,**kargs): m[cpu.rip] = top(64) @stub -def _assert_perror_fail(m): +def _assert_perror_fail(m,**kargs): m[cpu.rip] = top(64) #---------------------------------------------------------------------------- diff --git a/amoco/system/linux_x86.py b/amoco/system/linux_x86.py index f6f1655..2e92c3b 100644 --- a/amoco/system/linux_x86.py +++ b/amoco/system/linux_x86.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -33,14 +35,14 @@ def load_binary(self): # for now, the external libs are seen through the elf dynamic section: def load_shlib(self): for k,f in self.bin._Elf32__dynamic(None).iteritems(): - self.mmap.write(k,cpu.ext(f)) + self.mmap.write(k,cpu.ext(f,size=32)) # lookup in bin if v is associated with a function or variable name: def check_sym(self,v): if v._is_cst: x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None) if x is not None: - if isinstance(x,str): x=cpu.ext(x) + if isinstance(x,str): x=cpu.ext(x,size=32) else: x=cpu.sym(x[0],v.value,v.size) return x return None @@ -103,7 +105,12 @@ def seqhelper(self,seq): else: i.misc[tag.FUNC_VAR]=1 elif op.a.base._is_cst: x = self.check_sym(op.a.base) - if x is not None: op.a.base=x + if x is not None: + op.a.base=x 
+ if i.mnemonic == 'JMP': # PLT jumps: + i.address = i.address.to_sym('PLT%s'%x) + i.misc[tag.FUNC_START]=1 + i.misc[tag.FUNC_END]=1 elif op._is_cst: x = self.check_sym(op) i.misc['imm_ref'] = x @@ -129,28 +136,28 @@ def blockhelper(self,block): #---------------------------------------------------------------------------- @stub_default -def pop_eip(m): +def pop_eip(m,**kargs): cpu.pop(m,cpu.eip) @stub -def __libc_start_main(m): +def __libc_start_main(m,**kargs): m[cpu.eip] = m(cpu.mem(cpu.esp+4,32)) - cpu.push(m,cpu.ext('exit')) + cpu.push(m,cpu.ext('exit',size=32)) @stub -def exit(m): +def exit(m,**kargs): m[cpu.eip] = top(32) @stub -def abort(m): +def abort(m,**kargs): m[cpu.eip] = top(32) @stub -def __assert(m): +def __assert(m,**kargs): m[cpu.eip] = top(32) @stub -def __assert_fail(m): +def __assert_fail(m,**kargs): m[cpu.eip] = top(32) @stub -def _assert_perror_fail(m): +def _assert_perror_fail(m,**kargs): m[cpu.eip] = top(32) #---------------------------------------------------------------------------- diff --git a/amoco/system/loader.py b/amoco/system/loader.py index 2d3aae2..c37293a 100644 --- a/amoco/system/loader.py +++ b/amoco/system/loader.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.logger import * @@ -12,7 +14,7 @@ #------------------------------------------------------------------------------ # read_program is responsible of identifying the program header (ELF/PE). # It returns an ELF or PE class instance. -# loading the associated "system" (Linux/Windows) and "environment" (x86/etc), +# loading the associated "system" (Linux/Windows) and "environment" (x86/etc), # based on information from its header. 
#------------------------------------------------------------------------------ def read_program(filename): @@ -33,13 +35,13 @@ def read_program(filename): except pe.PEError: pass - logger.error('unknown format') + logger.warning('unknown format') try: data = file(filename,'rb') except (TypeError,IOError): data = filename return DataIO(data) - ## + ## ## #------------------------------------------------------------------------------ diff --git a/amoco/system/msp430.py b/amoco/system/msp430.py index 903984c..a897857 100644 --- a/amoco/system/msp430.py +++ b/amoco/system/msp430.py @@ -1,7 +1,7 @@ -#!/usr/bin/env python +# -*- coding: utf-8 -*- # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * diff --git a/amoco/system/pe.py b/amoco/system/pe.py index c032cd5..12c8533 100644 --- a/amoco/system/pe.py +++ b/amoco/system/pe.py @@ -1,6 +1,8 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco # based on elf.py, improving pefile to work out corkami's CoST.exe. 
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license import struct diff --git a/amoco/system/pic18.py b/amoco/system/pic18.py index 1834135..d23ef36 100644 --- a/amoco/system/pic18.py +++ b/amoco/system/pic18.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -26,16 +28,14 @@ def read_instruction(self,vaddr,**kargs): try: istr = self.cmap.read(vaddr,maxlen) except MemoryError,e: - ll = e.message - l = maxlen-ll - logger.warning("instruction fetch error: reducing fetch size (%d)"%l) - istr = self.cmap.read(vaddr,l) - if len(istr)>1: - logger.warning("read_instruction: can't fetch vaddr %s"%vaddr) - raise MemoryError + logger.warning("vaddr %s is not mapped"%vaddr) + raise MemoryError(e) i = self.cpu.disassemble(istr[0],**kargs) if i is None: logger.warning("disassemble failed at vaddr %s"%vaddr) + if len(istr)>1 and istr[1]._is_def: + logger.warning("symbol found in instruction buffer"%vaddr) + raise MemoryError(vaddr) return None else: i.address = vaddr diff --git a/amoco/system/raw.py b/amoco/system/raw.py index 797aadd..faedbef 100644 --- a/amoco/system/raw.py +++ b/amoco/system/raw.py @@ -1,3 +1,5 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco # Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license diff --git a/amoco/system/win32.py b/amoco/system/win32.py index 43904bc..e18a27b 100644 --- a/amoco/system/win32.py +++ b/amoco/system/win32.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -34,7 +36,7 @@ def load_binary(self): # 
for now, the external libs are seen through the elf dynamic section: def load_shlib(self): for k,f in self.bin.functions.iteritems(): - self.mmap.write(k,cpu.ext(f)) + self.mmap.write(k,cpu.ext(f,size=32)) def initenv(self): from amoco.cas.mapper import mapper @@ -57,7 +59,7 @@ def check_sym(self,v): if v._is_cst: x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None) if x is not None: - if isinstance(x,str): x=cpu.ext(x) + if isinstance(x,str): x=cpu.ext(x,size=32) else: x=cpu.sym(x[0],v.value,v.size) return x return None @@ -128,7 +130,7 @@ def blockhelper(self,block): #---------------------------------------------------------------------------- @stub_default -def pop_eip(m): +def pop_eip(m,**kargs): cpu.pop(m,cpu.eip) diff --git a/amoco/system/win64.py b/amoco/system/win64.py index 6be62c8..b6c1557 100644 --- a/amoco/system/win64.py +++ b/amoco/system/win64.py @@ -1,5 +1,7 @@ +# -*- coding: utf-8 -*- + # This code is part of Amoco -# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com) +# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com) # published under GPLv2 license from amoco.system.core import * @@ -34,7 +36,7 @@ def load_binary(self): # for now, the external libs are seen through the elf dynamic section: def load_shlib(self): for k,f in self.bin.functions.iteritems(): - self.mmap.write(k,cpu.ext(f)) + self.mmap.write(k,cpu.ext(f,size=64)) def initenv(self): from amoco.cas.mapper import mapper @@ -57,7 +59,7 @@ def check_sym(self,v): if v._is_cst: x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None) if x is not None: - if isinstance(x,str): x=cpu.ext(x) + if isinstance(x,str): x=cpu.ext(x,size=64) else: x=cpu.sym(x[0],v.value,v.size) return x return None @@ -128,7 +130,7 @@ def blockhelper(self,block): #---------------------------------------------------------------------------- @stub_default -def pop_rip(m): +def pop_rip(m,**kargs): cpu.pop(m,cpu.rip)