From c1e6cedf68c0fdfc879c6d7289edd824ba86a654 Mon Sep 17 00:00:00 2001
From: Axel Tillequin <bdcht3@gmail.com>
Date: Mon, 16 Feb 2015 17:59:57 +0100
Subject: [PATCH] merge z3 interface and fast backward analysis

---
 README.rst                          | 803 +++++++++++++++++++++++++---
 amoco/__init__.py                   |   2 +
 amoco/arch/arm/cpu_armv7.py         |   4 +-
 amoco/arch/arm/cpu_armv8.py         |   4 +-
 amoco/arch/arm/v7/asm.py            |  28 +-
 amoco/arch/arm/v7/env.py            |  36 +-
 amoco/arch/arm/v7/formats.py        |   2 +
 amoco/arch/arm/v7/spec_armv7.py     |   4 +-
 amoco/arch/arm/v7/spec_thumb.py     |   6 +-
 amoco/arch/arm/v7/spec_thumb2.py    |  10 +-
 amoco/arch/arm/v7/utils.py          |  18 +-
 amoco/arch/arm/v8/asm64.py          |   4 +-
 amoco/arch/arm/v8/env64.py          |   4 +-
 amoco/arch/arm/v8/formats.py        |   2 +
 amoco/arch/arm/v8/spec_armv8.py     |   6 +-
 amoco/arch/arm/v8/utils.py          |   4 +-
 amoco/arch/core.py                  |  22 +-
 amoco/arch/gas.py                   |   4 +-
 amoco/arch/msp430/asm.py            |   6 +-
 amoco/arch/msp430/cpu.py            |   2 +
 amoco/arch/msp430/env.py            |   4 +-
 amoco/arch/msp430/formats.py        |   2 +
 amoco/arch/msp430/parsers.py        |   3 +-
 amoco/arch/msp430/spec_msp430.py    |   6 +-
 amoco/arch/pic/F46K22/asm.py        |   4 +-
 amoco/arch/pic/F46K22/env.py        |   4 +-
 amoco/arch/pic/F46K22/formats.py    |   2 +
 amoco/arch/pic/F46K22/spec_pic18.py |   4 +-
 amoco/arch/pic/cpu_pic18f46k22.py   |   2 +
 amoco/arch/sparc/asm.py             |   4 +-
 amoco/arch/sparc/cpu_v8.py          |   2 +
 amoco/arch/sparc/env.py             |   4 +-
 amoco/arch/sparc/formats.py         |   2 +
 amoco/arch/sparc/parsers.py         |   3 +-
 amoco/arch/sparc/spec_v8.py         |   4 +-
 amoco/arch/sparc/utils.py           |   2 +
 amoco/arch/x64/asm.py               |  57 +-
 amoco/arch/x64/cpu_x64.py           |   2 +
 amoco/arch/x64/env.py               |  40 +-
 amoco/arch/x64/formats.py           |   2 +
 amoco/arch/x64/spec_fpu.py          |   4 +-
 amoco/arch/x64/spec_ia32e.py        |   4 +-
 amoco/arch/x64/spec_sse.py          |   8 +-
 amoco/arch/x64/utils.py             |   4 +-
 amoco/arch/x86/asm.py               |  75 ++-
 amoco/arch/x86/cpu_x86.py           |   2 +
 amoco/arch/x86/env.py               |  36 +-
 amoco/arch/x86/formats.py           |   4 +-
 amoco/arch/x86/parsers.py           |   3 +-
 amoco/arch/x86/spec_fpu.py          |   4 +-
 amoco/arch/x86/spec_ia32.py         |   4 +-
 amoco/arch/x86/spec_sse.py          |   6 +-
 amoco/arch/x86/utils.py             |   4 +-
 amoco/arch/z80/asm.py               |   2 +-
 amoco/arch/z80/cpu_gb.py            |   2 +
 amoco/arch/z80/cpu_z80.py           |   2 +
 amoco/arch/z80/env.py               |   2 +-
 amoco/arch/z80/formats.py           |   2 +
 amoco/arch/z80/spec_gb.py           |   2 +-
 amoco/arch/z80/spec_mostek.py       |   2 +-
 amoco/cas/expressions.py            | 529 +++++++++++-------
 amoco/cas/mapper.py                 | 179 +++++--
 amoco/cas/parser.py                 |  61 +++
 amoco/cas/smt.py                    | 120 +++++
 amoco/cas/tracker.py                |   4 +-
 amoco/cas/utils.py                  |   4 +-
 amoco/cfg.py                        |  90 +++-
 amoco/code.py                       |  89 ++-
 amoco/config.py                     |   6 +-
 amoco/logger.py                     |  35 +-
 amoco/main.py                       | 272 +++++++---
 amoco/system/__init__.py            |   2 +
 amoco/system/core.py                |  98 ++--
 amoco/system/elf.py                 |   8 +-
 amoco/system/gameboy.py             |  16 +-
 amoco/system/leon2.py               |   5 +-
 amoco/system/linux_arm.py           | 299 +----------
 amoco/system/linux_arm64.py         |   4 +-
 amoco/system/linux_x64.py           |  24 +-
 amoco/system/linux_x86.py           |  31 +-
 amoco/system/loader.py              |  10 +-
 amoco/system/msp430.py              |   4 +-
 amoco/system/pe.py                  |   4 +-
 amoco/system/pic18.py               |  16 +-
 amoco/system/raw.py                 |   2 +
 amoco/system/win32.py               |  10 +-
 amoco/system/win64.py               |  10 +-
 87 files changed, 2182 insertions(+), 1046 deletions(-)
 create mode 100644 amoco/cas/parser.py
 create mode 100644 amoco/cas/smt.py

diff --git a/README.rst b/README.rst
index 88b614d..3422d73 100644
--- a/README.rst
+++ b/README.rst
@@ -6,9 +6,13 @@ Amoco
 +-----------+-----------------------------------+
 | Location: | https://github.com/bdcht/amoco    |
 +-----------+-----------------------------------+
-| Version:  | 2.3                               |
+| Version:  | 2.4                               |
 +-----------+-----------------------------------+
 
+.. contents:: **Table of Contents**
+    :local:
+    :depth: 3
+    :backlinks: top
 
 Description
 ===========
@@ -43,7 +47,6 @@ It features:
 Amoco is still *work in progress*. See Todo_ for a list of features to be
 merged from develop branch or to be more thoroughly implemented.
 
-
 History
 =======
 
@@ -69,8 +72,10 @@ More precisely:
 - x86 fpu and sse instructions semantics are not implemented,
 - arm SIMD, VFP, NEON, TrustZone, Jazelle instruction sets are not implemented,
 - pretty printers based on pygments package are not merged,
-- interface to z3 solver (and associated analysis) is currently not merged,
-- backward and solver-based disassembling strategies are not merged yet.
+- solver-based disassembling strategies are not merged yet.
+- persistent database (session) and idb import/export features are planned (Q2 2015).
+- sphinx documentation is planned.
+- MIPS, 6502 and PPC archs are planned.
 
 Contributions to fulfill uncomplete/unimplemented parts are welcome.
 
@@ -82,8 +87,8 @@ Amoco is tested on python 2.7 and depends on the following python packages:
 
 - grandalf_ used for building CFG (and eventually rendering it)
 - crysp_    used by the generic intruction decoder (``arch/core.py``)
-- z3_       (not in current release)
-- pygments_ (not in current release)
+- z3_       used to simplify expressions and solve constraints
+- pygments_ (not in current release, planned for 2.4.2 release)
 - pyparsing_ for parsing instruction decoder formats
 - ply_ (optional), for parsing *GNU as* files
 
@@ -91,7 +96,7 @@ Amoco is tested on python 2.7 and depends on the following python packages:
 Quickstart
 ==========
 
-Below is a very simple example where basic blocks are built with linear sweep:
+Below is a very simple example where basic blocks are built with linear sweep:
 
 .. sourcecode:: python
 
@@ -115,14 +120,15 @@ creates a ``linux_x86.ELF`` object which shall represent the program task.
  <amoco.system.elf.Elf32 object at 0xb721a48c>
  >>> print p.mmap
  <MemoryZone rel=None :
-          <mo [08048000,08049ff0] data:'\x7fELF\x01\x01\x01\x00\x00\x00...>
+          <mo [08048000,08049ff0] data:'\x7fELF\x01\x01\x01\x00\x00\x00...'>
+          <mo [08049f14,08049ff0] data:'\xff\xff\xff\xff\x00\x00\x00\x0...'>
           <mo [08049ff0,08049ff4] data:@__gmon_start__>
-          <mo [08049ff4,0804a000] data:'(\x9f\x04\x08\x00\x00\x00\x00\x...>
+          <mo [08049ff4,0804a000] data:'(\x9f\x04\x08\x00\x00\x00\x00\x...'>
           <mo [0804a000,0804a004] data:@__stack_chk_fail>
           <mo [0804a004,0804a008] data:@malloc>
           <mo [0804a008,0804a00c] data:@__gmon_start__>
           <mo [0804a00c,0804a010] data:@__libc_start_main>
-          <mo [0804a010,0804a02c] data:'\x00\x00\x00\x00\x00\x00\x00\x0...>>
+          <mo [0804a010,0804af14] data:'\x00\x00\x00\x00\x00\x00\x00\x0...'>>
  <MemoryZone rel=esp :>
  >>> p.mmap.read(0x0804a004,4)
  [<amoco.cas.expressions.ext object at 0x8cff054>]
@@ -197,17 +203,17 @@ Lets look at the symbolic execution of this block:
  ebp <- { | [0:32]->0x0 | }
  esi <- { | [0:32]->M32(esp) | }
  ecx <- { | [0:32]->(esp+0x4) | }
- eflags <- { | [0:1]->0x0 | [6:7]->((((esp+0x4)&0xfffffff0)==0x0) ? 0x1 : 0x0) | [12:32]->eflags[12:32] | [11:12]->0x0 | [8:11]->eflags[8:11] | [1:6]->eflags[1:6] | [7:8]->((((esp+0x4)&0xfffffff0)<0x0) ? 0x1 : 0x0) | }
- ((((esp+0x4)&0xfffffff0)-0x4)) <- eax
- ((((esp+0x4)&0xfffffff0)-0x8)) <- (((esp+0x4)&0xfffffff0)-0x4)
- ((((esp+0x4)&0xfffffff0)-0xc)) <- edx
- ((((esp+0x4)&0xfffffff0)-0x10)) <- 0x8048610
- ((((esp+0x4)&0xfffffff0)-0x14)) <- 0x80485a0
- ((((esp+0x4)&0xfffffff0)-0x18)) <- (esp+0x4)
- ((((esp+0x4)&0xfffffff0)-0x1c)) <- M32(esp)
- ((((esp+0x4)&0xfffffff0)-0x20)) <- 0x80484fd
+ eflags <- { | [0:1]->0x0 | [1:2]->eflags[1:2] | [2:3]->(0x6996>>(((esp+0x4)&0xfffffff0)[0:8]^(((esp+0x4)&0xfffffff0)[0:8]>>0x4))[0:4])[0:1] | [3:6]->eflags[3:6] | [6:7]->(((esp+0x4)&0xfffffff0)==0x0) | [7:8]->(((esp+0x4)&0xfffffff0)<0x0) | [8:11]->eflags[8:11] | [11:12]->0x0 | [12:32]->eflags[12:32] | }
+ ((((esp+0x4)&0xfffffff0)-4)) <- eax
+ ((((esp+0x4)&0xfffffff0)-8)) <- (((esp+0x4)&0xfffffff0)-0x4)
+ ((((esp+0x4)&0xfffffff0)-12)) <- edx
+ ((((esp+0x4)&0xfffffff0)-16)) <- 0x8048610
+ ((((esp+0x4)&0xfffffff0)-20)) <- 0x80485a0
+ ((((esp+0x4)&0xfffffff0)-24)) <- (esp+0x4)
+ ((((esp+0x4)&0xfffffff0)-28)) <- M32(esp)
+ ((((esp+0x4)&0xfffffff0)-32)) <- 0x80484fd
  esp <- { | [0:32]->(((esp+0x4)&0xfffffff0)-0x24) | }
- ((((esp+0x4)&0xfffffff0)-0x24)) <- (eip+0x21)
+ ((((esp+0x4)&0xfffffff0)-36)) <- (eip+0x21)
  eip <- { | [0:32]->(eip+-0x10) | }
  >>> b.map[p.cpu.esi]
  <amoco.cas.expressions.mem object at 0x8b2fa6c>
@@ -224,6 +230,38 @@ When a block is instanciated, a ``mapper`` object is automatically created.
 This function can map any input state to an output state corresponding to the
 interpretation of this block.
 
+A mapper object is now also equipped with a MemoryMap to mitigate aliasing issues
+and ease updating the global mmap state.
+
+.. sourcecode:: python
+
+ >>> print b.map.memory()
+ <MemoryZone rel=None :>
+ <MemoryZone rel=((esp+0x4)&0xfffffff0) :
+          <mo [-0000024,-0000020] data:(eip+0x21)>
+          <mo [-0000020,-000001c] data:0x80484fd>
+          <mo [-000001c,-0000018] data:M32(esp)>
+          <mo [-0000018,-0000014] data:(esp+0x4)>
+          <mo [-0000014,-0000010] data:0x80485a0>
+          <mo [-0000010,-000000c] data:0x8048610>
+          <mo [-000000c,-0000008] data:edx>
+          <mo [-0000008,-0000004] data:(((esp+0x4)&0xfffffff0)-0x4)>
+          <mo [-0000004,00000000] data:eax>>
+ >>> print b.map(p.cpu.mem(p.cpu.esp,64))
+ { | [0:32]->(eip+0x21) | [32:64]->0x80484fd | }
+ >>> print b.map(p.cpu.mem(p.cpu.ebx,32))
+ M32$9(ebx)
+
+
+As shown above, reading memory in the mapper can return a compound expression.
+Note also that unmapped areas are returned as symbolic mem objects.
+Since aliasing between different MemoryZones is possible, the returned
+symbolic expression of fetching memory at pointer ``ebx`` is special:
+the ``M32$9(ebx)`` expression says "in input state, take 32 bits found at
+pointer ebx *after* applying 9 possibly aliasing memory writes to the state".
+More details in mapper_.
+
+
 -----
 
 Lets try a (little) more elaborated analysis that will not only allow to
@@ -238,8 +276,8 @@ the control flow graph of the program:
  >>> ff.policy['branch-lazy']=False
  >>> ff.getcfg()
  amoco.cas.expressions: INFO: stub __libc_start_main called
- amoco.main: INFO: fforward analysis failed at block 0x8048370
- <amoco.cfg.func object at 0xb72e330c>
+ amoco.main: INFO: fforward analysis stopped at block 0x8048370
+ <amoco.cfg.graph object at 0xb72e330c>
  >>> G=_
  >>> G.C
  [<grandalf.graphs.graph_core object at 0x8f6d78c>]
@@ -265,13 +303,14 @@ Let's have a look at the graph instance:
  # --- block 0x8048370 ---
  0x8048370  'ff250ca00408'     jmp         [@__libc_start_main]
  >>> print n.data.map
- eip <- { | [0:32]->M32((esp+0x4)) | }
+ eip <- { | [0:32]->M32(esp+4) | }
  esp <- { | [0:32]->(esp-0x4) | }
- ((esp-0x4)) <- @exit
+ (esp-4) <- @exit
 
 Ok, so the program counter is correctly pointing to the ``#main`` address located
 at offset +4 in the stack, but since the fast-forward method only look at one block,
 it cannot know that this location holds this address.
+
 A little more elaborated analysis like **link-forward** would have started analysing
 ``#main``:
 
@@ -280,17 +319,18 @@ A little more elaborated analysis like **link-forward** would have started analy
  >>> lf = amoco.lforward(p)
  >>> lf.getcfg()
  amoco.cas.expressions: INFO: stub __libc_start_main called
- amoco.main: INFO: lforward analysis failed at block 0x8048483
- <amoco.cfg.func object at 0x88552ec>
+ amoco.main: INFO: lforward analysis stopped at block 0x80484d4
+ <amoco.cfg.graph object at 0x88552ec>
  >>> G=_
  >>> print G.C
- [<grandalf.graphs.graph_core object at 0x8a0b7ec>, 
- <grandalf.graphs.graph_core object at 0x8a0c1cc>, 
+ [<grandalf.graphs.graph_core object at 0x8a0b7ec>,
+ <grandalf.graphs.graph_core object at 0x8a0c1cc>,
+ <grandalf.graphs.graph_core object at 0x8a0d2fc>,
  <grandalf.graphs.graph_core object at 0x8a3156c>]
  >>> for g in G.C:
  ...   print g.sV
  ...   print '------'
- ... 
+ ...
  0.| <node [0x8048380] at 0x885566c>
  1.| <node [0x8048370] at 0xb72c830c>
  2.| <node [0x80484fd] at 0x885532c>
@@ -298,6 +338,9 @@ A little more elaborated analysis like **link-forward** would have started analy
  0.| <node [0x8048434] at 0x8a0c16c>
  ------
  0.| <node [0x8048483] at 0x8a31dec>
+ 1.| <node [0x804845e] at 0x8a3316c>
+ ------
+ 0.| <node [0x80484d4] at 0x8a38a1c>
  ------
  >>> print G.get_node('0x8048434').data
  # --- block 0x8048434 ---
@@ -324,10 +367,59 @@ A little more elaborated analysis like **link-forward** would have started analy
  0x804848f  'c3'         ret
 
 
+The **fast-backward** is another analysis that tries to evaluate the expression of
+the program counter backwardly and thus reconstructs function frames in simple cases.
+
+.. sourcecode:: python
+
+ >>> amoco.Log.loggers['amoco.main'].setLevel(15)
+ >>> z = amoco.fbackward(p)
+ >>> z.getcfg()
+ amoco.main: VERBOSE: root node 0x8048380 added
+ amoco.main: VERBOSE: block #PLT@__libc_start_main starts a new cfg component
+ amoco.cas.expressions: INFO: stub __libc_start_main called
+ amoco.main: VERBOSE: function f:#PLT@__libc_start_main{2} created
+ amoco.main: VERBOSE: edge <node [f:#PLT@__libc_start_main] at 0x7f422393ccd0> ---> <node [0x80484fd] at 0x7f422389a050> added
+ amoco.main: VERBOSE: block 0x8048434 starts a new cfg component
+ amoco.main: VERBOSE: block 0x8048483 starts a new cfg component
+ amoco.main: VERBOSE: function fct_b:0x8048483{1} created
+ amoco.main: VERBOSE: edge <node [fct_b:0x8048483] at 0x7f42238bd1d0> ---> <node [0x804845e] at 0x7f4223c0bbd0> added
+ amoco.main: VERBOSE: block 0x80484d4 starts a new cfg component
+ amoco.main: VERBOSE: function fct_e:0x80484d4{1} created
+ amoco.main: VERBOSE: pc is memory aliased in fct_e:0x80484d4{1} (assume_no_aliasing)
+ amoco.main: VERBOSE: edge <node [fct_e:0x80484d4] at 0x7f4223847950> ---> <node [0x804846d] at 0x7f42238bdc50> added
+ amoco.main: VERBOSE: function fct_a:0x8048434{5} created
+ amoco.main: VERBOSE: pc is memory aliased in fct_a:0x8048434{5} (assume_no_aliasing)
+ amoco.main: VERBOSE: edge <node [fct_a:0x8048434] at 0x7f4223868150> ---> <node [0x8048561] at 0x7f4223868950> added
+ amoco.main: VERBOSE: function fct_b:0x8048483{1} called
+ amoco.main: VERBOSE: edge <node [fct_b:0x8048483] at 0x7f4223868c10> ---> <node [0x8048576] at 0x7f4223868f10> added
+ amoco.main: VERBOSE: block 0x8048490 starts a new cfg component
+ amoco.main: VERBOSE: block 0x80484ab starts a new cfg component
+ amoco.main: VERBOSE: block #PLT@malloc starts a new cfg component
+ amoco.cas.expressions: INFO: stub malloc called
+ amoco.main: VERBOSE: function f:#PLT@malloc{2} created
+ amoco.main: VERBOSE: edge <node [f:#PLT@malloc] at 0x7f422385dd90> ---> <node [0x80484c4] at 0x7f422385d9d0> added
+ amoco.main: VERBOSE: function fct_d:0x80484ab{3} created
+ amoco.main: VERBOSE: pc is memory aliased in fct_d:0x80484ab{3} (assume_no_aliasing)
+ amoco.main: VERBOSE: edge <node [fct_d:0x80484ab] at 0x7f422385d6d0> ---> <node [0x80484a1] at 0x7f422387ba90> added
+ amoco.main: VERBOSE: function fct_c:0x8048490{3} created
+ amoco.main: VERBOSE: edge <node [fct_c:0x8048490] at 0x7f422387b850> ---> <node [0x8048582] at 0x7f422387bf10> added
+ amoco.main: VERBOSE: edge <node [0x8048582] at 0x7f422387bf10> -?-> <node [0x8048598] at 0x7f422387bc50> added
+ amoco.main: VERBOSE: block #PLT@__stack_chk_fail starts a new cfg component
+ amoco.cas.expressions: INFO: stub __stack_chk_fail called
+ amoco.main: VERBOSE: function f:#PLT@__stack_chk_fail{2} created
+ amoco.main: VERBOSE: edge <node [f:#PLT@__stack_chk_fail] at 0x7f4223802350> ---> <node [0x804859d] at 0x7f4223802b10> added
+ amoco.main: VERBOSE: function f:0x8048380{12} created
+ amoco.main: VERBOSE: pc is memory aliased in f:0x8048380{12} (assume_no_aliasing)
+ amoco.main: INFO: fbackward analysis stopped at <node [0x804859d] at 0x7f4223802b10>
+ amoco.main: VERBOSE: edge <node [0x8048582] at 0x7f422387bf10> -?-> <node [0x804859d] at 0x7f4223802b10> added
+ <amoco.cfg.graph at 0x7f13466d18d0>
+ >>>
+
 .. **
 
-Overview
-========
+API Overview
+============
 
 Amoco is composed of 3 packages arch_, cas_ and system_, on top of which the
 classes implemented in ``code.py``, ``cfg.py`` and ``main.py`` provide high-level
@@ -337,6 +429,9 @@ disassembling/analysis techniques.
 We will now describe this architecture starting from low-level layers (arch_, cas_)
 up to system_ and finally to higher level classes.
 
+A *Sphinx* generated doc will be available soon.
+
+
 arch
 ----
 
@@ -434,11 +529,10 @@ For example (in ``arch/x86/asm.py``):
 .. sourcecode:: python
 
  def i_CMOVcc(i,fmap):
-   op1 = i.operands[0]
-   op2 = fmap(i.operands[1])
-   fmap[eip] = fmap[eip]+i.length
-   a = fmap(op1)
-   fmap[op1] = tst(fmap(i.cond[1]),op2,a)
+     fmap[eip] = fmap(eip)+i.length
+     op1 = i.operands[0]
+     op2 = i.operands[1]
+     fmap[op1] = fmap(tst(i.cond[1],op2,op1))
 
 The function takes as input the instruction instance *i* and a ``mapper``
 instance *fmap* (see cas_) and implements (an approximation of) the opcode semantics.
@@ -462,9 +556,9 @@ An example follows from ``arch/x86/formats.py``:
 .. sourcecode:: python
 
  def mnemo(i):
-    mnemo = i.mnemonic.replace('cc','')
-    if hasattr(i,'cond'): mnemo += i.cond[0].split('/')[0]
-    return '{: <12}'.format(mnemo.lower())
+     mnemo = i.mnemonic.replace('cc','')
+     if hasattr(i,'cond'): mnemo += i.cond[0].split('/')[0]
+     return '{: <12}'.format(mnemo.lower())
 
  def opsize(i):
      s = [op.size for op in i.operands if op._is_mem]
@@ -489,14 +583,14 @@ the destination register is X0/W0 :
 .. sourcecode:: python
 
  def alias_AND(i):
-    m = mnemo(i)
-    r = regs(i)
-    if i.setflags and i.d==0:
-        m = 'tst'
-        r.pop(0)
-    return m.ljust(12) + ', '.join(r)
-
- 
+     m = mnemo(i)
+     r = regs(i)
+     if i.setflags and i.d==0:
+         m = 'tst'
+         r.pop(0)
+     return m.ljust(12) + ', '.join(r)
+
+
 cas
 ---
 
@@ -507,12 +601,14 @@ in ``cas/expressions.py``:
 - Symbol ``sym``, a Constant equipped with a reference string (non-external symbol),
 - Register ``reg``, a fixed size CPU register **location**,
 - External ``ext``, a reference to an external location (external symbol),
+- Floats ``cfp``, constant (fixed size) floating-point values,
 - Composite ``comp``, a bitvector composed of several elements,
 - Pointer ``ptr``, a memory **location** in a segment, with possible displacement,
 - Memory ``mem``, a Pointer to represent a value of fixed size in memory,
 - Slice ``slc``, a bitvector slice of any element,
 - Test ``tst``, a conditional expression, (see Tests_ below.)
-- Operator ``op``, an operation on (1 or 2) elements. The list of supported operations is
+- Operator ``uop``, a unary operator expression,
+- Operator ``op``, a binary operator expression. The list of supported operations is
   not fixed althrough several predefined operators allow to build expressions directly from
   Python expressions: say, you don't need to write ``op('+',x,y)``, but can write ``x+y``.
   Supported operators are:
@@ -531,7 +627,10 @@ Common attributes and methods for all elements are:
 - ``sf``,    the True/False *sign-flag*.
 - ``length`` (size/8)
 - ``mask``   (1<<size)-1
-- extend methods (``signextend``, ``zeroextend``)
+- extend methods (``signextend(newsize)``, ``zeroextend(newsize)``)
+- ``_endian``, the (global class attribute) endianness used when writing expressions in memory, can
+  be set to 1 (default little endian) or -1 (big endian) with the ``setendian()`` method.
+- ``bytes(sta,sto)`` method to retrieve the expression of extracted bytes from sta to sto indices.
 
 All manipulation of an expression object usually result in a new expression object except for
 ``simplify()`` which performs in-place elementary simplifications.
@@ -561,14 +660,12 @@ Some examples of ``cst`` and ``sym`` expressions follow:
  >>> c2 = _
  >>> print c2.sf, c2
  False 0xfd
+ >>> assert c2.bytes(1,2)==0
  >>> e = c2+c.signextend(16)+5
  >>> print e
  0xff
  >>> c3 = e[0:8]
  >>> print c3==cst(-1,8)
- 0x0
- >>> c3.sf=True
- >>> print c3==cst(-1,8)
  0x1
 
 Here, after declaring an 8-bit constant with value 253, we can see that by default the
@@ -592,7 +689,7 @@ Python boolean type:
  >>> print t
  0x1
  >>> if t==True: print 'OK'
- ... 
+ ...
  OK
  >>> t.size
  1
@@ -627,82 +724,484 @@ or left-values (locations). More details on *locations* in mapper_.
 
 .. sourcecode:: python
 
- >>> r1 = reg('%r1',32)
- >>> print r1
- %r1
- >>> e = 2+r1
+ >>> a = reg('%a',32)
+ >>> print a
+ %a
+ >>> e = 2+a
  >>> print e
- (0x2+%r1)
+ (%a+0x2)
  >>> x = e-2
  >>> print x
- (0x0+%r1)
+ (%a-0x0)
  >>> x.simplify()
  <amoco.cas.expressions.reg object at 0xb7250f6c>
  >>> print _
- %r1
+ %a
 
-As shown above, elementary simplification rules are applied such that ``(2+r1)-2``
-leads to an ``op`` expression with operator ``+``, left member 0 and right member ``r1``,
+As shown above, elementary simplification rules are applied such that ``(2+a)-2``
+leads to an ``op`` expression with operator ``-``, right member 0 and left member ``a``,
 which eventually also simplifies further to the r1 register.
-Most real simplification rules should rely on SMT solvers like z3_ [TBC].
+Most real simplification rules should rely on SMT solvers like z3_ (see smt_).
 
 Externals
 ~~~~~~~~~
 
 Class ``ext`` inherit from registers as pure symbolic values
 but is used to represent external symbols that are equipped with a ``stub`` function.
-When "called", these objects invoke their stub function.
+When "called", these objects can invoke their stub function in two ways:
+
+- when the program counter is an ``ext`` expression,
+  the object invokes its ``__call__`` method to modify the provided mapper by calling the
+  registered *stub* with the mapper and possibly other needed parameters.
+- when used to simulate actions of *interruptions* like for example
+  in the semantics of ``IN/OUT`` or ``INT`` instructions which invoke the object's ``call``
+  method to eventually return an expression.
+
 (More details on ``@stub`` decorated functions are provided in system_.)
 
 Pointers and Memory objects
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 A ``ptr`` object is a memory **location**. These objects are generally not found
-in expressions but only as mapper locations or addresses in ``mem`` objects.
-These objects have a ``base`` expression, and optional ``disp`` and ``seg`` fields
-to be used by MemoryZone_ objects.
+in expressions but only as mapper_ locations or addresses in ``mem`` objects.
+They have a ``base`` expression, a ``disp`` integer offset,
+and an optional ``seg`` attribute to be used by MemoryZone_ objects.
+
+As illustrated below, simplification of ``ptr`` objects tends to extract constant
+offsets found in the base expression to adjust the ``disp`` field.
+
+.. sourcecode:: python
+
+ >>> a = reg('a',32)
+ >>> p = ptr(a)
+ >>> q = ptr(a,disp=17)
+ >>> print p,q
+ (a) (a+17)
+ >>> assert p+17 == q
+ >>> assert p+2  == q-15
+ >>> assert (p+3).base == (q-5).base
+
+
+A ``mem`` object is a symbolic memory value equipped with a pointer, a size, and
+a special ``.mods`` attribute that will be discussed in mapper_.
+
+.. sourcecode:: python
+
+ >>> x = mem(p,64,disp=2)
+ >>> y = mem(q-5,48,disp=-10)
+ >>> print x,y
+ M64(a+2) M48(a+2)
+ >>> assert x.bytes(4,6) == y[32:48]
+
+
+Note: the segment attribute is currently not used by the core memory classes.
 
-A ``mem`` object is a symbolic memory value equipped with a pointer and a size.
-There is no direct relation between such expression and a MemoryZone_ state. It is
-up to analysis methods to eventually update states according to such expressions.
 
 Operators
 ~~~~~~~~~
 
+Unary operators (``+``, ``-`` and ``~``) have elementary simplification rules:
+
+.. sourcecode:: python
+
+ >>> a = reg('a',32)
+ >>> assert +a == -(-a)
+ >>> assert -a == 0-a
+
 Most operations in Amoco involve left and right members sub-expressions. The operation
 will then usually proceed only if both member have the same size. If one member is not
 an expression but a Python integer, it will be implicitly "casted" to a constant of size
 required by the other expression member. Thus, it is possible to write ``r1+2`` and not
 ``r1+cst(2,32)``.
 
+Binary operations have elementary simplification rules that try to arrange symbols
+in lexical order and move constants to the right side of the expression.
+
+.. sourcecode:: python
+
+ >>> a = reg('a',32)
+ >>> b = reg('b',32)
+ >>> print a+0, a*1, a^a, a*0, a&0, a|0
+ a a 0x0 0x0 0x0 a
+ >>> print (b-a)|0
+ ((-a)+b)
+ >>> assert b-a == (-a)+b
+ >>> assert -(a+b) == (-a)-b
+ >>> assert -(a-b) == b-a
+ >>> assert -(b-a) == (a-b)*1
+ >>> assert -(1-a) == a-1
+ >>> assert (-a+(b-1)) == b-a-1
+ >>> e = -((b-1)-a)
+ >>> assert e == 1+(a-b)
+ >>> print e
+ ((a-b)+0x1)
+ >>> extract_offset(e)
+ (<amoco.cas.expressions.op object at 0x7f864e8496b0>, 1)
+ >>> print _[0]
+ (a-b)
+
+Internal attributes and methods of ``op`` instances are:
+
+- ``.op``, the operator symbol (``.op.symbol``) and function (``.op.impl``),
+- ``.l``, the left member sub-expression,
+- ``.r``, the right member sub-expression of binary ops.
+- ``.prop``, an or-ed flag indicating the kind of operators involved:
+
+  + 1 means only arithmetic,
+  + 2 means only logic,
+  + 4 means only conditional,
+  + 8 means only shifts and rotations,
+
+- ``depth()`` returns the expression tree depth,
+- ``limit(value)`` is a class method used to set a threshold parameter involved
+  in simplifying the expression to ``top`` when the expression's complexity is too high.
+
+The ``symbols_of(e)`` function returns the list of register expressions involved in ``e``.
+The ``locations_of(e)`` function returns the list of *locations* used in ``e``.
+The ``complexity(e)`` function computes an arbitrary complexity measure of expression ``e``
+which is linear in depth and number of symbols, and increases by a factor of ``prop``.
+
 Composer and Slicer
 ~~~~~~~~~~~~~~~~~~~
 
+A ``comp`` object is a composite expression corresponding to a bit-vector made of
+several expression parts.
+A ``slc`` object is the expression obtained by extracting a bit-vector slice out
+of an expression.
+
+The ``composer(parts)`` function, which takes as input the parts as a list of expressions in
+least-to-most significant order, is the preferred method for instantiating composite objects.
+Since ``comp`` is essentially a container class for other expressions, the resulting object
+is possibly of another class if some simplification occurred.
+
+.. sourcecode:: python
+
+ >>> composer([cst(1,8),cst(2,8),cst(3,8)])
+ <amoco.cas.expressions.cst at 0x7f9468252c20>
+ >>> c=_
+ >>> assert c == 0x030201
+ >>> a = reg('a',32)
+ >>> b = reg('b',32)
+ >>> c = comp(24)
+ >>> c[0:8] = (a+b)[24:32]
+ >>> c[8:24] = b[0:16]
+ >>> print c
+ { | [0:8]->(a+b)[24:32] | [8:24]->b[0:16] | }
+ >>> c[8:16] = cst(0xff,8)
+ >>> print c
+ { | [0:8]->(a+b)[24:32] | [8:16]->0xff | [16:24]->b[8:16] | }
+ >>> c[0:8] = cst(0x01,8)
+ >>> print c
+ { | [0:8]->0x1 | [8:16]->0xff | [16:24]->b[8:16] | }
+ >>> print c.simplify()
+ { | [0:16]->0xff01 | [16:24]->b[8:16] | }
+
+As shown above, a composite instance supports dynamic assignment of any parts defined by a python
+slice object. Simplification of composite objects tends to merge contiguous constant parts.
+
+A ``slc`` expression is obtained by using a python slice object of the form [start:stop]
+where start/stop are non-negative integers in the bit range of the sliced expression.
+Simplification occurs when the sliced expression is itself of class ``slc`` or ``mem``:
+
+.. sourcecode:: python
+
+ >>> a = reg('%a',32)
+ >>> ah = slc(a,24,8,ref='%ah')
+ >>> assert ah.x == a
+ >>> print ah.pos
+ 24
+ >>> print ah
+ %ah
+ >>> ax = a[16:32]
+ >>> print ax
+ %a[16:32]
+ >>> print ax[0:8]
+ %a[16:24]
+ >>> print ax[8:16]
+ ah
+ >>> y = mem(a,64)
+ >>> print y[16:48]
+ M32(%a+2)
+
+Note that, as shown above, slices of registers can be instantiated with an optional
+reference string that is used for printing whenever the matching register slice is involved.
+
+Note also that parts and slices [start:stop] bounds are limited to python integers only
+(indices can't be symbolic!)
+
+
 Conditionals
 ~~~~~~~~~~~~
 
+The ``tst`` class is used for conditional expressions in the form ``tst(cond, eT, eF)``
+where ``cond`` is an expression, ``eT`` is the resulting expression whenever
+``cond==1`` and ``eF`` is the resulting expression whenever ``cond==0``.
+
+.. sourcecode:: python
+
+ >>> t = tst(a>0, c, cst(0xdeadbe,24))
+ >>> print t
+ ((%a>0x0) ? { | [0:16]->0xff01 | [16:24]->b[8:16] | } : 0xdeadbe)
+ >>> t.l[16:24] = cst(0xab,8)
+ >>> print t.simplify()
+ ((%a>0x0) ? 0xabff01 : 0xdeadbe)
+ >>> t.tst.l = cst(-1,32)
+ >>> print t
+ ((-0x1>0x0) ? 0xabff01 : 0xdeadbe)
+ >>> print t.simplify()
+ 0xdeadbe
+
+
 mapper
 ~~~~~~
 
 A ``mapper`` object captures the symbolic operations of a sequence of instructions by
-mapping left-value expressions to right-value expressions. It represents the transition
-function that allows to transform a memory state into another memory state that corresponds
+mapping input expressions to output *locations* which are registers or pointers.
+It represents the transition function from an input state to an output state corresponding
 to the execution of the captured instructions.
-As shown in the ``i_MOVcc`` example above, the ``fmap`` object
-is the mapper on which every instruction semantics operate (see asm_).
+As shown in the ``i_CMOVcc`` example above, the ``fmap`` argument of every instruction semantics
+is a mapper on which the instruction currently operates (see asm_).
 
 .. sourcecode:: python
 
+ >>> from amoco.arch.x86.env import *
+ >>> from amoco.cas.mapper import mapper
  >>> m = mapper()
- >>> m[esp] = cst(0x10,32)
+ >>> m[eax] = cst(0xabff01,32)
+ >>> print m
+ eax <- { | [0:32]->0xabff01 | }
+ >>> print m(eax)
+ 0xabff01
+ >>> print m(ah)
+ 0xff
+ >>> m[eax[16:32]] = bx
+ >>> print m
+ eax <- { | [0:16]->0xff01 | [16:32]->bx | }
+ >>> print m(ax+cx)
+ (cx+0xff01)
+ >>> print m(eax[16:32]^ecx[16:32])
+ (bx^ecx[16:32])
+ >>> print m(mem(ecx+2,8))
+ M8(ecx+2)
+ >>> print m(mem(eax+2,8))
+ M8({ | [0:16]->0xff01 | [16:32]->bx | }+2)
+
+The mapper class defines two essential methods to set and get expressions in and out.
+
+- ``__setitem__`` is used for mapping any expression to a location which can be a register
+  (or a register slice), a pointer or a memory expression. When the location is a pointer,
+  the base expression refers to input state values, whereas a memory expression refers to
+  the output state (see example below).
+- ``__call__`` is used for evaluating any expression in the mapper, by replacing every
+  register and memory object of the expression by their mapped expressions.
+
+A *push* instruction could thus be implemented using:
+
+.. sourcecode:: python
+
+ >>> def push(fmap,x):
+ ...   fmap[esp] = fmap(esp)-x.length
+ ...   fmap[mem(esp,x.size)] = x      # put x at the current (updated) esp address
+ ...
+ >>> m.clear()
+ >>> push(m, cst(0x41414141,32))
+ >>> print m
+ esp <- { | [0:32]->(esp-0x4) | }
+ (esp-4) <- 0x41414141
+ >>> push(m, ebx)
  >>> print m
+ (esp-4) <- 0x41414141
+ esp <- { | [0:32]->(esp-0x8) | }
+ (esp-8) <- ebx
+
+Note that a ``__getitem__`` method is implemented as well in order to fetch items
+that are locations of the mapper. So here, to get the value at the top of stack, we
+can do:
+
+.. sourcecode:: python
+
+ >>> print m[mem(esp-8,32)]  # fetch the expression associated with ptr (esp-8)
+ ebx
+ >>> print m(mem(esp,32))    # evaluates mem(esp,32) => first evaluate ptr, then fetch.
+ ebx
+ >>> print m(mem(esp+4,32))
+ 0x41414141
+ >>> print m[mem(esp-4,32)]
+ 0x41414141
+
+The internal memory model of a mapper is a MemoryMap_: symbolic memory locations are related
+to individual separated MemoryZone_ objects that deal with all read/write to/from location's
+``ptr.base`` expression.
+
+.. sourcecode:: python
+
+ >>> print m.memory()
+ <MemoryZone rel=None :>
+ <MemoryZone rel=esp :
+          <mo [-0000008,-0000004] data:ebx>
+          <mo [-0000004,00000000] data:0x41414141>>
+
+This model allows to access offsets that have not been explicitly written to before.
+For example, if we now execute *mov ecx, [esp+2]* we still fetch the correct expression:
+
+.. sourcecode:: python
+
+ >>> m[ecx] = m(mem(esp+2,32))
+ >>> print m(ecx)
+ { | [0:16]->ebx[16:32] | [16:32]->0x4141 | }
+
+However, aliasing between zones is possible and must be avoided: imagine that we now
+execute *mov byte ptr [eax], 0x42*, we obtain:
+
+.. sourcecode:: python
+
+ >>> m[mem(eax,8)] = cst(0x42,8)
+ >>> print m
+ (esp-4) <- 0x41414141
+ esp <- { | [0:32]->(esp-0x8) | }
+ (esp-8) <- ebx
+ ecx <- { | [0:16]->ebx[16:32] | [16:32]->0x4141 | }
+ (eax) <- 0x42
+ >>> print m.memory()
+ <MemoryZone rel=None :>
+ <MemoryZone rel=eax :
+         <mo [00000000,00000001] data:0x42>>
+ <MemoryZone rel=esp :
+         <mo [-0000008,-0000004] data:ebx>
+         <mo [-0000004,00000000] data:0x41414141>>
+
+If we now again fetch memory at ``esp+2`` the previous answer is not valid anymore due
+to a possible aliasing (overlapping) of ``eax`` and ``esp`` zones. Think of what the
+memory should look like if the ``eax`` value was ``esp-4`` for example. Let's try:
+
+.. sourcecode:: python
+
+ >>> print m(mem(esp+2,32))
+ M32$3(esp-6)
+ >>> mprev = mapper()
+ >>> mprev[eax] = esp-4
+ >>> print mprev( m(mem(esp+2,32)) )
+ { | [0:16]->ebx[16:32] | [16:32]->0x4142 | }
+
+Indeed, the mapper returns a special memory expression that embeds modifications
+(saved in ``.mods`` of the mem expression) that have been applied on its memory until now,
+and that must be executed in order to return a correct answer. As demonstrated above,
+these mods are taken into account whenever the expression is evaluated in another mapper.
+
+Note that it is possible to force the mapper class to *assume no aliasing* :
+
+.. sourcecode:: python
+
+ >>> print mapper.assume_no_aliasing
+ False
+ >>> mapper.assume_no_aliasing = True
+ >>> print m(mem(esp+2,32))
+ { | [0:16]->ebx[16:32] | [16:32]->0x4141 | }
+
+In Amoco, a mapper instance is created for every basic block. The right
+and left shift operators allow for right or left composition so that symbolic
+forward or backward execution of several basic blocks is easy:
+
+.. sourcecode:: python
+
+ >>> m1 = mapper()
+ >>> m1[eax] = ebx
+ >>> push(m1,eax)
+ >>> m2 = mapper()
+ >>> m2[ebx] = cst(0x33,32)
+ >>> push(m2,ebx)
+ >>> m2[eax] = m2(mem(esp,32))
+ >>> print m1
+ eax <- { | [0:32]->ebx | }
+ esp <- { | [0:32]->(esp-0x4) | }
+ (esp-4) <- eax
+ >>> print m2
+ ebx <- { | [0:32]->0x33 | }
+ esp <- { | [0:32]->(esp-0x4) | }
+ (esp-4) <- ebx
+ eax <- { | [0:32]->ebx | }
+ >>> print m1>>m2 # forward execute m1 -> m2
+ (esp-4) <- eax
+ ebx <- { | [0:32]->0x33 | }
+ esp <- { | [0:32]->(esp-0x8) | }
+ (esp-8) <- ebx
+ eax <- { | [0:32]->ebx | }
+ >>> print m2<<m1 # backward execute the same blocks/mappers
+ (esp-4) <- eax
+ ebx <- { | [0:32]->0x33 | }
+ esp <- { | [0:32]->(esp-0x8) | }
+ (esp-8) <- ebx
+ eax <- { | [0:32]->ebx | }
+
+TODO: mapper unions.
+
+smt
+~~~
+
+Amoco uses z3_ for constraint solving by translating its equation expressions
+into z3_ equivalent objects. The interface with z3_ is implemented in ``cas/smt.py``.
+
+- ``cst`` expressions are translated as ``BitVecVal`` objects
+- ``cfp`` expressions are translated as ``RealVal`` objects
+- ``reg`` expressions are translated as ``BitVec`` objects
+- ``comp`` expressions use the z3_ ``Concat`` function
+- ``slc`` expressions use the z3_ ``Extract`` function
+- ``mem`` expressions are converted as Concat of ``Array`` of ``BitVecSort(8)``,
+  with current endianness taken into account.
+- ``tst`` expressions use the z3_ ``If`` function
+- operators are translated by propagating translations to left & right sides.
+
+When the ``smt`` module is imported it replaces the ``.to_smtlib()`` method of
+every expression class (which by default raises NotImplementedError).
+
+.. sourcecode:: python
+
+ >>> from amoco.arch.x86.env import *
+ >>> from amoco.cas import smt
+ >>> z = (eax^cst(0xcafebabe,32))+(ebx+(eax>>2))
+ >>> print z
+ ((eax^0xcafebabe)+(ebx+(eax>>0x2)))
+ >>> print z.to_smtlib()
+ (eax ^ 3405691582) + ebx + LShR(eax, 2)
+ >>> print z.to_smtlib().sexpr()
+ (bvadd (bvxor eax #xcafebabe) ebx (bvlshr eax #x00000002))
+ >>> r = smt.solver([z==cst(0x0,32),al==0xa,ah==0x84]).get_model()
+ >>> print r
+ [eax = 33802, ebx = 889299018]
+ >>> x,y = [r[v].as_long() for v in r]
+ >>> ((x^0xcafebabe)+(y+(x>>2)))&0xffffffffL
+ 0L
+ >>> p = mem(esp,32)
+ >>> q = mem(esp+2,32)
+ >>> ql = q[0:16]
+ >>> ph = p[16:32]
+ >>> z = (p^cst(0xcafebabe,32))+(q+(p>>2))
+ >>> m = smt.solver().get_mapper([z==cst(0,32),esp==0x0804abcd])
+ >>> print m
+ (esp+2) <- 0x7ffc9151
+ (esp) <- 0x9151babe
+ esp <- { | [0:32] -> 0x0804abcd | }
+
+
+In the ``smt`` module, the ``solver`` class is typically used to verify that some
+properties hold and find a set of input (concrete) values to be set for example in
+an emulator or debugger to reach a chosen branch. A solver instance can be created with
+a python list of expressions, or expressions can be added afterward.
+
+The ``.get_model()`` method will check added constraint equations and return a
+z3_ ``ModelRef`` object if the z3_ solver has returned ``z3.sat`` or None otherwise.
+A list of equations to be taken into account can be provided as well with ``.add()``.
+
+The ``.get_mapper()`` method calls ``get_model`` and returns a mapper object with
+locations set to their ``cst`` values. A list of equations can be provided here too.
 
 main.py
 -------
 
 This module contains *high-level* analysis techniques implemented as classes that
 take a program abstraction provided by the system_ package.
-Currently, only 3 simple techniques are released:
+
+The first 3 basic techniques are:
 
 - *linear-sweep* (``lsweep`` class) disassembles instructions without taking
   into account any branching instruction.
@@ -724,25 +1223,170 @@ Currently, only 3 simple techniques are released:
   follow branch policy to avoid linear sweep and evaluates the program counter
   by taking into account the parent block semantics.
 
+Other more elaborate techniques are:
+
+- *fast backward* (``fbackward``) inherits from ``lforward`` but evaluates the
+  program counter backward by taking the *first-parent* block until either the
+  expression is a constant target or the root node of the graph component (entry of function)
+  is reached. The analysis proceeds then by evaluating the pc expression in every
+  caller block, assuming that no frame-aliasing occurred (pointer arguments did not
+  mess up with the caller's stack.) A ``func`` instance is created but its mapper
+  contains by default only the computed pc expression.
+
+- *link-backward* (``lbackward``) inherits from ``fbackward`` but walks back *all*
+  parent-paths up to the entry node, composing and assembling all mappers to end up
+  with an approximated mapper of the entire function.
 
 code.py
 -------
 
+The ``code`` module defines two main classes:
+
+- a ``block`` contains a list of instructions and computes the associated mapper object.
+  The arch-dependent CoreExec classes (see system_ below) can add ``tag`` indicators like
+  ``FUNC_START`` (if block looks like a function entry), ``FUNC_CALL`` if block makes a call, etc.
+- a ``func`` contains the cfg graph component of a function once it has been fully
+  recovered by an analysis class. It inherits from ``block`` and contains a mapper that
+  captures an approximation of the entire function.
+
+blocks are created by the ``lsweep.iterblocks()`` iterator (or by  ``.get_block()``) which
+is inherited by all ``main`` analysis classes discussed above. Functions are created by
+``fbackward`` and ``lbackward`` classes only.
+
+The ``xfunc`` class is used when an external expression is called. It contains a mapper
+built by a ``stub`` function. Instances are present in graph nodes but have a zero length
+and no address and thus do not exist in memory.
+
 cfg.py
 ------
 
+Classes ``node``, ``link`` and ``graph`` use *grandalf* Vertex/Edge/Graph with additional
+formatters or way to compare instances by name. A node's data is a block instance, and an
+edge's data is possibly a set of conditional expressions. A graph connected component is
+a function's control-flow graph  (a *graph_core* object).
+The ``graph.add_vertex`` extends Graph.add_vertex to detect that the node to be added *cuts*
+an existing node and adjusts the graph structure accordingly.
+The ``graph.spool()`` method provides a list of the current leaves in the graph.
+The ``graph.get_node(name)`` method allows to get a node object by its name.
+
 system
 ------
 
+The system_ package is the main interface with the binary program. It contains executable
+format parsers, the memory model, the execution engine, and some operating system
+models responsible for mapping the binary in the memory model, setting up the environment
+and taking care of system calls.
+
+The ``loader.py`` module is the frontend that will try to parse the input file and import the
+targeted system_ and arch_ modules. If the executable format is unknown or if the input is a
+bytecode python string, the binary is mapped at address 0 in a ``RawExec`` instance.
+
+The ``elf.py`` module implements the ``Elf32`` and ``Elf64`` classes. The ``pe.py`` module
+implements the ``PE`` class which handles both PE32 and PE32+ (64-bits).
+
+The ``core.py`` module implements the memory model classes and the CoreExec_ generic
+execution engine inherited by various system's classes like ``linux_x86.ELF``,
+``linux_arm.ELF`` or ``win32.PE`` and ``win64.PE``.
+
 MemoryZone
 ~~~~~~~~~~
 
+The memory model in Amoco is implemented by the MemoryMap class in ``system/core.py``. Instances
+of MemoryMap are created by the system's CoreExec classes and by every block's mapper_ objects.
+This model associates memory locations with raw bytes or symbolic expressions in separated *zones*
+implemented by the MemoryZone_ class.
+Each zone is associated with a symbolic location reference, the default ``None`` reference zone
+being used for concrete (cst) locations.
+In a MemoryZone_, an *address* is an integer offset to the reference location expression, and
+the associated *value* is a ``mo`` memory object that stores bytes or an expression wrapped in
+a ``datadiv`` object.
+
 CoreExec
 ~~~~~~~~
 
+The execution engine core class is the user's frontend to the binary. It is responsible for
+creating a MemoryMap with the binary image, reading data in memory, or reading instructions
+at some address by calling ``cpu.disassemble()``.
+
 stubs
 ~~~~~
 
+System calls and externals are emulated by implementing ``stubs`` that modify a mapper instance. A *stub*
+is a Python function decorated with ``@stub``. For example, in
+the *Linux* system (see ``linux_x86.py``), the *__libc_start_main* is approximated by:
+
+.. sourcecode:: python
+
+ @stub
+ def __libc_start_main(m,**kargs):
+     m[cpu.eip] = m(cpu.mem(cpu.esp+4,32))
+     cpu.push(m,cpu.ext('exit',size=32))
+
+The default stub performs only a ``ret``-like instruction.
+
+Licence
+=======
+
+Please see `LICENSE`_.
+
+
+Changelog
+=========
+
+- `v2.4.0`_
+
+  * merge Z3 solver interface, see smt.py and smtlib() exp method
+  * merge fbackward analysis and code func class.
+  * improve expressions: separate unary and binary ops, "normalize" expressions
+  * improve mapper with memory() method and aliasing-resistant composition operators
+  * improve MemoryZone class: return top expression parts instead of raising MemoryError.
+  * adding RawExec class for shellcode-like input
+  * support string input in ELF/PE classes.
+  * fix various x86/x64 bugs
+  * protect against resizing of env registers
+  * add win64 loader
+  * adjust log levels and optional file from conf
+  * update README
+
+- `v2.3.5`_
+
+  * add x64 arch + full x86/64 SSE decoder
+  * hotfix x86/x64 inversion of {88}/{8a} mov instructions
+  * fix various x86 decoders and semantics
+  * code cosmetics
+
+- `v2.3.4`_
+
+  * merge armv7/thumb fixed semantics
+  * add x86 fpu decoders
+  * add locate function in MemoryMap
+  * Fix core read_instruction on map boundary
+  * Fix PE import parsing and TLS Table builder
+  * faster generic decoder
+  * hotfix various x86 decoders
+  * add some x86 SSE decoders
+
+- `v2.3.3`_
+
+  * support for MSP430 and PIC18 microcontrollers
+  * fix sparc rett, udiv/sdiv and formats
+  * fix x86 jcxz instruction decoding
+
+- `v2.3.2`_
+
+  * merge z80/GB architecture, fix sparc reported issues
+  * add example of SSE2 decoding (fixed)
+
+- `v2.3.1`_
+
+  * add licence file
+  * fix sparc architecture
+  * avoid ptr expression when address is not deref
+  * fix eqn_helpers simplifier rules
+  * README updated
+  * new PE class (tested on CoST.exe) + support for multiple entrypoints.
+
+
 .. _grandalf: https://github.com/bdcht/grandalf
 .. _crysp: https://github.com/bdcht/crysp
 .. _minisat: http://minisat.se/
@@ -751,3 +1395,10 @@ stubs
 .. _armv8: http://www.cs.utexas.edu/~peterson/arm/DDI0487A_a_armv8_arm_errata.pdf
 .. _pyparsing: http://pyparsing.wikispaces.com/
 .. _ply: http://www.dabeaz.com/ply/
+.. _LICENSE: https://github.com/bdcht/amoco/blob/release/LICENSE
+.. _v2.4.0: https://github.com/bdcht/amoco/releases/tag/v2.4.0
+.. _v2.3.5: https://github.com/bdcht/amoco/releases/tag/v2.3.5
+.. _v2.3.4: https://github.com/bdcht/amoco/releases/tag/v2.3.4
+.. _v2.3.3: https://github.com/bdcht/amoco/releases/tag/v2.3.3
+.. _v2.3.2: https://github.com/bdcht/amoco/releases/tag/v2.3.2
+.. _v2.3.1: https://github.com/bdcht/amoco/releases/tag/v2.3.1
diff --git a/amoco/__init__.py b/amoco/__init__.py
index 204623d..63caa76 100644
--- a/amoco/__init__.py
+++ b/amoco/__init__.py
@@ -1,2 +1,4 @@
+# -*- coding: utf-8 -*-
+
 from .config import conf
 from .main import *
diff --git a/amoco/arch/arm/cpu_armv7.py b/amoco/arch/arm/cpu_armv7.py
index 8854c76..a94001a 100644
--- a/amoco/arch/arm/cpu_armv7.py
+++ b/amoco/arch/arm/cpu_armv7.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.arch.arm.v7.asm import *
diff --git a/amoco/arch/arm/cpu_armv8.py b/amoco/arch/arm/cpu_armv8.py
index a8b0d71..6e4d361 100644
--- a/amoco/arch/arm/cpu_armv8.py
+++ b/amoco/arch/arm/cpu_armv8.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.arch.arm.v8.asm64 import *
diff --git a/amoco/arch/arm/v7/asm.py b/amoco/arch/arm/v7/asm.py
index 321fe6a..b51ce21 100644
--- a/amoco/arch/arm/v7/asm.py
+++ b/amoco/arch/arm/v7/asm.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from .env import *
@@ -32,7 +34,7 @@ def __check_state(i,fmap):
         if address._is_cst:
             raise InstructionError(i)
         else:
-            logger.warning('impossible to check isetstate (ARM/Thumb) until pc is cst')
+            logger.verbose('impossible to check isetstate (ARM/Thumb) until pc is cst')
 
 def __pre(i,fmap):
     fmap[pc] = fmap(pc+i.length)
@@ -49,6 +51,7 @@ def __pre(i,fmap):
     return cond,dest,op1
 
 def __setflags(fmap,cond,cout,result,overflow=None):
+    if cout is None: cout = fmap(C)
     fmap[C] = tst(cond,cout,fmap(C))
     fmap[Z] = tst(cond,(result==0),fmap(Z))
     fmap[N] = tst(cond,(result<0),fmap(N))
@@ -164,13 +167,13 @@ def i_BIC(i,fmap):
         __setflags(fmap,cond,cout,result)
 
 def i_CMN(i,fmap):
-    cond,dest,op1,op2 = __pre(i,fmap)
-    result,cout,overflow = AddWithCarry(fmap(op1),fmap(op2))
+    cond,dest,op1 = __pre(i,fmap)
+    result,cout,overflow = AddWithCarry(fmap(dest),fmap(op1))
     __setflags(fmap,cond,cout,result,overflow)
 
 def i_CMP(i,fmap):
-    cond,dest,op1,op2 = __pre(i,fmap)
-    result,cout,overflow = SubWithBorrow(fmap(op1),fmap(op2))
+    cond,dest,op1 = __pre(i,fmap)
+    result,cout,overflow = SubWithBorrow(fmap(dest),fmap(op1))
     __setflags(fmap,cond,cout,result,overflow)
 
 def i_EOR(i,fmap):
@@ -270,15 +273,15 @@ def i_SUB(i,fmap):
         __setflags(fmap,cond,cout,result,overflow)
 
 def i_TEQ(i,fmap):
-    cond,dest,op1,op2 = __pre(i,fmap)
-    result = fmap(op1 ^ op2)
-    cout = fmap(op2.bit(31))
+    cond,dest,op1 = __pre(i,fmap)
+    result = fmap(dest ^ op1)
+    cout = fmap(op1.bit(31))
     __setflags(fmap,cond,cout,result)
 
 def i_TST(i,fmap):
-    cond,dest,op1,op2 = __pre(i,fmap)
-    result = fmap(op1 & op2)
-    cout = fmap(op2.bit(31))
+    cond,dest,op1 = __pre(i,fmap)
+    result = fmap(dest & op1)
+    cout = fmap(op1.bit(31))
     __setflags(fmap,cond,cout,result)
 
 # shifts (4.4.2)
@@ -864,6 +867,7 @@ def i_PLI(i,fmap):
 def i_SETEND(i,fmap):
     fmap[pc] = fmap(pc+i.length)
     internals['endianstate'] = 1 if i.set_bigend else 0
+    exp.setendian(-1 if i.set_bigend else +1)
 
 # event hint
 def i_SEV(i,fmap):
diff --git a/amoco/arch/arm/v7/env.py b/amoco/arch/arm/v7/env.py
index f1ed206..028cbe6 100644
--- a/amoco/arch/arm/v7/env.py
+++ b/amoco/arch/arm/v7/env.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
@@ -11,22 +13,22 @@
 #registers (application level, see B1.3.2) :
 #-------------------------------------------
 
-r0     = reg('r0',32)     # 
-r1     = reg('r1',32)     # 
-r2     = reg('r2',32)     # 
-r3     = reg('r3',32)     # 
-r4     = reg('r4',32)     # 
-r5     = reg('r5',32)     # 
-r6     = reg('r6',32)     # 
-r7     = reg('r7',32)     # 
-r8     = reg('r8',32)     # 
-r9     = reg('r9',32)     # 
-r10    = reg('r10',32)    # 
-r11    = reg('r11',32)    # 
-r12    = reg('r12',32)    # 
-r13    = reg('r13',32)    # 
-r14    = reg('r14',32)    # 
-r15    = reg('r15',32)    # 
+r0     = reg('r0',32)     #
+r1     = reg('r1',32)     #
+r2     = reg('r2',32)     #
+r3     = reg('r3',32)     #
+r4     = reg('r4',32)     #
+r5     = reg('r5',32)     #
+r6     = reg('r6',32)     #
+r7     = reg('r7',32)     #
+r8     = reg('r8',32)     #
+r9     = reg('r9',32)     #
+r10    = reg('r10',32)    #
+r11    = reg('r11',32)    #
+r12    = reg('r12',32)    #
+r13    = reg('r13',32)    #
+r14    = reg('r14',32)    #
+r15    = reg('r15',32)    #
 
 regs = [r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11,r12,r13,r14,r15]
 
diff --git a/amoco/arch/arm/v7/formats.py b/amoco/arch/arm/v7/formats.py
index 14cfaba..39ce1ba 100644
--- a/amoco/arch/arm/v7/formats.py
+++ b/amoco/arch/arm/v7/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from .env import *
 from .utils import *
 from amoco.arch.core import Formatter
diff --git a/amoco/arch/arm/v7/spec_armv7.py b/amoco/arch/arm/v7/spec_armv7.py
index 81663e2..b65175b 100644
--- a/amoco/arch/arm/v7/spec_armv7.py
+++ b/amoco/arch/arm/v7/spec_armv7.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/arm/v7/spec_thumb.py b/amoco/arch/arm/v7/spec_thumb.py
index a942f48..5621be3 100644
--- a/amoco/arch/arm/v7/spec_thumb.py
+++ b/amoco/arch/arm/v7/spec_thumb.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
@@ -117,7 +117,7 @@ def A_default(obj,Rm):
   obj.type = type_data_processing
   obj.cond = env.CONDITION_AL
 
-@ispec("16[ 1010 0 Rd(3) imm8(8) ]", mnemonic="ADR")
+@ispec("16[ 1010 0 Rd(3) imm8(8) ]", mnemonic="ADR", add=True)
 def A_adr(obj,Rd,imm8):
   obj.d = env.regs[Rd]
   obj.imm32 = env.cst(imm8<<2,32)
diff --git a/amoco/arch/arm/v7/spec_thumb2.py b/amoco/arch/arm/v7/spec_thumb2.py
index ea6ff35..d9def98 100644
--- a/amoco/arch/arm/v7/spec_thumb2.py
+++ b/amoco/arch/arm/v7/spec_thumb2.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
@@ -147,8 +149,8 @@ def A_bits(obj,Rn,imm3,Rd,imm2,msb):
 
 @ispec("32[ 11 J1 0 J2 #imm10L(10) 0 11110 S #imm10H(10) ]", mnemonic="BLX")
 def A_label(obj,S,imm10H,J1,J2,imm10L):
-  I1, I2 = str(~(J1^S)), str(~(J2^S))
-  v = int(S+I1+I2+imm10H+imm10L+'00',2)
+  I1, I2 = str(~(J1^S)&0x1), str(~(J2^S)&0x1)
+  v = int(str(S)+I1+I2+imm10H+imm10L+'00',2)
   obj.imm32 = env.cst(v,25).signextend(32)
   obj.operands = [obj.imm32]
   obj.type = type_control_flow
@@ -564,7 +566,7 @@ def A_reglist(obj,Rt):
   obj.cond = env.CONDITION_AL
 
 @ispec("32[ 0 #M 0 #register_list(13) 11101 00 100 1 0 1101 ]", mnemonic="PUSH")
-def A_reglist(obj,P,M,register_list):
+def A_reglist(obj,M,register_list):
   obj.registers = [env.regs[i] for i,r in enumerate(register_list[::-1]+'0'+M+'0') if r=='1']
   if len(obj.registers)<2: raise InstructionError(obj)
   obj.operands = [obj.registers]
diff --git a/amoco/arch/arm/v7/utils.py b/amoco/arch/arm/v7/utils.py
index cfd7f35..aeeb5c6 100644
--- a/amoco/arch/arm/v7/utils.py
+++ b/amoco/arch/arm/v7/utils.py
@@ -1,12 +1,14 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.cas.expressions import *
 
 def LSL_C(x,shift):
-    assert shift>0
-    carry_out = x.bit(-shift)
+    assert shift>=0
+    carry_out = x.bit(-shift) if shift>0 else None
     return (x<<shift,carry_out)
 
 def LSL(x,shift):
@@ -14,7 +16,8 @@ def LSL(x,shift):
     return x<<shift
 
 def LSR_C(x,shift):
-    assert shift>0
+    assert shift>=0
+    if shift==0: return (x,None)
     carry_out = x.bit(shift-1) if shift<x.size else 0
     return (x>>shift,carry_out)
 
@@ -23,16 +26,17 @@ def LSR(x,shift):
     return x>>shift
 
 def ASR_C(x,shift):
-    assert shift>0
+    assert shift>=0
     n = x.size
     xx = x.signextend(n+shift)
-    return (xx[shift:shift+n-1],xx.bit(shift-1))
+    carry_out = xx.bit(shift-1) if shift>0 else None
+    return (xx[shift:shift+n],carry_out)
 
 def ASR(x,shift):
     assert shift>=0
     n = x.size
     xx = x.signextend(n+shift)
-    return xx[shift:shift+n-1]
+    return xx[shift:shift+n]
 
 def ROR_C(x,shift):
     assert shift != 0
diff --git a/amoco/arch/arm/v8/asm64.py b/amoco/arch/arm/v8/asm64.py
index 3983e87..9bb83a6 100644
--- a/amoco/arch/arm/v8/asm64.py
+++ b/amoco/arch/arm/v8/asm64.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.logger import Log
diff --git a/amoco/arch/arm/v8/env64.py b/amoco/arch/arm/v8/env64.py
index 22b37d7..07b2998 100644
--- a/amoco/arch/arm/v8/env64.py
+++ b/amoco/arch/arm/v8/env64.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
diff --git a/amoco/arch/arm/v8/formats.py b/amoco/arch/arm/v8/formats.py
index 7d93163..d7ed75e 100644
--- a/amoco/arch/arm/v8/formats.py
+++ b/amoco/arch/arm/v8/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from .env64 import *
 from .utils import *
 from amoco.arch.core import Formatter
diff --git a/amoco/arch/arm/v8/spec_armv8.py b/amoco/arch/arm/v8/spec_armv8.py
index f5905e7..1065d7f 100644
--- a/amoco/arch/arm/v8/spec_armv8.py
+++ b/amoco/arch/arm/v8/spec_armv8.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
@@ -41,7 +43,7 @@ def ExtendReg(r,etype,shift=0):
     return r[0:l].extend(signed,N)<<shift
 
 def System_Reg(*args):
-    # TODO: decode args into system register name (see §D.8). 
+    # TODO: decode args into system register name (see §D.8).
     return reg('sysreg{%s}'%(' '.join(['{:b}'.format(x) for x in args])),64)
 
 def sp2z(x):
diff --git a/amoco/arch/arm/v8/utils.py b/amoco/arch/arm/v8/utils.py
index e356f76..9775714 100644
--- a/amoco/arch/arm/v8/utils.py
+++ b/amoco/arch/arm/v8/utils.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.cas.expressions import *
diff --git a/amoco/arch/core.py b/amoco/arch/core.py
index 3227d19..bf8edb2 100644
--- a/amoco/arch/core.py
+++ b/amoco/arch/core.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 # This code is part of Amoco
-# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from crysp.bits import *
@@ -32,7 +32,6 @@
 }
 
 class icore(object):
-    __slots__ = ['bytes', 'type', 'mnemonic', 'operands']
 
     def __init__(self,istr=''):
         self.bytes    = istr
@@ -182,13 +181,13 @@ def __call__(self,bytestring,**kargs):
 # ispec (parametrable) decorator
 # -----------------------------------------
 # @ispec allows to easily define instruction decoders based on architectures specifications.
-# The 'spec' argument is a human-friendly string that describes how the ispec object will 
+# The 'spec' argument is a human-friendly string that describes how the ispec object will
 # (on request) decode a given bytestring and how it will expose various decoded entities to
-# the decorated function in order to define an instruction instance. 
+# the decorated function in order to define an instruction instance.
 # It uses the following syntax :
-# 
+#
 #   'LEN<[ FORMAT ]' : LEN indicates the bit length corresponding to the FORMAT. Here,
-#                      FORMAT is interpreted as a list of directives ordered 
+#                      FORMAT is interpreted as a list of directives ordered
 #                      from MSB (bit index LEN-1) to LSB (bit index 0). This is the default
 #                      direction if the '<' indicator is missing. LEN%8!=0 is unsupported.
 # or
@@ -199,7 +198,7 @@ def __call__(self,bytestring,**kargs):
 # possibly terminated with an optional '+' char to indicate that the spec is a prefix.
 # In this case, the bytestring prefix matching the ispec format is stacked temporarily
 # until the rest of the bytestring matches a non prefix ispec.
-# 
+#
 # The directives composing the FORMAT string are used to associate symbols to bits
 # located at dedicated offsets within the bitstring to be decoded. A directive has the
 # following syntax:
@@ -223,7 +222,7 @@ def __call__(self,bytestring,**kargs):
 #    location: is an optional string matching the following expressions
 #      '( len )'    : indicates that the value is decoded from the next len bits starting
 #                     from the current position of the directive within the FORMAT string.
-#      '(*)'        : indicates a 'variable length directive' for which the value is decoded 
+#      '(*)'        : indicates a 'variable length directive' for which the value is decoded
 #                     from the current position with all remaining bits in the FORMAT.
 #                     If the FORMAT LEN is also variable then all remaining bits from the
 #                     instruction buffer input string are used.
@@ -234,7 +233,7 @@ def __call__(self,bytestring,**kargs):
 #
 # Example:
 #
-# @ispec(32[ .cond(4) 101 1 imm24(24) ]", mnemonic="BL", _flag=True) 
+# @ispec(32[ .cond(4) 101 1 imm24(24) ]", mnemonic="BL", _flag=True)
 # def f(obj,imm24,_flag):
 #     [...]
 #
@@ -246,9 +245,9 @@ def __call__(self,bytestring,**kargs):
 #  => will decode 4 bits at position [28,29,30,31] and provide this value as an integer
 #     in 'obj.cond' instruction instance attribute.
 #  => will decode 24 bits at positions 23..0 and provide this value as an integer as
-#     argument 'imm24' of the decorated function f. 
+#     argument 'imm24' of the decorated function f.
 #  => will set obj.mnemonic to 'BL' and pass argument _flag=True to f.
-#  => will call f(obj,...) 
+#  => will call f(obj,...)
 #  => will return obj
 
 # additional arguments to ispec decorator **must** be provided with symbol=value form and
@@ -257,6 +256,7 @@ def __call__(self,bytestring,**kargs):
 # with value 'BL' when the function is called.
 # -----------------------------------------
 class ispec(object):
+    __slots__ = ['format','iattr','fargs','ast','fix','mask','pfx','size','hook']
 
     def __init__(self,format,**kargs):
         self.format = format
diff --git a/amoco/arch/gas.py b/amoco/arch/gas.py
index f24d727..2dc5486 100644
--- a/amoco/arch/gas.py
+++ b/amoco/arch/gas.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 try:
diff --git a/amoco/arch/msp430/asm.py b/amoco/arch/msp430/asm.py
index 4fd36a8..1e02b4e 100644
--- a/amoco/arch/msp430/asm.py
+++ b/amoco/arch/msp430/asm.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from .env import *
@@ -13,7 +13,7 @@ def autoinc(i,fmap):
     sz = 2 if i.BW else 1
     if rr is not None: fmap[rr] = fmap(rr+sz)
 
-# Ref: MSP430x1xx Family Users's Guide (Rev. F) 
+# Ref: MSP430x1xx Family Users's Guide (Rev. F)
 #------------------------------------------------------------------------------
 
 def i_MOV(i,fmap):
diff --git a/amoco/arch/msp430/cpu.py b/amoco/arch/msp430/cpu.py
index 7df0cd1..e5af80a 100644
--- a/amoco/arch/msp430/cpu.py
+++ b/amoco/arch/msp430/cpu.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.msp430.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/msp430/env.py b/amoco/arch/msp430/env.py
index 659342e..21d628b 100644
--- a/amoco/arch/msp430/env.py
+++ b/amoco/arch/msp430/env.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
diff --git a/amoco/arch/msp430/formats.py b/amoco/arch/msp430/formats.py
index 8bda531..6ebc64a 100644
--- a/amoco/arch/msp430/formats.py
+++ b/amoco/arch/msp430/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from .env import *
 from amoco.arch.core import Formatter
 
diff --git a/amoco/arch/msp430/parsers.py b/amoco/arch/msp430/parsers.py
index f4fc054..8eeea30 100644
--- a/amoco/arch/msp430/parsers.py
+++ b/amoco/arch/msp430/parsers.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 import pyparsing as pp
diff --git a/amoco/arch/msp430/spec_msp430.py b/amoco/arch/msp430/spec_msp430.py
index ff1ffc4..7732764 100644
--- a/amoco/arch/msp430/spec_msp430.py
+++ b/amoco/arch/msp430/spec_msp430.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # ref: MSP430x1xx User's Guide.
@@ -15,7 +15,7 @@
 # instruction MSP430 decoders
 #-------------------------------------------------------
 
-# get operand type/value based on addressing mode: 
+# get operand type/value based on addressing mode:
 def getopd(obj,mode,reg,data,CGR=False):
     r = env.R[reg]
     size = 8 if obj.BW else 16
diff --git a/amoco/arch/pic/F46K22/asm.py b/amoco/arch/pic/F46K22/asm.py
index f253a77..1f70a65 100644
--- a/amoco/arch/pic/F46K22/asm.py
+++ b/amoco/arch/pic/F46K22/asm.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from .env import *
diff --git a/amoco/arch/pic/F46K22/env.py b/amoco/arch/pic/F46K22/env.py
index e6b29ee..a4a1717 100644
--- a/amoco/arch/pic/F46K22/env.py
+++ b/amoco/arch/pic/F46K22/env.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
diff --git a/amoco/arch/pic/F46K22/formats.py b/amoco/arch/pic/F46K22/formats.py
index 0076d7c..34b399a 100644
--- a/amoco/arch/pic/F46K22/formats.py
+++ b/amoco/arch/pic/F46K22/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from .env import *
 from amoco.arch.core import Formatter
 
diff --git a/amoco/arch/pic/F46K22/spec_pic18.py b/amoco/arch/pic/F46K22/spec_pic18.py
index 744b8c1..b41f825 100644
--- a/amoco/arch/pic/F46K22/spec_pic18.py
+++ b/amoco/arch/pic/F46K22/spec_pic18.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.arch.pic.F46K22 import env
diff --git a/amoco/arch/pic/cpu_pic18f46k22.py b/amoco/arch/pic/cpu_pic18f46k22.py
index 62814d0..4e5ea1f 100644
--- a/amoco/arch/pic/cpu_pic18f46k22.py
+++ b/amoco/arch/pic/cpu_pic18f46k22.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.pic.F46K22.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/sparc/asm.py b/amoco/arch/sparc/asm.py
index f3e55cb..e4f363c 100644
--- a/amoco/arch/sparc/asm.py
+++ b/amoco/arch/sparc/asm.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from .env import *
diff --git a/amoco/arch/sparc/cpu_v8.py b/amoco/arch/sparc/cpu_v8.py
index 65a3004..561eb37 100644
--- a/amoco/arch/sparc/cpu_v8.py
+++ b/amoco/arch/sparc/cpu_v8.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.sparc.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/sparc/env.py b/amoco/arch/sparc/env.py
index fb3c782..3f6116b 100644
--- a/amoco/arch/sparc/env.py
+++ b/amoco/arch/sparc/env.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
diff --git a/amoco/arch/sparc/formats.py b/amoco/arch/sparc/formats.py
index aee73a6..862de7f 100644
--- a/amoco/arch/sparc/formats.py
+++ b/amoco/arch/sparc/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from .env import *
 from .utils import *
 from amoco.arch.core import Formatter
diff --git a/amoco/arch/sparc/parsers.py b/amoco/arch/sparc/parsers.py
index 7720ad4..5090ccf 100644
--- a/amoco/arch/sparc/parsers.py
+++ b/amoco/arch/sparc/parsers.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 import pyparsing as pp
diff --git a/amoco/arch/sparc/spec_v8.py b/amoco/arch/sparc/spec_v8.py
index a5bce60..911d39b 100644
--- a/amoco/arch/sparc/spec_v8.py
+++ b/amoco/arch/sparc/spec_v8.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2012-2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/sparc/utils.py b/amoco/arch/sparc/utils.py
index 738f37c..4daf4c5 100644
--- a/amoco/arch/sparc/utils.py
+++ b/amoco/arch/sparc/utils.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 CONDB = {
   0b1000: 'ba',
   0b0000: 'bn',
diff --git a/amoco/arch/x64/asm.py b/amoco/arch/x64/asm.py
index b2e37a9..9c8dfdb 100644
--- a/amoco/arch/x64/asm.py
+++ b/amoco/arch/x64/asm.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from .env import *
@@ -13,12 +13,12 @@
 #------------------------------------------------------------------------------
 # utils :
 def push(fmap,x):
-  fmap[rsp] = fmap[rsp]-x.length
+  fmap[rsp] = fmap(rsp-x.length)
   fmap[mem(rsp,x.size)] = x
 
 def pop(fmap,l):
   fmap[l] = fmap(mem(rsp,l.size))
-  fmap[rsp] = fmap[rsp]+l.length
+  fmap[rsp] = fmap(rsp+l.length)
 
 def parity(x):
   x = x.zeroextend(64)
@@ -78,13 +78,13 @@ def i_RET(i,fmap):
   pop(fmap,rip)
 
 def i_HLT(i,fmap):
-  ext('halt').call(fmap)
+  fmap[rip] = top(64)
 
 #------------------------------------------------------------------------------
 def _ins_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else rcx
-  loc = mem(fmap(rdi),l*8)
-  src = ext('IN%s'%fmap(dx),l*8).call(fmap)
+  loc = mem(rdi,l*8)
+  src = ext('IN',size=l*8).call(fmap,port=fmap(dx))
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -105,13 +105,11 @@ def i_INSD(i,fmap):
 def _outs_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else rcx
   src = fmap(mem(rsi,l*8))
-  loc = ext('OUT%s'%fmap(dx),l*8).call(fmap)
+  ext('OUT').call(fmap,src=fmap(mem(rsi,l*8)))
   if i.misc['rep']:
-      fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
       fmap[rip] = tst(fmap(counter)==0, fmap[rip]+i.length, fmap[rip])
   else:
-      fmap[loc] = src
       fmap[rip] = fmap[rip]+i.length
   fmap[rdi] = tst(fmap(df),fmap(rdi)-l,fmap(rdi)+l)
 
@@ -124,8 +122,7 @@ def i_OUTSD(i,fmap):
 
 #------------------------------------------------------------------------------
 def i_INT3(i,fmap):
-  fmap[rip] = fmap[rip]+i.length
-  ext('INT3').call(fmap)
+  fmap[rip] = ext('INT3',size=64)
 
 def i_CLC(i,fmap):
   fmap[rip] = fmap[rip]+i.length
@@ -188,8 +185,8 @@ def i_POPFQ(i,fmap):
 #------------------------------------------------------------------------------
 def _cmps_(i,fmap,l):
   counter,d,s = ecx,edi,esi if i.misc['adrsz'] else rcx,rdi,rsi
-  dst = mem(fmap(d),l*8)
-  src = mem(fmap(s),l*8)
+  dst = fmap(mem(d,l*8))
+  src = fmap(mem(s,l*8))
   x, carry, overflow = SubWithBorrow(dst,src)
   if i.misc['rep']:
       fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(dst,src))
@@ -208,8 +205,8 @@ def _cmps_(i,fmap,l):
       fmap[cf] = carry
       fmap[of] = overflow
       fmap[rip] = fmap[rip]+i.length
-  fmap[d] = tst(fmap(df),fmap(d)-l,fmap(d)+l)
-  fmap[s] = tst(fmap(df),fmap(s)-l,fmap(s)+l)
+  fmap[d] = fmap(tst(df,d-l,d+l))
+  fmap[s] = fmap(tst(df,s-l,s+l))
 
 def i_CMPSB(i,fmap):
   _cmps_(i,fmap,1)
@@ -223,8 +220,8 @@ def i_CMPSQ(i,fmap):
 #------------------------------------------------------------------------------
 def _scas_(i,fmap,l):
   counter,d = ecx,edi if i.misc['adrsz'] else rcx,rdi
-  a = {1:al, 2:ax, 4:eax, 8:rax}[l]
-  src = mem(fmap(d),l*8)
+  a = fmap({1:al, 2:ax, 4:eax, 8:rax}[l])
+  src = fmap(mem(d,l*8))
   x, carry, overflow = SubWithBorrow(a,src)
   if i.misc['rep']:
       fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(a,src))
@@ -258,7 +255,7 @@ def i_SCASQ(i,fmap):
 def _lods_(i,fmap,l):
   counter,s = (ecx,esi) if i.misc['adrsz'] else (rcx,rsi)
   loc = {1:al, 2:ax, 4:eax, 8:rax}[l]
-  src = mem(fmap(s),l*8)
+  src = fmap(mem(s,l*8))
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -266,7 +263,7 @@ def _lods_(i,fmap,l):
   else:
       fmap[loc] = src
       fmap[rip] = fmap[rip]+i.length
-  fmap[s] = tst(fmap(df),fmap(s)-l,fmap(s)+l)
+  fmap[s] = fmap(tst(df,s-l,s+l))
 
 def i_LODSB(i,fmap):
   _lods_(i,fmap,1)
@@ -283,8 +280,8 @@ def _stos_(i,fmap,l):
       counter,d = ecx,edi
   else:
       counter,d = rcx,rdi
-  src = {1:al, 2:ax, 4:eax, 8:rax}[l]
-  loc = mem(fmap(d),l*8)
+  loc = mem(d,l*8)
+  src = fmap({1:al, 2:ax, 4:eax, 8:rax}[l])
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -309,8 +306,8 @@ def _movs_(i,fmap,l):
       counter,d,s = ecx,edi,esi
   else:
       counter,d,s = rcx,rdi,rsi
-  loc = mem(fmap(d),l*8)
-  src = mem(fmap(s),l*8)
+  loc = mem(d,l*8)
+  src = fmap(mem(s,l*8))
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -343,14 +340,14 @@ def i_OUT(i,fmap):
   op2 = fmap(i.operands[1])
   ext('OUT%s'%op1).call(fmap,arg=op2)
 
-#op1_src retreives fmap[op1] (op1 value): 
+#op1_src retrieves fmap[op1] (op1 value):
 def i_PUSH(i,fmap):
   fmap[rip] = fmap[rip]+i.length
   op1 = fmap(i.operands[0])
   if op1.size==8: op1 = op1.signextend(64)
   push(fmap,op1)
 
-#op1_dst retreives op1 location: 
+#op1_dst retrieves op1 location:
 def i_POP(i,fmap):
   fmap[rip] = fmap[rip]+i.length
   op1 = i.operands[0]
@@ -362,8 +359,7 @@ def i_CALL(i,fmap):
   op1 = fmap(i.operands[0])
   op1 = op1.signextend(pc.size)
   target = pc+op1 if not i.misc['absolute'] else op1
-  if target._is_ext: target.call(fmap)
-  else: fmap[rip] = target
+  fmap[rip] = target
 
 
 def i_CALLF(i,fmap):
@@ -375,8 +371,7 @@ def i_JMP(i,fmap):
   op1 = fmap(i.operands[0])
   op1 = op1.signextend(pc.size)
   target = pc+op1 if not i.misc['absolute'] else op1
-  if target._is_ext: target.call(fmap)
-  else: fmap[rip] = target
+  fmap[rip] = target
 
 def i_JMPF(i,fmap):
   logger.verbose('%s semantic is not defined'%i.mnemonic)
@@ -434,7 +429,7 @@ def i_INT(i,fmap):
   fmap[rip] = fmap[rip]+i.length
   op1 = fmap(i.operands[0])
   push(fmap,fmap[rip])
-  ext('INT%s'%op1).call(fmap)
+  fmap[rip] = ext('INT',port=op1,size=64)
 
 def i_INC(i,fmap):
   op1 = i.operands[0]
diff --git a/amoco/arch/x64/cpu_x64.py b/amoco/arch/x64/cpu_x64.py
index 5dcc056..922f6e6 100644
--- a/amoco/arch/x64/cpu_x64.py
+++ b/amoco/arch/x64/cpu_x64.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.x64.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/x64/env.py b/amoco/arch/x64/env.py
index 10ac0c0..b4187fd 100644
--- a/amoco/arch/x64/env.py
+++ b/amoco/arch/x64/env.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
@@ -10,15 +10,15 @@
 # 64bits registers :
 #-------------------
 
-rax    = reg('rax',64)     # accumulator for operands and results data                 
-rbx    = reg('rbx',64)     # pointer to data in the DS segment                         
-rcx    = reg('rcx',64)     # counter for string and loop operations                    
-rdx    = reg('rdx',64)     # I/O pointer                                               
-rbp    = reg('rbp',64)     # pointer to data in the stack (SS segment)                 
-rsp    = reg('rsp',64)     # stack pointer (SS segment)                                
-rsi    = reg('rsi',64)     # ptr to data in segment pointed by DS; src ptr for strings 
-rdi    = reg('rdi',64)     # ptr to data in segment pointed by ES; dst ptr for strings 
-rip    = reg('rip',64)     # instruction pointer in 64 bit mode                        
+rax    = reg('rax',64)     # accumulator for operands and results data
+rbx    = reg('rbx',64)     # pointer to data in the DS segment
+rcx    = reg('rcx',64)     # counter for string and loop operations
+rdx    = reg('rdx',64)     # I/O pointer
+rbp    = reg('rbp',64)     # pointer to data in the stack (SS segment)
+rsp    = reg('rsp',64)     # stack pointer (SS segment)
+rsi    = reg('rsi',64)     # ptr to data in segment pointed by DS; src ptr for strings
+rdi    = reg('rdi',64)     # ptr to data in segment pointed by ES; dst ptr for strings
+rip    = reg('rip',64)     # instruction pointer in 64 bit mode
 rflags = reg('rflags',64)
 
 
@@ -59,19 +59,19 @@
 dh = slc(rdx,8,8,'dh')
 
 cf = slc(rflags,0,1,'cf')   # carry/borrow flag
-pf = slc(rflags,2,1,'pf')   # parity flag      
-zf = slc(rflags,6,1,'zf')   # zero flag        
-sf = slc(rflags,7,1,'sf')   # sign flag        
-df = slc(rflags,10,1,'df')  # direction flag   
-of = slc(rflags,11,1,'of')  # overflow flag    
+pf = slc(rflags,2,1,'pf')   # parity flag
+zf = slc(rflags,6,1,'zf')   # zero flag
+sf = slc(rflags,7,1,'sf')   # sign flag
+df = slc(rflags,10,1,'df')  # direction flag
+of = slc(rflags,11,1,'of')  # overflow flag
 
 # segment registers & other mappings:
 cs = reg('cs',16)      # segment selector for the code segment
-ds = reg('ds',16)      # segment selector to a data segment   
+ds = reg('ds',16)      # segment selector to a data segment
 ss = reg('ss',16)      # segment selector to the stack segment
-es = reg('es',16)      # (data)                               
-fs = reg('fs',16)      # (data)                               
-gs = reg('gs',16)      # (data)                               
+es = reg('es',16)      # (data)
+fs = reg('fs',16)      # (data)
+gs = reg('gs',16)      # (data)
 
 r8 = reg('r8',64); r8d = slc(r8,0,32,'r8d'); r8w = slc(r8,0,16,'r8w'); r8l = slc(r8,0,8,'r8l')
 r9 = reg('r9',64); r9d = slc(r9,0,32,'r9d'); r9w = slc(r9,0,16,'r9w'); r9l = slc(r9,0,8,'r9l')
diff --git a/amoco/arch/x64/formats.py b/amoco/arch/x64/formats.py
index 3e4f69c..238bab3 100644
--- a/amoco/arch/x64/formats.py
+++ b/amoco/arch/x64/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.core import Formatter
 
 def pfx(i):
diff --git a/amoco/arch/x64/spec_fpu.py b/amoco/arch/x64/spec_fpu.py
index f1f76f6..811524f 100644
--- a/amoco/arch/x64/spec_fpu.py
+++ b/amoco/arch/x64/spec_fpu.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/x64/spec_ia32e.py b/amoco/arch/x64/spec_ia32e.py
index 88e6e6f..ee6313e 100644
--- a/amoco/arch/x64/spec_ia32e.py
+++ b/amoco/arch/x64/spec_ia32e.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/x64/spec_sse.py b/amoco/arch/x64/spec_sse.py
index ca1c209..1cb91a7 100644
--- a/amoco/arch/x64/spec_sse.py
+++ b/amoco/arch/x64/spec_sse.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
@@ -111,7 +111,7 @@ def sse_ps(obj,Mod,REG,RM,data):
     obj.misc['REX'] = None
     op2,data = getModRM(obj,Mod,RM,data)
     if not op2._is_reg: raise InstructionError(obj)
-    op1 = env.getreg(REG,op2.size) # 
+    op1 = env.getreg(REG,op2.size) #
     obj.operands = [op1,op2]
     obj.type = type_data_processing
 
@@ -545,7 +545,7 @@ def sse_sd(obj,Mod,REG,RM,data):
     obj.operands = [op1,op2]
     obj.type = type_data_processing
 
-# 66 prefixed : 
+# 66 prefixed :
 # -------------
 # Note that thos specs MUST APPEAR AFTER f2/f3 prefixes which have priority over 66,
 # so that 66-related specs will be matched after identical f2/f3 specs
diff --git a/amoco/arch/x64/utils.py b/amoco/arch/x64/utils.py
index be7e596..2464dca 100644
--- a/amoco/arch/x64/utils.py
+++ b/amoco/arch/x64/utils.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/x86/asm.py b/amoco/arch/x86/asm.py
index b8be8fd..4639d6b 100644
--- a/amoco/arch/x86/asm.py
+++ b/amoco/arch/x86/asm.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from .env import *
@@ -13,12 +13,12 @@
 #------------------------------------------------------------------------------
 # utils :
 def push(fmap,x):
-  fmap[esp] = fmap[esp]-x.length
+  fmap[esp] = fmap(esp-x.length)
   fmap[mem(esp,x.size)] = x
 
 def pop(fmap,l):
   fmap[l] = fmap(mem(esp,l.size))
-  fmap[esp] = fmap[esp]+l.length
+  fmap[esp] = fmap(esp+l.length)
 
 def parity(x):
   x = x ^ (x>>1)
@@ -129,21 +129,20 @@ def i_WAIT(i,fmap):
 # LEAVE instruction is a shortcut for 'mov esp,ebp ; pop ebp ;'
 def i_LEAVE(i,fmap):
   fmap[eip] = fmap[eip]+i.length
-  fmap[esp] = fmap[ebp]
+  fmap[esp] = fmap(ebp)
   pop(fmap,ebp)
 
 def i_RET(i,fmap):
   pop(fmap,eip)
 
 def i_HLT(i,fmap):
-  fmap[eip] = fmap[eip]+i.length
-  ext('halt').call(fmap)
+  fmap[eip] = top(32)
 
 #------------------------------------------------------------------------------
 def _ins_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
-  loc = mem(fmap(edi),l*8)
-  src = ext('IN%s'%fmap(dx),l*8).call(fmap)
+  loc = mem(edi,l*8)
+  src = ext('IN',size=l*8).call(fmap,port=fmap(dx))
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -163,14 +162,11 @@ def i_INSD(i,fmap):
 #------------------------------------------------------------------------------
 def _outs_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
-  src = fmap(mem(esi,l*8))
-  loc = ext('OUT%s'%fmap(dx),l*8).call(fmap)
+  ext('OUT').call(fmap,src=fmap(mem(esi,l*8)))
   if i.misc['rep']:
-      fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
       fmap[eip] = tst(fmap(counter)==0, fmap[eip]+i.length, fmap[eip])
   else:
-      fmap[loc] = src
       fmap[eip] = fmap[eip]+i.length
   fmap[edi] = tst(fmap(df),fmap(edi)-l,fmap(edi)+l)
 
@@ -183,8 +179,7 @@ def i_OUTSD(i,fmap):
 
 #------------------------------------------------------------------------------
 def i_INT3(i,fmap):
-  fmap[eip] = fmap[eip]+i.length
-  ext('INT3').call(fmap)
+  fmap[eip] = ext('INT3',size=32)
 
 def i_CLC(i,fmap):
   fmap[eip] = fmap[eip]+i.length
@@ -269,8 +264,8 @@ def i_SAHF(i,fmap):
 #------------------------------------------------------------------------------
 def _cmps_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
-  dst = mem(fmap(edi),l*8)
-  src = mem(fmap(esi),l*8)
+  dst = fmap(mem(edi,l*8))
+  src = fmap(mem(esi,l*8))
   x, carry, overflow = SubWithBorrow(dst,src)
   if i.misc['rep']:
       fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(dst,src))
@@ -303,7 +298,7 @@ def i_CMPSD(i,fmap):
 def _scas_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
   a = {1:al, 2:ax, 4:eax}[l]
-  src = mem(fmap(edi),l*8)
+  src = fmap(mem(edi,l*8))
   x, carry, overflow = SubWithBorrow(a,src)
   if i.misc['rep']:
       fmap[af] = tst(fmap(counter)==0, fmap(af), halfborrow(a,src))
@@ -335,7 +330,7 @@ def i_SCASD(i,fmap):
 def _lods_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
   loc = {1:al, 2:ax, 4:eax}[l]
-  src = mem(fmap(esi),l*8)
+  src = fmap(mem(esi,l*8))
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -356,7 +351,7 @@ def i_LODSD(i,fmap):
 def _stos_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
   src = {1:al, 2:ax, 4:eax}[l]
-  loc = mem(fmap(edi),l*8)
+  loc = mem(edi,l*8)
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -376,8 +371,8 @@ def i_STOSD(i,fmap):
 #------------------------------------------------------------------------------
 def _movs_(i,fmap,l):
   counter = cx if i.misc['adrsz'] else ecx
-  loc = mem(fmap(edi),l*8)
-  src = mem(fmap(esi),l*8)
+  loc = mem(edi,l*8)
+  src = fmap(mem(esi,l*8))
   if i.misc['rep']:
       fmap[loc] = tst(fmap(counter)==0, fmap(loc), src)
       fmap[counter] = fmap(counter)-1
@@ -400,22 +395,22 @@ def i_IN(i,fmap):
   fmap[eip] = fmap[eip]+i.length
   op1 = i.operands[0]
   op2 = fmap(i.operands[1])
-  fmap[op1] = ext('IN%s'%op2,op1.size).call(fmap)
+  fmap[op1] = ext('IN',size=op1.size).call(fmap,port=op2)
 
 def i_OUT(i,fmap):
   fmap[eip] = fmap[eip]+i.length
   op1 = fmap(i.operands[0])
   op2 = fmap(i.operands[1])
-  ext('OUT%s'%op1).call(fmap,arg=op2)
+  ext('OUT').call(fmap,port=op1,src=op2)
 
-#op1_src retreives fmap[op1] (op1 value): 
+#op1_src retrieves fmap[op1] (op1 value):
 def i_PUSH(i,fmap):
   fmap[eip] = fmap[eip]+i.length
   op1 = fmap(i.operands[0])
   if op1.size==8: op1 = op1.signextend(32)
   push(fmap,op1)
 
-#op1_dst retreives op1 location: 
+#op1_dst retrieves op1 location:
 def i_POP(i,fmap):
   fmap[eip] = fmap[eip]+i.length
   op1 = i.operands[0]
@@ -427,8 +422,7 @@ def i_CALL(i,fmap):
   op1 = fmap(i.operands[0])
   op1 = op1.signextend(pc.size)
   target = pc+op1 if not i.misc['absolute'] else op1
-  if target._is_ext: target.call(fmap)
-  else: fmap[eip] = target
+  fmap[eip] = target
 
 
 def i_CALLF(i,fmap):
@@ -440,8 +434,7 @@ def i_JMP(i,fmap):
   op1 = fmap(i.operands[0])
   op1 = op1.signextend(pc.size)
   target = pc+op1 if not i.misc['absolute'] else op1
-  if target._is_ext: target.call(fmap)
-  else: fmap[eip] = target
+  fmap[eip] = target
 
 def i_JMPF(i,fmap):
   logger.verbose('%s semantic is not defined'%i.mnemonic)
@@ -498,7 +491,7 @@ def i_INT(i,fmap):
   fmap[eip] = fmap[eip]+i.length
   op1 = fmap(i.operands[0])
   push(fmap,fmap[eip])
-  ext('INT%s'%op1).call(fmap)
+  fmap[eip] = ext('INT',port=op1,size=32)
 
 def i_INC(i,fmap):
   op1 = i.operands[0]
@@ -573,15 +566,15 @@ def i_MOVBE(i,fmap):
 
 def i_MOVSX(i,fmap):
   op1 = i.operands[0]
-  op2 = fmap(i.operands[1])
+  op2 = i.operands[1]
   fmap[eip] = fmap[eip]+i.length
-  fmap[op1] = op2.signextend(op1.size)
+  fmap[op1] = fmap(op2).signextend(op1.size)
 
 def i_MOVZX(i,fmap):
   op1 = i.operands[0]
-  op2 = fmap(i.operands[1])
+  op2 = i.operands[1]
   fmap[eip] = fmap[eip]+i.length
-  fmap[op1] = op2.zeroextend(op1.size)
+  fmap[op1] = fmap(op2).zeroextend(op1.size)
 
 def i_ADC(i,fmap):
   op1 = i.operands[0]
@@ -909,17 +902,17 @@ def i_SHLD(i,fmap):
 def i_IMUL(i,fmap):
   fmap[eip] = fmap[eip]+i.length
   if len(i.operands)==1:
-    src = fmap(i.operands[0])
+    src = i.operands[0]
     m,d = {8:(al,ah), 16:(ax,dx), 32:(eax,edx)}[src.size]
-    r = m**src
+    r = fmap(m**src)
   elif len(i.operands)==2:
     dst,src = i.operands
     m = d = dst
-    r = dst**src
+    r = fmap(dst**src)
   else:
     dst,src,imm = i.operands
     m = d = dst
-    r = src**imm.signextend(src.size)
+    r = fmap(src)**imm.signextend(src.size)
   lo = r[0:src.size]
   hi = r[src.size:r.size]
   fmap[d]  = hi
@@ -929,9 +922,9 @@ def i_IMUL(i,fmap):
 
 def i_MUL(i,fmap):
   fmap[eip] = fmap[eip]+i.length
-  src = fmap(i.operands[0])
+  src = i.operands[0]
   m,d = {8:(al,ah), 16:(ax,dx), 32:(eax,edx)}[src.size]
-  r = m**src
+  r = fmap(m**src)
   lo = r[0:src.size]
   hi = r[src.size:r.size]
   fmap[d]  = hi
diff --git a/amoco/arch/x86/cpu_x86.py b/amoco/arch/x86/cpu_x86.py
index df1c889..659476b 100644
--- a/amoco/arch/x86/cpu_x86.py
+++ b/amoco/arch/x86/cpu_x86.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.x86.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/x86/env.py b/amoco/arch/x86/env.py
index 847289d..dc48486 100644
--- a/amoco/arch/x86/env.py
+++ b/amoco/arch/x86/env.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # import expressions:
@@ -11,11 +11,11 @@
 #-------------------
 
 eax    = reg('eax',32)     # accumulator for operands and results data
-ebx    = reg('ebx',32)     # pointer to data in the DS segment        
-ecx    = reg('ecx',32)     # counter for string and loop operations   
-edx    = reg('edx',32)     # I/O pointer                              
-ebp    = reg('ebp',32)     # pointer to data in the stack (SS segment)                
-esp    = reg('esp',32)     # stack pointer (SS segment)                               
+ebx    = reg('ebx',32)     # pointer to data in the DS segment
+ecx    = reg('ecx',32)     # counter for string and loop operations
+edx    = reg('edx',32)     # I/O pointer
+ebp    = reg('ebp',32)     # pointer to data in the stack (SS segment)
+esp    = reg('esp',32)     # stack pointer (SS segment)
 esi    = reg('esi',32)     # ptr to data in segment pointed by DS; src ptr for strings
 edi    = reg('edi',32)     # ptr to data in segment pointed by ES; dst ptr for strings
 eip    = reg('eip',32)     # instruction pointer in 32 bit mode
@@ -41,21 +41,21 @@
 dh = slc(edx,8,8,'dh')
 
 cf = slc(eflags,0,1,'cf')   # carry/borrow flag
-pf = slc(eflags,2,1,'pf')   # parity flag      
-af = slc(eflags,4,1,'pf')   # aux carry flag      
-zf = slc(eflags,6,1,'zf')   # zero flag        
-sf = slc(eflags,7,1,'sf')   # sign flag        
-tf = slc(eflags,8,1,'sf')   # trap flag        
-df = slc(eflags,10,1,'df')  # direction flag   
-of = slc(eflags,11,1,'of')  # overflow flag    
+pf = slc(eflags,2,1,'pf')   # parity flag
+af = slc(eflags,4,1,'af')   # aux carry flag
+zf = slc(eflags,6,1,'zf')   # zero flag
+sf = slc(eflags,7,1,'sf')   # sign flag
+tf = slc(eflags,8,1,'tf')   # trap flag
+df = slc(eflags,10,1,'df')  # direction flag
+of = slc(eflags,11,1,'of')  # overflow flag
 
 # segment registers & other mappings:
 cs = reg('cs',16)      # segment selector for the code segment
-ds = reg('ds',16)      # segment selector to a data segment   
+ds = reg('ds',16)      # segment selector to a data segment
 ss = reg('ss',16)      # segment selector to the stack segment
-es = reg('es',16)      # (data)                               
-fs = reg('fs',16)      # (data)                               
-gs = reg('gs',16)      # (data)                               
+es = reg('es',16)      # (data)
+fs = reg('fs',16)      # (data)
+gs = reg('gs',16)      # (data)
 
 # fpu registers (80 bits holds double extended floats see Intel Vol1--4.4.2):
 def st(num):
diff --git a/amoco/arch/x86/formats.py b/amoco/arch/x86/formats.py
index 55f624e..8d99755 100644
--- a/amoco/arch/x86/formats.py
+++ b/amoco/arch/x86/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.core import Formatter
 
 def pfx(i):
@@ -15,7 +17,7 @@ def deref(op):
     if not op._is_mem: return str(op)
     d = '%+d'%op.a.disp if op.a.disp else ''
     s = {8:'byte ptr ',16:'word ptr ', 64:'qword ptr ', 128:'xmmword ptr '}.get(op.size,'')
-    s += '%s:'%op.a.seg  if op.a.seg  else ''
+    s += '%s:'%op.a.seg  if (op.a.seg is not '')  else ''
     s += '[%s%s]'%(op.a.base,d)
     return s
 
diff --git a/amoco/arch/x86/parsers.py b/amoco/arch/x86/parsers.py
index 650ed48..2e2fdc6 100644
--- a/amoco/arch/x86/parsers.py
+++ b/amoco/arch/x86/parsers.py
@@ -1,7 +1,8 @@
 #!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 import pyparsing as pp
diff --git a/amoco/arch/x86/spec_fpu.py b/amoco/arch/x86/spec_fpu.py
index f1f76f6..811524f 100644
--- a/amoco/arch/x86/spec_fpu.py
+++ b/amoco/arch/x86/spec_fpu.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/x86/spec_ia32.py b/amoco/arch/x86/spec_ia32.py
index 9dfc2be..a11458f 100644
--- a/amoco/arch/x86/spec_ia32.py
+++ b/amoco/arch/x86/spec_ia32.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/x86/spec_sse.py b/amoco/arch/x86/spec_sse.py
index 8427e9c..c7a019b 100644
--- a/amoco/arch/x86/spec_sse.py
+++ b/amoco/arch/x86/spec_sse.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
@@ -540,7 +540,7 @@ def sse_sd(obj,Mod,REG,RM,data):
     obj.operands = [op1,op2]
     obj.type = type_data_processing
 
-# 66 prefixed : 
+# 66 prefixed :
 # -------------
 # Note that thos specs MUST APPEAR AFTER f2/f3 prefixes which have priority over 66,
 # so that 66-related specs will be matched after identical f2/f3 specs
diff --git a/amoco/arch/x86/utils.py b/amoco/arch/x86/utils.py
index 2a17b58..5877e31 100644
--- a/amoco/arch/x86/utils.py
+++ b/amoco/arch/x86/utils.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # spec_xxx files are providers for instruction objects.
diff --git a/amoco/arch/z80/asm.py b/amoco/arch/z80/asm.py
index 7ecf5ea..0a72593 100644
--- a/amoco/arch/z80/asm.py
+++ b/amoco/arch/z80/asm.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
 # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) 
diff --git a/amoco/arch/z80/cpu_gb.py b/amoco/arch/z80/cpu_gb.py
index 4dee22a..9f594aa 100644
--- a/amoco/arch/z80/cpu_gb.py
+++ b/amoco/arch/z80/cpu_gb.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.z80.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/z80/cpu_z80.py b/amoco/arch/z80/cpu_z80.py
index fd35283..5011082 100644
--- a/amoco/arch/z80/cpu_z80.py
+++ b/amoco/arch/z80/cpu_z80.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.z80.asm import *
 # expose "microarchitecture" (instructions semantics)
 uarch = dict(filter(lambda kv:kv[0].startswith('i_'),locals().iteritems()))
diff --git a/amoco/arch/z80/env.py b/amoco/arch/z80/env.py
index a17cceb..6bdf732 100644
--- a/amoco/arch/z80/env.py
+++ b/amoco/arch/z80/env.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
 # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) 
diff --git a/amoco/arch/z80/formats.py b/amoco/arch/z80/formats.py
index 9ecc7c5..852144e 100644
--- a/amoco/arch/z80/formats.py
+++ b/amoco/arch/z80/formats.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from amoco.arch.core import Formatter
 
 def mnemo(i):
diff --git a/amoco/arch/z80/spec_gb.py b/amoco/arch/z80/spec_gb.py
index 216bf8e..0c28207 100644
--- a/amoco/arch/z80/spec_gb.py
+++ b/amoco/arch/z80/spec_gb.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
 # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) 
diff --git a/amoco/arch/z80/spec_mostek.py b/amoco/arch/z80/spec_mostek.py
index d881271..55aab4e 100644
--- a/amoco/arch/z80/spec_mostek.py
+++ b/amoco/arch/z80/spec_mostek.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
 # Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) 
diff --git a/amoco/cas/expressions.py b/amoco/cas/expressions.py
index 874bef1..4cdff7d 100644
--- a/amoco/cas/expressions.py
+++ b/amoco/cas/expressions.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.logger import Log
@@ -13,13 +15,16 @@ def checkarg1_exp(*args):
         if len(args)>0 and isinstance(args[0],exp):
             return f(*args)
         else:
-            raise TypeError('arg is not an expression')
+            logger.error('first arg is not an expression')
+            raise TypeError(args)
     return checkarg1_exp
 
 def _checkarg_sizes(f):
     def checkarg_sizes(self,n):
         if self.size<>n.size:
-            if self.size>0 and n.size>0: raise ValueError,'size mismatch'
+            if self.size>0 and n.size>0:
+                logger.error('size mismatch')
+                raise ValueError,n
         return f(self,n)
     return checkarg_sizes
 
@@ -38,9 +43,13 @@ def checkarg_slice(self,*args):
         if isinstance(i,slice):
             if i.step<>None: raise ValueError,i
             if i.start<0 or i.stop>self.size:
-                    raise ValueError,i
-            if i.stop<=i.start: raise ValueError,i
+                logger.error('size mismatch')
+                raise ValueError,i
+            if i.stop<=i.start:
+                logger.error('invalid slice')
+                raise ValueError,i
         else:
+            logger.error('argument should be a slice')
             raise TypeError,i
         return f(self,*args)
     return checkarg_slice
@@ -55,6 +64,7 @@ def checkarg_slice(self,*args):
 #------------------------------------------------------------------------------
 class exp(object):
     __slots__ = ['size','sf']
+    _endian   = 1      # defaults to little-endian
     _is_def   = False
     _is_cst   = False
     _is_reg   = False
@@ -72,10 +82,23 @@ def __init__(self,size=0,sf=False):
 
     def __len__(self): return self.length
 
+    @classmethod
+    def setendian(cls,e):  # select the byte-order flag that bytes() reads from _endian
+        assert e in (-1,+1)  # only -1 and +1 are accepted (+1 is the little-endian default)
+        cls._endian = e  # stored on the class the method is called on
+
     @property
     def length(self): # length value is in bytes
         return self.size/8
 
+    def bytes(self,sta=0,sto=None):  # sub-expression made of bytes sta..sto (slice semantics)
+        s = slice(sta,sto)
+        l = self.length  # length in bytes
+        sta,sto,stp = s.indices(l)  # normalize sta/sto against the byte length (stp unused)
+        if self._endian==-1:
+            sta,sto = l-sto,l-sta  # mirror the byte range: indices counted from the other end
+        return self[sta*8:sto*8]  # byte offsets turned into bit offsets for the bit-slice
+
     @property
     def mask(self):
         return (1<<self.size)-1
@@ -99,10 +122,11 @@ def dumps(self):
     def loads(self,s):
         from pickle import loads
         self = loads(s)
+        return self
 
     def __str__(self):
-        if self._is_def is 0: return 'T'
-        if self._is_def is False: return '_'
+        if self._is_def is 0: return 'T%d'%self.size
+        if self._is_def is False: return '⊥%d'%self.size
         raise ValueError("void expression")
 
     def bit(self,i):
@@ -110,11 +134,9 @@ def bit(self,i):
         return self[i:i+1]
 
     # get item allows to extract the expression of a slice of the exp
+    @_checkarg_slice
     def __getitem__(self,i)  :
-        if isinstance(i,slice):
-            return slicer(self,i.start,i.stop-i.start)
-        else:
-            raise TypeError,i
+        return slicer(self,i.start,i.stop-i.start)
 
     # set item allows to insert the expression of a slice in the exp
     @_checkarg_slice
@@ -143,9 +165,11 @@ def zeroextend(self,size):
         return self.extend(False,size)
 
     # arithmetic / logic methods : These methods are shared by all nodes.
+    # unary operators:
     def __invert__(self): return oper('~',self)
-    def __neg__(self): return oper('*',self,cst(-1,self.size))
-
+    def __neg__(self): return oper('-',self)
+    def __pos__(self): return self
+    # binary operators:
     @_checkarg_numeric
     def __add__(self,n): return oper('+',self,n)
     @_checkarg_numeric
@@ -166,7 +190,7 @@ def __and__(self,n): return oper('&',self,n)
     def __or__(self,n): return oper('|',self,n)
     @_checkarg_numeric
     def __xor__(self,n): return oper('^',self,n)
-
+    # reflected operand cases:
     @_checkarg_numeric
     def __radd__(self,n): return oper('+',n,self)
     @_checkarg_numeric
@@ -181,7 +205,7 @@ def __rand__(self,n): return oper('&',n,self)
     def __ror__(self,n): return oper('|',n,self)
     @_checkarg_numeric
     def __rxor__(self,n): return oper('^',n,self)
-
+    # shifts:
     @_checkarg_numeric
     def __lshift__(self,n): return oper('<<',self,n)
     @_checkarg_numeric
@@ -220,6 +244,10 @@ def __ge__(self,n):
     def __gt__(self,n):
         if exp.__cmp__(self,n)==0: return bit0
         return oper('>',self,n)
+
+    def to_smtlib(self):  # placeholder: logs a warning, then always raises NotImplementedError
+        logger.warning('no SMT solver defined')
+        raise NotImplementedError  # NOTE(review): presumably replaced when a solver interface is loaded - confirm
 ##
 
 class top(exp):
@@ -263,6 +291,9 @@ def __int__(self):
     def __str__(self):
         return '{:#x}'.format(self.value)
 
+    def to_sym(self,ref):  # build a sym carrying this constant's v and size, labelled by ref
+        return sym(ref,self.v,self.size)
+
     # eval of cst is always itself: (sf flag conserved)
     def eval(self,env): return cst(self.value,self.size)
 
@@ -373,12 +404,12 @@ def __nonzero__(self):
     @_checkarg_numeric
     @_checkarg_sizes
     def __eq__(self,n):
-        if n._is_cst: return cst(self.value==n.value)
+        if n._is_cst: return cst(self.v==n.v)
         else : return exp.__eq__(self,n)
     @_checkarg_numeric
     @_checkarg_sizes
     def __ne__(self,n):
-        if n._is_cst: return cst(self.value!=n.value)
+        if n._is_cst: return cst(self.v!=n.v)
         else : return exp.__ne__(self,n)
 
     @_checkarg_numeric
@@ -417,9 +448,9 @@ def __str__(self):
         return "#%s"%self.ref
 
 #---------------------------------
-# flt holds float immediate values
+# cfp holds float immediate values
 #---------------------------------
-class flt(exp):
+class cfp(exp):
     __slots__ = ['v']
     _is_def   = True
     _is_cst   = True
@@ -548,6 +579,16 @@ def __setattr__(self,a,v):
         if a is 'size' and self.__protect is True:
             raise AttributeError,'protected attribute'
         exp.__setattr__(self,a,v)
+
+    # how to unpickle reg objects: restore every attribute from the saved state.
+    def __setstate__(self,state):
+        v = state[1]  # mapping of attribute name to value (presumably the slots part of the pickle state - confirm)
+        self.__protect = False  # lift the guard: __setattr__ rejects 'size' while __protect is True
+        self.size = v['size']
+        self.sf = v['sf']
+        self.ref = v['ref']
+        self._subrefs = v['_subrefs']
+        self.__protect = v['_reg__protect']  # saved guard value, stored under its name-mangled key
 ##
 
 #------------------------------------------------------------------------------
@@ -556,9 +597,18 @@ def __setattr__(self,a,v):
 class ext(reg):
     _is_ext = True
 
+    def __init__(self,refname,**kargs):  # refname: name of the external symbol / stub
+        self.ref = refname
+        self._subrefs = kargs  # keyword args kept as-is; __call__ forwards them to the stub
+        self.size = kargs.get('size',None)  # size stays None unless given as a keyword
+        self.sf = False
+
     def __str__(self):
         return '@%s'%self.ref
 
+    def __setattr__(self,a,v):
+        exp.__setattr__(self,a,v)
+
     @classmethod
     def stub(cls,ref):
         try:
@@ -569,11 +619,15 @@ def stub(cls,ref):
 
     def call(self,env,**kargs):
         logger.info('stub %s called'%self.ref)
+        if not 'size' in kargs: kargs.update(size=self.size)
         res = self.stub(self.ref)(env,**kargs)
-        if res is None:
-            return top(self.size)
-        else:
-            return res[0:self.size]
+        if res is None: return top(self.size)
+        return res[0:self.size]
+
+    # used when the expression is a target used to build a block
+    def __call__(self,env):  # run the stub on env for its effect; nothing is returned
+        logger.info('stub %s called'%self.ref)
+        self.stub(self.ref)(env,**self._subrefs)  # stub looked up by ref, called with the stored keyword args
 ##
 
 # complex expressions are build with atoms attributes:
@@ -615,8 +669,11 @@ def __init__(self,s):
 
     def __str__(self):
         s = '{ |'
-        for nk,nv in self.parts.iteritems():
+        cur = 0
+        for nv in self:
+            nk = cur,cur+nv.size
             s += ' %s->%s |'%('[%d:%d]'%nk,str(nv))
+            cur += nv.size
         return s+' }'
 
     def eval(self,env):
@@ -716,6 +773,17 @@ def cut(self,start,stop):
         self.smask[start:stop] = [(start,stop)]*(stop-start)
     ##
 
+    def __iter__(self):
+        # yield the part expressions ordered by the start offset of their key:
+        rcmp = lambda x,y: cmp(x[0],y[0])  # compare keys on their first item only
+        part = self.parts.keys()
+        part.sort(rcmp)
+        cur = 0  # offset at which the next part is expected to start
+        for p in part:
+            assert p[0]==cur  # parts must be contiguous, the first one starting at 0
+            yield self.parts[p]
+            cur = p[1]  # second item of the key is where this part stops
+
     # restruct will concatenate cst expressions when possible
     # to minimize the number of parts.
     def restruct(self):
@@ -743,23 +811,32 @@ def restruct(self):
 #------------------------------------------------------------------------------
 # mem holds memory fetches, ie a read operation of length size, in segment seg,
 # at given address expression.
+# The mods list allows to handle aliasing issues detected at fetching time
+# and adjust the eval result accordingly.
 #------------------------------------------------------------------------------
 class mem(exp):
-    __slots__ = ['a']
+    __slots__ = ['a', 'mods']
     _is_def   = True
     _is_mem   = True
 
-    def __init__(self,a,size=32,seg='',disp=0):
+    def __init__(self,a,size=32,seg='',disp=0,mods=None):
         self.size  = size
         self.sf    = False
         self.a  = ptr(a,seg,disp)
+        self.mods = mods or []
 
     def __str__(self):
-        return 'M%d%s'%(self.size,self.a)
+        n = len(self.mods)
+        n = '$%d'%n if n>0 else ''
+        return 'M%d%s%s'%(self.size,n,self.a)
 
     def eval(self,env):
         a = self.a.eval(env)
-        return env[mem(a,self.size)]
+        m = env.use()
+        for loc,v in self.mods:
+            if loc._is_ptr: loc = env(loc)
+            m[loc] = env(v)
+        return m[mem(a,self.size)]
 
     def simplify(self):
         self.a.simplify()
@@ -768,8 +845,9 @@ def simplify(self):
     def addr(self,env):
         return self.a.eval(env)
 
+
 #------------------------------------------------------------------------------
-# ptr holds memory addresses with segment, base expressions and 
+# ptr holds memory addresses with segment, base expressions and
 # displacement integer (offset relative to base).
 #------------------------------------------------------------------------------
 class ptr(exp):
@@ -782,8 +860,8 @@ def __init__(self,base,seg='',disp=0):
             if seg is '': seg=base.seg
             disp = base.disp+disp
             base = base.base
-        self.base = base
-        self.disp = disp
+        self.base,offset = extract_offset(base)
+        self.disp = disp+offset
         self.seg  = seg
         self.size = base.size
         self.sf   = False
@@ -793,7 +871,8 @@ def __str__(self):
         return '%s(%s%s)'%(self.seg,self.base,d)
 
     def simplify(self):
-        self.base = self.base.simplify()
+        self.base,offset = extract_offset(self.base)
+        self.disp += offset
         if isinstance(self.seg,exp):
             self.seg = self.seg.simplify()
         return self
@@ -816,10 +895,11 @@ def eval(self,env):
 #------------------------------------------------------------------------------
 def slicer(x,pos,size):
     if not isinstance(x,exp): raise TypeError,x
+    if not x._is_def: return top(size)
     if pos==0 and size==x.size:
         return x
     else:
-        if x._is_mem:
+        if x._is_mem and size%8==0:
             off,rst = divmod(pos,8)
             if rst==0:
                 a = ptr(x.a.base,x.a.seg,x.a.disp+off)
@@ -827,7 +907,7 @@ def slicer(x,pos,size):
         return slc(x,pos,size)
 
 #------------------------------------------------------------------------------
-# slc holds bit-slice of a non-cst (and non-slc) expressions 
+# slc holds bit-slice of a non-cst (and non-slc) expressions
 #------------------------------------------------------------------------------
 class slc(exp):
     __slots__ = ['x','pos','ref','__protect','_is_reg']
@@ -847,6 +927,7 @@ def __init__(self,x,pos,size,ref=None):
         self.setref(ref)
 
     def setref(self,ref):
+        self._is_reg = False
         if self.x._is_reg:
             self._is_reg = True
             if ref is None:
@@ -873,16 +954,18 @@ def eval(self,env):
         n = self.x.eval(env)
         return n[self.pos:self.pos+self.size]
 
+    # slc of mem objects are simplified by adjusting the disp offset of
+    # the sliced mem object.
     def simplify(self):
         self.x = self.x.simplify()
-        if self.x._is_mem:
+        if self.x._is_mem and self.size%8==0:
             off,rst = divmod(self.pos,8)
             if rst==0:
                 a = ptr(self.x.a.base,self.x.a.seg,self.x.a.disp+off)
                 return mem(a,self.size)
         return self
 
-    # slice of a slice: 
+    # slice of a slice:
     @_checkarg_slice
     def __getitem__(self,i):
         if i.start==0 and i.stop==self.size:
@@ -902,6 +985,17 @@ def addr(self,env):
             return self.x
         else:
             raise TypeError('this expression is not a location')
+
+    def __setstate__(self,state):  # unpickling hook: restore every attribute from the saved state
+        v = state[1]  # mapping of attribute name to value (presumably the slots part of the pickle state - confirm)
+        self.__protect = False  # NOTE(review): presumably lifts a __setattr__ guard while restoring - confirm
+        self.size = v['size']
+        self.sf = v['sf']
+        self.x = v['x']
+        self.pos = v['pos']
+        self.ref = v['ref']
+        self._is_reg = v['_is_reg']
+        self.__protect = v['_slc__protect']  # saved guard value, stored under its name-mangled key
 ##
 
 #------------------------------------------------------------------------------
@@ -912,15 +1006,8 @@ class tst(exp):
     _is_def   = True
     _is_tst   = True
 
-    def __new__(cls,t,l,r):
-        if t is True or t==1: return l
-        if t is False or t==0: return r
-        obj=super(exp,cls).__new__(cls)
-        tst.__init__(obj,t,l,r)
-        return obj
-
     def __init__(self,t,l,r):
-        if t in [True,False]: t=cst(t)
+        if t is True or t is False: t=cst(t,1)
         self.tst = t   # the expression to test, probably a 'op' expressions.
         if l.size<>r.size: raise ValueError,(l,r)
         self.l  = l    # true (tst evals to val)
@@ -941,6 +1028,7 @@ def eval(self,env):
         else          : return r
 
     def simplify(self):
+        if self.l is self.r: return self.l
         self.tst = self.tst.simplify()
         self.l   = self.l.simplify()
         self.r   = self.r.simplify()
@@ -953,22 +1041,22 @@ def simplify(self):
 # oper returns a possibly simplified op() object (see below)
 #------------------------------------------------------------------------------
 def oper(opsym,l,r=None):
+    if r is None: return uop(opsym,l).simplify()
     return op(opsym,l,r).simplify()
 
 #------------------------------------------------------------------------------
-# op holds binary operations, integer arithmetic and bitwise logic
-# internal representation is either in tree form or flat form (std=True).
+# op holds binary integer arithmetic and bitwise logic expressions
 #------------------------------------------------------------------------------
 class op(exp):
     __slots__ = ['op','l','r','prop']
     _is_def   = True
     _is_eqn   = True
 
-    def __init__(self,op,l,r=None):
+    def __init__(self,op,l,r):
         self.op = _operator(op)
         self.prop = self.op.type
-        if r is not None and self.prop<4 and l.size <> r.size:
-            raise ValueError,"size mismatch"
+        if self.prop<4:
+            if l.size <> r.size: raise ValueError,"size mismatch"
         self.l  = l
         self.r  = r
         self.size = self.l.size
@@ -976,10 +1064,9 @@ def __init__(self,op,l,r=None):
             self.size=1
         elif self.op.symbol in ['**']: self.size *= 2
         self.sf = l.sf
+        if self.prop==1: self.sf |= r.sf
         if self.l._is_eqn: self.prop |= self.l.prop
-        if self.r is not None:
-            if self.prop==1: self.sf |= r.sf
-            if self.r._is_eqn : self.prop |= self.r.prop
+        if self.r._is_eqn : self.prop |= self.r.prop
 
     @classmethod
     def limit(cls,v):
@@ -988,34 +1075,89 @@ def limit(cls,v):
     def eval(self,env):
         # single-operand :
         l = self.l.eval(env)
-        r = None
-        if self.r is not None:
-            r = self.r.eval(env)
+        r = self.r.eval(env)
         res = self.op(l,r)
         res.sf = self.sf
         return res
     ##
 
     def __str__(self):
-        if self.r is None:
-            return '(%s%s)'%(self.op.symbol,str(self.l))
-        else:
-            return '(%s%s%s)'%(str(self.l),self.op.symbol,str(self.r))
+        return '(%s%s%s)'%(str(self.l),self.op.symbol,str(self.r))
 
     def simplify(self):
-        self.l = self.l.simplify()
-        if self.r is not None:
-            self.r = self.r.simplify()
-            return eqn_helpers(self)
-        return self
+        minus = (self.op.symbol=='-')
+        l = self.l.simplify()
+        r = self.r.simplify()
+        if not l._is_def or not r._is_def:
+            return top(self.size)
+        if self.prop<4:
+            # arithm/logic normalisation:
+            # push cst to the right
+            if l._is_cst:
+                if r._is_cst: return self.op(l,r)
+                if minus:
+                    l,r = (-r),l
+                    self.op = _operator('+')
+                else:
+                    l,r = r,l
+            # lexical ordering of symbols:
+            elif not r._is_cst:
+                lh = ''.join(map(str,symbols_of(l)))
+                rh = ''.join(map(str,symbols_of(r)))
+                if lh>rh:
+                    if minus:
+                        l,r = (-r),l
+                        self.op = _operator('+')
+                    else:
+                        l,r=r,l
+        self.l = l
+        self.r = r
+        return eqn2_helpers(self)
 
     def depth(self):
-        if self.r is not None: d = self.r.depth()
-        else: d = 0.
-        return self.l.depth()+d
+        return self.l.depth()+self.r.depth()
 
 ##
 
+#------------------------------------------------------------------------------
+# uop holds unary operations (+x, -x, ~x)
+#------------------------------------------------------------------------------
+class uop(exp):  # unary-operator expression node: one operator applied to one operand r
+    __slots__ = ['op','r','prop']
+    _is_def   = True
+    _is_eqn   = True
+
+    def __init__(self,op,r):  # op: operator symbol, r: operand expression
+        self.op = _operator(op,unary=1)
+        self.prop = self.op.type
+        self.r  = r
+        self.size = r.size  # result has the size of the operand
+        self.sf = r.sf
+        if self.r._is_eqn: self.prop |= self.r.prop  # accumulate the operand's own prop bits
+
+    def eval(self,env):
+        # evaluate the operand in env, then apply the operator to the result:
+        r = self.r.eval(env)
+        res = self.op(r)
+        res.sf = self.sf  # result carries the sf flag recorded at construction
+        return res
+    ##
+
+    @property
+    def l(self): return None  # no left operand; symbols_of/locations_of map None to []
+
+    def __str__(self):
+        return '(%s%s)'%(self.op.symbol,str(self.r))
+
+    def simplify(self):
+        self.r = self.r.simplify()  # operand is simplified in place first
+        if not self.r._is_def: return top(self.size)  # undefined operand gives top of the same size
+        return eqn1_helpers(self)
+
+    def depth(self):
+        return self.r.depth()  # same depth as the operand
+
+##
 # operators:
 #-----------
 
@@ -1054,34 +1196,39 @@ def rol(x,n):
            }
 
 class _operator(object):
-   def __init__(self,op):
-       self.symbol = op
-       if   op in OP_ARITH:
-           self.type = 1
-           self.impl = OP_ARITH[op]
-       elif op in OP_LOGIC:
-           self.type = 2
-           self.impl = OP_LOGIC[op]
-       elif op in OP_CONDT:
-           self.type = 4
-           self.impl = OP_CONDT[op]
-       elif op in OP_SHIFT:
-           self.type = 8
-           self.impl = OP_SHIFT[op]
-       else:
-           raise NotImplementedError
-
-   def __call__(self,l,r=None):
-       if r  is None:
-           impl = {'+': operator.pos, '-': operator.neg}.get(self.symbol,self.impl)
-           return impl(l)
-       return self.impl(l,r)
-
-   def __mul__(self,op):
-       ss = self.symbol+op.symbol
-       if ss in ('++','--'): return '+'
-       if ss in ('+-','-+'): return '-'
-       return None
+    def __init__(self,op,unary=0):  # op: operator symbol; unary: true for a one-operand operator
+        self.symbol = op
+        self.unary = unary
+        if   op in OP_ARITH:
+            self.type = 1  # type is one distinct bit per operator family (1, 2, 4, 8)
+            if self.unary:
+                self.impl = {'+': operator.pos, '-': operator.neg}[op]  # KeyError for any other unary arithmetic symbol
+            else:
+                self.impl = OP_ARITH[op]
+        elif op in OP_LOGIC:
+            self.type = 2
+            if self.unary: assert op == '~'  # '~' is the only logic operator allowed as unary
+            self.impl = OP_LOGIC[op]
+        elif op in OP_CONDT:
+            self.type = 4
+            self.impl = OP_CONDT[op]
+        elif op in OP_SHIFT:
+            self.type = 8
+            self.impl = OP_SHIFT[op]
+        else:
+            raise NotImplementedError  # symbol found in none of the operator tables
+
+    def __call__(self,l,r=None):  # apply the implementation to one (unary) or two operands
+        if r  is None:
+            assert self.unary  # a single operand is only valid for an operator built as unary
+            return self.impl(l)
+        return self.impl(l,r)
+
+    def __mul__(self,op):  # sign obtained by combining two +/- operators, as a symbol string
+        ss = self.symbol+op.symbol
+        if ss in ('++','--'): return '+'
+        if ss in ('+-','-+'): return '-'
+        return None  # any other pair of symbols has no combined sign
 
 # basic simplifier:
 #------------------
@@ -1089,53 +1236,76 @@ def __mul__(self,op):
 op.limit(30)
 
 def symbols_of(e):
-   if e is None: return []
-   if e._is_cst: return []
-   if e._is_reg: return [e]
-   if e._is_mem: return symbols_of(e.a.base)
-   if e._is_eqn: return symbols_of(e.l)+symbols_of(e.r)
-   # tst/slc/comp cases:
-   if isinstance(e,tst): return sum(map(symbols_of,(e.tst,e.l,e.r)),[])
-   if isinstance(e,slc): return symbols_of(e.x)
-   return sum(map(symbols_of,e.parts.itervalues()),[])
+    if e is None: return []
+    if e._is_cst: return []
+    if e._is_reg: return [e]
+    if e._is_mem: return symbols_of(e.a.base)
+    if e._is_ptr: return symbols_of(e.base)
+    if e._is_eqn: return symbols_of(e.l)+symbols_of(e.r)
+    if e._is_tst: return sum(map(symbols_of,(e.tst,e.l,e.r)),[])
+    if e._is_slc: return symbols_of(e.x)
+    if e._is_cmp: return sum(map(symbols_of,e.parts.itervalues()),[])
+    if not e._is_def: return []
+    raise ValueError(e)
+
+def locations_of(e):  # list the reg, mem and ptr sub-expressions found in e (duplicates kept)
+    if e is None: return []  # e.g. the absent left operand of a unary expression
+    if e._is_cst: return []
+    if e._is_reg: return [e]
+    if e._is_mem: return [e]  # a mem is reported as a whole; its address is not explored
+    if e._is_ptr: return [e]
+    if e._is_eqn: return locations_of(e.l)+locations_of(e.r)
+    if e._is_tst: return sum(map(locations_of,(e.tst,e.l,e.r)),[])  # condition and both branches
+    if e._is_slc: return locations_of(e.x)
+    if e._is_cmp: return sum(map(locations_of,e.parts.itervalues()),[])
+    if not e._is_def: return []
+    raise ValueError(e)  # defined expression of a kind not handled above
 
 def complexity(e):
-   return e.depth()+len(symbols_of(e))
+    factor = e.prop if e._is_eqn else 1
+    return (e.depth()+len(symbols_of(e)))*factor
+
+# helpers for unary expressions: returns a simplified form of e, or e itself.
+def eqn1_helpers(e):
+    assert e.op.unary
+    if not e.r._is_def: return e.r  # undefined operand is returned unchanged
+    if e.r._is_eqn:
+        if e.r.op.unary:
+            ss = e.op*e.r.op  # combined sign of the two stacked unary operators (None unless both are +/-)
+            if   ss == '+': return e.r.r
+            elif ss == '-': return -e.r.r
+        elif e.op.symbol == '-':
+            if e.r.op.symbol in ('-','+'):  # distribute the minus: -(l-r) is (-l)+r, -(l+r) is (-l)-r
+                l = -e.r.l
+                r = e.r.r
+                return OP_ARITH[e.op*e.r.op](l,r)
+    return e  # no rule applies
 
-def eqn_helpers(e):
-    if e.r is None: return e
-    if hasattr(e,'threshold'):
-        if e.l.depth()>e.threshold: e.l = top(e.l.size)
-        if e.r.depth()>e.threshold: e.r = top(e.r.size)
+# helpers for binary expressions:
+# reminder: be careful not to modify the internal structure of
+# e.l or e.r because these objects might be used also in other
+# expressions. See tests/test_cas_exp.py for details.
+def eqn2_helpers(e):
+    if e.r.depth()>e.threshold: e.r = top(e.r.size)
+    if e.l.depth()>e.threshold: e.l = top(e.l.size)
     if False in (e.l._is_def, e.r._is_def): return top(e.size)
-    if e.l._is_cst and e.r._is_cst:
-        return e.op(e.l,e.r)
-    if e.l is e.r:
-        if e.op.symbol in ('!=','<', '>' ): return bit0
-        if e.op.symbol in ('==','<=','>='): return bit1
-        if e.op.symbol is '-' : return cst(0,e.size)
-        if e.op.symbol is '^' : return cst(0,e.size)
-        if e.op.symbol is '&' : return e.l
-        if e.op.symbol is '|' : return e.l
-    if e.l._is_cst:
-        if e.l.value==0:
-            if e.op.symbol in ('*','&','>>','<<','>>>','<<<'):
-                return cst(0,e.size)
-            if e.op.symbol in ('|','^','+'):
-                return e.r
-        elif e.l.value==1 and e.op.symbol=='*':
-            return e.r
-        elif e.r._is_eqn:
-            xop = e.op*e.r.op
-            if xop:
-                if e.r.l._is_cst:
-                    cc = e.op(e.l,e.r.l)
-                    return op(xop, cc, e.r.r)
-                elif e.r.r._is_cst:
-                    cc = OP_ARITH[xop](e.l, e.r.r)
-                    e.l = cc
-                    e.r = e.r.l
-            return e
+    if e.l._is_eqn and e.l.r._is_cst:
+        assert e.l.op.unary==0
+        xop = e.op*e.l.op
+        if xop:
+            e.op,lop = e.l.op,e.op
+            lr,e.r   = e.r,e.l.r
+            e.l = lop(e.l.l,lr)
+    if e.r._is_eqn and e.r.op.unary:
+        if e.op.symbol == '+' and e.r.op.symbol == '-':
+            e.op = _operator('-')
+            e.r  = e.r.r
+    if e.r._is_eqn and e.r.r._is_cst:
+        xop = e.op*e.r.op
+        if xop:
+            e.l = e.op(e.l,e.r.l)
+            e.r = e.r.r
+            e.op = _operator(xop)
     if e.r._is_cst:
         if e.r.value==0:
             if e.op.symbol in ('|','^','+','-','>>','<<','>>>','<<<'):
@@ -1144,68 +1314,35 @@ def eqn_helpers(e):
                 return cst(0,e.size)
         elif e.r.value==1 and e.op.symbol in ('*','/'):
             return e.l
-        elif e.l._is_eqn:
+        if e.l._is_eqn:
             xop = e.op*e.l.op
             if xop:
-                if e.l.l._is_cst:
-                    cc = e.op(e.l.l,e.r)
-                    e.r = e.l.r
-                    e.op = e.l.op
-                    e.l = cc
-                elif e.l.r._is_cst:
+                if e.l.r._is_cst:
                     cc = OP_ARITH[xop](e.l.r,e.r)
                     e.op = e.l.op
-                    e.l = e.l.l
+                    if not e.l.op.unary: e.l = e.l.l
                     e.r = cc
+                return e
+        elif e.l._is_ptr:
+            if e.op.symbol in ('-','+'):
+                return ptr(e.l,disp=e.op(0,e.r.value))
+        elif e.l._is_cst:
+            return e.op(e.l,e.r)
+    if str(e.l)==str(e.r):
+        if e.op.symbol in ('!=','<', '>' ): return bit0
+        if e.op.symbol in ('==','<=','>='): return bit1
+        if e.op.symbol is '-' : return cst(0,e.size)
+        if e.op.symbol is '^' : return cst(0,e.size)
+        if e.op.symbol is '&' : return e.l
+        if e.op.symbol is '|' : return e.l
     return e
 
-# expression parser:
-#-------------------
-
-import pyparsing as pp
-
-#terminals:
-p_bottop  = pp.oneOf('_ T')
-p_symbol  = pp.Word(pp.alphas)
-p_extern  = pp.Suppress('@')+p_symbol
-p_cst     = pp.Suppress('0x')+pp.Combine(pp.Optional('-')+pp.Regex('[0-9a-f]+'))
-p_int     = pp.Word(pp.nums).setParseAction(lambda r:int(r[0]))
-p_slc     = '['+p_int.setResultsName('start')+':'+p_int.setResultsName('stop')+']'
-p_op1     = pp.oneOf('~ -')
-p_op2     = pp.oneOf('+ - / // * & | ^ << >> < > == <= >= != ? :')
-p_term    = p_bottop|p_symbol|p_extern|p_cst
-
-#nested expressions:
-p_expr    = pp.Forward()
-
-p_csl     = pp.Suppress('|')+p_slc+pp.Suppress('->')
-p_comp    = pp.Group(pp.Suppress('{')+pp.ZeroOrMore(p_expr)+pp.Suppress('| }'))
-p_mem     = 'M'+p_int+pp.Optional(p_symbol)
-
-operators = [(p_op1,1,pp.opAssoc.RIGHT),
-             (p_mem,1,pp.opAssoc.RIGHT),
-             (p_slc,1,pp.opAssoc.LEFT),
-             (p_op2,2,pp.opAssoc.LEFT),
-             (p_csl,1,pp.opAssoc.RIGHT),
-            ]
-
-p_expr   << pp.operatorPrecedence(p_term|p_comp,operators)
-
-p_bottop.setParseAction(lambda r: bot if r[0]=='_' else top)
-p_symbol.setParseAction(lambda r: reg(r[0]))
-p_extern.setParseAction(lambda r: ext(r[0]))
-p_cst.setParseAction(lambda r: int(r[0],16))
-p_slc.setParseAction(lambda r: slice(r['start'],r['stop']))
-
-
-def parse(s):
-    p_expr.parseString(s,True)
-
-def test_parser():
-    while 1:
-        try:
-            res = raw_input('amoco[test_parser]>')
-            E = p_expr.parseString(res,True)
-            print E
-        except EOFError:
-            return
+# split e into (e', C) with e equal to e'+C: e' an expression, C an integer offset (0 if none).
+def extract_offset(e):
+    x = e.simplify()  # simplify() may return another object than e: only x is inspected below
+    if x._is_eqn and not x.op.unary and x.r._is_cst:  # unary nodes have no left operand to return
+        if x.op.symbol == '+':
+            return (x.l,x.r.v)
+        elif x.op.symbol == '-':
+            return (x.l,-x.r.v)  # subtraction of a constant is a negative offset
+    return (x,0)  # no constant added/subtracted on the right: x itself, offset 0
diff --git a/amoco/cas/mapper.py b/amoco/cas/mapper.py
index db11f10..6960c8d 100644
--- a/amoco/cas/mapper.py
+++ b/amoco/cas/mapper.py
@@ -1,21 +1,35 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.logger import Log
 logger = Log(__name__)
 
-from .expressions import reg,cst,mem,comp,top
+from .expressions import *
 from amoco.cas.tracker import generation
-
+from amoco.system.core import MemoryMap
+from amoco.arch.core   import Bits
+
+# a mapper is a symbolic functional representation of the execution
+# of a set of instructions.
+# __map  : is an ordered list of mappings of expressions associated with a
+# location (a register or a memory pointer). The order is relevant only
+# to reflect the order of write-to-memory instructions.
+# __Mem  : is a memory model where symbolic memory pointers are using
+# individual separated zones.
 class mapper(object):
+    assume_no_aliasing = False
 
-    __slots__ = ['__map']
+    __slots__ = ['__map','__Mem']
 
-    # a mapper is inited with a list of instructions 
-    # provided by a disassembler (see x86)
+    # a mapper is inited with a list of instructions
+    # provided by a disassembler
     def __init__(self,instrlist=None):
-        self.__map  = generation()
+        self.__map = generation()
+        self.__map.lastw = 0
+        self.__Mem = MemoryMap()
         icache = []
         # if the __map needs to be inited before executing instructions
         # one solution is to prepend the instrlist with a function dedicated
@@ -27,27 +41,38 @@ def __init__(self,instrlist=None):
         for instr in icache:
             instr(self)
 
+    def __len__(self):
+        return len(self.__map)
+
     def __str__(self):
         return '\n'.join(["%s <- %s"%x for x in self])
 
     # list antecedent locations (used in the mapping)
     def inputs(self):
-        pass
+        return sum(map(locations_of,self.__map.itervalues()),[])
 
     # list image locations (modified in the mapping)
     def outputs(self):
-        pass
+        return sum(map(locations_of,self.__map.iterkeys()),[])
+
+    def rw(self):
+        r = filter(lambda x:x._is_mem, self.inputs())
+        w = filter(lambda x:x._is_ptr, self.outputs())
+        sr = ''.join(("r%d"%x.size for x in r))
+        sw = ''.join(("w%d"%self.__map[x].size for x in w))
+        return sr+sw
 
     def clear(self):
         self.__map.clear()
+        self.__Mem = MemoryMap()
+
+    def memory(self):
+        return self.__Mem
 
     # compare self with mapper m:
     def __cmp__(self,m):
         d = cmp(self.__map.lastdict(),m.__map.lastdict())
         return d
-        #if d<>0: return d
-        #shall we compare also the order ?
-        #return cmp(self.__order,m.__order)
 
     # iterate over ordered correspondances:
     def __iter__(self):
@@ -59,16 +84,54 @@ def R(self,x):
         return self.__map.get(x,x)
 
     # get a memory location value (fetch) :
+    # k must be a mem expression
     def M(self,k):
         if k.a.base._is_ext: return k.a.base
-        x = self.__map.get(k.a,k)
-        if x.size<k.size:
-            logger.warning('read memory out of bound')
-            c = comp(k.size)
-            c[0:x.size] = x
-            c[x.size:k.size] = top(k.size-x.size)
-            x = c
-        return x[0:k.size]
+        n = self.aliasing(k)
+        if n>0:
+            f = lambda e:e[0]._is_ptr
+            items = filter(f,self.__map.items()[0:n])
+            res = mem(k.a,k.size,mods=items)
+        else:
+            res = self._Mem_read(k.a,k.length)
+            res.sf = k.sf
+        return res
+
+    def aliasing(self,k):
+        if self.assume_no_aliasing: return 0
+        K = self.__map.keys()
+        n = self.__map.lastw
+        try:
+            i = K.index(k.a)
+        except ValueError:
+            # k has never been written to explicitly
+            # but it is maybe in a zone that was written to
+            i = -1
+        for l in K[i+1:n]:
+            if not l._is_ptr: continue
+            if l.base==k.a.base: continue
+            return n
+        return 0
+
+    # read MemoryMap and return the result as an expression:
+    def _Mem_read(self,a,l):
+        try:
+            res = self.__Mem.read(a,l)
+        except MemoryError,e: # no zone for location a: the l bytes read are unknown (top)
+            res = [top(l*8)]
+        if exp._endian==-1: res.reverse() # parts are processed in reverse order when _endian is -1
+        P = []
+        cur = 0 # running offset of the current part (sum of len() of the previous parts)
+        for p in res:
+            plen = len(p)
+            if isinstance(p,str): p = cst(Bits(p[::c._endian],bitorder=1).int(),plen*8) # raw bytes -> cst; NOTE(review): 'c' is not bound in the visible code (exp._endian is used above) - confirm
+            elif not p._is_def: p = mem(a,p.size,disp=cur) # undefined part -> symbolic mem at offset cur
+            P.append(p)
+            cur += plen
+        return composer(P)
+
+    def _Mem_write(self,a,v):
+        self.__Mem.write(a,v)
 
     # just a convenient wrapper around M/R:
     def __getitem__(self,k):
@@ -79,47 +142,68 @@ def __getitem__(self,k):
     # define image v of antecedent k:
     def __setitem__(self,k,v):
         if k._is_ptr:
-            self.__map[k] = v
-            return
-        if k.size<>v.size: raise ValueError('size mismatch')
-        try:
-            loc = k.addr(self)
-        except TypeError:
-            logger.error('setitem ignored (invalid left-value expression)')
-            return
+            loc = k
+        else:
+            if k.size<>v.size:
+                raise ValueError('size mismatch')
+            try:
+                loc = k.addr(self)
+            except TypeError:
+                logger.error('setitem ignored (invalid left-value expression)')
+                return
         if k._is_slc and not loc._is_reg:
             raise ValueError('memory location slc is not supported')
-        elif k._is_mem:
+        elif k._is_ptr or k._is_mem:
             r = v
+            self.__map.lastw = len(self.__map)+1
         else:
             r = self.R(loc)
             if r._is_reg:
                 r = comp(loc.size)
                 r[0:loc.size] = loc
             pos = k.pos if k._is_slc else 0
-            r[pos:pos+k.size] = v
+            r[pos:pos+k.size] = v.simplify()
+        if loc._is_ptr:
+            oldr = self.__map.get(loc,None)
+            if oldr is not None and oldr.size>r.size:
+                r = composer([r,oldr[r.size:oldr.size]])
+            self._Mem_write(loc,r)
         self.__map[loc] = r
 
     def update(self,instr):
         instr(self)
 
     # eval of x in this map:
-    # note the difference between a mapper[mem(x)] and mapper(mem(x)):
-    # in the call form, x is first evaluated so that it uses "x_out"
-    # whereas the item form uses "x_in".
+    # note the difference between a mapper[mem(p)] and mapper(mem(p)):
+    # in the call form, p is first evaluated so that the target address
+    # is the expression of p "after execution" whereas the indexing form
+    # uses p as an input (i.e "before execution") expression.
+    # example, suppose str(mapper) is:
+    #   (esp)   <- eax
+    #       esp <- { | [0:32]->(esp-0x4) | }
+    #   (esp-4) <- ebx
+    # then:
+    # mapper[mem(esp)] returns eax (what is pointed by "esp before execution")
+    # mapper(mem(esp)) returns ebx (what is pointed by "esp after execution")
     def __call__(self,x):
         return x.eval(self)
 
     def restruct(self):
-        pass
+        self.__Mem.restruct()
 
     # return a new mapper instance where all input locations have
     # been replaced by there corresponding values in m.
     # example:
     # in self: eax <- ebx
     # in m   : ebx <- 4
+    #          edx <- (ecx+1)
     # =>
-    # in mm  : eax <- 4
+    # result : eax <- 4
+    # The compose flag indicates whether the resulting mapper contains
+    # all mappings of m or only mappings of self. For example, if
+    # we use compose=True we get instead:
+    # result : eax <- 4
+    #          edx <- (ecx+1)
     def eval(self,m,compose=False):
         mm = mapper() if not compose else m.use()
         for loc,v in self:
@@ -128,24 +212,33 @@ def eval(self,m,compose=False):
             mm[loc] = m(v)
         return mm
 
-    # composition operator (°) returns a new mapper
+    # composition operator returns a new mapper
     # corresponding to function x -> self(m(x))
     def rcompose(self,m):
         return self.eval(m,compose=True)
 
-    # self << m : composition (self°m)
+    # self << m : composition (self(m))
     def __lshift__(self,m):
         return self.rcompose(m)
 
-    # self >> m : composition (m°self)
+    # self >> m : composition (m(self))
     def __rshift__(self,m):
         return m.rcompose(self)
 
     def interact(self):
-            pass
-
-    def use(self,**kargs):
+        raise NotImplementedError
+
+    # return a mapper corresponding to the evaluation of the current mapper
+    # where (loc,value) pairs in args and symbols named in kargs are replaced
+    # by their values in all expressions. The reserved karg "size=value" sets
+    # the bit size of these symbols/values (default 32) and is not a symbol.
+    # if args and kargs are empty, the result is just a copy of current mapper.
+    def use(self,*args,**kargs):
         m = mapper()
-        for k,v in kargs.iteritems():
-            m[reg(k)] = cst(v)
+        for loc,v in args:
+            m[loc] = v
+        if len(kargs)>0:
+            argsz = kargs.pop('size',32)
+            for k,v in kargs.iteritems():
+                m[reg(k,argsz)] = cst(v,argsz)
         return self.eval(m)
diff --git a/amoco/cas/parser.py b/amoco/cas/parser.py
new file mode 100644
index 0000000..af9f0fb
--- /dev/null
+++ b/amoco/cas/parser.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+
+# This code is part of Amoco
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
+# published under GPLv2 license
+
+from amoco.logger import Log
+logger = Log(__name__)
+
+from .expressions import *
+
+# expression parser:
+#-------------------
+
+import pyparsing as pp
+
+#terminals:
+p_bottop  = pp.oneOf('⊥ T')
+p_symbol  = pp.Word(pp.alphas)
+p_extern  = pp.Suppress('@')+p_symbol
+p_cst     = pp.Suppress('0x')+pp.Combine(pp.Optional('-')+pp.Regex('[0-9a-f]+'))
+p_int     = pp.Word(pp.nums).setParseAction(lambda r:int(r[0]))
+p_slc     = '['+p_int.setResultsName('start')+':'+p_int.setResultsName('stop')+']'
+p_op1     = pp.oneOf('~ -')
+p_op2     = pp.oneOf('+ - / // * & | ^ << >> < > == <= >= != ? :')
+p_term    = p_bottop|p_symbol|p_extern|p_cst
+
+#nested expressions:
+p_expr    = pp.Forward()
+
+p_csl     = pp.Suppress('|')+p_slc+pp.Suppress('->')
+p_comp    = pp.Group(pp.Suppress('{')+pp.ZeroOrMore(p_expr)+pp.Suppress('| }'))
+p_mem     = 'M'+p_int+pp.Optional(p_symbol)
+
+operators = [(p_op1,1,pp.opAssoc.RIGHT),
+             (p_mem,1,pp.opAssoc.RIGHT),
+             (p_slc,1,pp.opAssoc.LEFT),
+             (p_op2,2,pp.opAssoc.LEFT),
+             (p_csl,1,pp.opAssoc.RIGHT),
+            ]
+
+p_expr   << pp.operatorPrecedence(p_term|p_comp,operators)
+
+p_bottop.setParseAction(lambda r: bot if r[0]=='⊥' else top) # must match the symbols of the p_bottop terminal
+p_symbol.setParseAction(lambda r: reg(r[0]))
+p_extern.setParseAction(lambda r: ext(r[0]))
+p_cst.setParseAction(lambda r: int(r[0],16))
+p_slc.setParseAction(lambda r: slice(r['start'],r['stop']))
+
+
+def parse(s):
+    return p_expr.parseString(s,True)
+
+def test_parser():
+    while 1:
+        try:
+            res = raw_input('amoco[test_parser]>')
+            E = p_expr.parseString(res,True)
+            print E
+        except EOFError:
+            return
diff --git a/amoco/cas/smt.py b/amoco/cas/smt.py
new file mode 100644
index 0000000..cbc3371
--- /dev/null
+++ b/amoco/cas/smt.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+
+# This code is part of Amoco
+# Copyright (C) 2015 Axel Tillequin (bdcht3@gmail.com)
+# published under GPLv2 license
+
+from amoco.logger import Log
+logger = Log(__name__)
+
+from .expressions import *
+from .mapper import mapper
+
+try:
+    import z3
+except ImportError:
+    logger.info('z3 package not found => solve() method is not implemented')
+    class solver(object):
+        def __init__(self,eqns=None):
+            raise NotImplementedError
+    has_solver = False
+else:
+    logger.info('z3 package imported')
+    class solver(object):
+        def __init__(self,eqns=None):
+            self.eqns = []
+            self.locs = []
+            self.solver = z3.Solver()
+            if eqns: self.add(eqns)
+        def add(self,eqns):
+            for e in eqns:
+                assert e._is_eqn
+                self.eqns.append(e)
+                self.solver.add(e.to_smtlib())
+                self.locs.extend(locations_of(e))
+        def check(self):
+            return self.solver.check()
+        def get_model(self,eqns=None):
+            if eqns is not None: self.add(eqns)
+            if self.check() == z3.sat:
+                r = self.solver.model()
+                return r
+        def get_mapper(self,eqns=None):
+            r = self.get_model(eqns)
+            if r is not None:
+                return model_to_mapper(r,self.locs)
+    has_solver = True
+
+def cst_to_z3(e):
+    return z3.BitVecVal(e.v,e.size)
+
+def cfp_to_z3(e):
+    return z3.RealVal(e.v)
+
+def reg_to_z3(e):
+    return z3.BitVec(e.ref,e.size)
+
+def comp_to_z3(e):
+    e.simplify()
+    parts = [x.to_smtlib() for x in e]
+    parts.reverse()
+    return z3.Concat(*parts)
+
+def slc_to_z3(e):
+    x = e.x.to_smtlib()
+    return z3.Extract(e.pos+e.size-1,e.pos,x)
+
+def ptr_to_z3(e):
+    return e.base.to_smtlib()+e.disp
+
+def mem_to_z3(e):
+    e.simplify()
+    M = z3.Array('M',z3.BitVecSort(e.a.size),z3.BitVecSort(8))
+    p = e.a.to_smtlib()
+    b = []
+    for i in range(0,e.length):
+        b.insert(0,M[p+i])
+    if e._endian==-1: b.reverse() # big-endian case
+    return z3.Concat(*b)
+
+def tst_to_z3(e):
+    e.simplify()
+    return z3.If(e.tst.to_smtlib(), e.l.to_smtlib(), e.r.to_smtlib())
+
+def op_to_z3(e):
+    e.simplify()
+    l,r = e.l,e.r
+    op = e.op
+    if   op.symbol == '>>' : op = z3.LShR
+    elif op.symbol == '//' : op = operator.rshift
+    elif op.symbol == '>>>': op = z3.RotateRight
+    elif op.symbol == '<<<': op = z3.RotateLeft
+    z3l = l.to_smtlib()
+    if r is None: return op(z3l)
+    z3r = r.to_smtlib()
+    return op(z3l,z3r)
+
+cst.to_smtlib  = cst_to_z3
+cfp.to_smtlib  = cfp_to_z3
+reg.to_smtlib  = reg_to_z3
+comp.to_smtlib = comp_to_z3
+slc.to_smtlib  = slc_to_z3
+ptr.to_smtlib  = ptr_to_z3
+mem.to_smtlib  = mem_to_z3
+tst.to_smtlib  = tst_to_z3
+op.to_smtlib   = op_to_z3
+
+def to_smtlib(e):
+    return e.to_smtlib()
+
+def model_to_mapper(r,locs): # build a mapper that assigns to each location in locs its value in z3 model r
+    m = mapper()
+    mlocs = [] # memory locations are deferred: they are set after all other locations
+    for l in locs:
+        if l._is_mem:
+            mlocs.append(l)
+        else: # model_completion gives a concrete value even to locations left unconstrained by the solver
+            m[l] = cst(r.eval(l.to_smtlib(),model_completion=True).as_long(),l.size)
+    for l in mlocs:
+        m[l] = cst(r.eval(l.to_smtlib(),model_completion=True).as_long(),l.size)
+    return m
diff --git a/amoco/cas/tracker.py b/amoco/cas/tracker.py
index 541f2e8..2ecb33d 100644
--- a/amoco/cas/tracker.py
+++ b/amoco/cas/tracker.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from collections import OrderedDict
diff --git a/amoco/cas/utils.py b/amoco/cas/utils.py
index 9fb65ce..8c3bee1 100644
--- a/amoco/cas/utils.py
+++ b/amoco/cas/utils.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 
diff --git a/amoco/cfg.py b/amoco/cfg.py
index 8188d5a..e8ca677 100644
--- a/amoco/cfg.py
+++ b/amoco/cfg.py
@@ -1,14 +1,21 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 # we wrap the grandalf classes here
 
+from amoco.logger import Log
+logger = Log(__name__)
+
 from grandalf.graphs import Vertex,Edge,Graph
 
 from amoco.system.core import MemoryZone
 
 #------------------------------------------------------------------------------
+# node class is a graph vertex that embeds a block instance and inherits its
+# name (default to the address of the block).
 class node(Vertex):
     # protect from None data node:
     def __init__(self,acode):
@@ -28,18 +35,18 @@ def __len__(self):
         return self.data.length
 
     def __getitem__(self,i):
-        self.data = self.data.__getitem__(i)
+        res = node(self.data.__getitem__(i))
         return res
 
 #------------------------------------------------------------------------------
+# link is a directed graph edge between two nodes.
 class link(Edge):
-    def __init__(self,orig,dest):
-        Edge.__init__(self,orig,dest)
 
     def __str__(self):
         n0 = repr(self.v[0])
         n1 = repr(self.v[1])
-        return "%s -> %s"%(n0,n1)
+        c = '?' if self.data else '-'
+        return "%s -%s-> %s"%(n0,c,n1)
 
     def __repr__(self):
         return '<%s [%s] at 0x%x>'%(self.__class__.__name__,self.name,id(self))
@@ -54,10 +61,12 @@ def __cmp__(self,e):
         return cmp(self.name,e.name)
 
 #------------------------------------------------------------------------------
-class func(Graph):
+# graph is a Graph that represents a set of functions as individual components
+class graph(Graph):
 
     def __init__(self,*args,**kargs):
         self.support = MemoryZone()
+        self.overlay = None
         Graph.__init__(self,*args,**kargs)
 
     def spool(self,n=None):
@@ -66,38 +75,65 @@ def spool(self,n=None):
             if len(v.e_out())==0: L.append(v)
         return L
 
-    def add_vertex(self,v):
+    def __cut_add_vertex(self,v,mz,vaddr,mo):
+        oldnode = mo.data.val
+        if oldnode==v: return 0
+        # so v cuts an existing node/block:
+        # repair oldblock and fix self
+        childs = oldnode.N(+1)
+        oldblock = oldnode.data
+        # if vaddr is aligned with an oldblock instr, cut it:
+        # this reduces oldblock up to vaddr if the cut is possible.
+        cutdone = oldblock.cut(vaddr)
+        if not cutdone:
+            if mz is self.overlay:
+                logger.warning("double overlay block at %s"%vaddr)
+                Graph.add_vertex(self,v)
+                v.data.misc['double-overlay'] = 1
+                return 1
+            overlay = self.overlay or MemoryZone()
+            return self.add_vertex(v,support=overlay)
+        else:
+            Graph.add_vertex(self,v) # ! avoid recursion for add_edge
+            mz.write(vaddr,v)
+            self.add_edge(link(oldnode,v))
+            for n in childs:
+                self.add_edge(link(v,n))
+                self.remove_edge(oldnode.e_to(n))
+        return 1
+
+    def add_vertex(self,v,support=None):
+        if len(v)==0: return Graph.add_vertex(self,v)
         vaddr=v.data.address
-        i = self.support.locate(vaddr)
+        if support is None:
+            support=self.support
+        else:
+            logger.verbose("add overlay block at %s"%vaddr)
+            self.overlay = support
+        i = support.locate(vaddr)
         if i is not None:
-            mo = self.support._map[i]
+            mo = support._map[i]
             if vaddr in mo:
-                oldnode = mo.data.val
-                if oldnode==v: return 0
-                # so v cuts an existing node/block:
-                # repair oldblock and fix self
-                childs = oldnode.N(+1)
-                oldblock = oldnode.data
-                oldblock.cut(vaddr)
-                Graph.add_vertex(self,v) # ! avoid recursion for add_edge
-                self.support.write(vaddr,v)
-                self.add_edge(link(oldnode,v))
-                for n in childs:
-                    self.add_edge(link(v,n))
-                    self.remove_edge(oldnode.e_to(n))
-                return 1
+                return self.__cut_add_vertex(v,support,vaddr,mo)
             else: #v does not cut an existing block,
                 try: # but may swallow next one...
-                    nextmo = self.support._map[i+1]
+                    nextmo = support._map[i+1]
                 except IndexError:
                     # no more nodes here so back to default case:
                     pass
                 else:
                     nextnode = nextmo.data.val
-                    if vaddr+len(v)>=nextnode.data.address:
-                        v.data.cut(nextnode.data.address)
+                    if vaddr+len(v)>nextnode.data.address:
+                        cutdone = v.data.cut(nextnode.data.address)
+                        if not cutdone:
+                            if support is self.overlay:
+                                logger.warning("double overlay block at %s"%vaddr)
+                                Graph.add_vertex(self,v)
+                                v.data.misc['double-overlay'] = 1
+                                return 1
+                            support = self.overlay or MemoryZone()
         Graph.add_vertex(self,v) # before support write !!
-        self.support.write(vaddr,v)
+        support.write(vaddr,v)
         return 1
 
     def get_node(self,name):
diff --git a/amoco/code.py b/amoco/code.py
index 7b88032..524f5d7 100644
--- a/amoco/code.py
+++ b/amoco/code.py
@@ -1,15 +1,18 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from collections import defaultdict
 from amoco.cas.mapper import mapper
 
 from amoco.config import conf
+from amoco.logger import Log
+logger = Log(__name__)
 
 #------------------------------------------------------------------------------
-# A block instance is a 'continuous' (atomic) set of instructions.
-# It is build from a bytecode 
+# A block instance is a 'continuous' sequence of instructions.
 #------------------------------------------------------------------------------
 class block(object):
     __slots__=['_map','instr','_name','misc']
@@ -21,13 +24,13 @@ def __init__(self, instrlist, name=None):
         self.instr = instrlist
         self._name = name
         self.misc  = defaultdict(lambda :0)
-        # translate into a interpreter:
-        #acode.__init__(self,mapper(self.instr))
 
     @property
     def map(self):
         if self._map is None:
             self._map = mapper(self.instr)
+        if self.misc['func']:
+            return self.misc['func'].map
         return self._map
     @map.setter
     def map(self,m):
@@ -57,23 +60,32 @@ def __getitem__(self,i):
         pos = [0]
         for i in self.instr:
             pos.append(pos[-1]+i.length)
-        ista = pos.index(sta)
-        isto = pos.index(sto)
+        try:
+            ista = pos.index(sta)
+            isto = pos.index(sto)
+        except ValueError:
+            logger.warning("can't slice block: indices must match instruction boudaries")
+            return None
         I = self.instr[ista:isto]
         if len(I)>0:
             return block(self.instr[ista:isto])
 
+    # cutting the block at the given address removes the instructions located at or
+    # after this address, which must be aligned with an instruction boundary. The effect
+    # is to reduce the block size. The returned value is the number of instructions removed.
     def cut(self,address):
         I = [i.address for i in self.instr]
         try:
             pos = I.index(address)
         except ValueError:
-            pass
+            logger.warning("invalid attempt to cut block %s at %s"%(self.name,address))
+            return 0
         else:
             self.instr = self.instr[:pos]
             self.map.clear()
             for i in self.instr: i(self.map)
             # TODO: update misc annotations too
+            return len(I)-pos
 
     def __str__(self):
         L = []
@@ -103,19 +115,62 @@ def __cmp__(self,b):
 
 
 #------------------------------------------------------------------------------
-# A func instance is an acode where the map is build from a cfg by
-# unions and fixpoints on (sub)maps contained in this cfg.
+# func is a cfg connected component that generally represents a called function
+# It appears in the other graphs whenever the function is called and provides a
+# synthetic map that captures the semantics of the function.
 #------------------------------------------------------------------------------
-class func(object):
-    __slots__ = ['name','cfg']
-    def __init__(self,name,cfg):
-        self.name = name
-        self.cfg = cfg
+class func(block):
+    __slots__ = ['cfg']
+
+    # the init of a func takes a core_graph and creates a map of it:
+    def __init__(self, g=None, name=None):
+        self._map  = None
+        self.cfg = g
+        self.instr = []
+        # base/offset need to be defined before code (used in setcode)
+        self._name = name
+        self.misc  = defaultdict(lambda :0)
+
+    @property
+    def address(self):
+        return self.blocks[0].address
+
+    @property
+    def blocks(self):
+        V = self.cfg.sV.o
+        return [n.data for n in V]
+
+    @property
+    def support(self):
+        smin = self.address
+        smax = max((b.address+b.length for b in self.blocks))
+        return (smin,smax)
+
+    def makemap(self):
+        raise NotImplementedError
 
     def __str__(self):
-        s = '# --- func %s ---\n%s' % (self.name,str(self.cfg))
-        return s
+        return "%s{%d}"%(self.name,len(self.blocks))
 
+#------------------------------------------------------------------------------
+# xfunc represents external functions. It is associated with an ext expression.
+# The map provided by an xfunc instance is constructed by executing the stub
+# defined in the ext expression.
+#------------------------------------------------------------------------------
+class xfunc(object):
+    __slots__ = ['map','name','address','length','misc']
+
+    def __init__(self, x):
+        self.map = mapper()
+        x(self.map)
+        self.name = str(x)
+        self.address = x
+        self.length = 0
+        self.misc  = defaultdict(lambda :0)
+
+    @property
+    def support(self):
+        return (self.address,self.address)
 
 #------------------------------------------------------------------------------
 class tag:
diff --git a/amoco/config.py b/amoco/config.py
index ffff642..6a30dbb 100644
--- a/amoco/config.py
+++ b/amoco/config.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 from collections import defaultdict
 
 try:
@@ -15,7 +17,7 @@
     conf.set('block', 'bytecode', 'True')
     conf.set('block', 'padding', '4')
     conf.add_section('log')
-    conf.set('log', 'level', '20')
+    conf.set('log', 'level', 'ERROR')
     conf.read([os.path.expanduser('~/.amocorc')])
 else:
     conf = None
@@ -58,6 +60,6 @@ def setdefaults(self):
             self.mset('block', header=True)
             self.mset('block', bytecode=True)
             self.mset('block', padding=4)
-            self.mset('log', level=20)
+            self.mset('log', level='ERROR')
 
     conf = DefaultConf()
diff --git a/amoco/logger.py b/amoco/logger.py
index c8b3a34..2428036 100644
--- a/amoco/logger.py
+++ b/amoco/logger.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 import logging
@@ -13,9 +15,25 @@
 
 try:
     from amoco import conf
-    default_level  = conf.getint('log','level')
+    try:
+        default_level = conf.getint('log','level')
+        if default_level is None: default_level = 0
+    except ValueError:
+        default_level = logging._levelNames.get(conf.get('log','level'),0)
+    if conf.has_option('log','file'):
+        logfilename  = conf.get('log','file')
+    else:
+        logfilename  = None
 except ImportError:
     default_level  = logging.ERROR
+    logfilename = None
+
+if logfilename:
+    logfile = logging.FileHandler(logfilename,mode='w')
+    logfile.setFormatter(default_format)
+    logfile.setLevel(logging.DEBUG)
+else:
+    logfile = None
 
 class Log(logging.Logger):
     def __init__(self,name,handler=logging.StreamHandler()):
@@ -23,11 +41,15 @@ def __init__(self,name,handler=logging.StreamHandler()):
         handler.setFormatter(default_format)
         self.addHandler(handler)
         self.setLevel(default_level)
+        if logfile: self.addHandler(logfile)
         self.register(name,self)
 
     def verbose(self,msg,*args,**kargs):
         return self.log(VERBOSE,msg,*args,**kargs)
 
+    def setLevel(self,lvl):
+        self.handlers[0].setLevel(lvl)
+
     @classmethod
     def register(cls,name,self):
         if name in self.loggers:
@@ -47,4 +69,13 @@ def set_log_all(level):
     for l in Log.loggers.itervalues():
         l.setLevel(level)
 
+def set_log_file(filename): # attach a new log file handler (writing to filename) to all registered loggers
+    global logfile # rebinds the module-level handler; without this the test below raises UnboundLocalError
+    if logfile is not None: logfile.close()
+    logfile = logging.FileHandler(filename,mode='w')
+    logfile.setFormatter(default_format)
+    logfile.setLevel(logging.DEBUG)
+    for l in Log.loggers.itervalues():
+        l.addHandler(logfile)
+
 Log.loggers = {}
diff --git a/amoco/main.py b/amoco/main.py
index 6672b8d..2fe3489 100644
--- a/amoco/main.py
+++ b/amoco/main.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.logger import Log, set_debug,set_quiet,set_log_all
@@ -11,15 +13,18 @@
 
 from amoco.arch.core import INSTRUCTION_TYPES
 
-# linear sweep based analysis.
+# linear sweep based analysis:
+# fast & dumb way of disassembling prog,
+# but provides iterblocks() for all parent classes.
 class lsweep(object):
     __slots__ = ['prog','G']
     def __init__(self,prog):
         self.prog = prog
-        self.G = {}
+        self.G = cfg.graph()
 
     # iterator over linearly sweeped instructions
     # starting at address loc (defaults to entrypoint).
+    # If not None, loc argument should be a cst object.
     def sequence(self,loc=None):
         p = self.prog
         if loc is None:
@@ -34,6 +39,9 @@ def sequence(self,loc=None):
             loc += i.length
             yield i
 
+    # iterator over basic blocks using the instruction.type attribute
+    # to detect the end of block (type_control_flow). The returned block
+    # object is enhanced with platform-specific information (see block.misc).
     def iterblocks(self,loc=None):
         inblock = (lambda i: INSTRUCTION_TYPES[i.type]!='control_flow')
         l = []
@@ -60,115 +68,225 @@ def iterblocks(self,loc=None):
             b = code.block(l)
             yield self.prog.codehelper(block=b)
 
+    # getblock is a handy wrapper of iterblocks to
+    # return the block located at address val provided as Python Int.
     def getblock(self,val):
         p = self.prog
         target = p.cpu.cst(val,p.PC().size)
         return next(self.iterblocks(target))
 
+    # poor man's cfg builder that only groups blocks that belong to the
+    # same function based on FUNC_START/FUNC_STOP tags heuristics.
     def getcfg(self,loc=None):
-        F = []
+        nprev = None
         for b in self.iterblocks(loc):
+            n = cfg.node(b)
             if b.misc[code.tag.FUNC_START]:
-                f = cfg.func()
-            if b.misc[code.tag.FUNC_END]:
-                F.append(f)
-            try:
-                f.add_vertex(cfg.node(b))
-            except NameError:
-                logger.warning('linear sweep orfan block %s'%b.name)
-                F.append(b)
-        return F
+                nprev = None
+            if nprev is None:
+                self.G.add_vertex(n)
+            else:
+                self.G.add_edge(cfg.link(nprev,n))
+            nprev = n
 
 # -----------------------------------------------------------------------------
-# fast forward based analysis
-# follows PC expression evaluated within a single block only. 
+# a PC target: cst is its expression, parent its source node, econd its path conditions.
+class _target(object):
+    def __init__(self,cst,parent,econd=None):
+        self.cst = cst
+        self.parent = parent
+        self.econd = econd
+
+    # flatten cst into the list of ext/cst targets it can take ([] if unknown).
+    def expand(self):
+        if self.cst._is_ext or self.cst._is_cst:
+            return [self]
+        if self.cst._is_tst:
+            ltrue  = self.select(True).expand()
+            lfalse = self.select(False).expand()
+            return ltrue+lfalse
+        return []
+
+    def select(self,side):
+        assert self.cst._is_tst
+        x = self.cst.l if side is True else self.cst.r
+        # build a new list: sibling branches must not share (and mutate) econd.
+        econd = (self.econd or [])+[self.cst.tst==side]
+        return _target(x,self.parent,econd)
+
+
+# -----------------------------------------------------------------------------
+# fast forward based analysis:
+# follows PC expression evaluated within a single block only.
 # exploration goes forward until expressions are not cst.
 class fforward(lsweep):
     policy = {'depth-first': True, 'branch-lazy': True}
 
     def init_spool(self,loc):
-        return [(loc,None)]
+        return [_target(loc,None)]
+
+    def update_spool(self,spool,vtx,parent):
+        T = self.get_targets(vtx,parent)
+        if len(T)>0:
+            spool.extend(T)
+            return
+        err = '%s analysis stopped at %s'%(self.__class__.__name__,vtx)
+        logger.info(err)
+        vtx.data.misc['tbc'] = 1
 
-    def get_target(self,blk,withmap):
-        # withmap unused in fforward
+    # compute expression of target address (PC) in node.data.map
+    def get_targets(self,node,parent):
+        blk = node.data
         m = code.mapper()
         pc = self.prog.PC()
         m[pc] = blk.address
-        target = (blk.map(pc)).eval(m)
-        return target.simplify()
+        pc = (blk.map(pc)).eval(m)
+        return _target(pc,node).expand()
+
+    def add_root_node(self,vtx):
+        vtx.data.misc[code.tag.FUNC_START]=1
+        vtx.data.misc['callers'] = []
+        self.G.add_vertex(vtx)
+        logger.verbose('root node %s added'%vtx.name)
+
+    def add_call_node(self,vtx,parent,econd):
+        b = vtx.data
+        b.misc[code.tag.FUNC_START]+=1
+        parent.data.misc[code.tag.FUNC_CALL] += 1
+        try:
+            b.misc['callers'] += [parent]
+        except TypeError:
+            b.misc['callers']  = [parent]
+        if b.misc['func']:
+            logger.verbose('function %s called'%b.misc['func'])
+            vtx = cfg.node(b.misc['func'])
+            e = cfg.link(parent,vtx,data=econd)
+            self.G.add_edge(e)
+        else:
+            self.G.add_vertex(vtx)
+            logger.verbose('block %s starts a new cfg component'%vtx.name)
+        return vtx
+
+    def check_ext_target(self,t,spool):
+        if t.cst is None: return False
+        if t.cst._is_ext:
+            b = code.xfunc(t.cst)
+            vtx = cfg.node(b)
+            e = cfg.link(t.parent,vtx,data=t.econd)
+            self.G.add_edge(e)
+            self.update_spool(spool,vtx,t.parent)
+            return True
+        return False
 
     # generic 'forward' analysis explorer.
     # default explore policy is depth-first search (use policy=0 for breadth-first search.)
-    # return instructions are not followed (see backward analysis).
+    # return instructions are not followed (see lbackward analysis).
     def getcfg(self,loc=None):
+        G = self.G
+        # spool is the list of _target objects (address,parent,econd) to be analysed
         spool = self.init_spool(loc)
+        # order is the index to pop elements from spool
         order = -1 if self.policy['depth-first'] else 0
+        # lazy is a flag to fallback to linear sweep
         lazy  = self.policy['branch-lazy']
-        F = cfg.func()
+        # proceed with exploration of every spool element:
         while len(spool)>0:
-            current,parent = spool.pop(order)
-            for b in self.iterblocks(loc=current):
-                err = '%s analysis failed at block %s'%(self.__class__.__name__,b.name)
-                sta,sto = b.support
-                vtx = cfg.node(b)
-                if vtx in F.V():
-                    e = cfg.link(parent,F.get_node(vtx.name))
-                    F.add_edge(e)
-                    logger.verbose('edge %s added'%e)
-                    break
-                if parent is None or (parent.data.address is None):
-                    b.misc[code.tag.FUNC_START]=1
-                    F.add_vertex(vtx)
-                    logger.verbose('root node %s added'%vtx.name)
+            t = spool.pop(order)
+            parent = t.parent
+            econd  = t.econd
+            if self.check_ext_target(t,spool): continue
+            for b in self.iterblocks(loc=t.cst):
+                vtx = G.get_node(b.name) or cfg.node(b)
+                b = vtx.data
+                # if block is a FUNC_START, we add it as a new graph component (no link to parent),
+                # otherwise we add the new (parent,vtx) edge.
+                if parent is None:
+                    self.add_root_node(vtx)
+                elif parent.data.misc[code.tag.FUNC_CALL]:
+                    vtx = self.add_call_node(vtx,parent,econd)
                 else:
-                    if b.misc[code.tag.FUNC_START] and parent.data.misc[code.tag.FUNC_CALL]:
-                        b.misc[code.tag.FUNC_START]+=1
-                        F.add_vertex(vtx)
-                        logger.verbose('function node %s added'%vtx.name)
-                    else:
-                        e = cfg.link(parent,vtx)
-                        F.add_edge(e)
-                        logger.verbose('edge %s added'%e)
-                # continue and update spool...
-                target = self.get_target(b,withmap=parent)
+                    e = cfg.link(parent,vtx,data=econd)
+                    G.add_edge(e)
+                    logger.verbose('edge %s added'%e)
+                # if vtx was visited before, its targets have already been added:
+                if len(vtx.e_in())>1: break
+                # now we try to populate spool with target addresses of current block:
+                self.update_spool(spool,vtx,parent)
+                if not lazy or b.misc[code.tag.FUNC_END]: break
+                logger.verbose("lsweep fallback at %s"%b.name)
                 parent = vtx
-                if target==sto:
-                    continue
-                elif target._is_cst:
-                    spool.append((target,parent))
-                    if not lazy: break
-                elif target._is_tst:
-                    t1 = target.l
-                    t2 = target.r
-                    if t1._is_cst:
-                        spool.append((t1,parent))
-                    else:
-                        logger.info(err+' (true branch)')
-                    if t2._is_cst:
-                        spool.append((t2,parent))
-                    else:
-                        logger.info(err+' (false branch)')
-                    break
-                else:
-                    logger.info(err)
-                    if not lazy: break
-        return F
+                econd  = None
+        return G
 
 # -----------------------------------------------------------------------------
-# link forward based analysis
+# link forward based analysis:
 # follows PC expression evaluated with parent block mapping.
-# exploration goes forward until expressions are not cst.
+# Exploration goes forward until expressions are not cst.
 class lforward(fforward):
     policy = {'depth-first': True, 'branch-lazy': False}
 
-    def init_spool(self,loc):
-        return [(loc,cfg.node(code.block([])))]
+    def get_targets(self,node,parent):
+        blk = node.data
+        pc = self.prog.PC()
+        if parent is None:
+            pc = blk.map.use((pc,blk.address))(pc)
+        else:
+            m = parent.data.map.use((pc,parent.data.address)) # work on copy
+            m[pc] = blk.address
+            pc = m(blk.map(pc))
+        return _target(pc,node).expand()
+
 
-    def get_target(self,blk,withmap):
-        # use withmap for blk.map eval:
-        m = withmap.data.map.use() #work on copy
+# -----------------------------------------------------------------------------
+# fast backward based analysis:
+# a generalisation of link forward where pc is evaluated backwardly by taking
+# the first-parent-node path until no parent exists (entry of a function)
+# fbackward is the first class to instantiate code.func objects.
+# The 'frame-aliasing' policy indicates whether memory aliasing of pc expression
+# outside of the function frame can occur or if the frame is assumed to be clean.
+# Default frame-aliasing is set to False (assume no aliasing) otherwise any
+# function that writes in memory results in potential aliasing (say for an arch
+# that uses a memory stack for storing return addresses).
+class fbackward(lforward):
+    policy = {'depth-first': True, 'branch-lazy': False, 'frame-aliasing':False}
+
+    def get_targets(self,node,parent):
         pc = self.prog.PC()
-        m[pc] = blk.address
-        target = (blk.map(pc)).eval(m)
-        return target.simplify()
+        n = node
+        mpc = pc
+        while True:
+            m = n.data.map.use((pc,n.data.address))
+            mpc = m(mpc)
+            T = _target(mpc,node).expand()
+            if len(T)>0: return T
+            try:
+                n = n.N(-1)[0] # get first parent node (parent arg is unused)
+            except IndexError:
+                break # we are at function entry node
+        # create func nodes:
+        xpc = []
+        if n.data.misc[code.tag.FUNC_START]:
+            if node.data.misc[code.tag.FUNC_END]:
+                n.data.misc[code.tag.FUNC_START] += 1
+            try:
+                fsym = n.data.misc['callers'][0].data.misc['to'].ref
+            except (IndexError,TypeError,AttributeError):
+                fsym = 'f'
+            func = code.func(n.c,name="%s:%s"%(fsym,n.name))
+            logger.verbose("function %s created"%func)
+            if mpc._is_mem and len(mpc.mods)>0:
+                pol = '(assume_no_aliasing)' if self.policy['frame-aliasing']==False else ''
+                logger.verbose("pc is memory aliased in %s %s"%(str(func),pol))
+                if self.policy['frame-aliasing']==False: mpc.mods = []
+            func.map[pc] = mpc
+            for cn in n.data.misc['callers']:
+                cnpc = cn.data.map.use((pc,cn.data.address))(mpc)
+                f = cfg.node(func)
+                cfg.link(cn,f,connect=True)
+                xpc.extend(_target(cnpc,f).expand())
+            n.data.misc['func'] = func
+        else:
+            xpc.extend(_target(mpc,node).expand())
+        return xpc
+
 
diff --git a/amoco/system/__init__.py b/amoco/system/__init__.py
index 9822d53..d3f893a 100644
--- a/amoco/system/__init__.py
+++ b/amoco/system/__init__.py
@@ -1 +1,3 @@
+# -*- coding: utf-8 -*-
+
 import loader
diff --git a/amoco/system/core.py b/amoco/system/core.py
index daac3ed..f70dc78 100644
--- a/amoco/system/core.py
+++ b/amoco/system/core.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2007-2013 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2007-2013 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 
@@ -8,6 +10,8 @@
 
 from bisect import bisect_left
 
+from amoco.cas.expressions import top
+
 #------------------------------------------------------------------------------
 # datadiv provides the API for manipulating data values extracted from memory.
 # These values are either considered as 'raw' (byte strings) or can be any
@@ -33,7 +37,9 @@ def __len__(self):
 
     def __repr__(self):
         s = repr(self.val)
-        if len(s)>32: s=s[:32]+'...'
+        if len(s)>32:
+            s=s[:32]+"..."
+            if isinstance(self.val,str): s+="'"
         return '<datadiv:%s>'%s
 
     def __str__(self):
@@ -43,9 +49,9 @@ def cut(self,l):
         if self._is_raw:
             self.val = self.val[l:]
         else:
-            self.val = self.val[8*l:]
+            self.val = self.val.bytes(l)
 
-    # returns (result, counter) where result is a part of val of length l 
+    # returns (result, counter) where result is a part of val of length l
     # located at offset o, and counter is the number of bytes that still
     # need to be read from another div.
     def getpart(self,o,l):
@@ -63,10 +69,8 @@ def getpart(self,o,l):
             res = self.val[o:o+l]
             return (res,l-len(res))
         if o>=lv: return (None,l)
-        o,l = 8*o,8*l
-        n,r = divmod(o+l,s)
-        if n>0: return (self.val[o:s],r/8)
-        return (self.val[o:o+l],0)
+        res = self.val.bytes(o,o+l)
+        return (res,l-res.length)
 
     # returns a list of (contiguous) datadiv objects resulting from
     # overwriting self with data at offset o, possibly extending self.
@@ -114,7 +118,9 @@ def __contains__(self,vaddr):
 
     def __repr__(self):
         data = str(self.data)
-        if len(data)>32: data=data[:32]+'...'
+        if len(data)>32:
+            data=data[:32]+"..."
+            if self.data._is_raw: data+="'"
         return '<mo [%08x,%08x] data:%s>'%(self.vaddr,self.end,data)
 
     # change current obj to start at provided vaddr
@@ -129,7 +135,7 @@ def read(self,vaddr,l):
         if vaddr in self:
             return self.data.getpart(vaddr-self.vaddr,l)
         else:
-            logger.warning('%s read out of bound (vaddr=%08x, l=%d)',repr(self),vaddr,l)
+            logger.debug('%s read out of bound (vaddr=%08x, l=%d)',repr(self),vaddr,l)
             return (None,l)
 
     # update current obj resulting from writing datadiv at vaddr, returning the
@@ -145,16 +151,17 @@ def write(self,vaddr,data):
                 vaddr += len(p)
             return O
         else:
-            logger.verbose('%s write out of bound (vaddr=%08x,data=%.32s)',repr(self),vaddr,repr(data))
+            logger.debug('%s write out of bound (vaddr=%08x,data=%.32s)',repr(self),vaddr,repr(data))
             return [mo(vaddr,data)]
 
 #------------------------------------------------------------------------------
 class MemoryZone(object):
-    __slot__ = ['rel','_map']
+    __slot__ = ['rel','_map','__cache']
 
     def __init__(self,rel=None,D=None):
         self.rel = rel
         self._map = []
+        self.__cache = [] # speedup locate method
         if D != None and isinstance(D,dict):
             for vaddr,data in D.iteritems():
                 self.addtomap(mo(vaddr,data))
@@ -169,30 +176,49 @@ def __str__(self):
             l.append("\t %s"%str(z))
         return '\n'.join(l)+'>'
 
+    def __update_cache(self):
+        self.__cache = [z.vaddr for z in self._map]
+
     # locate the index that contains the given address in the mmap:
     def locate(self,vaddr):
-        p = [z.vaddr for z in self._map]
+        p = self.__cache
         if vaddr in p: return p.index(vaddr)
         i = bisect_left(p,vaddr)
         if i==0: return None
         else: return i-1
 
     # read l bytes starting at vaddr.
-    # A MemoryError is raised if some bytes are not mapped.
+    # return value is a list of datadiv values, unmapped areas
+    # are returned as 'top' expressions.
     def read(self,vaddr,l):
+        res = []
         i = self.locate(vaddr)
-        if i is None: raise MemoryError(l)
+        if i is None:
+            if len(self._map)==0: return [top(l*8)]
+            v0 = self._map[0].vaddr
+            if (vaddr+l)<=v0: return [top(l*8)]
+            res.append(top((v0-vaddr)*8))
+            l = (vaddr+l)-v0
+            vaddr = v0
+            i = 0
         ll = l
-        res = []
         while ll>0:
             try:
                 data,ll = self._map[i].read(vaddr,ll)
             except IndexError:
-                data=None
+                res.append(top(ll*8))
+                ll=0
+                break
             if data is None:
-                raise MemoryError(ll)
-            vaddr += len(data)
-            res.append(data)
+                vi = self.__cache[i]
+                if vaddr < vi:
+                    l = min(vaddr+ll,vi)-vaddr
+                    data = top(l*8)
+                    ll -= l
+                    i -=1
+            if data is not None:
+                vaddr += len(data)
+                res.append(data)
             i += 1
         assert ll==0
         return res
@@ -210,6 +236,7 @@ def addtomap(self,z):
         if j is None:
             assert i is None
             self._map.insert(0,z)
+            self.__update_cache()
             return
         if j==i:
             Z = self._map[i].write(z.vaddr,z.data.val)
@@ -217,6 +244,7 @@ def addtomap(self,z):
             for newz in Z:
                 self._map.insert(i,newz)
                 i+=1
+            self.__update_cache()
             return
         # i!=j cases:
         # delete & update every overwritten zones
@@ -237,6 +265,7 @@ def addtomap(self,z):
         for newz in Z:
             self._map.insert(i,newz)
             i+=1
+        self.__update_cache()
 
     def restruct(self):
         if len(self._map)==0: return
@@ -251,6 +280,7 @@ def restruct(self):
             else:
                 m.append(z)
         self._map = m
+        self.__update_cache()
 
 #------------------------------------------------------------------------------
 class MemoryMap(object):
@@ -264,10 +294,11 @@ def newzone(self,label):
         z = MemoryZone()
         z.rel = label
         self._zones[label] = z
+        return z
 
     def locate(self,address):
         r, a = self.reference(address)
-        idx = self._zones[r].locate(address)
+        idx = self._zones[r].locate(a)
         return self._zones[r]._map[idx]
 
     def reference(self,address):
@@ -277,6 +308,8 @@ def reference(self,address):
             return (address,0)
         try:
             r,a = (address.base,address.disp)
+            if r._is_cst:
+                return (None,(r+a).v)
             return (r,a)
         except AttributeError:
             if address._is_cst:
@@ -301,12 +334,20 @@ def __getitem__(self,aslc):
 
     def read(self,address,l):
         r,o = self.reference(address)
-        return self._zones[r].read(o,l)
+        if r in self._zones:
+            return self._zones[r].read(o,l)
+        else:
+            raise MemoryError(address)
 
     def write(self,address,expr):
         r,o = self.reference(address)
+        if not r in self._zones:
+            self.newzone(r)
         self._zones[r].write(o,expr)
 
+    def restruct(self):
+        for z in self._zones.itervalues(): z.restruct()
+
 #------------------------------------------------------------------------------
 class CoreExec(object):
     __slots__ = ['bin','cpu','mmap']
@@ -336,15 +377,12 @@ def read_instruction(self,vaddr,**kargs):
         try:
             istr = self.mmap.read(vaddr,maxlen)
         except MemoryError,e:
-            ll = e.message
-            l = maxlen-ll
-            if l == 0:
+            logger.verbose("vaddr %s is not mapped"%vaddr)
+            raise MemoryError(e)
+        else:
+            if len(istr)>1 or not isinstance(istr[0],str):
+                logger.verbose("failed to read instruction at %s"%vaddr)
                 return None
-            logger.warning("instruction fetch error: reducing fetch size (%d)"%l)
-            istr = self.mmap.read(vaddr,l)
-        if len(istr)>1:
-            logger.warning("read_instruction: can't fetch vaddr %s"%vaddr)
-            raise MemoryError
         i = self.cpu.disassemble(istr[0],**kargs)
         if i is None:
             logger.warning("disassemble failed at vaddr %s"%vaddr)
diff --git a/amoco/system/elf.py b/amoco/system/elf.py
index bded1e3..cee6a79 100644
--- a/amoco/system/elf.py
+++ b/amoco/system/elf.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 import struct
@@ -51,7 +53,7 @@ class Elf32_Ehdr(Elfcore):
         'e_shentsize',
         'e_shnum',
         'e_shstrndx')
-    # overload Elfcore methods to take into account the e_ident dict: 
+    # overload Elfcore methods to take into account the e_ident dict:
     def __init__(self, data):
         S = struct.unpack('B3sBBBBBxxxxxxx',data[:16])
         if S[0]!=0x7f or S[1]!='ELF':
@@ -974,7 +976,7 @@ class Elf64_Ehdr(Elfcore):
         'e_shentsize',
         'e_shnum',
         'e_shstrndx')
-    # overload Elfcore methods to take into account the e_ident dict: 
+    # overload Elfcore methods to take into account the e_ident dict:
     def __init__(self, data):
         S = struct.unpack('B3sBBBBBxxxxxxx',data[:16])
         if S[0]!=0x7f or S[1]!='ELF':
diff --git a/amoco/system/gameboy.py b/amoco/system/gameboy.py
index 1c83cea..3a54574 100644
--- a/amoco/system/gameboy.py
+++ b/amoco/system/gameboy.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2012 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -125,16 +125,14 @@ def read_instruction(self,vaddr,**kargs):
         try:
             istr = self.mmap.read(vaddr,maxlen)
         except MemoryError,e:
-            ll = e.message
-            l = maxlen-ll
-            logger.warning("instruction fetch error: reducing fetch size (%d)"%l)
-            istr = self.mmap.read(vaddr,l)
-        if len(istr)>1:
-            logger.warning("read_instruction: can't fetch vaddr %s"%vaddr)
-            raise MemoryError
+            logger.warning("vaddr %s is not mapped"%vaddr)
+            raise MemoryError(e)
         i = self.cpu.disassemble(istr[0],**kargs)
         if i is None:
             logger.warning("disassemble failed at vaddr %s"%vaddr)
+            if len(istr)>1 and istr[1]._is_def: # NOTE(review): presumably a symbolic chunk follows the raw bytes - confirm
+                logger.warning("symbol found in instruction buffer at vaddr %s"%vaddr)
+                raise MemoryError(vaddr)
             return None
         else:
             i.address = vaddr
diff --git a/amoco/system/leon2.py b/amoco/system/leon2.py
index c043855..9022453 100644
--- a/amoco/system/leon2.py
+++ b/amoco/system/leon2.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -11,6 +13,7 @@ class ELF(CoreExec):
 
     def __init__(self,p):
         CoreExec.__init__(self,p,cpu)
+        cpu.exp.setendian(-1) # set endianness to big-endian
 
     # load the program into virtual memory (populate the mmap dict)
     def load_binary(self):
diff --git a/amoco/system/linux_arm.py b/amoco/system/linux_arm.py
index 501e230..4576381 100644
--- a/amoco/system/linux_arm.py
+++ b/amoco/system/linux_arm.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -30,7 +32,7 @@ def load_binary(self):
     # for now, the external libs are seen through the elf dynamic section:
     def load_shlib(self):
         for k,f in self.bin._Elf32__dynamic(None).iteritems():
-            self.mmap.write(k,cpu.ext(f))
+            self.mmap.write(k,cpu.ext(f,size=32))
 
     def initenv(self):
         from amoco.cas.mapper import mapper
@@ -45,299 +47,8 @@ def PC(self):
 
 # LIBC HOOKS DEFINED HERE :
 #----------------------------------------------------------------------------
-def __libc_start_main():
-    s = code.func('__libc_start_main',cfg=None)
-    pass
-    return s
-def exit():
-    s = code.func('exit',cfg=None)
-    return s
-def abort():
-    s = code.func('abort',cfg=None)
-    return s
-def __assert():
-    s = code.func('__assert',cfg=None)
-    return s
-def __assert_fail():
-    s = code.func('__assert_fail',cfg=None)
-    return s
-def _assert_perror_fail():
-    s = code.func('__assert_perror_fail',cfg=None)
-    return s
+
 #----------------------------------------------------------------------------
 
 # SYSCALLS:
 #----------------------------------------------------------------------------
-IDT={
-  1: "exit",
-  2: "fork",
-  3: "read",
-  4: "write",
-  5: "open",
-  6: "close",
-  7: "waitpid",
-  8: "creat",
-  9: "link",
- 10: "unlink",
- 11: "execve",
- 12: "chdir",
- 13: "time",
- 14: "mknod",
- 15: "chmod",
- 16: "lchown",
- 17: "break",
- 18: "oldstat",
- 19: "lseek",
- 20: "getpid",
- 21: "mount",
- 22: "umount",
- 23: "setuid",
- 24: "getuid",
- 25: "stime",
- 26: "ptrace",
- 27: "alarm",
- 28: "oldfstat",
- 29: "pause",
- 30: "utime",
- 31: "stty",
- 32: "gtty",
- 33: "access",
- 34: "nice",
- 35: "ftime",
- 36: "sync",
- 37: "kill",
- 38: "rename",
- 39: "mkdir",
- 40: "rmdir",
- 41: "dup",
- 42: "pipe",
- 43: "times",
- 44: "prof",
- 45: "brk",
- 46: "setgid",
- 47: "getgid",
- 48: "signal",
- 49: "geteuid",
- 50: "getegid",
- 51: "acct",
- 52: "umount2",
- 53: "lock",
- 54: "ioctl",
- 55: "fcntl",
- 56: "mpx",
- 57: "setpgid",
- 58: "ulimit",
- 59: "oldolduname",
- 60: "umask",
- 61: "chroot",
- 62: "ustat",
- 63: "dup2",
- 64: "getppid",
- 65: "getpgrp",
- 66: "setsid",
- 67: "sigaction",
- 68: "sgetmask",
- 69: "ssetmask",
- 70: "setreuid",
- 71: "setregid",
- 72: "sigsuspend",
- 73: "sigpending",
- 74: "sethostname",
- 75: "setrlimit",
- 76: "getrlimit",
- 77: "getrusage",
- 78: "gettimeofday",
- 79: "settimeofday",
- 80: "getgroups",
- 81: "setgroups",
- 82: "select",
- 83: "symlink",
- 84: "oldlstat",
- 85: "readlink",
- 86: "uselib",
- 87: "swapon",
- 88: "reboot",
- 89: "readdir",
- 90: "mmap",
- 91: "munmap",
- 92: "truncate",
- 93: "ftruncate",
- 94: "fchmod",
- 95: "fchown",
- 96: "getpriority",
- 97: "setpriority",
- 98: "profil",
- 99: "statfs",
-100: "fstatfs",
-101: "ioperm",
-102: "socketcall",
-103: "syslog",
-104: "setitimer",
-105: "getitimer",
-106: "stat",
-107: "lstat",
-108: "fstat",
-109: "olduname",
-110: "iopl",
-111: "vhangup",
-112: "idle",
-113: "vm86old",
-114: "wait4",
-115: "swapoff",
-116: "sysinfo",
-117: "ipc",
-118: "fsync",
-119: "sigreturn",
-120: "clone",
-121: "setdomainname",
-122: "uname",
-123: "modify_ldt",
-124: "adjtimex",
-125: "mprotect",
-126: "sigprocmask",
-127: "create_module",
-128: "init_module",
-129: "delete_module",
-130: "get_kernel_syms",
-131: "quotactl",
-132: "getpgid",
-133: "fchdir",
-134: "bdflush",
-135: "sysfs",
-136: "personality",
-137: "afs_syscall",
-138: "setfsuid",
-139: "setfsgid",
-140: "_llseek",
-141: "getdents",
-142: "_newselect",
-143: "flock",
-144: "msync",
-145: "readv",
-146: "writev",
-147: "getsid",
-148: "fdatasync",
-149: "_sysctl",
-150: "mlock",
-151: "munlock",
-152: "mlockall",
-153: "munlockall",
-154: "sched_setparam",
-155: "sched_getparam",
-156: "sched_setscheduler",
-157: "sched_getscheduler",
-158: "sched_yield",
-159: "sched_get_priority_max",
-160: "sched_get_priority_min",
-161: "sched_rr_get_interval",
-162: "nanosleep",
-163: "mremap",
-164: "setresuid",
-165: "getresuid",
-166: "vm86",
-167: "query_module",
-168: "poll",
-169: "nfsservctl",
-170: "setresgid",
-171: "getresgid",
-172: "prctl",
-173: "rt_sigreturn",
-174: "rt_sigaction",
-175: "rt_sigprocmask",
-176: "rt_sigpending",
-177: "rt_sigtimedwait",
-178: "rt_sigqueueinfo",
-179: "rt_sigsuspend",
-180: "pread64",
-181: "pwrite64",
-182: "chown",
-183: "getcwd",
-184: "capget",
-185: "capset",
-186: "sigaltstack",
-187: "sendfile",
-188: "getpmsg",
-189: "putpmsg",
-190: "vfork",
-191: "ugetrlimit",
-192: "mmap2",
-193: "truncate64",
-194: "ftruncate64",
-195: "stat64",
-196: "lstat64",
-197: "fstat64",
-198: "lchown32",
-199: "getuid32",
-200: "getgid32",
-201: "geteuid32",
-202: "getegid32",
-203: "setreuid32",
-204: "setregid32",
-205: "getgroups32",
-206: "setgroups32",
-207: "fchown32",
-208: "setresuid32",
-209: "getresuid32",
-210: "setresgid32",
-211: "getresgid32",
-212: "chown32",
-213: "setuid32",
-214: "setgid32",
-215: "setfsuid32",
-216: "setfsgid32",
-217: "pivot_root",
-218: "mincore",
-219: "madvise",
-219: "madvise1",
-220: "getdents64",
-221: "fcntl64",
-224: "gettid",
-225: "readahead",
-226: "setxattr",
-227: "lsetxattr",
-228: "fsetxattr",
-229: "getxattr",
-230: "lgetxattr",
-231: "fgetxattr",
-232: "listxattr",
-233: "llistxattr",
-234: "flistxattr",
-235: "removexattr",
-236: "lremovexattr",
-237: "fremovexattr",
-238: "tkill",
-239: "sendfile64",
-240: "futex",
-241: "sched_setaffinity",
-242: "sched_getaffinity",
-243: "set_thread_area",
-244: "get_thread_area",
-245: "io_setup",
-246: "io_destroy",
-247: "io_getevents",
-248: "io_submit",
-249: "io_cancel",
-250: "fadvise64",
-252: "exit_group",
-253: "lookup_dcookie",
-254: "epoll_create",
-255: "epoll_ctl",
-256: "epoll_wait",
-257: "remap_file_pages",
-258: "set_tid_address",
-259: "timer_create",
-260: "timer_settime",
-261: "timer_gettime",
-262: "timer_getoverrun",
-263: "timer_delete",
-264: "clock_settime",
-265: "clock_gettime",
-266: "clock_getres",
-267: "clock_nanosleep",
-268: "statfs64",
-269: "fstatfs64",
-270: "tgkill",
-271: "utimes",
-272: "fadvise64_64",
-273: "vserver" }
-
diff --git a/amoco/system/linux_arm64.py b/amoco/system/linux_arm64.py
index cf0a206..526aec8 100644
--- a/amoco/system/linux_arm64.py
+++ b/amoco/system/linux_arm64.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
diff --git a/amoco/system/linux_x64.py b/amoco/system/linux_x64.py
index bf34734..1e4e73c 100644
--- a/amoco/system/linux_x64.py
+++ b/amoco/system/linux_x64.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -33,14 +35,14 @@ def load_binary(self):
     # for now, the external libs are seen through the elf dynamic section:
     def load_shlib(self):
         for k,f in self.bin._Elf64__dynamic(None).iteritems():
-            self.mmap.write(k,cpu.ext(f))
+            self.mmap.write(k,cpu.ext(f,size=64))
 
     # lookup in bin if v is associated with a function or variable name:
     def check_sym(self,v):
         if v._is_cst:
             x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None)
             if x is not None:
-                if isinstance(x,str): x=cpu.ext(x)
+                if isinstance(x,str): x=cpu.ext(x,size=64)
                 else: x=cpu.sym(x[0],v.value,v.size)
                 return x
         return None
@@ -129,28 +131,28 @@ def blockhelper(self,block):
 #----------------------------------------------------------------------------
 
 @stub_default
-def pop_rip(m):
+def pop_rip(m,**kargs):
     cpu.pop(m,cpu.rip)
 
 @stub
-def __libc_start_main(m):
+def __libc_start_main(m,**kargs):
     m[cpu.rip] = m(cpu.mem(cpu.rsp+4,64))
-    cpu.push(m,cpu.ext('exit'))
+    cpu.push(m,cpu.ext('exit',size=64))
 
 @stub
-def exit(m):
+def exit(m,**kargs):
     m[cpu.rip] = top(64)
 @stub
-def abort(m):
+def abort(m,**kargs):
     m[cpu.rip] = top(64)
 @stub
-def __assert(m):
+def __assert(m,**kargs):
     m[cpu.rip] = top(64)
 @stub
-def __assert_fail(m):
+def __assert_fail(m,**kargs):
     m[cpu.rip] = top(64)
 @stub
-def _assert_perror_fail(m):
+def _assert_perror_fail(m,**kargs):
     m[cpu.rip] = top(64)
 
 #----------------------------------------------------------------------------
diff --git a/amoco/system/linux_x86.py b/amoco/system/linux_x86.py
index f6f1655..2e92c3b 100644
--- a/amoco/system/linux_x86.py
+++ b/amoco/system/linux_x86.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -33,14 +35,14 @@ def load_binary(self):
     # for now, the external libs are seen through the elf dynamic section:
     def load_shlib(self):
         for k,f in self.bin._Elf32__dynamic(None).iteritems():
-            self.mmap.write(k,cpu.ext(f))
+            self.mmap.write(k,cpu.ext(f,size=32))
 
     # lookup in bin if v is associated with a function or variable name:
     def check_sym(self,v):
         if v._is_cst:
             x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None)
             if x is not None:
-                if isinstance(x,str): x=cpu.ext(x)
+                if isinstance(x,str): x=cpu.ext(x,size=32)
                 else: x=cpu.sym(x[0],v.value,v.size)
                 return x
         return None
@@ -103,7 +105,12 @@ def seqhelper(self,seq):
                         else: i.misc[tag.FUNC_VAR]=1
                     elif op.a.base._is_cst:
                         x = self.check_sym(op.a.base)
-                        if x is not None: op.a.base=x
+                        if x is not None:
+                            op.a.base=x
+                            if i.mnemonic == 'JMP': # PLT jumps:
+                                i.address = i.address.to_sym('PLT%s'%x)
+                                i.misc[tag.FUNC_START]=1
+                                i.misc[tag.FUNC_END]=1
                 elif op._is_cst:
                     x = self.check_sym(op)
                     i.misc['imm_ref'] = x
@@ -129,28 +136,28 @@ def blockhelper(self,block):
 #----------------------------------------------------------------------------
 
 @stub_default
-def pop_eip(m):
+def pop_eip(m,**kargs):
     cpu.pop(m,cpu.eip)
 
 @stub
-def __libc_start_main(m):
+def __libc_start_main(m,**kargs):
     m[cpu.eip] = m(cpu.mem(cpu.esp+4,32))
-    cpu.push(m,cpu.ext('exit'))
+    cpu.push(m,cpu.ext('exit',size=32))
 
 @stub
-def exit(m):
+def exit(m,**kargs):
     m[cpu.eip] = top(32)
 @stub
-def abort(m):
+def abort(m,**kargs):
     m[cpu.eip] = top(32)
 @stub
-def __assert(m):
+def __assert(m,**kargs):
     m[cpu.eip] = top(32)
 @stub
-def __assert_fail(m):
+def __assert_fail(m,**kargs):
     m[cpu.eip] = top(32)
 @stub
-def _assert_perror_fail(m):
+def _assert_perror_fail(m,**kargs):
     m[cpu.eip] = top(32)
 
 #----------------------------------------------------------------------------
diff --git a/amoco/system/loader.py b/amoco/system/loader.py
index 2d3aae2..c37293a 100644
--- a/amoco/system/loader.py
+++ b/amoco/system/loader.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.logger import *
@@ -12,7 +14,7 @@
 #------------------------------------------------------------------------------
 # read_program is responsible of identifying the program header (ELF/PE).
 # It returns an ELF or PE class instance.
-# loading the associated "system" (Linux/Windows) and "environment" (x86/etc), 
+# loading the associated "system" (Linux/Windows) and "environment" (x86/etc),
 # based on information from its header.
 #------------------------------------------------------------------------------
 def read_program(filename):
@@ -33,13 +35,13 @@ def read_program(filename):
     except pe.PEError:
         pass
 
-    logger.error('unknown format')
+    logger.warning('unknown format')
     try:
         data = file(filename,'rb')
     except (TypeError,IOError):
         data = filename
     return DataIO(data)
-    ## 
+    ##
 ##
 
 #------------------------------------------------------------------------------
diff --git a/amoco/system/msp430.py b/amoco/system/msp430.py
index 903984c..a897857 100644
--- a/amoco/system/msp430.py
+++ b/amoco/system/msp430.py
@@ -1,7 +1,7 @@
-#!/usr/bin/env python
+# -*- coding: utf-8 -*-
 
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
diff --git a/amoco/system/pe.py b/amoco/system/pe.py
index c032cd5..12c8533 100644
--- a/amoco/system/pe.py
+++ b/amoco/system/pe.py
@@ -1,6 +1,8 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
 # based on elf.py, improving pefile to work out corkami's CoST.exe.
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 import struct
diff --git a/amoco/system/pic18.py b/amoco/system/pic18.py
index 1834135..d23ef36 100644
--- a/amoco/system/pic18.py
+++ b/amoco/system/pic18.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2014 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -26,16 +28,14 @@ def read_instruction(self,vaddr,**kargs):
         try:
             istr = self.cmap.read(vaddr,maxlen)
         except MemoryError,e:
-            ll = e.message
-            l = maxlen-ll
-            logger.warning("instruction fetch error: reducing fetch size (%d)"%l)
-            istr = self.cmap.read(vaddr,l)
-        if len(istr)>1:
-            logger.warning("read_instruction: can't fetch vaddr %s"%vaddr)
-            raise MemoryError
+            logger.warning("vaddr %s is not mapped"%vaddr)
+            raise MemoryError(e)
         i = self.cpu.disassemble(istr[0],**kargs)
         if i is None:
             logger.warning("disassemble failed at vaddr %s"%vaddr)
+            if len(istr)>1 and istr[1]._is_def:
+                logger.warning("symbol found in instruction buffer at vaddr %s"%vaddr)
+                raise MemoryError(vaddr)
             return None
         else:
             i.address = vaddr
diff --git a/amoco/system/raw.py b/amoco/system/raw.py
index 797aadd..faedbef 100644
--- a/amoco/system/raw.py
+++ b/amoco/system/raw.py
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
 # Copyright (C) 2006-2011 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
diff --git a/amoco/system/win32.py b/amoco/system/win32.py
index 43904bc..e18a27b 100644
--- a/amoco/system/win32.py
+++ b/amoco/system/win32.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -34,7 +36,7 @@ def load_binary(self):
     # for now, the external libs are seen through the elf dynamic section:
     def load_shlib(self):
         for k,f in self.bin.functions.iteritems():
-            self.mmap.write(k,cpu.ext(f))
+            self.mmap.write(k,cpu.ext(f,size=32))
 
     def initenv(self):
         from amoco.cas.mapper import mapper
@@ -57,7 +59,7 @@ def check_sym(self,v):
         if v._is_cst:
             x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None)
             if x is not None:
-                if isinstance(x,str): x=cpu.ext(x)
+                if isinstance(x,str): x=cpu.ext(x,size=32)
                 else: x=cpu.sym(x[0],v.value,v.size)
                 return x
         return None
@@ -128,7 +130,7 @@ def blockhelper(self,block):
 #----------------------------------------------------------------------------
 
 @stub_default
-def pop_eip(m):
+def pop_eip(m,**kargs):
     cpu.pop(m,cpu.eip)
 
 
diff --git a/amoco/system/win64.py b/amoco/system/win64.py
index 6be62c8..b6c1557 100644
--- a/amoco/system/win64.py
+++ b/amoco/system/win64.py
@@ -1,5 +1,7 @@
+# -*- coding: utf-8 -*-
+
 # This code is part of Amoco
-# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com) 
+# Copyright (C) 2007 Axel Tillequin (bdcht3@gmail.com)
 # published under GPLv2 license
 
 from amoco.system.core import *
@@ -34,7 +36,7 @@ def load_binary(self):
     # for now, the external libs are seen through the elf dynamic section:
     def load_shlib(self):
         for k,f in self.bin.functions.iteritems():
-            self.mmap.write(k,cpu.ext(f))
+            self.mmap.write(k,cpu.ext(f,size=64))
 
     def initenv(self):
         from amoco.cas.mapper import mapper
@@ -57,7 +59,7 @@ def check_sym(self,v):
         if v._is_cst:
             x = self.bin.functions.get(v.value,None) or self.bin.variables.get(v.value,None)
             if x is not None:
-                if isinstance(x,str): x=cpu.ext(x)
+                if isinstance(x,str): x=cpu.ext(x,size=64)
                 else: x=cpu.sym(x[0],v.value,v.size)
                 return x
         return None
@@ -128,7 +130,7 @@ def blockhelper(self,block):
 #----------------------------------------------------------------------------
 
 @stub_default
-def pop_rip(m):
+def pop_rip(m,**kargs):
     cpu.pop(m,cpu.rip)