add docs, implement refactorings

* The optimisation of splitting on preconditions is no longer optional, because we rely on existential instantation for logical variables in posts of modules that optimise pres - the original way of dealing with existentials via checkpoints has been completely removed due to its error proneness. * Added inline and haddock toplevel docstrings for various functions.
NethermindEth · Mar 10, 2023 · 5eaa318 · 5eaa318
1 parent a2d7963
commit 5eaa318
Show file tree

Hide file tree

Showing 5 changed files with 221 additions and 107 deletions.
diff --git a/src/Horus/CFGBuild.hs b/src/Horus/CFGBuild.hs
@@ -125,8 +125,8 @@ liftF' = CFGBuildL . liftF
 addVertex :: Label -> CFGBuildL Vertex
 addVertex l = liftF' (AddVertex l Nothing id)
 
-addOptimizingVertex :: Label -> ScopedFunction -> CFGBuildL Vertex
-addOptimizingVertex l f = liftF' (AddVertex l (Just f) id)
+addOptimisingVertex :: Label -> ScopedFunction -> CFGBuildL Vertex
+addOptimisingVertex l f = liftF' (AddVertex l (Just f) id)
 
 addArc :: Vertex -> Vertex -> [LabeledInst] -> ArcCondition -> FInfo -> CFGBuildL ()
 addArc vFrom vTo insts test f = liftF' (AddArc vFrom vTo insts test f ())
@@ -155,11 +155,16 @@ getSvarSpecs = liftF' (GetSvarSpecs id)
 getVerts :: Label -> CFGBuildL [Vertex]
 getVerts l = liftF' (GetVerts l id)
 
--- It is enforced that for any one PC, one can add at most a single salient vertex
+{- | Saliant vertices can be thought of as 'main' vertices of the CFG, meaning that
+if one wants to reason about flow control of the program, one should query salient vertices.
+
+Certain program transformations and optimisations can add various additional nodes into the CFG,
+whose primary purpose is not to reason about control flow.
+
+It is enforced that for any one PC, one can add at most a single salient vertex. -}
 getSalientVertex :: Label -> CFGBuildL Vertex
 getSalientVertex l = do
   verts <- filter (not . isOptimising) <$> getVerts l
-  -- This can be at most one, so len <> 1 implies there are no vertices
   case verts of
     [vert] -> pure vert
     _ -> throw $ "No vertex with label: " <> tShow l
@@ -192,9 +197,13 @@ segmentInsts (Segment ne) = toList ne
 buildFrame :: Set ScopedFunction -> [LabeledInst] -> Program -> CFGBuildL ()
 buildFrame inlinables rows prog = do
   let segments = breakIntoSegments (programLabels rows $ p_identifiers prog) rows
+  -- It is necessary to add all vertices prior to calling `addArcsFrom`.
   segmentsWithVerts <- for segments $ \s -> addVertex (segmentLabel s) <&> (s,)
-  for_ segmentsWithVerts $ \(s, v) -> addArcsFrom inlinables prog rows s v True
+  for_ segmentsWithVerts . uncurry $ addArcsFrom inlinables prog rows
 
+{- | A simple procedure for splitting a stream of instructions into nonempty Segments based
+on program labels, which more-or-less correspond with changes in control flow in the program. 
+We thus obtain linear segments of instructions without control flow. -}
 breakIntoSegments :: [Label] -> [LabeledInst] -> [Segment]
 breakIntoSegments _ [] = []
 breakIntoSegments ls_ (i_ : is_) = coerce (go [] (i_ :| []) ls_ is_)
@@ -209,29 +218,24 @@ breakIntoSegments ls_ (i_ : is_) = coerce (go [] (i_ :| []) ls_ is_)
 addArc' :: Vertex -> Vertex -> [LabeledInst] -> CFGBuildL ()
 addArc' lFrom lTo insts = addArc lFrom lTo insts ACNone Nothing
 
-addArcsFrom :: Set ScopedFunction -> Program -> [LabeledInst] -> Segment -> Vertex -> Bool -> CFGBuildL ()
-addArcsFrom inlinables prog rows seg@(Segment s) vFrom optimiseWithSplit
+{- | This function adds arcs (edges) into the CFG and labels them with instructions that are
+to be executed when traversing from one vertex to another. 
+
+Currently, we do not have an optimisation post-processing pass in Horus and we therefore
+also include an optimisation here that generates an extra vertex in order to implement
+separate checking of preconditions for abstracted functions.-}
+addArcsFrom :: Set ScopedFunction -> Program -> [LabeledInst] -> Segment -> Vertex -> CFGBuildL ()
+addArcsFrom inlinables prog rows seg@(Segment s) vFrom
   | Call <- i_opCode endInst =
-      let callee = uncheckedScopedFOfPc (p_identifiers prog) (uncheckedCallDestination lInst)
-       in do
-            if callee `Set.member` inlinables
-              then do
-                salientCalleeV <- getSalientVertex (sf_pc callee)
-                addArc vFrom salientCalleeV insts ACNone . Just $ ArcCall endPc (sf_pc callee)
-              else do
-                salientLinearV <- getSalientVertex (nextSegmentLabel seg)
-                addArc' vFrom salientLinearV insts
-                svarSpecs <- getSvarSpecs
-                when (optimiseWithSplit && sf_scopedName callee `Set.notMember` svarSpecs) $ do
-                  -- Suppose F calls G where G has a precondition. We synthesize an extra module
-                  -- Pre(F) -> Pre(G) to check whether Pre(G) holds. The standard module for F
-                  -- is then Pre(F) -> Post(F) (conceptually, unless there's a split in the middle, etc.),
-                  -- in which Pre(G) is assumed to hold at the PC of the G call site, as it will have
-                  -- been checked by the module induced by the ghost vertex.
-                  ghostV <- addOptimizingVertex (nextSegmentLabel seg) callee
-                  pre <- maybe (mkPre Expr.True) mkPre . fs'_pre <$> getFuncSpec callee
-                  addAssertion ghostV $ quantifyEx pre
-                  addArc' vFrom ghostV insts
+      -- An inlined call pretends as though the stream of instructions continues without breaking
+      -- through the function being inlined.
+
+      -- An abstracted call does not break control flow and CONCEPTUALLY asserts 'pre implies post'.
+      -- Conceptually is the operative word here because due to the way that APs 'flow' 
+      -- in the program thus making the actual implication unnecessary plus the existance
+      -- of the optimisation in optimiseCheckingOfPre makes it such that the pre is checked
+      -- in a separate module.
+      if callee `Set.member` inlinables then beginInlining else abstractOver
   | Ret <- i_opCode endInst =
       -- Find the function corresponding to `endPc` and lookup its label. If we
       -- found the label, add arcs for each caller.
@@ -256,6 +260,45 @@ addArcsFrom inlinables prog rows seg@(Segment s) vFrom optimiseWithSplit
   insts = segmentInsts seg
   inlinableLabels = Set.map sf_pc inlinables
 
+  callee = uncheckedScopedFOfPc (p_identifiers prog) (uncheckedCallDestination lInst)
+
+  beginInlining = do
+    salientCalleeV <- getSalientVertex (sf_pc callee)
+    addArc vFrom salientCalleeV insts ACNone . Just $ ArcCall endPc (sf_pc callee)
+
+  optimiseCheckingOfPre = do
+    -- Suppose F calls G where G has a precondition. We synthesize an extra module
+    -- Pre(F) -> Pre(G) to check whether Pre(G) holds. The standard module for F
+    -- is then Pre(F) -> Post(F) (conceptually, unless there's a split in the middle, etc.),
+    -- in which Pre(G) is assumed to hold at the PC of the G call site, as it will have
+    -- been checked by the module induced by the ghost vertex.
+    ghostV <- addOptimisingVertex (nextSegmentLabel seg) callee
+    pre <- maybe (mkPre Expr.True) mkPre . fs'_pre <$> getFuncSpec callee
+
+    -- Important note on the way we deal with logical variables. These are @declare-d and
+    -- their values can be bound in preconditions. They generate existentials which only occur
+    -- in our models here and require special treatment, in addition to being somewhat
+    -- difficult for SMT checkers to deal with.
+
+    -- First note that these preconditions now become optimising-module postconditions.
+    -- We existentially quantify all logical variables present in the expression, thus in the
+    -- following example:
+    -- func foo:
+    --   call bar // where bar refers to $my_logical_var
+    -- We get an optimising module along the lines of:
+    -- Pre(foo) -> Pre(bar) where Pre(bar) contains \exists my_logical_var, ...
+    -- We can then check whether this instantiation exists in the optimising module exclusively.
+    -- The module that then considers that pre holds as a fact now has the luxury of not having
+    -- to deal with existential quantifiers, as it can simply 'declare' them as free variables.
+    addAssertion ghostV $ quantifyEx pre
+    addArc' vFrom ghostV insts
+
+  abstractOver = do
+    salientLinearV <- getSalientVertex (nextSegmentLabel seg)
+    addArc' vFrom salientLinearV insts
+    svarSpecs <- getSvarSpecs
+    when (sf_scopedName callee `Set.notMember` svarSpecs) optimiseCheckingOfPre
+
   addRetArc :: Label -> CFGBuildL ()
   addRetArc pc = do
     retV <- getSalientVertex endPc
@@ -274,6 +317,8 @@ addArcsFrom inlinables prog rows seg@(Segment s) vFrom optimiseWithSplit
     let lvars = gatherLogicalVariables expr
      in foldr Expr.ExistsFelt expr lvars
 
+{- | This function labels appropriate vertices (at 'ret'urns) with their respective postconditions.
+-}
 addAssertions :: Set ScopedFunction -> Identifiers -> CFGBuildL ()
 addAssertions inlinables identifiers = do
   for_ (Map.toList identifiers) $ \(idName, def) -> case def of
@@ -284,6 +329,8 @@ addAssertions inlinables identifiers = do
             post <- fs'_post <$> getFuncSpec func
             retVs <- mapM getSalientVertex =<< getRets idName
             case (pre, post) of
+              -- (Nothing, Nothing) means the function has no pre nor post. We refrain from
+              -- adding Pre := True and Post := True in case the function can be inlined.
               (Nothing, Nothing) ->
                 when (fu_pc f `Set.notMember` Set.map sf_pc inlinables) $
                   for_ retVs (`addAssertion` mkPost Expr.True)

diff --git a/src/Horus/CFGBuild/Runner.hs b/src/Horus/CFGBuild/Runner.hs
@@ -27,6 +27,11 @@ import Horus.FunctionAnalysis (FInfo)
 
 type Impl = ReaderT ContractInfo (ExceptT Text (State CFG))
 
+{- | This represents a quasi Control Flow Graph.
+
+Normally, they store instructions in nodes and edges represent flow control / jumps.
+In our case, we store instructions in edges and nodes represent points of program
+with associated logical assertions - preconditions, postconditions and invariants. -}
 data CFG = CFG
   { cfg_vertices :: [Vertex]
   , cfg_arcs :: Map Vertex [(Vertex, [LabeledInst], ArcCondition, FInfo)]

diff --git a/src/Horus/CairoSemantics.hs b/src/Horus/CairoSemantics.hs
@@ -28,7 +28,6 @@ import Lens.Micro ((^.), _1)
 import Horus.CallStack as CS (CallEntry, CallStack)
 import Horus.Expr (Cast (..), Expr ((:+)), Ty (..), (.&&), (./=), (.<), (.<=), (.==), (.=>), (.||))
 import Horus.Expr qualified as Expr
-import Horus.Expr qualified as TSMT
 import Horus.Expr.Util (gatherLogicalVariables, suffixLogicalVariables)
 import Horus.Expr.Vars
   ( builtinAligned
@@ -57,7 +56,7 @@ import Horus.Instruction
   , uncheckedCallDestination
   )
 import Horus.Label (Label (..), tShowLabel)
-import Horus.Module (Module (..), ModuleSpec (..), PlainSpec (..), isOptimising, richToPlainSpec)
+import Horus.Module (Module (..), ModuleSpec (..), PlainSpec (..), isOptimising, richToPlainSpec, ModuleData (ModuleData, md_prog, md_lastPc, md_spec), moduleData, moduleOptimisesPreForF)
 import Horus.Program (ApTracking (..))
 import Horus.SW.Builtin (Builtin, BuiltinOffsets (..))
 import Horus.SW.Builtin qualified as Builtin (name)
@@ -105,7 +104,8 @@ data CairoSemanticsF a
   | UpdateStorage Storage a
   | GetStorage (Storage -> a)
   | Throw Text
-  deriving stock (Functor)
+
+deriving instance Functor CairoSemanticsF
 
 type CairoSemanticsL = F CairoSemanticsF
 
@@ -273,17 +273,21 @@ getFp :: CairoSemanticsL (Expr TFelt)
 getFp = getFp' Nothing
 
 moduleStartAp :: Module -> CairoSemanticsL (Expr TFelt)
-moduleStartAp Module{m_prog = []} = getStackTraceDescr <&> Expr.const . ("ap!" <>)
-moduleStartAp Module{m_prog = (pc0, _) : _} = getAp pc0
+moduleStartAp mdl =
+  case md_prog (moduleData mdl) of
+    [] -> getStackTraceDescr <&> Expr.const . ("ap!" <>)
+    (pc0, _) : _ -> getAp pc0
 
 moduleEndAp :: Module -> CairoSemanticsL (Expr TFelt)
-moduleEndAp Module{m_prog = []} = getStackTraceDescr <&> Expr.const . ("ap!" <>)
-moduleEndAp m@Module{m_prog = _} = getAp' (Just callstack) pc where (callstack, pc) = m_lastPc m
+moduleEndAp mdl = 
+  case md_prog (moduleData mdl) of
+    [] -> getStackTraceDescr <&> Expr.const . ("ap!" <>)
+    _ -> getAp' (Just callstack) pc where (callstack, pc) = md_lastPc (moduleData mdl)
 
 encodeModule :: Module -> CairoSemanticsL ()
-encodeModule m@Module{..} = case m_spec of
-  MSRich spec -> encodeRichSpec m spec
-  MSPlain spec -> encodePlainSpec m spec
+encodeModule mdl = case md_spec (moduleData mdl) of
+  MSRich spec -> encodeRichSpec mdl spec
+  MSPlain spec -> encodePlainSpec mdl spec
 
 {- | Gather the assertions and other state (in the `ConstraintsState` contained
  in `CairoSemanticsL`) associated with a function specification that contains
@@ -316,11 +320,8 @@ encodePlainSpec mdl PlainSpec{..} = do
   assert (fp .<= apStart)
   assertPre =<< prepare' apStart fp ps_pre
 
-  let instrs = m_prog mdl
-
   -- The last FP might be influenced in the optimizing case, we need to grab it as propagated
   -- by the encoding of the semantics of the call.
-
   lastFp <-
     fromMaybe fp . join . safeLast
       <$> for
@@ -329,13 +330,13 @@ encodePlainSpec mdl PlainSpec{..} = do
             mkInstructionConstraints
               instr
               (getNextPcInlinedWithFallback instrs idx)
-              (m_optimisesF mdl)
-              (m_jnzOracle mdl)
+              (moduleOptimisesPreForF mdl)
+              oracle
         )
 
   expect =<< preparePost apEnd lastFp ps_post (isOptimising mdl)
 
-  whenJust (nonEmpty (m_prog mdl)) $ \neInsts -> do
+  whenJust (nonEmpty instrs) $ \neInsts -> do
     -- Normally, 'call's are accompanied by 'ret's for inlined functions. With the optimisation
     -- that splits modules on pre of every non-inlined function, some modules 'finish' their
     -- enconding without actually reaching a subsequent 'ret' for every inlined 'call', thus
@@ -347,7 +348,9 @@ encodePlainSpec mdl PlainSpec{..} = do
     resetStack
     mkApConstraints apEnd neInsts
     resetStack
-    mkBuiltinConstraints apEnd neInsts (m_optimisesF mdl)
+    mkBuiltinConstraints apEnd neInsts (moduleOptimisesPreForF mdl)
+  where
+    (ModuleData _ instrs oracle _ _) = moduleData mdl
 
 exMemoryRemoval :: Expr TBool -> CairoSemanticsL ([MemoryVariable] -> Expr TBool)
 exMemoryRemoval expr = do
@@ -445,6 +448,8 @@ mkInstructionConstraints lInst@(pc, Instruction{..}) nextPc optimisingF jnzOracl
         Nothing -> pure Nothing
     Ret -> pop $> Nothing
 
+{- | A particular case of mkInstructionConstraints for the instruction 'call'.
+-}
 mkCallConstraints ::
   Label ->
   Label ->
@@ -457,44 +462,46 @@ mkCallConstraints pc nextPc fp optimisingF f = do
   nextAp <- prepare pc calleeFp (Vars.fp .== Vars.ap + 2)
   saveOldFp <- prepare pc fp (memory Vars.ap .== Vars.fp)
   setNextPc <- prepare pc fp (memory (Vars.ap + 1) .== fromIntegral (unLabel nextPc))
-  assert (TSMT.and [nextAp, saveOldFp, setNextPc])
+  assert (Expr.and [nextAp, saveOldFp, setNextPc])
   push stackFrame
-  -- We need only a part of the call instruction's semantics for optimizing modules.
   -- Considering we have already pushed the stackFrame by now, we need to make sure that either
   -- the function is inlinable and we'll encounter a 'ret', or we need to pop right away
   -- once we encode semantics of the function.
 
-  -- Determine whether the current function matches the function being optimised exactly -
-  -- this necessitates comparing execution traces.
-  executionContext <- getStackTraceDescr
-  optimisedFExecutionContext <- getStackTraceDescr' ((^. _1) <$> optimisingF)
-  if isJust optimisingF && executionContext == optimisedFExecutionContext
-    then Just <$> getFp <* pop
-    else do
-      -- This is reachable unless the module is optimizing, in which case the precondition
-      -- of the function is necessarily the last thing being checked; as such, the semantics
-      -- of the function being invoked must not be considered.
-      canInline <- isInlinable f
-      if canInline
-        then pure Nothing -- An inlined function will have a 'ret' at some point, do not pop here.
-        else do
-          (FuncSpec pre post storage) <- getFuncSpec f <&> toFuncSpec
-          let pre' = suffixLogicalVariables lvarSuffix pre
-              post' = suffixLogicalVariables lvarSuffix post
-          preparedPre <- prepare nextPc calleeFp =<< storageRemoval pre'
-          -- Grab the state of all storage variables prior to executing the function body.
-          precedingStorage <- getStorage
-          updateStorage =<< traverseStorage (prepare nextPc calleeFp) storage
-          pop
-          -- Dereference storage variable reads with respect to `precedingStorage`.
-          preparedPost <- prepare nextPc calleeFp =<< storageRemoval' (Just precedingStorage) post'
-          assert preparedPre
-          assert preparedPost
-          pure Nothing
+  -- We need only a part of the call instruction's semantics for optimizing modules.
+  guardWith isModuleCheckingPre (Just <$> getFp <* pop) $ do
+  -- This is reachable unless the module is optimizing, in which case the precondition
+  -- of the function is necessarily the last thing being checked; as such, the semantics
+  -- of the function being invoked must not be considered.
+  guardWith (isInlinable f) (pure Nothing) $ do -- An inlined function will have a 'ret' at some point, do not pop here. 
+  (FuncSpec pre post storage) <- getFuncSpec f <&> toFuncSpec
+  let pre' = suffixLogicalVariables lvarSuffix pre
+      post' = suffixLogicalVariables lvarSuffix post
+  preparedPre <- prepare nextPc calleeFp =<< storageRemoval pre'
+  -- Grab the state of all storage variables prior to executing the function body.
+  precedingStorage <- getStorage
+  updateStorage =<< traverseStorage (prepare nextPc calleeFp) storage
+  pop
+  -- Dereference storage variable reads with respect to `precedingStorage`.
+  preparedPost <- prepare nextPc calleeFp =<< storageRemoval' (Just precedingStorage) post'
+  -- One would normally expect to see assert (pre -> post).
+  -- However, pre will be checked in a separate 'optimising' module and we can therefore simply
+  -- assert it holds here. If it does not, the corresponding pre-checking module will fail,
+  -- thus failing the judgement for the entire function.
+  assert preparedPre
+  assert preparedPost
+  pure Nothing
  where
   lvarSuffix = "+" <> tShowLabel pc
   calleePc = sf_pc f
   stackFrame = (pc, calleePc)
+  -- Determine whether the current function matches the function being optimised exactly -
+  -- this necessitates comparing execution traces.
+  isModuleCheckingPre = do
+    executionContext <- getStackTraceDescr
+    optimisedFExecutionContext <- getStackTraceDescr' ((^. _1) <$> optimisingF)
+    pure (isJust optimisingF && executionContext == optimisedFExecutionContext)
+  guardWith condM val cont = do cond <- condM; if cond then val else cont
 
 traverseStorage :: (forall a. Expr a -> CairoSemanticsL (Expr a)) -> Storage -> CairoSemanticsL Storage
 traverseStorage preparer = traverse prepareWrites