Skip to content
This repository has been archived by the owner on Jun 18, 2021. It is now read-only.

Towards being able to handle crashing processes. #317

Merged
merged 3 commits into from
Jun 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ semantics bug cmd = case cmd of
where
-- One of the problems is a bug that writes a wrong value to the
-- reference.
i' | i `elem` [5..10] = if bug == Logic then i + 1 else i
| otherwise = i
i' | bug == Logic && i `elem` [5..10] = i + 1
| otherwise = i
Increment ref -> do
-- The other problem is that we introduce a possible race condition
-- when incrementing.
Expand Down
2 changes: 2 additions & 0 deletions src/Test/StateMachine.hs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ module Test.StateMachine
-- * Parallel property combinators
, forAllParallelCommands
, runParallelCommands
, runParallelCommands'
, runParallelCommandsNTimes
, runParallelCommandsNTimes'
, prettyParallelCommands

-- * Types
Expand Down
105 changes: 84 additions & 21 deletions src/Test/StateMachine/Parallel.hs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ module Test.StateMachine.Parallel
, shrinkParallelCommands
, shrinkAndValidateParallel
, runParallelCommands
, runParallelCommands'
, runParallelCommandsNTimes
, runParallelCommandsNTimes'
, executeParallelCommands
, linearise
, toBoxDrawings
Expand All @@ -35,7 +37,7 @@ module Test.StateMachine.Parallel
import Control.Arrow
((***))
import Control.Monad
(foldM, replicateM)
(replicateM)
import Control.Monad.Catch
(MonadCatch)
import Control.Monad.State.Strict
Expand All @@ -54,7 +56,8 @@ import Data.Tree
(Tree(Node))
import Prelude
import Test.QuickCheck
(Gen, Property, Testable, choose, property, sized, forAllShrinkShow)
(Gen, Property, Testable, choose, forAllShrinkShow,
property, sized)
import Test.QuickCheck.Monadic
(PropertyM, run)
import Text.PrettyPrint.ANSI.Leijen
Expand Down Expand Up @@ -306,6 +309,15 @@ runParallelCommands :: (Show (cmd Concrete), Show (resp Concrete))
-> PropertyM m [(History cmd resp, Logic)]
runParallelCommands sm = runParallelCommandsNTimes 10 sm

runParallelCommands' :: (Show (cmd Concrete), Show (resp Concrete))
=> (Rank2.Traversable cmd, Rank2.Foldable resp)
=> (MonadCatch m, MonadUnliftIO m)
=> StateMachine model cmd m resp
-> (cmd Concrete -> resp Concrete)
-> ParallelCommands cmd resp
-> PropertyM m [(History cmd resp, Logic)]
runParallelCommands' sm = runParallelCommandsNTimes' 10 sm

runParallelCommandsNTimes :: (Show (cmd Concrete), Show (resp Concrete))
=> (Rank2.Traversable cmd, Rank2.Foldable resp)
=> (MonadCatch m, MonadUnliftIO m)
Expand All @@ -315,48 +327,99 @@ runParallelCommandsNTimes :: (Show (cmd Concrete), Show (resp Concrete))
-> PropertyM m [(History cmd resp, Logic)]
runParallelCommandsNTimes n sm cmds =
replicateM n $ do
(hist, _reason) <- run (executeParallelCommands sm cmds)
(hist, _reason1, _reason2) <- run (executeParallelCommands sm cmds)
return (hist, linearise sm hist)

runParallelCommandsNTimes' :: (Show (cmd Concrete), Show (resp Concrete))
=> (Rank2.Traversable cmd, Rank2.Foldable resp)
=> (MonadCatch m, MonadUnliftIO m)
=> Int -- ^ How many times to execute the parallel program.
-> StateMachine model cmd m resp
-> (cmd Concrete -> resp Concrete)
-> ParallelCommands cmd resp
-> PropertyM m [(History cmd resp, Logic)]
runParallelCommandsNTimes' n sm complete cmds =
replicateM n $ do
(hist, _reason1, _reason2) <- run (executeParallelCommands sm cmds)
let hist' = completeHistory complete hist
return (hist', linearise sm hist')

executeParallelCommands :: (Show (cmd Concrete), Show (resp Concrete))
=> (Rank2.Traversable cmd, Rank2.Foldable resp)
=> (MonadCatch m, MonadUnliftIO m)
=> StateMachine model cmd m resp
-> ParallelCommands cmd resp
-> m (History cmd resp, Reason)
-> m (History cmd resp, Reason, Reason)
executeParallelCommands sm@StateMachine{ initModel } (ParallelCommands prefix suffixes) = do

hchan <- newTChanIO

(reason0, (env0, _smodel, _counter, _cmodel)) <- runStateT
(executeCommands sm hchan (Pid 0) True prefix)
(executeCommands sm hchan (Pid 0) CheckEverything prefix)
(emptyEnvironment, initModel, newCounter, initModel)

if reason0 /= Ok
then do
hist <- getChanContents hchan
return (History hist, reason0)
return (History hist, reason0, reason0)
else do
(reason, _) <- foldM (go hchan) (reason0, env0) suffixes
(reason1, reason2, _) <- go hchan (Ok, Ok, env0) suffixes
hist <- getChanContents hchan
return (History hist, reason)
return (History hist, reason1, reason2)
where
go hchan (_, env) (Pair cmds1 cmds2) = do
go _hchan (res1, res2, env) [] = return (res1, res2, env)
go hchan (Ok, Ok, env) (Pair cmds1 cmds2 : pairs) = do

((reason1, (env1, _, _, _)), (reason2, (env2, _, _, _))) <- concurrently

-- XXX: Post-conditions not checked, so we can pass in initModel here...
-- It would be better if we made executeCommands take a Maybe model
-- instead of the boolean...

(runStateT (executeCommands sm hchan (Pid 1) False cmds1) (env, initModel, newCounter, initModel))
(runStateT (executeCommands sm hchan (Pid 2) False cmds2) (env, initModel, newCounter, initModel))
return ( reason1 `combineReason` reason2
, env1 <> env2
)
where
combineReason :: Reason -> Reason -> Reason
combineReason Ok r2 = r2
combineReason r1 _ = r1
-- It would be better if we made executeCommands take a Maybe Environment
-- instead of the Check...

(runStateT (executeCommands sm hchan (Pid 1) CheckNothing cmds1) (env, initModel, newCounter, initModel))
(runStateT (executeCommands sm hchan (Pid 2) CheckNothing cmds2) (env, initModel, newCounter, initModel))
go hchan ( reason1
, reason2
, env1 <> env2
) pairs
go hchan (Ok, ExceptionThrown, env) (Pair cmds1 _cmds2 : pairs) = do
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@stevana is there any reason to continue execution if there is an ExceptionThrown?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sequential execution also stops if there is an exception.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we should stop execution for a specific process (or Pid) if there's an ExceptionThrown on that process (but continue executing on the other processes). I believe that's what I've implemented here.

This is also why we need your genernalisation to allow execution on more than two processes, so that we can introduce faults (exceptions) without stopping all execution altogether.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, I guess it depends on how we want to simulate user usage of the api and since this gives more user option I think it's on the right direction. @stevana do you think this is close to being merged? I see some errors like executeCommands: impossible and some false positives tests, which I believe will be solved with this pr.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Another important thing that this PR adds is the ability to complete a history which contains crashed processes. We do so by appending a user specified response to the end of the history. For example in the case of the memory reference example, if a Write fails then we can have a history like this:

Process 1: |-- x <- Create --|                           |---- Read x ---|
Process 2:                       |---- Write 5 x  --...        

In this case Read can return both 0 (the default value of create) or 5 depending on if Write crashed before or after writing.

To account of this we complete the above history as follows:

Process 1: |-- x <- Create --|                           |---- Read x ---|
Process 2:                       |---- Write 5 x  -------------------------|    

Notice that the Write is concurrent with the Read. Now because of the way linearise works (it tries all possible sequential interleavings of the action calls) it will accept Read returning both 0 and 5.

To complete for Write is easy as it's just an Ack, and it doesn't really matter what we complete Read with as it doesn't change the model. If a Create crashes, we are kind of screwed that's why in this PR I also allowed the pre-condition failed exception to be thrown. No doubt will there be examples for which complete will be harder...

A possible alternative would be to have the user catch all exceptions (due to fault injection) and account of the non-determinism, e.g. have not just a single value for each memory reference, but a set of values. This complicates the model and is a lot more work for the user though.

Yet another possibility might be to have the fault injection be more precise, so that we know if the Write failed before or after it wrote to the memory. That way we know if it should return 0 or 5. I'm not sure if having this precision is always possible. It also complicates the model because we need to keep track of exactly what faults are injected and return the correct response in the presence of the fault.

Does this make sense? If not, let me know where I lost you and I can try to explain further.

It's my current understanding of the Linearizability paper and what Jepsen does. It took me a while to get here, and I'm still not certain if I understand things correctly. So I think it would be great if: 1) I could convince you that this approach makes sense, and 2) that we develop more examples that confirm that it indeed works.


-- XXX: It's possible that pre-conditions fail at this point, because
-- commands may depend on references that never got created in the crashed
-- process. For example, consider:
--
-- x <- Create
-- ------------+----------
-- Write 1 x | Write 2 x
-- y <- Create |
-- ------------+----------
-- Write 3 x | Write 4 y
-- | Read x
--
-- If the @Write 1 x@ fails, @y@ will never be created and the
-- pre-condition for @Write 4 y@ will fail. This also means that @Read x@
-- will never get executed, and so there could be a bug in @Write@ that
-- never gets discovered. Not sure if we can do something better here?
--
(reason1, (env1, _, _, _)) <- runStateT (executeCommands sm hchan (Pid 1) CheckPrecondition cmds1)
(env, initModel, newCounter, initModel)
go hchan ( reason1
, ExceptionThrown
, env1
) pairs
go hchan (ExceptionThrown, Ok, env) (Pair _cmds1 cmds2 : pairs) = do

(reason2, (env2, _, _, _)) <- runStateT (executeCommands sm hchan (Pid 2) CheckPrecondition cmds2)
(env, initModel, newCounter, initModel)
go hchan ( ExceptionThrown
, reason2
, env2
) pairs
go _hchan out@(ExceptionThrown, ExceptionThrown, _env) (_ : _) = return out
go _hchan out@(PreconditionFailed {}, ExceptionThrown, _env) (_ : _) = return out
go _hchan out@(ExceptionThrown, PreconditionFailed {}, _env) (_ : _) = return out
go _hchan (res1, res2, _env) (Pair _cmds1 _cmds2 : _pairs) =
error ("executeParallelCommands, unexpected result: " ++ show (res1, res2))

------------------------------------------------------------------------

Expand Down
38 changes: 23 additions & 15 deletions src/Test/StateMachine/Sequential.hs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ module Test.StateMachine.Sequential
, initValidateEnv
, runCommands
, getChanContents
, Check(..)
, executeCommands
, prettyPrintHistory
, prettyPrintHistory'
Expand Down Expand Up @@ -327,7 +328,7 @@ runCommands sm@StateMachine { initModel } = run . go
go cmds = do
hchan <- newTChanIO
(reason, (_, _, _, model)) <- runStateT
(executeCommands sm hchan (Pid 0) True cmds)
(executeCommands sm hchan (Pid 0) CheckEverything cmds)
(emptyEnvironment, initModel, newCounter, initModel)
hist <- getChanContents hchan
return (History hist, model, reason)
Expand All @@ -341,13 +342,18 @@ getChanContents chan = reverse <$> atomically (go' [])
Just x -> go' (x : acc)
Nothing -> return acc

data Check
= CheckPrecondition
| CheckEverything
| CheckNothing

executeCommands :: (Show (cmd Concrete), Show (resp Concrete))
=> (Rank2.Traversable cmd, Rank2.Foldable resp)
=> (MonadCatch m, MonadIO m)
=> StateMachine model cmd m resp
-> TChan (Pid, HistoryEvent cmd resp)
-> Pid
-> Bool -- ^ Check invariant and post-condition?
-> Check
-> Commands cmd resp
-> StateT (Environment, model Symbolic, Counter, model Concrete) m Reason
executeCommands StateMachine {..} hchan pid check =
Expand All @@ -357,8 +363,9 @@ executeCommands StateMachine {..} hchan pid check =
go (Command scmd _ vars : cmds) = do
(env, smodel, counter, cmodel) <- get
case (check, logic (precondition smodel scmd)) of
(True, VFalse ce) -> return (PreconditionFailed (show ce))
_ -> do
(CheckPrecondition, VFalse ce) -> return (PreconditionFailed (show ce))
(CheckEverything, VFalse ce) -> return (PreconditionFailed (show ce))
_otherwise -> do
let ccmd = fromRight (error "executeCommands: impossible") (reify env scmd)
atomically (writeTChan hchan (pid, Invocation ccmd (S.fromList vars)))
!ecresp <- lift $ (fmap Right (semantics ccmd)) `catch`
Expand All @@ -379,17 +386,18 @@ executeCommands StateMachine {..} hchan pid check =
else do
atomically (writeTChan hchan (pid, Response cresp))
case (check, logic (postcondition cmodel ccmd cresp)) of
(True, VFalse ce) -> return (PostconditionFailed (show ce))
_ -> case (check, logic (fromMaybe (const Top) invariant cmodel)) of
(True, VFalse ce') -> return (InvariantBroken (show ce'))
_ -> do
let (sresp, counter') = runGenSym (mock smodel scmd) counter
put ( insertConcretes vars cvars env
, transition smodel scmd sresp
, counter'
, transition cmodel ccmd cresp
)
go cmds
(CheckEverything, VFalse ce) -> return (PostconditionFailed (show ce))
_otherwise ->
case (check, logic (fromMaybe (const Top) invariant cmodel)) of
(CheckEverything, VFalse ce') -> return (InvariantBroken (show ce'))
_otherwise -> do
let (sresp, counter') = runGenSym (mock smodel scmd) counter
put ( insertConcretes vars cvars env
, transition smodel scmd sresp
, counter'
, transition cmodel ccmd cresp
)
go cmds

isSomeAsyncException :: SomeException -> Bool
isSomeAsyncException se = case fromException se of
Expand Down
35 changes: 33 additions & 2 deletions src/Test/StateMachine/Types/History.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE StandaloneDeriving #-}

-----------------------------------------------------------------------------
Expand All @@ -23,9 +24,12 @@ module Test.StateMachine.Types.History
, Operation(..)
, makeOperations
, interleavings
, completeHistory
)
where

import Data.List
((\\))
import Data.Set
(Set)
import Data.Tree
Expand Down Expand Up @@ -86,6 +90,8 @@ data Operation cmd resp
deriving instance (Show (cmd Concrete), Show (resp Concrete)) =>
Show (Operation cmd resp)

-- | Given a sequential history, group invocation and response events into
-- operations.
makeOperations :: History' cmd resp -> [Operation cmd resp]
makeOperations [] = []
makeOperations [(pid1, Invocation cmd _), (pid2, Exception err)]
Expand All @@ -96,9 +102,9 @@ makeOperations ((pid1, Invocation cmd _) : (pid2, Response resp) : hist)
| otherwise = error "makeOperations: impossible, pid mismatch."
makeOperations _ = error "makeOperations: impossible."

-- | Given a history, return all possible interleavings of invocations
-- | Given a parallel history, return all possible interleavings of invocations
-- and corresponding response events.
interleavings :: [(Pid, HistoryEvent cmd resp)] -> Forest (Operation cmd resp)
interleavings :: History' cmd resp -> Forest (Operation cmd resp)
interleavings [] = []
interleavings es =
[ Node (Operation cmd resp pid) (interleavings es')
Expand All @@ -113,3 +119,28 @@ interleavings es =
filter1 _ [] = []
filter1 p (x : xs) | p x = x : filter1 p xs
| otherwise = xs

------------------------------------------------------------------------

crashingInvocations :: History' cmd resp -> History' cmd resp
crashingInvocations = go [] [] . reverse
where
go :: [Pid] -> History' cmd resp -> History' cmd resp -> History' cmd resp
go _crashedPids crashedInvs [] = reverse crashedInvs
go crashedPids crashedInvs ((pid, Exception _err) : es)
| pid `elem` crashedPids = error "impossible, a process cannot crash more than once."
| otherwise = go (pid : crashedPids) crashedInvs es
go crashedPids crashedInvs (e@(pid, Invocation {}) : es)
| pid `elem` crashedPids = go (crashedPids \\ [pid]) (e : crashedInvs) es
| otherwise = go crashedPids crashedInvs es
go crashedPids crashedInvs ((_pid, Response {}) : es) =
go crashedPids crashedInvs es

completeHistory :: (cmd Concrete -> resp Concrete) -> History cmd resp
-> History cmd resp
completeHistory complete hist = History (hist' ++ resps)
where
hist' = unHistory hist
resps = [ (pid, Response (complete cmd))
| (pid, Invocation cmd _vars) <- crashingInvocations hist'
]
Loading