Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify epoch state diff logging #5863

Merged
merged 1 commit into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion cardano-testnet/cardano-testnet.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ library
build-depends: aeson
, aeson-pretty
, ansi-terminal
, async
, bytestring
, cardano-api ^>= 8.46
, cardano-cli ^>= 8.23
Expand Down
3 changes: 2 additions & 1 deletion cardano-testnet/src/Testnet/Components/Query.hs
Original file line number Diff line number Diff line change
Expand Up @@ -248,12 +248,13 @@ getEpochStateView
:: HasCallStack
=> MonadResource m
=> MonadTest m
=> MonadCatch m
=> NodeConfigFile In -- ^ node Yaml configuration file path
-> SocketPath -- ^ node socket path
-> m EpochStateView
getEpochStateView nodeConfigFile socketPath = withFrozenCallStack $ do
epochStateView <- H.evalIO $ newIORef Nothing
runInBackground (return ()) . runExceptT . foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) Nothing
runInBackground . runExceptT . foldEpochState nodeConfigFile socketPath QuickValidation (EpochNo maxBound) Nothing
$ \epochState slotNumber blockNumber -> do
liftIO . writeIORef epochStateView $ Just (epochState, slotNumber, blockNumber)
pure ConditionNotMet
Expand Down
45 changes: 8 additions & 37 deletions cardano-testnet/src/Testnet/Property/Util.hs
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,11 @@ module Testnet.Property.Util

import Cardano.Api

import Control.Concurrent.Async
import qualified Control.Exception as E
import Control.Exception.Safe (MonadCatch)
import Control.Monad
import Control.Monad.Trans.Resource
import qualified Data.Aeson as Aeson
import GHC.Stack
import Network.Mux.Trace
import qualified System.Environment as IO
import System.Info (os)
import qualified System.IO.Unsafe as IO
Expand Down Expand Up @@ -64,43 +62,16 @@ isLinux :: Bool
isLinux = os == "linux"

-- | Runs an action in background, and registers cleanup to `MonadResource m`
-- Concurrency is tricky in the 'ResourceT' monad. See the "Concurrency" section of
-- https://www.fpcomplete.com/blog/understanding-resourcet/.
-- The argument forces IO monad to prevent leaking of `MonadResource` to the child thread
runInBackground :: MonadTest m
=> MonadResource m
=> IO ()
-> IO a
=> MonadCatch m
=> IO a
-> m ()
runInBackground runOnException act =
void . H.evalIO
$ runResourceT
-- We don't 'wait' because this "background process" may not terminate.
-- If we 'wait' and it doesn't terminate, 'ResourceT' will not kill it
-- and the test will hang indefinitely.
-- Not waiting isn't a problem because this "background process"
-- is meant to run indefinitely and will be cleaned up by
-- 'ResourceT' when the test ends or fails.
-- We use 'asyncWithUnmask' because our logging thread is terminated via an exception.
-- In order to avoid competing for a file handle we must catch the exception which signals
-- the logging file is no longer being written to and we can now run the desired additional IO action we
-- want (runOnException). Attempting to share the 'FileHandle' and use concurrency primitives was not fruitful
-- and the section "Other ways to abuse ResourceT" in https://www.fpcomplete.com/blog/understanding-resourcet/
-- confirms this is problematic in 'ResourceT'.
$ resourceForkWith (\_ -> do r <- H.asyncWithUnmask (\restore -> restore act `E.onException` runOnException)
linkOnly ignoreException r
) $ return ()
where
ignoreException :: E.SomeException -> Bool
ignoreException e =
case E.fromException e of
Just (MuxError errType _) ->
case errType of
MuxBearerClosed -> False
-- This is expected as the background thread is killed.
-- However we do want to be made aware about other
-- exceptions.
_ -> True
_ -> False
runInBackground act = void . H.evalM $ allocate (H.async act) cleanUp
where
cleanUp :: H.Async a -> IO ()
cleanUp a = H.cancel a >> H.link a

decodeEraUTxO :: (IsShelleyBasedEra era, MonadTest m) => ShelleyBasedEra era -> Aeson.Value -> m (UTxO era)
decodeEraUTxO _ = H.jsonErrorFail . Aeson.fromJSON
Expand Down
121 changes: 41 additions & 80 deletions cardano-testnet/src/Testnet/Runtime.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DuplicateRecordFields #-}
{-# LANGUAGE ExistentialQuantification #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE GADTs #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
Expand Down Expand Up @@ -30,11 +32,8 @@ import Data.Aeson.Encode.Pretty (encodePretty)
import Data.Algorithm.Diff
import Data.Algorithm.DiffOutput
import qualified Data.ByteString.Lazy.Char8 as BSC
import Data.Function
import qualified Data.List as List
import Data.Text (Text, unpack)
import qualified Data.Text as Text
import qualified Data.Text.IO as Text
import GHC.Stack
import qualified GHC.Stack as GHC
import Network.Socket (PortNumber)
Expand Down Expand Up @@ -204,6 +203,7 @@ startLedgerNewEpochStateLogging testnetRuntime tmpWorkspace = withFrozenCallStac
let logDir = makeLogDir (TmpAbsolutePath tmpWorkspace)
-- used as a lock to start only a single instance of epoch state logging
logFile = logDir </> "ledger-epoch-state.log"
diffFile = logDir </> "ledger-epoch-state-diffs.log"

H.evalIO (IO.doesDirectoryExist logDir) >>= \case
True -> pure ()
Expand All @@ -218,97 +218,58 @@ startLedgerNewEpochStateLogging testnetRuntime tmpWorkspace = withFrozenCallStac
H.evalIO $ appendFile logFile ""
socketPath <- H.noteM $ H.sprocketSystemName <$> H.headM (poolSprockets testnetRuntime)

runInBackground
(do logFileContents <- IO.readFile logFile
let epochStateValues = epochStateBeforeAfterValues logFileContents
epochStateDiffs' = epochStateDiffs epochStateValues
Text.writeFile (logDir </> "ledger-epoch-state-diffs.log") epochStateDiffs'
)
(do void $ runExceptT $
foldEpochState
(configurationFile testnetRuntime)
(Api.File socketPath)
Api.QuickValidation
(EpochNo maxBound)
()
(\epochState _ _ ->
liftIO $ evalStateT (handler logFile epochState) 0
)
)


H.note_ $ "Started logging epoch states to: " <> logFile
_ <- runInBackground . runExceptT $
foldEpochState
(configurationFile testnetRuntime)
(Api.File socketPath)
Api.QuickValidation
(EpochNo maxBound)
Nothing
(handler logFile diffFile)

H.note_ $ "Started logging epoch states to: " <> logFile <> "\nEpoch state diffs are logged to: " <> diffFile

where
handler :: FilePath -> AnyNewEpochState -> StateT Int IO LedgerStateCondition
handler outputFpHandle (AnyNewEpochState sbe nes) = do
handleException . liftIO $ do
appendFile outputFpHandle $ "#### BLOCK ####" <> "\n"
appendFile outputFpHandle $ BSC.unpack (shelleyBasedEraConstraints sbe $ encodePretty nes) <> "\n"
pure ConditionNotMet
handler :: FilePath -- ^ log file
-> FilePath -- ^ diff file
-> AnyNewEpochState
-> SlotNo
-> BlockNo
-> StateT (Maybe AnyNewEpochState) IO LedgerStateCondition
handler outputFp diffFp anes@(AnyNewEpochState !sbe !nes) _ (BlockNo blockNo) = handleException $ do
let prettyNes = shelleyBasedEraConstraints sbe (encodePretty nes)
blockLabel = "#### BLOCK " <> show blockNo <> " ####"
liftIO . BSC.appendFile outputFp $ BSC.unlines [BSC.pack blockLabel, prettyNes, ""]

-- store epoch state for logging of differences
mPrevEpochState <- get
put (Just anes)
forM_ mPrevEpochState $ \(AnyNewEpochState sbe' pnes) -> do
let prettyPnes = shelleyBasedEraConstraints sbe' (encodePretty pnes)
difference = calculateEpochStateDiff prettyPnes prettyNes
liftIO . appendFile diffFp $ unlines [blockLabel, difference, ""]

pure ConditionNotMet
where
-- | Handle all sync exceptions and log them into the log file. We don't want to fail the test just
-- because logging has failed.
handleException = handle $ \(e :: SomeException) -> do
liftIO $ appendFile outputFpHandle $ "Ledger new epoch logging failed - caught exception:\n"
liftIO $ appendFile outputFp $ "Ledger new epoch logging failed - caught exception:\n"
<> displayException e <> "\n"
pure ConditionMet
-- TODO: Not sure why this isn't terminating. Read up on resourcet and how it works.
-- See concurrency section: https://www.fpcomplete.com/blog/understanding-resourcet/
-- Probably need to use resourceForkWith but best to not use concurrency at all!


-- | Produce tuples that represent the change of the 'NewEpochState' after
-- a transition.
epochStateBeforeAfterValues
:: String
-> [(Text, Text)]
epochStateBeforeAfterValues logFileContents =
let allEpochStates = filter (/= "") . Text.splitOn "#### BLOCK ####" $ Text.pack logFileContents
in getAllTransitions allEpochStates

getAllTransitions :: [Text] -> [(Text, Text)]
getAllTransitions ts = do
let ts' = drop 1 $ ts <> [""]
zip ts ts'

epochStateDiffs
:: [(Text,Text)]
-> Text
epochStateDiffs [] = "No epoch state values to compare"
epochStateDiffs states =
-- We first get the block number changes
labelAllEpochStateTransitions
[ Text.pack $ epochStateTransitionDiff (Text.unpack i) (Text.unpack n)
| (i, n) <- states
] & Text.intercalate "\n"
where
labelSingleEpochStateTransition :: Int -> Text
labelSingleEpochStateTransition transitionNumber =
"Epoch state transition: " <> Text.pack (show transitionNumber)

labelAllEpochStateTransitions [] = []
labelAllEpochStateTransitions trans =
go trans (1 :: Int) []
where
go [] _ acc = acc
go (x:xs) 1 _ = go xs 2 [labelSingleEpochStateTransition 1, x]
go (x:xs) n acc = go xs (n + 1) (acc ++ [labelSingleEpochStateTransition n, x])

epochStateTransitionDiff
:: String -- ^ Initial epoch state
-> String -- ^ Following epoch state

calculateEpochStateDiff
:: BSC.ByteString -- ^ Current epoch state
-> BSC.ByteString -- ^ Following epoch state
-> String
epochStateTransitionDiff initialState next =
let removeBlockNumberChangeInitial = lines initialState
removeBlockNumberChangeNext = lines next
diffResult = getGroupedDiff removeBlockNumberChangeInitial removeBlockNumberChangeNext
calculateEpochStateDiff current next =
let diffResult = getGroupedDiff (BSC.unpack <$> BSC.lines current) (BSC.unpack <$> BSC.lines next)
in if null diffResult
then "No changes in epoch state"
else ppDiff diffResult

instance (L.EraTxOut ledgerera, L.EraGov ledgerera) => ToJSON (L.NewEpochState ledgerera) where
toJSON (L.NewEpochState nesEL nesBprev nesBCur nesEs nesRu nesPd _stashedAvvm)=
toJSON (L.NewEpochState nesEL nesBprev nesBCur nesEs nesRu nesPd _stashedAvvm) =
object
[ "currentEpoch" .= nesEL
, "priorBlocks" .= nesBprev
Expand Down
Loading