From d88f5f6100ff4a63b664e6aa26372c983e624d50 Mon Sep 17 00:00:00 2001 From: Maximilian Algehed Date: Thu, 21 Mar 2024 09:54:51 +0100 Subject: [PATCH] Fix discard ratio and maxSuccess interacting poorly (#371) * Fix discard ratio and maxSuccess interacting poorly when both are quite small + fix `withMaxSuccess` not updating result on discard * factor out `computeSize` to make sure the size computation is the same regardless of if we are using `withMaxSuccess` and `withDiscardRatio` or `stdArgs{maxSuccess=...,maxDiscardRatio=...}` * make sure we run the discard ratio tests always * make sure test builds on ghc 7 --------- Co-authored-by: Ulf Norell --- QuickCheck.cabal | 2 -- src/Test/QuickCheck/Features.hs | 4 ++-- src/Test/QuickCheck/State.hs | 6 +++-- src/Test/QuickCheck/Test.hs | 42 +++++++++++++++++++++------------ tests/DiscardRatio.hs | 35 +++++++++++++++++++-------- 5 files changed, 58 insertions(+), 31 deletions(-) diff --git a/QuickCheck.cabal b/QuickCheck.cabal index 7244e604..c08a8c1f 100644 --- a/QuickCheck.cabal +++ b/QuickCheck.cabal @@ -264,5 +264,3 @@ Test-Suite test-quickcheck-discard hs-source-dirs: tests main-is: DiscardRatio.hs build-depends: base, QuickCheck - if !flag(templateHaskell) || !impl(ghc >= 7.10) || impl(haste) - buildable: False diff --git a/src/Test/QuickCheck/Features.hs b/src/Test/QuickCheck/Features.hs index 9db90c8b..6fd6be84 100644 --- a/src/Test/QuickCheck/Features.hs +++ b/src/Test/QuickCheck/Features.hs @@ -49,7 +49,7 @@ prop_noNewFeatures feats prop = -- > where count x xs = length (filter (== x) xs) -- -- 'labelledExamples' generates three example test cases, one for each label: --- +-- -- >>> labelledExamples prop_delete -- *** Found example of count x xs == 0 -- 0 @@ -100,7 +100,7 @@ labelledExamplesWithResult args prop = mapM_ (putLine (terminal state)) (failingTestCase res) putStrLn "" loop (Set.union feats feats') - state{randomSeed = usedSeed res, computeSize = computeSize state `at0` usedSize res} + state{randomSeed = usedSeed res, replayStartSize = Just $ usedSize res} _ -> do out <- terminalOutput nullterm putStr out diff --git a/src/Test/QuickCheck/State.hs b/src/Test/QuickCheck/State.hs index 7a8df8ea..394d2b57 100644 --- a/src/Test/QuickCheck/State.hs +++ b/src/Test/QuickCheck/State.hs @@ -25,12 +25,14 @@ data State , maxDiscardedRatio :: Int -- ^ maximum number of discarded tests per successful test , coverageConfidence :: Maybe Confidence - -- ^ required coverage confidence - , computeSize :: Int -> Int -> Int -- ^ how to compute the size of test cases from -- #tests and #discarded tests , numTotMaxShrinks :: !Int -- ^ How many shrinks to try before giving up + , replayStartSize :: Maybe Int + -- ^ Size to start at when replaying + , maxTestSize :: !Int + -- ^ Maximum size of test -- dynamic , numSuccessTests :: !Int diff --git a/src/Test/QuickCheck/Test.hs b/src/Test/QuickCheck/Test.hs index bb436f15..1fbca6d3 100644 --- a/src/Test/QuickCheck/Test.hs +++ b/src/Test/QuickCheck/Test.hs @@ -12,6 +12,7 @@ module Test.QuickCheck.Test where -------------------------------------------------------------------------- -- imports +import Control.Applicative import Test.QuickCheck.Gen import Test.QuickCheck.Property hiding ( Result( reason, theException, labels, classes, tables ), (.&.) ) import qualified Test.QuickCheck.Property as P @@ -206,9 +207,8 @@ withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $ , maxSuccessTests = maxSuccess a , coverageConfidence = Nothing , maxDiscardedRatio = maxDiscardRatio a - , computeSize = case replay a of - Nothing -> computeSize' - Just (_,s) -> computeSize' `at0` s + , replayStartSize = snd <$> replay a + , maxTestSize = maxSize a , numTotMaxShrinks = maxShrinks a , numSuccessTests = 0 , numDiscardedTests = 0 @@ -223,17 +223,28 @@ withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $ , numTryShrinks = 0 , numTotTryShrinks = 0 } - where computeSize' n d - -- e.g. with maxSuccess = 250, maxSize = 100, goes like this: - -- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98. - | n `roundTo` maxSize a + maxSize a <= maxSuccess a || - n >= maxSuccess a || - maxSuccess a `mod` maxSize a == 0 = (n `mod` maxSize a + d `div` 10) `min` maxSize a - | otherwise = - ((n `mod` maxSize a) * maxSize a `div` (maxSuccess a `mod` maxSize a) + d `div` 10) `min` maxSize a - n `roundTo` m = (n `div` m) * m - at0 f s 0 0 = s - at0 f s n d = f n d + +computeSize :: State -> Int +computeSize MkState{replayStartSize = Just s,numSuccessTests = 0,numRecentlyDiscardedTests=0} = s +computeSize MkState{maxSuccessTests = ms, maxTestSize = mts, maxDiscardedRatio = md,numSuccessTests=n,numRecentlyDiscardedTests=d} + -- e.g. with maxSuccess = 250, maxSize = 100, goes like this: + -- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98. + | n `roundTo` mts + mts <= ms || + n >= ms || + ms `mod` mts == 0 = (n `mod` mts + d `div` dDenom) `min` mts + | otherwise = + ((n `mod` mts) * mts `div` (ms `mod` mts) + d `div` dDenom) `min` mts + where + -- The inverse of the rate at which we increase size as a function of discarded tests + -- if the discard ratio is high we can afford this to be slow, but if the discard ratio + -- is low we risk bowing out too early + dDenom + | md > 0 = (ms * md `div` 3) `clamp` (1, 10) + | otherwise = 1 -- Doesn't matter because there will be no discards allowed + n `roundTo` m = (n `div` m) * m + +clamp :: Ord a => a -> (a, a) -> a +clamp x (l, h) = max l (min x h) -- | Tests a property and prints the results and all test cases generated to 'stdout'. -- This is just a convenience function that means the same as @'quickCheck' . 'verbose'@. @@ -345,7 +356,7 @@ runATest st f = Just confidence | (1 + numSuccessTests st) `mod` 100 == 0 && powerOfTwo ((1 + numSuccessTests st) `div` 100) -> addCoverageCheck confidence st f _ -> f - let size = computeSize st (numSuccessTests st) (numRecentlyDiscardedTests st) + let size = computeSize st MkRose res ts <- protectRose (reduceRose (unProp (unGen (unProperty f_or_cov) rnd1 size))) res <- callbackPostTest st res @@ -378,6 +389,7 @@ runATest st f = -- Don't add coverage info from this test st{ numDiscardedTests = numDiscardedTests st' + 1 , numRecentlyDiscardedTests = numRecentlyDiscardedTests st' + 1 + , maxSuccessTests = fromMaybe (maxSuccessTests st) (maybeNumTests res) , maxDiscardedRatio = fromMaybe (maxDiscardedRatio st) (maybeDiscardedRatio res) , randomSeed = rnd2 } f diff --git a/tests/DiscardRatio.hs b/tests/DiscardRatio.hs index e800d4bf..5d2f8280 100644 --- a/tests/DiscardRatio.hs +++ b/tests/DiscardRatio.hs @@ -8,15 +8,18 @@ assert :: String -> Bool -> IO () assert s False = do putStrLn $ s ++ " failed!" exitFailure -assert _ _ = pure () +assert _ _ = return () -quickCheckYes, quickCheckNo :: Property -> IO () -quickCheckYes p = do - res <- quickCheckResult p +quickCheckYesWith, quickCheckNoWith :: Testable p => Args -> p -> IO () +quickCheckYesWith args p = do + res <- quickCheckWithResult args p unless (isSuccess res) exitFailure -quickCheckNo p = do - res <- quickCheckResult p +quickCheckNoWith args p = do + res <- quickCheckWithResult args p when (isSuccess res) exitFailure +quickCheckYes, quickCheckNo :: Testable p => p -> IO () +quickCheckYes = quickCheckYesWith stdArgs +quickCheckNo = quickCheckNoWith stdArgs check :: Result -> Int -> Int -> IO () check res n d = do @@ -25,18 +28,30 @@ check res n d = do main :: IO () main = do - putStrLn "Testing: False ==> True" + putStrLn "Expecting gave up after 200 tries: False ==> True" res <- quickCheckResult $ withDiscardRatio 2 $ False ==> True check res 0 200 + res <- quickCheckWithResult stdArgs{maxDiscardRatio = 2} $ False ==> True + check res 0 200 - putStrLn "Testing: x == x" + putStrLn "\nExpecting success after 100 tests: x == x" res <- quickCheckResult $ withDiscardRatio 2 $ \ x -> (x :: Int) == x check res 100 0 + res <- quickCheckWithResult stdArgs{maxDiscardRatio = 2} $ \ x -> (x :: Int) == x + check res 100 0 -- The real ratio is 20, if 1 works or 40 doesn't it's -- probably because we broke something! let p50 = forAll (choose (1, 1000)) $ \ x -> (x :: Int) < 50 ==> True - putStrLn "Expecting failure (discard ratio 1): x < 50 ==> True" + putStrLn "\nExpecting failure (discard ratio 1): x < 50 ==> True" quickCheckNo $ withDiscardRatio 1 p50 - putStrLn "Expecting success (discard ratio 40): x < 50 ==> True" + quickCheckNoWith stdArgs{maxDiscardRatio = 1} p50 + putStrLn "\nExpecting success (discard ratio 40): x < 50 ==> True" quickCheckYes $ withDiscardRatio 40 p50 + quickCheckYesWith stdArgs{maxDiscardRatio = 40} p50 + + -- This was brought to our attention by @robx in issue #338 + let p k k' = k /= k' ==> (k :: Int) /= k' + putStrLn "\nExpecting success (maxSuccess = 1): k /= k' ==> k /= k'" + quickCheckYes $ withMaxSuccess 1 p + quickCheckYesWith stdArgs{maxSuccess = 1} p