Fix discard ratio and maxSuccess interacting poorly (#371)

* Fix discard ratio and maxSuccess interacting poorly when both are quite small + fix `withMaxSuccess` not updating result on discard * factor out `computeSize` to make sure the size computation is the same regardless of if we are using `withMaxSuccess` and `withDiscardRatio` or `stdArgs{maxSuccess=...,maxDiscardRatio=...}` * make sure we run the discard ratio tests always * make sure test builds on ghc 7 --------- Co-authored-by: Ulf Norell <[email protected]>
nick8325 · Mar 21, 2024 · d88f5f6 · d88f5f6
1 parent 60a95f7
commit d88f5f6
Show file tree

Hide file tree

Showing 5 changed files with 58 additions and 31 deletions.
diff --git a/QuickCheck.cabal b/QuickCheck.cabal
@@ -264,5 +264,3 @@ Test-Suite test-quickcheck-discard
     hs-source-dirs: tests
     main-is: DiscardRatio.hs
     build-depends: base, QuickCheck
-    if !flag(templateHaskell) || !impl(ghc >= 7.10) || impl(haste)
-        buildable: False
diff --git a/src/Test/QuickCheck/Features.hs b/src/Test/QuickCheck/Features.hs
@@ -49,7 +49,7 @@ prop_noNewFeatures feats prop =
 -- >   where count x xs = length (filter (== x) xs)
 --
 -- 'labelledExamples' generates three example test cases, one for each label:
--- 
+--
 -- >>> labelledExamples prop_delete
 -- *** Found example of count x xs == 0
 -- 0
@@ -100,7 +100,7 @@ labelledExamplesWithResult args prop =
             mapM_ (putLine (terminal state)) (failingTestCase res)
             putStrLn ""
             loop (Set.union feats feats')
-              state{randomSeed = usedSeed res, computeSize = computeSize state `at0` usedSize res}
+              state{randomSeed = usedSeed res, replayStartSize = Just $ usedSize res}
           _ -> do
             out <- terminalOutput nullterm
             putStr out

diff --git a/src/Test/QuickCheck/State.hs b/src/Test/QuickCheck/State.hs
@@ -25,12 +25,14 @@ data State
   , maxDiscardedRatio         :: Int
     -- ^ maximum number of discarded tests per successful test
   , coverageConfidence        :: Maybe Confidence
-    -- ^ required coverage confidence
-  , computeSize               :: Int -> Int -> Int
     -- ^ how to compute the size of test cases from
     --   #tests and #discarded tests
   , numTotMaxShrinks          :: !Int
     -- ^ How many shrinks to try before giving up
+  , replayStartSize           :: Maybe Int
+    -- ^ Size to start at when replaying
+  , maxTestSize               :: !Int
+    -- ^ Maximum size of test
 
     -- dynamic
   , numSuccessTests           :: !Int

diff --git a/src/Test/QuickCheck/Test.hs b/src/Test/QuickCheck/Test.hs
@@ -12,6 +12,7 @@ module Test.QuickCheck.Test where
 --------------------------------------------------------------------------
 -- imports
 
+import Control.Applicative
 import Test.QuickCheck.Gen
 import Test.QuickCheck.Property hiding ( Result( reason, theException, labels, classes, tables ), (.&.) )
 import qualified Test.QuickCheck.Property as P
@@ -206,9 +207,8 @@ withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $
                  , maxSuccessTests           = maxSuccess a
                  , coverageConfidence        = Nothing
                  , maxDiscardedRatio         = maxDiscardRatio a
-                 , computeSize               = case replay a of
-                                                 Nothing    -> computeSize'
-                                                 Just (_,s) -> computeSize' `at0` s
+                 , replayStartSize           = snd <$> replay a
+                 , maxTestSize               = maxSize a
                  , numTotMaxShrinks          = maxShrinks a
                  , numSuccessTests           = 0
                  , numDiscardedTests         = 0
@@ -223,17 +223,28 @@ withState a test = (if chatty a then withStdioTerminal else withNullTerminal) $
                  , numTryShrinks             = 0
                  , numTotTryShrinks          = 0
                  }
-  where computeSize' n d
-          -- e.g. with maxSuccess = 250, maxSize = 100, goes like this:
-          -- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98.
-          | n `roundTo` maxSize a + maxSize a <= maxSuccess a ||
-            n >= maxSuccess a ||
-            maxSuccess a `mod` maxSize a == 0 = (n `mod` maxSize a + d `div` 10) `min` maxSize a
-          | otherwise =
-            ((n `mod` maxSize a) * maxSize a `div` (maxSuccess a `mod` maxSize a) + d `div` 10) `min` maxSize a
-        n `roundTo` m = (n `div` m) * m
-        at0 f s 0 0 = s
-        at0 f s n d = f n d
+
+computeSize :: State -> Int
+computeSize MkState{replayStartSize = Just s,numSuccessTests = 0,numRecentlyDiscardedTests=0} = s
+computeSize MkState{maxSuccessTests = ms, maxTestSize = mts, maxDiscardedRatio = md,numSuccessTests=n,numRecentlyDiscardedTests=d}
+    -- e.g. with maxSuccess = 250, maxSize = 100, goes like this:
+    -- 0, 1, 2, ..., 99, 0, 1, 2, ..., 99, 0, 2, 4, ..., 98.
+    | n `roundTo` mts + mts <= ms ||
+      n >= ms ||
+      ms `mod` mts == 0 = (n `mod` mts + d `div` dDenom) `min` mts
+    | otherwise =
+      ((n `mod` mts) * mts `div` (ms `mod` mts) + d `div` dDenom) `min` mts
+  where
+    -- The inverse of the rate at which we increase size as a function of discarded tests
+    -- if the discard ratio is high we can afford this to be slow, but if the discard ratio
+    -- is low we risk bowing out too early
+    dDenom
+      | md > 0 = (ms * md `div` 3) `clamp` (1, 10)
+      | otherwise = 1 -- Doesn't matter because there will be no discards allowed
+    n `roundTo` m = (n `div` m) * m
+
+clamp :: Ord a => a -> (a, a) -> a
+clamp x (l, h) = max l (min x h)
 
 -- | Tests a property and prints the results and all test cases generated to 'stdout'.
 -- This is just a convenience function that means the same as @'quickCheck' . 'verbose'@.
@@ -345,7 +356,7 @@ runATest st f =
              Just confidence | (1 + numSuccessTests st) `mod` 100 == 0 && powerOfTwo ((1 + numSuccessTests st) `div` 100) ->
                addCoverageCheck confidence st f
              _ -> f
-     let size = computeSize st (numSuccessTests st) (numRecentlyDiscardedTests st)
+     let size = computeSize st
      MkRose res ts <- protectRose (reduceRose (unProp (unGen (unProperty f_or_cov) rnd1 size)))
      res <- callbackPostTest st res
 
@@ -378,6 +389,7 @@ runATest st f =
               -- Don't add coverage info from this test
               st{ numDiscardedTests         = numDiscardedTests st' + 1
                 , numRecentlyDiscardedTests = numRecentlyDiscardedTests st' + 1
+                , maxSuccessTests           = fromMaybe (maxSuccessTests st) (maybeNumTests res)
                 , maxDiscardedRatio         = fromMaybe (maxDiscardedRatio st) (maybeDiscardedRatio res)
                 , randomSeed                = rnd2
                 } f

diff --git a/tests/DiscardRatio.hs b/tests/DiscardRatio.hs
@@ -8,15 +8,18 @@ assert :: String -> Bool -> IO  ()
 assert s False = do
   putStrLn $ s ++ " failed!"
   exitFailure
-assert _ _     = pure ()
+assert _ _     = return ()
 
-quickCheckYes, quickCheckNo :: Property -> IO ()
-quickCheckYes p = do
-  res <- quickCheckResult p
+quickCheckYesWith, quickCheckNoWith :: Testable p => Args -> p -> IO ()
+quickCheckYesWith args p = do
+  res <- quickCheckWithResult args p
   unless (isSuccess res) exitFailure
-quickCheckNo p = do
-  res <- quickCheckResult p
+quickCheckNoWith args p = do
+  res <- quickCheckWithResult args p
   when (isSuccess res) exitFailure
+quickCheckYes, quickCheckNo :: Testable p => p -> IO ()
+quickCheckYes = quickCheckYesWith stdArgs
+quickCheckNo = quickCheckNoWith stdArgs
 
 check :: Result -> Int -> Int -> IO ()
 check res n d = do
@@ -25,18 +28,30 @@ check res n d = do
 
 main :: IO ()
 main = do
-  putStrLn "Testing: False ==> True"
+  putStrLn "Expecting gave up after 200 tries: False ==> True"
   res <- quickCheckResult $ withDiscardRatio 2 $ False ==> True
   check res 0 200
+  res <- quickCheckWithResult stdArgs{maxDiscardRatio = 2} $ False ==> True
+  check res 0 200
 
-  putStrLn "Testing: x == x"
+  putStrLn "\nExpecting success after 100 tests: x == x"
   res <- quickCheckResult $ withDiscardRatio 2 $ \ x -> (x :: Int) == x
   check res 100 0
+  res <- quickCheckWithResult stdArgs{maxDiscardRatio = 2} $ \ x -> (x :: Int) == x
+  check res 100 0
 
   -- The real ratio is 20, if 1 works or 40 doesn't it's
   -- probably because we broke something!
   let p50 = forAll (choose (1, 1000)) $ \ x -> (x :: Int) < 50 ==> True
-  putStrLn "Expecting failure (discard ratio 1): x < 50 ==> True"
+  putStrLn "\nExpecting failure (discard ratio 1): x < 50 ==> True"
   quickCheckNo $ withDiscardRatio 1 p50
-  putStrLn "Expecting success (discard ratio 40): x < 50 ==> True"
+  quickCheckNoWith stdArgs{maxDiscardRatio = 1} p50
+  putStrLn "\nExpecting success (discard ratio 40): x < 50 ==> True"
   quickCheckYes $ withDiscardRatio 40 p50
+  quickCheckYesWith stdArgs{maxDiscardRatio = 40} p50
+
+  -- This was brought to our attention by @robx in issue #338
+  let p k k' = k /= k' ==> (k :: Int) /= k'
+  putStrLn "\nExpecting success (maxSuccess = 1): k /= k' ==> k /= k'"
+  quickCheckYes $ withMaxSuccess 1 p
+  quickCheckYesWith stdArgs{maxSuccess = 1} p