Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add the possibility to compute statistics #369

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/common.yml
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,6 @@ jobs:

runs-on: ${{ inputs.runs_on }}

timeout-minutes: ${{ inputs.timeout }}

outputs:
skippart2: ${{ steps.winonlyone.outputs.skippart2 }}

Expand Down Expand Up @@ -305,10 +303,14 @@ jobs:
if: env.ONLY_TEST == ''

- name: Run the multicore test suite (Linux / macOS)
env:
QCHECK_STATISTICS_FILE: 'stats'
run: opam exec -- dune runtest -j1 --no-buffer --display=quiet --cache=disabled --error-reporting=twice $SUBSUITE
if: "runner.os != 'Windows' && env.ONLY_TEST == ''"

- name: Run the multicore test suite (Windows / Cygwin)
env:
QCHECK_STATISTICS_FILE: 'stats'
run: opam exec -- dune runtest -j1 --no-buffer --display=quiet --cache=disabled --error-reporting=twice @(-Split $Env:SUBSUITE)
if: "runner.os == 'Windows' && env.ONLY_TEST == ''"

Expand Down
12 changes: 6 additions & 6 deletions lib/STM_domain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ module Make (Spec: Spec) = struct
let obs1 = match obs1 with Ok v -> v | Error exn -> raise exn in
let obs2 = match obs2 with Ok v -> v | Error exn -> raise exn in
check_obs pref_obs obs1 obs2 Spec.init_state
|| Test.fail_reportf " Results incompatible with linearized model\n\n%s"
|| Util.fail_reportf " Results incompatible with linearized model\n\n%s"
@@ print_triple_vertical ~fig_indent:5 ~res_width:35
(fun (c,r) -> Printf.sprintf "%s : %s" (Spec.show_cmd c) (show_res r))
(pref_obs,obs1,obs2)
Expand All @@ -57,7 +57,7 @@ module Make (Spec: Spec) = struct
let parent_obs = match parent_obs with Ok v -> v | Error exn -> raise exn in
let child_obs = match child_obs with Ok v -> v | Error exn -> raise exn in
check_obs pref_obs parent_obs child_obs Spec.init_state
|| Test.fail_reportf " Results incompatible with linearized model:\n\n%s"
|| Util.fail_reportf " Results incompatible with linearized model:\n\n%s"
@@ print_triple_vertical ~fig_indent:5 ~res_width:35 ~center_prefix:false
(fun (c,r) -> Printf.sprintf "%s : %s" (Spec.show_cmd c) (show_res r))
(pref_obs,parent_obs,child_obs)
Expand All @@ -66,7 +66,7 @@ module Make (Spec: Spec) = struct
let rep_count = 25 in
let seq_len,par_len = 20,12 in
let max_gen = 3*count in (* precond filtering may require extra generation: max. 3*count though *)
Test.make ~retries:10 ~max_gen ~count ~name
Util.make_test ~retries:10 ~max_gen ~count ~name
(arb_cmds_triple seq_len par_len)
(fun triple ->
assume (all_interleavings_ok triple);
Expand All @@ -76,7 +76,7 @@ module Make (Spec: Spec) = struct
let rep_count = 25 in
let seq_len,par_len = 20,12 in
let max_gen = 3*count in (* precond filtering may require extra generation: max. 3*count though *)
Test.make_neg ~retries:10 ~max_gen ~count ~name
Util.make_neg_test ~retries:10 ~max_gen ~count ~name
(arb_cmds_triple seq_len par_len)
(fun triple ->
assume (all_interleavings_ok triple);
Expand All @@ -86,7 +86,7 @@ module Make (Spec: Spec) = struct
let rep_count = 25 in
let seq_len,par_len = 20,12 in
let max_gen = 3*count in (* precond filtering may require extra generation: max. 3*count though *)
Test.make ~retries:10 ~max_gen ~count ~name
Util.make_test ~retries:10 ~max_gen ~count ~name
(arb_cmds_triple seq_len par_len)
(fun triple ->
assume (all_interleavings_ok triple);
Expand All @@ -96,7 +96,7 @@ module Make (Spec: Spec) = struct
let rep_count = 25 in
let seq_len,par_len = 20,12 in
let max_gen = 3*count in (* precond filtering may require extra generation: max. 3*count though *)
Test.make_neg ~retries:10 ~max_gen ~count ~name
Util.make_neg_test ~retries:10 ~max_gen ~count ~name
(arb_cmds_triple seq_len par_len)
(fun triple ->
assume (all_interleavings_ok triple);
Expand Down
6 changes: 3 additions & 3 deletions lib/STM_sequential.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,13 @@ module Make (Spec: Spec) = struct
match res with
| None -> true
| Some trace ->
Test.fail_reportf " Results incompatible with model\n%s"
Util.fail_reportf " Results incompatible with model\n%s"
@@ print_seq_trace trace

let agree_test ~count ~name =
Test.make ~name ~count (arb_cmds Spec.init_state) agree_prop
Util.make_test ~name ~count (arb_cmds Spec.init_state) (Util.repeat 1 agree_prop)

let neg_agree_test ~count ~name =
Test.make_neg ~name ~count (arb_cmds Spec.init_state) agree_prop
Util.make_neg_test ~name ~count (arb_cmds Spec.init_state) (Util.repeat 1 agree_prop)

end
6 changes: 3 additions & 3 deletions lib/STM_thread.ml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ module Make (Spec: Spec) = struct
let obs1 = match !obs1 with Ok v -> v | Error exn -> raise exn in
let obs2 = match !obs2 with Ok v -> v | Error exn -> raise exn in
check_obs pref_obs obs1 obs2 Spec.init_state
|| Test.fail_reportf " Results incompatible with linearized model\n\n%s"
|| Util.fail_reportf " Results incompatible with linearized model\n\n%s"
@@ print_triple_vertical ~fig_indent:5 ~res_width:35
(fun (c,r) -> Printf.sprintf "%s : %s" (Spec.show_cmd c) (show_res r))
(pref_obs,obs1,obs2)
Expand All @@ -43,7 +43,7 @@ module Make (Spec: Spec) = struct
let rep_count = 100 in
let seq_len,par_len = 20,12 in
let max_gen = 3*count in (* precond filtering may require extra generation: max. 3*count though *)
Test.make ~retries:15 ~max_gen ~count ~name
Util.make_test ~retries:15 ~max_gen ~count ~name
(arb_cmds_triple seq_len par_len)
(fun ((seq_pref,cmds1,cmds2) as triple) ->
assume (all_interleavings_ok seq_pref cmds1 cmds2 Spec.init_state);
Expand All @@ -53,7 +53,7 @@ module Make (Spec: Spec) = struct
let rep_count = 100 in
let seq_len,par_len = 20,12 in
let max_gen = 3*count in (* precond filtering may require extra generation: max. 3*count though *)
Test.make_neg ~retries:15 ~max_gen ~count ~name
Util.make_neg_test ~retries:15 ~max_gen ~count ~name
(arb_cmds_triple seq_len par_len)
(fun ((seq_pref,cmds1,cmds2) as triple) ->
assume (all_interleavings_ok seq_pref cmds1 cmds2 Spec.init_state);
Expand Down
4 changes: 2 additions & 2 deletions lib/lin.ml
Original file line number Diff line number Diff line change
Expand Up @@ -121,13 +121,13 @@ struct
(* Linearization test *)
let lin_test ~rep_count ~retries ~count ~name ~lin_prop =
let arb_cmd_triple = arb_cmds_triple 20 12 in
Test.make ~count ~retries ~name
Util.make_test ~count ~retries ~name
arb_cmd_triple (repeat rep_count lin_prop)

(* Negative linearization test *)
let neg_lin_test ~rep_count ~retries ~count ~name ~lin_prop =
let arb_cmd_triple = arb_cmds_triple 20 12 in
Test.make_neg ~count ~retries ~name
Util.make_neg_test ~count ~retries ~name
arb_cmd_triple (repeat rep_count lin_prop)
end
end
Expand Down
2 changes: 1 addition & 1 deletion lib/lin_domain.ml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module Make_internal (Spec : Internal.CmdSpec [@alert "-internal"]) = struct
let obs2 = match obs2 with Ok v -> v | Error exn -> raise exn in
let seq_sut = Spec.init () in
check_seq_cons pref_obs obs1 obs2 seq_sut []
|| QCheck.Test.fail_reportf " Results incompatible with sequential execution\n\n%s"
|| Util.fail_reportf " Results incompatible with sequential execution\n\n%s"
@@ Util.print_triple_vertical ~fig_indent:5 ~res_width:35
(fun (c,r) -> Printf.sprintf "%s : %s" (Spec.show_cmd c) (Spec.show_res r))
(pref_obs,obs1,obs2)
Expand Down
6 changes: 3 additions & 3 deletions lib/lin_effect.ml
Original file line number Diff line number Diff line change
Expand Up @@ -107,21 +107,21 @@ module Make_internal (Spec : Internal.CmdSpec [@alert "-internal"]) = struct
let seq_sut = Spec.init () in
(* exclude [Yield]s from sequential executions when searching for an interleaving *)
EffTest.check_seq_cons (filter_res pref_obs) (filter_res !obs1) (filter_res !obs2) seq_sut []
|| QCheck.Test.fail_reportf " Results incompatible with linearized model\n\n%s"
|| Util.fail_reportf " Results incompatible with linearized model\n\n%s"
@@ Util.print_triple_vertical ~fig_indent:5 ~res_width:35
(fun (c,r) -> Printf.sprintf "%s : %s" (EffSpec.show_cmd c) (EffSpec.show_res r))
(pref_obs,!obs1,!obs2)

let lin_test ~count ~name =
let arb_cmd_triple = EffTest.arb_cmds_triple 20 12 in
let rep_count = 1 in
QCheck.Test.make ~count ~retries:10 ~name
Util.make_test ~count ~retries:10 ~name
arb_cmd_triple (Util.repeat rep_count lin_prop)

let neg_lin_test ~count ~name =
let arb_cmd_triple = EffTest.arb_cmds_triple 20 12 in
let rep_count = 1 in
QCheck.Test.make_neg ~count ~retries:10 ~name
Util.make_neg_test ~count ~retries:10 ~name
arb_cmd_triple (Util.repeat rep_count lin_prop)
end

Expand Down
119 changes: 113 additions & 6 deletions lib/util.ml
Original file line number Diff line number Diff line change
@@ -1,9 +1,3 @@
let rec repeat n prop = fun input ->
if n<0 then failwith "repeat: negative repetition count";
if n=0
then true
else prop input && repeat (n-1) prop input

exception Timeout

let prop_timeout sec p x =
Expand Down Expand Up @@ -170,3 +164,116 @@ module Equal = struct
| _ -> false
let equal_array eq x y = equal_seq eq (Array.to_seq x) (Array.to_seq y)
end

module Stats = struct
let env_var = "QCHECK_STATISTICS_FILE"

let enabled =
match Sys.getenv_opt env_var with None | Some "" -> false | _ -> true

let out_channel =
match Sys.getenv_opt env_var with
| None | Some "" -> None
| Some "-" -> Some stdout
| Some path ->
Some (open_out_gen [ Open_wronly; Open_append; Open_creat ] 0o644 path)

type t = {
mutable iterations : int;
mutable failures : int;
mutable exceptions : int;
}

let current = { iterations = 0; failures = 0; exceptions = 0 }

let reset () =
current.iterations <- 0;
current.failures <- 0;
current.exceptions <- 0

let incr_iterations () = current.iterations <- current.iterations + 1
let incr_failures () = current.failures <- current.failures + 1
let incr_exceptions () = current.exceptions <- current.exceptions + 1

let record verbose (QCheck2.Test.Test cell) =
let open QCheck2.Test in
let name = get_name cell in
let { iterations; failures; exceptions } = current in
Option.fold ~none:()
~some:(fun o ->
Printf.fprintf o "%d %d %d %s\n%!" iterations failures exceptions name)
out_channel;
if verbose then
Printf.printf "Stats for %s: %diters %dfails %dexns\n%!" name iterations
failures exceptions
end

let repeat n prop =
let rec normal_repeat n input =
if n = 0 then true else prop input && normal_repeat (n - 1) input
and stats_repeat n input =
(* In Stats mode, we always run all the iterations, but we
count failures and exceptions on the way *)
if n = 0 then true
else (
Stats.incr_iterations ();
try
if not (prop input) then Stats.incr_failures ();
stats_repeat (n - 1) input
with _ ->
Stats.incr_exceptions ();
false)
in
if n < 0 then failwith "repeat: negative repetition count"
else if Stats.enabled then stats_repeat n
else normal_repeat n

let fail_reportf m =
if Stats.enabled then Format.ikfprintf (Fun.const false) Format.err_formatter m
else QCheck.Test.fail_reportf m

let make_test ?if_assumptions_fail ?count ?long_factor ?max_gen ?max_fail ?small
?retries ?name arb law =
QCheck.Test.make ?if_assumptions_fail ?count ?long_factor ?max_gen ?max_fail
?small ?retries ?name arb law

let make_neg_test ?if_assumptions_fail ?count ?long_factor ?max_gen ?max_fail
?small ?retries ?name arb law =
if Stats.enabled then
(* Note that, even negative tests are run as QCheck positive
tests, since we are hijacking the failure reports and counting
them separately *)
QCheck.Test.make ?if_assumptions_fail ?count ?long_factor ?max_gen ?max_fail
?small ?retries ?name arb law
else
QCheck.Test.make_neg ?if_assumptions_fail ?count ?long_factor ?max_gen
?max_fail ?small ?retries ?name arb law

let run_tests_main ?(argv = Sys.argv) l =
let cli_args =
try QCheck_base_runner.Raw.parse_cli ~full_options:false argv with
| Arg.Bad msg ->
print_endline msg;
exit 1
| Arg.Help msg ->
print_endline msg;
exit 0
in
let run_tests l =
QCheck_base_runner.run_tests l ~colors:cli_args.cli_colors
~verbose:cli_args.cli_verbose
~long:(cli_args.cli_long_tests || Stats.enabled)
~out:stdout ~rand:cli_args.cli_rand
in
if Stats.enabled then
let res =
List.map
(fun tst ->
Stats.reset ();
let r = run_tests [ tst ] in
Stats.record cli_args.cli_verbose tst;
r = 0)
l
in
exit (if List.fold_left ( && ) true res then 0 else 1)
else exit (run_tests l)
42 changes: 41 additions & 1 deletion lib/util.mli
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ val repeat : int -> ('a -> bool) -> 'a -> bool
(** [repeat num prop] iterates a property [prop] [num] times. The function stops
early and returns false if just one of the iterations returns false.
This is handy if the property outcome is non-determistic, for example,
if it depends on scheduling. *)
if it depends on scheduling.
TODO *)

exception Timeout
(** exception raised by [prop_timeout] and [fork_prop_with_timeout]. *)
Expand Down Expand Up @@ -161,3 +162,42 @@ module Equal : sig
val equal_seq : 'a t -> 'a Seq.t t
val equal_array : 'a t -> 'a array t
end

module Stats : sig
val enabled : bool
(** TODO *)
end

val make_test :
?if_assumptions_fail:[ `Fatal | `Warning ] * float ->
?count:int ->
?long_factor:int ->
?max_gen:int ->
?max_fail:int ->
?small:('a -> int) ->
?retries:int ->
?name:string ->
'a QCheck.arbitrary ->
('a -> bool) ->
QCheck.Test.t
(** TODO *)

val make_neg_test :
?if_assumptions_fail:[ `Fatal | `Warning ] * float ->
?count:int ->
?long_factor:int ->
?max_gen:int ->
?max_fail:int ->
?small:('a -> int) ->
?retries:int ->
?name:string ->
'a QCheck.arbitrary ->
('a -> bool) ->
QCheck.Test.t
(** TODO *)

val fail_reportf : ('a, Format.formatter, unit, bool) format4 -> 'a
(** TODO *)

val run_tests_main : ?argv:string array -> QCheck2.Test.t list -> 'a
(** TODO *)
2 changes: 1 addition & 1 deletion src/array/lin_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,6 @@ end

module AT_domain = Lin_domain.Make_internal(AConf) [@alert "-internal"]
;;
QCheck_base_runner.run_tests_main [
Util.run_tests_main [
AT_domain.neg_lin_test ~count:1000 ~name:"Lin Array test with Domain";
]
2 changes: 1 addition & 1 deletion src/array/lin_tests_dsl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,6 @@ end

module AT_domain = Lin_domain.Make(AConf)
;;
QCheck_base_runner.run_tests_main [
Util.run_tests_main [
AT_domain.neg_lin_test ~count:1000 ~name:"Lin DSL Array test with Domain";
]
2 changes: 1 addition & 1 deletion src/array/stm_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ end
module ArraySTM_seq = STM_sequential.Make(AConf)
module ArraySTM_dom = STM_domain.Make(AConf)
;;
QCheck_base_runner.run_tests_main
Util.run_tests_main
(let count = 1000 in
[ArraySTM_seq.agree_test ~count ~name:"STM Array test sequential";
ArraySTM_dom.neg_agree_test_par ~count ~name:"STM Array test parallel" (* this test is expected to fail *)
Expand Down
2 changes: 1 addition & 1 deletion src/atomic/lin_tests.ml
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ end

module A3T_domain = Lin_domain.Make_internal(A3Conf) [@alert "-internal"]
;;
QCheck_base_runner.run_tests_main [
Util.run_tests_main [
AT_domain.lin_test ~count:1000 ~name:"Lin Atomic test with Domain";
A3T_domain.lin_test ~count:1000 ~name:"Lin Atomic3 test with Domain";
]
2 changes: 1 addition & 1 deletion src/atomic/lin_tests_dsl.ml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,6 @@ end
module Lin_atomic_domain = Lin_domain.Make (Atomic_spec)

let () =
QCheck_base_runner.run_tests_main
Util.run_tests_main
[ Lin_atomic_domain.lin_test ~count:1000 ~name:"Lin DSL Atomic test with Domain";
]
Loading