diff --git a/benchmarks/mergesort/common.ml b/benchmarks/mergesort/common.ml new file mode 100644 index 0000000000..0a367455ce --- /dev/null +++ b/benchmarks/mergesort/common.ml @@ -0,0 +1,131 @@ + module T = Domainslib.Task +module A = Array +module AS = CCArray_slice + +type point3d = float * float * float + +let print_point3d (x,y,z) = + print_string ("(" ^ Float.to_string x ^ ", " ^ Float.to_string y ^ ", " ^ Float.to_string z ^ ")") + +let compare_point3d (axis : int) ((x1,y1,z1) : point3d) ((x2,y2,z2) : point3d) : int = + if axis == 0 + then Float.compare x1 x2 + else if axis == 1 + then Float.compare y1 y2 + else Float.compare z1 z2 + +let dist_point3d ((x1,y1,z1) : point3d) ((x2,y2,z2) : point3d) : float = + let (d1, d2, d3) = (x1 -. x2, y1 -. y2, z1 -. z2) in + (d1 *. d1) +. (d2 *. d2) +. (d3 *. d3) + +let min_point3d ((x1,y1,z1) : point3d) ((x2,y2,z2) : point3d) : point3d = + ((Float.min x1 x2), (Float.min y1 y2), (Float.min z1 z2)) + +let max_point3d ((x1,y1,z1) : point3d) ((x2,y2,z2) : point3d) : point3d = + ((Float.max x1 x2), (Float.max y1 y2), (Float.max z1 z2)) + +let coord (i : int) ((x,y,z) : point3d) = + match i with + 0 -> x + | 1 -> y + | 2 -> z + | _ -> z + +type point2d = float * float + +let compare_point2d (axis : int) ((x1,y1) : point2d) ((x2,y2) : point2d) : int = + if axis == 0 + then Float.compare x1 x2 + else Float.compare y2 y2 + +let dist_point2d ((x1,y1) : point2d) ((x2,y2) : point2d) : float = + let (d1, d2) = (x1 -. x2, y1 -. y2) in + (d1 *. d1) +. (d2 *. d2) + +let min_point2d ((x1,y1) : point2d) ((x2,y2) : point2d) : point2d = + ((Float.min x1 x2), (Float.min y1 y2)) + +let max_point2d ((x1,y1) : point2d) ((x2,y2) : point2d) : point2d = + ((Float.max x1 x2), (Float.max y1 y2)) + + +(* https://stackoverflow.com/questions/5774934 *) +let read_file (filename : string) : string list = + let lines = ref [] in + let chan = open_in filename in + try + while true; do + lines := input_line chan :: !lines + done; !lines + with End_of_file -> + close_in chan; + List.rev !lines ;; + +(* A.init n (fun i -> let line = List.nth lines i in + * if (String.length line) == 0 + * then (0.01, 0.01, 0.01) + * else + * let words = String.split_on_char ' ' line in + * let a = Float.of_string (List.nth words 0) in + * let b = Float.of_string (List.nth words 1) in + * let c = Float.of_string (List.nth words 2) in + * (a, b, c)) + *) + +let read3DArrayFile (fp : string) : point3d array = + let lines = read_file fp in + let n = List.length lines in + let _ = print_endline ("length: " ^ string_of_int n ^ "\n") in + let pool = T.setup_pool ~num_domains:48 in + let result = A.make n (0.01, 0.01, 0.01) in + let _ = T.parallel_for ~start:0 ~finish:(n-1) + ~body:(fun i -> let line = List.nth lines i in + if (String.length line) == 0 + then () + else + let words = String.split_on_char ' ' line in + let a = Float.of_string (List.nth words 0) in + let b = Float.of_string (List.nth words 1) in + let c = Float.of_string (List.nth words 2) in + A.set result i (a, b, c); + ()) + pool in + let _ = T.teardown_pool pool in + result + + +let read2DArrayFile (fp : string) : point2d array = + let lines = read_file fp in + let n = List.length lines in + let pool = T.setup_pool ~num_domains:48 in + let result = A.make n (0.01, 0.01) in + let _ = T.parallel_for ~start:0 ~finish:(n-1) + ~body:(fun i -> let line = List.nth lines i in + if (String.length line) == 0 + then () + else + let words = String.split_on_char ' ' line in + let a = Float.of_string (List.nth words 0) in + let b = Float.of_string (List.nth words 1) in + A.set result i (a, b); + ()) + pool in + let _ = T.teardown_pool pool in + result + +let get_rand (n : int) : int = + let t = Unix.gettimeofday () in + let i = Float.round t in + (Float.hash i) mod n + +let filter_array (f : ('a -> bool)) (arr : 'a array) : 'a array = + let module A = Array in + A.of_list (List.filter f (A.to_list arr)) + +let print_array f arr = + let _ = A.iter (fun x -> print_string (f x ^ " ")) arr in + print_endline "\n" + +let print_slice f arr = + let _ = AS.iter (fun x -> print_string (f x ^ " ")) arr in + print_endline "\n" diff --git a/benchmarks/mergesort/dune b/benchmarks/mergesort/dune new file mode 100644 index 0000000000..7e286a5f91 --- /dev/null +++ b/benchmarks/mergesort/dune @@ -0,0 +1,15 @@ +(executable + (name mergesort) + (modules mergesort) + (modes native) + (libraries domainslib containers)) + + (executable + (name common) + (modules common) + (modes native) + (libraries domainslib containers)) + +(alias + (name multibench_parallel) + (deps mergesort.exe common.exe)) diff --git a/benchmarks/mergesort/mergesort.ml b/benchmarks/mergesort/mergesort.ml new file mode 100644 index 0000000000..29101edd84 --- /dev/null +++ b/benchmarks/mergesort/mergesort.ml @@ -0,0 +1,169 @@ +(* similar to Maple's mergesort: https://github.com/MPLLang/mpl/blob/master/examples/lib/Mergesort.sml *) + +open Common +module A = Array +module T = Domainslib.Task +module AS = CCArray_slice + +let goto_seqmerge = 4096 +let goto_quicksort = 8192 + +let rec binary_search' (lo : int) (hi :int) (f : 'a -> 'a -> int) (s : 'a AS.t) (x : 'a) : int = + let n = hi - lo in + if n == 0 + then lo + else let mid = lo + (n / 2) in + let pivot = AS.get s mid in + let cmp = f x pivot in + if cmp < 0 + then binary_search' lo mid f s x + else if cmp > 0 + then binary_search' (mid+1) hi f s x + else mid + +let binary_search (f : 'a -> 'a -> int) (s : 'a AS.t) (x : 'a) : int = + binary_search' 0 (AS.length s) f s x + +let write_loop_seq (idx : int) (offset : int) (end_idx : int) (from : 'a AS.t) (to0 : 'a AS.t) : 'a AS.t = + for i = idx to (end_idx-1) do + AS.set to0 (i+offset) (AS.get from i) + done; + to0 + +let write_loop pool (idx : int) (offset : int) (end_idx : int) (from : 'a AS.t) (to0 : 'a AS.t) :'a AS.t = + T.parallel_for ~start:idx ~finish:(end_idx-1) + ~body:(fun i -> AS.set to0 (i+offset) (AS.get from i)) + pool; + to0 + +(* i1 index into s1 + * i2 index into s2 + * j index into output *) +let rec write_merge_seq_loop (i1 : int) (i2 : int) (j : int) (n1 : int) (n2 : int) (f : 'a -> 'a -> int) (s1 : 'a AS.t) (s2 : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + if i1 == n1 + then let tmp1 = AS.sub s2 i2 (n2-i2) in + let t2 = write_loop_seq 0 j (n2-i2) tmp1 t in + t2 + else if i2 == n2 + then let tmp1 = AS.sub s1 i1 (n1-i1) in + let t1 = write_loop_seq 0 j (n1-i1) tmp1 t in + t1 + else let x1 = AS.get s1 i1 in + let x2 = AS.get s2 i2 in + if (f x1 x2) < 0 + then let _ = AS.set t j x1 in + let res = write_merge_seq_loop (i1+1) i2 (j+1) n1 n2 f s1 s2 t in + res + else let _ = AS.set t j x2 in + let res = write_merge_seq_loop i1 (i2+1) (j+1) n1 n2 f s1 s2 t in + res + +let write_merge_seq (f : 'a -> 'a -> int) (s1 : 'a AS.t) (s2 : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + let n1 = AS.length s1 in + let n2 = AS.length s2 in + let _ = write_merge_seq_loop 0 0 0 n1 n2 f s1 s2 t in + t + +let rec write_merge pool (f : 'a -> 'a -> int) (s1 : 'a AS.t) (s2 : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + if AS.length t < goto_seqmerge + then write_merge_seq f s1 s2 t + else + let n1 = AS.length s1 in + let n2 = AS.length s2 in + if n1 == 0 + then write_loop pool 0 0 n2 s2 t + else let mid1 = n1 / 2 in + let pivot = AS.get s1 mid1 in + let mid2 = binary_search f s2 pivot in + let l1 = AS.sub s1 0 mid1 in + let r1 = AS.sub s1 (mid1+1) (n1 - (mid1+1)) in + let l2 = AS.sub s2 0 mid2 in + let r2 = AS.sub s2 mid2 (n2-mid2) in + let _ = AS.set t (mid1+mid2) pivot in + let len_t = AS.length t in + let tl = AS.sub t 0 (mid1+mid2) in + let tr = AS.sub t (mid1+mid2+1) (len_t - (mid1+mid2+1)) in + let tl1_f = T.async pool (fun _ -> write_merge pool f l1 l2 tl) in + let tr1 = write_merge pool f r1 r2 tr in + let tl1 = T.await pool tl1_f in + t + +let rec write_sort1 pool (f : 'a -> 'a -> int) (s : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + let len = AS.length s in + if len < goto_quicksort + then (Qsort.sortInPlace f s; + s) + else + let half = len / 2 in + let (sl, sr) = (AS.sub s 0 half, AS.sub s half (len-half)) in + let (tl, tr) = (AS.sub t 0 half, AS.sub t half (len-half)) in + let tl1_f = T.async pool (fun _ -> write_sort2 pool f sl tl) in + let tr1 = write_sort2 pool f sr tr in + let tl1 = T.await pool tl1_f in + let res = write_merge pool f tl1 tr1 s in + (* let res = write_merge_seq f tl1 tr1 s in *) + res + +and write_sort1_seq (f : 'a -> 'a -> int) (s : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + let len = AS.length s in + if len < goto_quicksort + then (Qsort.sortInPlace f s; + s) + else + let half = len / 2 in + let (sl, sr) = (AS.sub s 0 half, AS.sub s half (len-half)) in + let (tl, tr) = (AS.sub t 0 half, AS.sub t half (len-half)) in + let tl1 = write_sort2_seq f sl tl in + let tr1 = write_sort2_seq f sr tr in + let res = write_merge_seq f tl1 tr1 s in + res + +and write_sort2 pool (f : 'a -> 'a -> int) (s : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + let len = AS.length s in + if len < goto_quicksort + then + let t1 = write_loop pool 0 0 len s t in + Qsort.sortInPlace f t1; + t1 + else + let half = len / 2 in + let (sl, sr) = (AS.sub s 0 half, AS.sub s half (len-half)) in + let (tl, tr) = (AS.sub t 0 half, AS.sub t half (len-half)) in + let sl1_f = T.async pool (fun _ -> write_sort1 pool f sl tl) in + let sr1 = write_sort1 pool f sr tr in + let sl1 = T.await pool sl1_f in + let res = write_merge pool f sl1 sr1 t in + (* let res = write_merge_seq f sl1 sr1 t in *) + res + +and write_sort2_seq (f : 'a -> 'a -> int) (s : 'a AS.t) (t : 'a AS.t) : 'a AS.t = + let len = AS.length s in + if len < goto_quicksort + then + let t1 = write_loop_seq 0 0 len s t in + (Qsort.sortInPlace f t1; + t1) + else + let half = len / 2 in + let (sl, sr) = (AS.sub s 0 half, AS.sub s half (len-half)) in + let (tl, tr) = (AS.sub t 0 half, AS.sub t half (len-half)) in + let sl1 = write_sort1_seq f sl tl in + let sr1 = write_sort1_seq f sr tr in + let res = write_merge_seq f sl1 sr1 t in + res + +let mergesort pool (f : 'a -> 'a -> int) (vec : 'a array) : 'a array = + let vec2 = A.copy vec in + let s = AS.full vec2 in + let t = AS.full (A.make (A.length vec2) (A.get vec2 0)) in + let s1 = write_sort1 pool f s t in + AS.underlying s1 + +let mergesort_seq (f : 'a -> 'a -> int) (vec : 'a array) : 'a array = + let vec2 = A.copy vec in + let s = AS.full vec2 in + let t = AS.full (A.make (A.length vec2) (A.get vec2 0)) in + (* Qsort.sortInPlace f s; + * AS.underlying s *) + let s1 = write_sort1_seq f s t in + AS.underlying s1 \ No newline at end of file diff --git a/multicore_parallel_run_config.json b/multicore_parallel_run_config.json index 9b77953c50..254c439116 100644 --- a/multicore_parallel_run_config.json +++ b/multicore_parallel_run_config.json @@ -710,6 +710,23 @@ "params": "100000" } ] + }, + { + "executable": "benchmarks/multicore-numerical/mergesort_multicore.exe", + "name": "mergesort_multicore", + "tags": ["run_in_ci", "macro_bench", "my_bench" ], + "runs": [ + { "params": "1024", "paramwrapper": "taskset --cpu-list 2-13" } + ] + }, + { + "executable": "benchmarks/mergesort/mergesort.exe", + "name": "mergesort", + "tags": ["1s_10s", "macro_bench", "my_bench"], + + "runs": [ + { "params": "16", "paramwrapper": "taskset --cpu-list 2-13" } + ] } ] -} +} \ No newline at end of file diff --git a/run_config.json b/run_config.json index f69838778c..2544472a4f 100644 --- a/run_config.json +++ b/run_config.json @@ -2,19 +2,19 @@ "wrappers": [ { "name": "orun", - "command": "orun -o %{output} -- taskset --cpu-list 5 %{command}" + "command": "orun -o %{output} -- taskset --cpu-list 1 %{command}" }, { "name": "perfstat", - "command": "perf stat -o %{output} -- taskset --cpu-list 5 %{command}" + "command": "perf stat -o %{output} -- taskset --cpu-list 1 %{command}" }, { "name": "pausetimes_trunk", - "command": "bash pausetimes_trunk %{output} taskset --cpu-list 5 %{command}" + "command": "bash pausetimes_trunk %{output} taskset --cpu-list 1 %{command}" }, { "name": "pausetimes_multicore", - "command": "bash pausetimes_multicore %{output} taskset --cpu-list 5 %{command}" + "command": "bash pausetimes_multicore %{output} taskset --cpu-list 1 %{command}" } ], "benchmarks": [ @@ -91,8 +91,8 @@ "name": "test_decompress", "tags": [ "1s_10s", - "macro_bench", - "run_in_ci" + "macro_bench" + ], "runs": [ { @@ -105,8 +105,8 @@ "name": "yojson_ydump", "tags": [ "lt_1s", - "macro_bench", - "run_in_ci" + "macro_bench" + ], "runs": [ { @@ -146,8 +146,8 @@ "name": "thread_ring_lwt_mvar", "tags": [ "1s_10s", - "macro_bench", - "run_in_ci" + "macro_bench" + ], "runs": [ { @@ -331,9 +331,11 @@ { "executable": "benchmarks/benchmarksgame/knucleotide3.exe", "name": "knucleotide3", + "tags": [ "10s_100s", "macro_bench" + ], "runs": [ { @@ -359,8 +361,8 @@ "name": "revcomp2", "tags": [ "1s_10s", - "macro_bench", - "run_in_ci" + "macro_bench" + ], "runs": [ { @@ -492,6 +494,7 @@ "tags": [ "10s_100s", "macro_bench" + ], "runs": [ { @@ -632,8 +635,8 @@ "name": "zarith_pi", "tags": [ "1s_10s", - "macro_bench", - "run_in_ci" + "macro_bench" + ], "runs": [ { @@ -670,8 +673,8 @@ "name": "menhir", "tags": [ "1s_10s", - "macro_bench", - "run_in_ci" + "macro_bench" + ], "runs": [ { @@ -1517,9 +1520,10 @@ ], "runs": [ { - "params": "" + "params": "200" } ] } + ] }