binpash · EtomicBomb · Nov 21, 2024 · Nov 21, 2024 · Nov 25, 2024 · Nov 25, 2024
diff --git a/Dockerfile.infrastructure b/Dockerfile.infrastructure
@@ -0,0 +1,8 @@
+FROM ubuntu:24.04
+
+# apt-get has a stable CLI for scripted use (apt warns that it does not); noninteractive avoids debconf prompts during the build.
+# NOTE(review): package lists are left in place, presumably so benchmark deps.sh scripts can install more packages later - confirm.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y cloc sysstat sudo vim git wget curl
+WORKDIR /benchmarks
+COPY . .
+CMD ["bash"]
diff --git a/covid-mts/run.sh b/covid-mts/run.sh
@@ -15,8 +15,10 @@ input_file="$input_dir/in$suffix.csv"
 output_scoped="$outputs_dir/outputs$suffix"
 mkdir -p "$output_scoped"
 
-"$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out"
-"$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out"
-"$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out"
-"$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out"
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+
+# Run queries 1-4 in order under $BENCHMARK_SHELL (bash unless overridden), one .out file per query.
+for query in 1 2 3 4; do
+    $BENCHMARK_SHELL "$scripts_dir/$query.sh" "$input_file" > "$output_scoped/$query.out"
+done
 
diff --git a/file-enc/run.sh b/file-enc/run.sh
@@ -17,5 +17,6 @@ if [[ "$1" == "--small" ]]; then
     suffix=".small"
 fi
 
-$scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix
-$scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}  # launcher for the scripts below; bash when unset or empty
+$BENCHMARK_SHELL "$scripts_dir/compress_files.sh" "$input_pcaps" "$results_dir/compress_files$suffix"
+$BENCHMARK_SHELL "$scripts_dir/encrypt_files.sh" "$input_pcaps" "$results_dir/encrypt_files$suffix"
diff --git a/infrastructure/data/script-globs.json b/infrastructure/data/script-globs.json
@@ -21,13 +21,13 @@
         "scripts": ["oneliners/scripts/*.sh"]
     },
     "sklearn": {
-        "scripts": ["sklearn/run.sh"]
+        "scripts": ["sklearn/scripts/run.sh"]
     },
     "riker": {
         "scripts": ["riker/scripts/*/build.sh"]
     },
     "uniq-ips": {
-        "scripts": ["uniq-ips/run.sh"]
+        "scripts": ["uniq-ips/scripts/run.sh"]
     },
     "unix50": {
         "scripts": ["unix50/scripts/*.sh"]

diff --git a/infrastructure/run_dynamic.py b/infrastructure/run_dynamic.py
@@ -0,0 +1,42 @@
+#!/usr/bin/env python3
+
+import argparse
+from pathlib import Path
+from typing import Optional
+import json
+from subprocess import check_output, run
+from collections import Counter
+import os
+
+from all_scripts import get_all_scripts
+from syntax_analysis import parse_shell_script, count_nodes
+from project_root import get_project_root
+
+def get_parser():
+    """Build the run_dynamic CLI: required --bench plus the optional --[no-]run-input and --[no-]run-deps toggles."""
+    cli = argparse.ArgumentParser(prog='run_dynamic', description='runs the dynamic analysis')
+    cli.add_argument('--bench', required=True, type=str)
+    # BooleanOptionalAction also registers the matching --no-* switch for each flag.
+    for toggle in ('--run-input', '--run-deps'):
+        cli.add_argument(toggle, action=argparse.BooleanOptionalAction)
+    return cli
+
+def get_environment(root):
+    """Return a copy of os.environ with BENCHMARK_SHELL set to root/infrastructure/run_dynamic_shell.py."""
+    wrapper = root / 'infrastructure' / 'run_dynamic_shell.py'
+    # Dict unpacking copies the current environment, so os.environ itself is left untouched.
+    return {**os.environ, 'BENCHMARK_SHELL': str(wrapper)}
+
+def run_analysis(root: Path, bench: Path, run_input: bool, run_deps: bool):
+    """Run bench's deps.sh and input.sh when requested, then run.sh, all with BENCHMARK_SHELL set; exit statuses are not checked."""
+    child_env = get_environment(root)
+    stages = [('deps.sh', run_deps), ('input.sh', run_input), ('run.sh', True)]
+    for script, enabled in stages:
+        if enabled:
+            run([root / bench / script], env=child_env)
+
+if __name__ == '__main__':
+    # Parse the command line first, then resolve the project root and run the selected benchmark.
+    cli_args = get_parser().parse_args()
+    project_root = get_project_root()
+    run_analysis(project_root, bench=cli_args.bench, run_input=cli_args.run_input, run_deps=cli_args.run_deps)
diff --git a/infrastructure/run_dynamic_shell.py b/infrastructure/run_dynamic_shell.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python3
+
+import shlex
+from pathlib import Path
+from typing import Optional
+import json
+from subprocess import check_output, run
+from collections import Counter
+import sys
+from time import perf_counter
+
+from all_scripts import get_all_scripts
+from syntax_analysis import parse_shell_script, count_nodes
+from project_root import get_project_root
+
+time_file = get_project_root() / 'infrastructure' / 'target' / 'runtime_log.csv'
+time_file.parent.mkdir(parents=True, exist_ok=True)  # open('a') creates the file but not a missing directory
+command = sys.argv[1:]
+# Time the wrapped script, append "<command>,<seconds>" to the log, then exit with the script's own status.
+start = perf_counter()
+completed = run(['bash', *command])
+elapsed = perf_counter() - start
+with time_file.open('a') as file:
+    print(shlex.join(command), elapsed, sep=',', file=file)
+sys.exit(completed.returncode)  # otherwise `echo $?` in the calling run.sh would always report this wrapper's 0
+
diff --git a/infrastructure/target/runtime_log.csv b/infrastructure/target/runtime_log.csv
@@ -0,0 +1,2 @@
+/benchmarks/log-analysis/scripts/nginx.sh /benchmarks/log-analysis/input/nginx-logs /benchmarks/log-analysis/results/nginx.full,1.5968113659982919
+/benchmarks/log-analysis/scripts/pcaps.sh /benchmarks/log-analysis/input/pcaps /benchmarks/log-analysis/results/pcaps.full,116.00431065500015
diff --git a/log-analysis/run.sh b/log-analysis/run.sh
@@ -17,10 +17,13 @@ if [[ "$@" == *"--small"* ]]; then
     suffix=".small"
 fi
 
+: "${BENCHMARK_SHELL:=bash}"  # launcher for the scripts below; bash when unset or empty
+printf 'shell: %s\n' "$BENCHMARK_SHELL"
+
 echo "nginx"
-time $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix 
+$BENCHMARK_SHELL "$scripts_dir/nginx.sh" "$nginx_input" "$results_dir/nginx$suffix"  # quoted so paths containing spaces stay single arguments
 echo $?
 
 echo "pcaps"
-time $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix 
+$BENCHMARK_SHELL "$scripts_dir/pcaps.sh" "$pcaps_input" "$results_dir/pcaps$suffix"  # quoted so paths containing spaces stay single arguments
 echo $?
diff --git a/max-temp/run.sh b/max-temp/run.sh
@@ -18,4 +18,6 @@ export statistics_dir="$results_dir/statistics$suffix"
 
 mkdir -p "$statistics_dir"
 
-${scripts_dir}/temp-analytics.sh
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+# Launch the analysis through $BENCHMARK_SHELL; the script path is quoted so a checkout path with spaces survives.
+$BENCHMARK_SHELL "${scripts_dir}/temp-analytics.sh"
diff --git a/media-conv/run.sh b/media-conv/run.sh
@@ -17,10 +17,12 @@ if [[ "$@" == *"--small"* ]]; then
     suffix=".small"
 fi
 
+: "${BENCHMARK_SHELL:=bash}"  # launcher for the conversion scripts below; bash when unset or empty
+
 echo "img_convert"
-time $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log
+$BENCHMARK_SHELL "$scripts_dir/img_convert.sh" "$img_convert_input" "$results_dir/img_convert$suffix" > "$results_dir/img_convert$suffix.log"  # quoted: an unquoted redirect target with spaces is an "ambiguous redirect"
 echo $?
 
 echo "to_mp3"
-time $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log
+$BENCHMARK_SHELL "$scripts_dir/to_mp3.sh" "$to_mp3_input" "$results_dir/to_mp3$suffix" > "$results_dir/to_mp3$suffix.log"  # quoted: an unquoted redirect target with spaces is an "ambiguous redirect"
 echo $?
diff --git a/nlp/run.sh b/nlp/run.sh
@@ -12,6 +12,8 @@ else
     export IN="$SUITE_DIR/inputs/pg"
 fi
 
+: "${BENCHMARK_SHELL:=bash}"  # launcher for each script in the loop below; bash when unset or empty
+
 mkdir -p "outputs"
 
 # Define the script names in a single variable
@@ -51,6 +53,6 @@ while IFS= read -r script; do
     mkdir -p "$output_dir"
 
     echo "$script"
-    time "$SHELL" "$script_file" "$output_dir"
+    $BENCHMARK_SHELL "$script_file" "$output_dir"  # no extra "$SHELL": the launcher would try to interpret that binary as a script
     echo "$?"
 done <<< "$script_names"
diff --git a/oneliners/run.sh b/oneliners/run.sh
@@ -4,6 +4,8 @@ export SUITE_DIR=$(realpath $(dirname "$0"))
 export TIMEFORMAT=%R
 cd $SUITE_DIR
 
+: "${BENCHMARK_SHELL:=bash}"  # launcher for each script in the loop below; bash when unset or empty
+
 if [[ "$@" == *"--small"* ]]; then
     scripts_inputs=(
         "nfa-regex;1M"
@@ -42,6 +44,6 @@ do
     output_file="./outputs/${parsed[0]}.out"
 
     echo "$script_file"
-    time "$SHELL" "$script_file" "$input_file" > "$output_file"
+    $BENCHMARK_SHELL "$script_file" "$input_file" > "$output_file"  # no extra "$SHELL": the launcher would try to interpret that binary as a script
     echo "$?"
 done
diff --git a/sklearn/run.sh b/sklearn/run.sh
@@ -1,72 +1,10 @@
 #!/bin/bash
 
-PYTHON="python3"
-OUT=${OUT:-$PWD/result}
-TMP=${TMP:-$PWD/tmp}
-#export tmp to env
-export TMP
-SCRIPTS=${SCRIPTS:-$PWD/scripts}
+REPO_TOP="$(git rev-parse --show-toplevel)"  # top-level directory of the enclosing git work tree
+eval_dir=$REPO_TOP/sklearn
+scripts_dir=$eval_dir/scripts
 
-# Ideally, we'll move on to piping rather than writing to a file
-MODEL=$TMP/model.obj
-X=$TMP/X_train.obj
-y=$TMP/y_train.obj
-CLASSES=$TMP/classes.obj
-DUAL=false # should be converted to bool inside script
-MAX_SQ_SUM=$TMP/max_squared_sum.obj
-WARM_COEF=$TMP/warm_start_coef.obj
-C_=$TMP/C_.obj
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+cd "$eval_dir" || exit 1 # scripts/run.sh references PWD, so never run it from the wrong directory
+$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@"
 
-echo $PYTHON >&2
-echo "DIR: $DIR" >&2
-echo "SCRIPTS: $SCRIPTS" >&2
-echo "MODEL: $MODEL" >&2
-echo "X: $X" >&2
-echo "y: $y" >&2
-echo "CLASSES: $CLASSES" >&2
-echo "DUAL: $DUAL" >&2
-echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
-echo "WARM_COEF: $WARM_COEF" >&2
-echo "C_: $C_" >&2
-
-# TODO: Try this out on a larger dataset
-# TODO: Benchmark each phase
-
-# Generating model & samples
-$PYTHON $SCRIPTS/gen_model.py 100
-$PYTHON $SCRIPTS/gen_samples.py
-
-# Validity checking functions
-# These functions just check to make sure that the input is valid. 
-# If not they will raise an error. Otherwise, they do not mutate the data.
-$PYTHON $SCRIPTS/check_solver.py $MODEL
-penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL)
-$PYTHON $SCRIPTS/val_data.py $MODEL $X $y 
-$PYTHON $SCRIPTS/classes.py $MODEL $y # This should return a classes with just the unique classes in y
-echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2
-multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL)
-echo "------" >&2
-# TODO: Benchmark each step of the pipeline
-# Make a modified pipeline where each step writes its output to a file
-
-# Calculations functions
-$PYTHON $SCRIPTS/rownorm.py $X
-n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES)
-$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients
-
-# Covtype dataset has 7 classes
-echo "WARM_COEF: $WARM_COEF" >&2
-echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
-
-echo "multiclass: $multiclass" >&2
-echo "penalty: $penalty" >&2
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 2
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6
-$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7
-
-$PYTHON $SCRIPTS/zip_coef.py $MODEL
-$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes $RESULT/trained_model.obj
diff --git a/sklearn/scripts/run.sh b/sklearn/scripts/run.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+
+PYTHON="python3" # interpreter used for every helper script below
+OUT=${OUT:-$PWD/result} # output directory; defaults to ./result under the current directory
+TMP=${TMP:-$PWD/tmp} # scratch directory; defaults to ./tmp under the current directory
+# Export TMP so the child processes started below can read it from their environment.
+export TMP
+SCRIPTS=${SCRIPTS:-$PWD/scripts} # directory holding the *.py helpers invoked below
+
+# File paths under $TMP that are handed to the helper scripts.
+# Ideally, we'll move on to piping rather than writing to a file.
+MODEL=$TMP/model.obj
+X=$TMP/X_train.obj
+y=$TMP/y_train.obj
+CLASSES=$TMP/classes.obj
+DUAL=false # should be converted to bool inside script; only echoed below, not passed to any helper
+MAX_SQ_SUM=$TMP/max_squared_sum.obj
+WARM_COEF=$TMP/warm_start_coef.obj
+C_=$TMP/C_.obj
+
+echo $PYTHON >&2 # all configuration diagnostics in this group go to stderr
+echo "DIR: $DIR" >&2 # NOTE(review): DIR is never assigned in this script; prints empty unless inherited from the environment
+echo "SCRIPTS: $SCRIPTS" >&2
+echo "MODEL: $MODEL" >&2
+echo "X: $X" >&2
+echo "y: $y" >&2
+echo "CLASSES: $CLASSES" >&2
+echo "DUAL: $DUAL" >&2
+echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
+echo "WARM_COEF: $WARM_COEF" >&2
+echo "C_: $C_" >&2
+
+# TODO: Try this out on a larger dataset
+# TODO: Benchmark each phase
+
+# Generate the model and the samples (gen_model.py receives the literal argument 100).
+$PYTHON $SCRIPTS/gen_model.py 100
+$PYTHON $SCRIPTS/gen_samples.py
+
+# Validity-checking steps.
+# These steps only check that the input is valid.
+# If it is not, they raise an error; otherwise they do not mutate the data.
+$PYTHON $SCRIPTS/check_solver.py $MODEL
+penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL) # stdout of penalty.py is captured into $penalty
+$PYTHON $SCRIPTS/val_data.py $MODEL $X $y 
+$PYTHON $SCRIPTS/classes.py $MODEL $y # This should produce only the unique classes found in y
+echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2 # trace the next command line on stderr
+multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL) # stdout is captured into $multiclass
+echo "------" >&2
+# TODO: Benchmark each step of the pipeline
+# Make a modified pipeline where each step writes its output to a file
+
+# Calculation steps
+$PYTHON $SCRIPTS/rownorm.py $X
+n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES) # stdout is captured into $n_classes
+$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients
+
+# Covtype dataset has 7 classes (presumably why parallel.py is invoked with a final argument of 1 through 7 below)
+echo "WARM_COEF: $WARM_COEF" >&2
+echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
+
+echo "multiclass: $multiclass" >&2
+echo "penalty: $penalty" >&2
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 2
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6
+$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7
+# RESULT is not assigned anywhere in this script; fall back to OUT rather than expanding to /trained_model.obj.
+$PYTHON $SCRIPTS/zip_coef.py $MODEL
+$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes ${RESULT:-$OUT}/trained_model.obj
diff --git a/uniq-ips/run.sh b/uniq-ips/run.sh
@@ -1 +1,10 @@
-cat "logs-popcount-org.txt" | sort | uniq > "out.txt"
+#!/bin/bash
+# Wrapper: runs scripts/run.sh under $BENCHMARK_SHELL (bash unless overridden) from the uniq-ips directory.
+
+REPO_TOP=$(git rev-parse --show-toplevel)
+eval_dir="${REPO_TOP}/uniq-ips"
+scripts_dir="${eval_dir}/scripts"
+BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
+# scripts/run.sh puts files in its current directory, so stop if the cd fails.
+cd "$eval_dir" || exit 1
+$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@"
diff --git a/uniq-ips/scripts/run.sh b/uniq-ips/scripts/run.sh
@@ -0,0 +1 @@
+cat "logs-popcount-org.txt" | sort | uniq > "out.txt" # writes the sorted, de-duplicated lines of the log; both paths are relative to the caller's working directory
diff --git a/unix50/run.sh b/unix50/run.sh
@@ -54,6 +54,7 @@ fi
 echo executing unix50 $(date)
 
 mkdir -p "outputs"
+: "${BENCHMARK_SHELL:=bash}"  # launcher for each script in the loop below; bash when unset or empty
 
 for script_input in ${scripts_inputs[@]};
 do
@@ -67,6 +68,6 @@ do
     output_file="./outputs/$script.out"
 
     echo "$script"
-    time $SHELL $script_file $input_file > $output_file
+    $BENCHMARK_SHELL "$script_file" "$input_file" > "$output_file"  # quoted so paths containing spaces stay single words
     echo $?
 done
diff --git a/web-index/run.sh b/web-index/run.sh
@@ -2,6 +2,7 @@
 
 cd "$(dirname "$0")"
 
+: "${BENCHMARK_SHELL:=bash}"  # launcher for scripts/ngrams.sh below; bash when unset or empty
 directory_path="inputs/articles"
 
 if [ ! -d "$directory_path" ]; then
@@ -25,5 +26,5 @@ fi
 mkdir -p "$OUTPUT_BASE"
 
 echo "web-index"
-time $SHELL ./scripts/ngrams.sh "$OUTPUT_BASE"
+$BENCHMARK_SHELL ./scripts/ngrams.sh "$OUTPUT_BASE" # relative path: this script cd's to its own directory near the top
 echo $?