Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dynamic analysis #40

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions Dockerfile.infrastructure
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Image used to run the benchmark infrastructure scripts.
FROM ubuntu:24.04

# Install the command-line tools in one layer.
# apt-get is used instead of apt because apt's command-line interface is not
# guaranteed to stay stable for scripted use, and DEBIAN_FRONTEND=noninteractive
# keeps package configuration from prompting (and hanging) during the build.
# The package lists are deliberately left in place so that later package
# installs inside the container are not forced to refresh them first.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        cloc sysstat sudo vim git wget curl

# Copy the whole build context into /benchmarks and work from there.
WORKDIR /benchmarks
COPY . .

# Default to a shell when the container is started without a command.
CMD ["bash"]
10 changes: 6 additions & 4 deletions covid-mts/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,10 @@ input_file="$input_dir/in$suffix.csv"
output_scoped="$outputs_dir/outputs$suffix"
mkdir -p "$output_scoped"

"$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out"
"$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out"
"$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out"
"$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out"
BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}

$BENCHMARK_SHELL "$scripts_dir/1.sh" "$input_file" > "$output_scoped/1.out"
$BENCHMARK_SHELL "$scripts_dir/2.sh" "$input_file" > "$output_scoped/2.out"
$BENCHMARK_SHELL "$scripts_dir/3.sh" "$input_file" > "$output_scoped/3.out"
$BENCHMARK_SHELL "$scripts_dir/4.sh" "$input_file" > "$output_scoped/4.out"

5 changes: 3 additions & 2 deletions file-enc/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ if [[ "$1" == "--small" ]]; then
suffix=".small"
fi

$scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix
$scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix
BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
$BENCHMARK_SHELL $scripts_dir/compress_files.sh $input_pcaps $results_dir/compress_files$suffix
$BENCHMARK_SHELL $scripts_dir/encrypt_files.sh $input_pcaps $results_dir/encrypt_files$suffix
4 changes: 2 additions & 2 deletions infrastructure/data/script-globs.json
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@
"scripts": ["oneliners/scripts/*.sh"]
},
"sklearn": {
"scripts": ["sklearn/run.sh"]
"scripts": ["sklearn/scripts/run.sh"]
},
"riker": {
"scripts": ["riker/scripts/*/build.sh"]
},
"uniq-ips": {
"scripts": ["uniq-ips/run.sh"]
"scripts": ["uniq-ips/scripts/run.sh"]
},
"unix50": {
"scripts": ["unix50/scripts/*.sh"]
Expand Down
42 changes: 42 additions & 0 deletions infrastructure/run_dynamic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/usr/bin/env python3

import argparse
from pathlib import Path
from typing import Optional
import json
from subprocess import check_output, run
from collections import Counter
import os

from all_scripts import get_all_scripts
from syntax_analysis import parse_shell_script, count_nodes
from project_root import get_project_root

def get_parser():
    """Build the command-line parser for the dynamic-analysis runner.

    Options:
        --bench: required string naming the benchmark to run.
        --run-input / --no-run-input: optional boolean flag (None when omitted).
        --run-deps / --no-run-deps: optional boolean flag (None when omitted).

    Returns:
        The configured argparse.ArgumentParser.
    """
    cli = argparse.ArgumentParser(
        prog='run_dynamic',
        description='runs the dynamic analysis',
    )
    cli.add_argument('--bench', type=str, required=True)
    # Both stage toggles share the same on/off flag behaviour.
    for flag in ('--run-input', '--run-deps'):
        cli.add_argument(flag, action=argparse.BooleanOptionalAction)
    return cli

def get_environment(root):
    """Return a copy of the current environment prepared for a benchmark run.

    BENCHMARK_SHELL is set to the path of infrastructure/run_dynamic_shell.py
    under ``root``; every other variable is inherited unchanged.
    """
    env = os.environ.copy()
    dynamic_shell = root / 'infrastructure' / 'run_dynamic_shell.py'
    env['BENCHMARK_SHELL'] = str(dynamic_shell)
    return env


def _script_command(script):
    """Return the argv list used to launch ``script``.

    An executable script is launched directly, exactly as before. A script
    that exists but has no execute permission is launched through bash
    instead of failing with PermissionError. A missing script is still
    launched directly so the original FileNotFoundError is preserved.
    """
    path = str(script)
    if os.path.isfile(path) and not os.access(path, os.X_OK):
        return ['bash', path]
    return [path]


def run_analysis(root: Path, bench: Path, run_input: bool, run_deps: bool):
    """Run the selected stages of one benchmark with BENCHMARK_SHELL set.

    Args:
        root: project root directory.
        bench: benchmark directory, relative to ``root``.
        run_input: when truthy, run the benchmark's input.sh before run.sh.
        run_deps: when truthy, run the benchmark's deps.sh first.

    The stages run in the order deps.sh, input.sh, run.sh; run.sh always
    runs. Exit statuses of the stages are not checked.
    """
    env = get_environment(root)
    bench_dir = root / bench
    if run_deps:
        run(_script_command(bench_dir / 'deps.sh'), env=env)
    if run_input:
        run(_script_command(bench_dir / 'input.sh'), env=env)
    run(_script_command(bench_dir / 'run.sh'), env=env)

if __name__ == '__main__':
    # Parse the command line, locate the project root, then run the requested
    # stages of the selected benchmark.
    cli_args = get_parser().parse_args()
    run_analysis(
        get_project_root(),
        bench=cli_args.bench,
        run_input=cli_args.run_input,
        run_deps=cli_args.run_deps,
    )
26 changes: 26 additions & 0 deletions infrastructure/run_dynamic_shell.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/usr/bin/env python3

import shlex
from pathlib import Path
from typing import Optional
import json
from subprocess import check_output, run
from collections import Counter
import sys
from time import perf_counter

from all_scripts import get_all_scripts
from syntax_analysis import parse_shell_script, count_nodes
from project_root import get_project_root

# CSV log that receives one "<command>,<elapsed seconds>" line per timed run.
time_file = get_project_root() / 'infrastructure' / 'target' / 'runtime_log.csv'

# Everything after the program name is the script to run plus its arguments.
command = sys.argv[1:]

# Time the script while it runs under bash.
start = perf_counter()
result = run(['bash', *command])
elapsed = perf_counter() - start

# Opening the log in append mode fails if its directory is missing, so create
# the directory first.
time_file.parent.mkdir(parents=True, exist_ok=True)
with time_file.open('a') as file:
    print(shlex.join(command), elapsed, sep=',', file=file)

# Pass the script's exit status on to the caller; otherwise this wrapper would
# always exit 0 and hide failures from whoever inspects $? afterwards.
# subprocess reports death by signal N as -N, which is mapped to the shell
# convention of 128 + N.
status = result.returncode
sys.exit(status if status >= 0 else 128 - status)

2 changes: 2 additions & 0 deletions infrastructure/target/runtime_log.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
/benchmarks/log-analysis/scripts/nginx.sh /benchmarks/log-analysis/input/nginx-logs /benchmarks/log-analysis/results/nginx.full,1.5968113659982919
/benchmarks/log-analysis/scripts/pcaps.sh /benchmarks/log-analysis/input/pcaps /benchmarks/log-analysis/results/pcaps.full,116.00431065500015
7 changes: 5 additions & 2 deletions log-analysis/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@ if [[ "$@" == *"--small"* ]]; then
suffix=".small"
fi

BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
echo "shell: $BENCHMARK_SHELL"

echo "nginx"
time $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix
$BENCHMARK_SHELL $scripts_dir/nginx.sh $nginx_input $results_dir/nginx$suffix
echo $?

echo "pcaps"
time $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix
$BENCHMARK_SHELL $scripts_dir/pcaps.sh $pcaps_input $results_dir/pcaps$suffix
echo $?
4 changes: 3 additions & 1 deletion max-temp/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,6 @@ export statistics_dir="$results_dir/statistics$suffix"

mkdir -p "$statistics_dir"

${scripts_dir}/temp-analytics.sh
BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}

$BENCHMARK_SHELL ${scripts_dir}/temp-analytics.sh
6 changes: 4 additions & 2 deletions media-conv/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@ if [[ "$@" == *"--small"* ]]; then
suffix=".small"
fi

BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}

echo "img_convert"
time $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log
$BENCHMARK_SHELL $scripts_dir/img_convert.sh $img_convert_input $results_dir/img_convert$suffix > $results_dir/img_convert$suffix.log
echo $?

echo "to_mp3"
time $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log
$BENCHMARK_SHELL $scripts_dir/to_mp3.sh $to_mp3_input $results_dir/to_mp3$suffix > $results_dir/to_mp3$suffix.log
echo $?
4 changes: 3 additions & 1 deletion nlp/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ else
export IN="$SUITE_DIR/inputs/pg"
fi

BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}

mkdir -p "outputs"

# Define the script names in a single variable
Expand Down Expand Up @@ -51,6 +53,6 @@ while IFS= read -r script; do
mkdir -p "$output_dir"

echo "$script"
time "$SHELL" "$script_file" "$output_dir"
# $BENCHMARK_SHELL is itself the interpreter (bash by default), so the script
# is handed to it directly. Passing "$SHELL" as its first argument would make
# it try to execute the shell binary as if it were a script.
$BENCHMARK_SHELL "$script_file" "$output_dir"
echo "$?"
done <<< "$script_names"
4 changes: 3 additions & 1 deletion oneliners/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ export SUITE_DIR=$(realpath $(dirname "$0"))
export TIMEFORMAT=%R
cd $SUITE_DIR

BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}

if [[ "$@" == *"--small"* ]]; then
scripts_inputs=(
"nfa-regex;1M"
Expand Down Expand Up @@ -42,6 +44,6 @@ do
output_file="./outputs/${parsed[0]}.out"

echo "$script_file"
time "$SHELL" "$script_file" "$input_file" > "$output_file"
# $BENCHMARK_SHELL is itself the interpreter (bash by default), so the script
# is handed to it directly. Passing "$SHELL" as its first argument would make
# it try to execute the shell binary as if it were a script.
$BENCHMARK_SHELL "$script_file" "$input_file" > "$output_file"
echo "$?"
done
74 changes: 6 additions & 68 deletions sklearn/run.sh
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,72 +1,10 @@
#!/bin/bash

PYTHON="python3"
OUT=${OUT:-$PWD/result}
TMP=${TMP:-$PWD/tmp}
#export tmp to env
export TMP
SCRIPTS=${SCRIPTS:-$PWD/scripts}
REPO_TOP=$(git rev-parse --show-toplevel)
eval_dir="${REPO_TOP}/sklearn"
scripts_dir="${eval_dir}/scripts"

# Ideally, we'll move on to piping rather than writing to a file
MODEL=$TMP/model.obj
X=$TMP/X_train.obj
y=$TMP/y_train.obj
CLASSES=$TMP/classes.obj
DUAL=false # should be converted to bool inside script
MAX_SQ_SUM=$TMP/max_squared_sum.obj
WARM_COEF=$TMP/warm_start_coef.obj
C_=$TMP/C_.obj
# Interpreter for the benchmark script; falls back to bash when unset or empty.
BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
# scripts/run.sh references PWD, so stop instead of running it from the wrong
# directory when the cd fails.
cd "$eval_dir" || exit 1
# "$@" forwards every argument as its own word, even if it contains spaces.
$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@"

echo $PYTHON >&2
echo "DIR: $DIR" >&2
echo "SCRIPTS: $SCRIPTS" >&2
echo "MODEL: $MODEL" >&2
echo "X: $X" >&2
echo "y: $y" >&2
echo "CLASSES: $CLASSES" >&2
echo "DUAL: $DUAL" >&2
echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
echo "WARM_COEF: $WARM_COEF" >&2
echo "C_: $C_" >&2

# TODO: Try this out on a larger dataset
# TODO: Benchmark each phase

# Generating model & samples
$PYTHON $SCRIPTS/gen_model.py 100
$PYTHON $SCRIPTS/gen_samples.py

# Validity checking functions
# These functions just check to make sure that the input is valid.
# If not they will raise an error. Otherwise, they do not mutate the data.
$PYTHON $SCRIPTS/check_solver.py $MODEL
penalty=$($PYTHON $SCRIPTS/penalty.py $MODEL)
$PYTHON $SCRIPTS/val_data.py $MODEL $X $y
$PYTHON $SCRIPTS/classes.py $MODEL $y # This should return a classes with just the unique classes in y
echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2
multiclass=$($PYTHON $SCRIPTS/check_multiclass.py $MODEL)
echo "------" >&2
# TODO: Benchmark each step of the pipeline
# Make a modified pipeline where each step writes its output to a file

# Calculations functions
$PYTHON $SCRIPTS/rownorm.py $X
n_classes=$($PYTHON $SCRIPTS/reshape_classes.py $MODEL $CLASSES)
$PYTHON $SCRIPTS/warm_start.py $MODEL $multiclass $n_classes # pipes coefficients

# Covtype dataset has 7 classes
echo "WARM_COEF: $WARM_COEF" >&2
echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2

echo "multiclass: $multiclass" >&2
echo "penalty: $penalty" >&2
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 1
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 2
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 3
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 4
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 5
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 6
$PYTHON $SCRIPTS/parallel.py $MODEL $X $y $C_ $WARM_COEF $MAX_SQ_SUM $multiclass $penalty 7

$PYTHON $SCRIPTS/zip_coef.py $MODEL
$PYTHON $SCRIPTS/adjust_coef.py $MODEL $X $multiclass $n_classes $RESULT/trained_model.obj
72 changes: 72 additions & 0 deletions sklearn/scripts/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/bin/bash
#
# Runs the sklearn benchmark pipeline as a sequence of python3 helper scripts.
# OUT, TMP, SCRIPTS and RESULT may be preset in the environment; otherwise they
# default to locations under the current working directory.

PYTHON="python3"
OUT=${OUT:-$PWD/result}
# RESULT is the directory the final step writes the trained model into. It was
# never assigned here, so with RESULT unset the model path collapsed to
# "/trained_model.obj". Default it to OUT, keeping any inherited RESULT value.
RESULT=${RESULT:-$OUT}
TMP=${TMP:-$PWD/tmp}
#export tmp to env
export TMP
SCRIPTS=${SCRIPTS:-$PWD/scripts}

# Ideally, we'll move on to piping rather than writing to a file
MODEL=$TMP/model.obj
X=$TMP/X_train.obj
y=$TMP/y_train.obj
CLASSES=$TMP/classes.obj
DUAL=false # should be converted to bool inside script
MAX_SQ_SUM=$TMP/max_squared_sum.obj
WARM_COEF=$TMP/warm_start_coef.obj
C_=$TMP/C_.obj

# Log the configuration to stderr.
# NOTE(review): DIR is not assigned anywhere in this script, so this line
# prints an empty value unless DIR is inherited from the environment.
echo $PYTHON >&2
echo "DIR: $DIR" >&2
echo "SCRIPTS: $SCRIPTS" >&2
echo "MODEL: $MODEL" >&2
echo "X: $X" >&2
echo "y: $y" >&2
echo "CLASSES: $CLASSES" >&2
echo "DUAL: $DUAL" >&2
echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2
echo "WARM_COEF: $WARM_COEF" >&2
echo "C_: $C_" >&2

# TODO: Try this out on a larger dataset
# TODO: Benchmark each phase

# Path arguments below are quoted so directories containing spaces survive.
# $multiclass, $penalty and $n_classes are captured script output and are left
# unquoted so the number of arguments passed on stays exactly as before.

# Generating model & samples
$PYTHON "$SCRIPTS/gen_model.py" 100
$PYTHON "$SCRIPTS/gen_samples.py"

# Validity checking functions
# These functions just check to make sure that the input is valid.
# If not they will raise an error. Otherwise, they do not mutate the data.
$PYTHON "$SCRIPTS/check_solver.py" "$MODEL"
penalty=$($PYTHON "$SCRIPTS/penalty.py" "$MODEL")
$PYTHON "$SCRIPTS/val_data.py" "$MODEL" "$X" "$y"
$PYTHON "$SCRIPTS/classes.py" "$MODEL" "$y" # This should return a classes with just the unique classes in y
echo "$PYTHON $SCRIPTS/check_multiclass.py $MODEL" >&2
multiclass=$($PYTHON "$SCRIPTS/check_multiclass.py" "$MODEL")
echo "------" >&2
# TODO: Benchmark each step of the pipeline
# Make a modified pipeline where each step writes its output to a file

# Calculations functions
$PYTHON "$SCRIPTS/rownorm.py" "$X"
n_classes=$($PYTHON "$SCRIPTS/reshape_classes.py" "$MODEL" "$CLASSES")
$PYTHON "$SCRIPTS/warm_start.py" "$MODEL" $multiclass $n_classes # pipes coefficients

# Covtype dataset has 7 classes
echo "WARM_COEF: $WARM_COEF" >&2
echo "MAX_SQ_SUM: $MAX_SQ_SUM" >&2

echo "multiclass: $multiclass" >&2
echo "penalty: $penalty" >&2
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 1
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 2
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 3
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 4
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 5
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 6
$PYTHON "$SCRIPTS/parallel.py" "$MODEL" "$X" "$y" "$C_" "$WARM_COEF" "$MAX_SQ_SUM" $multiclass $penalty 7

$PYTHON "$SCRIPTS/zip_coef.py" "$MODEL"
$PYTHON "$SCRIPTS/adjust_coef.py" "$MODEL" "$X" $multiclass $n_classes "$RESULT/trained_model.obj"
11 changes: 10 additions & 1 deletion uniq-ips/run.sh
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1 +1,10 @@
cat "logs-popcount-org.txt" | sort | uniq > "out.txt"
#!/bin/bash
#
# Entry point for the uniq-ips benchmark: changes into the benchmark directory
# and runs scripts/run.sh through $BENCHMARK_SHELL (bash by default),
# forwarding any arguments.

# Without the repository root every path below would be wrong, so stop here
# if git cannot report it.
REPO_TOP=$(git rev-parse --show-toplevel) || exit 1
eval_dir="${REPO_TOP}/uniq-ips"
scripts_dir="${eval_dir}/scripts"

BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
# scripts/run.sh puts files in its current directory, so do not run it at all
# when the cd fails.
cd "$eval_dir" || exit 1
# "$@" forwards every argument as its own word, even if it contains spaces.
$BENCHMARK_SHELL "$scripts_dir/run.sh" "$@"


1 change: 1 addition & 0 deletions uniq-ips/scripts/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
# Sort the lines of logs-popcount-org.txt, collapse adjacent duplicates, and
# write the result to out.txt. Both files are relative to the current directory.
cat "logs-popcount-org.txt" \
  | sort \
  | uniq > "out.txt"
3 changes: 2 additions & 1 deletion unix50/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ fi
echo executing unix50 $(date)

mkdir -p "outputs"
BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}

for script_input in ${scripts_inputs[@]};
do
Expand All @@ -67,6 +68,6 @@ do
output_file="./outputs/$script.out"

echo "$script"
time $SHELL $script_file $input_file > $output_file
$BENCHMARK_SHELL $script_file $input_file > $output_file
echo $?
done
3 changes: 2 additions & 1 deletion web-index/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

cd "$(dirname "$0")"

BENCHMARK_SHELL=${BENCHMARK_SHELL:-bash}
directory_path="inputs/articles"

if [ ! -d "$directory_path" ]; then
Expand All @@ -25,5 +26,5 @@ fi
mkdir -p "$OUTPUT_BASE"

echo "web-index"
time $SHELL ./scripts/ngrams.sh "$OUTPUT_BASE"
$BENCHMARK_SHELL ./scripts/ngrams.sh "$OUTPUT_BASE"
echo $?
Loading