redhat-nfvpe · wchwhite · Oct 26, 2023 · bartwensley · Nov 1, 2023 · bartwensley
diff --git a/Dockerfile-rteval b/Dockerfile-rteval
@@ -0,0 +1,13 @@
+# rteval image on CentOS Stream 9 (rt repo enabled). Packages are installed before the scripts are copied so script edits reuse the cached package layer; curl -f makes a failed dumb-init download fail the build.
+FROM centos:stream9
+USER root
+RUN yum install --enablerepo=rt rteval trace-cmd procps-ng -y \
+    && yum clean all && rm -rf /var/cache/yum /var/cache/dnf \
+    && curl -fsSL -o /root/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.2.2/dumb-init_1.2.2_amd64 \
+    && chmod 755 /root/dumb-init
+COPY rteval/cmd.sh /root/
+COPY common-libs /root/common-libs
+RUN chmod 755 /root/cmd.sh
+WORKDIR /root
+ENTRYPOINT ["/root/dumb-init", "--"]
+CMD ["/root/cmd.sh"]
diff --git a/rteval/cmd.sh b/rteval/cmd.sh
@@ -0,0 +1,265 @@
+#!/bin/bash
+
+# env vars:
+#   LOAD_CPUS: Specifies CPUs on which to run a CPU-intensive task. If not set, defaults to an empty string.
+#   MEASUREMENT_CPUS: Designates the CPUs used for measurement during the test (passed to rteval as --measurement-cpulist). If not set, defaults to an empty string.
+#   THRESHOLD: Determines a latency threshold. Default is 150.
+#   TRACE_CMD: If set to 'y', trace-cmd will be utilized. Options are 'y' or 'n'. Default is 'n'.
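+#   SET_BREAK: If set to 'y', THRESHOLD is passed as --cyclictest-breaktrace instead of --cyclictest-threshold, for use with another tool such as rtla or perf. Switched off with a warning when TRACE_CMD is 'y'. Options are 'y' or 'n'. Default is 'n'.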
+#   GEN_REPORT: If active ('y'), a report will be generated post-test. Choices include 'y' or 'n'. Default is 'n'.
+#   DURATION: Specifies the duration of the test. Default value is 720 minutes (12 hours).
+#   DELAY: Dictates the initial delay in seconds before the test starts. Default is 0 seconds.
+#   ONLYLOAD: If set to 'y', only the loading test will be executed. Options are 'y' or 'n'. Default is 'n'.
+#   QUIET: If enabled ('y'), the tool operates in quiet mode. Options are 'y' or 'n'. Default is 'n'.
+#   VERBOSE: If set to 'y', the tool operates in verbose mode. Options are 'y' or 'n'. Default is 'n'. Overrides QUIET.
+#   PAUSE: If set to 'y', a pause is induced after the test completes. Options are 'y' or 'n'. Default is 'n'.
+#   MANUAL: If enabled ('y'), the rteval command is only printed and the container then sleeps, so the test can be started by hand from a shell. Options are 'y' or 'n'. Default is 'n'.
+#   EXTRA_ARGS: Permits custom options to be added. Default is blank. Provide as a space-separated list.
+#   EVENTS: Lists multiple trace events. Default includes a combination of scheduler, IRQ, and softIRQ events. Events should be provided in a comma-separated manner.
+#   LIST_TRACE_EVENTS: If set to 'y', lists available trace events and exits. Options are 'y' or 'n'. Default is 'n'.
+
+source common-libs/functions.sh
+
+# Functions for logging
+# Create a timestamped log file under /var/log/app/<tool> and print its path.
+# $1: tool name used as the log sub-directory (defaults to "rteval").
+function create_file() {
+    local tool="${1:-rteval}"
+    local log_dir="/var/log/app/$tool"
+    local file_path
+    mkdir -p "$log_dir"
+    file_path="$log_dir/$(date +%Y%m%d%H%M%S).log"
+    touch "$file_path"
+    echo "$file_path"
+}
+
+# Print a message on stdout and append the same text to "$log_file".
+function log_echo() {
+    local line=$1
+    echo "$line" | tee -a "$log_file"
+}
+
+# help=y: print the environment variables understood by this script and exit
+# before any log file is created or any tool is started.
+if [[ "${help:-}" == "y" ]]; then
+    printf '%s\n' "Usage:" "" "Options:"
+    echo "  help=y                  Show this help message"
+    echo "  LOAD_CPUS=value         Specifies CPUs to run a CPU-intensive task. Default is blank."
+    echo "  MEASUREMENT_CPUS=value  Designates CPUs to monitor during test. Default is blank."
+    echo "  THRESHOLD=value         Determines a latency threshold. Default is 150."
+    echo "  TRACE_CMD=value         Use trace-cmd. Options are 'y' or 'n'. Default is 'n'."
+    echo "  SET_BREAK=value         Creates a break point. Options are 'y' or 'n'. Default is 'n'."
+    echo "  GEN_REPORT=value        Generate report post-test. Choices are 'y' or 'n'. Default is 'n'."
+    echo "  DURATION=value          Set test duration. Default is 720m (12 hours)."
+    echo "  DELAY=value             Delay in seconds before test starts. Default is 0."
+    echo "  ONLYLOAD=value          Only execute the loading test. Options are 'y' or 'n'. Default is 'n'."
+    echo "  QUIET=value             Operate in quiet mode. Options are 'y' or 'n'. Default is 'n'."
+    echo "  VERBOSE=value           Operate in verbose mode. Options are 'y' or 'n'. Default is 'n'. Overrides QUIET."
+    echo "  PAUSE=value             Pause after test completion. Options are 'y' or 'n'. Default is 'n'."
+    echo "  MANUAL=value            Allow manual test initiation. Options are 'y' or 'n'. Default is 'n'."
+    echo "  EXTRA_ARGS=value        Specify custom options. Default is blank. Provide as space-separated list."
+    echo "  EVENTS=value            Specify multiple trace events. Default includes scheduler, IRQ, and softIRQ events. Provide as comma-separated list."
+    echo "  LIST_TRACE_EVENTS=y     List available trace events and exit."
+    exit 0
+fi
+
+# LIST_TRACE_EVENTS=y: print the events reported by trace-cmd, then exit.
+case "${LIST_TRACE_EVENTS:-}" in
+    y) trace-cmd list -e; exit 0 ;;
+esac
+
+
+# Default parameters for rteval (MEASUREMENT_CPU is accepted as an alias for MEASUREMENT_CPUS, the name read below)
+LOAD_CPUS=${LOAD_CPUS:-""}
+MEASUREMENT_CPUS=${MEASUREMENT_CPUS:-${MEASUREMENT_CPU:-}}
+THRESHOLD=${THRESHOLD:-150}
+TRACE_CMD=${TRACE_CMD:-n}
+SET_BREAK=${SET_BREAK:-n}
+GEN_REPORT=${GEN_REPORT:-n}
+DURATION=${DURATION:-720m}
+DELAY=${DELAY:-0}
+ONLYLOAD=${ONLYLOAD:-n}
+QUIET=${QUIET:-n}
+VERBOSE=${VERBOSE:-n}
+PAUSE=${PAUSE:-n}
+MANUAL=${MANUAL:-n}
+EXTRA_ARGS=${EXTRA_ARGS:-""}
+EVENTS=${EVENTS:-"sched:sched_switch,sched:sched_wakeup,sched:sched_wakeup_new,sched:sched_stat_wait,sched:sched_stat_iowait,sched:sched_stat_blocked,irq"}
+
+# Create the log file first so that every later message can be recorded in it.
+log_file=$(create_file rteval)
+log_echo "Storing log files at $log_file"
+log_echo "INFO: Mount a volume at /var/log/app to persist logs!!!"
+
+# Split EXTRA_ARGS (space-separated) and EVENTS (comma-separated) into arrays.
+# An IFS assignment prefixed to read applies to that read only, so the
+# shell-wide IFS is never modified and needs no save/restore.
+IFS=' ' read -r -a custom_options_arr <<< "$EXTRA_ARGS"
+IFS=',' read -r -a events_array <<< "$EVENTS"
+
+# Record the environment and the kernel command line for later debugging.
+log_echo "############# dumping env ###########"
+log_echo "$(env)"
+log_echo "#####################################"
+
+log_echo " "
+log_echo "########## container info ###########"
+log_echo "/proc/cmdline:"
+log_echo "$(< /proc/cmdline)"
+log_echo "#####################################"
+
+log_echo "**** uid: $UID ****"
+
+# Abort early when one of the required tools is not on PATH.
+for required in rteval trace-cmd; do
+    if ! command -v "$required" >/dev/null 2>&1; then
+        log_echo "$required required but not installed. Aborting"
+        exit 1
+    fi
+done
+
+# Log the node name and kernel release, then the installed package versions.
+log_echo "$(uname -nr)"
+log_echo "$(rpm -q rteval)"
+log_echo "$(rpm -q trace-cmd)"
+
+# Trim EVENTS, reject embedded whitespace, then re-split it (the earlier split saw the untrimmed value) and validate each entry.
+EVENTS="$(echo -e "${EVENTS}" | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+if [[ $EVENTS =~ [[:space:]] ]]; then
+    log_echo "Error: The EVENTS variable contains spaces between words. It should be a comma-separated list without spaces."
+    exit 1
+fi
+IFS=',' read -r -a events_array <<< "$EVENTS"
+for e in "${events_array[@]}"; do
+    if [[ ! "$e" =~ ^[a-zA-Z0-9_]+(:[a-zA-Z0-9_]+)?$ ]]; then
+        log_echo "Invalid event format: $e. It must be one word or two sets of words with a colon in between. Each word can include underscore '_' but no spaces."
+        exit 1
+    fi
+done
+
+# When tracing is requested (TRACE_CMD or SET_BREAK), warn about or switch off
+# the options that would get in its way.
+if [[ "$SET_BREAK" == "y" || "$TRACE_CMD" == "y" ]]; then
+    # Report generation stays enabled; the user is only warned about the noise.
+    [[ "$GEN_REPORT" != "y" ]] || log_echo "Warning: sosreport generation is enabled with tracing options. This has the potential to inflate the trace data with additional noise."
+    if [[ "$ONLYLOAD" == "y" ]]; then
+        log_echo "Warning: Disabling onlyload because tracing is enabled. Onlyload is only used when another tool is doing the measurements and tracing."
+        ONLYLOAD="n"
+    fi
+    # TRACE_CMD wins when both it and SET_BREAK were requested.
+    if [[ "$TRACE_CMD" == "y" && "$SET_BREAK" == "y" ]]; then
+        log_echo "Warning: Disabling SET_BREAK because tracing via trace-cmd is enabled. Please set TRACE_CMD == 'n' to enable SET_BREAK for using with another tool such as rtla or perf."
+        SET_BREAK="n"
+    fi
+fi
+
+# Assemble the command line as an array; an element may hold several words.
+command_args=()
+if [[ "$TRACE_CMD" == "y" ]]; then
+    # Run rteval under trace-cmd, with one -e option per requested event.
+    command_args+=("trace-cmd record")
+    for event in "${events_array[@]}"; do
+        command_args+=("-e" "$event")
+    done
+fi
+command_args+=("rteval" "--duration ${DURATION}")
+
+# VERBOSE wins over QUIET when both are requested.
+if [[ "${VERBOSE}" == "y" ]]; then
+    if [[ "${QUIET}" == "y" ]]; then
+        log_echo "Warning: VERBOSE mode and QUIET mod are both enabled. VERBOSE will take precedence."
+    fi
+    command_args+=("--verbose")
+elif [[ "${QUIET}" == "y" ]]; then
+    command_args+=("--quiet")
+fi
+
+# CPU lists: warn about a possibly missing counterpart list, then pass them on.
+if [[ -n "${LOAD_CPUS}" ]]; then
+    if [[ -z "${MEASUREMENT_CPUS}" && $ONLYLOAD == "n" ]]; then
+        log_echo "Warning: LOAD_CPUS is set but MEASUREMENT_CPUS is not set. Make sure this is intentional."
+    fi
+    command_args+=("--loads-cpulist=${LOAD_CPUS}")
+fi
+
+if [[ "${ONLYLOAD}" == "y" ]]; then
+    # Load generation only: measurement, threshold and report options are skipped.
+    command_args+=("--onlyload")
+else
+    if [[ -n "${MEASUREMENT_CPUS}" ]]; then
+        if [[ -z "${LOAD_CPUS}" ]]; then
+            log_echo "Warning: MEASUREMENT_CPUS is set but LOAD_CPUS is not set. Make sure this is intentional."
+        fi
+        command_args+=("--measurement-cpulist=${MEASUREMENT_CPUS}")
+    fi
+
+    # With SET_BREAK or TRACE_CMD, THRESHOLD is passed as a break-trace value instead.
+    threshold_flag="--cyclictest-threshold"
+    if [[ "$SET_BREAK" == "y" || "$TRACE_CMD" == "y" ]]; then
+        threshold_flag="--cyclictest-breaktrace"
+    fi
+    command_args+=("${threshold_flag}=${THRESHOLD}")
+
+    if [[ "${GEN_REPORT}" == "y" ]]; then
+        command_args+=("--sysreport")
+    fi
+fi
+
+# Append the user-supplied options verbatim, one array element per word.
+if [[ -n "$EXTRA_ARGS" ]]; then
+    command_args+=("${custom_options_arr[@]}")
+fi
+
+# Flatten the argument array into one space-separated string and show it.
+printf -v formatted_command '%s ' "${command_args[@]}"
+log_echo "=================================================="
+log_echo "RUNNING COMMAND:"
+log_echo "$formatted_command"
+log_echo "=================================================="
+# MANUAL mode: print how to reach the container, then sleep forever; the command above is not executed.
+if [[ "${MANUAL}" == "y" ]]; then
+    log_echo "=================== MANUAL MODE ==================="
+    log_echo "Entering into MANUAL intervention mode."
+    log_echo "Access the container with one of the following based on your environment:"
+    log_echo "- Podman: podman exec -it ${HOSTNAME} /bin/bash"
+    log_echo "- OC:     oc exec -it rteval -- /bin/bash"
+    log_echo "=================================================="
+    sleep infinity
+fi
+
+if [[ "${DELAY}" != "0" ]]; then
+    log_echo "Pausing for ${DELAY} seconds before test..."
+    sleep "${DELAY}"
+fi
+# Stream stdout and stderr to the console and the log while the test runs; eval re-splits multi-word elements, so EXTRA_ARGS must be trusted input.
+eval "$formatted_command" 2>&1 | tee -a "$log_file"
+
+log_name="${log_file%.log}" # log path without its .log suffix; prefix for the artifact copies below
+log_echo "=================================================="
+if [ -f "/root/trace.dat" ]; then
+    cp "/root/trace.dat" "${log_name}_rteval_trace.dat"
+    log_echo "TRACE DATA:"
+    log_echo "Trace data copied to: ${log_name}_rteval_trace.dat"
+    log_echo "Either mount storage or set PAUSE=y to retrieve it."
+fi
+# Keep a copy of the sosreport archive next to the log when a report was requested.
+if [[ "$GEN_REPORT" == "y" ]]; then
+    log_echo "SOSREPORT was generated. Moving it to: ${log_name}_rteval_sosreport.tar.xz"
+    cp /var/tmp/sosreport*rteval*tar.xz "${log_name}_rteval_sosreport.tar.xz"
+fi
+# PAUSE=y: print how to fetch the artifacts, then sleep forever so they stay reachable.
+if [[ "$PAUSE" == "y" ]]; then
+    log_echo "================== RETRIEVAL INFO ================="
+    log_echo "DONE: If a trace was collected you can retrieve it with:"
+    log_echo "- OC:     oc cp rteval:/root/trace.dat trace.dat"
+    log_echo "- Podman: podman cp ${HOSTNAME}:/root/trace.dat trace.dat"
+    log_echo "If a sosreport was generated you can retrieve it with:"
+    log_echo "- OC:     oc cp rteval:${log_name}_rteval_sosreport.tar.xz sosreport-rteval.tar.xz"
+    log_echo "- Podman: podman cp ${HOSTNAME}:${log_name}_rteval_sosreport.tar.xz sosreport-rteval.tar.xz"
+    log_echo "Pausing after run."
+    log_echo "Access the container with one of the following based on your environment:"
+    log_echo "- Podman: podman exec -it ${HOSTNAME} /bin/bash"
+    log_echo "- OC:     oc exec -it rteval -- /bin/bash"
+    log_echo "=================================================="
+    sleep infinity
+fi
diff --git a/sample-yamls/pod_rteval.yaml b/sample-yamls/pod_rteval.yaml
@@ -0,0 +1,22 @@
+apiVersion: v1
+kind: Pod  # sample pod that runs rteval with the defaults from cmd.sh
+metadata:
+  name: rteval  # the "oc exec" / "oc cp" hints printed by cmd.sh use this pod name
+  annotations:
+    cpu-load-balancing.crio.io: "disable"
+    irq-load-balancing.crio.io: "disable"
+    cpu-quota.crio.io: "disable"
+spec:
+  runtimeClassName: performance-openshift-node-performance-profile
+  restartPolicy: Never
+  containers:
+  - name: rteval
+    image: quay.io/container-perf-tools/rteval
+    imagePullPolicy: Always
+    env:
+    - name: tool
+      value: "rteval"
+    securityContext:
+      privileged: true
+  nodeSelector:
+    node-role.kubernetes.io/worker: ""
diff --git a/sample-yamls/pod_rteval_loadgen.yaml b/sample-yamls/pod_rteval_loadgen.yaml
@@ -0,0 +1,27 @@
+apiVersion: v1
+kind: Pod  # sample pod that uses rteval as a load generator only
+metadata:
+  name: rteval  # the "oc exec" / "oc cp" hints printed by cmd.sh use this pod name
+  annotations:  # CRI-O annotations; presumably tied to the runtime class below - TODO confirm
+    cpu-load-balancing.crio.io: "disable"
+    irq-load-balancing.crio.io: "disable"
+    cpu-quota.crio.io: "disable"
+spec:
+  runtimeClassName: performance-openshift-node-performance-profile
+  restartPolicy: Never
+  containers:
+  - name: rteval
+    image: quay.io/container-perf-tools/rteval
+    imagePullPolicy: Always
+    env:
+    - name: tool  # NOTE(review): cmd.sh does not read this from the environment; presumably used by shared tooling - confirm
+      value: "rteval"
+    # Generate load on CPU 0 only; cmd.sh passes this value as --loads-cpulist=0.
+    - name: LOAD_CPUS
+      value: "0"
+    - name: ONLYLOAD  # cmd.sh adds --onlyload and skips the measurement, threshold and report options
+      value: "y"
+    securityContext:
+      privileged: true
+  nodeSelector:
+    node-role.kubernetes.io/worker: ""
diff --git a/sample-yamls/pod_rteval_tracecmd.yaml b/sample-yamls/pod_rteval_tracecmd.yaml
@@ -0,0 +1,24 @@
+apiVersion: v1
+kind: Pod  # sample pod that runs rteval under trace-cmd
+metadata:
+  name: rteval  # the "oc exec" / "oc cp" hints printed by cmd.sh use this pod name
+  annotations:  # CRI-O annotations; presumably tied to the runtime class below - TODO confirm
+    cpu-load-balancing.crio.io: "disable"
+    irq-load-balancing.crio.io: "disable"
+    cpu-quota.crio.io: "disable"
+spec:
+  runtimeClassName: performance-openshift-node-performance-profile
+  restartPolicy: Never
+  containers:
+  - name: rteval
+    image: quay.io/container-perf-tools/rteval
+    imagePullPolicy: Always
+    env:
+    - name: tool  # NOTE(review): cmd.sh does not read this from the environment; presumably used by shared tooling - confirm
+      value: "rteval"
+    - name: TRACE_CMD  # cmd.sh prefixes the rteval command with "trace-cmd record -e <event> ..."
+      value: "y"
+    securityContext:
+      privileged: true  # presumably needed for tracing inside the container - TODO confirm
+  nodeSelector:
+    node-role.kubernetes.io/worker: ""