Enhance benchmark results reporting (privacy-scaling-explorations#1166)

This PR enhances the reporting functionality for GitHub Actions-triggered circuit benchmarks. Results are saved in PostgreSQL and are accessible via Grafana.

- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [ ] This change requires a documentation update

- A new Python module is introduced for results processing and saving (see the env.json sketch below for the configuration it reads).
- getSysstat.sh now only displays the test timers.
- New action step: Calculate Benchmark Result prints a Grafana URL linking to detailed test results and a downloadable prover log file.
- A new sysstat collection script (sadf.sh) handles statistics for tests that span past midnight (00:00).
- execBench.sh: added missing bench tests (bytecode, exp, copy, pi).
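
For context, here is a minimal sketch of the env.json layout the new reporting module reads (key names come from the module code below; all values are placeholders):

```json
{
  "db": {
    "user": "<postgres user>",
    "password": "<postgres password>",
    "host": "<postgres host>",
    "database": "<database name>"
  },
  "grafana_dashboard_prefix": "<grafana dashboard URL prefix>",
  "s3endpoint": "<s3 bucket URL prefix, e.g. https://zkevm-chain-testing.s3.eu-central-1.amazonaws.com/>"
}
```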

---------

Co-authored-by: Chih Cheng Liang <[email protected]>
AronisAt79 and ChihChengLiang authored Feb 21, 2023
1 parent 34a58ce commit e9eda03
Showing 19 changed files with 407 additions and 48 deletions.
35 changes: 35 additions & 0 deletions .github/proverCiScripts/benchmarks_result_reporting/reporting_main.py
@@ -0,0 +1,35 @@
import argparse, json
from pprint import pprint
import reporting_modules as rmod

env = json.load(open('/home/ubuntu/env.json'))
cstats = '/home/ubuntu/cpu.stats'
mstats = '/home/ubuntu/mem.stats'

def main():
    parser = argparse.ArgumentParser(
        prog = 'BenchmarkResults',
        usage = 'python3 reporting_main.py <pr> <circuit> <degree> <test_id>',
        description = 'Writes circuit benchmark results to postgresql, uploads logfiles to s3 bucket',
    )
    parser.add_argument('pr')
    parser.add_argument('circuit')
    parser.add_argument('degree')
    parser.add_argument('test_id')
    args = parser.parse_args()
    pr, circuit, degree, test_id = (args.pr, args.circuit, args.degree, args.test_id)
    test_result = rmod.log_processor(pr, circuit, degree)
    cpustats, memstats, sysstat = rmod.calc_stats(cstats, mstats)
    data = rmod.prepare_result_dataframe(test_result, sysstat, env, test_id)
    table = 'testresults_circuitbenchmark'
    engine = rmod.pgsql_engine(env['db'])
    data.to_sql(table, engine, if_exists='append')
    rmod.write_mem_time(engine, memstats, test_id)
    rmod.write_cpuall_time(engine, cpustats, test_id)

    url = f'{env["grafana_dashboard_prefix"]}{test_id}'.replace(" ", "")
    print(f'Test Result: {url}')

if __name__ == '__main__':
    main()
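
For reference, processResults.sh (added below) invokes this entry point with the PR number, circuit, degree, and a generated test id. An illustrative call, with a test_id following the `$circuit-$degree-Benchmark-PR$prnumber--$time` pattern used there:

```sh
python3 reporting_main.py 1166 evm 19 evm-19-Benchmark-PR1166--1676955600
```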

194 changes: 194 additions & 0 deletions .github/proverCiScripts/benchmarks_result_reporting/reporting_modules.py
@@ -0,0 +1,194 @@
import pandas as pd, json, datetime, itertools
from sqlalchemy import create_engine, text
import warnings, time, os


warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)
pd.options.mode.chained_assignment = None

def pgsql_engine(pgsqldb):
    '''
    creates a psql engine instance
    '''
    user = pgsqldb['user']
    password = pgsqldb['password']
    host = pgsqldb['host']
    database = pgsqldb['database']
    engine = create_engine(f'postgresql://{user}:{password}@{host}:5432/{database}')

    return engine

def prepare_result_dataframe(test_result, sysstat, env, test_id):
    '''
    prepares postgres data (as a dataframe) for table testresults_circuitbenchmark
    '''
    try:
        r = {
            'pull_request'   : test_result['pull_request'],
            'test_id'        : test_id,
            'circuit'        : test_result['circuit'],
            'degree'         : test_result['degree'],
            'test_result'    : test_result['result'],
            'test_date'      : datetime.datetime.now().date(),
            'setup_gen'      : test_result['setup_gen'],
            'proof_gen'      : test_result['proof_gen'],
            'proof_ver'      : test_result['proof_ver'],
            'max_ram'        : sysstat['max_ram'],
            'cpu_all_Average': sysstat['cpu_all_Average'],
            'cpu_all_Max'    : sysstat['cpu_all_Max'],
            'cpu_count'      : sysstat['cpu_count'],
            'sysstats_url'   : f'{env["grafana_dashboard_prefix"]}{test_id}',
            'logsurl'        : f'{env["s3endpoint"]}{test_id}.tar.gz'
        }
    except Exception as e:
        # Previously the exception was only printed and execution fell through
        # to an undefined name; re-raise so the failure is visible in CI.
        print(e)
        raise

    r = pd.DataFrame([r])
    r = r.set_index('test_date')

    return r

def write_mem_time(engine, mem_statistics, test_id, dummy=False):
    '''
    adds mem stats df as time series data to table testresults_cbmemtime
    '''
    table = 'testresults_cbmemtime'
    # tag every sample row with constant identifier columns
    mem_statistics['dummy'] = f'{dummy}'
    mem_statistics['test_id'] = f'{test_id}'
    mem_statistics.to_sql(table, engine, if_exists='append', index=False)

def write_cpuall_time(engine, cpu_statistics, test_id, dummy=False):
    '''
    adds cpu stats df as time series data to table testresults_cbcpualltime
    '''
    table = 'testresults_cbcpualltime'
    cpu_statistics['dummy'] = f'{dummy}'
    cpu_statistics['test_id'] = f'{test_id}'
    cpu_statistics.to_sql(table, engine, if_exists='append', index=False)

def calc_stats(cstats, mstats):
    '''
    returns 2 dataframes with cpu/mem stats to be consumed by the postgresql engine
    and a dict with average/max cpu and max ram utilization during the benchmark
    '''
    dfcpu, cpus = load_stats(cstats)
    cpustats, cpu_all_Max, cpu_all_Average = process_cpustats(dfcpu)
    dfmem, _ = load_stats(mstats)
    memstats, max_ram = process_memstats(dfmem)
    sysstat = {
        'cpu_all_Average': cpu_all_Average,
        'cpu_all_Max'    : cpu_all_Max,
        'cpu_count'      : cpus,
        'max_ram'        : f'{max_ram}Gb'
    }

    return cpustats, memstats, sysstat

def log_processor(pull_request, circuit, degree):
    '''
    extracts test metadata and result metrics from the prover logfile
    '''
    SETUP_PREFIX = "[Setup generation]"
    PROOFGEN_PREFIX = "[Proof generation]"
    PROOFVER_PREFIX = "[Proof verification]"
    logfile = [i for i in os.listdir('/home/ubuntu/') if 'proverlog' in i][0]
    with open(f'/home/ubuntu/{logfile}', 'r') as f:
        logdata = f.read().split("\n")
    # "running N tests" line from the cargo test harness; '0' means nothing ran
    running = [i for i in logdata if 'running' in i and 'test' in i][0].split()[1]
    if running != '0':
        r = [i.split(":")[1].split(".")[0].replace(" ", "") for i in logdata if 'test result' in i][0]
        if r == 'ok':
            result = 'PASSED'
            # The timer lines pad label and duration with a run of dots;
            # itertools.groupby collapses consecutive repeated characters, so the
            # dotted leader becomes a single '.' and everything after the first
            # '.' is the duration.
            try:
                sg = ''.join(g[0] for g in itertools.groupby([i for i in logdata if 'End' in i and SETUP_PREFIX in i][0])).split('.', 1)[-1]
            except Exception:
                sg = 'None'
            try:
                pg = ''.join(g[0] for g in itertools.groupby([i for i in logdata if 'End' in i and PROOFGEN_PREFIX in i][0])).split('.', 1)[-1]
            except Exception:
                pg = 'None'
            try:
                pv = ''.join(g[0] for g in itertools.groupby([i for i in logdata if 'End' in i and PROOFVER_PREFIX in i][0])).split('.', 1)[-1]
            except Exception:
                pv = 'None'
            logdata = {
                'pull_request': pull_request,
                'circuit'     : circuit,
                'degree'      : degree,
                'result'      : result,
                'setup_gen'   : sg,
                'proof_gen'   : pg,
                'proof_ver'   : pv
            }
        else:
            result = 'FAILED'
            logdata = {
                'pull_request': pull_request,
                'circuit'     : circuit,
                'degree'      : degree,
                'result'      : result,
                'setup_gen'   : 'None',
                'proof_gen'   : 'None',
                'proof_ver'   : 'None'
            }

    else:
        result = 'None'
        logdata = {
            'pull_request': pull_request,
            'circuit'     : circuit,
            'degree'      : degree,
            'result'      : result,
            'setup_gen'   : 'NoTestExecuted',
            'proof_gen'   : 'NoTestExecuted',
            'proof_ver'   : 'NoTestExecuted'
        }

    return logdata

def load_stats(file):
    '''
    loads raw mem/cpu sar data from csv to dataframe
    '''
    try:
        with open(file, 'r') as filedata:
            filedatalist = filedata.read().splitlines()
        # the LINUX-RESTART marker carries the cpu count, e.g. "LINUX-RESTART (8 CPU)"
        header = [i for i in filedatalist if 'LINUX-RESTART' in i][0]
        cpus = header.split('(')[1].split()[0]
        cpudatalist = [i for i in filedatalist if 'LINUX-RESTART' not in i]
        columns = cpudatalist[0].split(';')
        cpudatalist = [i for i in cpudatalist if 'hostname' not in i]
        df = pd.DataFrame([i.split(';') for i in cpudatalist], columns=columns)
        return df, cpus
    except Exception as e:
        print(e)
        return None, None  # keep the two-value shape expected by callers

def process_cpustats(statsdf):
    '''
    accepts cpu stats raw data from csv and returns a dataframe for further processing
    '''
    statsdf = statsdf[['timestamp', '%idle']]
    statsdf['%idle'] = pd.to_numeric(statsdf['%idle'])
    # overall utilization is the complement of the idle percentage
    statsdf['utilizationall'] = statsdf['%idle'].apply(lambda x: round(100.0 - x, 2))
    statsdf = statsdf[['timestamp', 'utilizationall']]
    cpu_all_Max = statsdf['utilizationall'].max()
    cpu_all_Average = statsdf['utilizationall'].mean()
    return statsdf, cpu_all_Max, cpu_all_Average


def process_memstats(df):
    '''
    accepts ram stats raw data and returns a dataframe for further processing
    '''
    statsdf = df[['timestamp', 'kbmemused']]
    statsdf['kbmemused'] = pd.to_numeric(statsdf['kbmemused'])
    # convert kB to GB for reporting
    statsdf['utilizationgb'] = statsdf['kbmemused'].apply(lambda x: round(x / 1000000.0, 2))
    statsdf = statsdf[['timestamp', 'utilizationgb']]
    max_ram = statsdf['utilizationgb'].max()
    return statsdf, max_ram
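
A quick way to sanity-check the stats pipeline by hand, assuming sar CSV exports named cpu.stats and mem.stats (as produced by sadf.sh below) sit in the working directory; the printed values are illustrative:

```python
import reporting_modules as rmod

# calc_stats returns the two per-sample dataframes plus a summary dict.
cpustats, memstats, sysstat = rmod.calc_stats('cpu.stats', 'mem.stats')
print(sysstat)  # e.g. {'cpu_all_Average': ..., 'cpu_all_Max': ..., 'cpu_count': '8', 'max_ram': '42.0Gb'}
```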
14 changes: 14 additions & 0 deletions .github/proverCiScripts/execBench.sh
@@ -29,6 +29,18 @@ case $circuit in
"super")
run_suffix="super_circuit_prover"
;;
"bytecode")
run_suffix="bytecode_circuit_prover"
;;
"pi")
run_suffix="pi_circuit_prover"
;;
"exp")
run_suffix="exp_circuit_prover"
;;
"copy")
run_suffix="copy_circuit_prover"
;;
*)
echo "No proper value"
exit 1
@@ -40,3 +52,5 @@ logfile=$_date--${circuit}_bench-$k.proverlog

export RUST_BACKTRACE=1
DEGREE=$k ~/.cargo/bin/cargo test --profile bench bench_${run_suffix} -p circuit-benchmarks --features benches -- --nocapture > "$target_dir/$logfile" 2>&1

exit 0
17 changes: 0 additions & 17 deletions .github/proverCiScripts/getSysstat.sh
@@ -6,22 +6,5 @@ prnumber=$1
base_dir="/home/ubuntu/CI_Prover_Benches/"
target_dir="$base_dir"PR"$prnumber"

sar -uh > cpu.stats
sar -rh > mem.stats

sed -i -e '1,5d' cpu.stats
sed -i -e '1,5d' mem.stats
sed -i -e '$ d' cpu.stats
sed -i -e '$ d' mem.stats

minIdleCPU=$(cat cpu.stats | awk '{ print $8 }' | sed 's/%//g' | sort -n | head -1)
maxUsedCPU=$(bc <<< "scale=2; 100-$minIdleCPU")
maxMemUsed=$(cat mem.stats | awk '{ print $4 }' | sed 's/G//g' | sort -n | tail -1)

logfile=$(ls $target_dir | grep proverlog | xargs -n 1 basename)
tail -12 $target_dir/$logfile

echo "Maximum CPU Usage at $maxUsedCPU%"
echo "Maximum Mem Usage at ${maxMemUsed}Gb"

mv $target_dir/$logfile /home/ubuntu/CI_Prover_Benches/ProverLogs/"$logfile"_PR"$prnumber"
42 changes: 42 additions & 0 deletions .github/proverCiScripts/processResults.sh
@@ -0,0 +1,42 @@
#!/bin/bash
set -e
#set -x
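# Arguments: <PR number> <triggering label> <degree>.
# The second word of the label is taken as the circuit name below.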

prnumber=$1
label=$2
degree=$3
base_dir="/home/ubuntu/CI_Prover_Benches/"
target_dir="$base_dir"PR"$prnumber"

rm -f ~/*.stats
rm -f ~/*.proverlog
rm -f ~/*.tar.gz

scp prover:$target_dir/*proverlog ~/
scp ~/actions-runner/zkevm_circuits_prover/zkevm-circuits/zkevm-circuits/.github/proverCiScripts/sadf.sh prover:~/
ssh prover "bash -s" <<EOF
./sadf.sh
rm -f sadf.sh
EOF

scp prover:~/*.stats ~/


l=$(echo $label | tr -d '"')
circuit=$(echo $l | awk '{print $2}')
time=$(date +%s)
test_id=$circuit-$degree-Benchmark-PR$prnumber--$time

tar -czvf ~/$test_id.tar.gz ~/*proverlog ~/*.stats
aws s3 cp ~/$test_id.tar.gz s3://zkevm-chain-testing --profile cirunner
echo "Log file uploaded at : https://zkevm-chain-testing.s3.eu-central-1.amazonaws.com/$test_id"".tar.gz"
/usr/bin/python3 .github/proverCiScripts/benchmarks_result_reporting/reporting_main.py "$prnumber" "$circuit" "$degree" "$test_id"

ssh prover "bash -s" <<EOF
rm -f $target_dir/*proverlog
rm -f ~/*.stats
EOF

rm -rf ~/*proverlog
rm -rf ~/*.stats
rm -rf ~/$test_id.tar.gz
14 changes: 14 additions & 0 deletions .github/proverCiScripts/sadf.sh
@@ -0,0 +1,14 @@
#!/bin/bash
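
# sar rotates its daily data files under /var/log/sysstat, so a benchmark that
# runs past midnight (00:00) has its samples split across several files. Count
# the remaining data files, then walk backwards day by day with sadf -d (CSV
# output), appending each earlier day and finally the current one, so that
# cpu.stats and mem.stats cover the whole run in chronological order.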

sleep 5
sudo rm -f /var/log/sysstat/sar*
sacount=$(sudo find /var/log/sysstat/ -type f | wc -l)
previousdays=$(expr 1 - $sacount)
while [ $previousdays -lt 0 ]
do
  sadf $previousdays -d >> cpu.stats
  sadf $previousdays -d -- -r >> mem.stats
  (( previousdays++ ))
done
sadf -d >> cpu.stats
sadf -d -- -r >> mem.stats
10 changes: 8 additions & 2 deletions .github/workflows/gh-actions-prover-benches.yml
@@ -15,7 +15,7 @@ jobs:
      - run: .github/proverCiScripts/wakeUpProver.sh
        shell: bash
      - run: |
          ssh prover "bash -s" -- < .github/proverCiScripts/rsSysstat.sh
          ssh prover "bash -s" -- < .github/proverCiScripts/rsSysstat.sh "${{ env.PR_NUMBER }}"
      - run: |
          ssh prover "bash -s" -- < .github/proverCiScripts/prepareProver.sh "${{ env.PR_NUMBER }}" "${{ github.workspace }}"
      - run: .github/proverCiScripts/deployToProver.sh "${{ env.PR_NUMBER }}" "${{ github.workspace }}"
@@ -25,6 +25,12 @@
        shell: bash
      - run: |
          ssh prover "bash -s" -- < .github/proverCiScripts/getSysstat.sh "${{ env.PR_NUMBER }}"
      - run: .github/proverCiScripts/shutdownProver.sh
      - name: Calculate Benchmark Result
        if: success() || failure()
        run: .github/proverCiScripts/processResults.sh "${{ env.PR_NUMBER }}" '"${{ github.event.label.name }}"' "19"
        shell: bash
      - name: PowerOff prover
        if: always()
        run: .github/proverCiScripts/shutdownProver.sh
        shell: bash

10 changes: 10 additions & 0 deletions README.md
@@ -19,3 +19,13 @@ to use for your circuit in the bench process.
- State Circuit prover benches. -> `DEGREE=18 make state_bench`

You can also run all benchmarks by running: `make circuit_benches DEGREE=18`.

## GH Actions Benchmark Results

Circuit benchmark results are accessible here: https://grafana.zkevm-testnet.org/d/vofy8DAVz/circuit-benchmarks?orgId=1

- The circuit_benchmarks panel displays:
  - the overall test result
  - timers and system statistics
  - a URL for downloading the prover log and sysstat files
  - a clickable sysstats_url element that loads the memory and CPU utilization profiles for the given test