Skip to content

Commit

Permalink
Add PostgreSQL + Sysbench TPC-C Workload (#434)
Browse files Browse the repository at this point in the history
* feat: add postgresql 15

* chore: no default entrypoint for postgre

* feat: Add workloads based on sysbench

* CI: add support for postgresql

* fix: wrong setting for server

* doc: the document draft for postgresql

* fix: spelling

* postgre: docker container entrypoint args python

* postgre: documentation

* fix: missing word

* chore: rename PostgreSQL database

* fix: add missing parameter

---------

Co-authored-by: Rafael Ulises Luzius Pizarro Solar <[email protected]>
  • Loading branch information
xusine and Rafael Ulises Luzius Pizarro Solar authored Jun 25, 2023
1 parent 4ab2c8e commit c9d7584
Show file tree
Hide file tree
Showing 11 changed files with 460 additions and 0 deletions.
50 changes: 50 additions & 0 deletions .github/workflows/build-images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,33 @@ jobs:
- id: set_is_parent_modified
run: echo "is_parent_modified=${MODIFIED}" >> $GITHUB_OUTPUT

postgresql:
  # Builds the PostgreSQL image from ./commons/postgresql/<tag>.
  runs-on: ubuntu-latest
  needs: base-os
  env:
    DH_REPO: "cloudsuite/${{ github.job }}"
  outputs:
    is_parent_modified: ${{ steps.set_is_parent_modified.outputs.is_parent_modified }}
  strategy:
    matrix:
      tag: ["15"]
      platform: ["linux/amd64,linux/arm64"]
  steps:
    - name: checkout
      uses: actions/checkout@v3
      with:
        fetch-depth: 0
    # The `needs` context only contains the jobs listed under `needs:` above,
    # so the parent to inspect is base-os (the previous `needs.java` lookup
    # always evaluated to an empty value and the step never ran).
    # NOTE(review): assumes base-os exports `is_parent_modified` like this job does - confirm.
    - if: ${{ needs.base-os.outputs.is_parent_modified == 'true' }}
      run: echo "IS_PARENT_MODIFIED=true" >> $GITHUB_ENV
    - name: build and push
      run: "./.github/scripts/build-images.sh"
      env:
        IMG_TAG: "${{ matrix.tag }}"
        DF_PATH: "./commons/${{ github.job }}/${{ matrix.tag }}"
        DBX_PLATFORM: ${{ matrix.platform }}
    # Re-export MODIFIED so that jobs depending on this one can read it.
    - id: set_is_parent_modified
      run: echo "is_parent_modified=${MODIFIED}" >> $GITHUB_OUTPUT

data-analytics:
runs-on: ubuntu-latest
needs: hadoop
Expand Down Expand Up @@ -301,6 +328,29 @@ jobs:
DF_PATH: "./benchmarks/${{ github.job }}/${{ matrix.tag }}"
DBX_PLATFORM: ${{ matrix.platform }}

data-serving-relational:
  # Builds the server and client benchmark images from
  # ./benchmarks/data-serving-relational/<tag>, after the postgresql job.
  runs-on: ubuntu-latest
  needs: postgresql
  env:
    DH_REPO: "cloudsuite/${{ github.job }}"
  strategy:
    matrix:
      tag: ["server", "client"]
      platform: ["linux/amd64,linux/arm64"]
  steps:
    - name: checkout
      uses: actions/checkout@v3
      with:
        fetch-depth: 0
    # The `needs` context only contains the jobs listed under `needs:` above,
    # so the parent to inspect is postgresql (the previous `needs.cassandra`
    # lookup always evaluated to an empty value and the step never ran).
    - if: ${{ needs.postgresql.outputs.is_parent_modified == 'true' }}
      run: echo "IS_PARENT_MODIFIED=true" >> $GITHUB_ENV
    - name: build and push
      run: "./.github/scripts/build-images.sh"
      env:
        IMG_TAG: "${{ matrix.tag }}"
        DF_PATH: "./benchmarks/${{ github.job }}/${{ matrix.tag }}"
        DBX_PLATFORM: ${{ matrix.platform }}

graph-analytics:
runs-on: ubuntu-latest
needs: spark
Expand Down
5 changes: 5 additions & 0 deletions .wordlist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ mysqlserverdocker
nginx
NoSQL
Nutch
OLTP
OPERATIONCOUNT
os
PageRank
Expand All @@ -118,10 +119,14 @@ solr
solr's
SQL
src
stddev
sudo
sys
sysbench
taskset
TCP
TPC
tpcc
threadcount
txt
UI
Expand Down
23 changes: 23 additions & 0 deletions benchmarks/data-serving-relational/client/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
FROM cloudsuite/base-os:ubuntu

# Keep apt non-interactive (key=value form; the space-separated ENV syntax is legacy).
ENV DEBIAN_FRONTEND=noninteractive

# 1. Install the load generator (sysbench), git to fetch the TPC-C scripts and
#    python3 for the entrypoint. ca-certificates is listed explicitly because
#    --no-install-recommends would otherwise leave it out and the HTTPS clone
#    below would fail. The apt lists are removed in the same layer.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        git \
        python3 \
        sysbench && \
    rm -rf /var/lib/apt/lists/*

# 2. Fetch sysbench-tpcc and install its Lua scripts into sysbench's script
#    directory; the shallow checkout is deleted in the same layer so it does
#    not end up in the image.
RUN git clone --depth 1 https://github.com/Percona-Lab/sysbench-tpcc /tmp/sysbench-tpcc && \
    cp /tmp/sysbench-tpcc/*.lua /usr/share/sysbench/ && \
    rm -rf /tmp/sysbench-tpcc

# 3. Copy the entrypoint, the workload launchers and the sysbench config template
COPY ./docker-entrypoint.py /root/docker-entrypoint.py
COPY ./template/tpcc.py /root/template/tpcc.py
COPY ./template/oltp-rw.py /root/template/oltp-rw.py
COPY ./template/database.conf /root/template/database.conf

# Read by docker-entrypoint.py to locate the config file it rewrites.
ENV DATABASE_CONF_FILE=/root/template/database.conf

# One layer for all executable bits.
RUN chmod +x \
        /root/docker-entrypoint.py \
        /root/template/tpcc.py \
        /root/template/oltp-rw.py

ENTRYPOINT ["/root/docker-entrypoint.py"]
84 changes: 84 additions & 0 deletions benchmarks/data-serving-relational/client/docker-entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/usr/bin/env python3

import os
import sys
import subprocess
import argparse

# Raw command line; forwarded unchanged to the selected workload script later.
args = sys.argv[1:]

# Only the options needed to pick a workload and a server are declared here;
# everything else is tolerated (parse_known_args) and left for the workload
# script to interpret.
parser = argparse.ArgumentParser()
parser.add_argument("--tpcc", help="Run TPC-C benchmark", action='store_true')
parser.add_argument("--oltp-rw", help="Run sysbench OLTP Read/Write workload", action='store_true')
parser.add_argument("--server-ip", help="IP of the server to load")

args_parsed, unknown = parser.parse_known_args()

# Usage errors terminate with a non-zero status so that callers (docker run,
# CI scripts) can tell a misconfigured invocation from a successful one.
if not args_parsed.server_ip:
    print("Please pass the server IP as an argument with --server-ip=<IP>")
    sys.exit(1)

print("args: " + str(args))
if not args_parsed.tpcc and not args_parsed.oltp_rw:
    print("Precise whenever it's --tpcc or --oltp-rw")
    sys.exit(1)

import os
import os.path as path
import shutil

def get_dict(lines):
    """Parse `key=value` lines into a dictionary.

    Lines without an "=" are ignored. A line whose first character is "#" is
    treated as a disabled setting: the "#" is dropped before parsing and the
    entry is flagged as not enabled.

    Args:
        lines: iterable of text lines (a trailing newline is fine).

    Returns:
        dict mapping the stripped key to a `(value, is_enabled)` tuple, where
        `value` is the stripped text after the first "=". When a key occurs
        more than once, the last occurrence wins.
    """
    parsed = {}
    for raw in lines:
        if "=" not in raw:
            continue
        commented = raw.startswith("#")
        body = raw[1:] if commented else raw
        # Split on the first "=" only, so values may contain "=" themselves.
        name, _, setting = body.partition("=")
        parsed[name.strip()] = (setting.strip(), not commented)
    return parsed

def save_dict(config_dict, lines):
    """Rebuild the configuration text from `lines` using the values in `config_dict`.

    Every `key=value` line (optionally disabled with a leading "#") whose key
    is present in `config_dict` is re-emitted as `key=value`, or `#key=value`
    when the entry is disabled. All other lines are passed through unchanged.
    Entries of `config_dict` that do not occur in `lines` are appended at the
    end, so a newly added setting is not silently lost.

    Args:
        config_dict: dict mapping key -> `(value, is_enabled)`.
        lines: the original file lines, with or without trailing newlines.

    Returns:
        The new file content as one string. Non-empty output ends with a
        single newline.
    """
    new_lines = []
    written = set()
    for line in lines:
        # readlines() keeps the line terminator; drop it here so that joining
        # with "\n" below does not insert an extra blank line after every
        # pass-through line each time the file is rewritten.
        text = line.rstrip("\n")
        if "=" in text:
            body = text[1:] if text.startswith("#") else text
            key = body.split("=", 1)[0].strip()
            if key in config_dict:
                value, is_enabled = config_dict[key]
                prefix = "" if is_enabled else "#"
                new_lines.append(f"{prefix}{key}={value}")
                written.add(key)
                continue
        new_lines.append(text)

    # Settings that were added to the dictionary but have no line in the file.
    for key, (value, is_enabled) in config_dict.items():
        if key not in written:
            prefix = "" if is_enabled else "#"
            new_lines.append(f"{prefix}{key}={value}")

    new_config = "\n".join(new_lines)
    if new_lines:
        new_config += "\n"
    return new_config

# Location of the sysbench config file, taken from the environment.
DATABASE_CONF_FILE = os.environ["DATABASE_CONF_FILE"]

# Keep a copy of the untouched file the first time the container starts.
# The check is on the backup: testing the config file itself, as before,
# meant the backup was never created.
backup_file = f"{DATABASE_CONF_FILE}.bak"
if not path.exists(backup_file):
    shutil.copy(DATABASE_CONF_FILE, backup_file)

with open(DATABASE_CONF_FILE, "r") as f:
    lines = f.readlines()
    config_dict = get_dict(lines)

# Point sysbench at the server given on the command line
config_dict["pgsql-host"] = (args_parsed.server_ip, True)

file_txt = save_dict(config_dict, lines)
# Write it back (save_dict returns one string, so write() rather than writelines())
with open(DATABASE_CONF_FILE, "w") as f:
    f.write(file_txt)

# Hand over to the selected workload script with the original arguments and
# exit with its status.
if args_parsed.tpcc:
    workload_script = '/root/template/tpcc.py'
else:
    workload_script = '/root/template/oltp-rw.py'
sys.exit(subprocess.call([workload_script] + args))
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
db-driver=pgsql
pgsql-host=128.178.116.117
pgsql-port=5432
pgsql-user=cloudsuite
pgsql-password=cloudsuite
pgsql-db=sbtest
40 changes: 40 additions & 0 deletions benchmarks/data-serving-relational/client/template/oltp-rw.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
#!/usr/bin/env python3

import os
import sys
import subprocess
import argparse

# According to the code, the table structure is like the following:
# - id: 4B (primary key)
# - key: 4B
# - c: 120B
# - pad: 60B
# As a result, each row takes 188B.
# You can increase the dataset size by adding more records per table
# (--record-count) or more tables (--tables).

parser = argparse.ArgumentParser()
parser.add_argument("--run", help="Run the benchmark, must be warmuped up before with --warmup", action='store_true')
parser.add_argument("--warmup", help="Warmup the benchmark, then can be ran with --run", action='store_true')
parser.add_argument("--threads", "-t", help="Number of threads for the client", default=8, type=int)
parser.add_argument("--report-interval", "-ri", help="Report interval for metrics in seconds", default=10, type=int)
parser.add_argument("--record-count", "-c", help="Record count per table. Each record is 188B", default=1000000, type=int)
parser.add_argument("--tables", "-n", help="Number of tables with `table_size` rows each", default=50, type=int)
parser.add_argument("--rate", "-r", help="The expected load (transaction / sec)", type=int)
parser.add_argument("--time", "-s", help="Length of the benchmark in seconds", default=360, type=int)

# Options meant for the entrypoint (e.g. --server-ip) arrive here too and are
# collected in `unknown` instead of being rejected.
args_parsed, unknown = parser.parse_known_args()

# A phase must be selected; report the usage error with a non-zero status.
if not args_parsed.warmup and not args_parsed.run:
    print("Need to pass at least --run or --warmup argument")
    sys.exit(1)

# --warmup takes precedence over --run and maps to sysbench's `prepare`
# command; otherwise the benchmark itself (`run`) is executed.
command = [
    "sysbench",
    "oltp_read_write",
    "--config-file=/root/template/database.conf",
    f"--threads={args_parsed.threads}",
    f"--time={args_parsed.time}",
    f"--report-interval={args_parsed.report_interval}",
    "prepare" if args_parsed.warmup else "run",
    f"--table_size={args_parsed.record_count}",
    f"--tables={args_parsed.tables}",
]
# The target rate is only passed for `run`, and only when one was requested.
if not args_parsed.warmup and args_parsed.rate:
    command.append(f"--rate={args_parsed.rate}")

# Run sysbench directly (argument list, no shell) and exit with its status.
sys.exit(subprocess.call(command))
31 changes: 31 additions & 0 deletions benchmarks/data-serving-relational/client/template/tpcc.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/env python3

import os
import sys
import subprocess
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--run", help="Run the benchmark, must be warmuped up before with --warmup", action='store_true')
parser.add_argument("--warmup", help="Warmup the benchmark, then can be ran with --run", action='store_true')
parser.add_argument("--threads", "-t", help="Number of threads for the client", default=8, type=int)
parser.add_argument("--report-interval", "-ri", help="Report interval for metrics in seconds", default=10, type=int)
parser.add_argument("--time", "-s", help="Length of the benchmark in seconds", default=360, type=int)
parser.add_argument("--scale", "-n", help="Scale of the dataset", default=10, type=int)
parser.add_argument("--rate", "-r", help="The expected load (transaction / sec)", type=int)

# Options meant for the entrypoint (e.g. --server-ip) arrive here too and are
# collected in `unknown` instead of being rejected.
args_parsed, unknown = parser.parse_known_args()

# A phase must be selected; report the usage error with a non-zero status.
if not args_parsed.warmup and not args_parsed.run:
    print("Need to pass at least --run or --warmup argument")
    sys.exit(1)

command = [
    "sysbench",
    "tpcc",
    "--config-file=/root/template/database.conf",
    f"--threads={args_parsed.threads}",
]
if args_parsed.warmup:
    # --warmup takes precedence over --run and maps to sysbench's `prepare` command.
    command += ["prepare", f"--scale={args_parsed.scale}"]
else:
    command += [
        f"--time={args_parsed.time}",
        f"--report-interval={args_parsed.report_interval}",
        "run",
        f"--scale={args_parsed.scale}",
    ]
    # The target rate is optional; without it no --rate option is passed.
    if args_parsed.rate:
        command.append(f"--rate={args_parsed.rate}")

# Run sysbench directly (argument list, no shell) and exit with its status.
sys.exit(subprocess.call(command))
14 changes: 14 additions & 0 deletions benchmarks/data-serving-relational/server/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
FROM cloudsuite/postgresql:15

# sudo lets the entrypoint run psql as the postgres user; python3 runs the
# entrypoint itself. DEBIAN_FRONTEND is set inline so this step does not rely
# on the parent image's environment. The apt lists are removed in the same layer.
RUN apt-get update && \
    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3 \
        sudo && \
    rm -rf /var/lib/apt/lists/*

# Make the database access public: let the cloudsuite role connect to any
# database from any IPv4 address using password (scram-sha-256) authentication.
# printf is used because it expands \t in every shell, whereas echo only does
# so in some of them.
RUN printf 'host\tall\tcloudsuite\t0.0.0.0/0\tscram-sha-256\n' >> /etc/postgresql/15/main/pg_hba.conf

# Copy the entrypoint
COPY ./docker-entrypoint.py /root/docker-entrypoint.py

RUN chmod +x /root/docker-entrypoint.py

ENTRYPOINT ["/root/docker-entrypoint.py"]
88 changes: 88 additions & 0 deletions benchmarks/data-serving-relational/server/docker-entrypoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
#!/usr/bin/env python3

import socket
def get_ip():
    """Return the local address of a UDP socket connected towards 8.8.8.8.

    Returns:
        The socket's own IPv4 address as a string, or '127.0.0.1' when the
        connect or the address lookup raises.
    """
    probe = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    probe.settimeout(0)
    try:
        # the target doesn't even have to be reachable
        probe.connect(('8.8.8.8', 1))
        return probe.getsockname()[0]
    except Exception:
        return '127.0.0.1'
    finally:
        probe.close()

import argparse

# Command-line interface of the server entrypoint. Options that are not
# declared here are collected in `unknown` rather than rejected.
parser = argparse.ArgumentParser()
# The default is the string '*' including its single quotes.
parser.add_argument(
    "--listen-addresses", "-a",
    default="'*'",
    help="The listening IP address of PostGRES.",
)
parser.add_argument(
    "--number", "-n",
    default=0,
    type=int,
    help="The number is not used, place holder for new argument.",
)

args, unknown = parser.parse_known_args()

import os
import os.path as path
import shutil

def get_dict(lines):
    """Collect the `key = value` settings found in `lines`.

    Only lines containing "=" are considered. If such a line begins with "#",
    that character is removed and the setting is recorded as disabled.

    Args:
        lines: iterable of text lines (a trailing newline is fine).

    Returns:
        dict of stripped key -> `(stripped value, is_enabled)`. The value is
        everything after the first "="; a later duplicate key overwrites an
        earlier one.
    """
    settings = {}
    for entry in lines:
        if "=" not in entry:
            continue
        enabled = not entry.startswith("#")
        if not enabled:
            entry = entry[1:]  # drop the leading `#`
        name, _, setting = entry.partition("=")
        settings[name.strip()] = (setting.strip(), enabled)
    return settings

def save_dict(config_dict, lines):
    """Rebuild the configuration text from `lines` using the values in `config_dict`.

    Every `key = value` line (optionally disabled with a leading "#") whose
    key is present in `config_dict` is re-emitted as `key = value`, or
    `#key = value` when the entry is disabled. All other lines are passed
    through unchanged. Entries of `config_dict` that do not occur in `lines`
    are appended at the end, so a newly added setting is not silently lost.

    Args:
        config_dict: dict mapping key -> `(value, is_enabled)`.
        lines: the original file lines, with or without trailing newlines.

    Returns:
        The new file content as one string. Non-empty output ends with a
        single newline.
    """
    new_lines = []
    written = set()
    for line in lines:
        # readlines() keeps the line terminator; drop it here so that joining
        # with "\n" below does not insert an extra blank line after every
        # comment/blank line each time the file is rewritten.
        text = line.rstrip("\n")
        if "=" in text:
            body = text[1:] if text.startswith("#") else text
            key = body.split("=", 1)[0].strip()
            if key in config_dict:
                value, is_enabled = config_dict[key]
                prefix = "" if is_enabled else "#"
                new_lines.append(f"{prefix}{key} = {value}")
                written.add(key)
                continue
        new_lines.append(text)

    # Settings that were added to the dictionary but have no line in the file.
    for key, (value, is_enabled) in config_dict.items():
        if key not in written:
            prefix = "" if is_enabled else "#"
            new_lines.append(f"{prefix}{key} = {value}")

    new_config = "\n".join(new_lines)
    if new_lines:
        new_config += "\n"
    return new_config

# Directory holding postgresql.conf, taken from the environment.
POSTGRE_HOMEDIR = os.environ["POSTGRE_HOME"]
conf_file = f"{POSTGRE_HOMEDIR}/postgresql.conf"
backup_file = f"{POSTGRE_HOMEDIR}/postgresql.conf.bak"

# Backup the original file the first time the container starts. The check is
# on the backup: testing postgresql.conf itself, as before, meant the backup
# was never created.
if not path.exists(backup_file):
    shutil.copy(conf_file, backup_file)

with open(conf_file, "r") as f:
    lines = f.readlines()
    config_dict = get_dict(lines)

# The value is written verbatim into postgresql.conf, where it is stored as a
# single-quoted string. The default already carries its quotes; a bare value
# such as 192.168.1.10 is quoted here (embedded quotes doubled).
listen_addresses = args.listen_addresses
already_quoted = (
    len(listen_addresses) >= 2
    and listen_addresses.startswith("'")
    and listen_addresses.endswith("'")
)
if not already_quoted:
    listen_addresses = "'" + listen_addresses.replace("'", "''") + "'"

# Update the desired key with the new value
config_dict["listen_addresses"] = (listen_addresses, True)

file_txt = save_dict(config_dict, lines)
# Write it back (save_dict returns one string, so write() rather than writelines())
with open(conf_file, "w") as f:
    f.write(file_txt)

os.system("service postgresql start")
os.system("sudo -u postgres psql -c \"CREATE USER cloudsuite WITH PASSWORD 'cloudsuite';\"")  # Create the user called `cloudsuite`
os.system("sudo -u postgres psql -c \"CREATE DATABASE sbtest;\"")  # Create the database named `sbtest`
os.system("sudo -u postgres psql -c \"GRANT ALL PRIVILEGES ON DATABASE sbtest TO cloudsuite\"")  # Give `cloudsuite` all privileges on that database
os.system("sudo -u postgres psql sbtest -c \"GRANT ALL ON SCHEMA public TO cloudsuite;\"")  # ... and on its `public` schema
# Finish by opening a psql session as the postgres user.
os.system("sudo -u postgres psql")
12 changes: 12 additions & 0 deletions commons/postgresql/15/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
FROM cloudsuite/base-os:ubuntu

# Kept as ENV (not ARG) so that images built on top of this one inherit it for
# their own apt steps; key=value form replaces the legacy space-separated syntax.
ENV DEBIAN_FRONTEND=noninteractive

# Install PostgreSQL 15 from the PostgreSQL apt repository (PGDG).
# - The repository key is stored in its own file and referenced with
#   `signed-by` instead of being added to apt's global trust store with the
#   deprecated `apt-key`; gnupg is therefore no longer needed.
# - The key is downloaded to a file rather than piped, so a failed download
#   aborts the build instead of being masked by the pipe.
# - ca-certificates is listed explicitly because the key and the repository
#   are fetched over HTTPS.
# - The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        lsb-release \
        wget && \
    install -d /usr/share/postgresql-common/pgdg && \
    wget --quiet -O /usr/share/postgresql-common/pgdg/apt.postgresql.org.asc \
        https://www.postgresql.org/media/keys/ACCC4CF8.asc && \
    echo "deb [signed-by=/usr/share/postgresql-common/pgdg/apt.postgresql.org.asc] https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" \
        > /etc/apt/sources.list.d/pgdg.list && \
    apt-get update && \
    apt-get install -y postgresql-15 && \
    rm -rf /var/lib/apt/lists/*

# Configuration directory of the PostgreSQL 15 "main" cluster.
ENV POSTGRE_HOME=/etc/postgresql/15/main
Loading

0 comments on commit c9d7584

Please sign in to comment.