tsworkload_dmw3lt
# NOTE(review): the bare token above is not a comment. If this file is
# loaded with java.util.Properties it is parsed as a key with an empty
# value -- presumably a stray file-name label; confirm it is intended.

# Copyright (c) 2020 YCSB contributors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You
# may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License. See accompanying
# LICENSE file.

# Yahoo! Cloud Serving Benchmark
#
# Workload Definition: DevOps Monitoring Workload 3 (DMW3LT)
#
# This workload was created as part of a bachelor thesis
# at the Technical University of Berlin, by Michael Smart
# https://github.com/smartygus
#
# Data to load as basis:
#
# This workload uses a server monitoring scenario, and
# in particular this scenario is based around CPU utilisation
# metrics.
# There are 20000 time series, each with one week of
# data at 15 second intervals.
# We are using a fixed starting time stamp representing
# 1/2/2020 at midnight.
# This amounts to a total of:
# 20000 (time series) * 4 (points per minute) * 60 (minutes in an hour) *
#       24 (hours in a day) * 7 (days) = 806400000 records
#
# Each time series has four tags:
#
# - app: the application that the particular host is performing a role for
# - cmp: what component of the app the host is (i.e. what role), e.g. web server,
#        application server, database server, etc.
# - host: the name or ID of the host, unique only within the context of the first
#         two tags
# - mode: which mode the CPU utilisation metric refers to, e.g. user, system, idle
#         and so on.
#
# Run phase of the workload:
#
# There is 1 SCAN query and 1 INSERT query that make up the
# run phase of the workload. The INSERT query models
# the ongoing stream of incoming metrics.
#
# The idea behind this workload is an alerting scenario,
# where a query is run continuously to provide data to an
# alerting expression that, when fulfilled, would send
# an alert to the appropriate person.
# This alerting scenario is based on sending alerts for
# individual hosts, so the query would be run once per
# minute for each of the 2000 hosts in our data set.
# The metric being monitored is the overall "busy"
# state of the CPU, so it is enough to query the idle
# mode metric and subtract this from 100% to get the total
# CPU busy percentage. With 2000 SCANs per minute in addition
# to the live incoming measurements we arrive at a
# proportion of 0.975 INSERTs to 0.025 SCANs, with an
# approximate real-world overall query rate of 1367 requests/sec.
# One could set this as the target rate for a workload
# measuring latency, for example.
# For a 20-minute benchmark run that would give
# an operation count of around 1.64 million.
# To measure throughput just experiment with
# higher and higher target throughputs until
# the throughput achieved by the system under test
# does not increase anymore, then do a full
# benchmark run at a target rate slightly higher
# than the maximum throughput.
workload=site.ycsb.workloads.TimeSeriesWorkload

# Load-phase size: 20000 time series * 40320 points per series
# (one week at 15 second intervals) = 806400000 records.
recordcount=806400000
# Run-phase size: roughly 1367 requests/sec * 1200 sec (20 minutes).
operationcount=1640000
# Same value as recordcount.
insertcount=806400000
# Load phase inserts start from 1/2/2020 @ 00:00:00
# (1 February 2020). Read as a Unix timestamp in seconds, 1580511600 is
# 2020-01-31 23:00:00 UTC, i.e. midnight at UTC+1.
insertstart=1580511600
# Run phase inserts start from 8/2/2020 @ 00:00:00
# (8 February 2020): insertstart + 604800 seconds, exactly one week
# later, i.e. directly after the one week of loaded data.
inserttransactionstart=1581116400

# We only have a single metric, so we are
# dividing up the write load into groups of
# individual time series instead of groups
# of metrics
threadedwritedistribution=timeseries

# NOTE(review): presumably the length of generated field (metric)
# names, matching the 3-character name 'cpu' -- confirm against
# TimeSeriesWorkload.
fieldlength=3
# single metric: 'cpu'
fieldcount=1
# four tags, "real world" names: app, cmp, host, mode
tagcount=4
# One cardinality per tag, read in the order app, cmp, host, mode:
# 2 * 2 * 500 = 2000 hosts, each with 10 modes
# = 20000 time series in total.
tagcardinality=2,2,500,10

# Sparsity, delayed series and delayed intervals are all set to zero.
# NOTE(review): presumably this means every time series gets a value at
# every timestamp, with no late-arriving data -- confirm against
# TimeSeriesWorkload.
sparsity=0.0
delayedseries=0.0
delayedintervals=0

# Unit used for timestamps and for timestampinterval below
timestampunits=SECONDS
# one measurement every 15 seconds
timestampinterval=15
# NOTE(review): presumably keeps the order in which time series are
# written fixed rather than shuffled -- confirm.
randomtimeseriesorder=false

# Query: CPU metric mode: idle
# app, cmp, and host tags chosen
# at random.
# Share of operations that are SCANs: 2000 scans per minute
# (about 33 per second) out of roughly 1367 requests/sec.
scanproportion=0.025
# One entry per tag, read in the order app, cmp, host, mode.
# NOTE(review): presumably 0 means "pick a value at random" and the
# final 10 selects the idle mode -- confirm against TimeSeriesWorkload.
scantags=0,0,0,10
# NOTE(review): presumably the scan window starts 300 seconds before
# the current timestamp and spans 300 seconds, i.e. the most recent
# 5 minutes -- confirm.
scanstartts=-300
scanquerytimespan=300
# NOTE(review): presumably false keeps the time span fixed at
# scanquerytimespan instead of randomising it -- confirm.
scanqueryrandomtimespan=false
# NOTE(review): downsampling function AVERAGE with an interval of 0 --
# confirm whether 0 disables downsampling or averages over the whole
# query time span.
scandownsamplingfunction=AVERAGE
scandownsamplinginterval=0

# INSERTs to represent ongoing stream
# of incoming measurements
# (20000 time series / 15 seconds is about 1333 inserts per second)
insertproportion=0.975

# READs and UPDATEs are not part of this workload; together with
# scanproportion (0.025) and insertproportion (0.975) the proportions
# sum to 1.0.
readproportion=0.00
updateproportion=0.00