diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..1d72255 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,86 @@ + +cmake_minimum_required (VERSION 2.8...3.23) + +Project(workloads NONE) +set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/script/") +include(prerequisite) + +add_custom_target(bom) +add_custom_target(kpi) +enable_testing() + +if(NOT DEFINED PLATFORM) + execute_process(COMMAND bash -c "head -n 1 '${CMAKE_SOURCE_DIR}/workload/platforms'" OUTPUT_VARIABLE PLATFORM OUTPUT_STRIP_TRAILING_WHITESPACE) +else() + execute_process(COMMAND bash -c "grep -E '^${PLATFORM}$' '${CMAKE_SOURCE_DIR}/workload/platforms' | head -n 1" OUTPUT_VARIABLE platform OUTPUT_STRIP_TRAILING_WHITESPACE) + if(NOT platform) + message(FATAL_ERROR "Platform ${PLATFORM} not recognized!") + endif() + set(PLATFORM "${platform}") +endif() + +if(PLATFORM MATCHES "GRAVITON") + set(IMAGEARCH "linux/arm64") +else() + set(IMAGEARCH "linux/amd64") +endif() + +if(REGISTRY) + if (NOT ${REGISTRY} MATCHES "/$") + set(REGISTRY "${REGISTRY}/") + endif() +endif() + +set(ENABLE_BUILD ON) + +if (NOT DEFINED TIMEOUT) + set(TIMEOUT "28800,600") +endif() + +if (NOT DEFINED RELEASE) + set(RELEASE ":latest") +elseif (NOT ${RELEASE} MATCHES "^:") + set(RELEASE ":${RELEASE}") +endif() + +if (NOT BACKEND) + set(BACKEND "kubernetes") +endif() + +if (EXISTS "${PROJECT_SOURCE_DIR}/script/${BACKEND}.cmake") + include(${BACKEND}) +endif() + +add_subdirectory("script/march") +add_subdirectory(stack) +add_subdirectory(workload) + +message("") +message("This script will build third party components licensed under various open source licenses into your container images. The terms under which those components may be used and distributed can be found with the license document that is provided with those components. Please familiarize yourself with those terms to ensure your distribution of those components complies with the terms of those licenses.") +message("") +message("-- Setting: PLATFORM=${PLATFORM}, ARCH=${IMAGEARCH}") +if(NOT DEFINED REGISTRY_AUTH) + message("-- Setting: REGISTRY=${REGISTRY}") +else() + message("-- Setting: REGISTRY=${REGISTRY}, AUTH=${REGISTRY_AUTH}") +endif() +message("-- Setting: RELEASE=${RELEASE}") +message("-- Setting: TIMEOUT=${TIMEOUT}") +message("-- Setting: BACKEND=${BACKEND}") +if(ACCEPT_LICENSE) + message("-- Setting: ACCEPT_LICENSE=${ACCEPT_LICENSE}") +endif() +if(COMMAND show_backend_settings) + show_backend_settings() +endif() +if(BENCHMARK) + message("-- Setting: BENCHMARK=${BENCHMARK}") +endif() +if(NOT ENABLE_BUILD) + message("") + message("Build is disabled as ${DEFAULT_REGISTRY} is readonly.") + message("") +endif() +message("") + +execute_process(COMMAND bash -c "ln -s -r -f '${PROJECT_SOURCE_DIR}'/script/benchmark/*.sh ." WORKING_DIRECTORY "${CMAKE_BINARY_DIR}") diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f682f4e..04f153e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,57 +1,29 @@ -# Contributing +## Welcome to the Service Framework Workload Repository Contributing Guide -### License +### Evaluate Workloads - is licensed under the terms in [LICENSE]. By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. +Follow the [README](README.md#prerequisite) instructions to setup [local](doc/setup-docker.md), [remote](doc/setup-cumulus.md), or [Cloud](doc/setup-cumulus.md) systems to evaluate any [supported workloads](worklod/README.md#list-of-workloads). 
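+
+For example, a typical evaluation session for the `dummy` workload looks like the following sketch (the same flow is described in the [README](README.md)):
+
+```
+mkdir -p build
+cd build
+cmake ..
+cd workload/dummy
+make
+ctest -V
+```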
-### Sign your work +You can choose to build the workloads and evaluate the workload execution with `ctest`, which manage the workload test cases. You can run any subset of the test cases or all of them. -Please use the sign-off line at the end of the patch. Your signature certifies that you wrote the patch or otherwise have the right to pass it on as an open-source patch. The rules are pretty simple: if you can certify -the below (from [developercertificate.org](http://developercertificate.org/)): +### Submit Issues -``` -Developer Certificate of Origin -Version 1.1 +If you spot a problem with the repository, submit an issue at the **github issues**. -Copyright (C) 2004, 2006 The Linux Foundation and its contributors. -660 York Street, Suite 102, -San Francisco, CA 94110 USA +### Contribute to Workload Development -Everyone is permitted to copy and distribute verbatim copies of this -license document, but changing it is not allowed. +Here is a list of references you can follow for workload development: +- A workload consists of a few critical pieces of scripts or manifests, documented in [Workload Elements](doc/workload.md): + - [`CMakeLists.txt`](doc/cmakelists.txt.md) + - [`build.sh`](doc/build.md) + - [`Dockerfiles`](doc/dockerfile.md) + - [`cluster-config.yaml.m4`](doc/cluster-config.md) + - [`kubernetes-config.yaml.m4`](doc/kuernetes-config.md) + - [`validate.sh`](doc/validate.md) + - [`kpi.sh`](doc/kpi.md) +- The best way to start a new workload development is by copying the [dummy](workload/dummy) workload and then modifying it to your needs. -Developer's Certificate of Origin 1.1 +### Submit Contributions -By making a contribution to this project, I certify that: +Thanks for your contribution to the Service Framework workload repository. Whether you plan to modify an existing workload or to create a new workload, please fork the SF workload repository to your own private work place. Make modifications there. Then submit a merge request. The branches of the main repository are reserved for release-related activities. -(a) The contribution was created in whole or in part by me and I - have the right to submit it under the open source license - indicated in the file; or - -(b) The contribution is based upon previous work that, to the best - of my knowledge, is covered under an appropriate open source - license and I have the right under that license to submit that - work with modifications, whether created in whole or in part - by me, under the same open source license (unless I am - permitted to submit under a different license), as indicated - in the file; or - -(c) The contribution was provided directly to me by some other - person who certified (a), (b) or (c) and I have not modified - it. - -(d) I understand and agree that this project and the contribution - are public and that a record of the contribution (including all - personal information I submit with it, including my sign-off) is - maintained indefinitely and may be redistributed consistent with - this project or the open source license(s) involved. -``` - -Then you just add a line to every git commit message: - - Signed-off-by: Joe Smith - -Use your real name (sorry, no pseudonyms or anonymous contributions.) - -If you set your `user.name` and `user.email` git configs, you can sign your -commit automatically with `git commit -s`. 
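+
+A typical contribution flow is sketched below; the fork URL and branch name are placeholders for your own:
+
+```
+git clone https://github.com/<your-account>/<your-fork>.git
+cd <your-fork>
+git checkout -b my-workload-change
+# modify an existing workload or add a new one
+git commit -a -m "Describe your change"
+git push origin my-workload-change
+# open a merge request against the main repository
+```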
diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..afdf0b1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. 
Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright (c)2022 Intel Corporation + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. 
+ You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..1f6ed29 --- /dev/null +++ b/README.md @@ -0,0 +1,33 @@ +## Introduction + +This is the **Workload Services Framework** repository. The repository contains a set of workloads that can be used to exercise multiple platforms. Each workload is a complete and standalone implementation that can be built and run collectively or individually. See the list of supported workloads under the [workload](workload/README.md) directory. + +### Prerequisite + +- Sync your system date/time. It is required by docker credential authorization. +- Install `cmake`, `make`, `m4`, and `gawk`. +- Setup [docker](doc/setup-docker.md), [Kubernetes](doc/setup-kubernetes.md), or [cumulus](doc/setup-cumulus.md). [docker](doc/setup-docker.md) is the minimum requirement and can be used for single-container workload validation. [Kubernetes](doc/setup-kubernetes.md) can be used for multiple-node workload validation. Setup [cumulus](doc/setup-cumulus.md) for remote worker validation. + +### Build & Evaluate Workload + +Evaluate a workload as follows: + +``` +mkdir -p build +cd build +cmake .. +cd workload/dummy +make +ctest -V +./list-kpi.sh logs* +``` + +> It takes a long time to rebuild all workload images. It is recommended that you only rebuild the workloads of interest by going to the workload sub-directory to make and test. + +You can optionally specify a `REGISTRY` value, `cmake -DREGISTRY=XYZ ..` to ask the build process to push the images to the docker registry. Please `docker login` beforehand if your docker registry requires authentication. A docker registry is optional except in the case of Kubernetes on-premises validation. + +### See Also + +- [Build Options](doc/cmake.md) +- [Test Options](doc/ctest.md) +- [Develop New Workload](doc/workload.md) diff --git a/doc/build.md b/doc/build.md new file mode 100644 index 0000000..924084a --- /dev/null +++ b/doc/build.md @@ -0,0 +1,29 @@ + +The `build.sh` script performs the workload build. Since the docker build process is [standardized](dockerfile.md), there is usually no need to customize it. You can use the following template as is: + +``` +#!/bin/bash -e + +DIR="$(dirname "$(readlink -f "$0")")" +. "$DIR"/../../script/build.sh +``` + +In some cases, the `script/buid.sh` can be customized as follows: +- **`FIND_OPTIONS`**: Specify any custom arguments to the `find` program to locate the set of Dockerfiles for building the docker images and for listing the BOMs. +- **`DOCKER_CONTEXT`**: Optionally specify a or an array of relative directory names where the Dockerfiles are located. By default, the Dockerfiles are assumed to be located directly under the workload directory. + +### Build Dependencies + +If your workload depends on some common software stacks, simply invoke the corresponding software stack's `build.sh`. 
For example, if your image depends on `QAT-Setup`, your `build.sh` can be something like below: + +``` +#!/bin/bash -e + +DIR="$(dirname "$(readlink -f "$0")")" + +# build QAT-Setup +STACK="qat_setup" "$DIR"/../../stack/QAT-Setup/build.sh $@ + +# build our image(s) +. "$DIR"/../../script/build.sh +``` diff --git a/doc/cluster-config.md b/doc/cluster-config.md new file mode 100644 index 0000000..a44792b --- /dev/null +++ b/doc/cluster-config.md @@ -0,0 +1,78 @@ + +The `cluster-config.yaml` manifest describes the machine specification to run the workloads. The specification is still evolving and subject to change. + +The following example describes a 3-node cluster to be used in some workload: + +``` +cluster: + - labels: {} + - labels: {} + - labels: {} +``` + +The `cluster-config.yaml` consists of the following sections: + +- **`cluster`**: This section defines the post-Sil cluster configurations. + +### cluster.labels + +The `cluster.labels` section describes any must have system level setup that a workload must use. The setup is specified in terms of a set of Kubernetes node labels as follows: + +| Label | Description | +|:-----:|:------------| +|
`HAS-SETUP-DISK`
| This set of labels specifies that SSD disks be mounted on the worker node(s).
See also: [Storage Setup](setup-storage.md). | +|
`HAS-SETUP-MODULE`
| This set of labels specifies the kernel modules that the workload must use.
See also: [Module Setup](setup-module.md). | +|
`HAS-SETUP-HUGEPAGE`
| This set of labels specifies the kernel hugepage settings.
See also: [Hugepage Setup](setup-hugepage.md) | + +The label value is either `required` or `preferred` as follows: + +``` +cluster: +- labels: + HAS-SETUP-HUGEPAGE-2048kB-2048: required +``` + +### cluster.cpuinfo + +The `cluster.cpuinfo` section describes any CPU-related constraints that a workload must use. The cpuinfo section is currently declarative and is not enforced. + +``` +cluster: +- cpuinfo: + flags: + - "avx512f" +``` + +where the CPU flags must match what are shown by `lscpu` or `cat /proc/cpuinfo`. + +### cluster.meminfo + +The `cluster.meminfo` section describes any memory constraints that a workload must use. The meminfo section is currently declarative and is not enforced. + +> Please also use the Kubernetes [resource constraints](https://kubernetes.io/docs/tasks/configure-pod-container/assign-memory-resource) to specify the workload memory requirements.) + +``` +cluster: +- meminfo: + available: 128 +``` + +where the available memory is in the unit of GBytes. + +### kubernetes + +The `kubernetes` section describes the Kubernetes configurations. This section is currently optionally enforced. +- `cni`: Specify the CNI plugin: `flannel` or `calico`. +- `cni-options`: Specify the CNI option: `vxlan` (calico). +- `kubelet-options`: Specify the kubelet options as described in [`KubeletConfiguration`](https://kubernetes.io/docs/reference/config-api/kubelet-config.v1beta1/#kubelet-config-k8s-io-v1beta1-KubeletConfiguration). +- `kubevirt`: Specify whether to enable kubevirt: `true/false`. + +``` +kubernetes: + cni: flannel + kubelet-options: + runtimeRequestTimeout: 10m +``` + +> Note that CNIs may behave differently on CSPs. Calico BGP and VXLAN work on AWS but only VXLAN works on GCP and AZure. + diff --git a/doc/cmake.md b/doc/cmake.md new file mode 100644 index 0000000..199dbec --- /dev/null +++ b/doc/cmake.md @@ -0,0 +1,40 @@ + +### Customize the Build Process: + +You can use the following build options to customize the build process: + +- **PLATFORM**: Specify the platform name. The only supported platform is `ICX`. +- **REGISTRY**: Specify the privacy docker registry URL. All built images will be pushed to the specified docker registry. + > `REGISTRY` must end with forward slash `/` +- **RELEASE**: Specify the release version. All built images will be tagged with it. Defaults to `:latest` + > `RELEASE` must begin with colon `:` +- **REGISTRY_AUTH**: Specify the registry authentication method. The only supported value is `docker`, which uses the docker configuration file. +- **BACKEND**: Specify the validation backend: [`docker`](setup-docker.md), [`kubernetes`](setup-kubernetes.md), or [`cumulus`](setup-cumulus.md). +- **CUMULUS_OPTIONS**: Specify the `cumulus` options. +- **TIMEOUT**: Specify the validation timeout, which contains the execution timeout and docker pull timeout. Default to 28800,300 seconds. +- **BENCHMARK**: Specify a workload pattern. Workloads not matching the pattern will be disabled. + +Build examples: + +```bash +cd build +cmake -DREGISTRY=xxyyzz.com:1234 .. +``` + +### Command Make Targets + +- **bom**: Print out the BOM list of each workload. +- **kpi**: Print out the KPI of each workload. +- **clean**: Purge the `logs`. + +```bash +cd build +cmake .. 
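+# configure the build tree first; the bom, kpi, and clean targets are then available: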
+make bom +``` + +### See Also + +- [Docker Engine](setup-docker.md) +- [Kubernetes Cluster](setup-kubernetes.md) +- [Cumulus Setup](setup-cumulus.md) diff --git a/doc/cmakelists.md b/doc/cmakelists.md new file mode 100644 index 0000000..46b5173 --- /dev/null +++ b/doc/cmakelists.md @@ -0,0 +1,56 @@ + +### Workload CMakeLists.txt + +The `CMakeLists.txt` defines actions in `cmake`: build and test. Let us start with a simple example: + +``` +add_workload("dummy") +add_testcase(${workload}) +``` + +The `add_workload` function defines `cmake` build rules for a workload. The name must be lower cased as a convention and does not contain any special characters with the exception of `_`. It is recommedded to append the version info to indicate the implementation versioning. The function also defines a parent-scope variable `workload` with the same name that any subsequent function can use. + +The `add_testcase` function defines a test case. You may define multiple test cases, each with a unique name and some configuration parameters. Internally, this gets routed to the `validate.sh` script with the specified parameters. (There is no argument in the above example.) The validation results are saved to the corresponding `logs-$workload` directory under the build tree. + +Note that the name of any `cmake` target must be unique across all workloads. Thus it is usually a concatenation of platform, feature, workload and configuration. + +### Special Test Cases + +The test case name should be descriptive for the workload test conditions. For example, use `_1n` to indicate that the workload runs on a single worker node, and `_3n` to indicate that the workload runs on multiple worker nodes. + +The following test case suffixes are reserved: +- `_gated`: A test case suffixed with `_gated` is designed for CI commit validation. The test case is expected to be a quick test of the workload software stack. To improve CI efficiency, design the test case such that the workload completes within 5 minutes. + +### Licensing Terms + +If the workload requires the user to agree to any license terms, use the `check_license` function. The function prompts the user for license agreement and then saves the decision. If the user denies the license terms, the workload will be skipped during the build process. If there are multiple license terms, you can write as many `check_license` functions as needed. + +``` +check_license("media.xiorg.com" "Please agree to the license terms for downloading datasets from xiorg.com") +add_workload("foo" LICENSE "media.xiorg.com") +``` + +### Fixed SUT + +If the workload can only run on specific SUT (System Under Test), specify the SUT constraints as part of the `add_workload` function as follows: + +``` +add_workload("foo" SUT azure) +``` + +where the `azure` SUT must be defined with `script/cumulus/cumulus-config..yaml`. + +### Software Stack CMakeLists.txt + +`CMakeLists.txt` for software stacks defines the software stack build and test targets. Let us start with a simple example: + +``` +add_stack("foo_setup") +add_testcase(${stack}) +``` + +The `add_stack` function defines `cmake` build rules for a workload. The name must be lower cased as a convention and does not contain any special characters with the exception of `_`. It is recommedded to append the version info to indicate the implementation versioning. The function also defines a parent-scope variable `stack` with the same name that any subsequent function can use. + +The `add_testcase` function defines a test case. 
You may define multiple test cases, each with a unique name and some configuration parameters. Internally, this gets routed to the `validate.sh` script with the specified parameters. (There is no argument in the above example.) The validation results are saved to the corresponding `logs-$stack` directory under the build tree. + +Note that the name of any `cmake` target must be unique across all workloads. Thus it is usually a concatenation of platform, feature, stack and configuration. diff --git a/doc/ctest.md b/doc/ctest.md new file mode 100644 index 0000000..dea3ae7 --- /dev/null +++ b/doc/ctest.md @@ -0,0 +1,205 @@ + +### Run Test + +Use `ctest` to run a single test or batch of tests. You can do this at the top level `build` directory or under each workload directory. In the latter case, only the tests of the workload will be executed. + +``` +cd build +cd workload/dummy +ctest +``` + +### CTest Options + +There are extensive list of options in `ctest` to control how tests can be executed. See the `ctest` manpage. The followings are most common options. + +- *`-R`*: Select tests based on a regular expression string. +- *`-E`*: Exclude tests based on a regular expression string. +- *`-V`*: Show test execution with details. +- *`-N`*: Dry-run the tests only. + +Example: list tests with `boringssl` in name excluding those with `_gated` +``` +ctest -R boringssl -E _gated -N +``` + +Example: run only `test_static_boringssl` (exact match) +``` +ctest -R '^test_static_boringssl$' +``` + +### Customize Configurations + +It is possible to specify a global test configuration file to overwrite any configuration parameter of a test case: + +``` +TEST_CONFIG=$(pwd)/test_config.yaml ctest -V +``` + +where `TEST_CONFIG` specifies the test configuration file. + +The configuration file uses the following format: + +``` +*_dummy_pi: + SCALE: 3000 +``` + +where `*_dummy_pi` specifies the test case name. You can use `*` to specify a wildcard match. The subsection underneath specifies the configuration variables and values. Any parameters specified in each test case [`validate.sh`](validate.md) can be overwritten. + +Use with caution as overwritting configuration parameters may lead to invalid parameter combinations. + +### Benchmark Scripts + +A set of utility scripts are linked under your workload build directory to make it easy for workload benchmark activities. + +- **`ctest.sh`**: This is an extended ctest script extending the following features, besides what ctest supports: + +``` +Usage: [options] +--nohup Run ctest in the daemon mode for long benchmark +--loop Run the benchmark multiple times sequentially. +--run Run the benchmark multiple times on the same SUT(s), only for cumulus. +--burst Run the benchmark multiple times simultaneously. +--test-config Specify the test-config file. +--set Set the workload parameter values during loop and burst iterations. +--stop Kill all ctest sessions. +--continue Ignore any errors and continue the loop and burst iterations. +--prepare-sut Prepare cloud SUT instances for reuse. +--reuse-sut Reuse previously prepared cloud SUT instances. +--cleanup-sut Cleanup cloud SUT instances. +--dry-run Generate the testcase configurations and then exit. 
+``` + +The followings are some examples: + +``` +# run aws test cases 5 times sequentially +./ctest.sh -R aws --loop=5 --nohup + +# run aws test cases 5 times simultaneously +./ctest.sh -R aws --burst=5 --nohup + +# run aws test cases 4 times simultaneously with the SCALE value +# incremented linearly as 1000, 1300, 1600, 1900 in each iteration. +# "..." uses three previous values to deduce the increment. +./ctest.sh -R aws --set "SCALE=1000 1300 1600 ...2000" --burst=4 --nohup + +# run aws test cases 4 times simultaneously with the SCALE value +# incremented linearly as 1000, 1600, 1000, 1600 in each iteration. +# "..." uses three previous values to deduce the increment. +# "|200" means the values must be divisible by 200. +./ctest.sh -R aws --set "SCALE=1000 1300 1600 ...2000 |200" --burst=4 --nohup + +# run aws test cases 4 times simultaneously with the SCALE value +# incremented linearly as 1000, 1600, 2000, 1000 in each iteration. +# "..." uses three previous values to deduce the increment. +# "8000|" means the values must be a factor of 8000. +./ctest.sh -R aws --set "SCALE=1000 1200 1400 ...2000 8000|" --burst=4 --nohup + +# run aws test cases 4 times simultaneously with the SCALE value +# incremented exponentially as 1000, 2000, 4000, 8000 in each iteration. +# "..." uses three previous values to deduce the multiplication factor. +./ctest.sh -R aws --set "SCALE=1000 2000 4000 ...10000" --burst=4 --nohup + +# run aws test cases 6 times simultaneously with the SCALE value +# enumerated repeatedly as 1000, 1500, 1700, 1000, 1500, 1700 in each iteration. +./ctest.sh -R aws --set "SCALE=1000 1500 1700" --burst=6 --nohup + +# run aws test cases 6 times simultaneously with the SCALE and BATCH_SIZE values +# enumerated seperately as (1000,1), (1500,2), (1700,4), (1000,8) in each +# iteration. Values are repeated as needed. +./ctest.sh -R aws --set "SCALE=1000 1500 1700" --set BATCH_SIZE="1 2 4 8" --burst=6 --nohup + +# run aws test cases 8 times simultaneously with the SCALE and BATCH_SIZE values +# permutated as (1000,1), (1000,2), (1000,4), (1000,8), (1500,1), (1500, 2), +# (1500, 4), (1500, 8) in each iteration. +./ctest.sh -R aws --set "SCALE=1000 1500 1700/BATCH_SIZE=1 2 4 8" --burst=8 --nohup + +# for cloud instances, it is possible to test different machine types by +# enumerating the AWS_MACHINE_TYPE values (or similar GCP_MACHINE_TYPE): +./ctest.sh -R aws --set "AWS_MACHINE_TYPE=m6i.xlarge m6i.2xlarge m6i.4xlarge" --loop 3 --nohup +``` + +See Also: [Cloud SUT Reuse](#cloud-sut-reuse) + +- **`list-kpi.sh`**: Scan the ctest logs files and export the KPI data. + +``` +Usage: [options] [logs-directory] +--primary List only the primary KPI. +--all List all KPIs. +--outlier Remove outliers beyond N-stdev. +--params List workload configurations. +--format list|xls-ai|xls-inst + Specify the output format. +--var1 Specify the spread sheet variable 1. +--var2 Specify the spread sheet variable 2. +--var3 Specify the spread sheet variable 3. +--var4 Specify the spread sheet variable 4. +--phost node1 Specify the hostname for identifying the primary instance type, in the multi-node workload scenario. +--pinst CPU.Microarchitecture + Specify the SVRInfo field name for identifying the primary instance name. +--filter _(real|throughput) + Specify a trim filter to shorten spreadsheet name. +--file Specify the spread sheet filename. 
+``` + +> The `xls-ai` option writes the KPI data in the `kpi-report.xls` spread sheet as follows: + + + +> where `--var1=batch_size` `--var2=cores_per_instance` `--var3='*Throughput'` `--var4=Throughput_`. + +> The `xls-inst` option writes the KPI data in the `kpi-report.xls` spread sheet as follows: + + + +> where `--phost=node1`. + +> The `xls-table` option writes the KPI data in the `kpi-report.xls` spread sheet as follows: + + + +> where `--var1=scale`, `--var2=sleep_time`. Optionally, you can specify `--var3` and `--var4` variables for multiple tables in the same spreadsheet. + +### Cloud SUT Reuse + +With the cumulus backend, it is possible to reuse the Cloud SUT instances during the benchmark process. This is especially useful in tuning parameters for any workload. + +To reuse any SUT instances, you need to first prepare (provision) the Cloud instances, using the `ctest.sh` `--prepare-sut` command as follows: + +``` +./ctest.sh -R aws_kafka_3n_pkm -V --prepare-sut +``` + +The `--prepare-sut` command provisions and prepares the Cloud instances suitable for running the `aws_kafka_3n_pkm` test case. The preparation includes installing docker/Kubernetes and labeling the worker nodes. The SUT details are stored under the `sut-logs-aws_kafka_3n_pkm` directory. + +Next, you can run any iterations of the test cases, reusing the prepared SUT instances with the `--reuse-sut` command, as follows: + +``` +./ctest.sh -R aws_kafka_3n_pkm -V --reuse-sut +``` + +> If `--reuse-sut` is set, `--burst` is disabled. + +Finally, to cleanup the SUT instances, use the `--cleanup-sut` command: + +``` +./ctest.sh -R aws_kafka_3n_pkm -V --cleanup-sut +``` + +SUT reuse is subject to the following limitations: +- The SUT instances are provisioned and prepared for a specific test case. Different test cases cannot share SUT instances. +- It is possible to change workload parameters, provided that such changes do not: + - The changes do not affect the worker node numbers. + - The changes do not affect the worker node machine types, disk storage, or network topologies. + - The changes do not affect worker node labeling. + - The changes do not introduce any new container images. + +--- + +Please cleanup the Cloud instances after use. You can also use the cumulus [cloud cleanup](setup-cumulus.md#cleanup-cloud-resources) procedure to completely cleanup any Cloud resources. + +--- + diff --git a/doc/dockerfile.md b/doc/dockerfile.md new file mode 100644 index 0000000..41b5e3d --- /dev/null +++ b/doc/dockerfile.md @@ -0,0 +1,86 @@ + + +The workload Dockerfile must meet certain requirements to facilitate image build, validation execution and data collection. + +### Use Template + +You can use `m4` template in constructing Dockerfles, which avoids duplication of identical steps. Any files with the `.m4` suffix will be replaced with the corresponding files without the suffix, during the build process. + +### Set Build Order + +If there are multiple Dockerfiles under the workload directory, the build order is determined by the filename pattern of the Dockerfile: `Dockerfile.[1-9].`. The bigger the number in the middle of the filename, the earlier that the build script builds the Dockerfile. If there are two Dockerfiles with the same number, the build order is platform specific. + +### Specify Image Name + +The first line of the Dockerfile is used to specify the docker image name, as follows: + +Final image: +``` +# resnet_50 +... 
+``` + +Intermediate image: +``` +## resnet_50_model +``` + +Any final images will be pushed to the docker registry. Any intermediate images will be left on the build machine. As a convention, the image name uses the following pattern: `[-]-`. The platform prefix is a must have if the image is platform specific, and optional if the image can run on any platform. + +### List Ingredients + +Any significent ingredients used in the workload must be marked with the `ARG` statement, so that we can easily list ingredients of a workload: + +``` +ARG IPP_CRYPTO_VER="ippcp_2020u3" +ARG IPP_CRYPTO_REPO=https://github.com/intel/ipp-crypto.git +... +``` + +The following `ARG` suffixes are supported: +- **_REPO/_REPOSITORY**: Specify the ingredient source repository location. +- **_VER/_VERSION**: Specify the ingredient version. +- **_IMG/_IMAGE**: Specify an ingredient docker image. +- **_PKG/_PACKAGE**: Specify an ingredient OS package, such as deb or rpm. + +> _VER and the corresponding _REPO/_PKACAGE/_IMAGE must be in a pair and in order to properly show up in the Wiki ingredient table. For example, if you define `OS_VER`, then there should be a following `OS_IMAGE` definition. + +### Export Status & Logs + +It is the workload developer's responsibility to design how to start the workload and how to stop the workload. However, it is a common requirement for the validation runtime to reliably collect execution logs and any telemetry data for analyzing the results. + +#### Export to FIFO + +The workload image must create a fifo `/export-logs` and then archive (1) the workload exit code (in `status`) and (2) any workload-specifc logs to the fifo. The workload exit code is mandatory. Workload logs can be used to generate KPIs. + +``` +RUN mkfifo /export-logs +CMD (; echo $? > status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity +``` + +#### Import from FIFO + +The validation backend (script/validate.sh) imports the logs data through the fifo, as follows: + +``` +# docker +docker exec cat /export-logs | tar xf - +``` +``` +# kubernetes +kubectl exec cat /export-logs | tar xf - +``` + +The above command blocks if the workload execution is in progress and exits after the execution is completed (thus it is time for cleanup.) + +### Reserved Feature + +Do not use `ENTRYPOINT` in the Dockerfile. This is a reserved feature for future extension. + +### See Also + +- [How to Create a Workload](workload.md) +- [Provisioning Specification](cluster-config.md) + diff --git a/doc/image/ss-ai.png b/doc/image/ss-ai.png new file mode 100644 index 0000000..b27b0f5 Binary files /dev/null and b/doc/image/ss-ai.png differ diff --git a/doc/image/ss-inst.png b/doc/image/ss-inst.png new file mode 100644 index 0000000..79ac2b8 Binary files /dev/null and b/doc/image/ss-inst.png differ diff --git a/doc/image/ss-table.png b/doc/image/ss-table.png new file mode 100644 index 0000000..1918977 Binary files /dev/null and b/doc/image/ss-table.png differ diff --git a/doc/kpi.md b/doc/kpi.md new file mode 100644 index 0000000..8e6b5e7 --- /dev/null +++ b/doc/kpi.md @@ -0,0 +1,28 @@ + +The `kpi.sh` script parses the validation output and exports a set of key/value pairs to represent the workload performance. 
+ +The following is some example of the KPI data: +``` +# this is a test ## Optional comments +## threads: 4 ## Tunable parameters overwrite +throughput: 123.45 ## Simple key/value +throughput (op/s): 123.45 ## Key, unit (in parentheses) and value +*throughput (images/s): 123.45 ## Primary KPI for regression reporting +``` + +To avoid introducing additional software dependencies, it is recommended to use `gawk` to parse the validation logs and format the output. + +The validation output is assumed to be stored at 1 layer under the current directory. The `kpi.sh` example is as follows: + +``` +#!/bin/bash -e + +awk ' +{ + # KPI parsing script +} +' */output.logs 2>/dev/null || true +``` + +where `2>/dev/null` supresses any error message if `*/output.logs` does not exist, and `||true` makes the `kpi.sh` always returns an ok status. + diff --git a/doc/kubernetes-config.md b/doc/kubernetes-config.md new file mode 100644 index 0000000..359f928 --- /dev/null +++ b/doc/kubernetes-config.md @@ -0,0 +1,87 @@ + +The `kubernetes-config.yaml` script is a manifest that describes how the workload container(s) should be scheduled (to the machine cluster described by `cluster-config.yaml`.) This is the standard Kubernetes script. + +``` +include(config.m4) +... +spec: +... + spec: + containers: + - name: database + image: IMAGENAME(wordpress5mt-defn(`DATABASE')) +... +``` + +where the `IMAGENAME` macro expands the image name to include the `REGISTRY` prefix and the `RELEASE` versions. + +#### About `imagePullPolicy` + +To ensure that the validation runs always on the latest code, it is recommended to use `imagePullPolicy: Always`. However, this requires to use a private docker registry. In local development, `imagePullPolicy: IfNotPresent` is desired. The `config.m4` utility provides a macro, `IMAGEPOLICY`, to switch between `Always` and `IfNotPresent` depending on the `REGISTRY` setting. + +``` +... + spec: + containers: + - name: database + image: IMAGENAME(wordpress5mt-defn(`DATABASE')) + imagePullPolicy: IMAGEPOLICY(Always) +... +``` + +Not all docker images are built equally. Some are less frequently updated and less sensitive to performance. Thus it is preferrable to use `imagePullPolicy: IfNotPresent` in all cases. + +#### About `podAntiAffinity` + +To spread the pods onto different nodes, use `podAntiAffinity` as follows: + +``` +... + metadata: + labels: + app: foo + spec: + PODANTIAFFINITY(preferred,app,foo) +... +``` + +where the convenient macro `PODANTIAFFINITY` expands to + +``` +... + metadata: + labels: + app: foo + spec: + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - foo + topologyKey: "kubernetes.io/hostname" +... +``` + +#### About `CLUSTER_WORKERS` + +Some workload uses the Kubernetes operator to launch new Kubernetes pods during the workload execution. It is critical to restrict any newly launched pods to be within the cluster workers that the workload is assigned to run. Define the `CLUSTER_WORKERS` environment variable as follows to retrieve the information about the list of worker node IP addresses. + +``` + spec: + containers: + - name: foo + image: IMAGENAME(foo) + imagePullPolicy: IMAGEPOLICY(Always) + env: + - name: CLUSTER_WORKERS + value: "" +``` + +The value will be replaced with the list of worker node IP addresses, separated by `,`, if the workload is restricted to a set of worker nodes. 
The value will remain unchanged if there is no such restriction. + diff --git a/doc/readme.md b/doc/readme.md new file mode 100644 index 0000000..49d7e1a --- /dev/null +++ b/doc/readme.md @@ -0,0 +1,26 @@ + +The workload README should have the following sections: +- Introduction: Introduce the workload and any background information. +- Test Case: Describe the test cases. +- Docker Image: Describe the docker images and example usages. +- KPI: Describe the KPI definitions and the meanings of the values. +- [System Setup](#system-setup): Describe the system setup. +- [Index Info](#index-info): List workload indexing information. +- See Also: Add any workload-related references. + +### Index Info + +The following information must be provided: + +- Name: The workload friendly name. +- Category: The category of the workload: one of `DataServices`, `ML/DL/AI`, `HPC`, `Media`, `Networking`, `Synthetic`, `uServices`. +- Platform: A list of supported [platform](../workload/platforms) names. +- keywords: A list of related keywords. + +### System Setup + +Include (but not limited to) the following information: +- The minimum system setup. +- The recommendded system setup. +- Any guideline in tuning the workload performance. + diff --git a/doc/setup-auth.md b/doc/setup-auth.md new file mode 100644 index 0000000..691e118 --- /dev/null +++ b/doc/setup-auth.md @@ -0,0 +1,55 @@ + +### Introduction + +A private docker registry is optional in most of the validation senarios except if you want to run the workloads on an on-premesis Kubernetes cluster, or you explicitly setup a docker registry to store any newly built workload images. + +This document describes how to authenticate to a docker registry if the registry requires authentication. Skip this document if there is no authentication requriement. + +### `REGISTRY_AUTH` + +The [`cmake`](cmake.md) `REGISTRY_AUTH` option specifies how to authenticate to a private docker registry. Currently, `docker` is the only supported value, which uses the docker authentication mechanism. + +Enable the `REGISTRY_AUTH` option: + +``` +cmake -DREGISTRY= -DREGISTRY_AUTH=docker .. +``` + +With the above command, the validation scripts will upload the docker authentication information specified in `.docker/config.json` as a Kubernetes `imagePullSecret` to the validation cluster, on-premesis or in Cloud. + +> `CredHelpers` or `CredStore` in `.docker/config.json` is not suppoerted. + +### Authenticate to Cloud Private Registry + +#### Amazon Elastic Container Registry + +``` +make aws +$ aws ecr get-login-password --region | docker login --username AWS --password-stdin .dkr.ecr..amazonaws.com +$ exit +``` + +> Note that the build script will auto-create the image repository namespaces. + +#### Google Cloud Container Registry + +``` +make gcp +$ gcloud iam service-accounts create +$ gcloud projects add-iam-policy-binding --member "serviceAccount:@.iam.gserviceaccount.com" --role "roles/ROLE" +$ gcloud iam service-accounts keys create keyfile.json --iam-account @.iam.gserviceaccount.com +$ gcloud auth print-access-token | docker login -u oauth2accesstoken --password-stdin +$ exit +``` + +> Note that the Oauth2 access token will expire in an hour. 
+ + +#### Azure Container Registry: + +``` +make azure +$ az acr login --name --expose-token --output tsv --query accessToken | docker login -username 00000000-0000-0000-0000-000000000000 --password-stdin +$ exit +``` + diff --git a/doc/setup-containerd.md b/doc/setup-containerd.md new file mode 100644 index 0000000..815051a --- /dev/null +++ b/doc/setup-containerd.md @@ -0,0 +1,57 @@ + +### Introduction + +Starting Kubernetes v1.20, Kubernetes deprecated docker as a runtime and used `containerd` instead. It is a prerequisite to install `containerd` before installing Kubernetes. + +#### Installation + +Install `containerd` from your OS packages: + +``` +apt-get install containerd # Ubuntu or Debian +yum install containerd # Centos +``` + +#### Setup Proxy + +``` +sudo mkdir -p /etc/systemd/system/containerd.service.d +printf "[Service]\nEnvironment=\"HTTP_PROXY=$http_proxy\" \"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\n" | sudo tee /etc/systemd/system/containerd.service.d/proxy.conf +sudo systemctl daemon-reload +sudo systemctl restart containerd +``` + +#### Setup Configuration Files + +``` +containerd config default | sudo tee /etc/containerd/config.toml +sudo systemctl restart containerd +``` + +#### Setup Insecure Registries + +On-Premisis workload validation based on Kuberenetes requires to use a docker registry. If you need to setup any insecure registries with `containerd`, modify the `containerd` configuration as follows, assuming your private registry is `foo.com:5000`: + +``` +sudo sed -i 's|config_path =.*|config_path = "/etc/containerd/certs.d"|' /etc/containerd/config.toml +sudo mkdir -p /etc/containerd/foo.com:5000 +cat | sudo tee /etc/containerd/certs.d/foo.com:5000/hosts.toml <.yaml`](../script/cumulus), where `` is the Cloud vendor name. You can customize as needed. +- If you are behind a corporate firewall, please update the proxy settings in [`ssh_config`](../script/cumulus/ssh_config) accordingly. + +#### Configure Cloud Account + +``` +make aws # or make -C ../.. aws, if under build/workload/ +$ aws configure # please specify a region +$ exit +``` + +``` +make azure # or make -C ../.. azure, if under build/workload/ +$ az login +$ exit +``` + +``` +make gcp # or make -C ../.. gcp, if under build/workload/ +$ gcloud init --no-browser +$ exit +``` + +``` +make tencent # or make -C ../.. tencent, if under build/workload/ +$ tccli configure # please specify a region +$ exit +``` + +``` +make alicloud # make -C ../.. alicloud, if under build/workload/ +$ aliyun configure # please specify a region +$ exit +``` + +#### Run Workload(s) Through Cumulus + +``` +cd workload/ +make +ctest -N +``` + +#### Cleanup Cloud Resources + +If your cumulus validation is interrupted for any reason, the Cloud resource may remain active. You can explicitly cleanup any Cloud resources as follows: + +``` +make -C ../.. aws +$ cleanup +$ exit +``` + +``` +make -C ../.. gcp +$ cleanup +$ exit +``` + +``` +make -C ../.. azure +$ cleanup +$ exit +``` + +``` +make -C ../.. tencent +$ cleanup +$ exit +``` + +``` +make -C ../.. alicloud +$ cleanup +$ exit +``` + +#### Use A Cloud Private Registry + +A Cloud private registry is a convenient option to store workload images. During the Cloud validation, the SUTs (System Under Test) can directly pull images from the docker registry without transfering any images. Here we assume the Cloud registry and the SUTs are in the same region. 
+ +Add the following flag in the `flags` section of `script/cumulus/cumulus-config..yaml` to indicate that the SUTs can directly access to the registry. The cumulus backend will then skip transfering workload images during validation: + +``` + flags: + skopeo_sut_accessible_registries: "" +``` + +See Also: [Private Registry Authentication](setup-auth.md) + + +### Setup Cumulus for On-Premesis Validation + +- Setup a [Kubernetes](setup-kubernetes.md#Setup-Kubernetes) cluster. Customize [`cumulus-config.static.yaml`](../script/cumulus/cumulus-config.static.yaml) to specify your cluster information. +- Run the [`setup-sut.sh`](../script/cumulus/script/setup-sut.sh) script to setup the SUT (System Under Test) hosts as follows: + +``` +./setup-sut.sh user@host1 [user@host2...] +``` + +> The script requires sudo permission on the SUT hosts. + +### Cumulus Options + +Use the following options to customize the cumulus validation: + +- Set the default to use `docker` in validation wherever possible: + +``` +cmake -DCUMULUS_OPTIONS=--docker-run .. +``` + +- Set the dry-run mode. Configure the workload but skip the execution stage: + +``` +cmake -DCUMULUS_OPTIONS=--dry-run .. +``` + +### Telemetry Tracing + +You can enable telemetry tracing via `sar`, `emon`, and/or `collectd` as follows: +- **`sar`**: Add `--sar` to `CUMULUS_OPTIONS`. +- **`emon`**: Add `--emon --edp_publish --emon_post_process_skip` to `CUMULUS_OPTIONS`. +- **`collectd`**: Add `--collectd` to `CUMULUS_OPTIONS`. + +``` +cmake -DCUMULUS_OPTIONS=--collectd .. +cd workload/ +ctest -N +``` + +For On-Cloud validation, there is no additional setup. For On-Premesis validation, you need to perform additional setup for each telemetry tracing mechniasm: + +#### Setup `sar` On-Prem + +- Install the `sar` utility on your worker nodes. + +#### Setup `EMON` On-Prem + +On your worker nodes, +- Create a `/opt/pkb` folder with the right ownership: + +``` +sudo mkdir -p /opt/pkb +sudo chown $(id -u):$(id -g) /opt/pkb +``` + +- Download and install [EMON](https://www.intel.com/content/dam/develop/public/us/en/documents/emon-user-guide-nov-2019.pdf) to `/opt/emon/emon_files`. +> Note it is critical that the installation location is `/opt/emon/emon_files`. + +- Add your worker username to the `vtune` group. + +``` +sudo usermod -aG vtune $(id -gn) +``` + +##### For `EDP` post process capabilities + +On your worker nodes install `python3` and add the following pip packages: + +``` +sudo python3 -m pip install xlsxwriter pandas numpy pytz defusedxml tdigest dataclasses +``` + +#### Setup Collectd On-Prem + +On your worker nodes, +- Install `flex`, `bison`, `autoconf`, `automake` and `libtool`. +- Download [collectd](https://github.com/collectd/collectd) and compile it as follows: + +``` +sudo mkdir -p /opt/pkb +sudo chown -R $(id -u).$(id -g) /opt/pkb +sudo mkdir -p /opt/collectd +sudo chown -R $(id -u).$(id -g) /opt/collectd + +git clone https://github.com/collectd/collectd.git +cd collectd +./build.sh +./configure --prefix=/opt/collectd/collectd +make +make install +``` + +- Copy [collectd.conf](../script/cumulus/collectd.conf) to `/opt/collectd/collectd/etc`. + +### Setup SVRINFO + +The `svrinfo` utility is used to retrieve system-level information at the beginning of any validation run. Since `svrinfo` is under NDA only, the use of `svrinfo` is optional and by default disabled. + +To setup `svrinfo`, copy the `svrinfo` tarball under `script/cumulus/pkb/perfkitbenchmarker/data/svrinfo`. Remake. 
Then turn on the `svrinfo` option as follows: + +``` +cmake -DCUMULUS_OPTIONS=--svrinfo .. +``` + +### Cumulus Debugging + +Enable the cumulus debugging mode as follows: + +- Specify break points in `CUMULUS_OPTIONS`: + +``` +cmake -DCUMULUS_OPTIONS=--dpt_debug=[,] .. +``` + +where `` can be one of more of the following strings: +- `PrepareStage`: Pause when the workload is about to setup the host environment. +- `SetupVM`: Pause when the workload is about to setup external VMs. +- `RunStage`: Pause when the workload is about to start the workload execution. +- `CleanupStage`: Pause when the workload is about to cleanup. +- `ScheduleExec`: Pause when the workload is about to schedule execution. +- `ExtractLogs`: Pause when the workload is about to extract logs. +- `ExtractKPI`: Pause when the workload is about to extract KPIs. +- `ScheduleExecFailed`: Pause when scheduling execution is failed. +- `ExtractLogsFailed`: Pause when extracting logs is failed. +- `ExtractKPIFailed`: Pause when extracting KPI is failed. + +Start the workload validation as usual (ctest), cumulus will pause at the specified breakpoints. You can start a new shell and login to the cumulus container as follows: + +``` +./debug.sh +$ +``` + +Now you can `ssh` to the remote worker and start debugging. To resume validation, simply create an empty signalling file `Resume` under `/tmp/pkb/runs//` as follows: + +``` +> touch /tmp/pkb/runs/784d84f59e3d/ResumeRunStage +``` + + +### See Also + +- [TCP TIME_WAIT Reuse](https://github.com/intel/Updates-for-OSS-Performance/blob/main/time_wait.md) +- [Unsuitable CPU Speed Policy](https://github.com/intel/Updates-for-OSS-Performance/blob/main/cpufreq.md) diff --git a/doc/setup-docker.md b/doc/setup-docker.md new file mode 100644 index 0000000..531ab28 --- /dev/null +++ b/doc/setup-docker.md @@ -0,0 +1,33 @@ + +The `docker` engine is a prerequsite to build the workload images. It is also one of the validation backends that can be used to run single-container workloads on your local machine. + +### Setup Docker + +Follow the instructions to install the `docker` engine on your local system. The docker version `20.10.10` or later is required for full features. + +``` +curl -fsSL https://get.docker.com -o get-docker.sh +sh get-docker.sh +``` + +> It is recommended that you complete the [post-installation steps](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user) to manage `docker` as a non-root user. + +### Setup Proxies + +If you are behind a firewall, complete the following steps to setup the proxies: + +``` +sudo mkdir -p /etc/systemd/system/docker.service.d +printf "[Service]\nEnvironment=\"HTTP_PROXY=$http_proxy\" \"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\n" | sudo tee /etc/systemd/system/docker.service.d/proxy.conf +sudo systemctl daemon-reload +sudo systemctl restart docker +``` + +### Docker Login + +Login to your dockerhub account so that you can pull images from dockerhub. + +### See Also + +- [Docker Setup](https://docs.docker.com/engine/install/#server) + diff --git a/doc/setup-hugepage.md b/doc/setup-hugepage.md new file mode 100644 index 0000000..044e6bb --- /dev/null +++ b/doc/setup-hugepage.md @@ -0,0 +1,43 @@ + +### Hugepage Setup + +Workloads that require to use hugepages must specify a `HAS-SETUP-HUGEPAGE` label in the format of `HAS-SETUP-HUGEPAGE--`, where `` is the hugepage size and `` is the #pages required. 
+The `<size>` value must exactly match, case sensitively, one of the hugepage sizes listed under `/sys/kernel/mm/hugepages`. For example, to request 1024 pages of 2MB hugepages, use `HAS-SETUP-HUGEPAGE-2048kB-1024`.
+
+### System Setup
+
+Setup hugepages through the kernel boot parameters, as follows:
+
+```
+sudo grubby --update-kernel=DEFAULT --args="hugepages=1024"
+```
+
+Then reboot the machine for the hugepages to take effect.
+
+For Ubuntu, edit `/etc/default/grub` (for example, `sudo vi /etc/default/grub`) and add the number of huge pages to `GRUB_CMDLINE_LINUX`, like this:
+
+```
+GRUB_CMDLINE_LINUX="hugepages=1024"
+```
+
+Then run `sudo update-grub` and reboot.
+
+To verify the changes, run `cat /proc/meminfo | grep Huge`.
+
+---
+
+Kubernetes only recognizes hugepages if they are preallocated through boot parameters.
+
+---
+
+### Node Labels
+
+To avoid creating a lot of node labels, it is recommended to specify the number of pages only in powers of 2. Label the worker node(s) with the following node labels:
+- `HAS-SETUP-HUGEPAGE-2048kB-512=yes` Optional
+- `HAS-SETUP-HUGEPAGE-2048kB-1024=yes` Optional
+- `HAS-SETUP-HUGEPAGE-2048kB-2048=yes` Optional
+- `HAS-SETUP-HUGEPAGE-2048kB-4096=yes` Optional
+
+### See Also
+
+- [Manage HugePages](https://kubernetes.io/docs/tasks/manage-hugepages/scheduling-hugepages)
+
diff --git a/doc/setup-kubernetes.md b/doc/setup-kubernetes.md
new file mode 100644
index 0000000..66a5aee
--- /dev/null
+++ b/doc/setup-kubernetes.md
@@ -0,0 +1,45 @@
+
+`Kubernetes` is the default validation backend for running single- or multi-container workloads on your local cluster of machines.
+
+### Prerequisite
+
+Starting with v1.20, Kubernetes deprecated `docker` as a container runtime in favor of `containerd`. Follow the [instructions](setup-containerd.md) to install and configure `containerd` on your system.
+
+### Setup Kubernetes
+
+Follow the [Ubuntu](https://phoenixnap.com/kb/install-kubernetes-on-ubuntu)/[CentOS](https://phoenixnap.com/kb/how-to-install-kubernetes-on-centos) instructions to setup a Kubernetes cluster. For full features, please install Kubernetes v1.21 or later.
+
+---
+
+You can build the workloads and run the workloads on the same machine by setting up a single-node Kubernetes cluster:
+
+```
+kubectl taint node --all node-role.kubernetes.io/master-
+kubectl taint node --all node-role.kubernetes.io/control-plane- # >= v1.20
+```
+
+---
+
+### Setup Node Feature Discovery
+
+Install node feature discovery as follows:
+
+```
+kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default
+```
+
+### Setup arm64 Emulation
+
+You can setup any worker node as an arm64 emulator. To do so, run the [`setup.sh`](../script/march/setup.sh) script on each such worker node:
+
+```
+script/march/setup.sh
+```
+
+### See Also
+
+- [Docker Setup](setup-docker.md)
+- [Kubernetes Setup](setup-kubernetes.md)
+- [Private Registry Authentication](setup-auth.md)
+- [Cumulus Setup](setup-cumulus.md)
+- [`cluster-config.yaml`](cluster-config.md)
diff --git a/doc/setup-module.md b/doc/setup-module.md
new file mode 100644
index 0000000..7181bbb
--- /dev/null
+++ b/doc/setup-module.md
@@ -0,0 +1,20 @@
+
+### Module Setup
+
+The `HAS-SETUP-MODULE` labels request the installation of kernel modules that are part of the OS distribution but are not installed by default during boot.
+
+The label should be specified in the format of `HAS-SETUP-MODULE-<module>`, where `<module>` is the module name with `_` replaced by `-`.
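+
+As an illustrative sketch (the node name `worker-1` is hypothetical), requesting the `msa` module maps to a node label such as:
+
+```
+kubectl label node worker-1 HAS-SETUP-MODULE-MSA=yes
+```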
+ +### System Setup + +The kernel module can be installed as follows: + +``` +sudo modprobe .ko +``` + +### Node Labels + +Add a node label to the worker node(s): +- `HAS-SETUP-MODULE-MSA`: Optional + diff --git a/doc/setup-storage.md b/doc/setup-storage.md new file mode 100644 index 0000000..5c5146c --- /dev/null +++ b/doc/setup-storage.md @@ -0,0 +1,10 @@ + +### Storage Setup + +#### SSD Disks + +Certain workloads require to use scratch disk(s) as cache storage. The workers must be equipped with the right SSD disks. + +Label the worker nodes with the following node labels: +- `HAS-SETUP-DISK-MOUNT-1`: The worker node must have a SSD disk with the size of at least 500GB. The SSD disk must be mounted under `/mnt/disk1`. + diff --git a/doc/stack.md b/doc/stack.md new file mode 100644 index 0000000..cea2428 --- /dev/null +++ b/doc/stack.md @@ -0,0 +1,14 @@ + +### Software Stack Elements + +A software stack is the underlying software layers that a workload is constructed upon. Software stack consists of the following elements, some described in this document and others in the linked document. + +- **[Dockerfiles](dockerfile.md)**: A software stack may contain one or many Dockerfiles. +- **[CMakeLists.txt](cmakelists.md)**: A manifest to configure `cmake`. +- **[build.sh](build.md)**: A script for building the workload docker image(s). + +### See Also + +- [Dockerfile Requirements](dockerfile.md) + + diff --git a/doc/template.md b/doc/template.md new file mode 100644 index 0000000..2d6aa0c --- /dev/null +++ b/doc/template.md @@ -0,0 +1,40 @@ + +There is a template system, based on `m4`, built into the workload build process. You can use the template system to simplify the workload recipe development by encapsulating any duplicated steps. + +To use the template system, create a (or more) `.m4` files under your workload folder, and put +any shared templates `.m4` under the `template` folder under the workload, feature, platform or the top directory. During the build process, those `.m4` files will be translated to the corresponding files without the `.m4` suffix, before docker build. + +The following sample uses `ippmb.m4` to encapsulate the IPP library installation steps: + +``` +# SPR/Crypto/WordPress/Docker.1.nginx.m4 +... +include(ippmb.m4) +... +``` + +where `ippmb.m4` will be expanded to: + +``` +# SPR/Crypto/template/ippmb.m4 +ARG IPP_CRYPTO_VERSION="ippcp_2020u3" +ARG IPP_CRYPTO_REPO=https://github.com/intel/ipp-crypto.git +RUN git clone -b ${IPP_CRYPTO_VERSION} --depth 1 ${IPP_CRYPTO_REPO} && \ + cd /ipp-crypto/sources/ippcp/crypto_mb && \ + cmake . -B"../build" \ + -DOPENSSL_INCLUDE_DIR=/usr/local/include/openssl \ + -DOPENSSL_LIBRARIES=/usr/local/lib64 \ + -DOPENSSL_ROOT_DIR=/usr/local/bin/openssl && \ + cd ../build && \ + make crypto_mb && \ + make install +``` + +### Pre-defined Variables: + +- **PLATFORM**: The platform name that the workload is defined for. +- **FEATURE**: The hero feature name that the workload is defined under. +- **WORKLOAD**: The workload name. +- **REGISTRY**: The private registry. +- **RELEASE**: The release version. + diff --git a/doc/validate.md b/doc/validate.md new file mode 100644 index 0000000..1e4fbbd --- /dev/null +++ b/doc/validate.md @@ -0,0 +1,55 @@ + +The `validate.sh` script initiates the workload execution, with a typical `validate.sh` shown as follows: + +``` +#!/bin/bash -e + +# Read test case configuration parameters +... + +# Logs Setting +DIR=$(dirname $(readlink -f "$0")) +. 
"$DIR/../../script/overwrite.sh" + +# Workload Setting +WORKLOAD_PARAMS="" + +# Docker Setting +DOCKER_IMAGE="$DIR/Dockerfile" +DOCKER_OPTIONS="" + +# Kubernetes Setting +RECONFIG_OPTIONS="-DCONFIG=$CONFIG" +JOB_FILTER="job-name=benchmark" + +. "$DIR/../../script/validate.sh" +``` + +The `validate.sh` saves any validation results to the current directory. + +> The following script variables are reserved. Avoid overwriting their values in `validate.sh`: +> `PLATFORM`, `WORKLOAD`, `TESTCASE`, `REGISTRY`, `RELEASE`, +> `IMAGEARCH`, `TIMEOUT`, and `SCRIPT`. + +### Validation Parameters + +- **`WORKLOAD_PARAMS`**: Specify the workload configuration parameters as an array variable of `key:value` pairs. The configuration parameters will be shown as software configuration metadata associated with the workload. +- **`WORKLOAD_TAGS`**: Specify any workload related tags as a space separated string. +- **`DOCKER_IMAGE`**: If the workload is a single-container workload and support docker run, specify either the docker image name or the `Dockerfile` used to compile the docker image. If the workload does not support docker run, leave the variable value empty. +- **`DOCKER_DATASET`**: Specify a set of dataset images as an array variable. The dataset image(s) will be volume-mounted to the running docker image. +- **`DOCKER_OPTIONS`**: Specify any docker run options, if the workload supports docker run. +- **`RECONFIG_OPTIONS`**: Specify any `m4` configuration parameters when `kubernetes-config.yaml.m4` and `cumulus-config.yaml.m4` are configured. This applies to the `kuberentes` and `cumulus` validation. +- **`JOB_FILTER`**: Specify which job/deployment is used to monitor the validation progress and after validation completion, retrieve the validation logs. +- **`SCRIPT_ARGS`**: Specify the script arguments for the `kpi.sh` or `setup.sh`. + +### Event Tracing Parameters + +- **`EVENT_TRACE_PARAMS`**: Specify the event tracing parameters: + - `roi`: Specify the ROI-based trace parameters: `roi,,`. For example, the trace parameters can be `roi,begin region of interest,end region of interest`. The workload must be instrumented to print these phrases in the console output. + - `time`: Specify a time-based trace parameters: `time,,`. For example, if the trace parameters are `time,30,10`, the trace collection starts 30 seconds after the workload containers become ready and the collection duration is 10 seconds. + - For short-ROI workloads (less than a few seconds), it is recommended that you specify the `EVENT_TRACE_PARAMS` value as an empty string, meaning that the trace ROI should be the entirety of the workload execution, which ensures that the trace collection catches the short duration of the workload execution. + +> Between `roi` and `time`, use `roi` if possible and use `time` as the last resort if the workload does not output anything meaningful to indicate a ROI. + +> Note that none of the event tracing mechanisms is timing accurate. You need to define the event trace parameter values with a high timing tolerance, at least in seconds. + diff --git a/doc/workload.md b/doc/workload.md new file mode 100644 index 0000000..d1be0b5 --- /dev/null +++ b/doc/workload.md @@ -0,0 +1,19 @@ + +### Workload Elements + +A workload consists of the following elements, some described in this document and others in the linked document: + +- **[Dockerfiles](dockerfile.md)**: A workload may contain one or many Dockerfiles. +- **[CMakeLists.txt](cmakelists.md)**: A manifest to configure `cmake`. 
+- **[build.sh](build.md)**: A script for building the workload docker image(s). +- **[validate.sh](validate.md)**: A script for executing the workload. +- **[kpi.sh](kpi.md)**: A script for extracting KPI data out of the workload execution logs. +- **[cluster-config.yaml.m4](cluster-config.md)**: A manifest to describe how to provision a machine or a set of machines for running the workload. +- **[kubernetes-config.yaml.m4](kubernetes-config.md)**: A manifest to describe how to schedule the containers to the cluster for Kubernetes. +- **[README](readme.md)**: A README to describe the workload. + +### See Also + +- [Dockerfile Requirements](dockerfile.md) +- [Provisioning Specification](cluster-config.md) + diff --git a/script/benchmark/ctest.sh b/script/benchmark/ctest.sh new file mode 100755 index 0000000..8a53e2d --- /dev/null +++ b/script/benchmark/ctest.sh @@ -0,0 +1,289 @@ +#!/bin/bash -e + +if [ "$#" -eq 0 ]; then + echo "Usage: [options]" + echo "--loop Run the ctest commands sequentially." + echo "--burst Run the ctest commands simultaneously." + echo "--run Run the ctest commands on same SUT (only with cumulus)." + echo "--test-config Specify the test-config yaml." + echo "--nohup Run the script as a daemon." + echo "--stop Kill all ctest sessions." + echo "--set Set variable values between burst and loop iterations." + echo "--continue Ignore any error and continue the burst and loop iterations." + echo "--prepare-sut Prepare cloud SUT for reuse." + echo "--reuse-sut Reuse the cloud SUT previously prepared." + echo "--cleanup-sut Cleanup cloud SUT." + echo "--dry-run Generate the testcase configurations and then exit." + echo "ctest options apply" + echo "" + echo " accepts the following formats:" + echo "VAR=str1 str2 str3 Enumerate the variable values." + echo "VAR=1 3 5 ...20 [|7] Increment variable values linearly, with mod optionally." + echo "VAR=1 2 4 ...32 [35|] Increment variable values exponentially, with mod optionally." + echo "VAR1=n1 n2/VAR2=n1 n2 Permutate variable 1 and 2." + echo "The values are repeated if insufficient to cover the loops." 
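+  # Illustrative invocations (the variable name and test filters below are hypothetical;
+  # standard ctest options such as -R, -V and -N pass through):
+  #   ./ctest.sh --loop=3 -R throughput -V
+  #   ./ctest.sh --burst=2 --continue -R sanity
+  #   ./ctest.sh --set "BATCH_SIZE=1 2 4 ...32" --loop=6 -N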
+ exit 3 +fi + +run_as_nohup="" +args=() +for var in "$@"; do + case "$var" in + --nohup) + run_as_nohup="1" + ;; + --stop) + kill -9 -a $(ps auxwww | grep ctest | awk '{print$2}') 2> /dev/null || echo -n "" + exit 0 + ;; + *) + args+=("$var") + ;; + esac +done + +if [ -n "$run_as_nohup" ]; then + nohup "$0" "${args[@]}" > nohup.out 2>&1 & + echo "tail -f nohup.out to monitor progress" + exit 0 +fi + +run=1 +burst=1 +loop=1 +step=1 +args=() +steps=() +contf=0 +test_config="$(readlink -f "$TEST_CONFIG" || echo "")" +prepare_sut=0 +sut=() +cleanup_sut=0 +reuse_sut=0 +dry_run=0 +for var in "$@"; do + case "$var" in + --loop=*) + loop="${var/--loop=/}" + ;; + --loop) + loop="-1" + ;; + --burst=*) + burst="${var/--burst=/}" + ;; + --burst) + burst="-1" + ;; + --run=*) + run="${var/--run=/}" + ;; + --run) + run="-1" + ;; + --prepare-sut) + prepare_sut=1 + ;; + --set=*) + steps+=("${var/--set=/}") + ;; + --set) + step="-1" + ;; + --test-config=*) + test_config="$(readlink -f "${var/--test-config=/}")" + ;; + --test-config) + test_config="-1" + ;; + --continue) + contf=1 + ;; + --cleanup-sut) + cleanup_sut=1 + ;; + --reuse-sut) + reuse_sut=1 + ;; + --dry-run) + dry_run=1 + ;; + *) + if [ "$loop" = "-1" ]; then + loop="$var" + elif [ "$burst" = "-1" ]; then + burst="$var" + elif [ "$run" = "-1" ]; then + run="$var" + elif [ "$step" = "-1" ]; then + steps+=("$var") + step=1 + elif [ "$test_config" = "-1" ]; then + test_config="$(readlink -f "$var")" + else + args+=("$var") + fi + ;; + esac +done + +if [ "$loop" = "-1" ]; then + loop=1 +fi + +if [ "$burst" = "-1" ]; then + burst=1 +fi + +if [ "$run" = "-1" ]; then + run=1 +fi + +if [ $prepare_sut = 1 ] || [ $cleanup_sut = 1 ]; then + loop=1 + burst=1 + run=1 +fi + +if [ $reuse_sut = 1 ]; then + burst=1 +fi + +readarray -t values < <(for step1 in "${steps[@]}"; do + echo "$step1" | tr '/' '\n' | awk '{ + split($1,kv,"=") + $1=kv[2] + if ($NF~/^\|[0-9]+$/ || $NF~/^[0-9]+\|$/) { + modstr=$NF + NF=NF-1 + } else { + modstr="" + } + if (($NF ~ /^\.\.\./) && (NF>3)) { + stop=gensub(/^\.\.\./,"",1,$NF) + $NF="" + current=$(NF-1) + delta=current-$(NF-2) + if ($(NF-2)-$(NF-3)==delta) { + for(i=NF-1;(current+0) <= (stop+0);i++) { + $i=current + current+=delta + } + } + if ($(NF-3)!=0 && $(NF-2)!=0) { + factor=$(NF-1)/$(NF-2) + if ($(NF-3)*factor==$(NF-2)) { + for(i=NF-1;(current+0) <= (stop+0);i++) { + $i=current + current*=factor + } + } + } + } + if (modstr~/^\|[0-9]+/) { + modval=gensub(/\|/,"",1,modstr) + j=0 + for(i=1;i<=NF;i++) { + if (($i % modval)==0) { + j++ + if (i!=j) $j=$i + } + } + NF=j + } + if (modstr~/^[0-9]+\|$/) { + modval=gensub(/\|/,"",1,modstr) + j=0 + for(i=1;i<=NF;i++) { + if ($i!=0) { + if ((modval % $i)==0) { + j++ + if (i!=j) $j=$i + } + } + } + NF=j + } + for(k=1;k<=NF;k++) + vars[kv[1]][k]=$k + } + END { + nk=1 + for(k in vars) { + nk*=length(vars[k]) + } + nk1=nk + nk2=1 + for(k in vars) { + varn=length(vars[k]) + nk1=nk1/varn + printf "%s ", k + for(r=0;r> $tmp + for var1 in "${values[@]}"; do + values1=($(echo "$var1" | tr ' ' '\n')) + key1="${values1[0]}" + val1="${values1[$(( (((loop1-1)*burst+burst1-1) % (${#values1[@]}-1))+1 ))]}" + echo "$key1: $val1" + echo " $key1: \"$val1\"" >> $tmp + done + tmp_files+=($tmp) + fi + ( + export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --run_stage_iterations=$run" + [ $prepare_sut = 1 ] && export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --run_stage=provision,prepare" + [ $cleanup_sut = 1 ] && export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --cleanup-sut" + [ $reuse_sut = 1 ] && export 
CTESTSH_OPTIONS="$CTESTSH_OPTIONS --reuse-sut" + [ $dry_run = 1 ] && export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --dry-run" + set -x + ctest "${args[@]}" + ) & + pids+=($!) + done + if [ $contf = 1 ]; then + wait ${pids[@]} || true + else + wait ${pids[@]} + fi + remove_tmp_files +done diff --git a/script/benchmark/debug.sh b/script/benchmark/debug.sh new file mode 100755 index 0000000..3e4565d --- /dev/null +++ b/script/benchmark/debug.sh @@ -0,0 +1,13 @@ +#!/bin/bash -e + +owner="${1:-$( (git config user.name || id -un) 2> /dev/null)-}" +cmd="docker ps -f name=$(echo $owner | tr 'A-Z' 'a-z' | tr -c -d 'a-z0-9-' | sed 's|^\(.\{12\}\).*$|\1|')" +if [ "$($cmd | wc -l)" -ne 2 ]; then + echo "None or multiple ctest instances detected:" + echo "" + $cmd --format '{{.ID}} {{.Names}}' + echo "" + echo "Please identify the instance with: ./debug.sh " + exit 3 +fi +docker exec -u pkb -it $($cmd --format '{{.ID}}') bash diff --git a/script/benchmark/kpi-list.awk b/script/benchmark/kpi-list.awk new file mode 100755 index 0000000..07d3f2d --- /dev/null +++ b/script/benchmark/kpi-list.awk @@ -0,0 +1,79 @@ +#!/usr/bin/gawk + +/^#svrinfo[:-]/ { + next +} +{ + print $0 +} +/^*/ { + kpi=$NF + $NF="" + n[$0]=n[$0]+1 + kpis[$0][n[$0]]=kpi +} +END { + print "" + for (x in n) { + sum[x]=0 + sumsq[x]=0 + for (y in kpis[x]) { + sum[x]+=kpis[x][y] + sumsq[x]+=kpis[x][y]^2 + } + average=sum[x]/n[x] + stdev=sqrt((sumsq[x]-sum[x]^2/n[x])/n[x]) + + print "avg "x,average + print "std "x,stdev + + average=sum[x]/n[x] + stdev=sqrt((sumsq[x]-sum[x]^2/n[x])/n[x]) + + asort(kpis[x], kpis1, "@val_num_asc") + if(n[x]%2) { + k=(n[x]+1)/2 + print "med "x,kpis1[k] + } else { + k=n[x]/2+1 + print "med "x,kpis1[k] + } + + r=0 + if (outlier>0) { + for (y in kpis[x]) { + if ((kpis[x][y]>average+outlier*stdev)||(kpis[x][y]0) { + print "removed "r" outlier(s)" + + sum[x]=0 + sumsq[x]=0 + n[x]=0 + for (y in kpis[x]) { + sum[x]+=kpis[x][y] + sumsq[x]+=kpis[x][y]^2 + n[x]=n[x]+1 + } + + asort(kpis[x], kpis1, "@val_num_asc") + if(n[x]%2) { + k=(n[x]+1)/2 + print "med "x,kpis1[k] + } else { + k=n[x]/2+1 + print "med "x,kpis1[k] + } + + average=sum[x]/n[x] + stdev=sqrt((sumsq[x]-sum[x]^2/n[x])/n[x]) + print "avg "x,average + print "std "x,stdev + } + } +} diff --git a/script/benchmark/kpi-xls-ai.awk b/script/benchmark/kpi-xls-ai.awk new file mode 100755 index 0000000..264e2c9 --- /dev/null +++ b/script/benchmark/kpi-xls-ai.awk @@ -0,0 +1,217 @@ +#!/usr/bin/gawk + +BEGIN { + if (var1 == "default") var1="batch_size" + if (var2 == "default") var2="cores_per_instance" + if (var3 == "default") var3="*Throughput" + if (var4 == "default") var4="Throughput_" +} + +function get_value() { + if ($NF*1 == $NF) return $NF + if ($(NF-1)*1 == $(NF-1)) return $(NF-1) + print "Unable to extract value: "$0 > "/dev/stderr" + exit 3 +} + +/^#svrinfo: / { + product=$3 +} + +/\/itr-[0-9]*:$/{ + name=gensub(/^.*-logs-(.*)\/itr-.*$/,"\\1",1) + itr=gensub(/^.*\/itr-([0-9]+):$/,"\\1",1) +} + +index($0,var1)==1 || ($1=="#" && index($2,var1)==1) { + var1v=gensub(/"(.*)"/,"\\1",1,$NF) +} + +index($0,var2)==1 || ($1=="#" && index($2,var2)==1) { + var2v=gensub(/"(.*)"/,"\\1",1,$NF) +} + +index($0,var3)==1 { + var3v[name][product][var1v][var2v][++var3vct[name][product][var1v][var2v]]=get_value() + n=length(var3v[name][product][var1v][var2v]) + if (n>var34n[name][product][var1v][var2v]) + var34n[name][product][var1v][var2v]=n +} + +index($0,var4)==1 { + idx=gensub(/ *([0-9]+).*$/,"\\1",1,substr($0,length(var4)+1)) + 
var4v[name][product][var1v][var2v][idx][++var4vct[name][product][var1v][var2v][idx]]=get_value() + n=length(var4v[name][product][var1v][var2v][idx]) + if (n>var34n[name][product][var1v][var2v]) + var34n[name][product][var1v][var2v]=n +} + +END { + add_xls_header() + + print "" + print "" + + print "" + print "" escape(var1) "" + for (ws in var3v) { + for (p in var3v[ws]) { + ws_p=ws"-"p + print "" escape(ws_name_ex(ws_p)) "" + for (v1 in var3v[ws][p]) { + v1s[v1][ws_p]=0 + for (v2 in var3v[ws][p][v1]) { + var3m=length(var3v[ws][p][v1][v2])>0?median(var3v[ws][p][v1][v2]):0 + if (var3m>v1s[v1][ws_p]) v1s[v1][ws_p]=var3m + } + } + } + } + print "" + + n1=asorti(v1s,v1sp,"@ind_num_asc") + for (v1=1;v1<=n1;v1++) { + print "" + print "" v1sp[v1]*1 "" + for (ws in var3v) { + for (p in var3v[ws]) { + print "" v1s[v1sp[v1]][ws"-"p]*1 "" + } + } + print "" + } + print "
" + print "
" + + for (ws in var34n) { + for (p in var34n[ws]) { + print "" + print "" + n1=asorti(var34n[ws][p], var1sp, "@ind_num_asc") + + th=1 + for(v1=1;v1<=n1;v1++) { + th++ + n2=asorti(var34n[ws][p][var1sp[v1]], var2sp, "@ind_num_asc") + for (v2=1;v2<=n2;v2++) { + th++ + n3=var34n[ws][p][var1sp[v1]][var2sp[v2]] + for (i=1;i<=n3;i++) { + print "" + th++ + } + } + th++ + } + + th=1 + print "" + for (v1=1;v1<=n1;v1++) { + print "" escape(var1) "" + print "" var1sp[v1]*1 "" + th++ + + for (v2 in var34n[ws][p][var1sp[v1]]) + th+=var34n[ws][p][var1sp[v1]][v2]+1 + th++ + } + print "" + + print "" + th=1 + for(v1=1;v1<=n1;v1++) { + print "" escape(var2) "" + th++ + + n2=asorti(var34n[ws][p][var1sp[v1]], var2sp, "@ind_num_asc") + for (v2=1;v2<=n2;v2++) { + print "" var2sp[v2]*1 "" + th+=var34n[ws][p][var1sp[v1]][var2sp[v2]]+1 + } + th++ + } + print "" + + print "" + th=1 + for(v1=1;v1<=n1;v1++) { + print "" escape(gensub(/^\*/,"",1,var3)) "" + th++ + + n2=asorti(var34n[ws][p][var1sp[v1]], var2sp, "@ind_num_asc") + for (v2=1;v2<=n2;v2++) { + var3m=length(var3v[ws][p][var1sp[v1]][var2sp[v2]])>0?median(var3v[ws][p][var1sp[v1]][var2sp[v2]]):0 + print "" var3m*1 "" + th++ + + n3=var34n[ws][p][var1sp[v1]][var2sp[v2]] + for (i=1;i<=n3;i++) { + vi=(length(var3v[ws][p][var1sp[v1]][var2sp[v2]])>0)?var3v[ws][p][var1sp[v1]][var2sp[v2]][i]:0 + if (vi==var3m) { + print "" var3m*1 "" + var4i[ws][var1sp[v1]][var2sp[v2]]=i + } else { + print "" vi*1 "" + } + th++ + } + } + th++ + } + print "" + + print "" + th=1 + cn=0 + for(v1=1;v1<=n1;v1++) { + print "count" + th++ + + n2=asorti(var34n[ws][p][var1sp[v1]], var2sp, "@ind_num_asc") + for (v2=1;v2<=n2;v2++) { + count=length(var4v[ws][p][var1sp[v1]][var2sp[v2]]) + print "" count*1 "" + if (count>cn) cn=count + th+=var34n[ws][p][var1sp[v1]][var2sp[v2]]+1 + } + th++ + } + print "" + + for (c=1;c<=cn;c++) { + print "" + + th=2 + for(v1=1;v1<=n1;v1++) { + n2=asorti(var34n[ws][p][var1sp[v1]], var2sp, "@ind_num_asc") + for (v2=1;v2<=n2;v2++) { + n4=var34n[ws][p][var1sp[v1]][var2sp[v2]] + if (length(var4v[ws][p][var1sp[v1]][var2sp[v2]][c])>0) { + var4ii=var4i[ws][var1sp[v1]][var2sp[v2]] + print "" var4v[ws][p][var1sp[v1]][var2sp[v2]][c][var4ii]*1 "" + + for(i=1;i<=n4;i++) { + if (i==var4ii) { + print "" var4v[ws][p][var1sp[v1]][var2sp[v2]][c][i]*1 "" + } else { + print "" var4v[ws][p][var1sp[v1]][var2sp[v2]][c][i]*1 "" + } + } + } + th+=n4+1 + } + th+=2 + } + print "" + } + + print "
" + print "
" + } + + # write svrinfo + if (length(svrinfo_values[ws])>0) + add_svrinfo(ws) + } + print "" +} diff --git a/script/benchmark/kpi-xls-inst.awk b/script/benchmark/kpi-xls-inst.awk new file mode 100755 index 0000000..693ceb3 --- /dev/null +++ b/script/benchmark/kpi-xls-inst.awk @@ -0,0 +1,96 @@ +#!/usr/bin/gawk + +/^#svrinfo: / { + name=gensub(/^.*-logs-(.*)\/runs\/.*$/,"\\1",1,$2) + product=$3 +} + +/\/itr-[0-9]*:$/ { + name=gensub(/^.*-logs-(.*)\/itr-.*$/,"\\1",1) +} + +(!/^#/) && /.*: *[0-9.-]+ *$/ { + k=gensub(/^(.*):.*$/, "\\1", 1) + v=gensub(/^.*: *([0-9.-]+) *$/, "\\1", 1) + kpis[name][product][k][++kpisct[name][product][k]]=v + kpis_uniq[name][k]=1 +} + +END { + add_xls_header(1) + + for (ws in kpis) { + nk=asorti(kpis_uniq[ws], ksp, "@ind_str_asc") + if(nk>24) nk=24 + np=asorti(kpis[ws], psp, "@ind_str_asc") + + print "" + print "" + + th=2 + for (p=1;p<=np;p++) { + ith[p]=th + nk1=0 + for (k=1;k<=nk;k++) { + nk1n=length(kpis[ws][psp[p]][ksp[k]]) + if (nk1n>nk1) nk1=nk1n + } + for (k=1;k<=nk1;k++) + print "" + + th+=nk1+1 + } + ith[p]=th + + print "" + print "Instance Type" + for (p=1;p<=np;p++) { + print "" escape(psp[p]) "" + } + print "" + + # calculate median + for (p=1;p<=np;p++) { + kn[p]=length(kpis[ws][psp[p]][ksp[1]]) + m=median(kpis[ws][psp[p]][ksp[1]]) + kii[p]=0 + for (i=1;i<=kn[p];i++) + if (m==kpis[ws][psp[p]][ksp[1]][i]) + kii[p]=i + } + + # kpis + for(k=1;k<=nk;k++) { + print "" + print "" escape(ksp[k]) "" + for(p=1;p<=np;p++) { + print "" kpis[ws][psp[p]][ksp[k]][kii[p]]*1 "" + for(i=1;i<=kn[p];i++) { + style=(i==kii[p])?"-median":"" + print "" kpis[ws][psp[p]][ksp[k]][i]*1 "" + } + } + print "" + } + + # empty KPI lines + for(k=nk+1;k<=23;k++) { + print "" + print "" + for(p=1;p<=np;p++) { + print "" + for(i=1;i<=kn[p];i++) + print "" + } + print "" + } + + add_svrinfo_ex(ws, psp, ith) + + print "
" + print "
" + + add_svrinfo(ws) + } + print "" +} diff --git a/script/benchmark/kpi-xls-table.awk b/script/benchmark/kpi-xls-table.awk new file mode 100755 index 0000000..6507f3e --- /dev/null +++ b/script/benchmark/kpi-xls-table.awk @@ -0,0 +1,123 @@ +#!/usr/bin/gawk + +BEGIN { + name="default" + if (var1 == "default") var1="batch_size" + if (var2 == "default") var2="cores_per_instance" + if (var3 == "default") var3="" + if (var4 == "default") var4="" + var3v="" + var4v="" +} + +/^#svrinfo: / { + name=gensub("^.*logs-(.*)[/]runs[/].*$","\\1",1,$2) +} + +/[/]itr-[0-9]*:$/ { + name=gensub("^.*logs-(.*)[/]itr-.*$","\\1",1) +} + +index($0,var1)==1 || ($1=="#" && index($2,var1)==1) { + var1v=gensub(/"/,"","g",$NF) +} + +index($0,var2)==1 || ($1=="#" && index($2,var2)==1) { + var2v=gensub(/"/,"","g",$NF) +} + +(index($0,var3)==1 || ($1=="#" && index($2,var3)==1)) && length(var3)>0 { + var3v=var3": "gensub(/"/,"","g",$NF) +} + +(index($0,var4)==1 || ($1=="#" && index($2,var4)==1)) && length(var4)>0 { + var4v=var4": "gensub(/"/,"","g",$NF) +} + +/^[*]/ { + primary_kpi[name]=gensub(/^[*](.*):.*/,"\\1",1,$0) + var34v="" + if (length(var3)>0) var34v=var3v + if (length(var4)>0) { + if (length(var34v)>0) + var34v=var34v", "var4v + else + var34v=var4v + } + idx= ++ikpis[name][var34v][var2v][var1v] + kpis[name][var34v][var2v][var1v][idx]=$NF + if (idx > var1v_num[name][var1v]) + var1v_num[name][var1v]=idx +} + +END { + add_xls_header(1) + + for (ws in kpis) { + ntables=asorti(kpis[ws], tables, "@ind_str_asc") + + print "" + print "" + + th=3 + var1v_nsp=asorti(var1v_num[ws], var1v_sp, "@ind_num_asc") + for (v1=1;v1<=var1v_nsp;v1++) { + ith[v1]=th + nk=var1v_num[ws][var1v_sp[v1]] + for (k=1;k<=nk;k++) + print "" + th+=nk+1 + } + + for (t=1;t<=ntables;t++) { + print "" + print "" tables[t] "" + print "" + var34=tables[t] + + print "" + print "" var1 "" + print "" + + print "" + print "" primary_kpi[ws] "" + for (v1=1;v1<=var1v_nsp;v1++) { + style=(var1v_sp[v1]==var1v_sp[v1]*1)?"Number":"String" + print "" var1v_sp[v1] "" + } + print "" + + var2v_nsp=asorti(kpis[ws][var34], var2v_sp, "@ind_num_asc") + for (v2=1;v2<=var2v_nsp;v2++) { + print "" + if (v2==1) { + print "" var2 "" + } + style=(var2v_sp[v2]==var2v_sp[v2]*1)?"Number":"String" + print "" var2v_sp[v2] "" + for (v1=1;v1<=var1v_nsp;v1++) { + n=length(kpis[ws][var34][var2v_sp[v2]][var1v_sp[v1]]) + if (n>0) { + m=median(kpis[ws][var34][var2v_sp[v2]][var1v_sp[v1]]) + print "" m "" + for (n1=1;n1<=n;n1++) { + n1v=kpis[ws][var34][var2v_sp[v2]][var1v_sp[v1]][n1] + style=(m""==n1v"")?"-median":"" + print "" n1v "" + } + } + } + print "" + } + print "" + print "" + } + + print "
" + print "
" + + if (length(svrinfo_values[ws])>0) + add_svrinfo(ws) + } + print "" +} diff --git a/script/benchmark/list-kpi.sh b/script/benchmark/list-kpi.sh new file mode 100755 index 0000000..236513d --- /dev/null +++ b/script/benchmark/list-kpi.sh @@ -0,0 +1,222 @@ +#!/bin/bash -e + +DIR="$(dirname "$(readlink -f "$0")")" + +print_help () { + echo "Usage: [options] logsdir" + echo "--primary List only the primary KPI." + echo "--all List all KPIs." + echo "--params Print out all configuration parameters." + echo "--outlier Drop samples beyond N-stdev." + echo "--format Specify the output format: list, xls-ai, xls-inst, or xls-table." + echo "--var[1-9] value Specify spreadsheet variables." + echo "--phost name Specify the primary hostname for identifying instance type." + echo "--pinst name Specify the svrinfo field for identifying instance type name." + echo "--file filename Specify the spreadsheet filename." + echo "--filter filter Specify the trim filter to shorten the worksheet name." + exit 0 +} + +phost="node1" +pinst="System.Product Name" +prefixes=() +primary=1 +outlier=0 +printvar=0 +format="list" +xlsfile="kpi-report.xls" +params=0 +var1="default" +var2="default" +var3="default" +var4="default" +filter="_(tensorflow|throughput|inference|benchmark|real)" +for var in "$@"; do + case "$var" in + --primary) + primary=1 + ;; + --all) + primary="" + ;; + --outlier=*) + outlier="${var#--outlier=}" + ;; + --outlier) + outlier="-1" + ;; + --params|--params=true) + params=1 + ;; + --params=false) + params=0 + ;; + --format=*) + format="${var#--format=}" + ;; + --format) + format="-1" + ;; + --var1=*) + var1="${var#--var1=}" + ;; + --var1) + var1="-1" + ;; + --var2=*) + var2="${var#--var2=}" + ;; + --var2) + var2="-1" + ;; + --var3=*) + var3="${var#--var3=}" + ;; + --var3) + var3="-1" + ;; + --var4=*) + var4="${var#--var4=}" + ;; + --var4) + var4="-1" + ;; + --phost=*) + phost="${var#--phost=}" + ;; + --phost) + phost="-1" + ;; + --pinst=*) + pinst="${var#--pinst=}" + ;; + --pinst) + pinst="-1" + ;; + --filter=*) + filter="${var#--filter=}" + ;; + --filter) + filter="-1" + ;; + --file=*) + xlsfile="${var#--file=}" + ;; + --file) + xlsfile="" + ;; + --help) + print_help + ;; + *) + if [ "$outlier" = "-1" ]; then + outlier="$var" + elif [ "$format" = "-1" ]; then + format="$var" + elif [ "$var1" = "-1" ]; then + var1="$var" + elif [ "$var2" = "-1" ]; then + var2="$var" + elif [ "$var3" = "-1" ]; then + var3="$var" + elif [ "$var4" = "-1" ]; then + var4="$var" + elif [ "$phost" = "-1" ]; then + phost="$var" + elif [ "$pinst" = "-1" ]; then + pinst="$var" + elif [ "$filter" = "-1" ]; then + filter="$var" + elif [ -z "$xlsfile" ]; then + xlsfile="$var" + else + prefixes+=("$var") + fi + ;; + esac +done + +if [ "$outlier" = "-1" ]; then + outlier=0 +fi + +if [ "$format" != "list" ]; then + primary="" + params=1 +fi + +if [ ${#prefixes[@]} -eq 0 ]; then + print_help +fi + +for logsdir1 in ${prefixes[@]}; do + if [ -r "$logsdir1/kpi.sh" ] && [ -r "$logsdir1"/cumulus-config.yaml ]; then + pinstv="" + for svrinfo in "$logsdir1"/runs/*/pkb-*-svrinfo/*.json; do + if [ -r "$svrinfo" ]; then + pinstv="$(sed 's/^/#svrinfo- /' "$logsdir1"/runs/*/pkb-*-svrinfo/*.json | awk -v name=$phost -v pinst="$pinst" -f "$DIR/svrinfo-json.awk" -f "$DIR/svrinfo-inst.awk")" + break + fi + done + for svrinfo in "$logsdir1"/runs/*/pkb-*-svrinfo/*.json; do + if [ -r "$svrinfo" ]; then + echo "#svrinfo: $svrinfo $pinstv" + sed 's/^/#svrinfo- /' "$svrinfo" + echo + fi + done + script_args="$(awk '/dpt_script_args:/{$1="";print 
gensub(/"/,"","g")}' "$logsdir1/cumulus-config.yaml")" + if [ -d "$logsdir1/itr-1" ]; then + for itrdir1 in "$logsdir1"/itr-*; do + echo "$itrdir1:" + if [ $params -eq 1 ]; then + awk '/dpt_tunables:/{$1="";print gensub(/"/,"","g")}' "$logsdir1/cumulus-config.yaml" | tr ';' '\n' | sed 's/^ *\([^:]*\):\(.*\)$/# \1: "\2"/' + fi + chmod a+rx "$itrdir1/kpi.sh" + if [ -n "$primary" ]; then + ( cd "$itrdir1" && ./kpi.sh $script_args | grep -E "^\*" ) || true + else + ( cd "$itrdir1" && ./kpi.sh $script_args ) || true + fi + done + else + echo "$logsdir1:" + if [ $params -eq 1 ]; then + awk '/dpt_tunables:/{$1="";print gensub(/"/,"","g")}' "$logsdir1/cumulus-config.yaml" | tr ';' '\n' | sed 's/^ *\([^:]*\):\(.*\)$/# \1: "\2"/' + fi + chmod a+rx "$logsdir1/kpi.sh" + if [ -n "$primary" ]; then + ( cd "$logsdir1" && ./kpi.sh $script_args | grep -E "^\*" ) || true + else + ( cd "$logsdir1" && ./kpi.sh $script_args ) || true + fi + fi + elif [ -r "$logsdir1/kpi.sh" ] && [ -r "$logsdir1"/workload-config.yaml ]; then + script_args="$(awk '/^script_args:/{$1="";print gensub(/"/,"","g")}' "$logsdir1/workload-config.yaml")" + echo "$logsdir1:" + if [ $params -eq 1 ]; then + awk '/^tunables:/{$1="";print gensub(/"/,"","g")}' "$logsdir1/workload-config.yaml" | tr ';' '\n' | sed 's/^ *\([^:]*\):\(.*\)$/# \1: "\2"/' + fi + chmod a+rx "$logsdir1/kpi.sh" + if [ -n "$primary" ]; then + ( cd "$logsdir1" && ./kpi.sh $script_args | grep -E "^\*" ) || true + else + ( cd "$logsdir1" && ./kpi.sh $script_args ) || true + fi + fi +done | ( + case "$format" in + list) + awk -v outlier=$outlier -f "$DIR/kpi-list.awk" + ;; + xls-ai) + awk -v outlier=$outlier -v var1="$var1" -v var2="$var2" -v var3="$var3" -v var4="$var4" -v filter="$filter" -f "$DIR/xlsutil.awk" -f "$DIR/svrinfo-json.awk" -f "$DIR/svrinfo-xls.awk" -f "$DIR/kpi-xls-ai.awk" > "$xlsfile" + ;; + xls-inst) + awk -v var1="$var1" -v phost=$phost -v filter="$filter" -f "$DIR/xlsutil.awk" -f "$DIR/svrinfo-json.awk" -f "$DIR/svrinfo-xls.awk" -f "$DIR/kpi-xls-inst.awk" > "$xlsfile" + ;; + xls-table) + awk -v var1="$var1" -v var2="$var2" -v var3="$var3" -v var4="$var4" -v filter="$filter" -f "$DIR/xlsutil.awk" -f "$DIR/svrinfo-json.awk" -f "$DIR/svrinfo-xls.awk" -f "$DIR/kpi-xls-table.awk" > "$xlsfile" + ;; + esac +) diff --git a/script/benchmark/svrinfo-inst.awk b/script/benchmark/svrinfo-inst.awk new file mode 100755 index 0000000..c8b0128 --- /dev/null +++ b/script/benchmark/svrinfo-inst.awk @@ -0,0 +1,14 @@ +#!/usr/bin/gawk + +END { + split(pinst, pinst_fields, ".") + for (p in svrinfo_values[svrinfo_ws]) { + for (ip in svrinfo_values[svrinfo_ws][p]) { + if (svrinfo_values[svrinfo_ws][p][ip]["Host"]["Name"][1] == name) { + print svrinfo_values[svrinfo_ws][p][ip][pinst_fields[1]][pinst_fields[2]][1] + break + } + } + } +} + diff --git a/script/benchmark/svrinfo-json.awk b/script/benchmark/svrinfo-json.awk new file mode 100755 index 0000000..0134afe --- /dev/null +++ b/script/benchmark/svrinfo-json.awk @@ -0,0 +1,58 @@ +#!/usr/bin/gawk + +BEGIN { + svrinfo_values_start=0 + svrinfo_names_start=0 + svrinfo_host_start=0 + svrinfo_ws="default" + svrinfo_product="default" +} + +/^#svrinfo: / { + svrinfo_ws=gensub("^.*logs-(.*)[/]runs[/].*$","\\1",1,$2) + svrinfo_product=$3 +} + +/^#svrinfo-\s*"Name":\s*".*",*\s*$/ && !svrinfo_values_start && !svrinfo_names_start { + v=gensub(/^#svrinfo-\s*"Name":\s*"(.*)",*\s*$/, "\\1", 1, $0) + if (svrinfo_host_start) { + svrinfo_ip=v + } else { + svrinfo_group=v + } +} + +/^#svrinfo-\s*"AllHostValues":/ { + svrinfo_host_start=1 +} + 
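+# Note on the parsing below: each svrinfo table is emitted as a "ValueNames"
+# list (the column names) followed by "Values" rows; each value line is mapped
+# back to its column name round-robin (svrinfo_nvalues % svrinfo_nnames) and
+# grouped into per-row indexes for later spreadsheet generation.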
+svrinfo_names_start && !svrinfo_values_start && /^#svrinfo-\s*".*",*\s*$/ { + svrinfo_names[++svrinfo_nnames]=gensub(/^#svrinfo-\s*"(.*)",*\s*$/, "\\1", 1, $0) +} + +svrinfo_names_start && !svrinfo_values_start && /^#svrinfo-\s*]\s*$/ { + svrinfo_names_start=0 +} + +/^#svrinfo-\s*"ValueNames":/ { + svrinfo_names_start=1 + svrinfo_nnames=0 +} + +svrinfo_values_start && /^#svrinfo-\s*".*",*\s*$/ { + n=svrinfo_names[(svrinfo_nvalues%svrinfo_nnames)+1] + i=int(svrinfo_nvalues/svrinfo_nnames)+1 + ++svrinfo_nvalues + svrinfo_values[svrinfo_ws][svrinfo_product][svrinfo_ip][svrinfo_group][n][i]=gensub(/^#svrinfo-\s*"(.*)",*\s*$/,"\\1",1,$0) +} + +svrinfo_values_start && /^#svrinfo-\s*}\s*$/ { + svrinfo_values_start=0 + svrinfo_host_start=0 +} + +/^#svrinfo-\s*"Values":/ { + svrinfo_values_start=1 + svrinfo_nvalues=0 +} + diff --git a/script/benchmark/svrinfo-xls.awk b/script/benchmark/svrinfo-xls.awk new file mode 100755 index 0000000..08f442b --- /dev/null +++ b/script/benchmark/svrinfo-xls.awk @@ -0,0 +1,387 @@ +#!/usr/bin/gawk + +function add_svrinfo_cell(vv) { + t=(vv==vv*1)?"Number":"String" + print "" escape(vv) "" +} + +function add_svrinfo_row(ws, g, k) { + print "" + add_svrinfo_cell(g"."k) + for (p in svrinfo_values[ws]) + for (s in svrinfo_values[ws][p]) + add_svrinfo_cell(svrinfo_values[ws][p][s][g][k][1]) + print "" +} + +function add_svrinfo_isa_summary(ws, g) { + print "" + add_svrinfo_cell(g) + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + vv="" + for (k in svrinfo_values[ws][p][s][g]) + if (svrinfo_values[ws][p][s][g][k][1] == "Yes") + vv=vv", "gensub(/-.*/,"",1,k) + add_svrinfo_cell(gensub(/^, /,"",1,vv)) + } + } + print "" +} + +function add_svrinfo_accelerator_summary(ws, g) { + print "" + add_svrinfo_cell(g) + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + vv="" + for (k in svrinfo_values[ws][p][s][g]) + if (svrinfo_values[ws][p][s][g][k][1]>0) + vv=vv", "k":"svrinfo_values[ws][p][s][g][k][1] + add_svrinfo_cell(gensub(/^, /,"",1,vv)) + } + } + print "" +} + +function add_svrinfo_nic_summary(ws, g, n, m) { + n1=0 + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + n2=length(svrinfo_values[ws][p][s][g][m]) + if (n2>n1) n1=n2 + } + } + for (n2=1;n2<=n1;n2++) { + print "" + add_svrinfo_cell((n2==1)?g:"") + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + vv="" + n3=0 + for (i in svrinfo_values[ws][p][s][g][m]) { + n3++ + if (n3==n2) { + vv=svrinfo_values[ws][p][s][g][n][i]": "svrinfo_values[ws][p][s][g][m][i] + break + } + } + add_svrinfo_cell(vv) + } + } + print "" + } +} + +function add_svrinfo_disk_summary(ws, g, n, m) { + n1=0 + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + n2=0 + for (i in svrinfo_values[ws][p][s][g][m]) + if (length(svrinfo_values[ws][p][s][g][m][i])>0) n2++ + if (n2>n1) n1=n2 + } + } + + for (n2=1;n2<=n1;n2++) { + print "" + add_svrinfo_cell((n2==1)?g:"") + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + n3=0 + vv="" + for (i in svrinfo_values[ws][p][s][g][m]) { + if (length(svrinfo_values[ws][p][s][g][m][i])>0) n3++ + if (n3==n2) { + vv=svrinfo_values[ws][p][s][g][n][i]": "svrinfo_values[ws][p][s][g][m][i] + break + } + } + add_svrinfo_cell(vv) + } + } + print "" + } +} + +function add_svrinfo_security_summary(ws, g) { + n1=0 + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + n2=length(svrinfo_values[ws][p][s][g]) + if (n2>n1) n1=n2 + } + } + for (n2=1;n2<=n1;n2++) { + print "" + g1=(n2==1)?g:"" 
+ add_svrinfo_cell(g1) + for (p in svrinfo_values[ws]) { + for (s in svrinfo_values[ws][p]) { + vv="" + n3=0 + for (k in svrinfo_values[ws][p][s][g]) { + n3++ + if (n3==n2) { + vv=k": "gensub(/\s*[(].*[)].*/,"",1,svrinfo_values[ws][p][s][g][k][1]) + break; + } + } + add_svrinfo_cell(vv) + } + } + print "" + } +} + +function find_svrinfo_phost(ws, p) { + if (length(svrinfo_values[ws][p])==1) + for (s in svrinfo_values[ws][p]) + return s + for (s in svrinfo_values[ws][p]) + for (i in svrinfo_values[ws][p][s]["Host"]["Name"]) + if (svrinfo_values[ws][p][s]["Host"]["Name"][i] == phost) + return s + return "" +} + +function add_svrinfo_cell_ex(i, vv) { + style=(vv==vv*1)?"Number":"String" + print "" escape(vv) "" +} + +function add_svrinfo_row_ex(ws, psp, ith, g, k) { + print "" + add_svrinfo_cell(g"."k) + np=length(psp) + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + add_svrinfo_cell_ex(ith[p1], svrinfo_values[ws][psp[p1]][s][g][k][1]) + } + print "" +} + +function add_svrinfo_nic_summary_ex(ws, psp, ith, g, n, m) { + np=length(psp) + n1=0 + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + n2=length(svrinfo_values[ws][psp[p1]][s][g][m]) + if (n2>n1) n1=n2 + } + for (n2=1;n2<=n1;n2++) { + print "" + add_svrinfo_cell((n2==1)?g:"") + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + vv="" + n3=0 + for (i in svrinfo_values[ws][psp[p1]][s][g][m]) { + n3++ + if (n3==n2) { + vv=svrinfo_values[ws][psp[p1]][s][g][n][i]": "svrinfo_values[ws][psp[p1]][s][g][m][i] + break + } + } + add_svrinfo_cell_ex(ith[p1], vv) + } + print "" + } +} + +function add_svrinfo_security_summary_ex(ws, psp, ith, g) { + np=length(psp) + n1=0 + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + n2=length(svrinfo_values[ws][psp[p1]][s][g]) + if (n2>n1) n1=n2 + } + for (n2=1;n2<=n1;n2++) { + print "" + add_svrinfo_cell((n2==1)?g:"") + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + vv="" + n3=0 + for (k in svrinfo_values[ws][psp[p1]][s][g]) { + n3++ + if (n3==n2) { + vv=k": "gensub(/\s*[(].*[)].*/,"",1,svrinfo_values[ws][psp[p1]][s][g][k][1]) + break + } + } + add_svrinfo_cell_ex(ith[p1], vv) + } + print "" + } +} + +function add_svrinfo_disk_summary_ex(ws, psp, ith, g, n, m) { + np=length(psp) + n1=0 + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + n2=0 + for (i in svrinfo_values[ws][psp[p1]][s][g][m]) + if (length(svrinfo_values[ws][psp[p1]][s][g][m][i])>0) n2++ + if (n2>n1) n1=n2 + } + + for (n2=1;n2<=n1;n2++) { + print "" + g1=(n2==1)?g:"" + add_svrinfo_cell(g) + for (p1=1;p1<=np;p1++) { + s=find_svrinfo_phost(ws, psp[p1]) + n3=0 + vv="" + for (i in svrinfo_values[ws][psp[p1]][s][g][m]) { + if (length(svrinfo_values[ws][psp[p1]][s][g][m][i])>0) n3++ + if (n3==n2) { + vv=svrinfo_values[ws][psp[p1]][s][g][n][i]":"svrinfo_values[ws][psp[p1]][s][g][m][i] + break + } + } + add_svrinfo_cell(vv) + } + print "" + } +} + +function add_svrinfo(ws) { + print "" + print "" + + add_svrinfo_row(ws, "Host", "Name") + add_svrinfo_row(ws, "Host", "Time") + add_svrinfo_row(ws, "System", "Manufacturer") + add_svrinfo_row(ws, "System", "Product Name") + add_svrinfo_row(ws, "System", "Version") + add_svrinfo_row(ws, "System", "Serial #") + add_svrinfo_row(ws, "System", "UUID") + + add_svrinfo_row(ws, "Baseboard", "Manifacturer") + add_svrinfo_row(ws, "Baseboard", "Product Name") + add_svrinfo_row(ws, "Baseboard", "Version") + add_svrinfo_row(ws, "Baseboard", "Serial #") + + add_svrinfo_row(ws, "Chassis", "Manufacturer") + add_svrinfo_row(ws, "Chassis", "Type") 
+ add_svrinfo_row(ws, "Chassis", "Version") + add_svrinfo_row(ws, "Chassis", "Serial #") + + add_svrinfo_row(ws, "BIOS", "Vendor") + add_svrinfo_row(ws, "BIOS", "Version") + add_svrinfo_row(ws, "BIOS", "Release Date") + + add_svrinfo_row(ws, "Operating System", "OS") + add_svrinfo_row(ws, "Operating System", "Kernel") + add_svrinfo_row(ws, "Operating System", "Microcode") + + add_svrinfo_row(ws, "Software Version", "GCC") + add_svrinfo_row(ws, "Software Version", "GLIBC") + add_svrinfo_row(ws, "Software Version", "Binutils") + add_svrinfo_row(ws, "Software Version", "Python") + add_svrinfo_row(ws, "Software Version", "Python3") + add_svrinfo_row(ws, "Software Version", "Java") + add_svrinfo_row(ws, "Software Version", "OpenSSL") + + add_svrinfo_row(ws, "CPU", "CPU Model") + add_svrinfo_row(ws, "CPU", "Architecture") + add_svrinfo_row(ws, "CPU", "Microarchitecture") + add_svrinfo_row(ws, "CPU", "Family") + add_svrinfo_row(ws, "CPU", "Model") + add_svrinfo_row(ws, "CPU", "Stepping") + add_svrinfo_row(ws, "CPU", "Base Frequency") + add_svrinfo_row(ws, "CPU", "Maximum Frequency") + add_svrinfo_row(ws, "CPU", "All-core Maximum Frequency") + add_svrinfo_row(ws, "CPU", "CPUs") + add_svrinfo_row(ws, "CPU", "On-line CPU List") + add_svrinfo_row(ws, "CPU", "Hyperthreading") + add_svrinfo_row(ws, "CPU", "Cores per Socket") + add_svrinfo_row(ws, "CPU", "Sockets") + add_svrinfo_row(ws, "CPU", "NUMA Nodes") + add_svrinfo_row(ws, "CPU", "NUMA CPU List") + add_svrinfo_row(ws, "CPU", "CHA Count") + add_svrinfo_row(ws, "CPU", "L1d Cache") + add_svrinfo_row(ws, "CPU", "L1i Cache") + add_svrinfo_row(ws, "CPU", "L2 Cache") + add_svrinfo_row(ws, "CPU", "L3 Cache") + add_svrinfo_row(ws, "CPU", "Memory Channels") + add_svrinfo_row(ws, "CPU", "Prefetchers") + add_svrinfo_row(ws, "CPU", "Intel Turbo Boost") + add_svrinfo_row(ws, "CPU", "PPINs") + + add_svrinfo_isa_summary(ws, "ISA") + add_svrinfo_accelerator_summary(ws, "Accelerator") + + add_svrinfo_row(ws, "Power", "TDP") + add_svrinfo_row(ws, "Power", "Power & Perf Policy") + add_svrinfo_row(ws, "Power", "Frequency Governer") + add_svrinfo_row(ws, "Power", "Frequency Driver") + add_svrinfo_row(ws, "Power", "MAX C-State") + + add_svrinfo_row(ws, "Memory", "Installed Memory") + add_svrinfo_row(ws, "Memory", "MemTotal") + add_svrinfo_row(ws, "Memory", "MemFree") + add_svrinfo_row(ws, "Memory", "MemAvailable") + add_svrinfo_row(ws, "Memory", "Buffers") + add_svrinfo_row(ws, "Memory", "Cached") + add_svrinfo_row(ws, "Memory", "HugePages_Total") + add_svrinfo_row(ws, "Memory", "Hugepagesize") + add_svrinfo_row(ws, "Memory", "Transparent Huge Pages") + add_svrinfo_row(ws, "Memory", "Automatic NUMA Balancing") + add_svrinfo_row(ws, "Memory", "Populated Memory Channels") + + add_svrinfo_row(ws, "GPU", "Manufacturer") + add_svrinfo_row(ws, "GPU", "Model") + + add_svrinfo_nic_summary(ws, "NIC", "Name", "Model") + add_svrinfo_nic_summary(ws, "Network IRQ Mapping", "Interface", "CPU:IRQs CPU:IRQs ...") + add_svrinfo_disk_summary(ws, "Disk", "NAME", "MODEL") + add_svrinfo_security_summary(ws, "Vulnerability") + + add_svrinfo_row(ws, "PMU", "cpu_cycles") + add_svrinfo_row(ws, "PMU", "instructions") + add_svrinfo_row(ws, "PMU", "ref_cycles") + add_svrinfo_row(ws, "PMU", "topdown_slots") + print "
" + print "
" +} + +function add_svrinfo_ex(ws, psp, ith) { + add_svrinfo_row_ex(ws, psp, ith, "Host", "Name") + add_svrinfo_row_ex(ws, psp, ith, "Host", "Time") + add_svrinfo_row_ex(ws, psp, ith, "System", "Manufacturer") + add_svrinfo_row_ex(ws, psp, ith, "System", "Product Name") + add_svrinfo_row_ex(ws, psp, ith, "BIOS", "Version") + add_svrinfo_row_ex(ws, psp, ith, "Operating System", "OS") + add_svrinfo_row_ex(ws, psp, ith, "Operating System", "Kernel") + add_svrinfo_row_ex(ws, psp, ith, "Operating System", "Microcode") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "CPU Model") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "Base Frequency") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "Maximum Frequency") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "All-core Maximum Frequency") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "CPUs") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "Cores per Socket") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "Sockets") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "NUMA Nodes") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "Prefetchers") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "Intel Turbo Boost") + add_svrinfo_row_ex(ws, psp, ith, "CPU", "PPINs") + add_svrinfo_row_ex(ws, psp, ith, "Power", "Power & Perf Policy") + add_svrinfo_row_ex(ws, psp, ith, "Power", "TDP") + add_svrinfo_row_ex(ws, psp, ith, "Power", "Frequency Driver") + add_svrinfo_row_ex(ws, psp, ith, "Power", "Frequency Governer") + add_svrinfo_row_ex(ws, psp, ith, "Power", "MAX C-State") + add_svrinfo_row_ex(ws, psp, ith, "Memory", "Installed Memory") + add_svrinfo_row_ex(ws, psp, ith, "Memory", "Hugepagesize") + add_svrinfo_row_ex(ws, psp, ith, "Memory", "Transparent Huge Pages") + add_svrinfo_row_ex(ws, psp, ith, "Memory", "Automatic NUMA Balancing") + add_svrinfo_nic_summary_ex(ws, psp, ith, "NIC", "Name", "Model") + add_svrinfo_nic_summary_ex(ws, psp, ith, "Network IRQ Mapping", "Interface", "CPU:IRQs CPU:IRQs ...") + add_svrinfo_disk_summary_ex(ws, psp, ith, "Disk", "NAME", "MODEL") + add_svrinfo_security_summary_ex(ws, psp, ith, "Vulnerability") +} diff --git a/script/benchmark/xlsutil.awk b/script/benchmark/xlsutil.awk new file mode 100755 index 0000000..693741c --- /dev/null +++ b/script/benchmark/xlsutil.awk @@ -0,0 +1,98 @@ +#!/usr/bin/gawk + +function median(v) { + n=asort(v, v_sorted, "@val_num_asc") + if (n%2 == 0) { + return v_sorted[n/2] + } else { + return v_sorted[(n+1)/2] + } +} + +function escape(text) { + text=gensub(//,"\\>","g",text) + return text +} + +function add_xls_header(align_left) { + print "" + print "" + print "" + + print "" + print "" + print "" + print "" + print "" + print "" + print "" + print "" +} + +function ws_name_ex(a) { + a1=gensub(filter,"","g",a) + if (length(a1)>26) a1=substr(a1,length(a1)-26) + return gensub(/^[^-_]+[-_](.*)$/,"\\1",1,a1) +} + +function ws_name(a) { + a1=ws_name_ex(a) + if (ws_uniq[a1] == "") { + ws_uniq[a1]=a + return a1 + } + print "Worksheet name conflict: "a1 > "/dev/stderr" + print "previous: "ws_uniq[a1] > "/dev/stderr" + print "new: "a > "/dev/stderr" + exit 3 +} + diff --git a/script/build.sh b/script/build.sh new file mode 100644 index 0000000..461a292 --- /dev/null +++ b/script/build.sh @@ -0,0 +1,91 @@ +#!/bin/bash -e + +PLATFORM=${PLATFORM:-SPR} +IMAGEARCH=${IMAGEARCH:-linux/amd64} +BACKEND=${BACKEND:-docker} +RELEASE=${RELEASE:-:latest} + +with_arch () { + if [[ "$IMAGEARCH" = "linux/amd64" ]]; then + echo $1 + else + echo $1-${IMAGEARCH/*\//} + fi +} + +docker_push () { + case "$1" in + *.dkr.ecr.*.amazonaws.com/*) + REGISTRY= "$SCRIPT/cumulus/shell.sh" 
aws -v "$SCRIPT/cumulus:/mnt:ro" -- /mnt/script/create-private-repository-aws.sh $1 || true + ;; + esac + docker -D push $1 +} + +# template substitution +if [[ "$DIR" = */workload/* ]]; then + find "$DIR" -name "*.m4" ! -name "*-config.yaml.m4" ! -path "*/template/*" -exec /bin/bash -c 'f="{}" && cd "'$DIR'" && m4 -Itemplate -I../../template -DPLATFORM='$PLATFORM' -DIMAGEARCH='$IMAGEARCH' -DWORKLOAD='$WORKLOAD' -DREGISTRY='$REGISTRY' -DBACKEND='$BACKEND' -DRELEASE='$RELEASE' "$f" > "${f/.m4/}"' \; +elif [[ "$DIR" = */stack/* ]]; then + find "$DIR" -name "*.m4" ! -name "*-config.yaml.m4" ! -path "*/template/*" -exec /bin/bash -c 'f="{}" && cd "'$DIR'" && m4 -Itemplate -I../../template -DPLATFORM='$PLATFORM' -DIMAGEARCH='$IMAGEARCH' -DSTACK='$STACK' -DREGISTRY='$REGISTRY' -DBACKEND='$BACKEND' -DRELEASE='$RELEASE' "$f" > "${f/.m4/}"' \; +fi + +if [ "${#DOCKER_CONTEXT[@]}" -eq 0 ]; then + DOCKER_CONTEXT=("${DOCKER_CONTEXT:-.}") +fi + +if [ "$1" = "--bom" ]; then + [[ "$DIR" = *"/workload/"* ]] && echo "# ${DIR/*\/workload/workload}" + [[ "$DIR" = *"/stack/"* ]] && echo "# ${DIR/*\/stack/stack}" + for dc in "${DOCKER_CONTEXT[@]}"; do + for pat in '.9.*' '.8.*' '.7.*' '.6.*' '.5.*' '.4.*' '.3.*' '.2.*' '.1.*' ''; do + find "$DIR/$dc" -maxdepth 1 -mindepth 1 -name "Dockerfile$pat" $FIND_OPTIONS ! -name "*.m4" -print 2> /dev/null | ( + while IFS= read df; do + image=$(with_arch $(head -n 2 "$df" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f2)) + header=$(head -n 2 "$df" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f1) + [ -n "$REGISTRY" ] && [ "$header" = "#" ] && image="$REGISTRY$image$RELEASE" + echo "$header image: $image" + while IFS= read line; do + if [[ "$line" = "ARG "*=* ]]; then + var="$(echo ${line/ARG /} | tr -d '"' | cut -f1 -d=)" + value="$(echo ${line/ARG /} | tr -d '"' | cut -f2- -d=)" + eval "$var=\"$value\"" + eval "value=\"$value\"" + + case "$line" in + *_VER=*|*_VERSION=*|*_REPO=*|*_REPOSITORY=*|*_IMG=*|*_IMAGE=*|*_PKG=*|*_PACKAGE=*) + echo "ARG $var=$value" + ;; + esac + fi + done < "$df" + done + ) + done + done +else + build_options="$(env | cut -f1 -d= | grep -iE '_proxy$' | sed 's/^/--build-arg /' | tr '\n' ' ')" + build_options="$build_options --build-arg RELEASE=$RELEASE" + + if [ "$IMAGEARCH" != "linux/amd64" ]; then + build_options="$build_options --platform $IMAGEARCH" + fi + + build_options="$build_options --build-arg BUILDKIT_INLINE_CACHE=1" + for dc in "${DOCKER_CONTEXT[@]}"; do + for pat in '.9.*' '.8.*' '.7.*' '.6.*' '.5.*' '.4.*' '.3.*' '.2.*' '.1.*' ''; do + for dockerfile in $(find "$DIR/$dc" -maxdepth 1 -mindepth 1 -name "Dockerfile$pat" $FIND_OPTIONS -print 2>/dev/null); do + + image=$(with_arch $(head -n 2 "$dockerfile" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f2)) + header=$(head -n 2 "$dockerfile" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f1) + IMAGE="$REGISTRY$image$RELEASE" + ( + cd "$DIR/$dc" + DOCKER_BUILDKIT=1 docker build $BUILD_OPTIONS $build_options --cache-from $REGISTRY$image$RELEASE -t $image -t $image$RELEASE $([ -n "$REGISTRY" ] && [ "$header" = "#" ] && echo -t $IMAGE) -f "$dockerfile" . + ) + if [ -n "$REGISTRY" ] && [ "$header" = "#" ]; then + docker_push $IMAGE + fi + done + done + done +fi diff --git a/script/check-license.sh b/script/check-license.sh new file mode 100755 index 0000000..37edcbc --- /dev/null +++ b/script/check-license.sh @@ -0,0 +1,40 @@ +#!/bin/bash -e + +if [ ${#@} = 0 ]; then + echo "Usage: : ..." 
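+  # Illustrative usage sketch (the names below are hypothetical): the first
+  # argument is a cache file of previously accepted licenses; each remaining
+  # argument is a "<name>:<license text>" pair, e.g.:
+  #   ./check-license.sh .check-license 'foo:Foo End User License Agreement ...'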
+ exit 3 +fi + +check_access () { + printf -- "$1\n" + reply='n' + read -p 'Type "accept" to proceed or anything else to skip: ' + [ "$REPLY" = "accept" ] || [ "$REPLY" = "ACCEPT" ] +} + +check_history () { + grep -qFx "$1" "$2" 2> /dev/null +} + +cache="$1" +shift +while [ ${#@} -gt 0 ]; do + name="${1/:*/}" + text="${1#$name:}" + shift + if check_history "$name" "$cache"; then + access=OK + elif check_access "$text"; then + access=OK + echo "$name" >> "$cache" + else + access=denied + fi + if [ $access != OK ]; then + echo + echo "Access to $name denied. Build aborted." + echo + exit 1 + fi +done +exit 0 diff --git a/script/component.cmake b/script/component.cmake new file mode 100644 index 0000000..238eecb --- /dev/null +++ b/script/component.cmake @@ -0,0 +1,90 @@ + +function(check_license name license_text) + if(NOT "/${CMAKE_CURRENT_SOURCE_DIR}//" MATCHES "/[^/]*${BENCHMARK}[^/]*/") + return() + endif() + + if(ENABLE_BUILD) + if (NOT ";${license_list}" MATCHES ";${name}:") + set(license_list "${license_list};${name}:${license_text}" PARENT_SCOPE) + endif() + endif() +endfunction() + +function(add_component_build type name) + set(component ${name} PARENT_SCOPE) + + if(NOT "/${CMAKE_CURRENT_SOURCE_DIR}//" MATCHES "/[^/]*${BENCHMARK}[^/]*/") + return() + elseif(BENCHMARK) + message("BENCHMARK enabled ${type} ${name}") + endif() + + set(license_reqs "") + set(sut_reqs "") + set(req_mode "LICENSE") + foreach(arg1 ${ARGN}) + if(arg1 STREQUAL "LICENSE") + set(req_mode "LICENSE") + elseif(arg1 STREQUAL "SUT") + set(req_mode "SUT") + else() + if(req_mode STREQUAL "LICENSE") + foreach(item ${license_list}) + if((item MATCHES "^${arg1}:") AND (NOT ACCEPT_LICENSE STREQUAL "ALL") AND (NOT " ${ACCEPT_LICENSE} " MATCHES " ${arg1} ")) + set(license_reqs "${license_reqs} '${item}'") + endif() + endforeach() + elseif(req_mode STREQUAL "SUT") + set(sut_reqs "${sut_reqs};${arg1}") + endif() + endif() + endforeach() + set(sut_reqs "${sut_reqs}" PARENT_SCOPE) + string(REPLACE "\n" "\\n" license_reqs "${license_reqs}") + + string(TOUPPER ${type} typeu) + add_custom_target(bom_${name} COMMAND bash -c "echo BOM of ${PLATFORM}/${name}:" COMMAND bash -c "PLATFORM=${PLATFORM} IMAGEARCH=${IMAGEARCH} ${typeu}=${name} BACKEND=${BACKEND} RELEASE=${RELEASE} REGISTRY=${REGISTRY} SCRIPT='${PROJECT_SOURCE_DIR}/script' '${CMAKE_CURRENT_SOURCE_DIR}/build.sh' --bom" VERBATIM) + add_dependencies(bom bom_${name}) + + if(ENABLE_BUILD) + add_custom_target(build_${name} ALL COMMAND bash -c "'${PROJECT_SOURCE_DIR}/script/check-license.sh' '${CMAKE_CURRENT_BINARY_DIR}/.check-license' ${license_reqs} && PLATFORM=${PLATFORM} IMAGEARCH=${IMAGEARCH} ${typeu}=${name} BACKEND=${BACKEND} RELEASE=${RELEASE} REGISTRY=${REGISTRY} SCRIPT='${PROJECT_SOURCE_DIR}/script' '${CMAKE_CURRENT_SOURCE_DIR}/build.sh'" VERBATIM) + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/.check-license") + + if(COMMAND add_backend_dependencies) + add_backend_dependencies(${type} ${name}) + endif() + + if(NOT IMAGEARCH STREQUAL "linux/amd64") + add_dependencies(build_${name} build_march) + endif() + endif() + execute_process(COMMAND bash -c "ln -s -r -f '${PROJECT_SOURCE_DIR}'/script/benchmark/*.sh ." 
WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") +endfunction() + +function(add_component_testcase type component name) + if(NOT "/${CMAKE_CURRENT_SOURCE_DIR}//" MATCHES "/[^/]*${BENCHMARK}[^/]*/") + return() + endif() + + function(add_testcase_1 name backend) + string(REPLACE ";" " " argstr "${ARGN}") + + string(TOUPPER ${type} typeu) + add_test(NAME test_${name} COMMAND bash -c "rm -rf $TEST_PREFIX''logs-${name} && mkdir -p $TEST_PREFIX''logs-${name} && cd $TEST_PREFIX''logs-${name} && TESTCASE=test_${name} PLATFORM=${PLATFORM} IMAGEARCH=${IMAGEARCH} ${typeu}=${component} RELEASE=${RELEASE} REGISTRY=${REGISTRY} TIMEOUT=${TIMEOUT} ${backend} SCRIPT='${PROJECT_SOURCE_DIR}/script' REGISTRY_AUTH=${REGISTRY_AUTH} '${CMAKE_CURRENT_SOURCE_DIR}/validate.sh' ${argstr}" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") + + add_custom_target(kpi_${component}_${name} COMMAND "${CMAKE_SOURCE_DIR}/script/make-kpi.sh" "logs-${name}" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" VERBATIM) + add_dependencies(kpi kpi_${component}_${name}) + + set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/logs-${name}") + endfunction() + + if(COMMAND add_backend_testcase) + add_backend_testcase(${type} ${component} ${name} ${ARGN}) + else() + if(NOT sut_reqs) + add_testcase_1(${name} "BACKEND=${BACKEND}" ${ARGN}) + endif() + endif() +endfunction() + diff --git a/script/cumulus.cmake b/script/cumulus.cmake new file mode 100644 index 0000000..81e5289 --- /dev/null +++ b/script/cumulus.cmake @@ -0,0 +1,53 @@ + +function(show_backend_settings) + message("-- Setting: CUMULUS_OPTIONS=${CUMULUS_OPTIONS}") + message("-- Setting: CUMULUS_SUT=${CUMULUS_SUT}") +endfunction() + +function(add_backend_dependencies type name) + add_dependencies(build_${name} build_cumulus) +endfunction() + +function(add_backend_testcase type component name) + if(";${sut_reqs};" STREQUAL ";;") + string(REPLACE " " ";" cumulus_sut "${CUMULUS_SUT}") + set(sut_list "") + foreach(sut1 ${cumulus_sut}) + if(NOT sut1 MATCHES "^-") + set(sut_list "${sut_list};${sut1}") + endif() + endforeach() + else() + set(sut_list "${sut_reqs}") + endif() + foreach(sut1 ${sut_list}) + if(sut1 AND (" ${CUMULUS_SUT} " MATCHES " ${sut1} ")) + string(REGEX REPLACE "^-" "" sut2 "${sut1}") + add_testcase_1("${sut2}_${name}" "BACKEND=${BACKEND} CUMULUS_OPTIONS='${CUMULUS_OPTIONS}' CUMULUS_CONFIG_IN='${PROJECT_SOURCE_DIR}/script/cumulus/cumulus-config.${sut1}.yaml'" ${ARGN}) + endif() + endforeach() +endfunction() + +################ TOP-LEVEL-CMAKE ########################### + +execute_process(COMMAND bash -c "echo \"\"$(find -name 'cumulus-config.*.yaml' | sed 's|.*/cumulus-config.\\(.*\\).yaml$|\\1|')" OUTPUT_VARIABLE sut_all OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/script/cumulus") +execute_process(COMMAND bash -c "echo \"\"$(find -name 'cumulus-config.*.yaml' ! 
-exec grep -q cloud: '{}' \\; -print | sed 's|.*/cumulus-config.\\(.*\\).yaml$|\\1|')" OUTPUT_VARIABLE CUMULUS_SUT_STATIC OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/script/cumulus") + +if (NOT CUMULUS_OPTIONS) + set(CUMULUS_OPTIONS "--docker-run --nosvrinfo") +endif() + +if ((NOT DEFINED CUMULUS_SUT) OR (CUMULUS_SUT STREQUAL "")) + set(CUMULUS_SUT "${sut_all}") +endif() + +string(REPLACE " " ";" configs "${CUMULUS_SUT}") +foreach(config ${configs}) + if(NOT " ${sut_all} " MATCHES "${config}") + message(FATAL_ERROR "Failed to locate cumulus config: ${config}") + endif() +endforeach() + +add_subdirectory(script/cumulus) +execute_process(COMMAND bash -c "ln -s -r -f '${PROJECT_SOURCE_DIR}'/script/cumulus/script/setup-sut.sh ." WORKING_DIRECTORY "${CMAKE_BINARY_DIR}") + diff --git a/script/cumulus/CMakeLists.txt b/script/cumulus/CMakeLists.txt new file mode 100644 index 0000000..c1344cc --- /dev/null +++ b/script/cumulus/CMakeLists.txt @@ -0,0 +1,12 @@ +if(ENABLE_BUILD) + add_custom_target(build_cumulus bash -c "PLATFORM=X64 IMAGEARCH=linux/amd64 BACKEND=${BACKEND} RELEASE=${RELEASE} REGISTRY=${REGISTRY} CUMULUS_SUT='${CUMULUS_SUT}' CUMULUS_OPTIONS='${CUMULUS_OPTIONS}' SCRIPT='${PROJECT_SOURCE_DIR}/script' '${CMAKE_CURRENT_SOURCE_DIR}/build.sh'") +else() + add_custom_target(build_cumulus) +endif() + +execute_process(COMMAND bash -c "mkdir -p '${CMAKE_CURRENT_SOURCE_DIR}/.docker' > /dev/null; touch '${CMAKE_CURRENT_SOURCE_DIR}/.gitconfig' 2>/dev/null; chmod 600 '${CMAKE_CURRENT_SOURCE_DIR}'/ssh_config" OUTPUT_QUIET ERROR_QUIET) + +foreach(cloud aws gcp azure tencent alicloud) + add_custom_target(${cloud} COMMAND bash -c "REGISTRY=${REGISTRY} RELEASE=${RELEASE} '${CMAKE_CURRENT_SOURCE_DIR}/shell.sh' ${cloud} -v '${CMAKE_CURRENT_SOURCE_DIR}:/home' -v '${CMAKE_CURRENT_SOURCE_DIR}:/root' -t -- bash || true" VERBATIM) +endforeach() + diff --git a/script/cumulus/Dockerfile.1.alicloud b/script/cumulus/Dockerfile.1.alicloud new file mode 100644 index 0000000..9d66884 --- /dev/null +++ b/script/cumulus/Dockerfile.1.alicloud @@ -0,0 +1,14 @@ +# cumulus-alicloud + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG RELEASE +FROM cumulus-cloud${RELEASE} + +ARG ALIYUN_CLI_VER=3.0.104 +ARG ALIYUN_CLI_PACKAGE=https://github.com/aliyun/aliyun-cli/releases/download/v${ALIYUN_CLI_VER}/aliyun-cli-linux-${ALIYUN_CLI_VER}-amd64.tgz +RUN curl -L ${ALIYUN_CLI_PACKAGE} | tar -xz -C /usr/local/bin + +# Add cleanup script +COPY script/cleanup-alicloud.sh /usr/local/bin/cleanup diff --git a/script/cumulus/Dockerfile.1.aws b/script/cumulus/Dockerfile.1.aws new file mode 100644 index 0000000..a32ff9b --- /dev/null +++ b/script/cumulus/Dockerfile.1.aws @@ -0,0 +1,14 @@ +# cumulus-aws + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG RELEASE +FROM cumulus-cloud${RELEASE} + +# Install AWS CLI +RUN python3 -m pip install --no-cache-dir -r /PerfKitBenchmarker/perfkitbenchmarker/providers/aws/requirements.txt + +# Add cleanup script +COPY script/cleanup-aws.sh /usr/local/bin/cleanup + diff --git a/script/cumulus/Dockerfile.1.azure b/script/cumulus/Dockerfile.1.azure new file mode 100644 index 0000000..f5c8c07 --- /dev/null +++ b/script/cumulus/Dockerfile.1.azure @@ -0,0 +1,19 @@ +# cumulus-azure + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG RELEASE +FROM cumulus-cloud${RELEASE} + +# Install AZure CLI +RUN apt-get update && apt-get install -y 
ca-certificates curl apt-transport-https gnupg && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +RUN curl -sL https://packages.microsoft.com/keys/microsoft.asc | \ + gpg --dearmor > /etc/apt/trusted.gpg.d/microsoft.gpg && \ + echo "deb [arch=amd64] https://packages.microsoft.com/repos/azure-cli/ $(grep VERSION_CODENAME /etc/os-release | cut -f2 -d=) main" > /etc/apt/sources.list.d/azure-cli.list +RUN apt-get update && apt-get install -y azure-cli && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Add clean script +COPY script/cleanup-azure.sh /usr/local/bin/cleanup diff --git a/script/cumulus/Dockerfile.1.gcp b/script/cumulus/Dockerfile.1.gcp new file mode 100644 index 0000000..adc5c1e --- /dev/null +++ b/script/cumulus/Dockerfile.1.gcp @@ -0,0 +1,18 @@ +# cumulus-gcp + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG RELEASE +FROM cumulus-cloud${RELEASE} + +# Install GCP CLI +RUN apt-get update && apt-get install -y apt-transport-https ca-certificates gnupg && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +RUN curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key --keyring /usr/share/keyrings/cloud.google.gpg add - && \ + echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list +RUN apt-get update && apt-get install -y google-cloud-cli && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Add cleanup script +COPY script/cleanup-gcp.sh /usr/local/bin/cleanup diff --git a/script/cumulus/Dockerfile.1.tencent b/script/cumulus/Dockerfile.1.tencent new file mode 100644 index 0000000..ae547f8 --- /dev/null +++ b/script/cumulus/Dockerfile.1.tencent @@ -0,0 +1,13 @@ +# cumulus-tencent + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG RELEASE +FROM cumulus-cloud${RELEASE} + +# Install TCCLI +RUN python3 -m pip install --no-cache-dir -r /PerfKitBenchmarker/perfkitbenchmarker/providers/tencent/requirements.txt + +# Add cleanup script +COPY script/cleanup-tencent.sh /usr/local/bin/cleanup diff --git a/script/cumulus/Dockerfile.2.cloud b/script/cumulus/Dockerfile.2.cloud new file mode 100644 index 0000000..1373b50 --- /dev/null +++ b/script/cumulus/Dockerfile.2.cloud @@ -0,0 +1,11 @@ +## cumulus-cloud + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG RELEASE +FROM cumulus-static${RELEASE} + +# Install docker CLI +COPY --from=docker:20.10.17-dind /usr/local/bin/docker /usr/local/bin/ + diff --git a/script/cumulus/Dockerfile.3.static b/script/cumulus/Dockerfile.3.static new file mode 100644 index 0000000..6f69b29 --- /dev/null +++ b/script/cumulus/Dockerfile.3.static @@ -0,0 +1,56 @@ +# syntax=docker/dockerfile:1 +# cumulus-static + +# Copyright (c) 2022 Intel Corporation +# SPDX-License-Identifier: Apache License 2.0 + +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} +## Security updates +RUN apt-get update && apt-get upgrade -y libsqlite3-0 rsync perl-modules libgnutls30 tar passwd libpcre2-8-0 libpcre3 patch openssh-client libtinfo6 libk5crypto3 libgmp10 libc-bin git-man coreutils && apt-get clean && rm -rf /var/lib/apt/lists/* + +## Prerequisites +RUN apt-get update && apt-get install -y curl git rsync bzip2 openssl skopeo sudo file && apt-get clean && rm -rf /var/lib/apt/lists/* + +# Install miniconda +ARG CONDA_DIR=/opt/conda +ARG CONDA_VER=py39_4.12.0 +ARG CONDA_REPO=https://repo.anaconda.com/miniconda +RUN curl 
-o ~/miniconda.sh ${CONDA_REPO}/Miniconda3-${CONDA_VER}-Linux-x86_64.sh && bash ~/miniconda.sh -b -p ${CONDA_DIR} && rm -f ~/miniconda.sh + +# Put conda in path so we can use python3 and pip3 installed in CONDA_DIR +ENV PATH=${CONDA_DIR}/bin:$PATH + +## Cumulus patches +COPY pkb/ /PerfKitBenchmarker/ +RUN python3 -m pip install --no-cache-dir -r PerfKitBenchmarker/requirements.txt kazoo paramiko lxml +WORKDIR /home + +## svrinfo NDA +## Download the package to script/cumulus/pkb/data/svrinfo +RUN [ -e /PerfKitBenchmarker/perfkitbenchmarker/data/svrinfo/svrinfo* ] && \ + sed -i "/DEFINE_string('svrinfo_tarball'/s|None|'$(ls -1 /PerfKitBenchmarker/perfkitbenchmarker/data/svrinfo/svrinfo*)'|" /PerfKitBenchmarker/perfkitbenchmarker/traces/svrinfo.py || true + +#### +ARG USER=pkb +ARG GROUP=pkb +RUN groupadd -f ${GROUP} && useradd -d /home -M ${USER} -g ${GROUP} +RUN echo "${USER} ALL=(ALL:ALL) NOPASSWD: ALL" >> /etc/sudoers +RUN groupadd -f docker && usermod -aG docker pkb +#### + +RUN apt-get update && apt-get install -y netcat gawk inetutils-ping && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Install gosu +ARG GOSU_VER=1.14 +ARG GOSU_REPO=https://github.com/tianon/gosu/releases/download/${GOSU_VER}/gosu-amd64 +RUN curl -o /usr/local/bin/gosu -SL ${GOSU_REPO} && \ + curl -o /usr/local/bin/gosu.asc ${GOSU_REPO}.asc && \ + chmod +x /usr/local/bin/gosu + +# entry point +COPY entrypoint.sh / +ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/script/cumulus/build.sh b/script/cumulus/build.sh new file mode 100755 index 0000000..6ce02e8 --- /dev/null +++ b/script/cumulus/build.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" + +clouds="$( +for x in $CUMULUS_SUT; do + grep cloud: "$DIR"/cumulus-config.$x.yaml +done | awk '{a[$NF]=1}END{for(x in a)print x}' +)" + +FIND_OPTIONS="-name Dockerfile.*.static" +[[ "$clouds" = *AWS* ]] && FIND_OPTIONS="$FIND_OPTIONS -o -name Dockerfile.*.aws -o -name Dockerfile.*.cloud" +[[ "$clouds" = *GCP* ]] && FIND_OPTIONS="$FIND_OPTIONS -o -name Dockerfile.*.gcp -o -name Dockerfile.*.cloud" +[[ "$clouds" = *Azure* ]] && FIND_OPTIONS="$FIND_OPTIONS -o -name Dockerfile.*.azure -o -name Dockerfile.*.cloud" +[[ "$clouds" = *Tencent* ]] && FIND_OPTIONS="$FIND_OPTIONS -o -name Dockerfile.*.tencent -o -name Dockerfile.*.cloud" +[[ "$clouds" = *AliCloud* ]] && FIND_OPTIONS="$FIND_OPTIONS -o -name Dockerfile.*.alicloud -o -name Dockerfile.*.cloud" +FIND_OPTIONS="( $FIND_OPTIONS )" +. 
$DIR/../build.sh diff --git a/script/cumulus/cumulus-config.alicloud.yaml b/script/cumulus/cumulus-config.alicloud.yaml new file mode 100644 index 0000000..099d6bd --- /dev/null +++ b/script/cumulus/cumulus-config.alicloud.yaml @@ -0,0 +1,57 @@ +cloud_worker: &cloud_worker + AliCloud: + machine_type: ecs.g7.large +# https://ecs-buy.aliyun.com/instanceTypes + zone: cn-shanghai-m + +cloud_disk_mount_1: &cloud_disk_mount_1 + AliCloud: + mount_point: /mnt/disk1 + disk_type: ephemeral_ssd + disk_size: 500 + num_striped_disks: 1 + +cloud_controller: &cloud_controller + AliCloud: + machine_type: ecs.g7.large +# https://ecs-buy.aliyun.com/instanceTypes + zone: cn-shanghai-m + +docker_pt: + vm_groups: + worker: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_worker + disk_spec: *cloud_disk_mount_1 + controller: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_controller + flags: + dpt_docker_image: "" + dpt_docker_dataset: "" + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_logs_dir: "" + dpt_timeout: "28800,300" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_params: "" + dpt_tunables: "" + dpt_registry_map: "" + dpt_namespace: "" + ali_system_disk_size: 500 + ali_system_disk_type: cloud_essd + ssh_options: "-o TCPKeepAlive=yes" + sar_flags: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r ALL -u ALL -P ALL -v -W -w" + docker_dist_repo: "http://mirrors.aliyun.com/docker-ce/linux/ubuntu" + docker_registry_mirrors: "https://registry.cn-hangzhou.aliyuncs.com" + k8s_repo_key_url: "http://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg" + k8s_repo_url: "http://mirrors.aliyun.com/kubernetes/apt" + k8s_kubeadm_options: "--image-repository=registry.aliyuncs.com/google_containers" + k8s_image_mirrors: "docker.io/xmchen/node-feature-discovery:v0.10.1,k8s.gcr.io/nfd/node-feature-discovery:v0.10.1" + enable_rsync: true + cloud: AliCloud diff --git a/script/cumulus/cumulus-config.aws.yaml b/script/cumulus/cumulus-config.aws.yaml new file mode 100644 index 0000000..eb7ac95 --- /dev/null +++ b/script/cumulus/cumulus-config.aws.yaml @@ -0,0 +1,49 @@ +cloud_worker: &cloud_worker + AWS: + machine_type: m5.16xlarge + zone: us-east-2a + boot_disk_size: 500 + +cloud_disk_mount_1: &cloud_disk_mount_1 + AWS: + mount_point: /mnt/disk1 + disk_type: io2 + disk_size: 512 + iops: 25600 + num_striped_disks: 1 + +cloud_controller: &cloud_controller + AWS: + machine_type: m5.4xlarge + zone: us-east-2a + boot_disk_size: 500 + +docker_pt: + vm_groups: + worker: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_worker + disk_spec: *cloud_disk_mount_1 + controller: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_controller + flags: + dpt_docker_image: "" + dpt_docker_dataset: "" + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_logs_dir: "" + dpt_timeout: "28800,300" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_params: "" + dpt_tunables: "" + dpt_registry_map: "" + dpt_namespace: "" + ssh_options: "-o TCPKeepAlive=yes" + sar_flags: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r ALL -u ALL -P ALL -v -W -w" + cloud: AWS diff --git a/script/cumulus/cumulus-config.azure.yaml b/script/cumulus/cumulus-config.azure.yaml new file mode 100644 index 0000000..a3d8539 --- /dev/null +++ b/script/cumulus/cumulus-config.azure.yaml @@ -0,0 +1,48 @@ +cloud_worker: &cloud_worker + Azure: + machine_type: Standard_D8s_v3 + zone: eastus + boot_disk_size: 500 + +cloud_disk_mount_1: &cloud_disk_mount_1 + Azure: + mount_point: 
/mnt/disk1 + disk_type: Standard_LRS + disk_size: 500 + num_striped_disks: 1 + +cloud_controller: &cloud_controller + Azure: + machine_type: Standard_D8s_v3 + zone: eastus + boot_disk_size: 500 + +docker_pt: + vm_groups: + worker: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_worker + disk_spec: *cloud_disk_mount_1 + controller: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_controller + flags: + dpt_docker_image: "" + dpt_docker_dataset: "" + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_logs_dir: "" + dpt_timeout: "28800,300" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_params: "" + dpt_tunables: "" + dpt_registry_map: "" + dpt_namespace: "" + ssh_options: "-o TCPKeepAlive=yes" + sar_flags: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r ALL -u ALL -P ALL -v -W -w" + cloud: Azure diff --git a/script/cumulus/cumulus-config.gcp.yaml b/script/cumulus/cumulus-config.gcp.yaml new file mode 100644 index 0000000..840a8da --- /dev/null +++ b/script/cumulus/cumulus-config.gcp.yaml @@ -0,0 +1,51 @@ +cloud_worker: &cloud_worker + GCP: + machine_type: n2-standard-16 + zone: us-west1-b + +cloud_disk_mount_1: &cloud_disk_mount_1 + GCP: + mount_point: /mnt/disk1 + disk_type: pd-ssd + disk_size: 500 + num_striped_disks: 1 + +cloud_controller: &cloud_controller + GCP: + machine_type: n2-standard-16 + zone: us-west1-b + +docker_pt: + vm_groups: + worker: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_worker + disk_spec: *cloud_disk_mount_1 + controller: + vm_count: 1 + os_type: "ubuntu2204" + vm_spec: *cloud_controller + flags: + dpt_docker_image: "" + dpt_docker_dataset: "" + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_logs_dir: "" + dpt_timeout: "28800,300" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_params: "" + dpt_tunables: "" + dpt_registry_map: "" + dpt_namespace: "" + gce_subnet_region: us-west1 + gce_boot_disk_size: "500" +# The GCP higher bandwidth feature works with N2, N2D, C2, or C2D series && >=32 VCPUs. 
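+# To enable it, uncomment the two gce_* flags below and switch machine_type to a
+# qualifying shape (illustrative example: n2-standard-32, which meets the >=32 vCPU requirement).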
+# gce_egress_bandwidth_tier: "TIER_1" +# gce_nic_type: "GVNIC" + ssh_options: "-o TCPKeepAlive=yes" + sar_flags: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r ALL -u ALL -P ALL -v -W -w" + cloud: GCP diff --git a/script/cumulus/cumulus-config.static.yaml b/script/cumulus/cumulus-config.static.yaml new file mode 100644 index 0000000..db2c5ac --- /dev/null +++ b/script/cumulus/cumulus-config.static.yaml @@ -0,0 +1,48 @@ +static_vms: + - &vm1 + ip_address: 10.219.170.167 + user_name: raspadmin + ssh_private_key: ~/.ssh/id_rsa + internal_ip: 10.219.170.167 + ssh_port: 22 + install_packages: false + os_type: "centos8" + +vm_spec: &default_single_core + GCP: + machine_type: n1-standard-1 + +docker_pt: + vm_groups: + worker: + vm_spec: *default_single_core + vm_count: 1 + os_type: "centos8" + static_vms: + - *vm1 + controller: + vm_spec: *default_single_core + vm_count: 1 + os_type: "centos8" + static_vms: + - *vm1 + flags: + dpt_docker_image: "" + dpt_docker_dataset: "" + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_logs_dir: "" + dpt_timeout: "28800,300" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_params: "" + dpt_tunables: "" + dpt_registry_map: "" + dpt_namespace: "" + dpt_trace_mode: "" +# enable the following flags to speed up svrinfo, only for functional validation +# svrinfo_flags: "--format all" + trace_vm_groups: "worker" + sar_flags: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r ALL -u ALL -P ALL -v -W -w" diff --git a/script/cumulus/cumulus-config.tencent.yaml b/script/cumulus/cumulus-config.tencent.yaml new file mode 100644 index 0000000..8de80a4 --- /dev/null +++ b/script/cumulus/cumulus-config.tencent.yaml @@ -0,0 +1,55 @@ +cloud_worker: &cloud_worker + Tencent: + machine_type: S5.LARGE8 + zone: ap-shanghai-2 + +cloud_disk_mount_1: &cloud_disk_mount_1 + Tencent: + mount_point: /mnt/disk1 + disk_type: LOCAL_SSD + disk_size: 500 + num_striped_disks: 1 + +cloud_controller: &cloud_controller + Tencent: + machine_type: S5.LARGE8 + zone: ap-shanghai-2 + +docker_pt: + vm_groups: + worker: + vm_count: 1 + os_type: "ubuntu2004" + vm_spec: *cloud_worker + disk_spec: *cloud_disk_mount_1 + controller: + vm_count: 1 + os_type: "ubuntu2004" + vm_spec: *cloud_controller + flags: + dpt_docker_image: "" + dpt_docker_dataset: "" + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_logs_dir: "" + dpt_timeout: "" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_params: "" + dpt_tunables: "" + dpt_registry_map: "" + dpt_namespace: "" + tencent_boot_disk_size: "500" + tencent_boot_disk_type: "CLOUD_SSD" + ssh_options: "-o TCPKeepAlive=yes" + sar_flags: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r ALL -u ALL -P ALL -v -W -w" + docker_dist_repo: "http://mirrors.aliyun.com/docker-ce/linux/ubuntu" + docker_registry_mirrors: "https://registry.cn-hangzhou.aliyuncs.com" + k8s_repo_key_url: "http://mirrors.aliyun.com/kubernetes/apt/doc/apt-key.gpg" + k8s_repo_url: "http://mirrors.aliyun.com/kubernetes/apt" + k8s_kubeadm_options: "--image-repository=registry.aliyuncs.com/google_containers" + k8s_image_mirrors: "docker.io/xmchen/node-feature-discovery:v0.10.1,k8s.gcr.io/nfd/node-feature-discovery:v0.10.1" + enable_rsync: true + cloud: Tencent diff --git a/script/cumulus/entrypoint.sh b/script/cumulus/entrypoint.sh new file mode 100755 index 0000000..cbe6943 --- /dev/null +++ b/script/cumulus/entrypoint.sh @@ -0,0 +1,23 @@ +#!/bin/bash -e + +if [ -n "$DOCKER_GID" ]; then + if grep -q -E '^docker:' /etc/group; then + if 
[ "$DOCKER_GID" != "$(getent group docker | cut -f3 -d:)" ]; then + groupmod -g $DOCKER_GID -o docker > /dev/null || true + fi + fi +fi + +if [ -n "$PKB_GID" ]; then + if [ "$PKB_GID" != "$(id -g pkb)" ]; then + groupmod -g $PKB_GID -o pkb > /dev/null || true + fi + if [ -n "$PKB_UID" ]; then + if [ "$PKB_UID" != "$(id -u pkb)" ]; then + usermod -u $PKB_UID -g $PKB_GID -o pkb > /dev/null || true + fi + fi +fi + +####INSERT#### +exec gosu pkb "$@" diff --git a/script/cumulus/pkb/perfkitbenchmarker/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/__init__.py new file mode 100644 index 0000000..181b117 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/__init__.py @@ -0,0 +1,15 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Import absl.app.""" +from absl import app diff --git a/script/cumulus/pkb/perfkitbenchmarker/app_service.py b/script/cumulus/pkb/perfkitbenchmarker/app_service.py new file mode 100644 index 0000000..32984e1 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/app_service.py @@ -0,0 +1,175 @@ +"""Module containing class for BaseAppService and BaseAppServiceSpec.""" +import threading +import time + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import resource +from perfkitbenchmarker import sample +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec + +FLAGS = flags.FLAGS +flags.DEFINE_string('appservice', None, + 'Type of app service. e.g. AppEngine') +flags.DEFINE_string('appservice_region', None, + 'Region of deployed app service.') +flags.DEFINE_string('appservice_backend', None, + 'Backend instance type of app service uses.') +flags.DEFINE_string('app_runtime', None, + 'Runtime environment of app service uses. ' + 'e.g. 
python, java') +flags.DEFINE_string('app_type', None, + 'Type of app packages builders should built.') +flags.DEFINE_integer('appservice_count', 1, + 'Copies of applications to launch.') + + +def GetAppServiceSpecClass(service): + return spec.GetSpecClass( + BaseAppServiceSpec, SERVICE=service) + + +class BaseAppServiceSpec(spec.BaseSpec): + """Storing various data about app service.""" + + SPEC_TYPE = 'BaseAppServiceSpec' + SPEC_ATTRS = ['SERVICE'] + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + super(BaseAppServiceSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['appservice_region'] .present: + config_values['appservice_region'] = flag_values.appservice_region + if flag_values['appservice_backend'].present: + config_values['appservice_backend'] = flag_values.appservice_backend + if flag_values['appservice'].present: + config_values['appservice'] = flag_values.appservice + + @classmethod + def _GetOptionDecoderConstructions(cls): + result = super(BaseAppServiceSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'appservice_region': (option_decoders.StringDecoder, { + 'default': None, 'none_ok': True}), + 'appservice_backend': (option_decoders.StringDecoder, { + 'default': None, 'none_ok': True}), + 'appservice': (option_decoders.StringDecoder, { + 'default': None, 'none_ok': True}) + }) + return result + + +def GetAppServiceClass(service): + return resource.GetResourceClass( + BaseAppService, SERVICE=service) + + +class BaseAppService(resource.BaseResource): + """Base class for representing an App instance.""" + + RESOURCE_TYPE = 'BaseAppService' + REQUIRED_ATTRS = ['SERVICE'] + POLL_INTERVAL = 1 + + _appservice_counter = 0 + _appservice_counter_lock = threading.Lock() + + def __init__(self, base_app_service_spec): + super(BaseAppService, self).__init__() + with self._appservice_counter_lock: + self.appservice_number = self._appservice_counter + self.name = 'pkb-%s-%s' % (FLAGS.run_uri, self.appservice_number) + BaseAppService._appservice_counter += 1 + self.region = base_app_service_spec.appservice_region + self.backend = base_app_service_spec.appservice_backend + self.builder = None + # update metadata + self.metadata.update({'backend': self.backend, + 'region': self.region, + 'concurrency': 'default'}) + self.samples = [] + + def _UpdateDependencies(self): + """Update dependencies for AppService.""" + self.builder.Mutate() + + def _Update(self): + raise NotImplementedError() + + def Update(self): + """Update a deployed app instance.""" + + @vm_util.Retry(poll_interval=self.POLL_INTERVAL, fuzz=0, + timeout=self.READY_TIMEOUT, + retryable_exceptions=( + errors.Resource.RetryableCreationError,)) + def WaitUntilReady(): + if not self._IsReady(): + raise errors.Resource.RetryableCreationError('Not yet ready') + + if self.user_managed: + return + self._UpdateDependencies() + self.update_start_time = time.time() + self._Update() + self.update_end_time = time.time() + WaitUntilReady() + self.update_ready_time = time.time() + self.samples.append( + sample.Sample('update latency', + self.update_end_time - self.update_start_time, + 'seconds', {})) + self.samples.append( + sample.Sample('update ready latency', + self.update_ready_time - self.update_start_time, + 'seconds', {})) + + def Invoke(self, args=None): + """Invoke a deployed app instance. + + Args: + args: dict. Arguments passed to app. 
+ """ + raise NotImplementedError() + + def _CreateDependencies(self): + """Builds app package.""" + if self.builder: + self.builder.Create() + + def _DeleteDependencies(self): + """Delete app package.""" + if self.builder: + self.builder.Delete() + + def SetBuilder(self, builder=None, **kwargs): + """Set builder for AppService.""" + if builder: + self.builder = builder + + def GetLifeCycleMetrics(self): + """Export internal lifecycle metrics.""" + if self.builder: + self.metadata.update(self.builder.GetResourceMetadata()) + + for s in self.samples: + s.metadata.update(self.metadata) + return self.samples + + def _PostCreate(self): + """Method called after _CreateResource.""" + if self.builder: + self.metadata.update(self.builder.GetResourceMetadata()) + + def Create(self): + super(BaseAppService, self).Create() + self.samples.append( + sample.Sample('create latency', + self.create_end_time - self.create_start_time, + 'seconds', {})) + self.samples.append( + sample.Sample('create ready latency', + self.resource_ready_time - self.create_start_time, + 'seconds', {})) diff --git a/script/cumulus/pkb/perfkitbenchmarker/archive.py b/script/cumulus/pkb/perfkitbenchmarker/archive.py new file mode 100644 index 0000000..be62d05 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/archive.py @@ -0,0 +1,74 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Archive a run directory to GCS or S3.""" + +import datetime +import logging +import os +import posixpath +import subprocess +import tarfile + +from perfkitbenchmarker.providers.aws.util import AWS_PATH + + +def ArchiveRun(run_temp_directory, target_bucket, + prefix='', + gsutil_path='gsutil', + aws_path=AWS_PATH): + """Archive a run directory to GCS or S3. + + Args: + run_temp_directory: str. directory to archive. + target_bucket: str. Either a gs:// or s3:// path to an extant bucket. + prefix: str. prefix for the file. + gsutil_path: str. Path to the gsutil tool. + aws_path: str. Path to the aws command line tool. + + Raises: + ValueError: when directory or target_bucket does not exist. + subprocess.CalledProcessError: subprocess call failed. 
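+
+  Example (illustrative; the directory and bucket name below are placeholders):
+    ArchiveRun('/tmp/perfkitbenchmarker/runs/run_uri0', 'gs://my-archive-bucket',
+               prefix='pkb-')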
+ """ + if not os.path.isdir(run_temp_directory): + raise ValueError('{0} is not a directory.'.format(run_temp_directory)) + + tar_file_name = '{}{}.tar.gz'.format( + prefix, datetime.datetime.now().strftime('%Y%m%d%H%M%S')) + + prefix_len = 5 + prefixes = { + 's3://': [aws_path, 's3', 'cp'], + 'gs://': [gsutil_path, 'cp'] + } + + assert all(len(key) == prefix_len for key in prefixes), prefixes + + try: + cmd = (prefixes[target_bucket[:prefix_len]] + + ['-', posixpath.join(target_bucket, tar_file_name)]) + except KeyError: + raise ValueError('Unsupported bucket name: {0}'.format(target_bucket)) + + logging.info('Streaming %s to %s\n%s', run_temp_directory, tar_file_name, + ' '.join(cmd)) + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) + + with p.stdin: + with tarfile.open(mode='w:gz', fileobj=p.stdin) as tar: + tar.add(run_temp_directory, os.path.basename(run_temp_directory)) + + status = p.wait() + if status: + raise subprocess.CalledProcessError(status, cmd) diff --git a/script/cumulus/pkb/perfkitbenchmarker/background_tasks.py b/script/cumulus/pkb/perfkitbenchmarker/background_tasks.py new file mode 100644 index 0000000..4ac494e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/background_tasks.py @@ -0,0 +1,687 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Background tasks that propagate PKB thread context. + +TODO(skschneider): Many of the threading module flaws have been corrected in +Python 3. When PKB switches to Python 3, this module can be simplified. + +PKB tries its best to clean up provisioned resources upon SIGINT. By default, +Python raises a KeyboardInterrupt upon a SIGINT, but none of the built-in +threading module classes are designed to handle a KeyboardInterrupt very well: + +- threading.Lock has an atomic acquire method that cannot be interrupted and + hangs forever if the same thread tries to acquire twice. Its release method + can be called by any thread but raises thread.error if an unacquired Lock is + released. + +- More complicated classes (threading.RLock, threading.Event, threading.Thread, + Queue.Queue) use internal Locks in such a way that a KeyboardInterrupt can + cause a thread that has acquired a Lock to jump out of its current action + without releasing the Lock. For example, in the below code, a + KeyboardInterrupt can be raised immediately after the acquire call but before + entering the try block: + lock.acquire() + try: + ... + except: + lock.release() + +Taken together, this means that there is a possibility to leave an internal Lock +acquired, and when later cleanup steps on the same or different thread attempt +to acquire the Lock, they will hang forever, unresponsive to even a second +KeyboardInterrupt. A KeyboardInterrupt during Thread.start() or Thread.join() +can even trigger an unbalanced acquire on a global lock used to keep track of +active threads, so that later attempts to start or join any Thread will hang +forever. 
+ +While it would take a significant and impractical redesign of PKB's code to +completely eliminate any risk of deadlock following a KeyboardInterrupt, the +code in this module is designed to allow interrupting parallel tasks while +keeping the risk of deadlock low. +""" + + +import abc +from collections import deque +import ctypes +import functools +import logging +import os +import signal +import threading +import time +import traceback +from concurrent import futures + +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from absl import flags +from perfkitbenchmarker import log_util +import six +from six.moves import queue +from six.moves import range +from six.moves import zip + + +# For situations where an interruptable wait is necessary, a loop of waits with +# long timeouts is used instead. This is because some of Python's built-in wait +# methods are non-interruptable without a timeout. +_LONG_TIMEOUT = 1000. + +# Constants used for polling waits. See _WaitForCondition. +_WAIT_MIN_RECHECK_DELAY = 0.001 # 1 ms +_WAIT_MAX_RECHECK_DELAY = 0.050 # 50 ms + +# Values sent to child threads that have special meanings. +_THREAD_STOP_PROCESSING = 0 +_THREAD_WAIT_FOR_KEYBOARD_INTERRUPT = 1 + +# The default value for max_concurrent_threads. +MAX_CONCURRENT_THREADS = 200 + +# The default value is set in pkb.py. It is the greater of +# MAX_CONCURRENT_THREADS or the value passed to --num_vms. This is particularly +# important for the cluster_boot benchmark where we want to launch all of the +# VMs in parallel. +flags.DEFINE_integer( + 'max_concurrent_threads', None, 'Maximum number of concurrent threads to ' + 'use when running a benchmark.') +FLAGS = flags.FLAGS + + +def _GetCallString(target_arg_tuple): + """Returns the string representation of a function call.""" + target, args, kwargs = target_arg_tuple + while isinstance(target, functools.partial): + args = target.args + args + inner_kwargs = target.keywords.copy() + inner_kwargs.update(kwargs) + kwargs = inner_kwargs + target = target.func + arg_strings = [str(a) for a in args] + arg_strings.extend(['{0}={1}'.format(k, v) for k, v in six.iteritems(kwargs)]) + return '{0}({1})'.format(getattr(target, '__name__', target), + ', '.join(arg_strings)) + + +def _WaitForCondition(condition_callback, timeout=None): + """Waits until the specified callback returns a value that evaluates True. + + Similar to the threading.Condition.wait method that is the basis of most + threading class wait routines. Polls the condition, starting with frequent + checks but extending the delay between checks upon each failure. + + Args: + condition_callback: Callable that returns a value that evaluates True to end + the wait or evaluates False to continue the wait. + timeout: Optional float. Number of seconds to wait before giving up. If + provided, the condition is still checked at least once before giving up. + If not provided, the wait does not time out. + + Returns: + True if condition_callback returned a value that evaluated True. False if + condition_callback did not return a value that evaluated True before the + timeout. 
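+
+  Example (illustrative; mirrors how this helper is used later in this module
+  to wait for a worker thread to exit):
+    _WaitForCondition(lambda: not thread.is_alive(), timeout=60)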
+ """ + deadline = None if timeout is None else time.time() + timeout + delay = _WAIT_MIN_RECHECK_DELAY + while True: + if condition_callback(): + return True + remaining_time = (_WAIT_MAX_RECHECK_DELAY if deadline is None + else deadline - time.time()) + if remaining_time <= 0: + return False + time.sleep(delay) + delay = min(delay * 2, remaining_time, _WAIT_MAX_RECHECK_DELAY) + + +class _SingleReaderQueue(object): + """Queue to which multiple threads write but from which only one thread reads. + + A lightweight substitute for the Queue.Queue class that does not use + internal Locks. + + Gets are interruptable but depend on polling. + """ + + def __init__(self): + self._deque = deque() + + def Get(self, timeout=None): + if not _WaitForCondition(lambda: self._deque, timeout): + raise queue.Empty + return self._deque.popleft() + + def Put(self, item): + self._deque.append(item) + + +class _NonPollingSingleReaderQueue(object): + """Queue to which multiple threads write but from which only one thread reads. + + Uses a threading.Lock to implement a non-interruptable Get that does not poll + and is therefore easier on CPU usage. The reader waits for items by acquiring + the Lock, and writers release the Lock to signal that items have been written. + """ + + def __init__(self): + self._deque = deque() + self._lock = threading.Lock() + self._lock.acquire() + + def _WaitForItem(self): + self._lock.acquire() + + def _SignalAvailableItem(self): + try: + self._lock.release() + except threading.ThreadError: + pass + + def Get(self): + while True: + self._WaitForItem() + if self._deque: + item = self._deque.popleft() + if self._deque: + self._SignalAvailableItem() + return item + + def Put(self, item): + self._deque.append(item) + self._SignalAvailableItem() + + +class _BackgroundTaskThreadContext(object): + """Thread-specific information that can be inherited by a background task. + + Attributes: + benchmark_spec: BenchmarkSpec of the benchmark currently being executed. + log_context: ThreadLogContext of the parent thread. + """ + + def __init__(self): + self.benchmark_spec = context.GetThreadBenchmarkSpec() + self.log_context = log_util.GetThreadLogContext() + + def CopyToCurrentThread(self): + """Sets the thread context of the current thread.""" + log_util.SetThreadLogContext(log_util.ThreadLogContext(self.log_context)) + context.SetThreadBenchmarkSpec(self.benchmark_spec) + + +class _BackgroundTask(object): + """Base class for a task executed in a child thread or process. + + Attributes: + target: Function that is invoked in the child thread or process. + args: Series of unnamed arguments to be passed to the target. + kwargs: dict. Keyword arguments to be passed to the target. + context: _BackgroundTaskThreadContext. Thread-specific state to be inherited + from parent to child thread. + return_value: Return value if the call was executed successfully, or None + otherwise. + traceback: The traceback string if the call raised an exception, or None + otherwise. 
+ """ + + def __init__(self, target, args, kwargs, thread_context): + self.target = target + self.args = args + self.kwargs = kwargs + self.context = thread_context + self.return_value = None + self.traceback = None + + def Run(self): + """Sets the current thread context and executes the target.""" + self.context.CopyToCurrentThread() + try: + self.return_value = self.target(*self.args, **self.kwargs) + except Exception: + self.traceback = traceback.format_exc() + + +class _BackgroundTaskManager(six.with_metaclass(abc.ABCMeta, object)): + """Base class for a context manager that manages state for background tasks. + + Attributes: + tasks: list of _BackgroundTask instances. Contains one _BackgroundTask per + started task, in the order that they were started. + """ + + def __init__(self, max_concurrency): + self._max_concurrency = max_concurrency + self.tasks = [] + + def __enter__(self): + return self + + def __exit__(self, *unused_args, **unused_kwargs): + pass + + @abc.abstractmethod + def StartTask(self, target, args, kwargs, thread_context): + """Creates and starts a _BackgroundTask. + + The created task is appended to self.tasks. + + Args: + target: Function that is invoked in the child thread or process. + args: Series of unnamed arguments to be passed to the target. + kwargs: dict. Keyword arguments to be passed to the target. + thread_context: _BackgroundTaskThreadContext. Thread-specific state to be + inherited from parent to child thread. + """ + raise NotImplementedError() + + @abc.abstractmethod + def AwaitAnyTask(self): + """Waits for any of the started tasks to complete. + + Returns: + int. Index of the task that completed in self.tasks. + """ + raise NotImplementedError() + + @abc.abstractmethod + def HandleKeyboardInterrupt(self): + """Called by the parent thread if a KeyboardInterrupt occurs. + + Ensures that any child thread also receives a KeyboardInterrupt, and then + waits for each child thread to stop executing. + """ + raise NotImplementedError() + + +def _ExecuteBackgroundThreadTasks(worker_id, task_queue, response_queue): + """Executes tasks received on a task queue. + + Executed in a child Thread by _BackgroundThreadTaskManager. + + Args: + worker_id: int. Identifier for the child thread relative to other child + threads. + task_queue: _NonPollingSingleReaderQueue. Queue from which input is read. + Each value in the queue can be one of three types of values. If it is a + (task_id, _BackgroundTask) pair, the task is executed on this thread. + If it is _THREAD_STOP_PROCESSING, the thread stops executing. If it is + _THREAD_WAIT_FOR_KEYBOARD_INTERRUPT, the thread waits for a + KeyboardInterrupt. + response_queue: _SingleReaderQueue. Queue to which output is written. It + receives worker_id when this thread's bootstrap code has completed and + receives a (worker_id, task_id) pair for each task completed on this + thread. + """ + try: + response_queue.Put(worker_id) + while True: + task_tuple = task_queue.Get() + if task_tuple == _THREAD_STOP_PROCESSING: + break + elif task_tuple == _THREAD_WAIT_FOR_KEYBOARD_INTERRUPT: + while True: + time.sleep(_WAIT_MAX_RECHECK_DELAY) + task_id, task = task_tuple + task.Run() + response_queue.Put((worker_id, task_id)) + except KeyboardInterrupt: + # TODO(skschneider): Detect when the log would be unhelpful (e.g. if the + # current thread was spinning in the _THREAD_WAIT_FOR_KEYBOARD_INTERRUPT + # sub-loop). Only log in helpful cases, like when the task is interrupted. 
+ logging.debug('Child thread %s received a KeyboardInterrupt from its ' + 'parent.', worker_id, exc_info=True) + + +class _BackgroundThreadTaskManager(_BackgroundTaskManager): + """Manages state for background tasks started in child threads.""" + + def __init__(self, *args, **kwargs): + super(_BackgroundThreadTaskManager, self).__init__(*args, **kwargs) + self._response_queue = _SingleReaderQueue() + self._task_queues = [] + self._threads = [] + self._available_worker_ids = list(range(self._max_concurrency)) + uninitialized_worker_ids = set(self._available_worker_ids) + for worker_id in self._available_worker_ids: + task_queue = _NonPollingSingleReaderQueue() + self._task_queues.append(task_queue) + thread = threading.Thread( + target=_ExecuteBackgroundThreadTasks, + args=(worker_id, task_queue, self._response_queue)) + thread.daemon = True + self._threads.append(thread) + thread.start() + # Wait for each Thread to finish its bootstrap code. Starting all the + # threads upfront like this and reusing them for later calls minimizes the + # risk of a KeyboardInterrupt interfering with any of the Lock interactions. + for _ in self._threads: + worker_id = self._response_queue.Get() + uninitialized_worker_ids.remove(worker_id) + assert not uninitialized_worker_ids, uninitialized_worker_ids + + def __exit__(self, *unused_args, **unused_kwargs): + # Shut down worker threads. + for task_queue in self._task_queues: + task_queue.Put(_THREAD_STOP_PROCESSING) + for thread in self._threads: + _WaitForCondition(lambda: not thread.is_alive()) + + def StartTask(self, target, args, kwargs, thread_context): + assert self._available_worker_ids, ('StartTask called when no threads were ' + 'available') + task = _BackgroundTask(target, args, kwargs, thread_context) + task_id = len(self.tasks) + self.tasks.append(task) + worker_id = self._available_worker_ids.pop() + self._task_queues[worker_id].Put((task_id, task)) + + def AwaitAnyTask(self): + worker_id, task_id = self._response_queue.Get() + self._available_worker_ids.append(worker_id) + return task_id + + def HandleKeyboardInterrupt(self): + # Raise a KeyboardInterrupt in each child thread. + for thread in self._threads: + ctypes.pythonapi.PyThreadState_SetAsyncExc( + ctypes.c_long(thread.ident), ctypes.py_object(KeyboardInterrupt)) + # Wake threads up from possible non-interruptable wait states so they can + # actually see the KeyboardInterrupt. + for task_queue, thread in zip(self._task_queues, self._threads): + task_queue.Put(_THREAD_WAIT_FOR_KEYBOARD_INTERRUPT) + for thread in self._threads: + _WaitForCondition(lambda: not thread.is_alive()) + + +def _ExecuteProcessTask(task): + """Function invoked in another process by _BackgroundProcessTaskManager. + + Executes a specified task function and returns the result or exception + traceback. + + TODO(skschneider): Rework this helper function when moving to Python 3.5 or + when the backport of concurrent.futures.ProcessPoolExecutor is able to + preserve original traceback. + + Args: + task: _BackgroundTask to execute. + + Returns: + (result, traceback) tuple. The first element is the return value from the + task function, or None if the function raised an exception. The second + element is the exception traceback string, or None if the function + succeeded. + """ + def handle_sigint(signum, frame): + # Ignore any new SIGINTs since we are already tearing down. 
+ signal.signal(signal.SIGINT, signal.SIG_IGN) + # Execute the default SIGINT handler which throws a KeyboardInterrupt + # in the main thread of the process. + signal.default_int_handler(signum, frame) + signal.signal(signal.SIGINT, handle_sigint) + task.Run() + return task.return_value, task.traceback + + +class _BackgroundProcessTaskManager(_BackgroundTaskManager): + """Manages states for background tasks started in child processes. + + TODO(skschneider): This class uses futures.ProcessPoolExecutor. We have been + using this executor since before issues regarding KeyboardInterrupt were + fully explored. The only consumer of this class is RunParallelProcesses, and + currently the uses for RunParallelProcesses are limited. In the future, this + class should also be redesigned for protection against KeyboardInterrupt. + """ + + def __init__(self, *args, **kwargs): + super(_BackgroundProcessTaskManager, self).__init__(*args, **kwargs) + self._active_futures = {} + self._executor = futures.ProcessPoolExecutor(self._max_concurrency) + + def __enter__(self): + self._executor.__enter__() + return self + + def __exit__(self, *args, **kwargs): + # Note: This invokes a non-interruptable wait. + return self._executor.__exit__(*args, **kwargs) + + def StartTask(self, target, args, kwargs, thread_context): + task = _BackgroundTask(target, args, kwargs, thread_context) + task_id = len(self.tasks) + self.tasks.append(task) + future = self._executor.submit(_ExecuteProcessTask, task) + self._active_futures[future] = task_id + + def AwaitAnyTask(self): + completed_tasks = None + while not completed_tasks: + completed_tasks, _ = futures.wait( + self._active_futures, timeout=_LONG_TIMEOUT, + return_when=futures.FIRST_COMPLETED) + future = completed_tasks.pop() + task_id = self._active_futures.pop(future) + task = self.tasks[task_id] + task.return_value, task.traceback = future.result() + return task_id + + def HandleKeyboardInterrupt(self): + # If this thread received an interrupt signal, then processes started with + # a ProcessPoolExecutor will also have received an interrupt without any + # extra work needed from this class. Only need to wait for child processes. + # Note: This invokes a non-interruptable wait. + self._executor.shutdown(wait=True) + + +def _RunParallelTasks(target_arg_tuples, max_concurrency, get_task_manager, + parallel_exception_class, post_task_delay=0): + """Executes function calls concurrently in separate threads or processes. + + Args: + target_arg_tuples: list of (target, args, kwargs) tuples. Each tuple + contains the function to call and the arguments to pass it. + max_concurrency: int or None. The maximum number of concurrent new + threads or processes. + get_task_manager: Callable that accepts an int max_concurrency arg and + returns a _TaskManager. + parallel_exception_class: Type of exception to raise upon an exception in + one of the called functions. + post_task_delay: Delay in seconds between parallel task invocations. + + Returns: + list of function return values in the order corresponding to the order of + target_arg_tuples. + + Raises: + parallel_exception_class: When an exception occurred in any of the called + functions. 
+ """ + thread_context = _BackgroundTaskThreadContext() + max_concurrency = min(max_concurrency, len(target_arg_tuples)) + error_strings = [] + started_task_count = 0 + active_task_count = 0 + with get_task_manager(max_concurrency) as task_manager: + try: + while started_task_count < len(target_arg_tuples) or active_task_count: + if (started_task_count < len(target_arg_tuples) and + active_task_count < max_concurrency): + # Start a new task. + target, args, kwargs = target_arg_tuples[started_task_count] + task_manager.StartTask(target, args, kwargs, thread_context) + started_task_count += 1 + active_task_count += 1 + if post_task_delay: + time.sleep(post_task_delay) + continue + + # Wait for a task to complete. + task_id = task_manager.AwaitAnyTask() + active_task_count -= 1 + # If the task failed, it may still be a long time until all remaining + # tasks complete. Log the failure immediately before continuing to wait + # for other tasks. + stacktrace = task_manager.tasks[task_id].traceback + if stacktrace: + msg = ('Exception occurred while calling {0}:{1}{2}'.format( + _GetCallString(target_arg_tuples[task_id]), os.linesep, + stacktrace)) + logging.error(msg) + error_strings.append(msg) + + except KeyboardInterrupt: + logging.error( + 'Received KeyboardInterrupt while executing parallel tasks. Waiting ' + 'for %s tasks to clean up.', active_task_count) + task_manager.HandleKeyboardInterrupt() + raise + + if error_strings: + # TODO(skschneider): Combine errors.VmUtil.ThreadException and + # errors.VmUtil.CalledProcessException so this can be a single exception + # type. + raise parallel_exception_class( + 'The following exceptions occurred during parallel execution:' + '{0}{1}'.format(os.linesep, os.linesep.join(error_strings))) + results = [task.return_value for task in task_manager.tasks] + assert len(target_arg_tuples) == len(results), (target_arg_tuples, results) + return results + + +def RunParallelThreads(target_arg_tuples, max_concurrency, post_task_delay=0): + """Executes function calls concurrently in separate threads. + + Args: + target_arg_tuples: list of (target, args, kwargs) tuples. Each tuple + contains the function to call and the arguments to pass it. + max_concurrency: int or None. The maximum number of concurrent new + threads. + post_task_delay: Delay in seconds between parallel task invocations. + + Returns: + list of function return values in the order corresponding to the order of + target_arg_tuples. + + Raises: + errors.VmUtil.ThreadException: When an exception occurred in any of the + called functions. + """ + return _RunParallelTasks( + target_arg_tuples, max_concurrency, _BackgroundThreadTaskManager, + errors.VmUtil.ThreadException, post_task_delay) + + +def RunThreaded(target, + thread_params, + max_concurrent_threads=None, + post_task_delay=0): + """Runs the target method in parallel threads. + + The method starts up threads with one arg from thread_params as the first arg. + + Args: + target: The method to invoke in the thread. + thread_params: A thread is launched for each value in the list. The items + in the list can either be a singleton or a (args, kwargs) tuple/list. + Usually this is a list of VMs. + max_concurrent_threads: The maximum number of concurrent threads to allow. + post_task_delay: Delay in seconds between commands. + + Returns: + List of the same length as thread_params. Contains the return value from + each threaded function call in the corresponding order as thread_params. + + Raises: + ValueError: when thread_params is not valid. 
+ errors.VmUtil.ThreadException: When an exception occurred in any of the + called functions. + + Example 1: # no args other than list. + args = [self.CreateVm() + for x in range(0, 10)] + RunThreaded(MyThreadedTargetMethod, args) + + Example 2: # using args only to pass to the thread: + args = [((self.CreateVm(), i, 'somestring'), {}) + for i in range(0, 10)] + RunThreaded(MyThreadedTargetMethod, args) + + Example 3: # using args & kwargs to pass to the thread: + args = [((self.CreateVm(),), {'num': i, 'name': 'somestring'}) + for i in range(0, 10)] + RunThreaded(MyThreadedTargetMethod, args) + """ + if max_concurrent_threads is None: + max_concurrent_threads = ( + FLAGS.max_concurrent_threads or MAX_CONCURRENT_THREADS) + + if not isinstance(thread_params, list): + raise ValueError('Param "thread_params" must be a list') + + if not thread_params: + # Nothing to do. + return [] + + if not isinstance(thread_params[0], tuple): + target_arg_tuples = [(target, (arg,), {}) for arg in thread_params] + elif (not isinstance(thread_params[0][0], tuple) or + not isinstance(thread_params[0][1], dict)): + raise ValueError('If Param is a tuple, the tuple must be (tuple, dict)') + else: + target_arg_tuples = [(target, args, kwargs) + for args, kwargs in thread_params] + + return RunParallelThreads(target_arg_tuples, + max_concurrency=max_concurrent_threads, + post_task_delay=post_task_delay) + + +def RunParallelProcesses(target_arg_tuples, max_concurrency, + post_process_delay=0): + """Executes function calls concurrently in separate processes. + + Args: + target_arg_tuples: list of (target, args, kwargs) tuples. Each tuple + contains the function to call and the arguments to pass it. + max_concurrency: int or None. The maximum number of concurrent new + processes. If None, it will default to the number of processors on the + machine. + post_process_delay: Delay in seconds between parallel process invocations. + + Returns: + list of function return values in the order corresponding to the order of + target_arg_tuples. + + Raises: + errors.VmUtil.CalledProcessException: When an exception occurred in any + of the called functions. + """ + def handle_sigint(signum, frame): + # Ignore any SIGINTS in the parent process, but let users know + # that the child processes are getting cleaned up. + logging.error('Got SIGINT while executing parallel tasks. ' + 'Waiting for tasks to clean up.') + old_handler = None + try: + old_handler = signal.signal(signal.SIGINT, handle_sigint) + ret_val = _RunParallelTasks( + target_arg_tuples, max_concurrency, _BackgroundProcessTaskManager, + errors.VmUtil.CalledProcessException, + post_task_delay=post_process_delay) + finally: + if old_handler: + signal.signal(signal.SIGINT, old_handler) + return ret_val diff --git a/script/cumulus/pkb/perfkitbenchmarker/background_workload.py b/script/cumulus/pkb/perfkitbenchmarker/background_workload.py new file mode 100644 index 0000000..95eb803 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/background_workload.py @@ -0,0 +1,133 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing classes for background workloads.""" + +from typing import List +from perfkitbenchmarker import os_types +from perfkitbenchmarker import vm_util +import six + +BACKGROUND_WORKLOADS: List['BaseBackgroundWorkload'] = [] + +BACKGROUND_IPERF_PORT = 20001 +BACKGROUND_IPERF_SECONDS = 2147483647 + + +class AutoRegisterBackgroundWorkloadMeta(type): + """Metaclass which allows BackgroundWorkloads to be auto-registered.""" + + def __init__(cls, name, bases, dct): + super(AutoRegisterBackgroundWorkloadMeta, cls).__init__(name, bases, dct) + BACKGROUND_WORKLOADS.append(cls) # pytype: disable=container-type-mismatch + + +class BaseBackgroundWorkload( + six.with_metaclass(AutoRegisterBackgroundWorkloadMeta, object)): + """Baseclass for background workloads.""" + + EXCLUDED_OS_TYPES = [] + + @staticmethod + def IsEnabled(vm): + """Returns true if this background workload is enabled on this VM.""" + del vm # Unused + return False + + @staticmethod + def Prepare(vm): + """Prepares the background workload on this VM.""" + pass + + @staticmethod + def Start(vm): + """Starts the background workload on this VM.""" + pass + + @staticmethod + def Stop(vm): + """Stops the background workload on this VM.""" + pass + + +class CpuWorkload(BaseBackgroundWorkload): + """Workload that runs sysbench in the background.""" + + EXCLUDED_OS_TYPES = os_types.WINDOWS_OS_TYPES + + @staticmethod + def IsEnabled(vm): + """Returns true if this background workload is enabled on this VM.""" + return bool(vm.background_cpu_threads) + + @staticmethod + def Prepare(vm): + """Prepares the background workload on this VM.""" + vm.Install('sysbench') + + @staticmethod + def Start(vm): + """Starts the background workload on this VM.""" + vm.RemoteCommand( + 'nohup sysbench --num-threads=%s --test=cpu --cpu-max-prime=10000000 ' + 'run 1> /dev/null 2> /dev/null &' % vm.background_cpu_threads) + + @staticmethod + def Stop(vm): + """Stops the background workload on this VM.""" + vm.RemoteCommand('pkill -9 sysbench') + + +class NetworkWorkload(BaseBackgroundWorkload): + """Workload that runs iperf in the background.""" + + EXCLUDED_OS_TYPES = os_types.WINDOWS_OS_TYPES + + @staticmethod + def IsEnabled(vm): + """Returns true if this background workload is enabled on this VM.""" + return bool(vm.background_network_mbits_per_sec) + + @staticmethod + def Prepare(vm): + """Prepares the background workload on this VM.""" + vm.Install('iperf') + + @staticmethod + def Start(vm): + """Starts the background workload on this VM.""" + vm.AllowPort(BACKGROUND_IPERF_PORT) + vm.RemoteCommand('nohup iperf --server --port %s &> /dev/null &' % + BACKGROUND_IPERF_PORT) + stdout, _ = vm.RemoteCommand('pgrep iperf -n') + vm.server_pid = stdout.strip() + + if vm.background_network_ip_type == vm_util.IpAddressSubset.EXTERNAL: + ip_address = vm.ip_address + else: + ip_address = vm.internal_ip + iperf_cmd = ('nohup iperf --client %s --port %s --time %s -u -b %sM ' + '&> /dev/null &' % (ip_address, BACKGROUND_IPERF_PORT, + BACKGROUND_IPERF_SECONDS, + vm.background_network_mbits_per_sec)) + + vm.RemoteCommand(iperf_cmd) + stdout, _ = 
vm.RemoteCommand('pgrep iperf -n') + vm.client_pid = stdout.strip() + + @staticmethod + def Stop(vm): + """Stops the background workload on this VM.""" + vm.RemoteCommand('kill -9 ' + vm.client_pid) + vm.RemoteCommand('kill -9 ' + vm.server_pid) diff --git a/script/cumulus/pkb/perfkitbenchmarker/beam_benchmark_helper.py b/script/cumulus/pkb/perfkitbenchmarker/beam_benchmark_helper.py new file mode 100644 index 0000000..6b7b3ce --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/beam_benchmark_helper.py @@ -0,0 +1,326 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper methods for Apache Beam benchmarks. + +This file contains methods which are common to all Beam benchmarks and +executions. +""" + +import fnmatch +import os + +from absl import flags +from perfkitbenchmarker import dpb_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import vm_util + +BEAM_JAVA_SDK = 'java' +BEAM_PYTHON_SDK = 'python' + +flags.DEFINE_string('gradle_binary', None, + 'Set to use a different gradle binary than gradle wrapper ' + 'from the repository') +flags.DEFINE_string('beam_location', None, + 'Location of already checked out Beam codebase.') +flags.DEFINE_string('beam_it_module', None, + 'Gradle module containing integration test. Use full ' + 'module starting and separated by colon, like :sdk:python') +flags.DEFINE_boolean('beam_prebuilt', False, + 'Set this to indicate that the repo in beam_location ' + 'does not need to be rebuilt before being used') +flags.DEFINE_integer('beam_it_timeout', 600, 'Integration Test Timeout.') +flags.DEFINE_string('git_binary', 'git', 'Path to git binary.') +flags.DEFINE_string('beam_version', None, + 'Version of Beam to download. Use tag from Github ' + 'as value. If not specified, will use HEAD.') +flags.DEFINE_enum('beam_sdk', None, [BEAM_JAVA_SDK, BEAM_PYTHON_SDK], + 'Which BEAM SDK is used to build the benchmark pipeline.') +flags.DEFINE_string('beam_python_attr', 'IT', + 'Test decorator that is used in Beam Python to filter a ' + 'specific category.') +flags.DEFINE_string('beam_python_sdk_location', None, + 'Python SDK tar ball location. It is a required option to ' + 'run Python pipeline.') + +flags.DEFINE_string('beam_extra_properties', None, + 'Allows to specify list of key-value pairs that will be ' + 'forwarded to target mvn command as system properties') + +flags.DEFINE_string('beam_runner', 'dataflow', 'Defines runner which will be used in tests') +flags.DEFINE_string('beam_runner_option', None, + 'Overrides any pipeline options to specify the runner.') + +flags.DEFINE_string('beam_filesystem', None, + 'Defines filesystem which will be used in tests. 
' + 'If not specified it will use runner\'s local filesystem.') + +FLAGS = flags.FLAGS + +SUPPORTED_RUNNERS = [dpb_service.DATAFLOW] + +BEAM_REPO_LOCATION = 'https://github.com/apache/beam.git' + +DEFAULT_PYTHON_TAR_PATTERN = 'apache-beam-*.tar.gz' + + +def AddRunnerArgument(command, runner_name): + if runner_name is None or runner_name == 'direct': + command.append('-DintegrationTestRunner=direct') + + if runner_name == 'dataflow': + command.append('-DintegrationTestRunner=dataflow') + + +def AddRunnerPipelineOption(beam_pipeline_options, runner_name, + runner_option_override): + """Add runner to pipeline options.""" + runner_pipeline_option = '' + + if runner_name == 'dataflow': + runner_pipeline_option = ('"--runner=TestDataflowRunner"') + + if runner_name == 'direct': + runner_pipeline_option = ('"--runner=DirectRunner"') + + if runner_option_override: + runner_pipeline_option = '--runner=' + runner_option_override + + if len(runner_pipeline_option) > 0: + beam_pipeline_options.append(runner_pipeline_option) + + +def AddFilesystemArgument(command, filesystem_name): + if filesystem_name == 'hdfs': + command.append('-Dfilesystem=hdfs') + + +def AddExtraProperties(command, extra_properties): + if not extra_properties: + return + + if 'integrationTestPipelineOptions=' in extra_properties: + raise ValueError('integrationTestPipelineOptions must not be in ' + 'beam_extra_properties') + + extra_properties = extra_properties.rstrip(']').lstrip('[').split(',') + extra_properties = [p.rstrip('" ').lstrip('" ') for p in extra_properties] + for p in extra_properties: + command.append('-D{}'.format(p)) + + +def AddPythonAttributes(command, attributes): + if attributes: + command.append('-Dattr={}'.format(attributes)) + + +def AddTaskArgument(command, task_name, module): + if not task_name or not module: + raise ValueError('task_name and module should not be empty.') + command.append('{}:{}'.format(module, task_name)) + + +def InitializeBeamRepo(benchmark_spec): + """Ensures environment is prepared for running Beam benchmarks. + + In the absence of FLAGS.beam_location, initializes the beam source code base + by checking out the repository from github. Specific branch selection is + supported. + + Args: + benchmark_spec: The PKB spec for the benchmark to run. 
+ """ + if benchmark_spec.dpb_service.SERVICE_TYPE not in SUPPORTED_RUNNERS: + raise NotImplementedError('Unsupported Runner') + + vm_util.GenTempDir() + if FLAGS.beam_location is None: + git_clone_command = [FLAGS.git_binary, 'clone', BEAM_REPO_LOCATION] + if FLAGS.beam_version: + git_clone_command.append('--branch={}'.format(FLAGS.beam_version)) + git_clone_command.append('--single-branch') + + vm_util.IssueCommand(git_clone_command, cwd=vm_util.GetTempDir()) + + elif not os.path.exists(FLAGS.beam_location): + raise errors.Config.InvalidValue('Directory indicated by beam_location ' + 'does not exist: {}.'.format( + FLAGS.beam_location)) + + _PrebuildBeam() + + +def _PrebuildBeam(): + """Rebuild beam if it was not build earlier.""" + if not FLAGS.beam_prebuilt: + + gradle_prebuild_tasks = ['clean', 'assemble'] + gradle_prebuild_flags = ['--stacktrace', '--info'] + build_command = [_GetGradleCommand()] + build_command.extend(gradle_prebuild_flags) + + for task in gradle_prebuild_tasks: + AddTaskArgument(build_command, task, FLAGS.beam_it_module) + AddRunnerArgument(build_command, FLAGS.beam_runner) + AddFilesystemArgument(build_command, FLAGS.beam_filesystem) + AddExtraProperties(build_command, FLAGS.beam_extra_properties) + + vm_util.IssueCommand(build_command, timeout=1500, cwd=_GetBeamDir()) + + +def BuildBeamCommand(benchmark_spec, classname, job_arguments): + """Constructs a Beam execution command for the benchmark. + + Args: + benchmark_spec: The PKB spec for the benchmark to run. + classname: The classname of the class to run. + job_arguments: The additional job arguments provided for the run. + + Returns: + cmd: Array containing the built command. + beam_dir: The directory in which to run the command. + """ + if benchmark_spec.service_type not in SUPPORTED_RUNNERS: + raise NotImplementedError('Unsupported Runner') + + base_dir = _GetBeamDir() + + if FLAGS.beam_sdk == BEAM_JAVA_SDK: + cmd = _BuildGradleCommand(classname, job_arguments) + elif FLAGS.beam_sdk == BEAM_PYTHON_SDK: + cmd = _BuildPythonCommand(benchmark_spec, classname, job_arguments) + else: + raise NotImplementedError('Unsupported Beam SDK: %s.' % FLAGS.beam_sdk) + + return cmd, base_dir + + +def _BuildGradleCommand(classname, job_arguments): + """Constructs a Gradle command for the benchmark. + + Args: + classname: The classname of the class to run. + job_arguments: The additional job arguments provided for the run. + + Returns: + cmd: Array containing the built command. + """ + cmd = [] + + gradle_executable = _GetGradleCommand() + + if not vm_util.ExecutableOnPath(gradle_executable): + raise errors.Setup.MissingExecutableError( + 'Could not find required executable "%s"' % gradle_executable) + + cmd.append(gradle_executable) + AddTaskArgument(cmd, 'integrationTest', FLAGS.beam_it_module) + cmd.append('--tests={}'.format(classname)) + + beam_args = job_arguments if job_arguments else [] + + AddRunnerArgument(cmd, FLAGS.beam_runner) + AddRunnerPipelineOption(beam_args, FLAGS.beam_runner, + FLAGS.beam_runner_option) + AddFilesystemArgument(cmd, FLAGS.beam_filesystem) + AddExtraProperties(cmd, FLAGS.beam_extra_properties) + + cmd.append('-DintegrationTestPipelineOptions=' + '[{}]'.format(','.join(beam_args))) + + cmd.append('--stacktrace') + cmd.append('--info') + cmd.append('--scan') + + return cmd + + +def _BuildPythonCommand(benchmark_spec, classname, job_arguments): + """Constructs Gradle command for Python benchmark. + + Python integration tests can be invoked from Gradle task + `integrationTest`. 
How Python Gradle command constructed + is different from Java. We can use following system properties + in commandline: + + -Dtests: fully qualified class/module name of the test to run. + e.g. apache_beam.examples.wordcount_it_test:WordCountIT + -Dattr: a set of tests that are annotated by this attribute tag. + -DpipelineOptions: a set of pipeline options needed to run Beam job + + Args: + benchmark_spec: The PKB spec for the benchmark to run. + classname: The fully qualified class/module name of the test to run. + job_arguments: The additional job arguments provided for the run. + + Returns: + cmd: Array holds the execution command. + """ + + cmd = [] + + gradle_executable = _GetGradleCommand() + + if not vm_util.ExecutableOnPath(gradle_executable): + raise errors.Setup.MissingExecutableError( + 'Could not find required executable "%s"' % gradle_executable) + + cmd.append(gradle_executable) + AddTaskArgument(cmd, 'integrationTest', FLAGS.beam_it_module) + cmd.append('-Dtests={}'.format(classname)) + AddPythonAttributes(cmd, FLAGS.beam_python_attr) + + beam_args = job_arguments if job_arguments else [] + if benchmark_spec.service_type == dpb_service.DATAFLOW: + beam_args.append('"--runner={}"'.format(FLAGS.beam_runner)) + + sdk_location = FLAGS.beam_python_sdk_location + if not sdk_location: + tar_list = _FindFiles(_GetBeamPythonDir(), DEFAULT_PYTHON_TAR_PATTERN) + if not tar_list: + raise RuntimeError('No python sdk tar file is available.') + else: + sdk_location = tar_list[0] + beam_args.append('"--sdk_location={}"'.format(sdk_location)) + cmd.append('-DpipelineOptions={}'.format(' '.join(beam_args))) + + cmd.append('--info') + cmd.append('--scan') + + return cmd + + +def _GetGradleCommand(): + return FLAGS.gradle_binary or os.path.join(_GetBeamDir(), 'gradlew') + + +def _GetBeamDir(): + # TODO: This is temporary, find a better way. + return FLAGS.beam_location or os.path.join(vm_util.GetTempDir(), 'beam') + + +def _GetBeamPythonDir(): + return os.path.join(_GetBeamDir(), 'sdks/python') + + +def _FindFiles(base_path, pattern): + if not os.path.exists(base_path): + raise RuntimeError('No such directory: %s' % base_path) + + results = [] + for root, _, files in os.walk(base_path): + for f in files: + if fnmatch.fnmatch(f, pattern): + results.append(os.path.join(root, f)) + return results diff --git a/script/cumulus/pkb/perfkitbenchmarker/beam_pipeline_options.py b/script/cumulus/pkb/perfkitbenchmarker/beam_pipeline_options.py new file mode 100644 index 0000000..8ac754e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/beam_pipeline_options.py @@ -0,0 +1,153 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
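+
+# A hypothetical --beam_options_config_file layout, sketched only from the
+# parsing code in this module (ReadPipelineOptionConfigFile,
+# GetStaticPipelineOptions, EvaluateDynamicPipelineOptions); the concrete
+# option names and values below are illustrative assumptions, not part of any
+# shipped configuration:
+#
+#   static_pipeline_options:
+#     - tempRoot: gs://my-bucket/tmp
+#   dynamic_pipeline_options:
+#     - name: kafkaBootstrapServer
+#       type: NodePortIp
+#       podLabel: app=kafka
+#       format: "{{NodePortIp}}:9092"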
+ +import logging +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import kubernetes_helper +import yaml + +FLAGS = flags.FLAGS + + +def GetStaticPipelineOptions(options_list): + """ + Takes the dictionary loaded from the yaml configuration file and returns it + in a form consistent with the others in GenerateAllPipelineOptions: a list of + (pipeline_option_name, pipeline_option_value) tuples. + + The options in the options_list are a dict: + Key is the name of the pipeline option to pass to beam + Value is the value of the pipeline option to pass to beam + """ + options = [] + for option in options_list: + if len(list(option.keys())) != 1: + raise Exception('Each item in static_pipeline_options should only have' + ' 1 key/value') + option_kv = list(option.items())[0] + options.append((option_kv[0], option_kv[1])) + return options + + +def EvaluateDynamicPipelineOptions(dynamic_options): + """ + Takes the user's dynamic args and retrieves the information to fill them in. + + dynamic_args is a python map of argument name -> {type, kubernetesSelector, *format} + returns a list of tuples containing (argName, argValue) + + if optional format it passed, argValue is equal to format with "{{type}}" being replaced with actual value. + """ + filledOptions = [] + for optionDescriptor in dynamic_options: + fillType = optionDescriptor['type'] + optionName = optionDescriptor['name'] + valueFormat = optionDescriptor.get('format') + + if not fillType: + raise errors.Config.InvalidValue( + 'For dynamic arguments, you must provide a "type"') + + if fillType == 'NodePortIp': + argValue = RetrieveNodePortIp(optionDescriptor) + elif fillType == 'LoadBalancerIp': + argValue = RetrieveLoadBalancerIp(optionDescriptor) + elif fillType == 'TestValue': + argValue = optionDescriptor['value'] + else: + raise errors.Config.InvalidValue( + 'Unknown dynamic argument type: %s' % (fillType)) + + if valueFormat: + argValue = valueFormat.replace("{{" + fillType + "}}", argValue) + filledOptions.append((optionName, argValue)) + + return filledOptions + + +def GenerateAllPipelineOptions(it_args, it_options, static_pipeline_options, + dynamic_pipeline_options): + """ + :param it_args: options list passed in via FLAGS.beam_it_args + :param it_options: options list passed in via FLAGS.beam_it_options + :param static_pipeline_options: options list loaded from the yaml config file + :param dynamic_pipeline_options: options list loaded from the yaml config file + :return: a list of values of the form "\"--option_name=value\"" + """ + # beam_it_options are in [--option=value,--option2=val2] form + user_option_list = [] + if it_options is not None and len(it_options) > 0: + user_option_list = it_options.rstrip(']').lstrip('[').split(',') + user_option_list = [option.rstrip('" ').lstrip('" ') + for option in user_option_list] + + # Add static options from the benchmark_spec + benchmark_spec_option_list = ( + EvaluateDynamicPipelineOptions(dynamic_pipeline_options)) + benchmark_spec_option_list.extend( + GetStaticPipelineOptions(static_pipeline_options)) + option_list = ['--{}={}'.format(t[0], t[1]) + for t in benchmark_spec_option_list] + + # beam_it_args is the old way of passing parameters + args_list = [] + if it_args is not None and len(it_args) > 0: + args_list = it_args.split(',') + + return ['"{}"'.format(arg) + for arg in args_list + user_option_list + option_list] + + +def ReadPipelineOptionConfigFile(): + """ + Reads the path to the config file from FLAGS, then loads the static and + 
dynamic pipeline options from it.
+  """
+  dynamic_pipeline_options = []
+  static_pipeline_options = []
+  if FLAGS.beam_options_config_file:
+    with open(FLAGS.beam_options_config_file, 'r') as fileStream:
+      config = yaml.safe_load(fileStream)
+      if config['static_pipeline_options']:
+        static_pipeline_options = config['static_pipeline_options']
+      if config['dynamic_pipeline_options']:
+        dynamic_pipeline_options = config['dynamic_pipeline_options']
+  return static_pipeline_options, dynamic_pipeline_options
+
+
+def RetrieveNodePortIp(argDescriptor):
+  jsonSelector = argDescriptor['podLabel']
+  if not jsonSelector:
+    raise errors.Config.InvalidValue('For NodePortIp arguments, you must'
+                                     ' provide a "podLabel"')
+  ip = kubernetes_helper.GetWithWaitForContents(
+      'pods', '', jsonSelector, '.items[0].status.podIP')
+  if len(ip) == 0:
+    raise errors.Error('Could not retrieve NodePort IP address')
+  logging.info("Using NodePort IP Address: " + ip)
+  return ip
+
+
+def RetrieveLoadBalancerIp(argDescriptor):
+  serviceName = argDescriptor['serviceName']
+  if not serviceName:
+    raise errors.Config.InvalidValue('For LoadBalancerIp arguments, you must'
+                                     ' provide a "serviceName"')
+  ip = kubernetes_helper.GetWithWaitForContents(
+      'svc', serviceName, '', '.status.loadBalancer.ingress[0].ip')
+  if len(ip) == 0:
+    raise errors.Error('Could not retrieve LoadBalancer IP address')
+  logging.info("Using LoadBalancer IP Address: " + ip)
+  return ip
diff --git a/script/cumulus/pkb/perfkitbenchmarker/benchmark_lookup.py b/script/cumulus/pkb/perfkitbenchmarker/benchmark_lookup.py
new file mode 100644
index 0000000..10335f8
--- /dev/null
+++ b/script/cumulus/pkb/perfkitbenchmarker/benchmark_lookup.py
@@ -0,0 +1,48 @@
+# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Function to lookup modules from benchmark names.
+
+BenchmarkModule: Returns a benchmark module given its name.
+
+This module works around a circular import issue where we cannot import
+benchmark_sets.py directly into virtual_machine.py. After SetUpPKB is called,
+benchmark_lookup.BenchmarkModule is equivalent to
+benchmark_sets.BenchmarkModule.
+"""
+
+from perfkitbenchmarker import errors
+
+_global_benchmark_module_function = None
+
+
+def SetBenchmarkModuleFunction(function):
+  """Sets the function called by BenchmarkModule; See benchmark_sets.py."""
+  global _global_benchmark_module_function
+  _global_benchmark_module_function = function
+
+
+def BenchmarkModule(benchmark_name):
+  """Finds the module for a benchmark by name.
+
+  Args:
+    benchmark_name: The name of the benchmark.
+
+  Returns:
+    The benchmark's module, or None if the benchmark is invalid.
+ """ + if not _global_benchmark_module_function: + raise errors.Setup.InvalidSetupError( + 'Cannot call benchmark_lookup.py; Was SetUpPKB called?') + return _global_benchmark_module_function(benchmark_name) diff --git a/script/cumulus/pkb/perfkitbenchmarker/benchmark_sets.py b/script/cumulus/pkb/perfkitbenchmarker/benchmark_sets.py new file mode 100644 index 0000000..5d7f427 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/benchmark_sets.py @@ -0,0 +1,477 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Benchmark set specific functions and definitions.""" + + +import collections +import copy +import itertools + +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker import linux_benchmarks +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import os_types +from perfkitbenchmarker import windows_benchmarks +from perfkitbenchmarker import windows_packages +import six +from six.moves import zip + +FLAGS = flags.FLAGS + +flags.DEFINE_string('flag_matrix', None, + 'The name of the flag matrix to run.') +flags.DEFINE_string('flag_zip', None, + 'The name of the flag zip to run.') +flags.DEFINE_integer('num_benchmark_copies', 1, + 'The number of copies of each benchmark config to run.') + +MESSAGE = 'message' +BENCHMARK_LIST = 'benchmark_list' +STANDARD_SET = 'standard_set' + +BENCHMARK_SETS = { + STANDARD_SET: { + MESSAGE: ('The standard_set is a community agreed upon set of ' + 'benchmarks to measure Cloud performance.'), + BENCHMARK_LIST: [ + 'aerospike', + 'block_storage_workload', + 'cassandra_stress', + 'cluster_boot', + 'copy_throughput', + 'coremark', + 'fio', + 'hadoop_terasort', + 'hpcc', + 'iperf', + 'mesh_network', + 'mongodb_ycsb', + 'netperf', + 'object_storage_service', + 'ping', + 'redis_memtier', + 'speccpu2006', + 'sysbench', + 'unixbench', + ] + }, + 'arm_set': { + MESSAGE: 'ARM benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'alicloud_set': { + MESSAGE: 'AliCloud benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'broadcom_set': { + MESSAGE: 'Broadcom benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'canonical_set': { + MESSAGE: 'Canonical benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'centurylinkcloud_set': { + MESSAGE: + 'This benchmark set is supported on CenturyLink Cloud.', + BENCHMARK_LIST: [ + 'cassandra_stress', + 'copy_throughput', + 'hpcc', + 'iperf', + 'mesh_network', + 'mongodb_ycsb', + 'ping', + 'redis_memtier', + 'sysbench', + 'unixbench', + ] + }, + 'cisco_set': { + MESSAGE: 'Cisco benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'cloudharmony_set': { + MESSAGE: 'CloudHarmony benchmark set.', + BENCHMARK_LIST: [ + 'speccpu2006', + 'unixbench', + ] + }, + 'cloudspectator_set': { + MESSAGE: 'CloudSpectator benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'google_set': { + MESSAGE: ('This benchmark set is maintained by Google Cloud Platform ' + 'Performance Team.'), + BENCHMARK_LIST: 
[ + 'aerospike_ycsb', + 'bidirectional_network', + 'block_storage_workload', + 'cassandra_stress', + 'cassandra_ycsb', + 'cluster_boot', + 'copy_throughput', + 'fio', + 'gpu_pcie_bandwidth', + 'hadoop_terasort', + 'horovod', + 'hpcc', + 'hpcg', + 'inception3', + 'iperf', + 'mesh_network', + 'mlperf', + 'mlperf_multiworkers', + 'mnist', + 'mongodb_ycsb', + 'multichase', + 'mxnet', + 'netperf', + 'object_storage_service', + 'oldisim', + 'pgbench', + 'ping', + 'redis_ycsb', + 'resnet', + 'stencil2d', + 'speccpu2006', + 'sysbench', + 'tensorflow', + 'tensorflow_serving', + 'tomcat_wrk', + 'unixbench', + ] + }, + 'intel_set': { + MESSAGE: + 'Intel benchmark set.', + BENCHMARK_LIST: [ + 'fio', + 'iperf', + 'unixbench', + 'hpcc', + 'cluster_boot', + 'redis_memtier', + 'cassandra_stress', + 'object_storage_service', + 'sysbench', + ] + }, + 'kubernetes_set': { + MESSAGE: + 'Kubernetes benchmark set.', + BENCHMARK_LIST: [ + 'block_storage_workload', + 'cassandra_ycsb', + 'cassandra_stress', + 'cluster_boot', + 'fio', + 'iperf', + 'mesh_network', + 'mongodb_ycsb', + 'netperf', + 'redis_memtier', + 'sysbench', + ] + }, + 'mellanox_set': { + MESSAGE: 'Mellanox benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'microsoft_set': { + MESSAGE: 'Microsoft benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'qualcomm_technologies_set': { + MESSAGE: 'Qualcomm Technologies, Inc. benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'rackspace_set': { + MESSAGE: + 'Rackspace benchmark set.', + BENCHMARK_LIST: [ + 'aerospike', + 'block_storage_workload', + 'cassandra_stress', + 'cluster_boot', + 'copy_throughput', + 'fio', + 'hpcc', + 'iperf', + 'mesh_network', + 'mongodb_ycsb', + 'netperf', + 'oldisim', + 'ping', + 'redis_memtier', + 'silo', + 'sysbench', + 'unixbench', + ] + }, + 'red_hat_set': { + MESSAGE: 'Red Hat benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'tradeworx_set': { + MESSAGE: 'Tradeworx Inc. benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'thesys_technologies_set': { + MESSAGE: 'Thesys Technologies LLC. benchmark set.', + BENCHMARK_LIST: [STANDARD_SET] + }, + 'stanford_set': { + MESSAGE: 'Stanford University benchmark set.', + BENCHMARK_LIST: [STANDARD_SET, 'oldisim'] + }, + 'mit_set': { + MESSAGE: 'Massachusetts Institute of Technology benchmark set.', + BENCHMARK_LIST: [STANDARD_SET, 'silo'] + }, + 'cloudsuite_set': { + MESSAGE: + 'CloudSuite benchmark set.', + BENCHMARK_LIST: [ + 'cloudsuite_data_analytics', + 'cloudsuite_data_caching', + 'cloudsuite_graph_analytics', + 'cloudsuite_in_memory_analytics', + 'cloudsuite_media_streaming', + 'cloudsuite_web_search', + 'cloudsuite_web_serving', + ] + } +} + + +class FlagMatrixNotFoundException(Exception): + pass + + +class FlagZipNotFoundException(Exception): + pass + + +def _GetValidBenchmarks(): + """Returns a dict mapping valid benchmark names to their modules.""" + if FLAGS.os_type in os_types.CONTAINER_OS_TYPES: + return {'cluster_boot': linux_benchmarks.VALID_BENCHMARKS['cluster_boot']} + elif FLAGS.os_type in os_types.WINDOWS_OS_TYPES: + return windows_benchmarks.VALID_BENCHMARKS + return linux_benchmarks.VALID_BENCHMARKS + + +def _GetValidPackages(): + """Returns a dict mapping valid package names to their modules.""" + if FLAGS.os_type in os_types.CONTAINER_OS_TYPES: + return {} + elif FLAGS.os_type in os_types.WINDOWS_OS_TYPES: + return windows_packages.PACKAGES + return linux_packages.PACKAGES + + +def BenchmarkModule(benchmark_name): + """Finds the module for a benchmark by name. 
+ + Args: + benchmark_name: The name of the benchmark. + + Returns: + The benchmark's module, or None if the benchmark is invalid. + """ + valid_benchmarks = _GetValidBenchmarks() + return valid_benchmarks.get(benchmark_name) + + +def PackageModule(package_name): + """Finds the module for a package by name. + + Args: + package_name: The name of the package. + + Returns: + The package's module, or None if the package_name is invalid. + """ + packages = _GetValidPackages() + return packages.get(package_name) + + +def _GetBenchmarksFromUserConfig(user_config): + """Returns a list of benchmark module, config tuples.""" + benchmarks = user_config.get('benchmarks', []) + valid_benchmarks = _GetValidBenchmarks() + benchmark_config_list = [] + + for entry in benchmarks: + name, user_config = entry.popitem() + try: + benchmark_module = valid_benchmarks[name] + except KeyError: + raise ValueError('Benchmark "%s" not valid on os_type "%s"' % + (name, FLAGS.os_type)) + benchmark_config_list.append((benchmark_module, user_config)) + + return benchmark_config_list + + +def _GetConfigForAxis(benchmark_config, flag_config): + config = copy.copy(benchmark_config) + config_local_flags = config.get('flags', {}) + config['flags'] = copy.deepcopy(configs.GetConfigFlags()) + config['flags'].update(config_local_flags) + for setting in flag_config: + config['flags'].update(setting) + return config + + +def _AssertZipAxesHaveSameLength(axes): + expected_length = len(axes[0]) + for axis in axes[1:]: + if len(axis) != expected_length: + raise ValueError('flag_zip axes must all be the same length') + + +def _AssertFlagMatrixAndZipDefsExist(benchmark_config, + flag_matrix_name, + flag_zip_name): + """Asserts that specified flag_matrix and flag_zip exist. + + Both flag_matrix_name and flag_zip_name can be None, meaning that the user + (or the benchmark_config) did not specify them. + + Args: + benchmark_config: benchmark_config + flag_matrix_name: name of the flag_matrix_def specified by the user via a + flag, specified in the benchmark_config, or None. + flag_zip_name: name of the flag_zip_def specified by the user via a flag, + specified in the benchmark_config, or None. + + Raises: + FlagMatrixNotFoundException: if flag_matrix_name is not None, and is not + found in the flag_matrix_defs section of the benchmark_config. + FlagZipNotFoundException: if flag_zip_name is not None, and is not + found in the flag_zip_defs section of the benchmark_config. + """ + if (flag_matrix_name and + flag_matrix_name not in + benchmark_config.get('flag_matrix_defs', {})): + raise FlagMatrixNotFoundException('No flag_matrix with name {0}' + .format(flag_matrix_name)) + if (flag_zip_name and + flag_zip_name not in + benchmark_config.get('flag_zip_defs', {})): + raise FlagZipNotFoundException('No flag_zip with name {0}' + .format(flag_zip_name)) + + +def GetBenchmarksFromFlags(): + """Returns a list of benchmarks to run based on the benchmarks flag. + + If no benchmarks (or sets) are specified, this will return the standard set. + If multiple sets or mixes of sets and benchmarks are specified, this will + return the union of all sets and individual benchmarks. 
+ + Raises: + ValueError: when benchmark_name is not valid for os_type supplied + """ + user_config = configs.GetUserConfig() + benchmark_config_list = _GetBenchmarksFromUserConfig(user_config) + if benchmark_config_list and not FLAGS['benchmarks'].present: + return benchmark_config_list + + benchmark_queue = collections.deque(FLAGS.benchmarks) + benchmark_names = [] + benchmark_set = set() + + while benchmark_queue: + benchmark = benchmark_queue.popleft() + if benchmark in benchmark_set: + continue + benchmark_set.add(benchmark) + if benchmark in BENCHMARK_SETS: + benchmark_queue.extendleft(BENCHMARK_SETS[benchmark][BENCHMARK_LIST]) + else: + benchmark_names.append(benchmark) + + valid_benchmarks = _GetValidBenchmarks() + + # create a list of module, config tuples to return + benchmark_config_list = [] + for benchmark_name in benchmark_names: + benchmark_config = user_config.get(benchmark_name, {}) + benchmark_name = benchmark_config.get('name', benchmark_name) + benchmark_module = valid_benchmarks.get(benchmark_name) + + if benchmark_module is None: + raise ValueError('Benchmark "%s" not valid on os_type "%s"' % + (benchmark_name, FLAGS.os_type)) + + flag_matrix_name = ( + FLAGS.flag_matrix or benchmark_config.get('flag_matrix', None) + ) + flag_zip_name = ( + FLAGS.flag_zip or benchmark_config.get('flag_zip', None) + ) + _AssertFlagMatrixAndZipDefsExist(benchmark_config, + flag_matrix_name, + flag_zip_name) + + # We need to remove the 'flag_matrix', 'flag_matrix_defs', 'flag_zip', + # 'flag_zip_defs', and 'flag_matrix_filters' keys from the config + # dictionary since they aren't actually part of the config spec and will + # cause errors if they are left in. + benchmark_config.pop('flag_matrix', None) + benchmark_config.pop('flag_zip', None) + + flag_matrix = benchmark_config.pop( + 'flag_matrix_defs', {}).get(flag_matrix_name, {}) + flag_matrix_filter = benchmark_config.pop( + 'flag_matrix_filters', {}).get(flag_matrix_name, {}) + flag_zip = benchmark_config.pop( + 'flag_zip_defs', {}).get(flag_zip_name, {}) + + zipped_axes = [] + crossed_axes = [] + if flag_zip: + flag_axes = [] + for flag, values in six.iteritems(flag_zip): + flag_axes.append([{flag: v} for v in values]) + + _AssertZipAxesHaveSameLength(flag_axes) + + for flag_config in zip(*flag_axes): + config = _GetConfigForAxis(benchmark_config, flag_config) + zipped_axes.append((benchmark_module, config)) + + crossed_axes.append([benchmark_tuple[1]['flags'] for + benchmark_tuple in zipped_axes]) + + for flag, values in sorted(six.iteritems(flag_matrix)): + crossed_axes.append([{flag: v} for v in values]) + + for flag_config in itertools.product(*crossed_axes): + config = _GetConfigForAxis(benchmark_config, flag_config) + if (flag_matrix_filter and not eval( + flag_matrix_filter, {}, config['flags'])): + continue + + benchmark_config_list.extend([(benchmark_module, config)] * + FLAGS.num_benchmark_copies) + + return benchmark_config_list diff --git a/script/cumulus/pkb/perfkitbenchmarker/benchmark_spec.py b/script/cumulus/pkb/perfkitbenchmarker/benchmark_spec.py new file mode 100644 index 0000000..7f903cb --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/benchmark_spec.py @@ -0,0 +1,1065 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Container for all data required for a benchmark to run.""" + + +import contextlib +import copy +import datetime +import importlib +import logging +import os +import pickle +import threading +import uuid + +from absl import flags +from perfkitbenchmarker import benchmark_status +from perfkitbenchmarker import capacity_reservation +from perfkitbenchmarker import cloud_tpu +from perfkitbenchmarker import container_service +from perfkitbenchmarker import context +from perfkitbenchmarker import data_discovery_service +from perfkitbenchmarker import disk +from perfkitbenchmarker import dpb_service +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import messaging_service +from perfkitbenchmarker import nfs_service +from perfkitbenchmarker import non_relational_db +from perfkitbenchmarker import os_types +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import smb_service +from perfkitbenchmarker import spark_service +from perfkitbenchmarker import stages +from perfkitbenchmarker import static_virtual_machine as static_vm +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import vpn_service +from perfkitbenchmarker.configs import freeze_restore_spec +from perfkitbenchmarker.providers.gcp import gcp_spanner +import six +from six.moves import range +import six.moves._thread +import six.moves.copyreg + + +def PickleLock(lock): + return UnPickleLock, (lock.locked(),) + + +def UnPickleLock(locked, *args): + lock = threading.Lock() + if locked: + if not lock.acquire(False): + raise pickle.UnpicklingError('Cannot acquire lock') + return lock + +six.moves.copyreg.pickle(six.moves._thread.LockType, PickleLock) + +SUPPORTED = 'strict' +NOT_EXCLUDED = 'permissive' +SKIP_CHECK = 'none' +# GCP labels only allow hyphens (-), underscores (_), lowercase characters, and +# numbers and International characters. +# metadata allow all characters and numbers. +METADATA_TIME_FORMAT = '%Y%m%dt%H%M%Sz' +FLAGS = flags.FLAGS + +flags.DEFINE_enum('cloud', providers.GCP, providers.VALID_CLOUDS, + 'Name of the cloud to use.') +flags.DEFINE_string('scratch_dir', None, + 'Base name for all scratch disk directories in the VM. ' + 'Upon creation, these directories will have numbers ' + 'appended to them (for example /scratch0, /scratch1, etc).') +flags.DEFINE_string('startup_script', None, + 'Script to run right after vm boot.') +flags.DEFINE_string('postrun_script', None, + 'Script to run right after run stage.') +flags.DEFINE_integer('create_and_boot_post_task_delay', None, + 'Delay in seconds to delay in between boot tasks.') +# pyformat: disable +flags.DEFINE_enum('benchmark_compatibility_checking', SUPPORTED, + [SUPPORTED, NOT_EXCLUDED, SKIP_CHECK], + 'Method used to check compatibility between the benchmark ' + ' and the cloud. 
' + SUPPORTED + ' runs the benchmark only' + ' if the cloud provider has declared it supported. ' + + NOT_EXCLUDED + ' runs the benchmark unless it has been' + ' declared not supported by the cloud provider. ' + SKIP_CHECK + + ' does not do the compatibility' + ' check.') +# pyformat: enable + + +class BenchmarkSpec(object): + """Contains the various data required to make a benchmark run.""" + + total_benchmarks = 0 + + def __init__(self, benchmark_module, benchmark_config, benchmark_uid): + """Initialize a BenchmarkSpec object. + + Args: + benchmark_module: The benchmark module object. + benchmark_config: BenchmarkConfigSpec. The configuration for the + benchmark. + benchmark_uid: An identifier unique to this run of the benchmark even + if the same benchmark is run multiple times with different configs. + """ + self.config = benchmark_config + self.control_traces = False + self.name = benchmark_module.BENCHMARK_NAME + self.uid = benchmark_uid + self.status = benchmark_status.SKIPPED + self.failed_substatus = None + self.status_detail = None + BenchmarkSpec.total_benchmarks += 1 + self.sequence_number = BenchmarkSpec.total_benchmarks + self.vms = [] + self.regional_networks = {} + self.networks = {} + self.custom_subnets = {k: { + 'cloud': v.cloud, + 'cidr': v.cidr} for (k, v) in self.config.vm_groups.items()} + self.firewalls = {} + self.networks_lock = threading.Lock() + self.firewalls_lock = threading.Lock() + self.vm_groups = {} + self.container_specs = benchmark_config.container_specs or {} + self.container_registry = None + self.deleted = False + self.uuid = '%s-%s' % (FLAGS.run_uri, uuid.uuid4()) + self.always_call_cleanup = True + self.spark_service = None + self.dpb_service = None + self.container_cluster = None + self.relational_db = None + self.non_relational_db = None + self.spanner = None + self.tpus = [] + self.tpu_groups = {} + self.edw_service = None + self.nfs_service = None + self.smb_service = None + self.messaging_service = None + self.data_discovery_service = None + self.app_groups = {} + self._zone_index = 0 + self.additional_private_addresses_count = 0 + self.capacity_reservations = [] + self.placement_group_specs = benchmark_config.placement_group_specs or {} + self.placement_groups = {} + self.vms_to_boot = ( + self.config.vm_groups if self.config.relational_db is None else + relational_db.VmsToBoot(self.config.relational_db.vm_groups)) + self.vpc_peering = self.config.vpc_peering + + self.vpn_service = None + self.vpns = {} # dict of vpn's + self.vpn_gateways = {} # dict of vpn gw's + self.vpn_gateways_lock = threading.Lock() + self.vpns_lock = threading.Lock() + + self.restore_spec = None + self.freeze_path = None + + # Intel collector contributions + self.workload_name = None + self.software_config_metadata = {} + self.tunable_parameters_metadata = {} + self.sut_vm_group = None + self.s3_archive_url = None + self.s3_reports = [] + # End Intel collector contributions + + # Modules can't be pickled, but functions can, so we store the functions + # necessary to run the benchmark. + self.BenchmarkPrepare = benchmark_module.Prepare + self.BenchmarkRun = benchmark_module.Run + self.BenchmarkCleanup = benchmark_module.Cleanup + + # Each benchmark may implement a GetUsage method if it has + # additional information to display + if hasattr(benchmark_module, 'GetUsage'): + self.BenchmarkGetUsage = benchmark_module.GetUsage + + # Set the current thread's BenchmarkSpec object to this one. 
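+    # (A guess at the companion accessor: other parts of PKB presumably read
+    # this spec back via something like context.GetThreadBenchmarkSpec();
+    # that name is an assumption here, see context.py for the actual getter.)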
+ context.SetThreadBenchmarkSpec(self) + + def __repr__(self): + return '%s(%r)' % (self.__class__, self.__dict__) + + def __str__(self): + return( + 'Benchmark name: {0}\nFlags: {1}' + .format(self.name, self.config.flags)) + + @contextlib.contextmanager + def RedirectGlobalFlags(self): + """Redirects flag reads and writes to the benchmark-specific flags object. + + Within the enclosed code block, reads and writes to the flags.FLAGS object + are redirected to a copy that has been merged with config-provided flag + overrides specific to this benchmark run. + """ + with self.config.RedirectFlags(FLAGS): + yield + + def _InitializeFromSpec( + self, attribute_name: str, + resource_spec: freeze_restore_spec.FreezeRestoreSpec) -> bool: + """Initializes the BenchmarkSpec attribute from the restore_spec. + + Args: + attribute_name: The attribute to restore. + resource_spec: The spec class corresponding to the resource to be + restored. + + Returns: + True if successful, False otherwise. + """ + if not hasattr(self, 'restore_spec'): + return False + if not self.restore_spec or not hasattr(self.restore_spec, attribute_name): + return False + if not resource_spec.enable_freeze_restore: + return False + logging.info('Getting %s instance from restore_spec', attribute_name) + frozen_resource = copy.copy(getattr(self.restore_spec, attribute_name)) + setattr(self, attribute_name, frozen_resource) + return True + + def ConstructContainerCluster(self): + """Create the container cluster.""" + if self.config.container_cluster is None: + return + cloud = self.config.container_cluster.cloud + cluster_type = self.config.container_cluster.type + providers.LoadProvider(cloud) + container_cluster_class = container_service.GetContainerClusterClass( + cloud, cluster_type) + self.container_cluster = container_cluster_class( + self.config.container_cluster) + + def ConstructContainerRegistry(self): + """Create the container registry.""" + if self.config.container_registry is None: + return + cloud = self.config.container_registry.cloud + providers.LoadProvider(cloud) + container_registry_class = container_service.GetContainerRegistryClass( + cloud) + self.container_registry = container_registry_class( + self.config.container_registry) + + def ConstructDpbService(self): + """Create the dpb_service object and create groups for its vms.""" + if self.config.dpb_service is None: + return + dpb_service_spec = self.config.dpb_service + dpb_service_cloud = dpb_service_spec.worker_group.cloud + dpb_service_spec.worker_group.vm_count = dpb_service_spec.worker_count + providers.LoadProvider(dpb_service_cloud) + + dpb_service_type = dpb_service_spec.service_type + dpb_service_class = dpb_service.GetDpbServiceClass(dpb_service_cloud, + dpb_service_type) + self.dpb_service = dpb_service_class(dpb_service_spec) + + # If the dpb service is un-managed, the provisioning needs to be handed + # over to the vm creation module. + if dpb_service_type in [ + dpb_service.UNMANAGED_DPB_SVC_YARN_CLUSTER, + dpb_service.UNMANAGED_SPARK_CLUSTER + ]: + # Ensure non cluster vms are not present in the spec. + if self.vms_to_boot: + raise Exception('Invalid Non cluster vm group {0} when benchmarking ' + 'unmanaged dpb service'.format(self.vms_to_boot)) + + base_vm_spec = dpb_service_spec.worker_group + base_vm_spec.vm_spec.zone = self.dpb_service.dpb_service_zone + + if dpb_service_spec.worker_count: + self.vms_to_boot['worker_group'] = dpb_service_spec.worker_group + # else we have a single node cluster. 
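+      # Rough illustration (an assumption, not from any config docs): with
+      # worker_count=2 the unmanaged cluster boots one master_group vm and
+      # two worker_group vms; with worker_count=0 only the single
+      # master_group vm created below is booted and serves as the whole
+      # cluster.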
+ + master_group_spec = copy.copy(base_vm_spec) + master_group_spec.vm_count = 1 + self.vms_to_boot['master_group'] = master_group_spec + + def ConstructRelationalDb(self): + """Create the relational db and create groups for its vms.""" + if self.config.relational_db is None: + return + cloud = self.config.relational_db.cloud + is_managed_db = self.config.relational_db.is_managed_db + engine = self.config.relational_db.engine + providers.LoadProvider(cloud) + relational_db_class = ( + relational_db.GetRelationalDbClass(cloud, is_managed_db, engine)) + self.relational_db = relational_db_class(self.config.relational_db) + + def ConstructNonRelationalDb(self) -> None: + """Initializes the non_relational db.""" + db_spec: non_relational_db.BaseNonRelationalDbSpec = self.config.non_relational_db + if not db_spec: + return + # Initialization from restore spec + if self._InitializeFromSpec('non_relational_db', db_spec): + return + # Initialization from benchmark config spec + logging.info('Constructing non_relational_db instance with spec: %s.', + db_spec) + service_type = db_spec.service_type + non_relational_db_class = non_relational_db.GetNonRelationalDbClass( + service_type) + self.non_relational_db = non_relational_db_class.FromSpec(db_spec) + + def ConstructSpanner(self) -> None: + """Initializes the spanner instance.""" + spanner_spec: gcp_spanner.SpannerSpec = self.config.spanner + if not spanner_spec: + return + # Initialization from restore spec + if self._InitializeFromSpec('spanner', spanner_spec): + return + # Initialization from benchmark config spec + logging.info('Constructing spanner instance with spec: %s.', spanner_spec) + spanner_class = gcp_spanner.GetSpannerClass(spanner_spec.service_type) + self.spanner = spanner_class.FromSpec(spanner_spec) + + def ConstructTpuGroup(self, group_spec): + """Constructs the BenchmarkSpec's cloud TPU objects.""" + if group_spec is None: + return + cloud = group_spec.cloud + providers.LoadProvider(cloud) + tpu_class = cloud_tpu.GetTpuClass(cloud) + return tpu_class(group_spec) + + def ConstructTpu(self): + """Constructs the BenchmarkSpec's cloud TPU objects.""" + tpu_group_specs = self.config.tpu_groups + + for group_name, group_spec in sorted(six.iteritems(tpu_group_specs)): + tpu = self.ConstructTpuGroup(group_spec) + + self.tpu_groups[group_name] = tpu + self.tpus.append(tpu) + + def ConstructEdwService(self): + """Create the edw_service object.""" + if self.config.edw_service is None: + return + # Load necessary modules from the provider to account for dependencies + # TODO(saksena): Replace with + # providers.LoadProvider(string.lower(FLAGS.cloud)) + providers.LoadProvider( + edw_service.TYPE_2_PROVIDER.get(self.config.edw_service.type)) + # Load the module for the edw service based on type + edw_service_type = self.config.edw_service.type + edw_service_module = importlib.import_module( + edw_service.TYPE_2_MODULE.get(edw_service_type)) + # The edw_service_type in certain cases may be qualified with a hosting + # cloud eg. snowflake_aws,snowflake_gcp, etc. + # However the edw_service_class_name in all cases will still be cloud + # agnostic eg. Snowflake. + edw_service_class_name = edw_service_type.split('_')[0] + edw_service_class = getattr( + edw_service_module, + edw_service_class_name[0].upper() + edw_service_class_name[1:]) + # Check if a new instance needs to be created or restored from snapshot + self.edw_service = edw_service_class(self.config.edw_service) + + def ConstructNfsService(self): + """Construct the NFS service object. 
+ + Creates an NFS Service only if an NFS disk is found in the disk_specs. + """ + if self.nfs_service: + logging.info('NFS service already created: %s', self.nfs_service) + return + for group_spec in self.vms_to_boot.values(): + if not group_spec.disk_spec or not group_spec.vm_count: + continue + disk_spec = group_spec.disk_spec + if disk_spec.disk_type != disk.NFS: + continue + # Choose which nfs_service to create. + if disk_spec.nfs_ip_address: + self.nfs_service = nfs_service.StaticNfsService(disk_spec) + elif disk_spec.nfs_managed: + cloud = group_spec.cloud + providers.LoadProvider(cloud) + nfs_class = nfs_service.GetNfsServiceClass(cloud) + self.nfs_service = nfs_class(disk_spec, group_spec.vm_spec.zone) + else: + self.nfs_service = nfs_service.UnmanagedNfsService(disk_spec, + self.vms[0]) + logging.debug('NFS service %s', self.nfs_service) + break + + def ConstructSmbService(self): + """Construct the SMB service object. + + Creates an SMB Service only if an SMB disk is found in the disk_specs. + """ + if self.smb_service: + logging.info('SMB service already created: %s', self.smb_service) + return + for group_spec in self.vms_to_boot.values(): + if not group_spec.disk_spec or not group_spec.vm_count: + continue + disk_spec = group_spec.disk_spec + if disk_spec.disk_type != disk.SMB: + continue + + cloud = group_spec.cloud + providers.LoadProvider(cloud) + smb_class = smb_service.GetSmbServiceClass(cloud) + self.smb_service = smb_class(disk_spec, group_spec.vm_spec.zone) + logging.debug('SMB service %s', self.smb_service) + break + + def ConstructVirtualMachineGroup(self, group_name, group_spec): + """Construct the virtual machine(s) needed for a group.""" + vms = [] + + vm_count = group_spec.vm_count + disk_count = group_spec.disk_count + + # First create the Static VM objects. + if group_spec.static_vms: + specs = [ + spec for spec in group_spec.static_vms + if (FLAGS.static_vm_tags is None or spec.tag in FLAGS.static_vm_tags) + ][:vm_count] + for vm_spec in specs: + static_vm_class = static_vm.GetStaticVmClass(vm_spec.os_type) + vms.append(static_vm_class(vm_spec)) + + os_type = group_spec.os_type + cloud = group_spec.cloud + + # This throws an exception if the benchmark is not + # supported. + self._CheckBenchmarkSupport(cloud) + + # Then create the remaining VM objects using VM and disk specs. + + if group_spec.disk_spec: + disk_spec = group_spec.disk_spec + # disk_spec.disk_type may contain legacy values that were + # copied from FLAGS.scratch_disk_type into + # FLAGS.data_disk_type at the beginning of the run. We + # translate them here, rather than earlier, because here is + # where we know what cloud we're using and therefore we're + # able to pick the right translation table. + disk_spec.disk_type = disk.WarnAndTranslateDiskTypes( + disk_spec.disk_type, cloud) + else: + disk_spec = None + + if group_spec.placement_group_name: + group_spec.vm_spec.placement_group = self.placement_groups[ + group_spec.placement_group_name] + + for _ in range(vm_count - len(vms)): + # Assign a zone to each VM sequentially from the --zones flag. 
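+      # Hypothetical walk-through: with --zones=us-east1-b,us-east1-c and
+      # three VMs in this group, the assignments below are us-east1-b,
+      # us-east1-c, then us-east1-b again (the index wraps around the
+      # combined zone list).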
+ if FLAGS.zones or FLAGS.extra_zones or FLAGS.zone: + zone_list = FLAGS.zones + FLAGS.extra_zones + FLAGS.zone + group_spec.vm_spec.zone = zone_list[self._zone_index] + self._zone_index = (self._zone_index + 1 + if self._zone_index < len(zone_list) - 1 else 0) + # you might need this info in certain providers, e.g Kubernetes + group_spec.vm_spec.group_name = group_name + if group_spec.cidr: # apply cidr range to all vms in vm_group + group_spec.vm_spec.cidr = group_spec.cidr + vm = self._CreateVirtualMachine(group_spec.vm_spec, os_type, cloud) + if disk_spec and not vm.is_static: + if disk_spec.disk_type == disk.LOCAL and disk_count is None: + disk_count = vm.max_local_disks + vm.disk_specs = [copy.copy(disk_spec) for _ in range(disk_count)] + # In the event that we need to create multiple disks from the same + # DiskSpec, we need to ensure that they have different mount points. + if (disk_count > 1 and disk_spec.mount_point): + for i, vm_disk_spec in enumerate(vm.disk_specs): + vm_disk_spec.mount_point += str(i) + vm.vm_group = group_name + vms.append(vm) + + return vms + + def ConstructCapacityReservations(self): + """Construct capacity reservations for each VM group.""" + if not FLAGS.use_capacity_reservations: + return + for vm_group in six.itervalues(self.vm_groups): + cloud = vm_group[0].CLOUD + providers.LoadProvider(cloud) + capacity_reservation_class = capacity_reservation.GetResourceClass( + cloud) + self.capacity_reservations.append( + capacity_reservation_class(vm_group)) + + def _CheckBenchmarkSupport(self, cloud): + """Throw an exception if the benchmark isn't supported.""" + + if FLAGS.benchmark_compatibility_checking == SKIP_CHECK: + return + + provider_info_class = provider_info.GetProviderInfoClass(cloud) + benchmark_ok = provider_info_class.IsBenchmarkSupported(self.name) + if FLAGS.benchmark_compatibility_checking == NOT_EXCLUDED: + if benchmark_ok is None: + benchmark_ok = True + + if not benchmark_ok: + raise ValueError('Provider {0} does not support {1}. Use ' + '--benchmark_compatibility_checking=none ' + 'to override this check.'.format( + provider_info_class.CLOUD, self.name)) + + def _ConstructJujuController(self, group_spec): + """Construct a VirtualMachine object for a Juju controller.""" + juju_spec = copy.copy(group_spec) + juju_spec.vm_count = 1 + jujuvms = self.ConstructVirtualMachineGroup('juju', juju_spec) + if len(jujuvms): + jujuvm = jujuvms.pop() + jujuvm.is_controller = True + return jujuvm + return None + + def ConstructVirtualMachines(self): + """Constructs the BenchmarkSpec's VirtualMachine objects.""" + + self.ConstructPlacementGroups() + + vm_group_specs = self.vms_to_boot + + clouds = {} + for group_name, group_spec in sorted(six.iteritems(vm_group_specs)): + vms = self.ConstructVirtualMachineGroup(group_name, group_spec) + + if group_spec.os_type == os_types.JUJU: + # The Juju VM needs to be created first, so that subsequent units can + # be properly added under its control. + if group_spec.cloud in clouds: + jujuvm = clouds[group_spec.cloud] + else: + jujuvm = self._ConstructJujuController(group_spec) + clouds[group_spec.cloud] = jujuvm + + for vm in vms: + vm.controller = clouds[group_spec.cloud] + + jujuvm.units.extend(vms) + if jujuvm and jujuvm not in self.vms: + self.vms.extend([jujuvm]) + self.vm_groups['%s_juju_controller' % group_spec.cloud] = [jujuvm] + + self.vm_groups[group_name] = vms + self.vms.extend(vms) + # If we have a spark service, it needs to access the master_group and + # the worker group. 
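+    # (Assumed shape for a typical two-group config: after the loop above,
+    # self.vm_groups might look like {'master_group': [vm_0], 'worker_group':
+    # [vm_1, vm_2]}, with self.vms holding the same VMs flattened; the group
+    # names here are only an example.)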
+ if (self.config.spark_service and + self.config.spark_service.service_type == spark_service.PKB_MANAGED): + for group_name in 'master_group', 'worker_group': + self.spark_service.vms[group_name] = self.vm_groups[group_name] + + # In the case of an un-managed yarn cluster, for hadoop software + # installation, the dpb service instance needs access to constructed + # master group and worker group. + if (self.config.dpb_service and self.config.dpb_service.service_type in [ + dpb_service.UNMANAGED_DPB_SVC_YARN_CLUSTER, + dpb_service.UNMANAGED_SPARK_CLUSTER]): + self.dpb_service.vms['master_group'] = self.vm_groups['master_group'] + if self.config.dpb_service.worker_count: + self.dpb_service.vms['worker_group'] = self.vm_groups['worker_group'] + else: # single node cluster + self.dpb_service.vms['worker_group'] = [] + + def ConstructPlacementGroups(self): + for placement_group_name, placement_group_spec in six.iteritems( + self.placement_group_specs): + self.placement_groups[placement_group_name] = self._CreatePlacementGroup( + placement_group_spec, placement_group_spec.CLOUD) + + def ConstructSparkService(self): + """Create the spark_service object and create groups for its vms.""" + if self.config.spark_service is None: + return + + spark_spec = self.config.spark_service + # Worker group is required, master group is optional + cloud = spark_spec.worker_group.cloud + if spark_spec.master_group: + cloud = spark_spec.master_group.cloud + providers.LoadProvider(cloud) + service_type = spark_spec.service_type + spark_service_class = spark_service.GetSparkServiceClass( + cloud, service_type) + self.spark_service = spark_service_class(spark_spec) + # If this is Pkb managed, the benchmark spec needs to adopt vms. + if service_type == spark_service.PKB_MANAGED: + for name, group_spec in [('master_group', spark_spec.master_group), + ('worker_group', spark_spec.worker_group)]: + if name in self.vms_to_boot: + raise Exception('Cannot have a vm group {0} with a {1} spark ' + 'service'.format(name, spark_service.PKB_MANAGED)) + self.vms_to_boot[name] = group_spec + + def ConstructVPNService(self): + """Create the VPNService object.""" + if self.config.vpn_service is None: + return + self.vpn_service = vpn_service.VPNService(self.config.vpn_service) + + def ConstructMessagingService(self): + """Create the messaging_service object. + + Assumes VMs are already constructed. 
+ """ + if self.config.messaging_service is None: + return + cloud = self.config.messaging_service.cloud + delivery = self.config.messaging_service.delivery + providers.LoadProvider(cloud) + messaging_service_class = messaging_service.GetMessagingServiceClass( + cloud, delivery + ) + self.messaging_service = messaging_service_class.FromSpec( + self.config.messaging_service) + self.messaging_service.setVms(self.vm_groups) + + def ConstructDataDiscoveryService(self): + """Create the data_discovery_service object.""" + if not self.config.data_discovery_service: + return + cloud = self.config.data_discovery_service.cloud + service_type = self.config.data_discovery_service.service_type + providers.LoadProvider(cloud) + data_discovery_service_class = ( + data_discovery_service.GetDataDiscoveryServiceClass(cloud, service_type) + ) + self.data_discovery_service = data_discovery_service_class.FromSpec( + self.config.data_discovery_service) + + def Prepare(self): + targets = [(vm.PrepareBackgroundWorkload, (), {}) for vm in self.vms] + vm_util.RunParallelThreads(targets, len(targets)) + + def Provision(self): + """Prepares the VMs and networks necessary for the benchmark to run.""" + should_restore = hasattr(self, 'restore_spec') and self.restore_spec + # Create capacity reservations if the cloud supports it. Note that the + # capacity reservation class may update the VMs themselves. This is true + # on AWS, because the VM needs to be aware of the capacity reservation id + # before its Create() method is called. Furthermore, if the user does not + # specify an AWS zone, but a region instead, the AwsCapacityReservation + # class will make a reservation in a zone that has sufficient capacity. + # In this case the VM's zone attribute, and the VMs network instance + # need to be updated as well. + if self.capacity_reservations: + vm_util.RunThreaded(lambda res: res.Create(), self.capacity_reservations) + + # Sort networks into a guaranteed order of creation based on dict key. + # There is a finite limit on the number of threads that are created to + # provision networks. Until support is added to provision resources in an + # order based on dependencies, this key ordering can be used to avoid + # deadlock by placing dependent networks later and their dependencies + # earlier. + networks = [ + self.networks[key] for key in sorted(six.iterkeys(self.networks)) + ] + + vm_util.RunThreaded(lambda net: net.Create(), networks) + + # VPC peering is currently only supported for connecting 2 VPC networks + if self.vpc_peering: + if len(networks) > 2: + raise errors.Error( + 'Networks of size %d are not currently supported.' 
% + (len(networks))) + # Ignore Peering for one network + elif len(networks) == 2: + networks[0].Peer(networks[1]) + + if self.container_registry: + self.container_registry.Create() + for container_spec in six.itervalues(self.container_specs): + if container_spec.static_image: + continue + container_spec.image = self.container_registry.GetOrBuild( + container_spec.image) + + if self.container_cluster: + self.container_cluster.Create() + + # do after network setup but before VM created + if self.nfs_service and self.nfs_service.CLOUD != nfs_service.UNMANAGED: + self.nfs_service.Create() + if self.smb_service: + self.smb_service.Create() + + for placement_group_object in self.placement_groups.values(): + placement_group_object.Create() + + if self.vms: + + # We separate out creating, booting, and preparing the VMs into two phases + # so that we don't slow down the creation of all the VMs by running + # commands on the VMs that booted. + vm_util.RunThreaded( + self.CreateAndBootVm, + self.vms, + post_task_delay=FLAGS.create_and_boot_post_task_delay) + if self.nfs_service and self.nfs_service.CLOUD == nfs_service.UNMANAGED: + self.nfs_service.Create() + vm_util.RunThreaded(self.PrepareVmAfterBoot, self.vms) + + sshable_vms = [ + vm for vm in self.vms if vm.OS_TYPE not in os_types.WINDOWS_OS_TYPES + ] + sshable_vm_groups = {} + for group_name, group_vms in six.iteritems(self.vm_groups): + sshable_vm_groups[group_name] = [ + vm for vm in group_vms + if vm.OS_TYPE not in os_types.WINDOWS_OS_TYPES + ] + vm_util.GenerateSSHConfig(sshable_vms, sshable_vm_groups) + if self.spark_service: + self.spark_service.Create() + if self.dpb_service: + self.dpb_service.Create() + if hasattr(self, 'relational_db') and self.relational_db: + self.relational_db.SetVms(self.vm_groups) + self.relational_db.Create() + if self.non_relational_db: + self.non_relational_db.Create(restore=should_restore) + if self.spanner: + self.spanner.Create(restore=should_restore) + if self.tpus: + vm_util.RunThreaded(lambda tpu: tpu.Create(), self.tpus) + if self.edw_service: + if (not self.edw_service.user_managed and + self.edw_service.SERVICE_TYPE == 'redshift'): + # The benchmark creates the Redshift cluster's subnet group in the + # already provisioned virtual private cloud (vpc). 
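+        # The loop below copies the subnet id of the provisioned AwsNetwork
+        # into the cluster subnet group before the EDW service's Create()
+        # is called.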
+ for network in networks: + if network.__class__.__name__ == 'AwsNetwork': + self.edw_service.cluster_subnet_group.subnet_id = network.subnet.id + self.edw_service.Create() + if self.vpn_service: + self.vpn_service.Create() + if hasattr(self, 'messaging_service') and self.messaging_service: + self.messaging_service.Create() + if self.data_discovery_service: + self.data_discovery_service.Create() + + def Delete(self): + if self.deleted: + return + + should_freeze = hasattr(self, 'freeze_path') and self.freeze_path + if should_freeze: + self.Freeze() + + if self.container_registry: + self.container_registry.Delete() + if self.spark_service: + self.spark_service.Delete() + if self.dpb_service: + self.dpb_service.Delete() + if hasattr(self, 'relational_db') and self.relational_db: + self.relational_db.Delete() + if hasattr(self, 'non_relational_db') and self.non_relational_db: + self.non_relational_db.Delete(freeze=should_freeze) + if hasattr(self, 'spanner') and self.spanner: + self.spanner.Delete(freeze=should_freeze) + if self.tpus: + vm_util.RunThreaded(lambda tpu: tpu.Delete(), self.tpus) + if self.edw_service: + self.edw_service.Delete() + if self.nfs_service: + self.nfs_service.Delete() + if self.smb_service: + self.smb_service.Delete() + if hasattr(self, 'messaging_service') and self.messaging_service: + self.messaging_service.Delete() + if hasattr(self, 'data_discovery_service') and self.data_discovery_service: + self.data_discovery_service.Delete() + + # Note: It is ok to delete capacity reservations before deleting the VMs, + # and will actually save money (mere seconds of usage). + if self.capacity_reservations: + try: + vm_util.RunThreaded(lambda reservation: reservation.Delete(), + self.capacity_reservations) + except Exception: # pylint: disable=broad-except + logging.exception('Got an exception deleting CapacityReservations. ' + 'Attempting to continue tearing down.') + + if self.vms: + try: + vm_util.RunThreaded(self.DeleteVm, self.vms) + except Exception: + logging.exception('Got an exception deleting VMs. ' + 'Attempting to continue tearing down.') + if hasattr(self, 'placement_groups'): + for placement_group_object in self.placement_groups.values(): + placement_group_object.Delete() + + for firewall in six.itervalues(self.firewalls): + try: + firewall.DisallowAllPorts() + except Exception: + logging.exception('Got an exception disabling firewalls. ' + 'Attempting to continue tearing down.') + + if self.container_cluster: + self.container_cluster.DeleteServices() + self.container_cluster.DeleteContainers() + self.container_cluster.Delete() + + for net in six.itervalues(self.networks): + try: + net.Delete() + except Exception: + logging.exception('Got an exception deleting networks. 
' + 'Attempting to continue tearing down.') + + if hasattr(self, 'vpn_service') and self.vpn_service: + self.vpn_service.Delete() + + self.deleted = True + + def GetSamples(self): + """Returns samples created from benchmark resources.""" + samples = [] + if self.container_cluster: + samples.extend(self.container_cluster.GetSamples()) + if self.container_registry: + samples.extend(self.container_registry.GetSamples()) + return samples + + def StartBackgroundWorkload(self): + targets = [(vm.StartBackgroundWorkload, (), {}) for vm in self.vms] + vm_util.RunParallelThreads(targets, len(targets)) + + def StopBackgroundWorkload(self): + targets = [(vm.StopBackgroundWorkload, (), {}) for vm in self.vms] + vm_util.RunParallelThreads(targets, len(targets)) + + def _IsSafeKeyOrValueCharacter(self, char): + return char.isalpha() or char.isnumeric() or char == '_' + + def _SafeLabelKeyOrValue(self, key): + result = ''.join(c if self._IsSafeKeyOrValueCharacter(c) else '_' + for c in key.lower()) + + # max length contraints on keys and values + # https://cloud.google.com/resource-manager/docs/creating-managing-labels + max_safe_length = 63 + return result[:max_safe_length] + + def _GetResourceDict(self, time_format, timeout_minutes=None): + """Gets a list of tags to be used to tag resources.""" + now_utc = datetime.datetime.utcnow() + + if not timeout_minutes: + timeout_minutes = FLAGS.timeout_minutes + + timeout_utc = ( + now_utc + + datetime.timedelta(minutes=timeout_minutes)) + + tags = { + 'timeout_utc': timeout_utc.strftime(time_format), + 'create_time_utc': now_utc.strftime(time_format), + 'benchmark': self.name, + 'perfkit_uuid': self.uuid, + 'owner': FLAGS.owner, + 'benchmark_uid': self.uid, + } + + # add metadata key value pairs + metadata_dict = (flag_util.ParseKeyValuePairs(FLAGS.metadata) + if hasattr(FLAGS, 'metadata') else dict()) + for key, value in metadata_dict.items(): + tags[self._SafeLabelKeyOrValue(key)] = self._SafeLabelKeyOrValue(value) + + return tags + + def GetResourceTags(self, timeout_minutes=None): + """Gets a list of tags to be used to tag resources.""" + return self._GetResourceDict(METADATA_TIME_FORMAT, timeout_minutes) + + def _CreatePlacementGroup(self, placement_group_spec, cloud): + """Create a placement group in zone. + + Args: + placement_group_spec: A placement_group.BasePlacementGroupSpec object. + cloud: The cloud for the placement group. + See the flag of the same name for more information. + Returns: + A placement_group.BasePlacementGroup object. + """ + + placement_group_class = placement_group.GetPlacementGroupClass(cloud) + if placement_group_class: + return placement_group_class(placement_group_spec) + else: + return None + + def GetVmGroupForVm(self, vm): + """Look up and return vm group name for the vm argument.""" + for group_name in self.vm_groups.keys(): + if vm in self.vm_groups[group_name]: + return group_name + return None + + def _CreateVirtualMachine(self, vm_spec, os_type, cloud): + """Create a vm in zone. + + Args: + vm_spec: A virtual_machine.BaseVmSpec object. + os_type: The type of operating system for the VM. See the flag of the + same name for more information. + cloud: The cloud for the VM. See the flag of the same name for more + information. + Returns: + A virtual_machine.BaseVirtualMachine object. 
+ """ + vm = static_vm.StaticVirtualMachine.GetStaticVirtualMachine() + if vm: + return vm + + vm_class = virtual_machine.GetVmClass(cloud, os_type) + if vm_class is None: + raise errors.Error( + 'VMs of type %s" are not currently supported on cloud "%s".' % + (os_type, cloud)) + + return vm_class(vm_spec) + + def CreateAndBootVm(self, vm): + """Creates a single VM and waits for boot to complete. + + Args: + vm: The BaseVirtualMachine object representing the VM. + """ + vm.Create() + logging.info('VM: %s', vm.ip_address) + logging.info('Waiting for boot completion.') + vm.AllowRemoteAccessPorts() + vm.WaitForBootCompletion() + + def PrepareVmAfterBoot(self, vm): + """Prepares a VM after it has booted. + + This function will prepare a scratch disk if required. + + Args: + vm: The BaseVirtualMachine object representing the VM. + + Raises: + Exception: If --vm_metadata is malformed. + """ + vm.AddMetadata() + vm.OnStartup() + # Prepare vm scratch disks: + if any((spec.disk_type == disk.LOCAL for spec in vm.disk_specs)): + vm.SetupLocalDisks() + for disk_spec in vm.disk_specs: + if disk_spec.disk_type == disk.RAM: + vm.CreateRamDisk(disk_spec) + else: + vm.CreateScratchDisk(disk_spec) + # This must come after Scratch Disk creation to support the + # Containerized VM case + vm.PrepareVMEnvironment() + vm.LowerSecurityScannerPriority() + + def DeleteVm(self, vm): + """Deletes a single vm and scratch disk if required. + + Args: + vm: The BaseVirtualMachine object representing the VM. + """ + if vm.is_static and vm.install_packages: + vm.PackageCleanup() + vm.ProxyCleanup() + vm.Delete() + vm.DeleteScratchDisks() + + @staticmethod + def _GetPickleFilename(uid): + """Returns the filename for the pickled BenchmarkSpec.""" + return os.path.join(vm_util.GetTempDir(), uid) + + def Pickle(self, filename=None): + """Pickles the spec so that it can be unpickled on a subsequent run.""" + with open(filename or self._GetPickleFilename(self.uid), + 'wb') as pickle_file: + pickle.dump(self, pickle_file, 2) + + def Freeze(self): + """Pickles the spec to a destination, defaulting to tempdir if not found.""" + if not self.freeze_path: + return + logging.info('Freezing benchmark_spec to %s', self.freeze_path) + try: + self.Pickle(self.freeze_path) + except FileNotFoundError: + default_path = f'{vm_util.GetTempDir()}/restore_spec.pickle' + logging.exception('Could not find file path %s, defaulting freeze to %s.', + self.freeze_path, default_path) + self.Pickle(default_path) + + @classmethod + def GetBenchmarkSpec(cls, benchmark_module, config, uid): + """Unpickles or creates a BenchmarkSpec and returns it. + + Args: + benchmark_module: The benchmark module object. + config: BenchmarkConfigSpec. The configuration for the benchmark. + uid: An identifier unique to this run of the benchmark even if the same + benchmark is run multiple times with different configs. + + Returns: + A BenchmarkSpec object. + """ + if stages.PROVISION in FLAGS.run_stage: + return cls(benchmark_module, config, uid) + + try: + with open(cls._GetPickleFilename(uid), 'rb') as pickle_file: + bm_spec = pickle.load(pickle_file) + except Exception as e: # pylint: disable=broad-except + logging.error('Unable to unpickle spec file for benchmark %s.', + benchmark_module.BENCHMARK_NAME) + raise e + # Always let the spec be deleted after being unpickled so that + # it's possible to run cleanup even if cleanup has already run. 
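+    # Clearing `deleted` lets Delete() run again on the unpickled spec, the
+    # status starts over as SKIPPED, and the spec is registered as the
+    # benchmark spec for the current thread.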
+ bm_spec.deleted = False + bm_spec.status = benchmark_status.SKIPPED + context.SetThreadBenchmarkSpec(bm_spec) + return bm_spec diff --git a/script/cumulus/pkb/perfkitbenchmarker/benchmark_status.py b/script/cumulus/pkb/perfkitbenchmarker/benchmark_status.py new file mode 100644 index 0000000..f402d9c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/benchmark_status.py @@ -0,0 +1,142 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Constants and helpers for reporting the success status of each benchmark.""" + +import os +from six.moves import zip + +SUCCEEDED = 'SUCCEEDED' +FAILED = 'FAILED' +SKIPPED = 'SKIPPED' + +ALL = SUCCEEDED, FAILED, SKIPPED + +_COL_SEPARATOR = ' ' + + +class FailedSubstatus(object): + """Failure modes for benchmarks.""" + # Failure due to insufficient quota, user preventable + QUOTA = 'QUOTA_EXCEEDED' + + # Failure due to insufficient capacity in the cloud provider, user + # non-preventable. + INSUFFICIENT_CAPACITY = 'INSUFFICIENT_CAPACITY' + + # Failure during the execution of the benchmark. These are non-retryable, + # known failure modes of the benchmark. It is recommended that the benchmark + # be completely re-run. + KNOWN_INTERMITTENT = 'KNOWN_INTERMITTENT' + + # Failure due to an interruptible vm being interrupted before the benchmark + # completes. User non-preventable. + INTERRUPTED = 'INTERRUPTED' + + # Failure due to an unsupported configuration running. Ex. Machine type not + # supported in a zone. For retries, this will pick a new region to maximize + # the chance of success. + UNSUPPORTED = 'UNSUPPORTED' + + # General failure that don't fit in the above categories. + UNCATEGORIZED = 'UNCATEGORIZED' + + # Failure when restoring resource. + RESTORE_FAILED = 'RESTORE_FAILED' + + # Failure when freezing resource. + FREEZE_FAILED = 'FREEZE_FAILED' + + # List of valid substatuses for use with --retries. + # UNCATEGORIZED failures are not retryable. To make a specific UNCATEGORIZED + # failure retryable, please raise an errors.Benchmarks.KnownIntermittentError. + # RESTORE_FAILED/FREEZE_FAILED failures are not retryable since generally + # logic for freeze/restore is already retried in the BaseResource + # Create()/Delete(). + RETRYABLE_SUBSTATUSES = [ + QUOTA, INSUFFICIENT_CAPACITY, KNOWN_INTERMITTENT, INTERRUPTED, UNSUPPORTED + ] + + +def _CreateSummaryTable(benchmark_specs): + """Converts statuses of benchmark runs into a formatted string table. + + Args: + benchmark_specs: List of BenchmarkSpecs. + + Returns: + string. Multi-line string summarizing benchmark success statuses. 
Example: + -------------------------------------------------------- + Name UID Status Failed Substatus + -------------------------------------------------------- + iperf iperf0 SUCCEEDED + iperf iperf1 FAILED + iperf iperf2 FAILED QUOTA_EXCEEDED + cluster_boot cluster_boot0 SKIPPED + -------------------------------------------------------- + """ + run_status_tuples = [(spec.name, spec.uid, spec.status, + spec.failed_substatus if spec.failed_substatus else '') + for spec in benchmark_specs] + assert run_status_tuples, ('run_status_tuples must contain at least one ' + 'element.') + col_headers = 'Name', 'UID', 'Status', 'Failed Substatus' + col_lengths = [] + for col_header, col_entries in zip(col_headers, + list(zip(*run_status_tuples))): + max_col_content_length = max(len(entry) for entry in col_entries) + col_lengths.append(max(len(col_header), max_col_content_length)) + line_length = (len(col_headers) - 1) * len(_COL_SEPARATOR) + sum(col_lengths) + dash_line = '-' * line_length + line_format = _COL_SEPARATOR.join( + '{{{0}:<{1}s}}'.format(col_index, col_length) + for col_index, col_length in enumerate(col_lengths)) + msg = [dash_line, line_format.format(*col_headers), dash_line] + msg.extend(line_format.format(*row_entries) + for row_entries in run_status_tuples) + msg.append(dash_line) + return os.linesep.join(msg) + + +def CreateSummary(benchmark_specs): + """Logs a summary of benchmark run statuses. + + Args: + benchmark_specs: List of BenchmarkSpecs. + + Returns: + string. Multi-line string summarizing benchmark success statuses. Example: + Benchmark run statuses: + -------------------------------------------------------- + Name UID Status Failed Substatus + -------------------------------------------------------- + iperf iperf0 SUCCEEDED + iperf iperf1 FAILED + iperf iperf2 FAILED QUOTA_EXCEEDED + cluster_boot cluster_boot0 SKIPPED + -------------------------------------------------------- + Success rate: 25.00% (1/4) + """ + run_status_tuples = [(spec.name, spec.uid, spec.status) + for spec in benchmark_specs] + assert run_status_tuples, ('run_status_tuples must contain at least one ' + 'element.') + benchmark_count = len(run_status_tuples) + successful_benchmark_count = sum(1 for _, _, status in run_status_tuples + if status == SUCCEEDED) + return os.linesep.join(( + 'Benchmark run statuses:', + _CreateSummaryTable(benchmark_specs), + 'Success rate: {0:.2f}% ({1}/{2})'.format( + 100. * successful_benchmark_count / benchmark_count, + successful_benchmark_count, benchmark_count))) diff --git a/script/cumulus/pkb/perfkitbenchmarker/capacity_reservation.py b/script/cumulus/pkb/perfkitbenchmarker/capacity_reservation.py new file mode 100644 index 0000000..dec04b5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/capacity_reservation.py @@ -0,0 +1,46 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
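For reference, the summary table produced by `_CreateSummaryTable` above is sized per column from the wider of the header and the longest cell, then rendered with a left-aligned format string. A minimal sketch of that sizing logic, using hypothetical rows rather than output from a real run:

```python
# Minimal sketch of the column sizing used by _CreateSummaryTable, with
# hypothetical rows; the real function reads (name, uid, status, substatus)
# tuples from BenchmarkSpecs.
col_headers = ('Name', 'UID', 'Status', 'Failed Substatus')
rows = [('iperf', 'iperf0', 'SUCCEEDED', ''),
        ('cluster_boot', 'cluster_boot0', 'SKIPPED', '')]

# Each column is as wide as its header or its longest cell, whichever is wider.
col_lengths = [max(len(header), max(len(row[i]) for row in rows))
               for i, header in enumerate(col_headers)]
# One left-aligned field per column, joined by a two-space separator.
line_format = '  '.join('{{{0}:<{1}s}}'.format(i, width)
                        for i, width in enumerate(col_lengths))
print(line_format.format(*col_headers))
for row in rows:
    print(line_format.format(*row))
```

The dashed rule in the real output is simply a run of '-' characters whose length is the sum of the column widths plus the separators between them.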
+ +"""Module containing abstract class for a capacity reservation for VMs.""" + +from absl import flags +from perfkitbenchmarker import resource + +FLAGS = flags.FLAGS + +flags.DEFINE_bool('use_capacity_reservations', False, + 'Whether to use capacity reservations for virtual ' + 'machines. Only supported on AWS.') + + +def GetResourceClass(cloud): + """Get the CapacityReservation class corresponding to 'cloud'. + + Args: + cloud: name of cloud to get the class for. + + Returns: + Cloud-specific implementation of BaseCapacityReservation. + """ + return resource.GetResourceClass(BaseCapacityReservation, CLOUD=cloud) + + +class BaseCapacityReservation(resource.BaseResource): + """An object representing a CapacityReservation.""" + + RESOURCE_TYPE = 'BaseCapacityReservation' + + def __init__(self, vm_group): + super(BaseCapacityReservation, self).__init__() + self.vm_group = vm_group diff --git a/script/cumulus/pkb/perfkitbenchmarker/cloud_harmony_util.py b/script/cumulus/pkb/perfkitbenchmarker/cloud_harmony_util.py new file mode 100644 index 0000000..d01419c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/cloud_harmony_util.py @@ -0,0 +1,164 @@ +"""Module for Helper methods when working with Cloud Harmony Suite. + +https://github.com/cloudharmony +""" + +import io +from typing import Any, Dict, List, Optional + +from absl import flags +import pandas as pd +from perfkitbenchmarker import providers +from perfkitbenchmarker import sample +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker.providers.gcp import util as gcp_util + +FLAGS = flags.FLAGS + +flags.DEFINE_boolean('ch_store_results', False, + 'Whether to store cloudharmony benchmark reports. ' + 'Defaults to False, can be turned on for production runs. ' + 'This flag is used to produce benchmark reports.') +STORE = flags.DEFINE_string('ch_results_store', None, + 'Storage to store cloudharmony benchmark reports. ' + 'Used if ch_store_results is set to True.') +BUCKET = flags.DEFINE_string('ch_results_bucket', None, + 'Bucket to store cloudharmony benchmark reports. ' + 'Used if ch_store_results is set to True.') +KEY = flags.DEFINE_string( + 'ch_results_key', None, + 'Access key to store cloudharmony benchmark reports. ' + 'Used in conjunction with ch_results_bucket') +SECRET = flags.DEFINE_string( + 'ch_results_secret', None, + 'Access secret to store cloudharmony benchmark reports. ' + 'Used in conjunction with ch_results_bucket') + +ITERATIONS = flags.DEFINE_integer( + 'ch_iterations', 1, 'The number of times to run the test. Multiple test ' + 'iterations will be grouped and saved in the same results resport.') + + +def GetSaveCommand() -> str: + """Returns the cloudharmony command to save benchmark reports.""" + return (f' --db_and_csv --store {STORE.value} --store_key {KEY.value} ' + f' --store_secret {SECRET.value} --store_container {BUCKET.value} ') + + +def GetRegionFromZone(zone: str) -> str: + # only gcp is supported as cloudharmony metadata is exclusive to gcp runs. + if FLAGS.cloud == 'GCP': + return gcp_util.GetRegionFromZone(zone) + else: + return zone + + +def ParseCsvResultsIntoMetadata(vm: virtual_machine.BaseVirtualMachine, + path: str) -> List[Dict[str, Any]]: + """Loads the CSV created by cloud harmony at path in the VM into metadata. + + The CSV located by path inside of virtual machine VM will be loaded. For each + row of results, a set of key/value pairs is created. The keys will all be + prepended with `cloudharmony` or similar. 
+ + Args: + vm: the Virtual Machine that has run a cloud harmony benchmark + path: The path inside of VM which has the CSV file which should be loaded + Returns: + A list of metadata outputs that should be appended to the samples that are + produced by a cloud harmony benchmark. + """ + csv_string, _ = vm.RemoteCommand('cat {path}'.format(path=path)) + + return ParseCsvResultsFromString(csv_string) + + +def ParseCsvResultsFromString(csv_string: str, + prefix: str = '') -> List[Dict[str, Any]]: + """Loads the CSV created by cloud harmony in csv_string. + + The CSV will be loaded into a pandas data frame. + For every row of results - we will create a set of key/value pairs + representing that row of results. The keys will all be prepended with + prefix. + + Args: + csv_string: a string of the CSV which was produced by cloud_harmony + prefix: a string prefix to attach to the metadata. Defaults to empty + string. It can be set to a unique string if cloudharmony data is + attached to every sample instead of being its own sample. + Returns: + A list of metadata dictionaries, where each dict represents one row of + results (an iteration) in the csv string. + """ + data_frame = pd.read_csv(io.StringIO(csv_string)).fillna('') + number_of_rows = len(data_frame.index) + + results = [] + # one row = one test result + for row in range(number_of_rows): + result = {} + for column in data_frame.columns: + key = column + value = data_frame[column][row] + result_key = f'{prefix}_{key}' if prefix else key + result[result_key] = value + results.append(result) + + return results + + +def GetCommonMetadata(custom_metadata: Optional[Dict[str, Any]] = None) -> str: + """Returns pkb metadata associated with this run as cloudharmony metadata. + + Cloudharmony benchmarks take in benchmark setup configurations as inputs and + include them in the output as metadata for the run. This function creates a + string of input metadata from pkb flags to be included as run parameter for + cloudharmony benchmarks. + + Args: + custom_metadata: a dictionary of metadata key value pairs that should + override any flag chosen in the function, or should also be included. + Returns: + A string of metadata that should be appended to the cloudharmony + benchmark run. + """ + if FLAGS.cloud != providers.GCP: + # Should not be including cloudharmony metadata for non-gcp runs. + return '' + + metadata = { + 'meta_compute_service': 'Google Compute Engine', + 'meta_compute_service_id': 'google:compute', + 'meta_instance_id': FLAGS.machine_type, + 'meta_provider': 'Google Cloud Platform', + 'meta_provider_id': 'google', + 'meta_region': gcp_util.GetRegionFromZone(FLAGS.zone[0]), + 'meta_zone': FLAGS.zone[0], + 'meta_test_id': FLAGS.run_uri, + } + if custom_metadata: + metadata.update(custom_metadata) + + metadata_pair = [f'--{key} {value}' for key, value in metadata.items()] + return ' '.join(metadata_pair) + + +def GetMetadataSamples( + cloud_harmony_metadata: List[Dict[Any, Any]]) -> List[sample.Sample]: + """Returns the cloudharmony metadata as a list of samples. + + This function is commonly used across all cloudharmony benchmarks. + + Args: + cloud_harmony_metadata: list of metadata outputs to save in samples. + + Returns: + A list of sample.Sample objects of cloudharmony metadata, where one sample + represents one row of csv results (one row = one test iteration). 
+ + """ + samples = [] + for result in cloud_harmony_metadata: + samples.append(sample.Sample('cloudharmony_output', '', '', result)) + return samples diff --git a/script/cumulus/pkb/perfkitbenchmarker/cloud_tpu.py b/script/cumulus/pkb/perfkitbenchmarker/cloud_tpu.py new file mode 100644 index 0000000..2ab85f5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/cloud_tpu.py @@ -0,0 +1,124 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for TPU.""" + +import abc + +from absl import flags +from perfkitbenchmarker import resource + + +flags.DEFINE_string('tpu_cidr_range', None, """CIDR Range for the TPU. The IP + range that the TPU will select an IP address from. Must be + in CIDR notation and a /29 range, for example + 192.168.0.0/29. Errors will occur if the CIDR range has + already been used for a currently existing TPU, the CIDR + range conflicts with any networks in the user's provided + network, or the provided network is peered with another + network that is using that CIDR range.""") +flags.DEFINE_string('tpu_accelerator_type', 'tpu-v2', + 'TPU accelerator type for the TPU.') +flags.DEFINE_string('tpu_description', None, + 'Specifies a text description of the TPU.') +flags.DEFINE_string('tpu_network', None, + 'Specifies the network that this TPU will be a part of.') +flags.DEFINE_string('tpu_tf_version', None, + 'TensorFlow version for the TPU.') +flags.DEFINE_string('tpu_zone', None, + 'The zone of the tpu to create. Zone in which TPU lives.') +flags.DEFINE_string('tpu_name', None, + 'The name of the TPU to create.') +flags.DEFINE_boolean('tpu_preemptible', False, + 'Use preemptible TPU or not.') +flags.DEFINE_integer('tpu_cores_per_donut', 8, + 'The number of cores per TPU donut. This is 8 because each' + ' TPU has 4 chips each with 2 cores.') + +FLAGS = flags.FLAGS + + +def GetTpuClass(cloud): + """Gets the TPU class corresponding to 'cloud'. + + Args: + cloud: String. name of cloud to get the class for. + + Returns: + Implementation class corresponding to the argument cloud + + Raises: + Exception: An invalid TPU was provided + """ + return resource.GetResourceClass(BaseTpu, CLOUD=cloud) + + +class BaseTpu(resource.BaseResource): + """Object representing a TPU.""" + + RESOURCE_TYPE = 'BaseTpu' + + def __init__(self, tpu_spec): + """Initialize the TPU object. + + Args: + tpu_spec: spec of the TPU. 
+ """ + super(BaseTpu, self).__init__() + self.spec = tpu_spec + + def _Create(self): + """Creates the TPU.""" + raise NotImplementedError() + + def _Delete(self): + """Deletes the TPU.""" + raise NotImplementedError() + + @abc.abstractmethod + def GetName(self): + raise NotImplementedError() + + @abc.abstractmethod + def GetMasterGrpcAddress(self): + """Gets the master grpc address of the TPU.""" + raise NotImplementedError() + + @abc.abstractmethod + def GetNumShards(self): + """Gets the number of TPU shards.""" + raise NotImplementedError() + + @abc.abstractmethod + def GetZone(self): + """Gets the TPU zone.""" + raise NotImplementedError() + + @abc.abstractmethod + def GetAcceleratorType(self): + """Gets the TPU accelerator type.""" + raise NotImplementedError() + + def GetResourceMetadata(self): + """Returns a dictionary of cluster metadata.""" + metadata = { + 'cidr_range': self.spec.tpu_cidr_range, + 'accelerator_type': self.spec.tpu_accelerator_type, + 'description': self.spec.tpu_description, + 'network': self.spec.tpu_network, + 'tf_version': self.spec.tpu_tf_version, + 'zone': self.spec.tpu_zone, + 'name': self.spec.tpu_name, + 'preemptible': self.spec.tpu_preemptible + } + return metadata diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/configs/__init__.py new file mode 100644 index 0000000..9ad39c1 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/__init__.py @@ -0,0 +1,325 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Configuration files for benchmarks. + +Each benchmark has a default configuration defined inside its module. +The configuration is written in YAML (www.yaml.org) and specifies what +resources are needed to run the benchmark. Users can write their own +config files, which will be merged with the default configuration. These +config files specify overrides to the default configuration. Users can also +specify which benchmarks to run in the same config file. + +Valid top level keys: + benchmarks: A YAML array of dictionaries mapping benchmark names to their + configs. This also determines which benchmarks to run. + flags: A YAML dictionary with overrides for default flag values. Benchmark + config specific flags override those specified here. + *any_benchmark_name*: If the 'benchmarks' key is not specified, then + specifying a benchmark name mapped to a config will override + that benchmark's default configuration in the event that that + benchmark is run. + +Valid config keys: + vm_groups: A YAML dictionary mapping the names of VM groups to the groups + themselves. These names can be any string. + description: A description of the benchmark. + flags: A YAML dictionary with overrides for default flag values. + +Valid VM group keys: + vm_spec: A YAML dictionary mapping names of clouds (e.g. AWS) to the + actual VM spec. + disk_spec: A YAML dictionary mapping names of clouds to the actual + disk spec. 
+ vm_count: The number of VMs to create in this group. If this key isn't + specified, it defaults to 1. + disk_count: The number of disks to attach to VMs of this group. If this key + isn't specified, it defaults to 1. + cloud: The name of the cloud to create the group in. This is used for + multi-cloud configurations. + os_type: The OS type of the VMs to create (see the flag of the same name for + more information). This is used if you want to run a benchmark using VMs + with different OS types (e.g. Debian and RHEL). + static_vms: A YAML array of Static VM specs. These VMs will be used before + any Cloud VMs are created. The total number of VMs will still add up to + the number specified by the 'vm_count' key. + +For valid VM spec keys, see virtual_machine.BaseVmSpec and derived classes. +For valid disk spec keys, see disk.BaseDiskSpec and derived classes. + +See configs.spec.BaseSpec for more information about adding additional keys to +VM specs, disk specs, or any component of the benchmark configuration +dictionary. +""" + +import copy +import functools +import json +import logging +import re + +from absl import flags +import contextlib2 +from perfkitbenchmarker import data +from perfkitbenchmarker import errors +import six +import yaml + + +FLAGS = flags.FLAGS +CONFIG_CONSTANTS = 'default_config_constants.yaml' +FLAGS_KEY = 'flags' +IMPORT_REGEX = re.compile('^#import (.*)') + +flags.DEFINE_string('benchmark_config_file', None, + 'The file path to the user config file which will ' + 'override benchmark defaults. This should either be ' + 'a path relative to the current working directory, ' + 'an absolute path, or just the name of a file in the ' + 'configs/ directory.') +flags.DEFINE_multi_string( + 'config_override', None, + 'This flag can be used to override any config value. It is applied after ' + 'the user config (specified via --benchmark_config_file_path), so it has ' + 'a higher priority than that config. The value of the flag should be ' + 'fully.qualified.key=value (e.g. --config_override=cluster_boot.vm_groups.' + 'default.vm_count=4).') + + +class _ConcatenatedFiles(object): + """Class that presents several files as a single object. + + The class exposes a single method (read) which is all that yaml + needs to interact with a stream. + + Attributes: + files: A list of opened file objects. + current_file_index: The index of the current file that is being read from. + """ + + def __init__(self, files): + self.files = files + self.current_file_index = 0 + + def read(self, length): + data = self.files[self.current_file_index].read(length) + while (not data) and (self.current_file_index + 1 < len(self.files)): + self.current_file_index += 1 + data = self.files[self.current_file_index].read(length) + return data + + +def _GetImportFiles(config_file, imported_set=None): + """Get a list of file names that get imported from config_file. + + Args: + config_file: The name of a config file to find imports for. + imported_set: A set of files that _GetImportFiles has already + been called on that should be ignored. + + Returns: + A list of file names that are imported by config_file + (including config_file itself). + """ + imported_set = imported_set or set() + config_path = data.ResourcePath(config_file) + # Give up on circular imports. 
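+  # A config file that has already been visited returns an empty list here,
+  # so two files that "#import" each other do not recurse forever.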
+ if config_path in imported_set: + return [] + imported_set.add(config_path) + + with open(config_path) as f: + line = f.readline() + match = IMPORT_REGEX.match(line) + import_files = [] + while match: + import_file = match.group(1) + for file_name in _GetImportFiles(import_file, imported_set): + if file_name not in import_files: + import_files.append(file_name) + line = f.readline() + match = IMPORT_REGEX.match(line) + import_files.append(config_path) + return import_files + + +def _LoadUserConfig(path): + """Loads a user config from the supplied path.""" + config_files = _GetImportFiles(path) + with contextlib2.ExitStack() as stack: + files = [stack.enter_context(open(f)) for f in config_files] + return yaml.safe_load(_ConcatenatedFiles(files)) + + +@functools.lru_cache() +def _LoadConfigConstants(): + """Reads the config constants file.""" + with open(data.ResourcePath(CONFIG_CONSTANTS, False)) as fp: + return fp.read() + + +def _GetConfigFromOverrides(overrides): + """Converts a list of overrides into a config.""" + config = {} + + for override in overrides: + if override.count('=') != 1: + raise ValueError('--config_override flag value has incorrect number of ' + '"=" characters. The value must take the form ' + 'fully.qualified.key=value.') + full_key, value = override.split('=') + keys = full_key.split('.') + new_config = {keys.pop(): yaml.safe_load(value)} + while keys: + new_config = {keys.pop(): new_config} + config = MergeConfigs(config, new_config) + + return config + + +@functools.lru_cache() +def GetConfigFlags(): + """Returns the global flags from the user config.""" + return GetUserConfig().get(FLAGS_KEY, {}) + + +def GetUserConfig(): + """Returns the user config with any overrides applied. + + This loads config from --benchmark_config_file and merges it with + any overrides specified via --config_override and returns the result. + + Returns: + dict. The result of merging the loaded config from the + --benchmark_config_file flag with the config generated from the + --config override flag. + """ + try: + if FLAGS.benchmark_config_file: + config = _LoadUserConfig(FLAGS.benchmark_config_file) + else: + config = {} + + if FLAGS.config_override: + override_config = _GetConfigFromOverrides(FLAGS.config_override) + config = MergeConfigs(config, override_config) + + except yaml.parser.ParserError as e: + raise errors.Config.ParseError( + 'Encountered a problem loading config. Please ensure that the config ' + 'is valid YAML. Error received:\n%s' % e) + except yaml.composer.ComposerError as e: + raise errors.Config.ParseError( + 'Encountered a problem loading config. Please ensure that all ' + 'references are defined. Error received:\n%s' % e) + + return config + + +def MergeConfigs(default_config, override_config, warn_new_key=False): + """Merges the override config into the default config. + + This function will recursively merge two nested dicts. + The override_config represents overrides to the default_config dict, so any + leaf key/value pairs which are present in both dicts will take their value + from the override_config. + + Args: + default_config: The dict which will have its values overridden. + override_config: The dict wich contains the overrides. + warn_new_key: Determines whether we warn the user if the override config + has a key that the default config did not have. + + Returns: + A dict containing the values from the default_config merged with those from + the override_config. 
+ """ + def _Merge(d1, d2): + """Merge two nested dicts.""" + merged_dict = copy.deepcopy(d1) + for k, v in six.iteritems(d2): + if k not in d1: + merged_dict[k] = copy.deepcopy(v) + if warn_new_key: + logging.warning('The key "%s" was not in the default config, ' + 'but was in user overrides. This may indicate ' + 'a typo.', k) + elif isinstance(d1[k], dict) and isinstance(v, dict): + merged_dict[k] = _Merge(d1[k], v) + else: + merged_dict[k] = v + return merged_dict + + if override_config: + return _Merge(default_config, override_config) + else: + return default_config + + +def LoadMinimalConfig(benchmark_config, benchmark_name): + """Loads a benchmark config without using any flags in the process. + + This function will prepend configs/default_config_constants.yaml to the + benchmark config prior to loading it. This allows the config to use + references to anchors defined in the constants file. + + Args: + benchmark_config: str. The default config in YAML format. + benchmark_name: str. The name of the benchmark. + + Returns: + dict. The loaded config. + """ + yaml_config = [] + yaml_config.append(_LoadConfigConstants()) + yaml_config.append(benchmark_config) + + try: + config = yaml.safe_load('\n'.join(yaml_config)) + except yaml.parser.ParserError as e: + raise errors.Config.ParseError( + 'Encountered a problem loading the default benchmark config. Please ' + 'ensure that the config is valid YAML. Error received:\n%s' % e) + except yaml.composer.ComposerError as e: + raise errors.Config.ParseError( + 'Encountered a problem loading the default benchmark config. Please ' + 'ensure that all references are defined. Error received:\n%s' % e) + + config = config[benchmark_name] + # yaml safe_parse parses anchor by reference and return the same + # object when the same anchor is used multiple times. + # Seralize and deserialize to make sure all objects in the dictionary are + # unique. + config = json.loads(json.dumps(config)) + return config + + +def LoadConfig(benchmark_config, user_config, benchmark_name): + """Loads a benchmark configuration. + + This function loads a benchmark's default configuration (in YAML format), + then merges it with any overrides the user provided, and returns the result. + This loaded config is then passed to the benchmark_spec.BenchmarkSpec + constructor in order to create a BenchmarkSpec. + + Args: + benchmark_config: str. The default configuration in YAML format. + user_config: dict. The loaded user config for the benchmark. + benchmark_name: str. The name of the benchmark. + + Returns: + dict. The loaded config. + """ + config = LoadMinimalConfig(benchmark_config, benchmark_name) + config = MergeConfigs(config, user_config, warn_new_key=True) + return config diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/benchmark_config_spec.py b/script/cumulus/pkb/perfkitbenchmarker/configs/benchmark_config_spec.py new file mode 100644 index 0000000..209f749 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/benchmark_config_spec.py @@ -0,0 +1,2120 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Classes that verify and transform benchmark configuration input. + +See perfkitbenchmarker/configs/__init__.py for more information about +configuration files. +""" + +import contextlib +import logging +import os + +from perfkitbenchmarker import app_service +from perfkitbenchmarker import container_service +from perfkitbenchmarker import data_discovery_service +from perfkitbenchmarker import disk +from perfkitbenchmarker import dpb_service +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import non_relational_db +from perfkitbenchmarker import os_types +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import spark_service +from perfkitbenchmarker import sql_engine_utils +from perfkitbenchmarker import static_virtual_machine +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec +from perfkitbenchmarker.dpb_service import BaseDpbService +from perfkitbenchmarker.providers.gcp import gcp_spanner +import six + +_DEFAULT_DISK_COUNT = 1 +_DEFAULT_VM_COUNT = 1 + + +class _DpbApplicationListDecoder(option_decoders.ListDecoder): + """Decodes the list of applications to be enabled on the dpb service.""" + + def __init__(self, **kwargs): + super(_DpbApplicationListDecoder, self).__init__( + default=None, + item_decoder=option_decoders.EnumDecoder( + [dpb_service.FLINK, dpb_service.HIVE]), + **kwargs) + + +class _DpbServiceDecoder(option_decoders.TypeVerifier): + """Validates the dpb service dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_DpbServiceDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies dpb service dictionary of a benchmark config object. + + Args: + value: dict Dpb Service config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _DpbServiceSpec Build from the config passed in in value. + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + dpb_service_config = super(_DpbServiceDecoder, + self).Decode(value, component_full_name, + flag_values) + + if (dpb_service_config['service_type'] == dpb_service.EMR and + component_full_name == 'dpb_wordcount_benchmark'): + if flag_values.dpb_wordcount_fs != BaseDpbService.S3_FS: + raise errors.Config.InvalidValue('EMR service requires S3.') + result = _DpbServiceSpec( + self._GetOptionFullName(component_full_name), flag_values, + **dpb_service_config) + return result + + +class _DpbServiceSpec(spec.BaseSpec): + """Configurable options of an Distributed Processing Backend Service. + + We may add more options here, such as disk specs, as necessary. 
+ When there are flags for these attributes, the convention is that + the flag is prefixed with dpb. + Attributes: + service_type: string. pkb_managed or dataflow,dataproc,emr, etc. + static_dpb_service_instance: if user has pre created a container, the id + worker_group: Vm group spec for workers. + worker_count: the number of workers part of the dpb service + applications: An enumerated list of applications that need to be enabled + on the dpb service + version: string. The version of software to install inside the service. + """ + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_DpbServiceSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) + pair. The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(_DpbServiceSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'static_dpb_service_instance': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'service_type': ( + option_decoders.EnumDecoder, + { + 'default': + dpb_service.DATAPROC, + 'valid_values': [ + dpb_service.DATAPROC, + dpb_service.DATAPROC_GKE, + dpb_service.DATAPROC_SERVERLESS, + dpb_service.DATAFLOW, + dpb_service.EMR, + dpb_service.UNMANAGED_DPB_SVC_YARN_CLUSTER, + dpb_service.UNMANAGED_SPARK_CLUSTER, + dpb_service.KUBERNETES_SPARK_CLUSTER, + ] + }), + 'worker_group': (_VmGroupSpecDecoder, {}), + 'worker_count': (option_decoders.IntDecoder, { + 'default': dpb_service.DEFAULT_WORKER_COUNT, + 'min': 0 + }), + 'applications': (_DpbApplicationListDecoder, {}), + 'version': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'gke_cluster_name': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'gke_cluster_nodepools': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'gke_cluster_location': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'dataproc_serverless_core_count': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True, + }), + 'dataproc_serverless_initial_executors': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }), + 'dataproc_serverless_min_executors': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }), + 'dataproc_serverless_max_executors': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. 
+ """ + super(_DpbServiceSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['static_dpb_service_instance'].present: + config_values['static_dpb_service_instance'] = ( + flag_values.static_dpb_service_instance) + # TODO(saksena): Update the documentation for zones assignment + if flag_values['zones'].present: + group = 'worker_group' + if group in config_values: + for cloud in config_values[group]['vm_spec']: + config_values[group]['vm_spec'][cloud]['zone'] = ( + flag_values.zones[0]) + + +class _TpuGroupSpec(spec.BaseSpec): + """Configurable options of a TPU.""" + + def __init__(self, + component_full_name, + group_name, + flag_values=None, + **kwargs): + super(_TpuGroupSpec, self).__init__( + '{0}.{1}'.format(component_full_name, group_name), + flag_values=flag_values, + **kwargs) + if not self.tpu_name: + self.tpu_name = 'pkb-tpu-{group_name}-{run_uri}'.format( + group_name=group_name, run_uri=flag_values.run_uri) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_TpuGroupSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS + }), + 'tpu_cidr_range': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_accelerator_type': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_description': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_network': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_tf_version': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_zone': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_name': (option_decoders.StringDecoder, { + 'default': None + }), + 'tpu_preemptible': (option_decoders.BooleanDecoder, { + 'default': False + }) + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. 
+ """ + super(_TpuGroupSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present: + config_values['cloud'] = flag_values.cloud + if flag_values['tpu_cidr_range'].present: + config_values['tpu_cidr_range'] = flag_values.tpu_cidr_range + if flag_values['tpu_accelerator_type'].present: + config_values['tpu_accelerator_type'] = flag_values.tpu_accelerator_type + if flag_values['tpu_description'].present: + config_values['tpu_description'] = flag_values.tpu_description + if flag_values['tpu_network'].present: + config_values['tpu_network'] = flag_values.tpu_network + if flag_values['tpu_tf_version'].present: + config_values['tpu_tf_version'] = flag_values.tpu_tf_version + if flag_values['tpu_zone'].present: + config_values['tpu_zone'] = flag_values.tpu_zone + if flag_values['tpu_name'].present: + config_values['tpu_name'] = flag_values.tpu_name + if flag_values['tpu_preemptible'].present: + config_values['tpu_preemptible'] = flag_values.tpu_preemptible + + +class _EdwServiceDecoder(option_decoders.TypeVerifier): + """Validates the edw service dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_EdwServiceDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies edw service dictionary of a benchmark config object. + + Args: + value: dict edw service config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _EdwServiceSpec Built from the config passed in in value. + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + edw_service_config = super(_EdwServiceDecoder, + self).Decode(value, component_full_name, + flag_values) + result = _EdwServiceSpec( + self._GetOptionFullName(component_full_name), flag_values, + **edw_service_config) + return result + + +class _EdwServiceSpec(spec.BaseSpec): + """Configurable options of an EDW service. + + When there are flags for these attributes, the convention is that + the flag is prefixed with edw_service. + + Attributes: + cluster_name : string. If set, the name of the cluster + type: string. The type of EDW service (redshift) + node_type: string, type of node comprising the cluster + node_count: integer, number of nodes in the cluster + """ + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_EdwServiceSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments to + construct in order to decode the named option. 
+ """ + result = super(_EdwServiceSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'type': (option_decoders.StringDecoder, { + 'default': 'redshift', + 'none_ok': False + }), + 'cluster_identifier': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'endpoint': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'concurrency': (option_decoders.IntDecoder, { + 'default': 5, + 'none_ok': True + }), + 'db': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'user': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'password': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'node_type': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'node_count': (option_decoders.IntDecoder, { + 'default': edw_service.DEFAULT_NUMBER_OF_NODES, + 'min': edw_service.DEFAULT_NUMBER_OF_NODES + }), + 'snapshot': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'cluster_subnet_group': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'cluster_parameter_group': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'resource_group': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'server_name': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'iam_role': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }) + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(_EdwServiceSpec, cls)._ApplyFlags(config_values, flag_values) + # TODO(saksena): Add cluster_subnet_group and cluster_parameter_group flags + # Restoring from a snapshot, so defer to the user supplied cluster details + if flag_values['edw_service_cluster_snapshot'].present: + config_values['snapshot'] = flag_values.edw_service_cluster_snapshot + if flag_values['edw_service_cluster_identifier'].present: + config_values['cluster_identifier'] = ( + flag_values.edw_service_cluster_identifier) + if flag_values['edw_service_endpoint'].present: + config_values['endpoint'] = flag_values.edw_service_endpoint + if flag_values['edw_service_cluster_concurrency'].present: + config_values['concurrency'] = flag_values.edw_service_cluster_concurrency + if flag_values['edw_service_cluster_db'].present: + config_values['db'] = flag_values.edw_service_cluster_db + if flag_values['edw_service_cluster_user'].present: + config_values['user'] = flag_values.edw_service_cluster_user + if flag_values['edw_service_cluster_password'].present: + config_values['password'] = flag_values.edw_service_cluster_password + + +class _StaticVmDecoder(option_decoders.TypeVerifier): + """Decodes an item of the static_vms list of a VM group config object.""" + + def __init__(self, **kwargs): + super(_StaticVmDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Decodes an item of the static_vms list of a VM group config object. 
+ + Args: + value: dict mapping static VM config option name string to corresponding + option value. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + StaticVmSpec decoded from the input dict. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + input_dict = super(_StaticVmDecoder, + self).Decode(value, component_full_name, flag_values) + return static_virtual_machine.StaticVmSpec( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, + **input_dict) + + +class _StaticVmListDecoder(option_decoders.ListDecoder): + """Decodes the static_vms list of a VM group config object.""" + + def __init__(self, **kwargs): + super(_StaticVmListDecoder, self).__init__( + default=list, item_decoder=_StaticVmDecoder(), **kwargs) + + +class _RelationalDbSpec(spec.BaseSpec): + """Configurable options of a database service.""" + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_RelationalDbSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + # TODO(ferneyhough): This is a lot of boilerplate, and is repeated + # below in VmGroupSpec. See if some can be consolidated. Maybe we can + # specify a VmGroupSpec instead of both vm_spec and disk_spec. + ignore_package_requirements = ( + getattr(flag_values, 'ignore_package_requirements', True) + if flag_values else True) + providers.LoadProvider(self.cloud, ignore_package_requirements) + + if self.db_disk_spec: + disk_config = getattr(self.db_disk_spec, self.cloud, None) + if disk_config is None: + raise errors.Config.MissingOption( + '{0}.cloud is "{1}", but {0}.db_disk_spec does not contain a ' + 'configuration for "{1}".'.format(component_full_name, self.cloud)) + disk_spec_class = disk.GetDiskSpecClass(self.cloud) + self.db_disk_spec = disk_spec_class( + '{0}.db_disk_spec.{1}'.format(component_full_name, self.cloud), + flag_values=flag_values, + **disk_config) + + db_vm_config = getattr(self.db_spec, self.cloud, None) + if db_vm_config is None: + raise errors.Config.MissingOption( + '{0}.cloud is "{1}", but {0}.db_spec does not contain a ' + 'configuration for "{1}".'.format(component_full_name, self.cloud)) + db_vm_spec_class = virtual_machine.GetVmSpecClass(self.cloud) + self.db_spec = db_vm_spec_class( + '{0}.db_spec.{1}'.format(component_full_name, self.cloud), + flag_values=flag_values, + **db_vm_config) + + # Set defaults that were not able to be set in + # GetOptionDecoderConstructions() + if not self.engine_version: + db_class = relational_db.GetRelationalDbClass(self.cloud, + self.is_managed_db, + self.engine) + self.engine_version = db_class.GetDefaultEngineVersion(self.engine) + if not self.database_name: + self.database_name = 'pkb-db-%s' % flag_values.run_uri + if not self.database_username: + self.database_username = 'pkb%s' % flag_values.run_uri + if not self.database_password: + self.database_password = relational_db.GenerateRandomDbPassword() + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. 
+ """ + result = super(_RelationalDbSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS + }), + 'engine': (option_decoders.EnumDecoder, { + 'valid_values': sql_engine_utils.ALL_ENGINES, + }), + 'zones': (option_decoders.ListDecoder, { + 'item_decoder': option_decoders.StringDecoder(), + 'default': None + }), + 'engine_version': (option_decoders.StringDecoder, { + 'default': None + }), + 'database_name': (option_decoders.StringDecoder, { + 'default': None + }), + 'database_password': (option_decoders.StringDecoder, { + 'default': None + }), + 'database_username': (option_decoders.StringDecoder, { + 'default': None + }), + 'high_availability': (option_decoders.BooleanDecoder, { + 'default': False + }), + 'backup_enabled': (option_decoders.BooleanDecoder, { + 'default': True + }), + 'backup_start_time': (option_decoders.StringDecoder, { + 'default': '07:00' + }), + 'is_managed_db': (option_decoders.BooleanDecoder, {'default': True}), + 'db_spec': (option_decoders.PerCloudConfigDecoder, {}), + 'db_disk_spec': (option_decoders.PerCloudConfigDecoder, {}), + 'vm_groups': (_VmGroupsDecoder, { + 'default': {} + }), + 'db_flags': (option_decoders.ListDecoder, { + 'item_decoder': option_decoders.StringDecoder(), + 'default': None + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + # TODO(ferneyhough): Add flags for db_disk_spec. + # Currently the only way to modify the disk spec of the + # db is to change the benchmark spec in the benchmark source code + # itself. + super(_RelationalDbSpec, cls)._ApplyFlags(config_values, flag_values) + + # TODO(jerlawson): Rename flags 'managed_db_' -> 'db_'. + has_db_machine_type = flag_values['managed_db_machine_type'].present + has_db_cpus = flag_values['managed_db_cpus'].present + has_db_memory = flag_values['managed_db_memory'].present + has_custom_machine_type = has_db_cpus and has_db_memory + has_client_machine_type = flag_values['client_vm_machine_type'].present + has_client_vm_cpus = flag_values['client_vm_cpus'].present + has_client_vm_memory = flag_values['client_vm_memory'].present + has_client_custom_machine_type = has_client_vm_cpus and has_client_vm_memory + + if has_custom_machine_type and has_db_machine_type: + raise errors.Config.UnrecognizedOption( + 'db_cpus/db_memory can not be specified with ' + 'db_machine_type. Either specify a custom machine ' + 'with cpus and memory or specify a predefined machine type.') + + if (not has_custom_machine_type and (has_db_cpus or has_db_memory)): + raise errors.Config.MissingOption( + 'To specify a custom database machine instance, both managed_db_cpus ' + 'and managed_db_memory must be specified.') + + if has_client_custom_machine_type and has_client_machine_type: + raise errors.Config.UnrecognizedOption( + 'client_vm_cpus/client_vm_memory can not be specified with ' + 'client_vm_machine_type. 
Either specify a custom machine ' + 'with cpus and memory or specify a predefined machine type.') + + if (not has_client_custom_machine_type and + (has_client_vm_cpus or has_client_vm_memory)): + raise errors.Config.MissingOption( + 'To specify a custom client VM, both client_vm_cpus ' + 'and client_vm_memory must be specified.') + + if flag_values['use_managed_db'].present: + config_values['is_managed_db'] = flag_values.use_managed_db + + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + if flag_values['managed_db_engine'].present: + config_values['engine'] = flag_values.managed_db_engine + if flag_values['managed_db_engine_version'].present: + config_values['engine_version'] = flag_values.managed_db_engine_version + if flag_values['managed_db_database_name'].present: + config_values['database_name'] = flag_values.managed_db_database_name + if flag_values['managed_db_database_username'].present: + config_values['database_username'] = ( + flag_values.managed_db_database_username) + if flag_values['managed_db_database_password'].present: + config_values['database_password'] = ( + flag_values.managed_db_database_password) + if flag_values['managed_db_high_availability'].present: + config_values['high_availability'] = ( + flag_values.managed_db_high_availability) + if flag_values['managed_db_backup_enabled'].present: + config_values['backup_enabled'] = (flag_values.managed_db_backup_enabled) + if flag_values['managed_db_backup_start_time'].present: + config_values['backup_start_time'] = ( + flag_values.managed_db_backup_start_time) + if flag_values['db_flags'].present: + config_values['db_flags'] = flag_values.db_flags + cloud = config_values['cloud'] + has_unmanaged_dbs = ('vm_groups' in config_values and + 'servers' in config_values['vm_groups']) + + if flag_values['managed_db_zone'].present: + config_values['db_spec'][cloud]['zone'] = flag_values.managed_db_zone[0] + config_values['zones'] = flag_values.managed_db_zone + if has_unmanaged_dbs: + config_values['vm_groups']['servers']['vm_spec'][cloud]['zone'] = ( + flag_values.managed_db_zone[0]) + if flag_values['client_vm_zone'].present: + config_values['vm_groups']['clients']['vm_spec'][cloud]['zone'] = ( + flag_values.client_vm_zone) + if has_db_machine_type: + config_values['db_spec'][cloud]['machine_type'] = ( + flag_values.managed_db_machine_type) + if has_unmanaged_dbs: + config_values['vm_groups']['servers']['vm_spec'][cloud][ + 'machine_type'] = ( + flag_values.managed_db_machine_type) + if has_custom_machine_type: + config_values['db_spec'][cloud]['machine_type'] = { + 'cpus': flag_values.managed_db_cpus, + 'memory': flag_values.managed_db_memory + } + # tox and pylint have contradictory closing brace rules, so avoid having + # opening and closing brackets on different lines. 
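The flag handling above and below repeatedly indexes into `db_spec`, `db_disk_spec`, and `vm_groups` by cloud name, and writes twice whenever an unmanaged `servers` group is present. A minimal sketch of that nesting, assuming a GCP-only config with invented values:

```python
# Illustrative only: the per-cloud nesting that _RelationalDbSpec._ApplyFlags
# mutates when flags such as --managed_db_zone or --managed_db_machine_type
# are present. Keys mirror the decoder table; all concrete values are invented.
config_values = {
    'cloud': 'GCP',
    'engine': 'mysql',
    'db_spec': {'GCP': {'machine_type': 'n1-standard-4',
                        'zone': 'us-central1-a'}},
    'db_disk_spec': {'GCP': {'disk_type': 'pd-ssd', 'disk_size': 500}},
    'vm_groups': {  # only present for unmanaged (self-hosted) databases
        'servers': {'vm_spec': {'GCP': {}}, 'disk_spec': {'GCP': {}}},
        'clients': {'vm_spec': {'GCP': {}}, 'disk_spec': {'GCP': {}}},
    },
}

cloud = config_values['cloud']
has_unmanaged_dbs = ('vm_groups' in config_values and
                     'servers' in config_values['vm_groups'])

# The same double-write pattern as the surrounding code: the managed db_spec
# is updated, and so is the unmanaged 'servers' group when it exists.
zone_flag = 'us-central1-b'  # stand-in for flag_values.managed_db_zone[0]
config_values['db_spec'][cloud]['zone'] = zone_flag
if has_unmanaged_dbs:
  config_values['vm_groups']['servers']['vm_spec'][cloud]['zone'] = zone_flag
print(config_values['db_spec'][cloud]['zone'])
```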
+    config_values_vm_groups = config_values['vm_groups']
+    if has_unmanaged_dbs:
+      config_values_vm_groups['servers']['vm_spec'][cloud]['machine_type'] = {
+          'cpus': flag_values.managed_db_cpus,
+          'memory': flag_values.managed_db_memory
+      }
+    if flag_values['managed_db_azure_compute_units'].present:
+      config_values['db_spec'][cloud]['machine_type']['compute_units'] = (
+          flag_values.managed_db_azure_compute_units)
+    if flag_values['managed_db_tier'].present:
+      config_values['db_spec'][cloud]['machine_type']['tier'] = (
+          flag_values.managed_db_tier)
+    if has_client_machine_type:
+      config_values['vm_groups']['clients']['vm_spec'][cloud][
+          'machine_type'] = (
+              flag_values.client_vm_machine_type)
+    if has_client_custom_machine_type:
+      config_values_vm_groups = config_values['vm_groups']
+      config_values_vm_groups['clients']['vm_spec'][cloud]['machine_type'] = {
+          'cpus': flag_values.client_vm_cpus,
+          'memory': flag_values.client_vm_memory
+      }
+    if flag_values[
+        'db_num_striped_disks'].present and has_unmanaged_dbs:
+      config_values['vm_groups']['servers']['disk_spec'][cloud][
+          'num_striped_disks'] = flag_values.db_num_striped_disks
+    if flag_values['managed_db_disk_size'].present:
+      config_values['db_disk_spec'][cloud]['disk_size'] = (
+          flag_values.managed_db_disk_size)
+      if has_unmanaged_dbs:
+        config_values['vm_groups']['servers']['disk_spec'][cloud][
+            'disk_size'] = flag_values.managed_db_disk_size
+    if flag_values['managed_db_disk_type'].present:
+      config_values['db_disk_spec'][cloud]['disk_type'] = (
+          flag_values.managed_db_disk_type)
+      if has_unmanaged_dbs:
+        config_values['vm_groups']['servers']['disk_spec'][cloud][
+            'disk_type'] = flag_values.managed_db_disk_type
+    if flag_values['managed_db_disk_iops'].present:
+      # This value will be used in aws_relation_db.py during db creation
+      config_values['db_disk_spec'][cloud]['iops'] = (
+          flag_values.managed_db_disk_iops)
+      if has_unmanaged_dbs:
+        config_values['vm_groups']['servers']['disk_spec'][cloud][
+            'iops'] = flag_values.managed_db_disk_iops
+
+    if flag_values['client_vm_os_type'].present:
+      config_values['vm_groups']['clients'][
+          'os_type'] = flag_values.client_vm_os_type
+    if flag_values['server_vm_os_type'].present:
+      config_values['vm_groups']['servers'][
+          'os_type'] = flag_values.server_vm_os_type
+
+    if flag_values['client_gcp_min_cpu_platform'].present:
+      config_values['vm_groups']['clients']['vm_spec'][cloud][
+          'min_cpu_platform'] = flag_values.client_gcp_min_cpu_platform
+    if flag_values['server_gcp_min_cpu_platform'].present:
+      config_values['vm_groups']['servers']['vm_spec'][cloud][
+          'min_cpu_platform'] = flag_values.server_gcp_min_cpu_platform
+
+    if flag_values['client_vm_disk_size'].present:
+      config_values['vm_groups']['clients']['disk_spec'][cloud]['disk_size'] = (
+          flag_values.client_vm_disk_size)
+    if flag_values['client_vm_disk_type'].present:
+      config_values['vm_groups']['clients']['disk_spec'][cloud]['disk_type'] = (
+          flag_values.client_vm_disk_type)
+    if flag_values['client_vm_disk_iops'].present:
+      config_values['vm_groups']['clients']['disk_spec'][cloud]['disk_iops'] = (
+          flag_values.client_vm_disk_iops)
+    logging.warning('Relational db config values: %s', config_values)
+
+
+class _SparkServiceSpec(spec.BaseSpec):
+  """Configurable options of an Apache Spark Service.
+
+  We may add more options here, such as disk specs, as necessary.
+  When there are flags for these attributes, the convention is that
+  the flag is prefixed with spark.
For example, the static_cluster_id + is overridden by the flag spark_static_cluster_id + + Attributes: + service_type: string. pkb_managed or managed_service + static_cluster_id: if user has created a cluster, the id of the cluster. + worker_group: Vm group spec for workers. + master_group: Vm group spec for master + """ + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_SparkServiceSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_SparkServiceSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'static_cluster_id': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'service_type': (option_decoders.EnumDecoder, { + 'default': + spark_service.PROVIDER_MANAGED, + 'valid_values': [ + spark_service.PROVIDER_MANAGED, spark_service.PKB_MANAGED + ] + }), + 'worker_group': (_VmGroupSpecDecoder, {}), + 'master_group': (_VmGroupSpecDecoder, { + 'default': None, + 'none_ok': True + }) + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(_SparkServiceSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['spark_static_cluster_id'].present: + config_values['static_cluster_id'] = (flag_values.spark_static_cluster_id) + if flag_values['zones'].present: + for group in ('master_group', 'worker_group'): + if group in config_values: + for cloud in config_values[group]['vm_spec']: + config_values[group]['vm_spec'][cloud]['zone'] = ( + flag_values.zones[0]) + + +class _VmGroupSpec(spec.BaseSpec): + """Configurable options of a VM group. + + Attributes: + cloud: string. Cloud provider of the VMs in this group. + disk_count: int. Number of data disks to attach to each VM in this group. + disk_spec: BaseDiskSpec. Configuration for all data disks to be attached to + VMs in this group. + os_type: string. OS type of the VMs in this group. + static_vms: None or list of StaticVmSpecs. Configuration for all static VMs + in this group. + vm_count: int. Number of VMs in this group, including static VMs and + provisioned VMs. + vm_spec: BaseVmSpec. Configuration for provisioned VMs in this group. + placement_group_name: string. Name of placement group that VM group belongs + to. 
+ cidr: subnet each vm in this group belongs to + """ + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_VmGroupSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + ignore_package_requirements = ( + getattr(flag_values, 'ignore_package_requirements', True) + if flag_values else True) + providers.LoadProvider(self.cloud, ignore_package_requirements) + if self.disk_spec: + disk_config = getattr(self.disk_spec, self.cloud, None) + if disk_config is None: + raise errors.Config.MissingOption( + '{0}.cloud is "{1}", but {0}.disk_spec does not contain a ' + 'configuration for "{1}".'.format(component_full_name, self.cloud)) + disk_spec_class = disk.GetDiskSpecClass(self.cloud) + self.disk_spec = disk_spec_class( + '{0}.disk_spec.{1}'.format(component_full_name, self.cloud), + flag_values=flag_values, + **disk_config) + vm_config = getattr(self.vm_spec, self.cloud, None) + if vm_config is None: + raise errors.Config.MissingOption( + '{0}.cloud is "{1}", but {0}.vm_spec does not contain a ' + 'configuration for "{1}".'.format(component_full_name, self.cloud)) + vm_spec_class = virtual_machine.GetVmSpecClass(self.cloud) + self.vm_spec = vm_spec_class( + '{0}.vm_spec.{1}'.format(component_full_name, self.cloud), + flag_values=flag_values, + **vm_config) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_VmGroupSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS + }), + 'disk_count': (option_decoders.IntDecoder, { + 'default': _DEFAULT_DISK_COUNT, + 'min': 0, + 'none_ok': True + }), + 'disk_spec': (option_decoders.PerCloudConfigDecoder, { + 'default': None, + 'none_ok': True + }), + 'os_type': (option_decoders.EnumDecoder, { + 'valid_values': os_types.ALL + }), + 'static_vms': (_StaticVmListDecoder, {}), + 'vm_count': (option_decoders.IntDecoder, { + 'default': _DEFAULT_VM_COUNT, + 'min': 0 + }), + 'cidr': (option_decoders.StringDecoder, { + 'default': None + }), + 'vm_spec': (option_decoders.PerCloudConfigDecoder, {}), + 'placement_group_name': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. 
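For reference while reading `_ApplyFlags` below, here is a hedged sketch of a single `vm_groups` entry and the three flag overrides applied to it; the booleans stand in for `flag_values[...].present` and all concrete values are invented:

```python
# A hedged example of a single vm_groups entry, written as the dict that
# _VmGroupSpec decodes. Machine types, sizes, and OS names are illustrative.
vm_group_config = {
    'cloud': 'AWS',
    'os_type': 'ubuntu2004',
    'vm_count': None,  # None defers to --num_vms, see _ApplyFlags below
    'disk_count': 1,
    'vm_spec': {'AWS': {'machine_type': 'm5.large', 'zone': 'us-east-1a'}},
    'disk_spec': {'AWS': {'disk_type': 'gp2', 'disk_size': 50,
                          'mount_point': '/scratch'}},
}

# Mirror of the three overrides in _VmGroupSpec._ApplyFlags; the booleans
# stand in for flag_values['cloud'].present and flag_values['os_type'].present.
cloud_flag_present, os_type_flag_present, num_vms_flag = False, True, 2
if cloud_flag_present or 'cloud' not in vm_group_config:
  vm_group_config['cloud'] = 'GCP'           # would be flag_values.cloud
if os_type_flag_present or 'os_type' not in vm_group_config:
  vm_group_config['os_type'] = 'ubuntu2204'  # would be flag_values.os_type
if 'vm_count' in vm_group_config and vm_group_config['vm_count'] is None:
  vm_group_config['vm_count'] = num_vms_flag
print(vm_group_config['cloud'], vm_group_config['os_type'],
      vm_group_config['vm_count'])
```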
+ """ + super(_VmGroupSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + if flag_values['os_type'].present or 'os_type' not in config_values: + config_values['os_type'] = flag_values.os_type + if 'vm_count' in config_values and config_values['vm_count'] is None: + config_values['vm_count'] = flag_values.num_vms + + +class _VmGroupsDecoder(option_decoders.TypeVerifier): + """Validates the vm_groups dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_VmGroupsDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies vm_groups dictionary of a benchmark config object. + + Args: + value: dict mapping VM group name string to the corresponding VM group + config dict. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + dict mapping VM group name string to _VmGroupSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + vm_group_configs = super(_VmGroupsDecoder, + self).Decode(value, component_full_name, + flag_values) + result = {} + for vm_group_name, vm_group_config in six.iteritems(vm_group_configs): + result[vm_group_name] = _VmGroupSpec( + '{0}.{1}'.format( + self._GetOptionFullName(component_full_name), vm_group_name), + flag_values=flag_values, + **vm_group_config) + return result + + +class _VmGroupSpecDecoder(option_decoders.TypeVerifier): + """Validates a single VmGroupSpec dictionary.""" + + def __init__(self, **kwargs): + super(_VmGroupSpecDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies vm_groups dictionary of a benchmark config object. + + Args: + value: dict corresonding to a VM group config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + dict a _VmGroupSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + vm_group_config = super(_VmGroupSpecDecoder, + self).Decode(value, component_full_name, + flag_values) + return _VmGroupSpec( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, + **vm_group_config) + + +class _PlacementGroupSpecsDecoder(option_decoders.TypeVerifier): + """Validates the placement_group_specs dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_PlacementGroupSpecsDecoder, self).__init__( + valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies placement_group_specs dictionary of a benchmark config object. + + Args: + value: dict mapping Placement Group Spec name string to the corresponding + placement group spec config dict. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + dict mapping Placement Group Spec name string + to placement_group.BasePlacementGroupSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. 
+ """ + placement_group_spec_configs = ( + super(_PlacementGroupSpecsDecoder, + self).Decode(value, component_full_name, flag_values)) + result = {} + for placement_group_name, placement_group_spec_config in six.iteritems( + placement_group_spec_configs): + placement_group_spec_class = placement_group.GetPlacementGroupSpecClass( + self.cloud) + result[placement_group_name] = placement_group_spec_class( + '{0}.{1}'.format( + self._GetOptionFullName(component_full_name), + placement_group_name), + flag_values=flag_values, + **placement_group_spec_config) + return result + + +class _ContainerRegistryDecoder(option_decoders.TypeVerifier): + """Validates the container_registry dictionary of a benchmark config.""" + + def __init__(self, **kwargs): + super(_ContainerRegistryDecoder, self).__init__( + valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies container_registry dictionary of a benchmark config object. + + Args: + value: dict mapping VM group name string to the corresponding container + spec config dict. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + dict mapping container spec name string to ContainerSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + vm_group_config = super(_ContainerRegistryDecoder, + self).Decode(value, component_full_name, + flag_values) + return container_service.ContainerRegistrySpec( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, + **vm_group_config) + + +class _ContainerSpecsDecoder(option_decoders.TypeVerifier): + """Validates the container_specs dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_ContainerSpecsDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies container_specs dictionary of a benchmark config object. + + Args: + value: dict mapping VM group name string to the corresponding container + spec config dict. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + dict mapping container spec name string to ContainerSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + container_spec_configs = super(_ContainerSpecsDecoder, + self).Decode(value, component_full_name, + flag_values) + result = {} + for spec_name, spec_config in six.iteritems(container_spec_configs): + result[spec_name] = container_service.ContainerSpec( + '{0}.{1}'.format( + self._GetOptionFullName(component_full_name), spec_name), + flag_values=flag_values, + **spec_config) + return result + + +class _NodepoolSpec(spec.BaseSpec): + """Configurable options of a Nodepool.""" + + def __init__(self, + component_full_name, + group_name, + flag_values=None, + **kwargs): + super(_NodepoolSpec, self).__init__( + '{0}.{1}'.format(component_full_name, group_name), + flag_values=flag_values, + **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. 
+ The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_NodepoolSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'vm_count': (option_decoders.IntDecoder, { + 'default': _DEFAULT_VM_COUNT, + 'min': 0 + }), + 'vm_spec': (option_decoders.PerCloudConfigDecoder, {}) + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(_NodepoolSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['container_cluster_num_vms'].present: + config_values['vm_count'] = flag_values.container_cluster_num_vms + + # Need to apply the first zone in the zones flag, if specified, + # to the spec. _NodepoolSpec does not currently support + # running in multiple zones in a single PKB invocation. + if flag_values['zones'].present: + for cloud in config_values['vm_spec']: + config_values['vm_spec'][cloud]['zone'] = (flag_values.zones[0]) + + +class _NodepoolsDecoder(option_decoders.TypeVerifier): + """Validate the nodepool dictionary of a nodepools config object.""" + + def __init__(self, **kwargs): + super(_NodepoolsDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify Nodepool dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _NodepoolsDecoder built from the config passed in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. 
+ """ + nodepools_configs = super(_NodepoolsDecoder, self).Decode( + value, component_full_name, flag_values) + result = {} + for nodepool_name, nodepool_config in six.iteritems(nodepools_configs): + result[nodepool_name] = _NodepoolSpec( + self._GetOptionFullName(component_full_name), nodepool_name, + flag_values, **nodepool_config) + return result + + +class _ContainerClusterSpec(spec.BaseSpec): + """Spec containing info needed to create a container cluster.""" + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_ContainerClusterSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + ignore_package_requirements = ( + getattr(flag_values, 'ignore_package_requirements', True) + if flag_values else True) + providers.LoadProvider(self.cloud, ignore_package_requirements) + vm_config = getattr(self.vm_spec, self.cloud, None) + if vm_config is None: + raise errors.Config.MissingOption( + '{0}.cloud is "{1}", but {0}.vm_spec does not contain a ' + 'configuration for "{1}".'.format(component_full_name, self.cloud)) + vm_spec_class = virtual_machine.GetVmSpecClass(self.cloud) + self.vm_spec = vm_spec_class( + '{0}.vm_spec.{1}'.format(component_full_name, self.cloud), + flag_values=flag_values, + **vm_config) + nodepools = {} + for nodepool_name, nodepool_spec in sorted(six.iteritems(self.nodepools)): + if nodepool_name == container_service.DEFAULT_NODEPOOL: + raise errors.Config.InvalidValue( + 'Nodepool name {0} is reserved for use during cluster creation. ' + 'Please rename nodepool'.format(nodepool_name)) + nodepool_config = getattr(nodepool_spec.vm_spec, self.cloud, None) + if nodepool_config is None: + raise errors.Config.MissingOption( + '{0}.cloud is "{1}", but {0}.vm_spec does not contain a ' + 'configuration for "{1}".'.format(component_full_name, self.cloud)) + vm_spec_class = virtual_machine.GetVmSpecClass(self.cloud) + nodepool_spec.vm_spec = vm_spec_class( + '{0}.vm_spec.{1}'.format(component_full_name, self.cloud), + flag_values=flag_values, + **nodepool_config) + nodepools[nodepool_name] = nodepool_spec + + self.nodepools = nodepools + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_ContainerClusterSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'static_cluster': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS + }), + 'type': (option_decoders.StringDecoder, { + 'default': container_service.KUBERNETES, + }), + 'vm_count': (option_decoders.IntDecoder, { + 'default': _DEFAULT_VM_COUNT, + 'min': 0 + }), + 'min_vm_count': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True, + 'min': 0 + }), + 'max_vm_count': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True, + 'min': 0 + }), + # vm_spec is used to define the machine type for the default nodepool + 'vm_spec': (option_decoders.PerCloudConfigDecoder, {}), + # nodepools specifies a list of additional nodepools to create alongside + # the default nodepool (nodepool created on cluster creation). 
+ 'nodepools': (_NodepoolsDecoder, { + 'default': {}, + 'none_ok': True + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + super(_ContainerClusterSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + if flag_values['container_cluster_cloud'].present: + config_values['cloud'] = flag_values.container_cluster_cloud + if flag_values['container_cluster_type'].present: + config_values['type'] = flag_values.container_cluster_type + if flag_values['container_cluster_num_vms'].present: + config_values['vm_count'] = flag_values.container_cluster_num_vms + + # Need to apply the first zone in the zones flag, if specified, + # to the spec. ContainerClusters do not currently support + # running in multiple zones in a single PKB invocation. + if flag_values['zones'].present: + for cloud in config_values['vm_spec']: + config_values['vm_spec'][cloud]['zone'] = (flag_values.zones[0]) + + +class _ContainerClusterSpecDecoder(option_decoders.TypeVerifier): + """Validates a ContainerClusterSpec dictionairy.""" + + def __init__(self, **kwargs): + super(_ContainerClusterSpecDecoder, self).__init__( + valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies container_cluster dictionairy of a benchmark config object.""" + cluster_config = super(_ContainerClusterSpecDecoder, + self).Decode(value, component_full_name, flag_values) + + return _ContainerClusterSpec( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, + **cluster_config) + + +class _SparkServiceDecoder(option_decoders.TypeVerifier): + """Validates the spark_service dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_SparkServiceDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifies spark_service dictionary of a benchmark config object. + + Args: + value: dict Spark Service config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _SparkServiceSpec Build from the config passed in value. + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + spark_service_config = super(_SparkServiceDecoder, + self).Decode(value, component_full_name, + flag_values) + result = _SparkServiceSpec( + self._GetOptionFullName(component_full_name), flag_values, + **spark_service_config) + return result + + +class _RelationalDbDecoder(option_decoders.TypeVerifier): + """Validate the relational_db dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_RelationalDbDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify relational_db dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _RelationalDbService built from the config passed in in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. 
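Taken together, `_ContainerClusterSpec` and `_NodepoolsDecoder` accept a default-nodepool `vm_spec` plus any number of named extra nodepools, and the constructor rejects a nodepool that reuses the reserved default-nodepool name. A hedged sketch, where the cluster type and reserved name are stand-ins for the real `container_service` constants and all machine types are invented:

```python
# Illustrative container_cluster config as _ContainerClusterSpecDecoder would
# receive it. 'Kubernetes' and 'default' are stand-ins for the real
# container_service.KUBERNETES and container_service.DEFAULT_NODEPOOL
# constants; machine types and nodepool names are invented.
RESERVED_DEFAULT_NODEPOOL = 'default'

container_cluster_config = {
    'cloud': 'GCP',
    'type': 'Kubernetes',
    'vm_count': 3,  # size of the default nodepool
    'vm_spec': {'GCP': {'machine_type': 'e2-standard-4',
                        'zone': 'us-central1-a'}},
    'nodepools': {
        'highmem': {  # an extra nodepool created alongside the default one
            'vm_count': 2,
            'vm_spec': {'GCP': {'machine_type': 'e2-highmem-8',
                                'zone': 'us-central1-a'}},
        },
    },
}

# Same guard as _ContainerClusterSpec.__init__: a user-defined nodepool may
# not reuse the name reserved for the cluster's default nodepool.
for name in container_cluster_config['nodepools']:
  if name == RESERVED_DEFAULT_NODEPOOL:
    raise ValueError('Nodepool name %s is reserved' % name)
print(sorted(container_cluster_config['nodepools']))
```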
+ """ + relational_db_config = super(_RelationalDbDecoder, + self).Decode(value, component_full_name, + flag_values) + result = _RelationalDbSpec( + self._GetOptionFullName(component_full_name), flag_values, + **relational_db_config) + return result + + +class _NonRelationalDbDecoder(option_decoders.TypeVerifier): + """Validate the non_relational_db dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_NonRelationalDbDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify non_relational_db dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _NonRelationalDbService built from the config passed in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + non_relational_db_config = super().Decode(value, component_full_name, + flag_values) + if 'service_type' in non_relational_db_config: + db_spec_class = non_relational_db.GetNonRelationalDbSpecClass( + non_relational_db_config['service_type']) + else: + raise errors.Config.InvalidValue( + 'Required attribute `service_type` missing from non_relational_db ' + 'config.') + return db_spec_class( + self._GetOptionFullName(component_full_name), flag_values, + **non_relational_db_config) + + +class _SpannerDecoder(option_decoders.TypeVerifier): + """Validate the spanner dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_SpannerDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify spanner dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _SpannerSpec built from the config passed in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + spanner_config = super().Decode(value, component_full_name, flag_values) + # Allow for subclass-specific specs. + if 'service_type' in spanner_config: + spanner_spec_class = gcp_spanner.GetSpannerSpecClass( + spanner_config['service_type']) + else: + raise errors.Config.InvalidValue( + 'Required attribute `service_type` missing from spanner config.') + return spanner_spec_class( + self._GetOptionFullName(component_full_name), flag_values, + **spanner_config) + + +class _TpuGroupsDecoder(option_decoders.TypeVerifier): + """Validate the tpu dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_TpuGroupsDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify tpu dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _Tpu built from the config passed in in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. 
+ """ + tpu_group_configs = super(_TpuGroupsDecoder, + self).Decode(value, component_full_name, + flag_values) + result = {} + for tpu_group_name, tpu_group_config in six.iteritems(tpu_group_configs): + result[tpu_group_name] = _TpuGroupSpec( + self._GetOptionFullName(component_full_name), tpu_group_name, + flag_values, **tpu_group_config) + return result + + +class _CloudRedisSpec(spec.BaseSpec): + """Specs needed to configure a cloud redis instance.""" + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_CloudRedisSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + if not self.redis_name: + self.redis_name = 'pkb-cloudredis-{0}'.format(flag_values.run_uri) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments to + construct in order to decode the named option. + """ + result = super(_CloudRedisSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS + }), + 'redis_name': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': False + }), + 'redis_version': (option_decoders.EnumDecoder, { + 'default': managed_memory_store.REDIS_3_2, + 'valid_values': managed_memory_store.REDIS_VERSIONS + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(_CloudRedisSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + + +class _CloudRedisDecoder(option_decoders.TypeVerifier): + """Validate the cloud_redis dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_CloudRedisDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify cloud_redis dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _CloudRedis built from the config passed in in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + cloud_redis_config = super(_CloudRedisDecoder, + self).Decode(value, component_full_name, + flag_values) + result = _CloudRedisSpec( + self._GetOptionFullName(component_full_name), flag_values, + **cloud_redis_config) + return result + + +class _VPNServiceSpec(spec.BaseSpec): + """Spec needed to configure a vpn tunnel between two vm_groups. 
+ + Since vpn_gateway may be across cloud providers we only create tunnel when + vpn_gateway's are up and known + """ + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(_VPNServiceSpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + if not self.name: + self.name = 'pkb-vpn-svc-{0}'.format(flag_values.run_uri) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments to + construct in order to decode the named option. + """ + result = super(_VPNServiceSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'shared_key': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'name': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'tunnel_count': (option_decoders.IntDecoder, { + 'default': 1, + 'none_ok': True + }), + 'gateway_count': (option_decoders.IntDecoder, { + 'default': 1, + 'none_ok': True + }), + 'routing_type': (option_decoders.StringDecoder, { + 'default': 'static', + 'none_ok': True + }), + 'ike_version': (option_decoders.IntDecoder, { + 'default': 1, + 'none_ok': True + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(_VPNServiceSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['vpn_service_tunnel_count'].present: + config_values['tunnel_count'] = flag_values.vpn_service_tunnel_count + if flag_values['vpn_service_gateway_count'].present: + config_values['gateway_count'] = flag_values.vpn_service_gateway_count + if flag_values['vpn_service_name'].present: + config_values['name'] = flag_values.vpn_service_name + if flag_values['vpn_service_shared_key'].present: + config_values['shared_key'] = flag_values.vpn_service_shared_key + if flag_values['vpn_service_routing_type'].present: + config_values['routing_type'] = flag_values.vpn_service_routing_type + if flag_values['vpn_service_ike_version'].present: + config_values['ike_version'] = flag_values.vpn_service_ike_version + + +class _VPNServiceDecoder(option_decoders.TypeVerifier): + """Validate the vpn_service dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_VPNServiceDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify vpn_service dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _VPNService built from the config passed in in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. 
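The VPN spec is mostly tunable knobs with defaults. A hedged example of a fully spelled-out `vpn_service` block, where the concrete values are illustrative and the defaults noted in comments are the ones declared in the decoder table above:

```python
# Illustrative vpn_service block; keys mirror
# _VPNServiceSpec._GetOptionDecoderConstructions() and the noted defaults are
# the ones declared there. All concrete values are examples.
vpn_service_config = {
    'name': 'pkb-vpn-svc-example',  # __init__ derives a run_uri-based default
    'shared_key': None,             # decoder default: None
    'tunnel_count': 2,              # decoder default: 1
    'gateway_count': 2,             # decoder default: 1
    'routing_type': 'static',       # decoder default: 'static'
    'ike_version': 2,               # decoder default: 1
}

# --vpn_service_* flags simply replace these entries in _ApplyFlags before
# the _VPNServiceSpec object is constructed.
print({k: v for k, v in vpn_service_config.items() if v is not None})
```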
+ """ + vpn_service_config = super(_VPNServiceDecoder, + self).Decode(value, component_full_name, + flag_values) + result = _VPNServiceSpec( + self._GetOptionFullName(component_full_name), flag_values, + **vpn_service_config) + return result + + +class _AppGroupSpec(spec.BaseSpec): + """Configurable options of a AppService group.""" + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_AppGroupSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'app_runtime': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'app_type': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'appservice_count': (option_decoders.IntDecoder, { + 'default': 1 + }), + 'appservice_spec': (_AppServiceDecoder, {}) + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + super(_AppGroupSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['appservice_count'].present: + config_values['appservice_count'] = flag_values.appservice_count + if flag_values['app_runtime'].present: + config_values['app_runtime'] = flag_values.app_runtime + if flag_values['app_type'].present: + config_values['app_type'] = flag_values.app_type + + +class _AppGroupsDecoder(option_decoders.TypeVerifier): + """Verify app_groups dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_AppGroupsDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verifys app_groups dictionary of a benchmark config object. + + Args: + value: dict. Config dictionary. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + dict mapping app group name string to _AppGroupSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + app_group_configs = super(_AppGroupsDecoder, + self).Decode(value, component_full_name, + flag_values) + result = {} + for app_group_name, app_group_config in six.iteritems(app_group_configs): + result[app_group_name] = _AppGroupSpec( + '{0}.{1}'.format( + self._GetOptionFullName(component_full_name), app_group_name), + flag_values=flag_values, + **app_group_config) + return result + + +class _AppServiceDecoder(option_decoders.TypeVerifier): + """Verify app_service dict of a benchmark config object.""" + + def __init__(self, **kwargs): + super(_AppServiceDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify app_service dict of a benchmark config object. + + Args: + value: dict. Config dictionary. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + AppService object built from config. + + Raises: + errors.Config.InvalidValue upon invalid input value. 
+ """ + config = super(_AppServiceDecoder, self).Decode(value, component_full_name, + flag_values) + spec_cls = app_service.GetAppServiceSpecClass( + flag_values.appservice or config.get('appservice')) + return spec_cls( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, + **config) + + +class _MessagingServiceSpec(spec.BaseSpec): + """Specs needed to configure messaging service. + """ + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super().__init__(component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments to + construct in order to decode the named option. + """ + result = super()._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS}), + # TODO(odiego): Add support for push delivery mechanism + 'delivery': (option_decoders.EnumDecoder, { + 'valid_values': ('pull',)}), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super()._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + # TODO(odiego): Handle delivery when adding more delivery mechanisms + + +class _MessagingServiceDecoder(option_decoders.TypeVerifier): + """Validate the messaging_service dictionary of a benchmark config object.""" + + def __init__(self, **kwargs): + super().__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify messaging_service dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _MessagingServiceSpec object built from the config passed in in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + messaging_service_config = super().Decode(value, component_full_name, + flag_values) + result = _MessagingServiceSpec( + self._GetOptionFullName(component_full_name), flag_values, + **messaging_service_config) + return result + + +class _DataDiscoveryServiceSpec(spec.BaseSpec): + """Specs needed to configure data discovery service. + """ + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments to + construct in order to decode the named option. 
+ """ + result = super()._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.EnumDecoder, { + 'valid_values': providers.VALID_CLOUDS + }), + 'service_type': ( + option_decoders.EnumDecoder, + { + 'default': + data_discovery_service.GLUE, + 'valid_values': [ + data_discovery_service.GLUE, + ] + }), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super()._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + + +class _DataDiscoveryServiceDecoder(option_decoders.TypeVerifier): + """Validate the data_discovery_service dict of a benchmark config object.""" + + def __init__(self, **kwargs): + super().__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Verify data_discovery_service dict of a benchmark config object. + + Args: + value: dict. Config dictionary + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _DataDiscoveryServiceSpec object built from the config passed in value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + if value is None: + value = {} + data_discovery_service_config = super().Decode(value, component_full_name, + flag_values) + result = _DataDiscoveryServiceSpec( + self._GetOptionFullName(component_full_name), flag_values, + **data_discovery_service_config) + return result + + +class BenchmarkConfigSpec(spec.BaseSpec): + """Configurable options of a benchmark run. + + Attributes: + description: None or string. Description of the benchmark to run. + name: Optional. The name of the benchmark + flags: dict. Values to use for each flag while executing the benchmark. + vm_groups: dict mapping VM group name string to _VmGroupSpec. Configurable + options for each VM group used by the benchmark. + """ + + def __init__(self, component_full_name, expected_os_types=None, **kwargs): + """Initializes a BenchmarkConfigSpec. + + Args: + component_full_name: string. Fully qualified name of the benchmark config + dict within the config file. + expected_os_types: Optional series of strings from os_types.ALL. + **kwargs: Keyword arguments for the BaseSpec constructor. + + Raises: + errors.Config.InvalidValue: If expected_os_types is provided and any of + the VM groups are configured with an OS type that is not included. + """ + super(BenchmarkConfigSpec, self).__init__(component_full_name, **kwargs) + if expected_os_types is not None: + mismatched_os_types = [] + for group_name, group_spec in sorted(six.iteritems(self.vm_groups)): + if group_spec.os_type not in expected_os_types: + mismatched_os_types.append('{0}.vm_groups[{1}].os_type: {2}'.format( + component_full_name, repr(group_name), repr(group_spec.os_type))) + if mismatched_os_types: + raise errors.Config.InvalidValue( + 'VM groups in {0} may only have the following OS types: {1}. 
The ' + 'following VM group options are invalid:{2}{3}'.format( + component_full_name, + ', '.join(repr(os_type) for os_type in expected_os_types), + os.linesep, os.linesep.join(mismatched_os_types))) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Can be overridden by derived classes to add options or impose additional + requirements on existing options. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(BenchmarkConfigSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'description': (option_decoders.StringDecoder, { + 'default': None + }), + 'name': (option_decoders.StringDecoder, { + 'default': None + }), + 'flags': (option_decoders.TypeVerifier, { + 'default': None, + 'none_ok': True, + 'valid_types': (dict,) + }), + 'vm_groups': (_VmGroupsDecoder, { + 'default': {} + }), + 'placement_group_specs': (_PlacementGroupSpecsDecoder, { + 'default': {} + }), + 'spark_service': (_SparkServiceDecoder, { + 'default': None + }), + 'container_cluster': (_ContainerClusterSpecDecoder, { + 'default': None + }), + 'container_registry': (_ContainerRegistryDecoder, { + 'default': None + }), + 'container_specs': (_ContainerSpecsDecoder, { + 'default': None + }), + 'dpb_service': (_DpbServiceDecoder, { + 'default': None + }), + 'relational_db': (_RelationalDbDecoder, { + 'default': None + }), + 'tpu_groups': (_TpuGroupsDecoder, { + 'default': {} + }), + 'edw_service': (_EdwServiceDecoder, { + 'default': None + }), + 'cloud_redis': (_CloudRedisDecoder, { + 'default': None + }), + 'vpn_service': (_VPNServiceDecoder, { + 'default': None + }), + 'app_groups': (_AppGroupsDecoder, { + 'default': {} + }), + 'vpc_peering': (option_decoders.BooleanDecoder, { + 'default': False, + 'none_ok': True, + }), + 'non_relational_db': (_NonRelationalDbDecoder, { + 'default': None, + 'none_ok': True, + }), + 'spanner': (_SpannerDecoder, { + 'default': None, + 'none_ok': True, + }), + 'messaging_service': (_MessagingServiceDecoder, { + 'default': None, + }), + 'data_discovery_service': (_DataDiscoveryServiceDecoder, { + 'default': None, + 'none_ok': True, + }) + }) + return result + + def _DecodeAndInit(self, component_full_name, config, decoders, flag_values): + """Initializes spec attributes from provided config option values. + + Args: + component_full_name: string. Fully qualified name of the configurable + component containing the config options. + config: dict mapping option name string to option value. + decoders: OrderedDict mapping option name string to ConfigOptionDecoder. + flag_values: flags.FlagValues. Runtime flags that may override provided + config option values. These flags have already been applied to the + current config, but they may be passed to the decoders for propagation + to deeper spec constructors. + """ + # Decode benchmark-specific flags first and use them while decoding the + # rest of the BenchmarkConfigSpec's options. + decoders = decoders.copy() + self.flags = config.get('flags') + with self.RedirectFlags(flag_values): + super(BenchmarkConfigSpec, + self)._DecodeAndInit(component_full_name, config, decoders, + flag_values) + + @contextlib.contextmanager + def RedirectFlags(self, flag_values): + """Redirects flag reads and writes to the benchmark-specific flags object. 
+ + Args: + flag_values: flags.FlagValues object. Within the enclosed code block, + reads and writes to this object are redirected to self.flags. + + Yields: + context manager that redirects flag reads and writes. + """ + with flag_util.OverrideFlags(flag_values, self.flags): + yield diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/default_config_constants.yaml b/script/cumulus/pkb/perfkitbenchmarker/configs/default_config_constants.yaml new file mode 100644 index 0000000..75a4b29 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/default_config_constants.yaml @@ -0,0 +1,192 @@ +# All anchors defined in this file should be compatible +# with *all* clouds. That means any vm_specs or disk_specs +# defined here should have keys for every cloud. +default_single_core: &default_single_core + GCP: + machine_type: n1-standard-1 + zone: us-central1-a + image: null + Azure: + machine_type: Standard_A1 + zone: eastus2 + image: null + AWS: + machine_type: t2.small + zone: us-east-1 + image: null + AliCloud: + machine_type: ecs.g5.large + zone: cn-beijing-g + image: null + DigitalOcean: + machine_type: 2gb + zone: sfo1 + image: null + OpenStack: + machine_type: m1.small + zone: nova + image: null + CloudStack: + machine_type: 1vCPU.1GB + zone: QC-1 + image: null + Rackspace: + machine_type: general1-1 + zone: IAD + image: null + Kubernetes: + image: null + Mesos: + image: null + ProfitBricks: + machine_type: Small + zone: ZONE_1 + image: null + Docker: + image: null + machine_type: + cpus: 1 + memory: 2.0GiB + IBMCloud: + machine_type: cx2-2x4 + zone: us-south-1 + image: null + +# TODO: update the two core machines for more providers +default_dual_core: &default_dual_core + GCP: + machine_type: n1-standard-2 + zone: us-central1-a + image: null + Azure: + machine_type: Standard_D2_v3 + zone: eastus2 + image: null + AWS: + machine_type: m5.large + zone: us-east-1 + image: null + Docker: + image: null + machine_type: + cpus: 2 + memory: 4.0GiB + AliCloud: + machine_type: ecs.g5.xlarge + zone: cn-beijing-g + image: null + IBMCloud: + machine_type: cx2-4x8 + zone: us-south-1 + image: null + +# TODO(user): update the disk types below as more providers are +# updated for the disk types refactor. +default_500_gb: &default_500_gb + GCP: + disk_type: pd-standard + disk_size: 500 + mount_point: /scratch + Azure: + disk_type: Standard_LRS + disk_size: 500 + mount_point: /scratch + AWS: + disk_type: standard + disk_size: 500 + mount_point: /scratch + AliCloud: + disk_type: standard + disk_size: 500 + mount_point: /scratch + DigitalOcean: + disk_type: standard + disk_size: 500 + mount_point: /scratch + OpenStack: + disk_type: standard + disk_size: 500 + mount_point: /scratch + CloudStack: + disk_size: 500 + mount_point: /scratch + Rackspace: + disk_type: standard + disk_size: 500 + mount_point: /scratch + Kubernetes: + disk_type: emptyDir + disk_size: 500 + mount_point: /scratch + Mesos: + disk_type: local + disk_size: 500 + mount_point: /scratch + ProfitBricks: + disk_type: standard + disk_size: 500 + mount_point: /scratch + Docker: + disk_type: local + disk_size: 500 + mount_point: /scratch + IBMCloud: + disk_type: standard + disk_size: 500 + mount_point: /scratch + + +# TODO(user): update the disk types below as more providers are +# updated for the disk types refactor. 
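+# A benchmark's default config would typically pick up one of these disk
+# anchors per VM group, for example (illustrative only):
+#   vm_groups:
+#     default:
+#       disk_spec: *default_500_gb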
+default_50_gb: &default_50_gb + GCP: + disk_type: pd-standard + disk_size: 50 + mount_point: /scratch + Azure: + disk_type: Standard_LRS + disk_size: 50 + mount_point: /scratch + AWS: + disk_type: standard + disk_size: 50 + mount_point: /scratch + AliCloud: + disk_type: standard + disk_size: 50 + mount_point: /scratch + DigitalOcean: + disk_type: standard + disk_size: 50 + mount_point: /scratch + OpenStack: + disk_type: standard + disk_size: 50 + mount_point: /scratch + CloudStack: + disk_size: 50 + mount_point: /scratch + Rackspace: + disk_type: standard + disk_size: 50 + mount_point: /scratch + Kubernetes: + disk_type: emptyDir + disk_size: 50 + mount_point: /scratch + Mesos: + disk_type: local + disk_size: 50 + mount_point: /scratch + ProfitBricks: + disk_type: standard + disk_size: 50 + mount_point: /scratch + Docker: + disk_type: local + disk_size: 50 + mount_point: /scratch + IBMCloud: + disk_type: standard + disk_size: 50 + mount_point: /scratch diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/example_docker_to_cloud.yaml b/script/cumulus/pkb/perfkitbenchmarker/configs/example_docker_to_cloud.yaml new file mode 100644 index 0000000..1de08f4 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/example_docker_to_cloud.yaml @@ -0,0 +1,19 @@ +# This config file is an example of performing +# a network benchmark between a Docker container +# running locally and an n1-standard-1 instance +# on GCP +# +#./pkb.py --benchmarks=netperf --benchmark_config_file=example_docker_to_cloud.yaml + +netperf: + flags: + ip_addresses: EXTERNAL + vm_groups: + vm_1: + cloud: Docker + vm_2: + cloud: GCP + vm_spec: + GCP: + machine_type: n1-standard-1 + zone: us-central1-a diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/example_user_config.yaml b/script/cumulus/pkb/perfkitbenchmarker/configs/example_user_config.yaml new file mode 100644 index 0000000..dd44618 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/example_user_config.yaml @@ -0,0 +1,55 @@ +# Declare any anchors you may want to use later. +four_core: &four_core + GCP: + machine_type: n1-standard-4 + AWS: + machine_type: c4.xlarge + +# You may also want to declare static VMs. +# For a complete list of valid keys, see +# static_virtual_machine.StaticVmSpec. +static_vms: + - &vm1 + user_name: perfkit + ssh_private_key: /absolute/path/to/key + ip_address: 1.1.1.1 + - &vm2 + user_name: perfkit + ssh_private_key: /absolute/path/to/key + ip_address: 2.2.2.2 + # Declare the OS type of the VM if necessary. + os_type: rhel + # If you want to run any benchmarks that use disks you + # should declare them. + disk_specs: + # For most benchmarks, just declaring the mount point + # is sufficient. + - mount_point: /scratch + +# Multi cloud iperf config. +iperf: &iperf_multicloud + vm_groups: + vm_1: + cloud: GCP + vm_spec: *four_core + vm_2: + cloud: AWS + vm_spec: *four_core + + +# If you've already declared your static VMs, +# here's how to use them. +# Don't run fio using this config because the +# static VM example values are completely bogus. +fio: + vm_groups: + default: + static_vms: + - *vm2 + +# If you choose to, you can specify which benchmarks should be +# run in your config file. This will even let you run the same +# benchmark multiple times with different configs. 
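+# For example, passing this file via --benchmark_config_file (see
+# example_docker_to_cloud.yaml for the command-line pattern) makes the list
+# below run iperf twice: once with its default config and once with the
+# multi-cloud config defined above.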
+benchmarks: + - iperf: null # This means use the default config + - iperf: *iperf_multicloud diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/freeze_restore_spec.py b/script/cumulus/pkb/perfkitbenchmarker/configs/freeze_restore_spec.py new file mode 100644 index 0000000..34c6d39 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/freeze_restore_spec.py @@ -0,0 +1,73 @@ +# Copyright 2021 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Benchmark-configurable options for freezing and restoring resources.""" + +from typing import Dict, Optional + +from absl import flags +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec + +FLAGS = flags.FLAGS + + +class FreezeRestoreSpec(spec.BaseSpec): + """Configurable freeze/restore options for resources. + + Attributes: + enable_freeze_restore: Designates the current resource to use + freeze/restore functionality if --freeze/--restore is specified on the + command line. This is a no-op if the resource does not have + _Freeze/_Restore implemented. + delete_on_freeze_error: If true, the resource deletes itself if there are + issues during a freeze. + create_on_restore_error: If true, the resource creates itself if there are + issues during a restore. + """ + + enable_freeze_restore: bool + delete_on_freeze_error: bool + create_on_restore_error: bool + + def __init__(self, + component_full_name: str, + flag_values: Optional[Dict[str, flags.FlagValues]] = None, + **kwargs): + super().__init__(component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. 
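+      For example, 'enable_freeze_restore' maps to
+      (option_decoders.BooleanDecoder, {'default': False, 'none_ok': True}).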
+ """ + result = super()._GetOptionDecoderConstructions() + result.update({ + 'enable_freeze_restore': (option_decoders.BooleanDecoder, { + 'default': False, + 'none_ok': True + }), + 'delete_on_freeze_error': (option_decoders.BooleanDecoder, { + 'default': False, + 'none_ok': True + }), + 'create_on_restore_error': (option_decoders.BooleanDecoder, { + 'default': False, + 'none_ok': True + }), + }) + return result diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/import_three.yml b/script/cumulus/pkb/perfkitbenchmarker/configs/import_three.yml new file mode 100644 index 0000000..6af7de2 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/import_three.yml @@ -0,0 +1,2 @@ +#import test_import.yml +three: &three 3 diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/import_three2.yml b/script/cumulus/pkb/perfkitbenchmarker/configs/import_three2.yml new file mode 100644 index 0000000..e4bfe86 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/import_three2.yml @@ -0,0 +1 @@ +#import import_three.yml diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/option_decoders.py b/script/cumulus/pkb/perfkitbenchmarker/configs/option_decoders.py new file mode 100644 index 0000000..420f22c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/option_decoders.py @@ -0,0 +1,376 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Classes for verifying and decoding config option values.""" + + +import abc + +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker.configs import spec +import six + + +class ConfigOptionDecoder(six.with_metaclass(abc.ABCMeta, object)): + """Verifies and decodes a config option value. + + Attributes: + option: None or string. Name of the config option. + required: boolean. True if the config option is required. False if not. + """ + + def __init__(self, option=None, **kwargs): + """Initializes a ConfigOptionDecoder. + + Args: + option: None or string. Name of the config option. + **kwargs: May optionally contain a 'default' key mapping to a value or + callable object. If a value is provided, the config option is + optional, and the provided value is the default if the user does not + set a value for the config option. If a callable object is provided, + the config option is optional, and the provided object is called to + determine the value if the user does not set a value for the config + option. If not provided, the config option is required. + """ + self.option = option + self.required = 'default' not in kwargs + if not self.required: + self._default = kwargs.pop('default') + assert not kwargs, ('__init__() received unexpected keyword arguments: ' + '{0}'.format(kwargs)) + + def _GetOptionFullName(self, component_full_name): + """Returns the fully qualified name of a config option. + + Args: + component_full_name: string. Fully qualified name of a configurable object + to which the option belongs. 
+ """ + return (component_full_name if self.option is None + else '{0}.{1}'.format(component_full_name, self.option)) + + @property + def default(self): + """Gets the config option's default value. + + Returns: + Default value of an optional config option. + """ + assert not self.required, ( + 'Attempted to get the default value of required config option ' + '"{0}".'.format(self.option)) + if hasattr(self._default, '__call__'): + return self._default() + return self._default + + @abc.abstractmethod + def Decode(self, value, component_full_name, flag_values): + """Verifies and decodes a config option value. + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + The decoded value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + raise NotImplementedError() + + +class EnumDecoder(ConfigOptionDecoder): + """Verifies that the config options value is in the allowed set. + + Passes through the value unmodified + """ + + def __init__(self, valid_values, **kwargs): + """Initializes the EnumVerifier. + + Args: + valid_values: list of the allowed values + **kwargs: Keyword arguments to pass to the base class. + """ + super(EnumDecoder, self).__init__(**kwargs) + self.valid_values = valid_values + + def Decode(self, value, component_full_name, flag_values): + """Verifies that the provided value is in the allowed set. + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the + configurable component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be + propagated to the BaseSpec constructors. + + Returns: + The valid value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + if value in self.valid_values: + return value + else: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". Value must be one of the following: ' + '{2}.'.format(self._GetOptionFullName(component_full_name), value, + ', '.join(str(t) for t in self.valid_values))) + + +class TypeVerifier(ConfigOptionDecoder): + """Verifies that a config option value's type belongs to an allowed set. + + Passes value through unmodified. + """ + + def __init__(self, valid_types, none_ok=False, **kwargs): + """Initializes a TypeVerifier. + + Args: + valid_types: tuple of allowed types. + none_ok: boolean. If True, None is also an allowed option value. + **kwargs: Keyword arguments to pass to the base class. + """ + super(TypeVerifier, self).__init__(**kwargs) + if none_ok: + self._valid_types = (type(None),) + valid_types + else: + self._valid_types = valid_types + + def Decode(self, value, component_full_name, flag_values): + """Verifies that the provided value is of an allowed type. + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + The valid value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + if not isinstance(value, self._valid_types): + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}" (of type "{2}"). 
Value must be one of the ' + 'following types: {3}.'.format( + self._GetOptionFullName(component_full_name), value, + value.__class__.__name__, + ', '.join(t.__name__ for t in self._valid_types))) + return value + + +class BooleanDecoder(TypeVerifier): + """Verifies and decodes a config option value when a boolean is expected.""" + + def __init__(self, **kwargs): + super(BooleanDecoder, self).__init__((bool,), **kwargs) + + +class IntDecoder(TypeVerifier): + """Verifies and decodes a config option value when an integer is expected. + + Attributes: + max: None or int. If provided, it specifies the maximum accepted value. + min: None or int. If provided, it specifies the minimum accepted value. + """ + + def __init__(self, max=None, min=None, **kwargs): + super(IntDecoder, self).__init__((int,), **kwargs) + self.max = max + self.min = min + + def Decode(self, value, component_full_name, flag_values): + """Verifies that the provided value is an int. + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + int. The valid value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + value = super(IntDecoder, self).Decode(value, component_full_name, + flag_values) + if value is not None: + if self.max is not None and value > self.max: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". Value must be at most {2}.'.format( + self._GetOptionFullName(component_full_name), value, self.max)) + if self.min is not None and value < self.min: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". Value must be at least {2}.'.format( + self._GetOptionFullName(component_full_name), value, self.min)) + return value + + +class FloatDecoder(TypeVerifier): + """Verifies and decodes a config option value when a float is expected. + + Attributes: + max: None or float. If provided, it specifies the maximum accepted value. + min: None or float. If provided, it specifies the minimum accepted value. + """ + + def __init__(self, max=None, min=None, **kwargs): + super(FloatDecoder, self).__init__((float, int), **kwargs) + self.max = max + self.min = min + + def Decode(self, value, component_full_name, flag_values): + """Verifies that the provided value is a float. + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + float. The valid value. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + value = super(FloatDecoder, self).Decode(value, component_full_name, + flag_values) + if value is not None: + if self.max is not None and value > self.max: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". Value must be at most {2}.'.format( + self._GetOptionFullName(component_full_name), value, self.max)) + if self.min is not None and value < self.min: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". 
Value must be at least {2}.'.format( + self._GetOptionFullName(component_full_name), value, self.min)) + return value + + +class StringDecoder(TypeVerifier): + """Verifies and decodes a config option value when a string is expected.""" + + def __init__(self, **kwargs): + super(StringDecoder, self).__init__(six.string_types, **kwargs) + + +class ListDecoder(TypeVerifier): + """Verifies and decodes a config option value when a list is expected.""" + + def __init__(self, item_decoder, **kwargs): + """Initializes a ListDecoder. + + Args: + item_decoder: ConfigOptionDecoder. Used to decode the items of an input + list. + **kwargs: Keyword arguments to pass to the base class. + """ + super(ListDecoder, self).__init__((list,), **kwargs) + self._item_decoder = item_decoder + + def Decode(self, value, component_full_name, flag_values): + """Verifies that the provided value is a list with appropriate items. + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + None if the input value was None. Otherwise, a list containing the decoded + value of each item in the input list. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + input_list = super(ListDecoder, self).Decode(value, component_full_name, + flag_values) + if input_list is None: + return None + list_full_name = self._GetOptionFullName(component_full_name) + result = [] + for index, input_item in enumerate(input_list): + item_full_name = '{0}[{1}]'.format(list_full_name, index) + result.append(self._item_decoder.Decode(input_item, item_full_name, + flag_values)) + return result + + +class _PerCloudConfigSpec(spec.BaseSpec): + """Contains one config dict attribute per cloud provider. + + The name of each attribute is the name of the cloud provider. + """ + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super(_PerCloudConfigSpec, cls)._GetOptionDecoderConstructions() + for cloud in providers.VALID_CLOUDS: + result[cloud] = TypeVerifier, { + 'default': None, + 'valid_types': (dict,) + } + return result + + +class PerCloudConfigDecoder(TypeVerifier): + """Decodes the disk_spec or vm_spec option of a VM group config object.""" + + def __init__(self, **kwargs): + super(PerCloudConfigDecoder, self).__init__(valid_types=(dict,), **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Decodes the disk_spec or vm_spec option of a VM group config object. + + Args: + value: None or dict mapping cloud provider name string to a dict. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + _PerCloudConfigSpec decoded from the input dict. 
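+      None is passed through unchanged when no per-cloud config was given.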
+ """ + input_dict = super(PerCloudConfigDecoder, self).Decode( + value, component_full_name, flag_values) + return None if input_dict is None else _PerCloudConfigSpec( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, + **input_dict) diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/spec.py b/script/cumulus/pkb/perfkitbenchmarker/configs/spec.py new file mode 100644 index 0000000..9914954 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/spec.py @@ -0,0 +1,180 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Base class for objects decoded from a YAML config.""" + + +import collections +import threading + +from perfkitbenchmarker import errors +import six + +_SPEC_REGISTRY = {} + + +def GetSpecClass(base_class, **kwargs): + """Returns the subclass with the corresponding attributes. + + Args: + base_class: The base class of the resource to return + (e.g. BaseVmSpec). + **kwargs: Every attribute/value of the subclass's ATTRS that were + used to register the subclass. + Raises: + Exception: If no class could be found with matching attributes. + """ + key = [base_class.__name__] + key += sorted(kwargs.items()) + return _SPEC_REGISTRY.get(tuple(key), base_class) + + +class BaseSpecMetaClass(type): + """Metaclass that allows each BaseSpec derived class to have its own decoders. + """ + + def __init__(cls, name, bases, dct): + super(BaseSpecMetaClass, cls).__init__(name, bases, dct) + cls._init_decoders_lock = threading.Lock() + cls._decoders = collections.OrderedDict() + cls._required_options = set() + if (all(hasattr(cls, attr) for attr in cls.SPEC_ATTRS) and + cls.SPEC_TYPE): + key = [cls.SPEC_TYPE] + key += sorted([(attr, getattr(cls, attr)) for attr in cls.SPEC_ATTRS]) + if tuple(key) in _SPEC_REGISTRY: + raise Exception('Subclasses of %s must define unique values for the ' + 'attrs: %s.' % (cls.SPEC_TYPE, cls.SPEC_ATTRS)) + _SPEC_REGISTRY[tuple(key)] = cls + + +class BaseSpec(six.with_metaclass(BaseSpecMetaClass, object)): + """Object decoded from a YAML config.""" + # The name of the spec class that will be extended with auto-registered + # subclasses. + SPEC_TYPE = None + # A list of the attributes that are used to register the subclasses. + SPEC_ATTRS = ['CLOUD'] + + # Each derived class has its own copy of the following three variables. They + # are initialized by BaseSpecMetaClass.__init__ and later populated by + # _InitDecoders when the first instance of the derived class is created. + _init_decoders_lock = None # threading.Lock that protects the next two vars. + _decoders = None # dict mapping config option name to ConfigOptionDecoder. + _required_options = None # set of strings. Required config options. + + def __init__(self, component_full_name, flag_values=None, **kwargs): + """Initializes a BaseSpec. + + Translates keyword arguments via the class's decoders and assigns the + corresponding instance attribute. 
Derived classes can register decoders + for additional attributes by overriding _GetOptionDecoderConstructions + and can add support for additional flags by overriding _ApplyFlags. + + Args: + component_full_name: string. Fully qualified name of the configurable + component containing the config options. + flag_values: None or flags.FlagValues. Runtime flags that may override + the provided config option values in kwargs. + **kwargs: dict mapping config option names to provided values. + + Raises: + errors.Config.MissingOption: If a config option is required, but a value + was not provided in kwargs. + errors.Config.UnrecognizedOption: If an unrecognized config option is + provided with a value in kwargs. + """ + if not self._decoders: + self._InitDecoders() + if flag_values: + self._ApplyFlags(kwargs, flag_values) + missing_options = self._required_options.difference(kwargs) + if missing_options: + raise errors.Config.MissingOption( + 'Required options were missing from {0}: {1}.'.format( + component_full_name, ', '.join(sorted(missing_options)))) + unrecognized_options = frozenset(kwargs).difference(self._decoders) + if unrecognized_options: + raise errors.Config.UnrecognizedOption( + 'Unrecognized options were found in {0}: {1}.'.format( + component_full_name, ', '.join(sorted(unrecognized_options)))) + self._DecodeAndInit(component_full_name, kwargs, self._decoders, + flag_values) + + @classmethod + def _InitDecoders(cls): + """Creates a ConfigOptionDecoder for each config option. + + Populates cls._decoders and cls._required_options. + """ + with cls._init_decoders_lock: + if not cls._decoders: + constructions = cls._GetOptionDecoderConstructions() + for option, decoder_construction in sorted( + six.iteritems(constructions)): + decoder_class, init_args = decoder_construction + decoder = decoder_class(option=option, **init_args) + cls._decoders[option] = decoder + if decoder.required: + cls._required_options.add(option) + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + pass + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Can be overridden by derived classes to add options or impose additional + requirements on existing options. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + return {} + + def _DecodeAndInit(self, component_full_name, config, decoders, flag_values): + """Initializes spec attributes from provided config option values. + + Args: + component_full_name: string. Fully qualified name of the configurable + component containing the config options. + config: dict mapping option name string to option value. + decoders: OrderedDict mapping option name string to ConfigOptionDecoder. + flag_values: flags.FlagValues. Runtime flags that may override provided + config option values. These flags have already been applied to the + current config, but they may be passed to the decoders for propagation + to deeper spec constructors. 
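+
+    Options missing from config fall back to their decoder's default value.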
+ """ + assert isinstance(decoders, collections.OrderedDict), ( + 'decoders must be an OrderedDict. The order in which options are ' + 'decoded must be guaranteed.') + for option, decoder in six.iteritems(decoders): + if option in config: + value = decoder.Decode(config[option], component_full_name, flag_values) + else: + value = decoder.default + setattr(self, option, value) diff --git a/script/cumulus/pkb/perfkitbenchmarker/configs/test_import.yml b/script/cumulus/pkb/perfkitbenchmarker/configs/test_import.yml new file mode 100644 index 0000000..f0e1360 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/configs/test_import.yml @@ -0,0 +1,11 @@ +#import import_three2.yml +#import import_three.yml +#import import_three.yml + +# This is an example of how to import in config files. +# You can even use anchors from other yaml files. +# Importing a file multiple times either directly or indirectly won't break +# things. +# Imports must all be at the top of the file with no spaces between them. +flags: + num_vms: *three diff --git a/script/cumulus/pkb/perfkitbenchmarker/container_service.py b/script/cumulus/pkb/perfkitbenchmarker/container_service.py new file mode 100644 index 0000000..5de7a11 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/container_service.py @@ -0,0 +1,968 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains classes related to managed container services. + +For now this just consists of a base cluster class that other container +services will be derived from and a Kubernetes specific variant. This enables +users to run PKB VM based benchmarks on container providers (e.g. Kubernetes) +without pre-provisioning container clusters. In the future, this may be +expanded to support first-class container benchmarks. +""" + +import collections +import functools +import ipaddress +import itertools +import os +import time +from typing import Any, Dict, List, Optional + +from absl import flags +import jinja2 +from perfkitbenchmarker import context +from perfkitbenchmarker import custom_virtual_machine_spec +from perfkitbenchmarker import data +from perfkitbenchmarker import errors +from perfkitbenchmarker import events +from perfkitbenchmarker import kubernetes_helper +from perfkitbenchmarker import os_types +from perfkitbenchmarker import resource +from perfkitbenchmarker import sample +from perfkitbenchmarker import units +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec +import requests +import six +import yaml + +KUBERNETES = 'Kubernetes' +DEFAULT_NODEPOOL = 'default' + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'kubeconfig', None, 'Path to kubeconfig to be used by kubectl. 
' + 'If unspecified, it will be set to a file in this run\'s ' + 'temporary directory.') + +flags.DEFINE_string('kubectl', 'kubectl', 'Path to kubectl tool') + +flags.DEFINE_boolean( + 'local_container_build', False, + 'Force container images to be built locally rather than ' + 'just as a fallback if there is no remote image builder ' + 'associated with the registry.') + +flags.DEFINE_boolean( + 'static_container_image', True, + 'Whether container images are static (i.e. are not ' + 'managed by PKB). If this is set, PKB will accept the ' + 'image as fully qualified (including repository) and will ' + 'not attempt to build it.') + +flags.DEFINE_boolean( + 'force_container_build', False, + 'Whether to force PKB to build container images even ' + 'if they already exist in the registry.') + +flags.DEFINE_string( + 'container_cluster_cloud', None, + 'Sets the cloud to use for the container cluster. ' + 'This will override both the value set in the config and ' + 'the value set using the generic "cloud" flag.') + +flags.DEFINE_integer( + 'container_cluster_num_vms', None, + 'Number of nodes in the cluster. Defaults to ' + 'container_cluster.vm_count') + +flags.DEFINE_string('container_cluster_type', KUBERNETES, + 'The type of container cluster.') + +flags.DEFINE_string( + 'container_cluster_version', None, + 'Optional version flag to pass to the cluster create ' + 'command. If not specified, the cloud-specific container ' + 'implementation will chose an appropriate default.') + +_CONTAINER_CLUSTER_ARCHITECTURE = flags.DEFINE_list( + 'container_cluster_architecture', ['linux/amd64'], + 'The architecture(s) that the container cluster uses. ' + 'Defaults to linux/amd64') + +_K8S_INGRESS = """ +apiVersion: extensions/v1beta1 +kind: Ingress +metadata: + name: {service_name}-ingress +spec: + backend: + serviceName: {service_name} + servicePort: 8080 +""" + + +class ContainerException(errors.Error): + """Exception during the creation or execution of a container.""" + + +class FatalContainerException(errors.Resource.CreationError, + ContainerException): + """Fatal Exception during the creation or execution of a container.""" + pass + + +class RetriableContainerException(errors.Resource.RetryableCreationError, + ContainerException): + """Retriable Exception during the creation or execution of a container.""" + pass + + +def RunKubectlCommand(command: List[str], **kwargs): + """Run a kubectl command.""" + cmd = [FLAGS.kubectl, '--kubeconfig', FLAGS.kubeconfig] + command + return vm_util.IssueCommand(cmd, **kwargs) + + +class ContainerSpec(spec.BaseSpec): + """Class containing options for creating containers.""" + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Apply flag settings to the container spec.""" + super(ContainerSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['image'].present: + config_values['image'] = flag_values.image + if flag_values['static_container_image'].present: + config_values['static_image'] = flag_values.static_container_image + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Can be overridden by derived classes to add options or impose additional + requirements on existing options. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. 
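+      For example, 'container_port' maps to
+      (option_decoders.IntDecoder, {'default': 8080}).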
+ """ + result = super(ContainerSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'image': (option_decoders.StringDecoder, { + 'default': None + }), + 'static_image': (option_decoders.BooleanDecoder, { + 'default': False + }), + 'cpus': (option_decoders.FloatDecoder, { + 'default': None + }), + 'memory': (custom_virtual_machine_spec.MemoryDecoder, { + 'default': None + }), + 'command': (_CommandDecoder, {}), + 'container_port': (option_decoders.IntDecoder, { + 'default': 8080 + }), + }) + return result + + +class _CommandDecoder(option_decoders.ListDecoder): + """Decodes the command/arg list for containers.""" + + def __init__(self, **kwargs): + super(_CommandDecoder, self).__init__( + default=None, + none_ok=True, + item_decoder=option_decoders.StringDecoder(), + **kwargs) + + +class BaseContainer(resource.BaseResource): + """Class representing a single container.""" + + def __init__(self, container_spec=None, **_): + # Hack to make container_spec a kwarg + assert container_spec + super(BaseContainer, self).__init__() + self.cpus = container_spec.cpus + self.memory = container_spec.memory + self.command = container_spec.command + self.image = container_spec.image + self.ip_address = None + + def WaitForExit(self, timeout: int = 1200) -> Dict[str, Any]: + """Gets the successfully finished container. + + Args: + timeout: The timeout to wait in seconds + + Raises: + FatalContainerException: If the container fails + RetriableContainerException: If the container times out wihout succeeding. + """ + raise NotImplementedError() + + def GetLogs(self): + """Returns the logs from the container.""" + raise NotImplementedError() + + +class BaseContainerService(resource.BaseResource): + """Class representing a service backed by containers.""" + + def __init__(self, container_spec): + super(BaseContainerService, self).__init__() + self.cpus = container_spec.cpus + self.memory = container_spec.memory + self.command = container_spec.command + self.image = container_spec.image + self.container_port = container_spec.container_port + self.ip_address = None + self.port = None + self.host_header = None + + +class _ContainerImage(object): + """Simple class for tracking container image names and source locations.""" + + def __init__(self, name): + self.name = name + self.directory = os.path.dirname( + data.ResourcePath(os.path.join('docker', self.name, 'Dockerfile'))) + + +class ContainerRegistrySpec(spec.BaseSpec): + """Spec containing options for creating a Container Registry.""" + + def __init__(self, component_full_name, flag_values=None, **kwargs): + super(ContainerRegistrySpec, self).__init__( + component_full_name, flag_values=flag_values, **kwargs) + registry_spec = getattr(self.spec, self.cloud, {}) + self.project = registry_spec.get('project') + self.zone = registry_spec.get('zone') + self.name = registry_spec.get('name') + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Apply flag values to the spec.""" + super(ContainerRegistrySpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['cloud'].present or 'cloud' not in config_values: + config_values['cloud'] = flag_values.cloud + if flag_values['container_cluster_cloud'].present: + config_values['cloud'] = flag_values.container_cluster_cloud + updated_spec = {} + if flag_values['project'].present: + updated_spec['project'] = flag_values.project + if flag_values['zones'].present: + updated_spec['zone'] = flag_values.zones[0] + cloud = config_values['cloud'] + cloud_spec = config_values.get('spec', 
{}).get(cloud, {}) + cloud_spec.update(updated_spec) + config_values['spec'] = {cloud: cloud_spec} + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Can be overridden by derived classes to add options or impose additional + requirements on existing options. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(ContainerRegistrySpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'cloud': (option_decoders.StringDecoder, {}), + 'spec': (option_decoders.PerCloudConfigDecoder, { + 'default': {} + }) + }) + return result + + +def GetContainerRegistryClass(cloud): + return resource.GetResourceClass(BaseContainerRegistry, CLOUD=cloud) + + +class BaseContainerRegistry(resource.BaseResource): + """Base class for container image registries.""" + + RESOURCE_TYPE = 'BaseContainerRegistry' + + def __init__(self, registry_spec): + super(BaseContainerRegistry, self).__init__() + benchmark_spec = context.GetThreadBenchmarkSpec() + container_cluster = getattr(benchmark_spec, 'container_cluster', None) + zone = getattr(container_cluster, 'zone', None) + project = getattr(container_cluster, 'project', None) + self.zone = registry_spec.zone or zone + self.project = registry_spec.project or project + self.name = registry_spec.name or 'pkb%s' % FLAGS.run_uri + self.local_build_times = {} + self.remote_build_times = {} + self.metadata.update({'cloud': self.CLOUD}) + + def _Create(self): + """Creates the image registry.""" + pass + + def _Delete(self): + """Deletes the image registry.""" + pass + + def GetSamples(self): + """Returns image build related samples.""" + samples = [] + metadata = self.GetResourceMetadata() + for image_name, build_time in self.local_build_times.items(): + metadata.update({ + 'build_type': 'local', + 'image': image_name, + }) + samples.append( + sample.Sample('Image Build Time', build_time, 'seconds', metadata)) + for image_name, build_time in self.remote_build_times.items(): + metadata.update({ + 'build_type': 'remote', + 'image': image_name, + }) + samples.append( + sample.Sample('Image Build Time', build_time, 'seconds', metadata)) + return samples + + def GetFullRegistryTag(self, image): + """Returns the full name of the image for the registry. + + Args: + image: The PKB name of the image (string). + """ + raise NotImplementedError() + + def PrePush(self, image): + """Prepares registry to push a given image.""" + pass + + def RemoteBuild(self, image): + """Build the image remotely. + + Args: + image: Instance of _ContainerImage representing the image to build. + """ + raise NotImplementedError() + + def Login(self): + """Log in to the registry (in order to push to it).""" + raise NotImplementedError() + + def LocalBuildAndPush(self, image): + """Build the image locally and push to registry. + + Assumes we are already authenticated with the registry from self.Login. + Building and pushing done in one command to support multiarch images + https://github.com/docker/buildx/issues/59 + + Args: + image: Instance of _ContainerImage representing the image to build. 
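+
+    The image is tagged with GetFullRegistryTag(image.name) and pushed as
+    part of a single 'docker buildx build --push' invocation.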
+ """ + full_tag = self.GetFullRegistryTag(image.name) + # Multiarch images require buildx create + # https://github.com/docker/build-push-action/issues/302 + vm_util.IssueCommand(['docker', 'buildx', 'create', '--use']) + cmd = ['docker', 'buildx', 'build'] + if _CONTAINER_CLUSTER_ARCHITECTURE.value: + cmd += ['--platform', ','.join(_CONTAINER_CLUSTER_ARCHITECTURE.value)] + cmd += ['--no-cache', '--push', '-t', full_tag, image.directory] + vm_util.IssueCommand(cmd) + vm_util.IssueCommand(['docker', 'buildx', 'stop']) + + def GetOrBuild(self, image): + """Finds the image in the registry or builds it. + + TODO(pclay): Add support for build ARGs. + + Args: + image: The PKB name for the image (string). + + Returns: + The full image name (including the registry). + """ + full_image = self.GetFullRegistryTag(image) + # Log in to the registry to see if image exists + self.Login() + if not FLAGS.force_container_build: + # manifest inspect inpspects the registry's copy + inspect_cmd = ['docker', 'manifest', 'inspect', full_image] + _, _, retcode = vm_util.IssueCommand( + inspect_cmd, suppress_warning=True, raise_on_failure=False) + if retcode == 0: + return full_image + self._Build(image) + return full_image + + def _Build(self, image): + """Builds the image and pushes it to the registry if necessary. + + Args: + image: The PKB name for the image (string). + """ + image = _ContainerImage(image) + build_start = time.time() + if not FLAGS.local_container_build: + try: + # Build the image remotely using an image building service. + self.RemoteBuild(image) + self.remote_build_times[image.name] = time.time() - build_start + return + except NotImplementedError: + pass + + self.PrePush(image) + # Build the image locally using docker. + build_start = time.time() + self.LocalBuildAndPush(image) + self.local_build_times[image.name] = time.time() - build_start + + +@events.benchmark_start.connect +def _SetKubeConfig(unused_sender, benchmark_spec): + """Sets the value for the kubeconfig flag if it's unspecified.""" + if not FLAGS.kubeconfig: + FLAGS.kubeconfig = vm_util.PrependTempDir( + 'kubeconfig' + str(benchmark_spec.sequence_number)) + # Store the value for subsequent run stages. + benchmark_spec.config.flags['kubeconfig'] = FLAGS.kubeconfig + + +def NodePoolName(name: str) -> str: + """Clean node pool names to be usable by all providers.""" + # GKE (or k8s?) requires nodepools use alphanumerics and hyphens + # AKS requires full alphanumeric + # PKB likes to use underscores strip them out. + return name.replace('_', '') + + +def GetContainerClusterClass(cloud, cluster_type): + return resource.GetResourceClass( + BaseContainerCluster, CLOUD=cloud, CLUSTER_TYPE=cluster_type) + + +class BaseContainerCluster(resource.BaseResource): + """A cluster that can be used to schedule containers.""" + + RESOURCE_TYPE = 'BaseContainerCluster' + REQUIRED_ATTRS = ['CLOUD', 'CLUSTER_TYPE'] + + def __init__(self, cluster_spec): + super().__init__(user_managed=bool(cluster_spec.static_cluster)) + self.name = cluster_spec.static_cluster or 'pkb-' + FLAGS.run_uri + self.vm_config = virtual_machine.GetVmClass(self.CLOUD, os_types.DEFAULT)( + cluster_spec.vm_spec) + self.zone = self.vm_config.zone + # Use Virtual Machine class to resolve VM Spec. This lets subclasses parse + # Provider specific information like disks out of the spec. + for name, nodepool in cluster_spec.nodepools.copy().items(): + nodepool_zone = nodepool.vm_spec.zone + # VM Classes can require zones. But nodepools have optional zones. 
+ if not nodepool_zone: + nodepool.vm_spec.zone = self.zone + nodepool.vm_config = virtual_machine.GetVmClass( + self.CLOUD, os_types.DEFAULT)(nodepool.vm_spec) + nodepool.vm_config.zone = nodepool_zone + nodepool.num_nodes = nodepool.vm_count + # Fix name + del cluster_spec.nodepools[name] + cluster_spec.nodepools[NodePoolName(name)] = nodepool + self.nodepools = cluster_spec.nodepools + self.num_nodes = cluster_spec.vm_count + self.min_nodes = cluster_spec.min_vm_count or self.num_nodes + self.max_nodes = cluster_spec.max_vm_count or self.num_nodes + self.containers = collections.defaultdict(list) + self.services = {} + + def DeleteContainers(self): + """Delete containers belonging to the cluster.""" + for container in itertools.chain(*list(self.containers.values())): + container.Delete() + + def DeleteServices(self): + """Delete services belonging to the cluster.""" + for service in self.services.values(): + service.Delete() + + def GetResourceMetadata(self): + """Returns a dictionary of cluster metadata.""" + nodepools = {} + for name, nodepool in six.iteritems(self.nodepools): + nodepool_metadata = { + 'size': nodepool.num_nodes, + 'machine_type': nodepool.vm_config.machine_type, + 'name': name + } + nodepools[name] = nodepool_metadata + + metadata = { + 'cloud': self.CLOUD, + 'cluster_type': self.CLUSTER_TYPE, + 'zone': self.zone, + 'size': self.num_nodes, + 'machine_type': self.vm_config.machine_type, + 'nodepools': nodepools + } + + if self.min_nodes != self.num_nodes or self.max_nodes != self.num_nodes: + metadata.update({ + 'max_size': self.max_nodes, + 'min_size': self.min_nodes, + }) + + return metadata + + def DeployContainer(self, name, container_spec): + """Deploys Containers according to the ContainerSpec.""" + raise NotImplementedError() + + def DeployContainerService(self, name, container_spec, num_containers): + """Deploys a ContainerSerivice according to the ContainerSpec.""" + raise NotImplementedError() + + def GetSamples(self): + """Return samples with information about deployment times.""" + samples = [] + if self.resource_ready_time and self.create_start_time: + samples.append( + sample.Sample('Cluster Creation Time', + self.resource_ready_time - self.create_start_time, + 'seconds')) + for container in itertools.chain(*list(self.containers.values())): + metadata = {'image': container.image.split('/')[-1]} + if container.resource_ready_time and container.create_start_time: + samples.append( + sample.Sample( + 'Container Deployment Time', + container.resource_ready_time - container.create_start_time, + 'seconds', metadata)) + if container.delete_end_time and container.delete_start_time: + samples.append( + sample.Sample( + 'Container Delete Time', + container.delete_end_time - container.delete_start_time, + 'seconds', metadata)) + for service in self.services.values(): + metadata = {'image': service.image.split('/')[-1]} + if service.resource_ready_time and service.create_start_time: + samples.append( + sample.Sample( + 'Service Deployment Time', + service.resource_ready_time - service.create_start_time, + 'seconds', metadata)) + if service.delete_end_time and service.delete_start_time: + samples.append( + sample.Sample('Service Delete Time', + service.delete_end_time - service.delete_start_time, + 'seconds', metadata)) + + return samples + + +class KubernetesPod: + """Representation of a Kubernetes pod. + + It can be created as a PKB managed resource using KubernetesContainer, + or created with ApplyManifest and directly constructed. 
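+
+  A name is required; _GetPod() fetches the pod via
+  'kubectl get pod <name> -o yaml' and records its podIP on ip_address.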
+ """ + + def __init__(self, name=None, **_): + assert name + self.name = name + + def _GetPod(self) -> Dict[str, Any]: + """Gets a representation of the POD and returns it.""" + stdout, _, _ = RunKubectlCommand(['get', 'pod', self.name, '-o', 'yaml']) + pod = yaml.safe_load(stdout) + self.ip_address = pod.get('status', {}).get('podIP') + return pod + + def WaitForExit(self, timeout: int = None) -> Dict[str, Any]: + """Gets the finished running container.""" + + @vm_util.Retry( + timeout=timeout, retryable_exceptions=(RetriableContainerException,)) + def _WaitForExit(): + # Inspect the pod's status to determine if it succeeded, has failed, or is + # doomed to fail. + # https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/ + pod = self._GetPod() + status = pod['status'] + phase = status['phase'] + if phase == 'Succeeded': + return pod + elif phase == 'Failed': + raise FatalContainerException( + f"Pod {self.name} failed:\n{yaml.dump(pod['status'])}") + else: + for condition in status.get('conditions', []): + if (condition['type'] == 'PodScheduled' and + condition['status'] == 'False' and + condition['reason'] == 'Unschedulable'): + # TODO(pclay): Revisit this when we scale clusters. + raise FatalContainerException( + f"Pod {self.name} failed to schedule:\n{condition['message']}") + for container_status in status.get('containerStatuses', []): + waiting_status = container_status['state'].get('waiting', {}) + if waiting_status.get('reason') in [ + 'ErrImagePull', 'ImagePullBackOff' + ]: + raise FatalContainerException( + f'Failed to find container image for {status.name}:\n' + + yaml.dump(waiting_status.get('message'))) + raise RetriableContainerException( + f'Pod phase ({phase}) not in finished phases.') + + return _WaitForExit() + + def GetLogs(self): + """Returns the logs from the container.""" + stdout, _, _ = RunKubectlCommand(['logs', self.name]) + return stdout + + +class KubernetesContainer(BaseContainer, KubernetesPod): + """A KubernetesPod based flavor of Container.""" + + def _Create(self): + """Creates the container.""" + run_cmd = [ + 'run', + self.name, + '--image=%s' % self.image, + '--restart=Never', + ] + + limits = [] + if self.cpus: + limits.append(f'cpu={int(1000 * self.cpus)}m') + if self.memory: + limits.append(f'memory={self.memory}Mi') + if limits: + run_cmd.append('--limits=' + ','.join(limits)) + + if self.command: + run_cmd.extend(['--command', '--']) + run_cmd.extend(self.command) + RunKubectlCommand(run_cmd) + + def _Delete(self): + """Deletes the container.""" + pass + + def _IsReady(self): + """Returns true if the container has stopped pending.""" + return self._GetPod()['status']['phase'] != 'Pending' + + +class KubernetesContainerService(BaseContainerService): + """A Kubernetes flavor of Container Service.""" + + def __init__(self, container_spec, name): + super(KubernetesContainerService, self).__init__(container_spec) + self.name = name + self.port = 8080 + + def _Create(self): + run_cmd = [ + 'run', self.name, + '--image=%s' % self.image, '--port', + str(self.port) + ] + + limits = [] + if self.cpus: + limits.append(f'cpu={int(1000 * self.cpus)}m') + if self.memory: + limits.append(f'memory={self.memory}Mi') + if limits: + run_cmd.append('--limits=' + ','.join(limits)) + + if self.command: + run_cmd.extend(['--command', '--']) + run_cmd.extend(self.command) + RunKubectlCommand(run_cmd) + + expose_cmd = [ + 'expose', 'deployment', self.name, '--type', 'NodePort', + '--target-port', + str(self.port) + ] + RunKubectlCommand(expose_cmd) + with 
vm_util.NamedTemporaryFile() as tf: + tf.write(_K8S_INGRESS.format(service_name=self.name)) + tf.close() + kubernetes_helper.CreateFromFile(tf.name) + + def _GetIpAddress(self): + """Attempts to set the Service's ip address.""" + ingress_name = '%s-ingress' % self.name + get_cmd = [ + 'get', 'ing', ingress_name, '-o', + 'jsonpath={.status.loadBalancer.ingress[*].ip}' + ] + stdout, _, _ = RunKubectlCommand(get_cmd) + ip_address = stdout + if ip_address: + self.ip_address = ip_address + + def _IsReady(self): + """Returns True if the Service is ready.""" + if self.ip_address is None: + self._GetIpAddress() + if self.ip_address is not None: + url = 'http://%s' % (self.ip_address) + r = requests.get(url) + if r.status_code == 200: + return True + return False + + def _Delete(self): + """Deletes the service.""" + with vm_util.NamedTemporaryFile() as tf: + tf.write(_K8S_INGRESS.format(service_name=self.name)) + tf.close() + kubernetes_helper.DeleteFromFile(tf.name) + + delete_cmd = ['delete', 'deployment', self.name] + RunKubectlCommand(delete_cmd, raise_on_failure=False) + + +class KubernetesCluster(BaseContainerCluster): + """A Kubernetes flavor of Container Cluster.""" + + CLUSTER_TYPE = KUBERNETES + + def _DeleteAllFromDefaultNamespace(self): + """Deletes all resources from a namespace. + + Since StatefulSets do not reclaim PVCs upon deletion, they are explicitly + deleted here to prevent dynamically provisioned PDs from leaking once the + cluster has been deleted. + """ + run_cmd = [ + 'delete', 'all', '--all', '-n', 'default' + ] + RunKubectlCommand(run_cmd) + + run_cmd = [ + 'delete', 'pvc', '--all', '-n', 'default' + ] + RunKubectlCommand(run_cmd) + + def _Delete(self): + self._DeleteAllFromDefaultNamespace() + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the cluster.""" + result = super().GetResourceMetadata() + result['container_cluster_version'] = self.k8s_version + return result + + def DeployContainer(self, base_name, container_spec): + """Deploys Containers according to the ContainerSpec.""" + name = base_name + str(len(self.containers[base_name])) + container = KubernetesContainer(container_spec=container_spec, name=name) + self.containers[base_name].append(container) + container.Create() + + def DeployContainerService(self, name, container_spec): + """Deploys a ContainerSerivice according to the ContainerSpec.""" + service = KubernetesContainerService(container_spec, name) + self.services[name] = service + service.Create() + + # TODO(pclay): Revisit instance methods that don't rely on instance data. + def ApplyManifest(self, manifest_file, **kwargs): + """Applies a declarative Kubernetes manifest; possibly with jinja. + + Args: + manifest_file: The name of the YAML file or YAML template. + **kwargs: Arguments to the jinja template. 
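+
+    Only manifests ending in '.j2' are rendered with jinja2 (StrictUndefined);
+    plain YAML files are applied as-is and must not be given kwargs.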
+ """ + filename = data.ResourcePath(manifest_file) + if not filename.endswith('.j2'): + assert not kwargs + RunKubectlCommand(['apply', '-f', filename]) + return + + environment = jinja2.Environment(undefined=jinja2.StrictUndefined) + with open(filename) as template_file, vm_util.NamedTemporaryFile( + mode='w', suffix='.yaml') as rendered_template: + manifest = environment.from_string(template_file.read()).render(kwargs) + rendered_template.write(manifest) + rendered_template.close() + RunKubectlCommand(['apply', '-f', rendered_template.name]) + + def WaitForResource(self, resource_name, condition_name, namespace=None): + """Waits for a condition on a Kubernetes resource (eg: deployment, pod).""" + run_cmd = [ + 'wait', f'--for=condition={condition_name}', + f'--timeout={vm_util.DEFAULT_TIMEOUT}s', resource_name + ] + if namespace: + run_cmd.append(f'--namespace={namespace}') + RunKubectlCommand(run_cmd) + + def WaitForRollout(self, resource_name): + """Blocks until a Kubernetes rollout is completed.""" + run_cmd = [ + 'rollout', + 'status', + '--timeout=%ds' % vm_util.DEFAULT_TIMEOUT, + resource_name + ] + + RunKubectlCommand(run_cmd) + + @vm_util.Retry(retryable_exceptions=(errors.Resource.RetryableCreationError,)) + def GetLoadBalancerIP(self, service_name): + """Returns the IP address of a LoadBalancer service when ready.""" + get_cmd = [ + 'get', 'service', service_name, '-o', + 'jsonpath={.status.loadBalancer.ingress[0].ip}' + ] + + stdout, _, _ = RunKubectlCommand(get_cmd) + + try: + # Ensure the load balancer is ready by parsing the output IP + ip_address = ipaddress.ip_address(stdout) + except ValueError: + raise errors.Resource.RetryableCreationError( + "Load Balancer IP for service '%s' is not ready." % service_name) + + return format(ip_address) + + @vm_util.Retry(retryable_exceptions=(errors.Resource.RetryableCreationError,)) + def GetClusterIP(self, service_name) -> str: + """Returns the IP address of a ClusterIP service when ready.""" + get_cmd = [ + 'get', 'service', service_name, '-o', 'jsonpath={.spec.clusterIP}' + ] + + stdout, _, _ = RunKubectlCommand(get_cmd) + + if not stdout: + raise errors.Resource.RetryableCreationError( + "ClusterIP for service '%s' is not ready." % service_name) + + return stdout + + def CreateConfigMap(self, name, from_file_dir): + """Creates a Kubernetes ConfigMap. + + Args: + name: The name of the ConfigMap to create + from_file_dir: The directory name containing files that will be key/values + in the ConfigMap + """ + RunKubectlCommand( + ['create', 'configmap', name, '--from-file', from_file_dir]) + + def CreateServiceAccount(self, + name: str, + clusterrole: Optional[str] = None, + namespace='default'): + """Create a k8s service account and cluster-role-binding.""" + RunKubectlCommand( + ['create', 'serviceaccount', name, '--namespace', namespace]) + if clusterrole: + # TODO(pclay): Support customer cluster roles? + RunKubectlCommand([ + 'create', + 'clusterrolebinding', + f'{name}-role', + f'--clusterrole={clusterrole}', + f'--serviceaccount={namespace}:{name}', + '--namespace', + namespace, + ]) + + # TODO(pclay): Move to cached property in Python 3.9 + @property + @functools.lru_cache(maxsize=1) + def node_memory_allocatable(self) -> units.Quantity: + """Usable memory of each node in cluster in KiB.""" + stdout, _, _ = RunKubectlCommand( + # TODO(pclay): Take a minimum of all nodes? 
+ [ + 'get', 'nodes', '-o', + 'jsonpath={.items[0].status.allocatable.memory}' + ]) + return units.ParseExpression(stdout) + + @property + @functools.lru_cache(maxsize=1) + def node_num_cpu(self) -> int: + """vCPU of each node in cluster.""" + stdout, _, _ = RunKubectlCommand( + ['get', 'nodes', '-o', 'jsonpath={.items[0].status.capacity.cpu}']) + return int(stdout) + + @property + @functools.lru_cache(maxsize=1) + def k8s_version(self) -> str: + """Actual Kubernetes version reported by server.""" + stdout, _, _ = RunKubectlCommand(['version', '-o', 'yaml']) + return yaml.safe_load(stdout)['serverVersion']['gitVersion'] + + def GetPodLabel(self, resource_name): + run_cmd = [ + 'get', resource_name, + '-o', 'jsonpath="{.spec.selector.matchLabels.app}"' + ] + + stdout, _, _ = RunKubectlCommand(run_cmd) + return yaml.safe_load(stdout) + + def GetPodIps(self, resource_name): + """Returns a list of internal IPs for a pod name. + + Args: + resource_name: The pod resource name + """ + pod_label = self.GetPodLabel(resource_name) + + get_cmd = [ + 'get', 'pods', '-l', 'app=%s' % pod_label, + '-o', 'jsonpath="{.items[*].status.podIP}"' + ] + + stdout, _, _ = RunKubectlCommand(get_cmd) + return yaml.safe_load(stdout).split() + + def RunKubectlExec(self, pod_name, cmd): + run_cmd = [ + 'exec', '-it', pod_name, '--' + ] + cmd + RunKubectlCommand(run_cmd) + + # TODO(pclay): integrate with kubernetes_disk. + def GetDefaultStorageClass(self) -> str: + """Get the default storage class for the provider.""" + raise NotImplementedError diff --git a/script/cumulus/pkb/perfkitbenchmarker/context.py b/script/cumulus/pkb/perfkitbenchmarker/context.py new file mode 100644 index 0000000..c9dcfb8 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/context.py @@ -0,0 +1,39 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module for working with the current thread context.""" + +import threading + + +class _ThreadData(threading.local): + def __init__(self): + self.benchmark_spec = None + + +_thread_local = _ThreadData() + + +def SetThreadBenchmarkSpec(benchmark_spec): + """Sets the current thread's BenchmarkSpec object.""" + _thread_local.benchmark_spec = benchmark_spec + + +def GetThreadBenchmarkSpec(): + """Gets the current thread's BenchmarkSpec object. + + If SetThreadBenchmarkSpec() has not been called in either the current thread + or in an ancestor, then this method will return None by default. + """ + return _thread_local.benchmark_spec diff --git a/script/cumulus/pkb/perfkitbenchmarker/custom_virtual_machine_spec.py b/script/cumulus/pkb/perfkitbenchmarker/custom_virtual_machine_spec.py new file mode 100644 index 0000000..c278f76 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/custom_virtual_machine_spec.py @@ -0,0 +1,212 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Classes relating to decoding a custom machine type. +""" + + +import re + +from perfkitbenchmarker import errors +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec +from perfkitbenchmarker.providers.azure import flags as azure_flags +import six + + +class MemoryDecoder(option_decoders.StringDecoder): + """Verifies and decodes a config option value specifying a memory size.""" + + _CONFIG_MEMORY_PATTERN = re.compile(r'([0-9.]+)([GM]iB)') + + def Decode(self, value, component_full_name, flag_values): + """Decodes memory size in MiB from a string. + + The value specified in the config must be a string representation of the + memory size expressed in MiB or GiB. It must be an integer number of MiB + Examples: "1280MiB", "7.5GiB". + + Args: + value: The value specified in the config. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + int. Memory size in MiB. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + string = super(MemoryDecoder, self).Decode(value, component_full_name, + flag_values) + match = self._CONFIG_MEMORY_PATTERN.match(string) + if not match: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". Examples of valid values: "1280MiB", ' + '"7.5GiB".'.format(self._GetOptionFullName(component_full_name), + string)) + try: + memory_value = float(match.group(1)) + except ValueError: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". "{2}" is not a valid float.'.format( + self._GetOptionFullName(component_full_name), string, + match.group(1))) + memory_units = match.group(2) + if memory_units == 'GiB': + memory_value *= 1024 + memory_mib_int = int(memory_value) + if memory_value != memory_mib_int: + raise errors.Config.InvalidValue( + 'Invalid {0} value: "{1}". The specified size must be an integer ' + 'number of MiB.'.format(self._GetOptionFullName(component_full_name), + string)) + return memory_mib_int + + +class CustomMachineTypeSpec(spec.BaseSpec): + """Properties of a custom machine type. + + Attributes: + cpus: int. Number of vCPUs. + memory: string. Representation of the size of memory, expressed in MiB or + GiB. Must be an integer number of MiB (e.g. "1280MiB", "7.5GiB"). + """ + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. 
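+      For this spec the decoded options are 'cpus' (an integer of at least 1)
+      and 'memory' (a MiB/GiB size string handled by MemoryDecoder).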
+ """ + result = super(CustomMachineTypeSpec, cls)._GetOptionDecoderConstructions() + result.update({'cpus': (option_decoders.IntDecoder, {'min': 1}), + 'memory': (MemoryDecoder, {})}) + return result + + +class MachineTypeDecoder(option_decoders.TypeVerifier): + """Decodes the machine_type option of a VM config.""" + + def __init__(self, **kwargs): + super(MachineTypeDecoder, self).__init__((six.string_types + (dict,)), + **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Decodes the machine_type option of a VM config. + + Args: + value: Either a string name of a machine type or a dict containing + 'cpu' and 'memory' keys describing a custom VM. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + If value is a string, returns it unmodified. Otherwise, returns the + decoded CustomMachineTypeSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + super(MachineTypeDecoder, self).Decode(value, component_full_name, + flag_values) + if isinstance(value, six.string_types): + return value + return CustomMachineTypeSpec(self._GetOptionFullName(component_full_name), + flag_values=flag_values, **value) + + +class AzureMachineTypeDecoder(option_decoders.TypeVerifier): + """Decodes the machine_type option of a VM config.""" + + def __init__(self, **kwargs): + super(AzureMachineTypeDecoder, self).__init__(six.string_types + (dict,), + **kwargs) + + def Decode(self, value, component_full_name, flag_values): + """Decodes the machine_type option of a VM config. + + Args: + value: Either a string name of a machine type or a dict containing + 'compute_units' and 'tier' keys describing a machine type. + component_full_name: string. Fully qualified name of the configurable + component containing the config option. + flag_values: flags.FlagValues. Runtime flag values to be propagated to + BaseSpec constructors. + + Returns: + If value is a string, returns it unmodified. Otherwise, returns the + decoded CustomMachineTypeSpec. + + Raises: + errors.Config.InvalidValue upon invalid input value. + """ + super(AzureMachineTypeDecoder, self).Decode(value, component_full_name, + flag_values) + if isinstance(value, six.string_types): + return value + return AzurePerformanceTierDecoder( + self._GetOptionFullName(component_full_name), + flag_values=flag_values, **value) + + +class AzurePerformanceTierDecoder(spec.BaseSpec): + """Properties of a An Azure custom machine type. + + Attributes: + compute_units: int. Number of compute units. + tier: Basic, Standard or Premium + """ + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. 
+ """ + result = super( + AzurePerformanceTierDecoder, cls)._GetOptionDecoderConstructions() + # https://docs.microsoft.com/en-us/azure/virtual-machines/windows/acu + # https://docs.microsoft.com/en-us/azure/sql-database/sql-database-service-tiers + result.update({'compute_units': (option_decoders.IntDecoder, {'min': 50}), + 'tier': (option_decoders.EnumDecoder, { + 'valid_values': azure_flags.VALID_TIERS})}) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. + May be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + if flag_values['azure_tier'].present: + config_values['tier'] = flag_values.azure_tier + + if flag_values['azure_compute_units'].present: + config_values['compute_units'] = flag_values.azure_compute_units diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/data/__init__.py new file mode 100644 index 0000000..5aee938 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/__init__.py @@ -0,0 +1,238 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""This module defines an interface for finding named resources. + +Due to license restrictions, not all software dependences can be shipped with +PerfKitBenchmarker. +Those that can be included in perfkitbenchmarker/data, or + perfkitbenchmarker/scripts and are loaded via a PackageResourceLoader. + +Users can specify additional paths to search for required data files using the +`--data_search_paths` flag. +""" + +import abc +import logging +import os +import shutil +from absl import flags +import perfkitbenchmarker +from perfkitbenchmarker import temp_dir +import pkg_resources +import six + +FLAGS = flags.FLAGS + +flags.DEFINE_multi_string('data_search_paths', ['.'], + 'Additional paths to search for data files. ' + 'These paths will be searched prior to using files ' + 'bundled with PerfKitBenchmarker.') + +_RESOURCES = 'resources' + + +class ResourceNotFound(ValueError): + """Error raised when a resource could not be found on the search path.""" + pass + + +class ResourceLoader(six.with_metaclass(abc.ABCMeta, object)): + """An interface for loading named resources.""" + + @abc.abstractmethod + def ResourceExists(self, name): + """Checks for existence of the resource 'name'. + + Args: + name: string. Name of the resource. Typically a file name. + + Returns: + A boolean indicating whether the resource 'name' can be loaded by this + object. + """ + pass + + @abc.abstractmethod + def ResourcePath(self, name): + """Gets the path to the resource 'name'. + + Args: + name: string. Name of the resource. Typically a file name. + + Returns: + A full path to 'name' on the filesystem. 
+ + Raises: + ResourceNotFound: If 'name' was not found. + """ + pass + + +class FileResourceLoader(ResourceLoader): + """Loads resources from a directory in the filesystem. + + Attributes: + path: string. Root path to load resources from. + """ + + def __init__(self, path): + super().__init__() + self.path = path + + if not os.path.isdir(path): + logging.warn('File resource loader root %s is not a directory.', path) + + def __repr__(self): + return '<{0} path="{1}">'.format(type(self).__name__, self.path) + + def _Join(self, *args): + return os.path.join(self.path, *args) + + def ResourceExists(self, name): + return os.path.exists(self._Join(name)) + + def ResourcePath(self, name): + if not self.ResourceExists(name): + raise ResourceNotFound(name) + return self._Join(name) + + +class PackageResourceLoader(ResourceLoader): + """Loads resources from a Python package. + + Attributes: + package: string. Name of the package containing resources. + """ + + def __init__(self, package): + super().__init__() + self.package = package + + def __repr__(self): + return '<{0} package="{1}">'.format(type(self).__name__, self.package) + + def ResourceExists(self, name): + return pkg_resources.resource_exists(self.package, name) + + def ResourcePath(self, name): + if not self.ResourceExists(name): + raise ResourceNotFound(name) + try: + path = pkg_resources.resource_filename(self.package, name) + except NotImplementedError: + # This can happen if PerfKit Benchmarker is executed from a zip file. + # Extract the resource to the version-specific temporary directory. + path = os.path.join(temp_dir.GetVersionDirPath(), _RESOURCES, name) + if not os.path.exists(path): + dir_path = os.path.dirname(path) + try: + os.makedirs(dir_path) + except OSError: + if not os.path.isdir(dir_path): + raise + with open(path, 'wb') as extracted_file: + shutil.copyfileobj(pkg_resources.resource_stream(self.package, name), + extracted_file) + return path + + +DATA_PACKAGE_NAME = 'perfkitbenchmarker.data' +#YCSB_WORKLOAD_DIR_NAME = os.path.join( +# os.path.dirname(perfkitbenchmarker.__file__), 'data/ycsb') +#EDW_SCRIPT_DIR_NAME = os.path.join( +# os.path.dirname(perfkitbenchmarker.__file__), 'data/edw') +SCRIPT_PACKAGE_NAME = 'perfkitbenchmarker.scripts' +CONFIG_PACKAGE_NAME = 'perfkitbenchmarker.configs' +DEFAULT_RESOURCE_LOADERS = [PackageResourceLoader(DATA_PACKAGE_NAME), +# FileResourceLoader(YCSB_WORKLOAD_DIR_NAME), +# FileResourceLoader(EDW_SCRIPT_DIR_NAME), + PackageResourceLoader(SCRIPT_PACKAGE_NAME), + PackageResourceLoader(CONFIG_PACKAGE_NAME)] + + +def _GetResourceLoaders(): + """Gets a list of registered ResourceLoaders. + + Returns: + List of ResourceLoader instances. FileResourceLoaders for paths in + FLAGS.data_search_paths will be listed first, followed by + DEFAULT_RESOURCE_LOADERS. + """ + loaders = [] + + # Add all paths to list if they are specified on the command line (will warn + # if any are invalid). + # Otherwise add members of the default list iff they exist. + if FLAGS['data_search_paths'].present: + for path in FLAGS.data_search_paths: + loaders.append(FileResourceLoader(path)) + else: + for path in FLAGS.data_search_paths: + if os.path.isdir(path): + loaders.append(FileResourceLoader(path)) + loaders.extend(DEFAULT_RESOURCE_LOADERS) + return loaders + + +def ResourcePath(resource_name, search_user_paths=True): + """Gets the filename of a resource. + + Loaders are searched in order until the resource is found. + If no loader provides 'resource_name', an exception is thrown. 
+ + If 'search_user_paths' is true, the directories specified by + "--data_search_paths" are consulted before the default paths. + + Args: + resource_name: string. Name of a resource. + search_user_paths: boolean. Whether paths from "--data_search_paths" should + be searched before the default paths. + Returns: + A path to the resource on the filesystem. + Raises: + ResourceNotFound: When resource was not found. + """ + if search_user_paths: + loaders = _GetResourceLoaders() + else: + loaders = DEFAULT_RESOURCE_LOADERS + for loader in loaders: + if loader.ResourceExists(resource_name): + return loader.ResourcePath(resource_name) + + raise ResourceNotFound( + '{0} (Searched: {1})'.format(resource_name, loaders)) + + +def ResourceExists(resource_name, search_user_paths=True): + """Returns True if a resource exists. + + Loaders are searched in order until the resource is found. + If no loader provides 'resource_name', returns False. + + If 'search_user_paths' is true, the directories specified by + "--data_search_paths" are consulted before the default paths. + + Args: + resource_name: string. Name of a resource. + search_user_paths: boolean. Whether paths from "--data_search_paths" should + be searched before the default paths. + Returns: + Whether the resource exists. + """ + try: + ResourcePath(resource_name, search_user_paths) + return True + except ResourceNotFound: + return False diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/build_collectd.sh.j2 b/script/cumulus/pkb/perfkitbenchmarker/data/build_collectd.sh.j2 new file mode 100755 index 0000000..250936d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/build_collectd.sh.j2 @@ -0,0 +1,82 @@ +#!/bin/bash + +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Builds collectd from source on a VM. +# Supports Debian and RHEL-based systems. + +set -o errexit +set -o nounset +set -o pipefail +set -o xtrace + +readonly PACKAGE='{{ collectd_package }}' +readonly BUILD_DIR='{{ build_dir }}' +readonly PREFIX='{{ root_dir }}' +readonly PARENT_DIR='{{ parent_dir }}' +readonly CONFIG_DEPD_FILE='{{ config_depd_file }}' +readonly CONFIG_FILE='{{ config_file }}' +readonly PLUGIN_DIR='{{ plugin_dir }}' +readonly PATCHES_DIR='{{ patches_dir }}' +readonly PYTHON_CONFIG='{{ python_config }}' + +function build_collectd() { + mkdir $BUILD_DIR + pushd $BUILD_DIR + tar --strip-components 1 -xjf $PACKAGE + cp $PATCHES_DIR/*.patch . + for el in *.patch + do + patch -p1 < $el + done + autoreconf + export PYTHON_CONFIG + CFLAGS='-w -Werror' ./configure --prefix "$PREFIX" \ + --enable-python \ + --disable-perl \ + --without-perl-bindings \ + --disable-java \ + --with-librdkafka=/usr/local \ + --disable-rrdtool \ + --disable-werror + CFLAGS='-w -Werror' make -j `cat /proc/cpuinfo | grep processor | wc -l` + make install + popd + rm -rf $BUILD_DIR +} +function configure_collectd() { + # Add a collectd configuration script. 
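+  # The rendered CONFIG_FILE is copied verbatim into $PREFIX/etc/collectd.conf,
+  # which collectd reads at startup.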
+ # See: + # https://github.com/collectd/collectd/blob/collectd-5.5.0/src/collectd.conf.in + # for a more verbose description. + cd $PARENT_DIR + cat $CONFIG_FILE > $PREFIX/etc/collectd.conf +} + +function configure_extra_plugins() { + cd $PLUGIN_DIR + chmod +x *.sh + for filename in *_depend.sh; do + sudo ./$filename + done +} + +# Build collectd if it doesn't already exist. +if [ ! -f $PREFIX/sbin/collectd ] + then + build_collectd + configure_extra_plugins + configure_collectd $CONFIG_FILE +fi diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/cAdvisor_metrics/perf-default.json b/script/cumulus/pkb/perfkitbenchmarker/data/cAdvisor_metrics/perf-default.json new file mode 100644 index 0000000..a158e3b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/cAdvisor_metrics/perf-default.json @@ -0,0 +1,941 @@ +{ + "core": { + "events": [ + [ + "instructions_retired" + ], + [ + "l1d.replacement" + ], + [ + "L2_RQSTS.ALL_CODE_RD" + ], + [ + "MEM_LOAD_RETIRED.L1_HIT" + ], + [ + "MEM_LOAD_RETIRED.L2_HIT" + ], + [ + "L2_LINES_IN.ALL" + ], + [ + "MEM_LOAD_RETIRED.L2_MISS" + ], + [ + "L2_RQSTS.CODE_RD_MISS" + ], + [ + "ITLB_MISSES.WALK_COMPLETED" + ], + [ + "ref-cycles" + ], + [ + "instructions" + ], + [ + "DTLB_MISSES.WALK_COMPLETED" + ], + [ + "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M" + ], + [ + "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD" + ], + [ + "LSD.UOPS" + ], + [ + "CORE_POWER.LVL0_TURBO_LICENSE" + ], + [ + "CORE_POWER.LVL1_TURBO_LICENSE" + ], + [ + "CORE_POWER.LVL2_TURBO_LICENSE" + ], + [ + "CPU_CLK_UNHALTED.THREAD_ANY" + ], + [ + "IDQ_UOPS_NOT_DELIVERED.CORE" + ], + [ + "UOPS_RETIRED.RETIRE_SLOTS" + ], + [ + "INT_MISC.RECOVERY_CYCLES_ANY" + ], + [ + "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD" + ], + [ + "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD" + ], + [ + "UOPS_ISSUED.ANY" + ], + [ + "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE" + ], + [ + "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY" + ], + [ + "OCR.ALL_READS.L3_MISS.REMOTE_HITM" + ], + [ + "cpu-cycles" + ], + [ + "power/energy-pkg" + ], + [ + "power/energy-ram" + ], + [ + "ITLB_MISSES.WALK_COMPLETED_2M_4M" + ], + [ + "ITLB_MISSES.WALK_ACTIVE" + ], + [ + "DTLB_LOAD_MISSES.WALK_COMPLETED" + ], + [ + "DTLB_LOAD_MISSES.WALK_COMPLETED_4K" + ], + [ + "DTLB_LOAD_MISSES.WALK_COMPLETED_1G" + ], + [ + "DTLB_STORE_MISSES.WALK_COMPLETED" + ], + [ + "DTLB_LOAD_MISSES.WALK_ACTIVE" + ], + [ + "DTLB_STORE_MISSES.WALK_ACTIVE" + ], + [ + "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD" + ], + [ + "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE" + ], + [ + "ICACHE_16B.IFDATA_STALL" + ], + [ + "ICACHE_64B.IFTAG_STALL" + ], + [ + "INT_MISC.CLEAR_RESTEER_CYCLES" + ], + [ + "INT_MISC.RECOVERY_CYCLES_ANY" + ], + [ + "BACLEARS.ANY" + ], + [ + "BR_MISP_RETIRED.ALL_BRANCHES" + ], + [ + "MACHINE_CLEARS.COUNT" + ], + [ + "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE" + ], + [ + "CYCLE_ACTIVITY.STALLS_MEM_ANY" + ], + [ + "CYCLE_ACTIVITY.STALLS_L1D_MISS" + ], + [ + "CYCLE_ACTIVITY.STALLS_L2_MISS" + ], + [ + "CYCLE_ACTIVITY.STALLS_L3_MISS" + ], + [ + "DTLB_LOAD_MISSES.STLB_HIT" + ], + [ + "DTLB_STORE_MISSES.STLB_HIT" + ], + [ + "LD_BLOCKS.STORE_FORWARD" + ], + [ + "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD" + ], + [ + "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6" + ], + [ + "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6" + ], + [ + "OFFCORE_REQUESTS_BUFFER.SQ_FULL" + ], + [ + "EXE_ACTIVITY.BOUND_ON_STORES" + ], + [ + "EXE_ACTIVITY.EXE_BOUND_0_PORTS" + ], + [ + "EXE_ACTIVITY.1_PORTS_UTIL" + ], + [ + 
"EXE_ACTIVITY.2_PORTS_UTIL" + ], + [ + "ARITH.DIVIDER_ACTIVE" + ], + [ + "UOPS_EXECUTED.CORE_CYCLES_NONE" + ], + [ + "UOPS_EXECUTED.CORE_CYCLES_GE_1" + ], + [ + "UOPS_EXECUTED.CORE_CYCLES_GE_2" + ], + [ + "UOPS_EXECUTED.CORE_CYCLES_GE_3" + ], + [ + "IDQ.MS_UOPS" + ] + ], + "custom_events": [ + { + "config": [ + "0x5300c0" + ], + "name": "instructions_retired", + "type": 4 + }, + { + "config": [ + "0x151" + ], + "name": "l1d.replacement", + "type": 4 + }, + { + "config": [ + "0xe424" + ], + "name": "L2_RQSTS.ALL_CODE_RD", + "type": 4 + }, + { + "config": [ + "0x1d1" + ], + "name": "MEM_LOAD_RETIRED.L1_HIT", + "type": 4 + }, + { + "config": [ + "0x2d1" + ], + "name": "MEM_LOAD_RETIRED.L2_HIT", + "type": 4 + }, + { + "config": [ + "0x1ff1" + ], + "name": "L2_LINES_IN.ALL", + "type": 4 + }, + { + "config": [ + "0x10d1" + ], + "name": "MEM_LOAD_RETIRED.L2_MISS", + "type": 4 + }, + { + "config": [ + "0x2424" + ], + "name": "ML2_RQSTS.CODE_RD_MISS", + "type": 4 + }, + { + "config": [ + "0xe85" + ], + "name": "ITLB_MISSES.WALK_COMPLETED", + "type": 4 + }, + { + "config": [ + "0x9" + ], + "name": "ref-cycles" + }, + { + "config": [ + "0x1" + ], + "name": "instructions" + }, + { + "config": [ + "0xe08" + ], + "name": "DTLB_MISSES.WALK_COMPLETED", + "type": 4 + }, + { + "config": [ + "0x408" + ], + "name": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", + "type": 4 + }, + { + "config": [ + "0x1000160" + ], + "name": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "type": 4 + }, + { + "config": [ + "0x1a8" + ], + "name": "LSD.UOPS", + "type": 4 + }, + { + "config": [ + "0x728" + ], + "name": "CORE_POWER.LVL0_TURBO_LICENSE", + "type": 4 + }, + { + "config": [ + "0x1828" + ], + "name": "CORE_POWER.LVL1_TURBO_LICENSE", + "type": 4 + }, + { + "config": [ + "0x2028" + ], + "name": "CORE_POWER.LVL2_TURBO_LICENSE", + "type": 4 + }, + { + "config": [ + "0x20003c" + ], + "name": "CPU_CLK_UNHALTED.THREAD_ANY", + "type": 4 + }, + { + "config": [ + "0x19c" + ], + "name": "IDQ_UOPS_NOT_DELIVERED.CORE", + "type": 4 + }, + { + "config": [ + "0x2c2" + ], + "name": "UOPS_RETIRED.RETIRE_SLOTS", + "type": 4 + }, + { + "config": [ + "0x20010d" + ], + "name": "INT_MISC.RECOVERY_CYCLES_ANY", + "type": 4 + }, + { + "config": [ + "0x1060" + ], + "name": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "type": 4 + }, + { + "config": [ + "0x10b0" + ], + "name": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "type": 4 + }, + { + "config": [ + "0x10e" + ], + "name": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "type": 4 + }, + { + "config": [ + "0x23c" + ], + "name": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "type": 4 + }, + { + "config": [ + "0x20013c" + ], + "name": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "type": 4 + }, + { + "config": [ + "0x1b7", + "0x103FC007F7" + ], + "name": "OCR.ALL_READS.L3_MISS.REMOTE_HITM", + "type": 4 + }, + { + "config": [ + "0x00" + ], + "name": "cpu-cycles" + }, + { + "config": [ + "0x2" + ], + "name": "power/energy-pkg" + }, + { + "config": [ + "0x3" + ], + "name": "power/energy-ram" + }, + { + "config": [ + "0x485" + ], + "name": "ITLB_MISSES.WALK_COMPLETED_2M_4M", + "type": 4 + }, + { + "config": [ + "0x1001085" + ], + "name": "ITLB_MISSES.WALK_ACTIVE", + "type": 4 + }, + { + "config": [ + "0xe08" + ], + "name": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "type": 4 + }, + { + "config": [ + "0x208" + ], + "name": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", + "type": 4 + }, + { + "config": [ + "0x808" + ], + "name": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", + "type": 4 + }, + { + "config": [ + 
"0xe49" + ], + "name": "DTLB_STORE_MISSES.WALK_COMPLETED", + "type": 4 + }, + { + "config": [ + "0x1001008" + ], + "name": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "type": 4 + }, + { + "config": [ + "0x1001049" + ], + "name": "DTLB_STORE_MISSES.WALK_ACTIVE", + "type": 4 + }, + { + "config": [ + "0x1b7", + "0x083FC007F7" + ], + "name": "OCR.ALL_READS.L3_MISS.REMOTE_HIT_FORWARD", + "type": 4 + }, + { + "config": [ + "0x400019c" + ], + "name": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "type": 4 + }, + { + "config": [ + "0x480" + ], + "name": "ICACHE_16B.IFDATA_STALL", + "type": 4 + }, + { + "config": [ + "0x483" + ], + "name": "ICACHE_64B.IFTAG_STALL", + "type": 4 + }, + { + "config": [ + "0x800d" + ], + "name": "INT_MISC.CLEAR_RESTEER_CYCLES", + "type": 4 + }, + { + "config": [ + "0x20010d" + ], + "name": "INT_MISC.RECOVERY_CYCLES_ANY", + "type": 4 + }, + { + "config": [ + "0x1e6" + ], + "name": "BACLEARS.ANY", + "type": 4 + }, + { + "config": [ + "0xc5" + ], + "name": "BR_MISP_RETIRED.ALL_BRANCHES", + "type": 4 + }, + { + "config": [ + "0x10401c3" + ], + "name": "MACHINE_CLEARS.COUNT", + "type": 4 + }, + { + "config": [ + "0x400019c" + ], + "name": "IDQ_UOPS_NOT_DELIVERED.CYCLES_0_UOPS_DELIV.CORE", + "type": 4 + }, + { + "config": [ + "0x140014a3" + ], + "name": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "type": 4 + }, + { + "config": [ + "0xc000ca3" + ], + "name": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "type": 4 + }, + { + "config": [ + "0x50005a3" + ], + "name": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "type": 4 + }, + { + "config": [ + "0x60006a3" + ], + "name": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "type": 4 + }, + { + "config": [ + "0x2008" + ], + "name": "DTLB_LOAD_MISSES.STLB_HIT", + "type": 4 + }, + { + "config": [ + "0x2049" + ], + "name": "DTLB_STORE_MISSES.STLB_HIT", + "type": 4 + }, + { + "config": [ + "0x203" + ], + "name": "LD_BLOCKS.STORE_FORWARD", + "type": 4 + }, + { + "config": [ + "0x1001060" + ], + "name": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "type": 4 + }, + { + "config": [ + "0x6000160" + ], + "name": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "type": 4 + }, + { + "config": [ + "0x6001060" + ], + "name": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "type": 4 + }, + { + "config": [ + "0x1b2" + ], + "name": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "type": 4 + }, + { + "config": [ + "0x40a6" + ], + "name": "EXE_ACTIVITY.BOUND_ON_STORES", + "type": 4 + }, + { + "config": [ + "0x1a6" + ], + "name": "EXE_ACTIVITY.EXE_BOUND_0_PORTS", + "type": 4 + }, + { + "config": [ + "0x2a6" + ], + "name": "EXE_ACTIVITY.1_PORTS_UTIL", + "type": 4 + }, + { + "config": [ + "0x4a6" + ], + "name": "EXE_ACTIVITY.2_PORTS_UTIL", + "type": 4 + }, + { + "config": [ + "0x1000114" + ], + "name": "ARITH.DIVIDER_ACTIVE", + "type": 4 + }, + { + "config": [ + "0x18002b1" + ], + "name": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "type": 4 + }, + { + "config": [ + "0x10002b1" + ], + "name": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "type": 4 + }, + { + "config": [ + "0x20002b1" + ], + "name": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "type": 4 + }, + { + "config": [ + "0x30002b1" + ], + "name": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "type": 4 + }, + { + "config": [ + "0x3079" + ], + "name": "IDQ.MS_UOPS", + "type": 4 + } + ] + }, + "uncore": { + "events": [ + "uncore_cha/unc_cha_tor_inserts.ia_miss.0x40432", + "uncore_cha/unc_cha_tor_inserts.ia_occupancy.0x40432", + "uncore_cha/unc_cha_tor_inserts.ia_miss.0x40431", + "uncore_imc/cas_count_read", + "uncore_imc/cas_count_write", + 
"uncore_cha/unc_cha_tor_occupancy.ia_miss.0x40431", + "uncore_cha/unc_cha_clockticks", + "uncore_cha/unc_cha_tor_inserts.ia_hit.0x40433", + "uncore_cha/unc_cha_tor_occupancy.ia_hit.0x40433", + "uncore_cha/unc_cha_tor_inserts.ia_miss.0x40433", + "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x40433", + "uncore_cha/unc_cha_tor_inserts.ia_miss.0x12D40433", + "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x12D40433", + "uncore_cha/unc_cha_tor_inserts.ia_miss.0x12CC0233", + "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x12CC0233", + "uncore_cha/unc_cha_tor_inserts.ia_miss.0x12C40033", + "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x12C40033", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2", + "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3", + "uncore_upi/UNC_UPI_TxL_FLITS.ALL_DATA", + "uncore_upi/UNC_UPI_TxL_FLITS.NON_DATA", + "uncore_upi/UNC_UPI_CLOCKTICKS", + "uncore_upi/UNC_UPI_L1_POWER_CYCLES", + "uncore_imc/UNC_M_RPQ_INSERTS", + "uncore_imc/UNC_M_RPQ_OCCUPANCY" + ], + "custom_events": [ + { + "config": [ + "0x2135", + "0x4043200000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_miss.0x40432" + }, + { + "config": [ + "0x2136", + "0x4043200000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_occupancy.0x40432" + }, + { + "config": [ + "0x2135", + "0x4043100000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_miss.0x40431" + }, + { + "config": [ + "0x304" + ], + "name": "uncore_imc/cas_count_read" + }, + { + "config": [ + "0xc04" + ], + "name": "uncore_imc/cas_count_write" + }, + { + "config": [ + "0x2136" + ], + "name": "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x40431" + }, + { + "config": [ + "0x00" + ], + "name": "uncore_cha/unc_cha_clockticks" + }, + { + "config": [ + "0x1135", + "0x4043300000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_hit.0x40433" + }, + { + "config": [ + "0x1136", + "0x4043300000000" + ], + "name": "uncore_cha/unc_cha_tor_occupancy.ia_hit.0x40433" + }, + { + "config": [ + "0x2135", + "0x4043300000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_miss.0x40433" + }, + { + "config": [ + "0x2136", + "0x4043300000000" + ], + "name": "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x40433" + }, + { + "config": [ + "0x2135", + "0x12D4043300000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_miss.0x12D40433" + }, + { + "config": [ + "0x2136", + "0x12D4043300000000" + ], + "name": "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x12D40433" + }, + { + "config": [ + "0x2135", + "0x12CC023300000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_miss.0x12CC0233" + }, + { + "config": [ + "0x2136", + "0x12CC023300000000" + ], + "name": "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x12CC0233" + }, + { + "config": [ + "0x2135", + "0x12C4003300000000" + ], + "name": "uncore_cha/unc_cha_tor_inserts.ia_miss.0x12C40033" + }, + { + "config": [ + "0x2136", + "0x12C4003300000000" + ], + "name": "uncore_cha/unc_cha_tor_occupancy.ia_miss.0x12C40033" + }, + { + "config": [ + "0x700000000483" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART0" + }, + { + "config": [ + "0x702000000483" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART1" + }, + { + "config": [ + "0x704000000483" + ], + "name": 
"uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART2" + }, + { + "config": [ + "0x708000000483" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_READ.PART3" + }, + { + "config": [ + "0x700000000183" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART0" + }, + { + "config": [ + "0x702000000183" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART1" + }, + { + "config": [ + "0x704000000183" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART2" + }, + { + "config": [ + "0x708000000183" + ], + "name": "uncore_iio/UNC_IIO_DATA_REQ_OF_CPU.MEM_WRITE.PART3" + }, + { + "config": [ + "0xf02" + ], + "name": "uncore_upi/UNC_UPI_TxL_FLITS.ALL_DATA" + }, + { + "config": [ + "0x9702" + ], + "name": "uncore_upi/UNC_UPI_TxL_FLITS.NON_DATA" + }, + { + "config": [ + "0x1" + ], + "name": "uncore_upi/UNC_UPI_CLOCKTICKS" + }, + { + "config": [ + "0x21" + ], + "name": "uncore_upi/UNC_UPI_L1_POWER_CYCLES" + }, + { + "config": [ + "0x10" + ], + "name": "uncore_imc/UNC_M_RPQ_INSERTS" + }, + { + "config": [ + "0x80" + ], + "name": "uncore_imc/UNC_M_RPQ_OCCUPANCY" + } + ] + } +} diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/cAdvisor_metrics/prometheus-config.yaml.j2 b/script/cumulus/pkb/perfkitbenchmarker/data/cAdvisor_metrics/prometheus-config.yaml.j2 new file mode 100644 index 0000000..6bcb5ca --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/cAdvisor_metrics/prometheus-config.yaml.j2 @@ -0,0 +1,43 @@ +scrape_configs: + - job_name: 'cAdvisors' + scrape_interval: {{ scrape_interval }}s + static_configs: + - targets: {{ targets_list }} + metric_relabel_configs: + - source_labels: [__name__] + regex: "go.*" + action: drop + - source_labels: [__name__] + regex: "process.*" + action: drop + - source_labels: [__name__] + regex: "machine.*" + action: drop + - source_labels: [__name__] + regex: "container_cpu.*" + action: drop + - source_labels: [__name__] + regex: "container_memory.*" + action: drop + - source_labels: [__name__] + regex: "container_network.*" + action: drop + - source_labels: [__name__] + regex: "container_spec.*" + action: drop + - source_labels: [__name__] + regex: "container_start.*" + action: drop + - source_labels: [__name__] + regex: "container_tasks.*" + action: drop + - source_labels: [__name__] + regex: "container_last.*" + action: drop + - source_labels: [__name__] + regex: "container_scrape.*" + action: drop + - source_labels: [__name__] + regex: '(.*)' + replacement: 'cadvisor_${1}' + target_label: __name__ diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectd.conf b/script/cumulus/pkb/perfkitbenchmarker/data/collectd.conf new file mode 100755 index 0000000..d24a649 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/collectd.conf @@ -0,0 +1,118 @@ +Interval 10 + +LoadPlugin syslog +LoadPlugin cpu +LoadPlugin csv +LoadPlugin df +LoadPlugin disk +LoadPlugin entropy +LoadPlugin ethstat +LoadPlugin interface +LoadPlugin ipc +LoadPlugin irq +LoadPlugin load +LoadPlugin memory +LoadPlugin swap +LoadPlugin cpufreq +LoadPlugin "aggregation" + + + + ReportByCpu true + ValuesPercentage false + + + # ignore rootfs; else, the root file-system would appear twice, causing + # one of the updates to fail and spam the log + FSType rootfs + # ignore the usual virtual / temporary file-systems + FSType sysfs + FSType proc + FSType devtmpfs + FSType devpts + FSType tmpfs + FSType fusectl + FSType cgroup + IgnoreSelected true + + + DataDir "/opt/collectd/collectd-csv" + StoreRates true + + + Disk "/^[hs]d[a-z][0-9]+?$/" + 
Disk "/^xvd[a-z][a-z0-9]+?$/" + Disk "/^vd[a-z]?$/" + Disk "/^nvme[0-9c]+n[0-9]+$/" + IgnoreSelected false + UseBSDName false + UdevNameAttr "DEVNAME" + + + + # AWS: ens5, Azure: eth0, GCP: ens4, other: eno1 + Interface "/^eth[0-9]?$/" + Interface "/^ens[0-9]?$/" + Interface "/^eno[0-9]?$/" + Interface "/^enp[0-5]s[0-9]?$/" + Interface "/^bond[0-9]?$/" + Interface "/^br[0-9]?$/" + Map "rx_csum_offload_errors" "if_rx_errors" "checksum_offload" + Map "multicast" "if_multicast" + MappedOnly false + + + + Interface "/^eth/" + Interface "/^ens/" + Interface "/^enp/" + Interface "/^eno/" + Interface "/^bond/" + Interface "/^br/" + IgnoreSelected false + + + + Irq 7 + Irq 8 + Irq 9 + IgnoreSelected true + + + + ReportRelative true + + + + Globals true + + + ModulePath "/opt/collectd/collectd_plugins" + Import "sysstat_memstat" + Import "network_irq_affinity" + + # SamplingRate is the interval (in seconds) at which the sysstat sampling is called. Needs to be a positive integer value + # Metrics represents the types of data that this plugin can output. + # Metrics options: "pgpgin/s,pgpgout/s,fault/s,majflt/s,pgfree/s,pgscank/s,pgscand/s,pgsteal/s,vmeff,kbmemfree,kbmemused,memused,kbbuffers,kbcached,kbcommit,commit,kbactive,kbinact,kbdirty,kbswpfree,kbswpused,swpused,kbswpcad,swpcad" + # --- any subset of the input above will output the metrics in the list. Must be specified as a single line string, in quotations, with the elements separated by commas, + # "all" --- equivalent to giving the all the metrics mentioned above + # if not specified this will default to the "kbmemused" consumption. + # Docs: http://sebastien.godard.pagesperso-orange.fr/man_sar.html + SamplingRate 10 #seconds. + Metrics "all" + + + # No config options + + + + + + Plugin "cpu" + Type "cpu" + GroupBy "Host" + GroupBy "TypeInstance" + CalculateAverage true + + + diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectdDepend.txt b/script/cumulus/pkb/perfkitbenchmarker/data/collectdDepend.txt new file mode 100644 index 0000000..e69de29 diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectd_patches/ethstat_interface.patch b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_patches/ethstat_interface.patch new file mode 100644 index 0000000..312485d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_patches/ethstat_interface.patch @@ -0,0 +1,146 @@ +diff --git a/Makefile.am b/Makefile.am +index 00947da..5e0395f 100644 +--- a/Makefile.am ++++ b/Makefile.am +@@ -1061,6 +1061,7 @@ if BUILD_PLUGIN_ETHSTAT + pkglib_LTLIBRARIES += ethstat.la + ethstat_la_SOURCES = src/ethstat.c + ethstat_la_LDFLAGS = $(PLUGIN_LDFLAGS) ++ethstat_la_LIBADD = libignorelist.la + endif + + if BUILD_PLUGIN_FHCOUNT +diff --git a/src/ethstat.c b/src/ethstat.c +index f8bc5b5..6fab71d 100644 +--- a/src/ethstat.c ++++ b/src/ethstat.c +@@ -27,6 +27,7 @@ + #include "plugin.h" + #include "utils/avltree/avltree.h" + #include "utils/common/common.h" ++#include "utils/ignorelist/ignorelist.h" + #include "utils_complain.h" + + #if HAVE_SYS_IOCTL_H +@@ -41,6 +42,9 @@ + #if HAVE_LINUX_ETHTOOL_H + #include + #endif ++#if HAVE_IFADDRS_H ++#include ++#endif + + struct value_map_s { + char type[DATA_MAX_NAME_LEN]; +@@ -48,31 +52,91 @@ struct value_map_s { + }; + typedef struct value_map_s value_map_t; + +-static char **interfaces; +-static size_t interfaces_num; ++static char **regexed_interfaces = NULL; ++static size_t regexed_interfaces_num = 0; ++ ++static char **valid_interfaces = NULL; ++static size_t valid_interfaces_num = 0; + + 
static c_avl_tree_t *value_map; + + static bool collect_mapped_only; + ++static int ethstat_check_if_existing_interface(char **existing_interfaces, char *entry) { ++ char *aux; ++ int idx; ++ ++ for(idx = 0; idx < valid_interfaces_num; ++idx) { ++ aux = strdup(existing_interfaces[idx]); ++ if (strcmp(aux, entry) == 0) ++ return 0; ++ } ++ ++ return 1; ++} ++ ++static int ethstat_get_matching_interface(const char *entry) { ++ size_t len; ++ struct ifaddrs *if_list = NULL; ++ char **v_tmp; ++ ignorelist_t *ethstat_ignorelist = NULL; ++ ++ if (ethstat_ignorelist == NULL) ++ ethstat_ignorelist = ignorelist_create(/* invert = */ 1); ++ if (ethstat_ignorelist == NULL) ++ return 1; ++ ++ len = strlen(entry); ++ if (len == 0) { ++ DEBUG("no new interface: empty entry"); ++ return 1; ++ } ++ ++ #if HAVE_GETIFADDRS ++ ignorelist_add(ethstat_ignorelist, entry); ++ ++ if (getifaddrs(&if_list) != 0) { ++ DEBUG("no new interface: no local interfaces were found"); ++ return -1; ++ } ++ for (struct ifaddrs *if_ptr = if_list; if_ptr != NULL; ++ if_ptr = if_ptr->ifa_next) { ++ if (ignorelist_match(ethstat_ignorelist, if_ptr->ifa_name) == 0 && ++ ethstat_check_if_existing_interface(valid_interfaces, if_ptr->ifa_name) != 0) { ++ v_tmp = realloc(valid_interfaces, sizeof(*valid_interfaces) * (valid_interfaces_num + 1)); ++ if (v_tmp == NULL) ++ return -1; ++ ++ valid_interfaces = v_tmp; ++ valid_interfaces[valid_interfaces_num] = strdup(if_ptr->ifa_name); ++ valid_interfaces_num++; ++ INFO("ethstat plugin: Registered interface %s", if_ptr->ifa_name); ++ } ++ } ++ #endif ++ ++ return 0; ++} ++ + static int ethstat_add_interface(const oconfig_item_t *ci) /* {{{ */ + { + char **tmp; + int status; + +- tmp = realloc(interfaces, sizeof(*interfaces) * (interfaces_num + 1)); ++ tmp = realloc(regexed_interfaces, sizeof(*regexed_interfaces) * (regexed_interfaces_num + 1)); + if (tmp == NULL) + return -1; +- interfaces = tmp; +- interfaces[interfaces_num] = NULL; ++ regexed_interfaces = tmp; ++ regexed_interfaces[regexed_interfaces_num] = NULL; + +- status = cf_util_get_string(ci, interfaces + interfaces_num); ++ status = cf_util_get_string(ci, regexed_interfaces + regexed_interfaces_num); + if (status != 0) + return status; + +- interfaces_num++; +- INFO("ethstat plugin: Registered interface %s", +- interfaces[interfaces_num - 1]); ++ status = ethstat_get_matching_interface(regexed_interfaces[regexed_interfaces_num]); ++ if (status != 0) ++ return status; ++ regexed_interfaces_num++; + + return 0; + } /* }}} int ethstat_add_interface */ +@@ -290,8 +354,8 @@ static int ethstat_read_interface(char *device) { + } /* }}} ethstat_read_interface */ + + static int ethstat_read(void) { +- for (size_t i = 0; i < interfaces_num; i++) +- ethstat_read_interface(interfaces[i]); ++ for (size_t i = 0; i < valid_interfaces_num; i++) ++ ethstat_read_interface(valid_interfaces[i]); + + return 0; + } diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/network_irq_affinity.py b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/network_irq_affinity.py new file mode 100755 index 0000000..3967cec --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/network_irq_affinity.py @@ -0,0 +1,92 @@ +import collectd +import time +import os +import datetime + +PLUGIN_NAME = "network_irq_affinity" +UID = "0" +IRQS = [] + + +def set_uid(): + global UID + UID = datetime.datetime.today().strftime("%Y%m%d%H%M%S%f") + + +def logger(message): + collectd.info("%s: %s" % (PLUGIN_NAME, message)) + + +def 
_GetIrqAffinity(irq): + global UID + os.system('cat /proc/irq/{irq}/smp_affinity > /tmp/nirqa{UID}{irq}.out'.format(irq=irq, UID=UID)) + with open("/tmp/nirqa{UID}{irq}.out".format(irq=irq, UID=UID)) as f: + lines = f.readlines() + return lines[0].strip() + + +def _GetIpInterfaceName(): + global UID + os.system("ip link | awk -F: '$0 !~ \"lo|vir|wl|^[^0-9]\"{print $2a;getline}' > /tmp/nirqa%snm.out" % (UID)) + with open("/tmp/nirqa{UID}nm.out".format(UID=UID)) as f: + lines = f.readlines() + return lines[0].strip() + + +def _GetIpInterfaceIrqs(): + global UID + os.system("cat /proc/interrupts | grep {name} | awk '{{print substr($1,1,length($1)-1)}}' > /tmp/nirqa{UID}irqs.out" + .format(name=_GetIpInterfaceName(), UID=UID)) + with open("/tmp/nirqa{UID}irqs.out".format(UID=UID)) as f: + lines = f.readlines() + irqs = [] + for line in lines: + irqs.append(line.strip()) + return irqs + + +def _GetCpusFromIrqAffinity(affinity): + """returns a list of ints representing a single irq's associated CPUs""" + """e.g. 00000000,10000000,00000000""" + cpus = [] + position = 0 + for c in reversed(affinity): + if c == ',': + continue + n = int(c, 16) + for i in range(4): + if n & 2 ** i: + cpus.append((position * 4) + i) + position += 1 + return cpus + + +def config_func(config): + pass + + +def read_func(): + global IRQS + for irq in IRQS: + cpus = _GetCpusFromIrqAffinity(_GetIrqAffinity(irq)) + # logger(str(irq) + ":" + str(cpus)) + metric = collectd.Values() + metric.plugin = PLUGIN_NAME + metric.type = "count" + metric.type_instance = irq + metric.values = cpus + metric.dispatch() + collectd.flush() + + +def init_func(): + set_uid() + + global IRQS + IRQS = _GetIpInterfaceIrqs() + + +# Hook Callbacks, Order is important! +collectd.register_config(config_func) +collectd.register_init(init_func) +collectd.register_read(read_func) diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/plugin_utils.sh b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/plugin_utils.sh new file mode 100755 index 0000000..02abd08 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/plugin_utils.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +LINUX_DISTRIBUTION=`python -c 'import platform; print (platform.dist()[0].lower())'` +KERNEL=`uname -r` + +function is_ubuntu() { + # 0 = true ; 1 = false + if [ $LINUX_DISTRIBUTION = "ubuntu" ]; then + return 0 + fi + return 1 +} + +function is_centos() { + if [ $LINUX_DISTRIBUTION = "centos" ]; then + return 0 + fi + if [ $LINUX_DISTRIBUTION = "redhat" ]; then + return 0 + fi + return 1 +} + +function get_kernel_version() { + return $KERNEL +} diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/sysstat_depend.sh b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/sysstat_depend.sh new file mode 100755 index 0000000..9b2fdd5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/sysstat_depend.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +. 
plugin_utils.sh + +if is_ubuntu; then + apt-get install -y sysstat +fi + +if is_centos; then + yum install -y sysstat +fi diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/sysstat_memstat.py b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/sysstat_memstat.py new file mode 100644 index 0000000..13ffba6 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/collectd_plugins/sysstat_memstat.py @@ -0,0 +1,171 @@ +import collectd +import threading +import time +import os +import datetime + +mapping = { + "pgpgin/s": [3, "gauge"], + "pgpgout/s": [4, "gauge"], + "fault/s": [5, "gauge"], + "majflt/s": [6, "gauge"], + "pgfree/s": [7, "gauge"], + "pgscank/s": [8, "gauge"], + "pgscand/s": [9, "gauge"], + "pgsteal/s": [10, "gauge"], + "vmeff": [11, "percent"], + "kbmemfree": [12, "memory"], + "kbmemused": [13, "memory"], + "memused": [14, "percent"], + "kbbuffers": [15, "memory"], + "kbcached": [16, "memory"], + "kbcommit": [17, "counter"], + "commit": [18, "percent"], + "kbactive": [19, "memory"], + "kbinact": [20, "memory"], + "kbdirty": [21, "memory"], + "kbswpfree": [22, "memory"], + "kbswpused": [23, "memory"], + "swpused": [24, "percent"], + "kbswpcad": [25, "memory"], + "swpcad": [26, "percent"], +} + + +class Queue: + def __init__(self): + self.content = [] + self.lock = threading.Lock() + + def put(self, element): + self.lock.acquire() + self.content.insert(0, element) + self.lock.release() + + def get(self): + self.lock.acquire() + element = self.content.pop() + self.lock.release() + return element + + def empty(self): + self.lock.acquire() + result = len(self.content) == 0 + self.lock.release() + return result + + +PLUGIN_NAME = "sysstat_memstat" +UID = "0" +SAMPLING_RATE = 5 +METRICS = ["kbmemused"] +q = Queue() + + +def set_uid(): + global UID + UID = datetime.datetime.today().strftime("%Y%m%d%H%M%S%f") + + +def logger(message): + collectd.info("%s: %s" % (PLUGIN_NAME, message)) + + +def thread_runner_func(): + global q + global SAMPLING_RATE + global UID + while True: + os.system("rm -rf /tmp/mysar{UID} ; rm -rf /tmp/myout{UID}".format(UID=UID)) + os.system("sar -A -o /tmp/mysar{UID} 1 1 > /dev/null 2>&1".format(UID=UID)) + os.system("sadf /tmp/mysar{UID} -d -U -h -- -rSB | tr -s ';' ',' > /tmp/myout{UID}".format(UID=UID)) # memory stats + with open("/tmp/myout{UID}".format(UID=UID)) as f: + lines = f.readlines() + del lines[0] + # del lines[0] + data = { + "lines": lines + } + q.put(data) + time.sleep(SAMPLING_RATE - 1) + + +def config_func(config): + interval_set = False + metrics_set = False + + for node in config.children: + key = node.key.lower() + val = node.values[0] + logger(key) + logger(val) + if key == "samplingrate": + global SAMPLING_RATE + SAMPLING_RATE = int(val) + interval_set = True + elif key == "metrics": + global METRICS + if str(val) == "all": + METRICS = mapping.keys() + metrics_set = True + else: + intermediary_str = str(val).split(",") + new_metrics = [] + for metric in intermediary_str: + if metric in mapping.keys(): + new_metrics.append(metric) + METRICS = new_metrics + metrics_set = True + else: + logger('Unknown config key "%s"' % key) + + if interval_set: + logger("Using overridden interval: %s" % str(SAMPLING_RATE)) + else: + logger("Using default interval: %s " % str(SAMPLING_RATE)) + + if metrics_set: + logger("Using overridden metrics %s" % str(METRICS)) + else: + logger("Using default metrics: %s " % str(METRICS)) + + +def read_func(): + # logger("In read_func. 
Queue empty: %s" % str(q.empty()) ) + global METRICS + while not q.empty(): + data = q.get() + for line in data["lines"]: + for keyword in METRICS: + fields = line.split(",") + field_value = fields[mapping[keyword][0]] + + metric = collectd.Values() + metric.plugin = PLUGIN_NAME + + # DS_TYPE_COUNTER, DS_TYPE_GAUGE, DS_TYPE_DERIVE or DS_TYPE_ABSOLUTE. + # https://collectd.org/documentation/manpages/collectd-python.5.shtml + # gauge expects data to be float. Needs 1 single value + # counter expects data to be int + # more: cat ./collectd/share/collectd/types.db + # http://giovannitorres.me/using-collectd-python-and-graphite-to-graph-slurm-partitions.html + metric.type = mapping[keyword][1] + + metric.type_instance = keyword + metric.values = [field_value] + # metric.host = 'OverwritenHostname' + metric.dispatch() + collectd.flush() + + +def init_func(): + set_uid() + worker_thread = threading.Thread(target=thread_runner_func, args=()) + worker_thread.start() + logger("Monitoring thread started") + + +# Hook Callbacks, Order is important! +collectd.register_config(config_func) +collectd.register_init(init_func) +collectd.register_read(read_func) diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/edw/README b/script/cumulus/pkb/perfkitbenchmarker/data/edw/README new file mode 100644 index 0000000..9fb53f6 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/edw/README @@ -0,0 +1 @@ +Leave it empty but the directory is required diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/emon/README.md b/script/cumulus/pkb/perfkitbenchmarker/data/emon/README.md new file mode 100644 index 0000000..f5138ce --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/emon/README.md @@ -0,0 +1,43 @@ +### EMON + +This telemetry data collection tool can be used to collect EMON data on the SUT(s) while the workload is running on the same SUT(s). This is one of the traces based tool set in the Perfkitbenchmark (PKB) framework. By default, the telemetry tool will be fired at the same time the benchmark is launched, and terminated when the benchmark run is completed. The result will be pulled back to the PKB host in the PKB output folder. + +"--emon" is the only required command line flag for EMON collection. All other EMON related flags are optional. + +⚠️ **Please note that currently EMON runs only on bare metal instances.** + +``` + +Note :- By default the latest emon is available for you to run. If you want to use a custom version, +use --emon_tarball= + +For AMD runs, download and use the emon AMD version from emon website with --emon_tarball= + +``` + +#### Use Case 1: Collect EMON data and conduct EDP post-processing on-prem. 
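+The command below runs the `sysbench_cpu` benchmark on the static machines described in `sysbench_cpu_config.yaml`; EMON is started alongside the benchmark, stopped when the run completes, and the collected data is pulled back into the PKB output folder for EDP post-processing.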
+ +``` +python3 ./pkb.py --emon --benchmarks=sysbench_cpu --benchmark_config_file=sysbench_cpu_config.yaml +``` + +##### Here is an example of sysbench_cpu_config.yaml: +```bash +static_vms: + - &worker + ip_address: 10.165.57.29 + user_name: pkb + ssh_private_key: ~/.ssh/id_rsa + internal_ip: 10.165.57.29 + tag: server +sysbench_cpu: + vm_groups: + vm_1: + static_vms: + - *worker + +flags: + sysbench_cpu_time: 60 # run for 60 seconds + sysbench_cpu_events: 0 # don't limit runtime by event count + sysbench_cpu_thread_counts: 1,0 # zero sets threadcount to number of VCPUs +``` diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/proxy_ip_list/proxy_ip_list.txt b/script/cumulus/pkb/perfkitbenchmarker/data/proxy_ip_list/proxy_ip_list.txt new file mode 100644 index 0000000..5774985 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/proxy_ip_list/proxy_ip_list.txt @@ -0,0 +1 @@ +# Update this file if you need limit cloud VMs access source IP addresses to certain CIDRs only such as 3.140.220.0/24 diff --git a/script/cumulus/pkb/perfkitbenchmarker/data/ssh_config.j2 b/script/cumulus/pkb/perfkitbenchmarker/data/ssh_config.j2 new file mode 100644 index 0000000..bf71210 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data/ssh_config.j2 @@ -0,0 +1,45 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +Host * + Protocol=2 + UserKnownHostsFile=/dev/null + StrictHostKeyChecking=no + IdentitiesOnly=yes + PreferredAuthentications=publickey + PasswordAuthentication=no + ConnectTimeout=5 + GSSAPIAuthentication=no + ServerAliveInterval=30 + ServerAliveCountMax=10 + BatchMode=yes + +{# Numeric / name-based access -#} +{% for vm in vms %} +Host {{ vm.name }} vm{{ loop.index0 }} + HostName={{ vm.ip_address }} + User={{ vm.user_name }} + Port={{ vm.ssh_port }} + IdentityFile={{ vm.ssh_private_key }} +{% endfor %} + +{# Group-based access -#} +{% for group, group_vms in vm_groups.items() %} +{% for vm in group_vms %} +Host {{group}}-{{ loop.index0 }} + HostName={{ vm.ip_address }} + User={{ vm.user_name }} + Port={{ vm.ssh_port }} + IdentityFile={{ vm.ssh_private_key }} +{% endfor -%} +{% endfor -%} diff --git a/script/cumulus/pkb/perfkitbenchmarker/data_discovery_service.py b/script/cumulus/pkb/perfkitbenchmarker/data_discovery_service.py new file mode 100644 index 0000000..925519e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/data_discovery_service.py @@ -0,0 +1,65 @@ +"""Benchmarking support for Data Discovery Services for data lakes. + +In this module, the base class for Data Discovery Services is defined, which +outlines the common interface that must be implemented for each concrete Data +Discovery Service PKB supports (e.g. AWS Glue Crawler). 
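+Concrete services are looked up via GetDataDiscoveryServiceClass(cloud,
+service_type) and must implement DiscoverData(), which returns the elapsed
+discovery time in seconds.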
+""" + +import abc +from typing import Any, Dict + +from absl import flags +from perfkitbenchmarker import resource + +_DATA_DISCOVERY_OBJECT_STORE_PATH = flags.DEFINE_string( + 'data_discovery_object_store_path', None, + 'Object store path which will be analyzed by the Data Discovery Service. ' + 'Must be a fully qualified object store URL (e.g. s3://bucket/dir for S3, ' + 'or gs://bucket/dir for GCS).' +) +_DATA_DISCOVERY_REGION = flags.DEFINE_string( + 'data_discovery_region', None, + 'Region on which the data discovery service will be deployed.' +) + +# Available service types +GLUE = 'glue' + + +def GetDataDiscoveryServiceClass(cloud, service_type): + """Gets the underlying Data Discovery Service class.""" + return resource.GetResourceClass( + BaseDataDiscoveryService, CLOUD=cloud, SERVICE_TYPE=service_type) + + +class BaseDataDiscoveryService(resource.BaseResource): + """Common interface of a data discovery service resource. + + Attributes: + region: The region the service was set up to use. + data_discovery_path: The object store path containing the files to be + discovered. + """ + + REQUIRED_ATTRS = ['CLOUD', 'SERVICE_TYPE'] + RESOURCE_TYPE = 'BaseDataDiscoveryService' + CLOUD = 'abstract' + SERVICE_TYPE = 'abstract' + + def __init__(self): + super().__init__() + self.region = _DATA_DISCOVERY_REGION.value + self.data_discovery_path = _DATA_DISCOVERY_OBJECT_STORE_PATH.value + + @classmethod + def FromSpec(cls, data_discovery_service_spec): + return cls() + + @abc.abstractmethod + def DiscoverData(self) -> float: + """Runs data discovery. Returns the time elapsed in secs.""" + raise NotImplementedError('Must implement in subclasses.') + + def GetMetadata(self) -> Dict[str, Any]: + """Return a dictionary of the metadata for this service.""" + return {'cloud': self.CLOUD, 'data_discovery_region': self.region} diff --git a/script/cumulus/pkb/perfkitbenchmarker/disk.py b/script/cumulus/pkb/perfkitbenchmarker/disk.py new file mode 100644 index 0000000..8ababf9 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/disk.py @@ -0,0 +1,637 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing abstract classes related to disks. + +Disks can be created, deleted, attached to VMs, and detached from VMs. 
+""" + +import abc +import logging + +from absl import flags +from perfkitbenchmarker import resource +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec +import six + +flags.DEFINE_boolean('nfs_timeout_hard', True, + 'Whether to use hard or soft for NFS mount.') +flags.DEFINE_integer('nfs_rsize', 1048576, 'NFS read size.') +flags.DEFINE_integer('nfs_wsize', 1048576, 'NFS write size.') +flags.DEFINE_integer('nfs_timeout', 60, 'NFS timeout.') +flags.DEFINE_integer('nfs_retries', 2, 'NFS Retries.') +flags.DEFINE_integer( + 'nfs_nconnect', None, 'Number of connections that each NFS client should ' + 'establish to the server.') +flags.DEFINE_boolean( + 'nfs_noresvport', False, + 'Whether the NFS client should use a non-privileged ' + 'source port. Suggested to use with EFS') +flags.DEFINE_boolean( + 'nfs_managed', True, + 'Use a managed NFS service if using NFS disks. Otherwise ' + 'start an NFS server on the first VM.') +flags.DEFINE_string( + 'nfs_ip_address', None, + 'If specified, PKB will target this ip address when ' + 'mounting NFS "disks" rather than provisioning an NFS ' + 'Service for the corresponding cloud.') +flags.DEFINE_string( + 'nfs_directory', None, + 'Directory to mount if using a StaticNfsService. This ' + 'corresponds to the "VOLUME_NAME" of other NfsService ' + 'classes.') +flags.DEFINE_string('smb_version', '3.0', 'SMB version.') +flags.DEFINE_list('mount_options', [], + 'Additional arguments to supply when mounting.') +flags.DEFINE_list('fstab_options', [], + 'Additional arguments to supply to fstab.') + +FLAGS = flags.FLAGS + +# These are the (deprecated) old disk type names +STANDARD = 'standard' +REMOTE_SSD = 'remote_ssd' +PIOPS = 'piops' # Provisioned IOPS (SSD) in AWS and Alicloud +REMOTE_ESSD = 'remote_essd' # Enhanced Cloud SSD in Alicloud + +# 'local' refers to disks that come attached to VMs. It is the only +# "universal" disk type that is not associated with a provider. It +# exists because we can provision a local disk without creating a disk +# spec. The Aerospike benchmarks use this fact in their config +# methods, and they need to be able to tell when a disk is local. So +# until that changes, 'local' is a special disk type. +LOCAL = 'local' + +RAM = 'ram' + +# refers to disks that come from a cloud/unmanaged NFS or SMB service +NFS = 'nfs' +SMB = 'smb' + +# FUSE mounted object storage bucket +OBJECT_STORAGE = 'object_storage' + +# Map old disk type names to new disk type names +DISK_TYPE_MAPS = dict() + +# Standard metadata keys relating to disks +MEDIA = 'media' +REPLICATION = 'replication' +# And some possible values +HDD = 'hdd' +SSD = 'ssd' +NONE = 'none' +ZONE = 'zone' +REGION = 'region' + +DEFAULT_MOUNT_OPTIONS = 'discard' +DEFAULT_FSTAB_OPTIONS = 'defaults' + + +# TODO(user): remove this function when we remove the deprecated +# flags and disk type names. +def RegisterDiskTypeMap(provider_name, type_map): + """Register a map from legacy disk type names to modern ones. + + The translation machinery looks here to find the map corresponding + to the chosen provider and translates the user's flags and configs + to the new naming system. This function should be removed once the + (deprecated) legacy flags are removed. + + Args: + provider_name: a string. The name of the provider. Must match the names we + give to providers in benchmark_spec.py. + type_map: a dict. Maps generic disk type names (STANDARD, REMOTE_SSD, PIOPS) + to provider-specific names. 
+ """ + + DISK_TYPE_MAPS[provider_name] = type_map + + +def GetDiskSpecClass(cloud): + """Get the DiskSpec class corresponding to 'cloud'.""" + return spec.GetSpecClass(BaseDiskSpec, CLOUD=cloud) + + +def WarnAndTranslateDiskTypes(name, cloud): + """Translate old disk types to new disk types, printing warnings if needed. + + Args: + name: a string specifying a disk type, either new or old. + cloud: the cloud we're running on. + + Returns: + The new-style disk type name (i.e. the provider's name for the type). + """ + + if cloud in DISK_TYPE_MAPS: + disk_type_map = DISK_TYPE_MAPS[cloud] + if name in disk_type_map and disk_type_map[name] != name: + new_name = disk_type_map[name] + logging.warning( + 'Disk type name %s is deprecated and will be removed. ' + 'Translating to %s for now.', name, new_name) + return new_name + else: + return name + else: + logging.info('No legacy->new disk type map for provider %s', cloud) + # The provider has not been updated to use new-style names. We + # need to keep benchmarks working, so we pass through the name. + return name + + +def WarnAndCopyFlag(old_name, new_name): + """Copy a value from an old flag to a new one, warning the user. + + Args: + old_name: old name of flag. + new_name: new name of flag. + """ + + if FLAGS[old_name].present: + logging.warning( + 'Flag --%s is deprecated and will be removed. Please ' + 'switch to --%s.', old_name, new_name) + if not FLAGS[new_name].present: + FLAGS[new_name].value = FLAGS[old_name].value + + # Mark the new flag as present so we'll print it out in our list + # of flag values. + FLAGS[new_name].present = True + else: + logging.warning('Ignoring legacy flag %s because new flag %s is present.', + old_name, new_name) + # We keep the old flag around so that providers that haven't been + # updated yet will continue to work. + + +DISK_FLAGS_TO_TRANSLATE = { + 'scratch_disk_type': 'data_disk_type', + 'scratch_disk_iops': 'aws_provisioned_iops', + 'scratch_disk_throughput': 'aws_provisioned_throughput', + 'scratch_disk_size': 'data_disk_size' +} + + +def WarnAndTranslateDiskFlags(): + """Translate old disk-related flags to new disk-related flags.""" + + for old, new in six.iteritems(DISK_FLAGS_TO_TRANSLATE): + WarnAndCopyFlag(old, new) + + +class BaseDiskSpec(spec.BaseSpec): + """Stores the information needed to create a disk. + + Attributes: + device_path: None or string. Path on the machine where the disk is located. + disk_number: None or int. Optional disk identifier unique within the current + machine. + disk_size: None or int. Size of the disk in GB. + disk_type: None or string. See cloud specific disk classes for more + information about acceptable values. + mount_point: None or string. Directory of mount point. + num_striped_disks: int. The number of disks to stripe together. If this is + 1, it means no striping will occur. This must be >= 1. + """ + + SPEC_TYPE = 'BaseDiskSpec' + CLOUD = None + + def __init__(self, *args, **kwargs): + self.device_path: str = None + self.mount_point: str = None + super(BaseDiskSpec, self).__init__(*args, **kwargs) + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Overrides config values with flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. Is + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. 
+ + Returns: + dict mapping config option names to values derived from the config + values or flag values. + """ + super(BaseDiskSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['data_disk_size'].present: + config_values['disk_size'] = flag_values.data_disk_size + if flag_values['data_disk_type'].present: + config_values['disk_type'] = flag_values.data_disk_type + if flag_values['num_striped_disks'].present: + config_values['num_striped_disks'] = flag_values.num_striped_disks + if flag_values['scratch_dir'].present: + config_values['mount_point'] = flag_values.scratch_dir + if flag_values['nfs_version'].present: + config_values['nfs_version'] = flag_values.nfs_version + if flag_values['nfs_timeout_hard'].present: + config_values['nfs_timeout_hard'] = flag_values.nfs_timeout_hard + if flag_values['nfs_rsize'].present: + config_values['nfs_rsize'] = flag_values.nfs_rsize + if flag_values['nfs_wsize'].present: + config_values['nfs_wsize'] = flag_values.nfs_wsize + if flag_values['nfs_timeout'].present: + config_values['nfs_timeout'] = flag_values.nfs_timeout + if flag_values['nfs_retries'].present: + config_values['nfs_retries'] = flag_values.nfs_retries + if flag_values['nfs_nconnect'].present: + config_values['nfs_nconnect'] = flag_values.nfs_nconnect + if flag_values['nfs_ip_address'].present: + config_values['nfs_ip_address'] = flag_values.nfs_ip_address + if flag_values['nfs_managed'].present: + config_values['nfs_managed'] = flag_values.nfs_managed + if flag_values['nfs_directory'].present: + config_values['nfs_directory'] = flag_values.nfs_directory + if flag_values['smb_version'].present: + config_values['smb_version'] = flag_values.smb_version + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Can be overridden by derived classes to add options or impose additional + requirements on existing options. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. 
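+
+    A derived spec might extend this as follows (illustrative option name):
+
+      result = super(ExampleDiskSpec, cls)._GetOptionDecoderConstructions()
+      result.update({'provisioned_iops': (option_decoders.IntDecoder,
+                                          {'default': None, 'none_ok': True})})
+      return result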
+ """ + result = super(BaseDiskSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'device_path': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'disk_number': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }), + 'disk_size': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }), + 'disk_type': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'mount_point': (option_decoders.StringDecoder, { + 'default': None, + 'none_ok': True + }), + 'num_striped_disks': (option_decoders.IntDecoder, { + 'default': 1, + 'min': 1 + }), + 'nfs_version': (option_decoders.StringDecoder, { + 'default': None + }), + 'nfs_ip_address': (option_decoders.StringDecoder, { + 'default': None + }), + 'nfs_managed': (option_decoders.BooleanDecoder, { + 'default': True + }), + 'nfs_directory': (option_decoders.StringDecoder, { + 'default': None + }), + 'nfs_rsize': (option_decoders.IntDecoder, { + 'default': 1048576 + }), + 'nfs_wsize': (option_decoders.IntDecoder, { + 'default': 1048576 + }), + 'nfs_timeout': (option_decoders.IntDecoder, { + 'default': 60 + }), + 'nfs_timeout_hard': (option_decoders.BooleanDecoder, { + 'default': True + }), + 'nfs_retries': (option_decoders.IntDecoder, { + 'default': 2 + }), + 'nfs_nconnect': (option_decoders.IntDecoder, { + 'default': None + }), + 'smb_version': (option_decoders.StringDecoder, { + 'default': '3.0' + }), + }) + return result + + +class BaseDisk(resource.BaseResource): + """Object representing a Base Disk.""" + + is_striped = False + + def __init__(self, disk_spec): + super(BaseDisk, self).__init__() + self.disk_size = disk_spec.disk_size + self.disk_type = disk_spec.disk_type + self.mount_point = disk_spec.mount_point + self.num_striped_disks = disk_spec.num_striped_disks + self.metadata.update({ + 'type': self.disk_type, + 'size': self.disk_size, + 'num_stripes': self.num_striped_disks, + }) + + # Set in derived classes by Attach() + self.vm = None + + # Linux related attributes. + self.device_path = disk_spec.device_path + + # Windows related attributes. + + # The disk number corresponds to the order in which disks were attached to + # the instance. The System Disk has a disk number of 0. Any local disks + # have disk numbers ranging from 1 to the number of local disks on the + # system. Any additional disks that were attached after boot will have + # disk numbers starting at the number of local disks + 1. These disk + # numbers are used in diskpart scripts in order to identify the disks + # that we want to operate on. + self.disk_number = disk_spec.disk_number + + @property + def mount_options(self): + """Returns options to mount a disk. + + The default value 'discard' is from the linux VM's MountDisk method. + + See `man 8 mount` for usage. For example, returning "ro" will cause the + mount command to be "mount ... -o ro ..." mounting the disk as read only. + """ + opts = DEFAULT_MOUNT_OPTIONS + if FLAGS.mount_options: + opts = ','.join(FLAGS.mount_options) + self.metadata.update({'mount_options': opts}) + return opts + + @property + def fstab_options(self): + """Returns options to use in the /etc/fstab entry for this drive. + + The default value 'defaults' is from the linux VM's MountDisk method. + + See `man fstab` for usage. 
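+
+    For example (illustrative flag value), running with
+    --fstab_options=noatime,nodiratime makes this property return
+    'noatime,nodiratime' instead of 'defaults'.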
+ """ + opts = DEFAULT_FSTAB_OPTIONS + if FLAGS.fstab_options: + opts = ','.join(FLAGS.fstab_options) + self.metadata.update({'fstab_options': opts}) + return opts + + @abc.abstractmethod + def Attach(self, vm): + """Attaches the disk to a VM. + + Args: + vm: The BaseVirtualMachine instance to which the disk will be attached. + """ + pass + + def Detach(self): + """Detaches the disk from a VM.""" + # This is currently never called. + # TODO(pclay): Figure out if static VMs should call this. + raise NotImplementedError + + def GetDevicePath(self): + """Returns the path to the device inside a Linux VM.""" + if self.device_path is None: + raise ValueError('device_path is None.') + return self.device_path + + def GetDeviceId(self): + """Return the Windows DeviceId of this disk.""" + if self.disk_number is None: + raise ValueError('disk_number is None.') + return r'\\.\PHYSICALDRIVE%s' % self.disk_number + + +class StripedDisk(BaseDisk): + """Object representing several disks striped together.""" + + is_striped = True + + def __init__(self, disk_spec, disks): + """Initializes a StripedDisk object. + + Args: + disk_spec: A BaseDiskSpec containing the desired mount point. + disks: A list of BaseDisk objects that constitute the StripedDisk. + """ + super(StripedDisk, self).__init__(disk_spec) + self.disks = disks + self.metadata = disks[0].metadata.copy() + if self.disk_size: + self.metadata['size'] = self.disk_size * self.num_striped_disks + + def _Create(self): + for disk in self.disks: + disk.Create() + + def _Delete(self): + for disk in self.disks: + disk.Delete() + + def Attach(self, vm): + for disk in self.disks: + disk.Attach(vm) + + def Detach(self): + for disk in self.disks: + disk.Detach() + + +class MountableDisk(BaseDisk): + """Object representing a disk that produces a mounted directory. + + Examples are RamDisks or FUSE file systems. + """ + + def Attach(self, vm): + """Attaches the disk to a VM. + + Args: + vm: The BaseVirtualMachine instance to which the disk will be attached. + """ + pass + + def GetDevicePath(self): + """Returns the path to the device inside a Linux VM.""" + return None + + def GetDeviceId(self): + """Return the Windows DeviceId of this disk.""" + return None + + def _Create(self): + """Creates the disk.""" + pass + + def _Delete(self): + """Deletes the disk.""" + pass + + @abc.abstractmethod + def Mount(self, vm): + """Mount disk at specified mount point.""" + raise NotImplementedError() + + +class NetworkDisk(BaseDisk): + """Object representing a Network Disk.""" + + @abc.abstractmethod + def _GetNetworkDiskMountOptionsDict(self): + """Default mount options as a dict.""" + pass + + @property + def mount_options(self): + opts = [] + for key, value in sorted( + six.iteritems(self._GetNetworkDiskMountOptionsDict())): + opts.append('%s' % key if value is None else '%s=%s' % (key, value)) + return ','.join(opts) + + @property + def fstab_options(self): + return self.mount_options + + @abc.abstractmethod + def Attach(self): + """Attached NetworkDisk to a VM. Must set self.vm.""" + raise NotImplementedError() + + def _Create(self): + # handled by the Network Disk service + pass + + def _Delete(self): + # handled by the Network Disk service + pass + + +# TODO(chriswilkes): adds to the disk.GetDiskSpecClass registry +# that only has the cloud as the key. Look into using (cloud, disk_type) +# if causes problems +class NfsDisk(NetworkDisk): + """Provides options for mounting NFS drives. + + NFS disk should be ready to mount at the time of creation of this disk. 
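+
+  With the default NFS flags the generated options resolve to roughly
+  'hard,retrans=2,rsize=1048576,timeo=600,wsize=1048576' (illustrative; see
+  _GetNetworkDiskMountOptionsDict below).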
+ + Args: + disk_spec: The disk spec. + remote_mount_address: The host_address:/volume path to the NFS drive. + nfs_tier: The NFS tier / performance level of the server. + """ + + def __init__(self, + disk_spec, + remote_mount_address, + default_nfs_version=None, + nfs_tier=None): + super(NfsDisk, self).__init__(disk_spec) + self.nfs_version = disk_spec.nfs_version or default_nfs_version + self.nfs_timeout_hard = disk_spec.nfs_timeout_hard + self.nfs_rsize = disk_spec.nfs_rsize + self.nfs_wsize = disk_spec.nfs_wsize + self.nfs_timeout = disk_spec.nfs_timeout + self.nfs_retries = disk_spec.nfs_retries + self.nfs_nconnect = disk_spec.nfs_nconnect + self.device_path = remote_mount_address + for key, value in six.iteritems(self._GetNetworkDiskMountOptionsDict()): + self.metadata['nfs_{}'.format(key)] = value + if nfs_tier: + self.nfs_tier = nfs_tier + self.metadata['nfs_tier'] = nfs_tier + super(NfsDisk, self).GetResourceMetadata() + + def _GetNetworkDiskMountOptionsDict(self): + """Default NFS mount options as a dict.""" + options = { + 'hard' if self.nfs_timeout_hard else 'soft': None, + 'rsize': self.nfs_rsize, + 'wsize': self.nfs_wsize, + 'timeo': self.nfs_timeout * 10, # in decaseconds + 'retrans': self.nfs_retries, + } + # the client doesn't have to specify an NFS version to use (but should) + if self.nfs_version: + options['nfsvers'] = self.nfs_version + if self.nfs_nconnect: + options['nconnect'] = self.nfs_nconnect + if FLAGS.nfs_noresvport: + options['noresvport'] = None + return options + + def Attach(self, vm): + self.vm = vm + self.vm.Install('nfs_utils') + + +class SmbDisk(NetworkDisk): + """Provides options for mounting SMB drives. + + SMB disk should be ready to mount at the time of creation of this disk. + + Args: + disk_spec: The disk spec. + remote_mount_address: The host_address:/volume path to the SMB drive. + smb_tier: The SMB tier / performance level of the server. + """ + + def __init__(self, + disk_spec, + remote_mount_address, + storage_account_and_key, + default_smb_version=None, + smb_tier=None): + super(SmbDisk, self).__init__(disk_spec) + self.smb_version = disk_spec.smb_version + self.device_path = remote_mount_address + self.storage_account_and_key = storage_account_and_key + if smb_tier: + self.metadata['smb_tier'] = smb_tier + + def _GetNetworkDiskMountOptionsDict(self): + """Default SMB mount options as a dict.""" + options = { + 'vers': self.smb_version, + 'username': self.storage_account_and_key['user'], + 'password': self.storage_account_and_key['pw'], + 'dir_mode': '0777', + 'file_mode': '0777', + 'serverino': None, + # the following mount option is a suggestion from + # https://docs.microsoft.com/en-us/azure/storage/files/storage-troubleshooting-files-performance#throughput-on-linux-clients-is-significantly-lower-when-compared-to-windows-clients + 'nostrictsync': None, + } + return options + + def Attach(self, vm): + self.vm = vm + self.vm.InstallPackages('cifs-utils') diff --git a/script/cumulus/pkb/perfkitbenchmarker/disk_iops_to_capacity.py b/script/cumulus/pkb/perfkitbenchmarker/disk_iops_to_capacity.py new file mode 100644 index 0000000..1b414c8 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/disk_iops_to_capacity.py @@ -0,0 +1,270 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""IOPS Storage Utility class. + +This class is used to translate an {IOPS, Cloud Provider} requirement +to {core, number of disks, storage size} machine requirements necessary +to meet the IOPS level using the Cloud Provider declared. + + - On AWS, we will use "ebs-gp2" storage type. + - On GCP, we will use PD-SSD storage type. + +In future versions, we will support Azure as well. + +The data used to make these Conversions was acquired in May 2017 from the +following resources: + +GCP Storage Ratings: https://cloud.google.com/compute/docs/disks/. Storage can + go as high as 64TB per disk but IOPS maxes out at 30,000 iops/disk + or (1000GB). +AWS Storage Ratings: http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ + EBSVolumeTypes.html#EBSVolumeTypes_gp2. Storage can go as + high as 16TiB per disk but IOPS maxes out at 10000 IOPS/volume size. + Reference gives volume size in GiB so converted to GB below. +GCP CPU Ratings: https://cloud.google.com/compute/docs/disks/ + performance#ssd-pd-performance +AWS CPU Ratings: https://aws.amazon.com/ebs/details/ +GCP Disk Number Ratings: https://cloud.google.com/compute/docs/disks/performance +AWS Disk Number Ratings: https://aws.amazon.com/ebs/details/ + + +Note: These conversions will require updating as performance and resources +change. + +To utilize this class, initialize an instance of the DiskIOPSToCapacity class +with the IOPS level desired and the provider you wish to use. The machine +requirement attributes will be immediately populated. +""" +import math +import numpy + + +class InvalidProviderError(Exception): + pass + + +class InvalidIOPSError(Exception): + pass + + +class InvalidStorageTypeError(Exception): + pass + + +# GLOBAL STRINGS +AWS = 'AWS' +GCP = 'GCP' +MAX_IOPS = 'MAX_IOPS' +DEFAULT_STORAGE_TYPE = 'DEFAULT_STORAGE_TYPE' +VALID_STORAGE_TYPES = 'VALID_STORAGE_TYPES' +# Cloud providers info dictionary. Will be updated when support more cloud +# providers. VALID_STORAGE_TYPES are storage types that are currently +# supported by disk_iops_to_capacity converter. +# MAX IOPS Sources: +# GCP: Increasing vCPUs increases IOPS performance. Top IOPS performance is +# per instance is averaged to 30000. +# https://cloud.google.com/compute/docs/disks/performance +# AWS: Can add additional volumes to increase IOPS performance. Top IOPS +# performance per CPU is 75000. +# http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html +CLOUD_PROVIDERS_INFO = { + AWS: { + MAX_IOPS: 75000, + DEFAULT_STORAGE_TYPE: 'ebs-gp2', + VALID_STORAGE_TYPES: ['ebs-gp2'] + }, + GCP: { + MAX_IOPS: 30000, + DEFAULT_STORAGE_TYPE: 'pd-ssd', + VALID_STORAGE_TYPES: ['pd-ssd'], + } +} + + +class DiskIOPSToCapacity(object): + """Given iops and service provider requirements, return disk configurations. + + This class is used to translate an {IOPS, Cloud Provider} requirement + to {core, number of disks, storage size} machine requirements necessary + to meet the IOPS level using the Cloud MySQL Provider declared. + + Currently assumes SSD persistent disks. + TODO: + - Implement Azure calculations. Add other cloud providers as applicable. 
+ - Support other storage types such as HDD and/or EBS-piops. + Add a further parameter of Disk Type (default SSD PD) and update + calculations to include HDD disk iops levels. + + Attributes: + _iops: Number of IOPS required. + _provider: 'AWS' or 'GCP'. + _size: Minimum size (GB) required for _iops level with _provider. + _number_disks: Disk number required to meet _iops level with _provider. + _cpu_count: vCPUs per instance required to meet _iops level with _provider. + """ + + def __init__(self, iops, provider=GCP, storage_type=None): + self._size = None + self._cpu_count = None + self._number_disks = None + self._iops = iops + self._provider = provider.upper() + self._storage_type = storage_type + self._ValidateProvider() + self._ValidateIOPS() + self._ValidateStorageType() + self._PopulateConfigs() + + def _ValidateStorageType(self): + """Validate storage type for given _provider, set to default if not given. + + Raises: + InvalidStorageTypeError: Incorrect storage type given. + + TODO: When support other types of storage types (i.e. when this class + supports ebs-piops for AWS or pd-hhd for gcp), will need to update + VALID_STORAGE_TYPES in CLOUD_PROVIDERS_INFO dictionary. + + """ + if self._storage_type: + self._storage_type = self._storage_type.lower() + if (self._storage_type is + not CLOUD_PROVIDERS_INFO[self._provider][DEFAULT_STORAGE_TYPE]): + raise InvalidStorageTypeError() + else: + self._storage_type = CLOUD_PROVIDERS_INFO[self._provider][ + DEFAULT_STORAGE_TYPE] + + def _ValidateProvider(self): + """Validate provider to be GCP or AWS, throw exception if invalid. + + Raises: + InvalidProviderError: Incorrect provider type given. + """ + if self._provider not in list(CLOUD_PROVIDERS_INFO.keys()): + raise InvalidProviderError('Provider given is not supported by ' + 'storage_utility.') + + def _ValidateIOPS(self): + """Validate IOPS to be within valid limits, throw exception if invalid. + + If IOPS parameter is less than 1 or greater than provider maximum IOPS + throw InvalidIOPSError. + + Raises: + InvalidIOPSError: Invalid IOPS parameter given. + """ + if (self._iops < 1 or + self._iops > CLOUD_PROVIDERS_INFO[self._provider][MAX_IOPS]): + raise InvalidIOPSError( + 'Invalid IOPS parameter, must be positive number less than ' + 'the maximum achievable for given cloud provider. ' + 'The maximum for {} is {}.'.format( + self._provider, CLOUD_PROVIDERS_INFO[self._provider][MAX_IOPS])) + + def _PopulateConfigs(self): + """Populate Storage Configurations.""" + self._SetSize() + self._SetCPUCount() + self._SetNumberDisks() + + def PrintConfigs(self): + """Print out necessary configs.""" + vm_config = ('For {} IOPS using {}, the following is required:\n\tStorage ' + 'Size (GB): {}\n\tCPU Count: {}\n\tNumber of ' + 'Disks: {}').format(self._iops, + self._provider.upper(), self._size, + self._cpu_count, self._number_disks) + print(vm_config) + + def _SetSize(self): + """Set minimum size (GB) necessary to achieve _iops level. + + Rating performance levels as of May 2017, sources found below. + GCP: ratings from https://cloud.google.com/compute/docs/disks/. Storage can + go as high as 64TB per disk but IOPS maxes out at 30,000 iops/disk + or (1000GB). + AWS: ratings from + http://docs.aws.amazon.com/AWSEC2/latest/ + UserGuide/EBSVolumeTypes.html#EBSVolumeTypes_gp2. Storage can go as + high as 16TiB per disk but IOPS maxes out at 10000 IOPS/volume size. + Reference gives volume size in GiB so converted to GB below. 
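+
+    Worked example (GCP, illustrative numbers): a 3000 IOPS requirement maps
+    to ceil(3000 / 30.0) = 100 GB, with a cap of 1000 GB at 30000 IOPS.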
+ """ + if self._provider == GCP: + self._size = min(int(math.ceil(self._iops / 30.0)), 30000 / 30) + elif self._provider == AWS: + value = self._iops + value = numpy.array(value) + self._size = int( + numpy.piecewise([value], [[value <= 100], [(value > 100) & ( + value <= 9999)], [value > 9999]], [ + lambda x: int(math.ceil(1.07374)), + lambda x: int(math.ceil(3 * value)), + lambda x: int(math.ceil(3579.855))])) + + def GetSize(self): + """Return storage size. + + Returns: + __size: Storage size (GB). + """ + return self._size + + def _SetCPUCount(self): + """Set cpu count. + + GCP: ratings from + https://cloud.google.com/compute/docs/disks/performance#ssd-pd-performance + AWS: to achieve good performance on EBS, one needs to use an + EBS-optimized VM instance, and the smallest VM instance that can be EBS + optimized is *.large VM types (e.g., c4.large), those comes with 2 cores. + ratings from + http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSOptimized.html + + """ + if self._provider == GCP: + value = self._iops + self._cpu_count = int( + numpy.piecewise([value], [[value <= 15000], [ + (value > 15000) & (value <= 25000) + ], [value > 25000]], [lambda x: 1, lambda x: 16, lambda x: 32])) + elif self._provider == AWS: + self._cpu_count = 2 + + def GetCPUCount(self): + """Return CPU count. + + Returns: + _cpu_count: CPU count. + """ + return self._cpu_count + + def _SetNumberDisks(self): + """Set number of disks. + + GCP: Adding disks does not increase IOPS for GCP. + AWS: ratings from https://aws.amazon.com/ebs/details/ + """ + if self._provider == GCP: + self._number_disks = 1 + elif self._provider == AWS: + self._number_disks = max(int(math.ceil(self._iops / 10000.0)), 1) + + def GetNumberDisks(self): + """Return Number of Disks. + + Returns: + _number_disks: Number of disks. + """ + return self._number_disks diff --git a/script/cumulus/pkb/perfkitbenchmarker/dpb_service.py b/script/cumulus/pkb/perfkitbenchmarker/dpb_service.py new file mode 100644 index 0000000..6da220f --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/dpb_service.py @@ -0,0 +1,872 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Benchmarking support for Data Processing Backend Services. + +In order to benchmark Data Processing Backend services such as Google +Cloud Platform's Dataproc and Dataflow or Amazon's EMR, we create a +BaseDpbService class. Classes to wrap specific backend services are in +the corresponding provider directory as a subclass of BaseDpbService. 
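+
+Illustrative usage sketch (the spec variable and job arguments are
+placeholders, not tied to a particular provider):
+
+  service_cls = GetDpbServiceClass('GCP', DATAPROC)
+  service = service_cls(dpb_service_spec)
+  result = service.SubmitJob(jarfile='gs://bucket/wordcount.jar',
+                             classname='WordCount',
+                             job_type=BaseDpbService.SPARK_JOB_TYPE)
+  print(result.wall_time)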
+""" + +import abc +import dataclasses +import datetime +import logging +from typing import Dict, List, Optional, Type + +from absl import flags +from perfkitbenchmarker import container_service +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import resource +from perfkitbenchmarker import units +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import hadoop +from perfkitbenchmarker.linux_packages import spark +from perfkitbenchmarker.providers.aws import s3 +from perfkitbenchmarker.providers.aws import util as aws_util +from perfkitbenchmarker.providers.gcp import gcs +from perfkitbenchmarker.providers.gcp import util as gcp_util + +flags.DEFINE_string( + 'static_dpb_service_instance', None, + 'If set, the name of the pre created dpb implementation,' + 'assumed to be ready.') +flags.DEFINE_string('dpb_log_level', 'INFO', 'Manipulate service log level') +flags.DEFINE_string('dpb_job_jarfile', None, + 'Executable Jarfile containing workload implementation') +flags.DEFINE_string('dpb_job_classname', None, 'Classname of the job ' + 'implementation in the jar file') +flags.DEFINE_string( + 'dpb_service_bucket', None, 'A bucket to use with the DPB ' + 'service. If none is provided one will be created by the ' + 'benchmark and cleaned up afterwards unless you are using ' + 'a static instance.') +flags.DEFINE_string('dpb_service_zone', None, 'The zone for provisioning the ' + 'dpb_service instance.') +flags.DEFINE_list( + 'dpb_job_properties', [], 'A list of strings of the form ' + '"key=value" to be passed into DBP jobs.') +flags.DEFINE_list( + 'dpb_cluster_properties', [], 'A list of strings of the form ' + '"type:key=value" to be passed into DBP clusters. See ' + 'https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/cluster-properties.' +) + +FLAGS = flags.FLAGS + +# List of supported data processing backend services +DATAPROC = 'dataproc' +DATAPROC_GKE = 'dataproc_gke' +DATAPROC_SERVERLESS = 'dataproc_serverless' +DATAFLOW = 'dataflow' +EMR = 'emr' +UNMANAGED_DPB_SVC_YARN_CLUSTER = 'unmanaged_dpb_svc_yarn_cluster' +UNMANAGED_SPARK_CLUSTER = 'unmanaged_spark_cluster' +KUBERNETES_SPARK_CLUSTER = 'kubernetes_spark_cluster' +UNMANAGED_SERVICES = [ + UNMANAGED_DPB_SVC_YARN_CLUSTER, + UNMANAGED_SPARK_CLUSTER, +] + +# Default number of workers to be used in the dpb service implementation +DEFAULT_WORKER_COUNT = 2 + +# List of supported applications that can be enabled on the dpb service +FLINK = 'flink' +HIVE = 'hive' + +# Metrics and Status related metadata +# TODO(pclay): Remove these after migrating all callers to SubmitJob +SUCCESS = 'success' +RUNTIME = 'running_time' +WAITING = 'pending_time' + + +class JobNotCompletedError(Exception): + """Used to signal a job is still running.""" + pass + + +class JobSubmissionError(errors.Benchmarks.RunError): + """Thrown by all implementations if SubmitJob fails.""" + pass + + +@dataclasses.dataclass +class JobResult: + """Data class for the timing of a successful DPB job.""" + # Service reported execution time + run_time: float + # Service reported pending time (0 if service does not report). 
+ pending_time: float = 0 + + @property + def wall_time(self) -> float: + """The total time the service reported it took to execute.""" + return self.run_time + self.pending_time + + +class BaseDpbService(resource.BaseResource): + """Object representing a Data Processing Backend Service.""" + + REQUIRED_ATTRS = ['CLOUD', 'SERVICE_TYPE'] + RESOURCE_TYPE = 'BaseDpbService' + CLOUD = 'abstract' + SERVICE_TYPE = 'abstract' + HDFS_FS = 'hdfs' + GCS_FS = 'gs' + S3_FS = 's3' + + # Job types that are supported on the dpb service backends + PYSPARK_JOB_TYPE = 'pyspark' + SPARKSQL_JOB_TYPE = 'spark-sql' + SPARK_JOB_TYPE = 'spark' + HADOOP_JOB_TYPE = 'hadoop' + DATAFLOW_JOB_TYPE = 'dataflow' + BEAM_JOB_TYPE = 'beam' + + def _JobJars(self) -> Dict[str, Dict[str, str]]: + """Known mappings of jars in the cluster used by GetExecutionJar.""" + return { + self.SPARK_JOB_TYPE: { + # Default for Dataproc and EMR + 'examples': 'file:///usr/lib/spark/examples/jars/spark-examples.jar' + } + } + + def __init__(self, dpb_service_spec): + """Initialize the Dpb service object. + + Args: + dpb_service_spec: spec of the dpb service. + """ + is_user_managed = dpb_service_spec.static_dpb_service_instance is not None + # Hand over the actual creation to the resource module which treats the + # user_managed resources in a special manner and skips creation attempt + super(BaseDpbService, self).__init__(user_managed=is_user_managed) + self.spec = dpb_service_spec + self.dpb_hdfs_type = None + if dpb_service_spec.static_dpb_service_instance: + self.cluster_id = dpb_service_spec.static_dpb_service_instance + else: + self.cluster_id = 'pkb-' + FLAGS.run_uri + if FLAGS.dpb_service_bucket: + self.bucket = FLAGS.dpb_service_bucket + self.manage_bucket = False + else: + self.bucket = 'pkb-' + FLAGS.run_uri + self.manage_bucket = True + self.dpb_service_zone = FLAGS.dpb_service_zone + self.dpb_version = dpb_service_spec.version + self.dpb_service_type = 'unknown' + self.storage_service = None + + @property + def base_dir(self): + return self.persistent_fs_prefix + self.bucket # pytype: disable=attribute-error # bind-properties + + @abc.abstractmethod + def SubmitJob(self, + jarfile: Optional[str] = None, + classname: Optional[str] = None, + pyspark_file: Optional[str] = None, + query_file: Optional[str] = None, + job_poll_interval: Optional[float] = None, + job_stdout_file: Optional[str] = None, + job_arguments: Optional[List[str]] = None, + job_files: Optional[List[str]] = None, + job_jars: Optional[List[str]] = None, + job_type: Optional[str] = None, + properties: Optional[Dict[str, str]] = None) -> JobResult: + """Submit a data processing job to the backend. + + Args: + jarfile: Jar file to execute. + classname: Name of the main class. + pyspark_file: Comma separated list of Python files to be provided to the + job. Must be one of the following file formats ".py, .zip, or .egg". + query_file: HCFS URI of file containing Spark SQL script to execute as the + job. + job_poll_interval: integer saying how often to poll for job completion. + Not used by providers for which submit job is a synchronous operation. + job_stdout_file: String giving the location of the file in which to put + the standard out of the job. + job_arguments: List of string arguments to pass to driver application. + These are not the arguments passed to the wrapper that submits the job. + job_files: Files passed to a Spark Application to be distributed to + executors. 
+ job_jars: Jars to pass to the application + job_type: Spark or Hadoop job + properties: Dict of properties to pass with the job. + + Returns: + A JobResult with the timing of the successful job. + + Raises: + JobSubmissionError if job fails. + """ + pass + + def _WaitForJob(self, job_id, timeout, poll_interval): + + @vm_util.Retry( + timeout=timeout, + poll_interval=poll_interval, + fuzz=0, + retryable_exceptions=(JobNotCompletedError,)) + def Poll(): + result = self._GetCompletedJob(job_id) + if result is None: + raise JobNotCompletedError('Job {} not complete.'.format(job_id)) + return result + + return Poll() + + def _GetCompletedJob(self, job_id: str) -> Optional[JobResult]: + """Get the job result if it has finished. + + Args: + job_id: The step id to query. + + Returns: + A dictionary describing the job if the step the step is complete, + None otherwise. + + Raises: + JobSubmissionError if job fails. + """ + raise NotImplementedError('You need to implement _GetCompletedJob if you ' + 'use _WaitForJob') + + def GetSparkSubmitCommand( + self, + jarfile: Optional[str] = None, + classname: Optional[str] = None, + pyspark_file: Optional[str] = None, + query_file: Optional[str] = None, + job_arguments: Optional[List[str]] = None, + job_files: Optional[List[str]] = None, + job_jars: Optional[List[str]] = None, + job_type: Optional[str] = None, + properties: Optional[Dict[str, str]] = None, + spark_submit_cmd: str = spark.SPARK_SUBMIT) -> List[str]: + """Builds the command to run spark-submit on cluster.""" + # TODO(pclay): support BaseDpbService.SPARKSQL_JOB_TYPE + if job_type not in [ + BaseDpbService.PYSPARK_JOB_TYPE, + BaseDpbService.SPARK_JOB_TYPE, + ]: + raise NotImplementedError + cmd = [spark_submit_cmd] + # Order is important + if classname: + cmd += ['--class', classname] + all_properties = self.GetJobProperties() + all_properties.update(properties or {}) + for k, v in all_properties.items(): + cmd += ['--conf', '{}={}'.format(k, v)] + if job_files: + cmd = ['--files', ','.join(job_files)] + # Main jar/script goes last before args. + if job_type == BaseDpbService.SPARK_JOB_TYPE: + assert jarfile + cmd.append(jarfile) + elif job_type == BaseDpbService.PYSPARK_JOB_TYPE: + assert pyspark_file + cmd.append(pyspark_file) + if job_arguments: + cmd += job_arguments + return cmd + + def DistributedCopy(self, + source: str, + destination: str, + properties: Optional[Dict[str, str]] = None) -> JobResult: + """Method to copy data using a distributed job on the cluster. + + Args: + source: HCFS directory to copy data from. + destination: name of new HCFS directory to copy data into. + properties: properties to add to the job. Not supported on EMR. + + Returns: + A JobResult with the timing of the successful job. + + Raises: + JobSubmissionError if job fails. 
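+
+    Example (illustrative object store URIs):
+      service.DistributedCopy('gs://bucket/input', 'gs://bucket/input-copy')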
+ """ + return self.SubmitJob( + classname='org.apache.hadoop.tools.DistCp', + job_arguments=[source, destination], + job_type=BaseDpbService.HADOOP_JOB_TYPE, + properties=properties) + + def GetMetadata(self): + """Return a dictionary of the metadata for this cluster.""" + pretty_version = self.dpb_version or 'default' + basic_data = { + 'dpb_service': + self.dpb_service_type, + 'dpb_version': + pretty_version, + 'dpb_service_version': + '{}_{}'.format(self.dpb_service_type, pretty_version), + 'dpb_cluster_id': + self.cluster_id, + 'dpb_cluster_shape': + self.spec.worker_group.vm_spec.machine_type, + 'dpb_cluster_size': + self.spec.worker_count, + 'dpb_hdfs_type': + self.dpb_hdfs_type, + 'dpb_disk_size': + self.spec.worker_group.disk_spec.disk_size, + 'dpb_service_zone': + self.dpb_service_zone, + 'dpb_job_properties': + ','.join('{}={}'.format(k, v) + for k, v in self.GetJobProperties().items()), + 'dpb_cluster_properties': + ','.join(FLAGS.dpb_cluster_properties), + } + return basic_data + + def _CreateDependencies(self): + """Creates a bucket to use with the cluster.""" + if self.manage_bucket: + self.storage_service.MakeBucket(self.bucket) + + def _Create(self): + """Creates the underlying resource.""" + raise NotImplementedError() + + def _DeleteDependencies(self): + """Deletes the bucket used with the cluster.""" + if self.manage_bucket: + self.storage_service.DeleteBucket(self.bucket) + + def _Delete(self): + """Deletes the underlying resource. + + Implementations of this method should be idempotent since it may + be called multiple times, even if the resource has already been + deleted. + """ + raise NotImplementedError() + + def _ProcessWallTime(self, start_time, end_time): + """Compute the wall time from the given start and end processing time. + + Args: + start_time: Datetime value when the processing was started. + end_time: Datetime value when the processing completed. + + Returns: + Wall time in seconds. + + Raises: + ValueError: Exception raised when invalid input is provided. + """ + if start_time > end_time: + raise ValueError('start_time cannot be later than the end_time') + return (end_time - start_time).total_seconds() + + def GetJobProperties(self) -> Dict[str, str]: + """Parse the dpb_job_properties_flag.""" + return dict(pair.split('=') for pair in FLAGS.dpb_job_properties) + + def GetExecutionJar(self, job_category: str, job_type: str) -> str: + """Retrieve execution jar corresponding to the job_category and job_type. + + Args: + job_category: String category of the job for eg. hadoop, spark, hive, etc. + job_type: String name of the type of workload to executed on the cluster, + for eg. word_count, terasort, etc. + + Returns: + The path to the execusion jar on the cluster + + Raises: + NotImplementedError: An unsupported combination of job_category + and job_type was provided for execution on the cluster. + """ + jar = self._JobJars().get(job_category, {}).get(job_type) + if jar: + return jar + raise NotImplementedError( + f'No jar found for category {job_category} and type {job_type}.') + + def GetClusterCreateTime(self) -> Optional[float]: + """Returns the cluster creation time. + + This default implementation computes it by substracting the + resource_ready_time and create_start_time attributes. + + Returns: + A float representing the creation time in seconds or None. 
+ """ + if self.resource_ready_time is None or self.create_start_time is None: + return None + return self.resource_ready_time - self.create_start_time + + +class UnmanagedDpbService(BaseDpbService): + """Object representing an un-managed dpb service.""" + + def __init__(self, dpb_service_spec): + super(UnmanagedDpbService, self).__init__(dpb_service_spec) + # Dictionary to hold the cluster vms. + self.vms = {} + self.cloud = dpb_service_spec.worker_group.cloud + if not self.dpb_service_zone: + raise errors.Setup.InvalidSetupError( + 'dpb_service_zone must be provided, for provisioning.') + if self.cloud == 'GCP': + self.region = gcp_util.GetRegionFromZone(FLAGS.dpb_service_zone) + self.storage_service = gcs.GoogleCloudStorageService() + self.persistent_fs_prefix = 'gs://' + elif self.cloud == 'AWS': + self.region = aws_util.GetRegionFromZone(FLAGS.dpb_service_zone) + self.storage_service = s3.S3Service() + self.persistent_fs_prefix = 's3://' + else: + self.region = None + self.storage_service = None + self.persistent_fs_prefix = None + self.manage_bucket = False + logging.warning( + 'Cloud provider %s does not support object storage. ' + 'Some benchmarks will not work.', + self.cloud) + + if self.storage_service: + self.storage_service.PrepareService(location=self.region) + + # set in _Create of derived classes + self.leader = None + + def GetClusterCreateTime(self) -> Optional[float]: + """Returns the cluster creation time. + + UnmanagedDpbService Create phase doesn't consider actual VM creation, just + further provisioning. Thus, we need to add the VMs create time to the + default implementation. + + Returns: + A float representing the creation time in seconds or None. + """ + + my_create_time = super().GetClusterCreateTime() + if my_create_time is None: + return None + vms = [] + for vm_group in self.vms.values(): + for vm in vm_group: + vms.append(vm) + first_vm_create_start_time = min( + (vm.create_start_time + for vm in vms + if vm.create_start_time is not None), + default=None, + ) + last_vm_ready_start_time = max( + (vm.resource_ready_time + for vm in vms + if vm.resource_ready_time is not None), + default=None, + ) + if first_vm_create_start_time is None or last_vm_ready_start_time is None: + return None + return (my_create_time + last_vm_ready_start_time - + first_vm_create_start_time) + + +class UnmanagedDpbServiceYarnCluster(UnmanagedDpbService): + """Object representing an un-managed dpb service yarn cluster.""" + + CLOUD = 'Unmanaged' + SERVICE_TYPE = UNMANAGED_DPB_SVC_YARN_CLUSTER + + def __init__(self, dpb_service_spec): + super(UnmanagedDpbServiceYarnCluster, self).__init__(dpb_service_spec) + # Dictionary to hold the cluster vms. 
+ self.dpb_service_type = UNMANAGED_DPB_SVC_YARN_CLUSTER + # Set DPB version as Hadoop version for metadata + self.cloud = dpb_service_spec.worker_group.cloud + + def _Create(self): + """Create an un-managed yarn cluster.""" + logging.info('Should have created vms by now.') + logging.info(str(self.vms)) + + def InstallHadoop(vm): + vm.Install('hadoop') + if self.cloud == 'GCP': + hadoop.InstallGcsConnector(vm) + + if 'worker_group' not in self.vms: + raise errors.Resource.CreationError( + 'UnmanagedDpbServiceYarnCluster requires VMs in a worker_group.') + vm_util.RunThreaded(InstallHadoop, + self.vms['worker_group'] + self.vms['master_group']) + self.leader = self.vms['master_group'][0] + hadoop.ConfigureAndStart( + self.leader, self.vms['worker_group'], configure_s3=self.cloud == 'AWS') + + def SubmitJob(self, + jarfile=None, + classname=None, + pyspark_file=None, + query_file=None, + job_poll_interval=None, + job_stdout_file=None, + job_arguments=None, + job_files=None, + job_jars=None, + job_type=None, + properties=None): + """Submit a data processing job to the backend.""" + if job_type != self.HADOOP_JOB_TYPE: + raise NotImplementedError + cmd_list = [hadoop.HADOOP_CMD] + # Order is important + if jarfile: + cmd_list += ['jar', jarfile] + # Specifying classname only works if jarfile is omitted or if it has no + # main class. + if classname: + cmd_list += [classname] + all_properties = self.GetJobProperties() + all_properties.update(properties or {}) + cmd_list += ['-D{}={}'.format(k, v) for k, v in all_properties.items()] + if job_arguments: + cmd_list += job_arguments + cmd_string = ' '.join(cmd_list) + + start_time = datetime.datetime.now() + try: + stdout, _ = self.leader.RobustRemoteCommand(cmd_string, should_log=True) + except errors.VirtualMachine.RemoteCommandError as e: + raise JobSubmissionError() from e + end_time = datetime.datetime.now() + + if job_stdout_file: + with open(job_stdout_file, 'w') as f: + f.write(stdout) + return JobResult(run_time=(end_time - start_time).total_seconds()) + + def _Delete(self): + pass + + def _GetCompletedJob(self, job_id: str) -> Optional[JobResult]: + """Submitting Job via SSH is blocking so this is not meaningful.""" + raise NotImplementedError('Submitting Job via SSH is a blocking command.') + + +class UnmanagedDpbSparkCluster(UnmanagedDpbService): + """Object representing an un-managed dpb service spark cluster.""" + + CLOUD = 'Unmanaged' + SERVICE_TYPE = UNMANAGED_SPARK_CLUSTER + + def _JobJars(self) -> Dict[str, Dict[str, str]]: + """Known mappings of jars in the cluster used by GetExecutionJar.""" + return {self.SPARK_JOB_TYPE: {'examples': spark.SparkExamplesJarPath()}} + + def __init__(self, dpb_service_spec): + super(UnmanagedDpbSparkCluster, self).__init__(dpb_service_spec) + # Dictionary to hold the cluster vms. 
+ self.vms = {} + self.dpb_service_type = UNMANAGED_SPARK_CLUSTER + # Set DPB version as Spark version for metadata + self.dpb_version = 'spark_' + FLAGS.spark_version + self.cloud = dpb_service_spec.worker_group.cloud + + def _Create(self): + """Create an un-managed yarn cluster.""" + logging.info('Should have created vms by now.') + logging.info(str(self.vms)) + + def InstallSpark(vm): + vm.Install('spark') + if self.cloud == 'GCP': + hadoop.InstallGcsConnector(vm) + + if 'worker_group' not in self.vms: + raise errors.Resource.CreationError( + 'UnmanagedDpbSparkCluster requires VMs in a worker_group.') + + vm_util.RunThreaded(InstallSpark, + self.vms['worker_group'] + self.vms['master_group']) + self.leader = self.vms['master_group'][0] + spark.ConfigureAndStart( + self.leader, self.vms['worker_group'], configure_s3=self.cloud == 'AWS') + + def SubmitJob(self, + jarfile=None, + classname=None, + pyspark_file=None, + query_file=None, + job_poll_interval=None, + job_stdout_file=None, + job_arguments=None, + job_files=None, + job_jars=None, + job_type=None, + properties=None): + """Submit a data processing job to the backend.""" + cmd = self.GetSparkSubmitCommand( + jarfile=jarfile, + classname=classname, + pyspark_file=pyspark_file, + job_arguments=job_arguments, + job_files=job_files, + job_jars=job_jars, + job_type=job_type, + properties=properties) + start_time = datetime.datetime.now() + try: + stdout, _ = self.leader.RobustRemoteCommand( + ' '.join(cmd), should_log=True) + except errors.VirtualMachine.RemoteCommandError as e: + raise JobSubmissionError() from e + end_time = datetime.datetime.now() + + if job_stdout_file: + with open(job_stdout_file, 'w') as f: + f.write(stdout) + return JobResult(run_time=(end_time - start_time).total_seconds()) + + def _Delete(self): + pass + + def _GetCompletedJob(self, job_id: str) -> Optional[JobResult]: + """Submitting Job via SSH is blocking so this is not meaningful.""" + raise NotImplementedError('Submitting Job via SSH is a blocking command.') + + +class KubernetesSparkCluster(BaseDpbService): + """Object representing a Kubernetes dpb service spark cluster.""" + + CLOUD = container_service.KUBERNETES + SERVICE_TYPE = KUBERNETES_SPARK_CLUSTER + + # Constants to sychronize between YAML and Spark configuration + # TODO(pclay): Consider setting in YAML + SPARK_DRIVER_SERVICE = 'spark-driver' + SPARK_DRIVER_PORT = 4042 + SPARK_K8S_SERVICE_ACCOUNT = 'spark' + + def _JobJars(self) -> Dict[str, Dict[str, str]]: + """Known mappings of jars in the cluster used by GetExecutionJar.""" + return {self.SPARK_JOB_TYPE: {'examples': spark.SparkExamplesJarPath()}} + + # TODO(odiego): Implement GetClusterCreateTime adding K8s cluster create time + + def __init__(self, dpb_service_spec): + super().__init__(dpb_service_spec) + self.dpb_service_type = self.SERVICE_TYPE + # Set DPB version as Spark version for metadata + self.dpb_version = 'spark_' + FLAGS.spark_version + + benchmark_spec = context.GetThreadBenchmarkSpec() + self.k8s_cluster = benchmark_spec.container_cluster + assert self.k8s_cluster + assert self.k8s_cluster.CLUSTER_TYPE == container_service.KUBERNETES + self.cloud = self.k8s_cluster.CLOUD + self.container_registry = benchmark_spec.container_registry + assert self.container_registry + + self.spark_drivers = [] + + # TODO(pclay): Support overriding image? 
+ # Corresponds with data/docker/spark directory + self.image = 'spark' + + if self.cloud == 'GCP': + self.region = gcp_util.GetRegionFromZone(self.k8s_cluster.zone) + self.storage_service = gcs.GoogleCloudStorageService() + self.persistent_fs_prefix = 'gs://' + elif self.cloud == 'AWS': + self.region = self.k8s_cluster.region + self.storage_service = s3.S3Service() + self.persistent_fs_prefix = 's3://' + else: + raise errors.Config.InvalidValue( + f'Unsupported Cloud provider {self.cloud}') + + self.storage_service.PrepareService(location=self.region) + + # TODO(pclay): support + assert not FLAGS.dpb_cluster_properties + + if self.k8s_cluster.num_nodes < 2: + raise errors.Config.InvalidValue( + f'Cluster type {KUBERNETES_SPARK_CLUSTER} requires at least 2 nodes.' + f'Found {self.k8s_cluster.num_nodes}.') + + def _Create(self): + """Create docker image for cluster.""" + logging.info('Should have created k8s cluster by now.') + # TODO(pclay): Should resources publicly declare they have been created? + assert self.k8s_cluster.resource_ready_time + assert self.container_registry.resource_ready_time + logging.info(self.k8s_cluster) + logging.info(self.container_registry) + + logging.info('Building Spark image.') + self.image = self.container_registry.GetOrBuild(self.image) + + # https://spark.apache.org/docs/latest/running-on-kubernetes.html#rbac + # TODO(pclay): Consider moving into manifest + self.k8s_cluster.CreateServiceAccount( + self.SPARK_K8S_SERVICE_ACCOUNT, clusterrole='edit') + + def _GetDriverName(self): + return f'spark-driver-{len(self.spark_drivers)}' + + # Kubernetes unlike other Spark Shedulers reserves 40% of memory for PySpark + # instead of the normal 10%. We don't need much memory for PySpark, because + # our PySpark is 100% SQL and thus on the JVM (in dpb_sparksql_benchmark). + # Force Spark to reserve the normal 10% overhead in all cases. + # https://spark.apache.org/docs/latest/running-on-kubernetes.html#spark-properties + MEMORY_OVERHEAD_FACTOR = 0.1 + + def GetJobProperties(self) -> Dict[str, str]: + node_cpu = self.k8s_cluster.node_num_cpu + # TODO(pclay): Validate that we don't have too little memory? 
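+    # Illustrative arithmetic (hypothetical node with 16384 MiB allocatable):
+    # 16384 - 512 = 15872 MiB after the daemon reserve, then 15872 / 1.1
+    # ~= 14429 MiB once the 10% memory overhead factor is removed.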
+ node_memory_mb = self.k8s_cluster.node_memory_allocatable.m_as( + units.mebibyte) + # Reserve 512 MB for system daemons + node_memory_mb -= 512 + # Remove overhead + node_memory_mb /= (1 + self.MEMORY_OVERHEAD_FACTOR) + node_memory_mb = int(node_memory_mb) + + # Common PKB Spark cluster properties + properties = spark.GetConfiguration( + driver_memory_mb=node_memory_mb, + worker_memory_mb=node_memory_mb, + # Schedule one thread per vCPU + worker_cores=node_cpu, + # Reserve one node for driver + num_workers=self.k8s_cluster.num_nodes - 1, + configure_s3=self.cloud == 'AWS') + # k8s specific properties + properties.update({ + 'spark.driver.host': + self.SPARK_DRIVER_SERVICE, + 'spark.driver.port': + str(self.SPARK_DRIVER_PORT), + 'spark.kubernetes.driver.pod.name': + self._GetDriverName(), + # Tell Spark to under-report cores by 1 to fit next to k8s services + 'spark.kubernetes.executor.request.cores': + str(node_cpu - 1), + 'spark.kubernetes.container.image': + self.image, + # No HDFS available + 'spark.hadoop.fs.defaultFS': + self.base_dir, + 'spark.kubernetes.memoryOverheadFactor': + str(self.MEMORY_OVERHEAD_FACTOR), + }) + # User specified properties + properties.update(super().GetJobProperties()) + return properties + + def SubmitJob(self, + jarfile=None, + classname=None, + pyspark_file=None, + query_file=None, + job_poll_interval=None, + job_stdout_file=None, + job_arguments=None, + job_files=None, + job_jars=None, + job_type=None, + properties=None): + """Submit a data processing job to the backend.""" + # Specs can't be copied or created by hand. So we override the command of + # the spec for each job. + command = self.GetSparkSubmitCommand( + jarfile=jarfile, + classname=classname, + pyspark_file=pyspark_file, + job_arguments=job_arguments, + job_files=job_files, + job_jars=job_jars, + job_type=job_type, + properties=properties) + driver_name = self._GetDriverName() + # Request memory for driver. This should guarantee that driver does not get + # scheduled on same VM as exectutor and OOM. + driver_memory_mb = int( + self.GetJobProperties()[spark.SPARK_DRIVER_MEMORY].strip('m')) + start_time = datetime.datetime.now() + self.k8s_cluster.ApplyManifest( + 'container/spark/spark-driver.yaml.j2', + name=driver_name, + command=command, + driver_memory_mb=driver_memory_mb, + driver_port=self.SPARK_DRIVER_PORT, + driver_service=self.SPARK_DRIVER_SERVICE, + image=self.image, + service_account=self.SPARK_K8S_SERVICE_ACCOUNT) + container = container_service.KubernetesPod(driver_name) + # increments driver_name for next job + self.spark_drivers.append(container) + try: + container.WaitForExit() + except container_service.ContainerException as e: + raise JobSubmissionError() from e + end_time = datetime.datetime.now() + + if job_stdout_file: + with open(job_stdout_file, 'w') as f: + f.write(container.GetLogs()) + + # TODO(pclay): use k8s output for timing? + return JobResult(run_time=(end_time - start_time).total_seconds()) + + def _Delete(self): + pass + + def _GetCompletedJob(self, job_id: str) -> Optional[JobResult]: + """container.WaitForExit is blocking so this is not meaningful.""" + raise NotImplementedError('container.WaitForExit is a blocking command.') + + +def GetDpbServiceClass(cloud: str, + dpb_service_type: str) -> Optional[Type[BaseDpbService]]: + """Gets the Data Processing Backend class corresponding to 'service_type'. 
+ + Args: + cloud: String name of cloud of the service + dpb_service_type: String service type as specified in configuration + + Returns: + Implementation class corresponding to the argument dpb_service_type + + Raises: + Exception: An invalid data processing backend service type was provided + """ + if dpb_service_type in UNMANAGED_SERVICES: + cloud = 'Unmanaged' + elif dpb_service_type == KUBERNETES_SPARK_CLUSTER: + cloud = container_service.KUBERNETES + return resource.GetResourceClass( + BaseDpbService, CLOUD=cloud, SERVICE_TYPE=dpb_service_type) diff --git a/script/cumulus/pkb/perfkitbenchmarker/edw_benchmark_results_aggregator.py b/script/cumulus/pkb/perfkitbenchmarker/edw_benchmark_results_aggregator.py new file mode 100644 index 0000000..215d04e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/edw_benchmark_results_aggregator.py @@ -0,0 +1,1099 @@ +"""Aggregates the performance results from a edw benchmark. + +An edw benchmark, runs multiple iterations of a suite of queries. +Independent raw query performance is aggregated during the benchmark, and used +for generating: +a. Raw query performance samples +b. Aggregated query performance samples +c. Raw wall time for each stream in each iteration +d. Raw wall time for each iteration +e. Aggregated (average) iteration wall time +f. Raw geo mean performance for each iteration +g. Aggregated geo mean performance using the aggregated query performances +""" +import copy +import enum +import functools +import json +import logging +from typing import Any, Dict, Iterable, List, Text + +from absl import flags +import numpy as np +from perfkitbenchmarker import sample + + +flags.DEFINE_bool('edw_generate_aggregated_metrics', True, + 'Whether the benchmark generates aggregated_metrics such as ' + 'geomean. Query performance metrics are still generated.') + +FLAGS = flags.FLAGS + + +class EdwPerformanceAggregationError(Exception): + """Error encountered during aggregation of performance results.""" + + +def geometric_mean(iterable: List[float]) -> float: + """Function to compute the geo mean for a list of numeric values. + + Args: + iterable: A List of Float performance values + + Returns: + A float value equal to the geometric mean of the input performance values. + + Raises: + EdwPerformanceAggregationError: If an invalid performance value was included + for aggregation. + """ + if (not iterable or any(perf <= 0.0 for perf in iterable)): + raise EdwPerformanceAggregationError('Invalid values cannot be aggregated.') + a = np.array(iterable) + return a.prod() ** (1.0 / len(a)) + + +class EdwQueryExecutionStatus(enum.Enum): + """Enum class for potential status of query execution. + + Potential values: + FAILED: Indicates that the query execution failed. + SUCCESSFUL: Indicates that the query execution succeeded. + """ + FAILED = 'query_execution_failed' + SUCCESSFUL = 'query_execution_successful' + + +class EdwQueryPerformance(object): + """Class that represents the performance of an executed edw query. + + Attributes: + name: A string name of the query that was executed + performance: A Float variable set to the query's completion time in secs. + -1.0 is used as a sentinel value implying the query failed. For a successful + query the value is expected to be positive. + execution_status: An EdwQueryExecutionStatus enum indicating success/failure + metadata: A dictionary of query execution attributes (job_id, etc.) 
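The geomean helper rejects non-positive values so the -1.0 failure sentinel can never silently leak into an aggregate. A tiny worked example of the same formula, re-implemented standalone with a plain ValueError standing in for EdwPerformanceAggregationError:

```
# Same formula as geometric_mean above, standalone sketch.
import numpy as np


def geometric_mean(values):
  if not values or any(v <= 0.0 for v in values):
    raise ValueError('Invalid values cannot be aggregated.')
  a = np.array(values)
  return a.prod() ** (1.0 / len(a))


print(geometric_mean([2.0, 8.0]))      # 4.0
try:
  geometric_mean([2.0, -1.0])          # -1.0 marks a failed query
except ValueError as e:
  print('rejected:', e)
```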
+ """ + + def __init__(self, query_name: Text, performance: float, + metadata: Dict[str, str]): + # TODO(user): add query start and query end as attributes. + self.name = query_name + self.performance = performance + self.execution_status = (EdwQueryExecutionStatus.FAILED + if performance == -1.0 + else EdwQueryExecutionStatus.SUCCESSFUL) + self.metadata = metadata + + @classmethod + def from_json(cls, serialized_performance: str): + """Process the serialized query performance from client jar. + + Expected Performance format: + {"query_wall_time_in_secs":1.998,"query_end":1601695222108,"query":"1", + "query_start":1601695220110, + "details":{"job_id":"b66b5a8e-633f-4ee4-8632-4e3d0856172f"}} + + Args: + serialized_performance: Stringified json performance. + + Returns: + An instance of EdwQueryPerformance + """ + results = json.loads(serialized_performance) + if 'details' in results: + metadata = results['details'] + else: + metadata = {} + if results['query_wall_time_in_secs'] == -1: + logging.warning('Query %s failed.', results['query']) + return cls(query_name=results['query'], + performance=results['query_wall_time_in_secs'], + metadata=metadata) + + def get_performance_sample(self, metadata: Dict[str, str]) -> sample.Sample: + """Method to generate a sample for the query performance. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample for the edw query performance. + """ + query_metadata = copy.copy(metadata) + query_metadata['query'] = self.name + query_metadata['execution_status'] = self.execution_status + query_metadata.update(self.metadata) + return sample.Sample('edw_raw_query_time', self.performance, 'seconds', + query_metadata) + + def get_performance_value(self) -> float: + """Method to get the query's completion time in secs. + + Returns: + A float value set to the query's completion time in secs. + """ + return self.performance + + def get_performance_metadata(self) -> Dict[str, str]: + """Method to get the query's execution attributes (job_id, etc.). + + Returns: + A dictionary set to query's execution attributes (job_id, etc.) + """ + return self.metadata + + def is_successful(self) -> bool: + """Validates if the query was successful.""" + return self.execution_status == EdwQueryExecutionStatus.SUCCESSFUL + + +class EdwBaseIterationPerformance(object): + """Class that represents the performance of an iteration of edw queries.""" + + +class EdwPowerIterationPerformance(EdwBaseIterationPerformance): + """Class that represents the performance of a power iteration of edw queries. + + Attributes: + id: A unique string id for the iteration. + performance: A dictionary of query name to its execution performance which + is a EdwQueryPerformance instance. + successful_count: An integer count of the successful queries in the + iteration. + total_count: An integer count of the total number of queries in the + iteration. + """ + + def __init__(self, iteration_id: Text, total_queries: int): + self.id = iteration_id + self.performance = {} + self.total_count = total_queries + self.successful_count = 0 + + def add_query_performance(self, query_name: Text, performance: float, + metadata: Dict[str, str]): + """Creates and populates a query performance from the input results. + + Updates the iteration's performance map with the query performance. + The method also increaments the success and failure query counts for the + iteration. 
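The from_json path boils down to reading three fields out of the client jar's per-query record. A standalone sketch over the documented shape; the payload below is the docstring example, not real benchmark output:

```
# Parses the per-query record shape documented above.
import json

serialized = ('{"query_wall_time_in_secs":1.998,"query_end":1601695222108,'
              '"query":"1","query_start":1601695220110,'
              '"details":{"job_id":"b66b5a8e-633f-4ee4-8632-4e3d0856172f"}}')

record = json.loads(serialized)
performance = record['query_wall_time_in_secs']  # -1.0 would mean the query failed
metadata = record.get('details', {})
status = 'FAILED' if performance == -1.0 else 'SUCCESSFUL'
print(record['query'], performance, status, metadata.get('job_id'))
```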
+ + Args: + query_name: A string name of the query that was executed + performance: A Float variable set to the query's completion time in secs. + -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + metadata: Extra metadata to add to each performance. + + Raises: + EdwPerformanceAggregationError: If the query has already been added. + """ + query_metadata = copy.copy(metadata) + query_performance = EdwQueryPerformance( + query_name=query_name, performance=performance, metadata=query_metadata) + + if query_performance.name in self.performance: + raise EdwPerformanceAggregationError('Attempting to aggregate a ' + 'duplicate query: %s.' % + query_performance.name) + self.performance[query_performance.name] = query_performance + if query_performance.is_successful(): + self.successful_count += 1 + + def has_query_performance(self, query_name: Text) -> bool: + """Returns whether the query was run at least once in the iteration. + + Args: + query_name: A String name of the query to check. + + Returns: + A boolean value indicating if the query was executed in the iteration. + """ + return query_name in self.performance + + def is_query_successful(self, query_name: Text) -> bool: + """Returns whether the query was successful in the iteration. + + Args: + query_name: A String name of the query to check. + + Returns: + A boolean value indicating if the query was successful in the iteration. + """ + return self.performance.get(query_name).is_successful() + + def get_query_performance(self, query_name: Text) -> float: + """Gets a query's execution performance generated during iteration execution. + + Args: + query_name: A String name of the query to retrieve details for + + Returns: + A float value set to the query's completion time in secs. + """ + return self.performance[query_name].get_performance_value() + + def get_query_metadata(self, query_name: Text) -> Dict[str, Any]: + """Gets the metadata of a query as executed in the current iteration. + + Args: + query_name: Name of the query whose performance is requested. + + Returns: + A dictionary set to the query's metadata. + + Raises: + EdwPerformanceAggregationError: If the query failed. + """ + if not self.is_query_successful(query_name): + raise EdwPerformanceAggregationError('Cannot aggregate invalid / failed' + ' query' + query_name) + return self.performance.get(query_name).metadata + + def get_all_queries_in_iteration(self) -> List[Text]: + """Gets a list of names of all queries in the iteration. + + Returns: + A list of all queries in the iteration. + """ + return self.performance.keys() + + def get_all_query_performance_samples( + self, metadata: Dict[str, str]) -> List[sample.Sample]: + """Gets a list of samples for all queries in the iteration. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. 
+ + Returns: + A list of samples of each query's performance + """ + return [ + query_performance.get_performance_sample(metadata) + for query_performance in self.performance.values() + ] + + def is_successful(self, expected_queries: List[Text]) -> bool: + """Check if all the expected queries ran and all succeeded.""" + all_queries_ran = set( + self.get_all_queries_in_iteration()) == set(expected_queries) + all_queries_were_successful = self.total_count == self.successful_count + return all_queries_ran and all_queries_were_successful + + def get_queries_geomean(self) -> float: + """Gets the geometric mean of all queries in the iteration. + + Returns: + The (float) geometric mean of all the queries ran in the iteration. + + Raises: + EdwPerformanceAggregationError: If the iteration contains unsuccessful + query executions. + """ + return geometric_mean([ + query_performance.performance + for query_performance in self.performance.values() + ]) + + def get_queries_geomean_performance_sample( + self, expected_queries: List[Text], metadata: Dict[str, + str]) -> sample.Sample: + """Gets a sample for geomean of all queries in the iteration. + + Args: + expected_queries: A list of query names expected to have been executed in + an iteration. + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of iteration geomean performance. + + Raises: + EdwPerformanceAggregationError: If the iteration contains unsuccessful + query executions. + """ + if not self.is_successful(expected_queries): + raise EdwPerformanceAggregationError('Failed executions in iteration.') + raw_geo_mean = self.get_queries_geomean() + geo_mean_metadata = copy.copy(metadata) + return sample.Sample('edw_iteration_geomean_time', raw_geo_mean, 'seconds', + geo_mean_metadata) + + +class EdwSimultaneousIterationPerformance(EdwBaseIterationPerformance): + """Class that represents the performance of a simultaneous iteration. + + Attributes: + id: A unique string id for the iteration. + start_time: The start time of the iteration in milliseconds since epoch. + end_time: The end time of the iteration in milliseconds since epoch. + wall_time: The wall time in seconds as a double value. + performance: A dictionary of query name to its execution performance which + is an EdwQueryPerformance instance. + all_queries_succeeded: Whether all queries in the iteration were successful. + """ + + def __init__(self, iteration_id: Text, iteration_start_time: int, + iteration_end_time: int, iteration_wall_time: float, + iteration_performance: Dict[str, EdwQueryPerformance], + all_queries_succeeded: bool): + self.id = iteration_id + self.start_time = iteration_start_time + self.end_time = iteration_end_time + self.wall_time = iteration_wall_time + self.performance = iteration_performance + self.all_queries_succeeded = all_queries_succeeded + + @classmethod + def from_json(cls, iteration_id: str, serialized_performance: str): + """Process the serialized simultaneous iteration performance from client jar. 
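Driving the power-iteration class is mostly a matter of feeding it one performance per query and then asking for the aggregate. An illustrative driver, assuming this module is importable as perfkitbenchmarker.edw_benchmark_results_aggregator from the pkb tree; the timings and job ids are invented:

```
# Illustrative only; timings and metadata are made up.
from perfkitbenchmarker import edw_benchmark_results_aggregator as agg

iteration = agg.EdwPowerIterationPerformance(iteration_id='1', total_queries=2)
iteration.add_query_performance('q1', 2.1, {'job_id': 'job-1'})
iteration.add_query_performance('q2', 3.4, {'job_id': 'job-2'})

print(iteration.is_successful(['q1', 'q2']))   # True: both ran, both succeeded
print(iteration.get_queries_geomean())         # sqrt(2.1 * 3.4) ~= 2.67
# Adding 'q1' a second time would raise EdwPerformanceAggregationError.
```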
+ + Expected Performance format: + {"simultaneous_end":1601145943197,"simultaneous_start":1601145940113, + "all_queries_performance_array":[{"query_wall_time_in_secs":2.079, + "query_end":1601145942208,"job_id":"914682d9-4f64-4323-bad2-554267cbbd8d", + "query":"1","query_start":1601145940129},{"query_wall_time_in_secs":2.572, + "query_end":1601145943192,"job_id":"efbf93a1-614c-4645-a268-e3801ae994f1", + "query":"2","query_start":1601145940620}], + "simultaneous_wall_time_in_secs":3.084} + + Args: + iteration_id: String identifier of the simultaneous iteration. + serialized_performance: Stringified json performance. + + Returns: + An instance of EdwSimultaneousIterationPerformance + """ + results = json.loads(serialized_performance) + query_performance_map = {} + all_queries_succeeded = 'failure_reason' not in results + if all_queries_succeeded: + for query_perf_json in results['all_queries_performance_array']: + query_perf = EdwQueryPerformance.from_json( + serialized_performance=(json.dumps(query_perf_json))) + query_performance_map[query_perf.name] = query_perf + else: + logging.warning('Failure reported. Reason: %s', results['failure_reason']) + return cls( + iteration_id=iteration_id, + iteration_start_time=(results['simultaneous_start'] + if all_queries_succeeded else -1), + iteration_end_time=(results['simultaneous_end'] + if all_queries_succeeded else -1), + iteration_wall_time=results['simultaneous_wall_time_in_secs'], + iteration_performance=query_performance_map, + all_queries_succeeded=all_queries_succeeded) + + def get_wall_time(self) -> float: + """Gets the total wall time, in seconds, for the iteration. + + The wall time is the time from the start of the first query to the end time + of the last query to finish. + + Returns: + The wall time in seconds. + """ + return self.wall_time + + def get_wall_time_performance_sample(self, metadata: Dict[ + str, str]) -> sample.Sample: + """Gets a sample for wall time performance of the iteration. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of iteration wall time performance + """ + wall_time = self.wall_time + wall_time_metadata = copy.copy(metadata) + wall_time_metadata['iteration_start_time'] = self.start_time + wall_time_metadata['iteration_end_time'] = self.end_time + return sample.Sample('edw_iteration_wall_time', wall_time, 'seconds', + wall_time_metadata) + + def get_all_query_performance_samples( + self, metadata: Dict[str, str]) -> List[sample.Sample]: + """Gets a list of samples for all queries in the iteration. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A list of samples of each query's performance + """ + return [ + query_performance.get_performance_sample(metadata) + for query_performance in self.performance.values() + ] + + def is_successful(self, expected_queries: List[Text]) -> bool: + """Check if all the expected queries ran and all succeeded.""" + all_queries_ran = self.performance.keys() == set(expected_queries) + return all_queries_ran and self.all_queries_succeeded + + def has_query_performance(self, query_name: Text) -> bool: + """Returns whether the query was run at least once in the iteration. + + Args: + query_name: A String name of the query to check. + + Returns: + A boolean value indicating if the query was executed in the iteration. 
+ """ + return query_name in self.performance + + def is_query_successful(self, query_name: Text) -> bool: + """Returns whether the query was successful in the iteration. + + Args: + query_name: A String name of the query to check. + + Returns: + A boolean value indicating if the query was successful in the iteration. + """ + if self.has_query_performance(query_name): + return self.performance.get(query_name).is_successful() + return False + + def get_query_performance(self, query_name: Text) -> float: + """Gets a query's execution performance in the current iteration. + + Args: + query_name: A String name of the query to retrieve details for + + Returns: + A float value set to the query's completion time in secs. + """ + return self.performance[query_name].get_performance_value() + + def get_query_metadata(self, query_name: Text) -> Dict[str, Any]: + """Gets the metadata of a query in the current iteration. + + Args: + query_name: Name of the query whose aggregated performance is requested + + Returns: + A dictionary set to the query's aggregated metadata, accumulated from the + raw query run in the current iteration. + + Raises: + EdwPerformanceAggregationError: If the query failed in the iteration. + """ + if not self.is_query_successful(query_name): + raise EdwPerformanceAggregationError('Cannot aggregate invalid / failed' + ' query' + query_name) + return self.performance.get(query_name).metadata + + def get_queries_geomean(self) -> float: + """Gets the geometric mean of all queries in the iteration. + + Returns: + The (float) geometric mean of all the queries ran in the iteration. + + Raises: + EdwPerformanceAggregationError: If the iteration contains unsuccessful + query executions. + """ + return geometric_mean([ + query_performance.performance + for query_performance in self.performance.values() + ]) + + def get_queries_geomean_performance_sample( + self, expected_queries: List[Text], metadata: Dict[str, + str]) -> sample.Sample: + """Gets a sample for geomean of all queries in the iteration. + + Args: + expected_queries: A list of query names expected to have been executed in + an iteration. + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of iteration geomean performance. + + Raises: + EdwPerformanceAggregationError: If the iteration contains unsuccessful + query executions. + """ + if not self.is_successful(expected_queries): + raise EdwPerformanceAggregationError('Failed executions in iteration.') + raw_geo_mean = self.get_queries_geomean() + geo_mean_metadata = copy.copy(metadata) + return sample.Sample('edw_iteration_geomean_time', raw_geo_mean, 'seconds', + geo_mean_metadata) + + +class EdwThroughputIterationPerformance(EdwBaseIterationPerformance): + """Class that represents the performance of an iteration of edw queries. + + Attributes: + id: A unique string id for the iteration. + start_time: The start time of the iteration execution. + end_time: The end time of the iteration execution. + wall_time: The wall time of the stream execution. + performance: A dict of stream_id to stream performances, each of which is a + dictionary mapping query names to their execution performances, which are + EdwQueryPerformance instances. 
+ """ + + def __init__(self, iteration_id: Text, iteration_start_time: int, + iteration_end_time: int, iteration_wall_time: float, + iteration_performance: Dict[str, Dict[str, + EdwQueryPerformance]]): + self.id = iteration_id + self.start_time = iteration_start_time + self.end_time = iteration_end_time + self.wall_time = iteration_wall_time + self.performance = iteration_performance + + @classmethod + def from_json(cls, iteration_id: str, serialized_performance: str): + """Process the serialized throughput iteration performance from client jar. + + Expected Performance format: + {"throughput_start":1601666911596,"throughput_end":1601666916139, + "throughput_wall_time_in_secs":4.543, + "all_streams_performance_array":[ + {"stream_start":1601666911597,"stream_end":1601666916139, + "stream_wall_time_in_secs":4.542, + "stream_performance_array":[ + {"query_wall_time_in_secs":2.238,"query_end":1601666913849, + "query":"1","query_start":1601666911611, + "details":{"job_id":"438170b0-b0cb-4185-b733-94dd05b46b05"}}, + {"query_wall_time_in_secs":2.285,"query_end":1601666916139, + "query":"2","query_start":1601666913854, + "details":{"job_id":"371902c7-5964-46f6-9f90-1dd00137d0c8"}} + ]}, + {"stream_start":1601666911597,"stream_end":1601666916018, + "stream_wall_time_in_secs":4.421, + "stream_performance_array":[ + {"query_wall_time_in_secs":2.552,"query_end":1601666914163, + "query":"2","query_start":1601666911611, + "details":{"job_id":"5dcba418-d1a2-4a73-be70-acc20c1f03e6"}}, + {"query_wall_time_in_secs":1.855,"query_end":1601666916018, + "query":"1","query_start":1601666914163, + "details":{"job_id":"568c4526-ae26-4e9d-842c-03459c3a216d"}} + ]} + ]} + + Args: + iteration_id: String identifier of the throughput iteration. + serialized_performance: Stringified json performance. + + Returns: + An instance of EdwThroughputIterationPerformance + """ + results = json.loads(serialized_performance) + stream_performances = {} + all_queries_succeeded = 'failure_reason' not in results + if all_queries_succeeded: + for stream_id, stream_perf_json in enumerate( + results['all_streams_performance_array']): + stream_id = str(stream_id) + stream_performance_map = {} + for query_perf_json in stream_perf_json['stream_performance_array']: + query_perf = EdwQueryPerformance.from_json( + serialized_performance=(json.dumps(query_perf_json))) + stream_performance_map[query_perf.name] = query_perf + stream_performances.update({stream_id: stream_performance_map}) + else: + logging.warning('Failure reported. Reason: %s', results['failure_reason']) + return cls( + iteration_id=iteration_id, + iteration_start_time=(results['throughput_start'] + if all_queries_succeeded else -1), + iteration_end_time=(results['throughput_end'] + if all_queries_succeeded else -1), + iteration_wall_time=results['throughput_wall_time_in_secs'], + iteration_performance=stream_performances) + + def has_query_performance(self, query_name: Text) -> bool: + """Returns whether the query was run at least once in the iteration. + + Args: + query_name: A String name of the query to check. + + Returns: + A boolean value indicating if the query was executed in the iteration. + """ + for stream in self.performance.values(): + if query_name in stream: + return True + return False + + def is_query_successful(self, query_name: Text) -> bool: + """Returns whether the query was successful in the iteration. + + Args: + query_name: A String name of the query to check. + + Returns: + A boolean value indicating if the query was successful in the iteration. 
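The throughput payload is just streams of per-query records, so from_json reduces to two nested loops. A standalone sketch that parses a trimmed-down version of the documented payload into a {stream_id: {query: wall_time}} mapping; only a subset of the fields from the docstring example is kept:

```
# Parses a trimmed throughput payload into {stream_id: {query: wall_time}}.
import json

serialized = json.dumps({
    'throughput_wall_time_in_secs': 4.543,
    'all_streams_performance_array': [
        {'stream_performance_array': [
            {'query': '1', 'query_wall_time_in_secs': 2.238},
            {'query': '2', 'query_wall_time_in_secs': 2.285}]},
        {'stream_performance_array': [
            {'query': '2', 'query_wall_time_in_secs': 2.552},
            {'query': '1', 'query_wall_time_in_secs': 1.855}]},
    ],
})

results = json.loads(serialized)
streams = {
    str(i): {q['query']: q['query_wall_time_in_secs']
             for q in stream['stream_performance_array']}
    for i, stream in enumerate(results['all_streams_performance_array'])
}
print(results['throughput_wall_time_in_secs'], streams)
```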
+ """ + for stream in self.performance.values(): + if query_name in stream: + if not stream[query_name].is_successful(): + return False + return True + + def get_query_performance(self, query_name: Text) -> float: + """Gets a query's execution performance aggregated across all streams in the current iteration. + + Args: + query_name: A String name of the query to retrieve details for + + Returns: + A float value set to the query's average completion time in secs. + """ + all_performances = [] + for stream in self.performance.values(): + if query_name in stream: + all_performances.append(stream[query_name].get_performance_value()) + if not all_performances: + return -1.0 + return sum(all_performances) / len(all_performances) + + def get_query_metadata(self, query_name: Text) -> Dict[str, Any]: + """Gets the metadata of a query aggregated across all streams in the current iteration. + + Args: + query_name: Name of the query whose aggregated performance is requested + + Returns: + A dictionary set to the query's aggregated metadata, accumulated from the + raw query runs in all streams of the current iteration. + + Raises: + EdwPerformanceAggregationError: If the query failed in one or more streams + """ + result = {} + for stream_id, stream_performance in self.performance.items(): + if query_name in stream_performance: + q_performance = stream_performance[query_name] + result[stream_id + '_runtime'] = q_performance.get_performance_value() + result.update({ + stream_id + '_' + k: v + for (k, v) in q_performance.get_performance_metadata().items() + }) + return result + + def get_all_query_performance_samples( + self, metadata: Dict[str, str]) -> List[sample.Sample]: + """Gets a list of samples for all queries in all streams of the iteration. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A list of samples of each query's performance + """ + all_query_performances = [] + for stream_id, stream_performance in self.performance.items(): + stream_metadata = copy.copy(metadata) + stream_metadata['stream'] = stream_id + all_query_performances.extend([ + query_perf.get_performance_sample(stream_metadata) + for query_perf in stream_performance.values() + ]) + return all_query_performances + + def all_streams_ran_all_expected_queries( + self, expected_queries: List[Text]) -> bool: + """Checks that the same set of expected queries ran in all streams.""" + for stream in self.performance.values(): + if set(stream.keys()) != set(expected_queries): + return False + return True + + def no_duplicate_queries(self) -> bool: + """Checks that no streams contain any duplicate queries.""" + for stream in self.performance.values(): + if len(stream.keys()) != len(set(stream.keys())): + return False + return True + + def all_queries_succeeded(self) -> bool: + """Checks if every query in every stream was successful.""" + for stream_performance in self.performance.values(): + for query_perf in stream_performance.values(): + if query_perf.performance == -1: + return False + return True + + def is_successful(self, expected_queries: List[Text]) -> bool: + """Check if the throughput run was successful. + + A successful run meets the following conditions: + - There were more than 0 streams. 
+ - Each stream ran the same set of expected queries (regardless of order) + - Each stream ran each query only once + - Every query in every stream succeeded + + Args: + expected_queries: A list of query names expected to have been executed in + an iteration. + + Returns: + True if all success conditions were met, false otherwise. + """ + non_zero_streams = len(self.performance) >= 1 + all_streams_ran_all_queries = self.all_streams_ran_all_expected_queries( + expected_queries) + no_duplicate_queries = self.no_duplicate_queries() + all_queries_succeeded = self.all_queries_succeeded() + return (non_zero_streams and all_streams_ran_all_queries and + no_duplicate_queries and all_queries_succeeded) + + def get_queries_geomean(self) -> float: + """Gets the geometric mean of all queries in all streams of the iteration. + + Returns: + The (float) geometric mean of all the individual queries ran in all + streams of the iteration. + + Raises: + EdwPerformanceAggregationError: If the suite contains unsuccessful query + executions. + """ + query_performances = [] + for stream in self.performance.values(): + for query in stream.values(): + query_performances.append(query.get_performance_value()) + return geometric_mean(query_performances) + + def get_queries_geomean_performance_sample( + self, expected_queries: List[Text], metadata: Dict[str, + str]) -> sample.Sample: + """Gets a sample for geomean of all queries in all streams of the iteration. + + Args: + expected_queries: A list of query names expected to have been executed in + an iteration. + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of iteration geomean performance. + + Raises: + EdwPerformanceAggregationError: If the iteration contains unsuccessful + query executions. + """ + if not self.is_successful(expected_queries): + raise EdwPerformanceAggregationError('Failed executions in iteration.') + raw_geo_mean = self.get_queries_geomean() + geo_mean_metadata = copy.copy(metadata) + return sample.Sample('edw_iteration_geomean_time', raw_geo_mean, 'seconds', + geo_mean_metadata) + + def get_wall_time(self) -> float: + """Gets the total wall time, in seconds, for the iteration. + + The wall time is the time from the start of the first stream to the end time + of the last stream to finish. + + Returns: + The wall time in seconds. + """ + return self.wall_time + + def get_wall_time_performance_sample( + self, metadata: Dict[str, str]) -> sample.Sample: + """Gets a sample for total wall time performance of the iteration. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of iteration wall time performance + """ + wall_time_metadata = copy.copy(metadata) + wall_time_metadata['iteration_start_time'] = self.start_time + wall_time_metadata['iteration_end_time'] = self.end_time + return sample.Sample('edw_iteration_wall_time', self.wall_time, 'seconds', + wall_time_metadata) + + +class EdwBenchmarkPerformance(object): + """Class that represents the performance of an edw benchmark. + + Attributes: + total_iterations: An integer variable set to total of number of iterations. 
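The four success conditions above reduce to set and sentinel checks. A simplified standalone sketch over the {stream_id: {query: wall_time}} shape from the previous sketch; the duplicate-query check is omitted because dict keys cannot repeat, and the data is made up:

```
# Simplified success check; the real code works on EdwQueryPerformance objects.
def throughput_run_is_successful(streams, expected_queries):
  non_zero_streams = len(streams) >= 1
  all_ran_expected = all(
      set(stream) == set(expected_queries) for stream in streams.values())
  all_succeeded = all(
      t != -1.0 for stream in streams.values() for t in stream.values())
  return non_zero_streams and all_ran_expected and all_succeeded


streams = {'0': {'1': 2.238, '2': 2.285},
           '1': {'2': 2.552, '1': 1.855}}
print(throughput_run_is_successful(streams, ['1', '2']))   # True
streams['1']['1'] = -1.0                                    # mark one query failed
print(throughput_run_is_successful(streams, ['1', '2']))   # False
```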
+ expected_queries: A list of query names that are executed in an iteration of + the benchmark + iteration_performances: A dictionary of iteration id (String value) to its + execution performance (an instance of EdwBaseIterationPerformance) + """ + + def __init__(self, total_iterations: int, expected_queries: Iterable[Text]): + self.total_iterations = total_iterations + self.expected_queries = list(expected_queries) + self.iteration_performances = {} + + def add_iteration_performance(self, performance: EdwBaseIterationPerformance): + """Add an iteration's performance to the benchmark results. + + Args: + performance: An instance of EdwBaseIterationPerformance encapsulating the + iteration performance details. + + Raises: + EdwPerformanceAggregationError: If the iteration has already been added. + """ + iteration_id = performance.id + if iteration_id in self.iteration_performances: + raise EdwPerformanceAggregationError('Attempting to aggregate a duplicate' + ' iteration: %s.' % iteration_id) + self.iteration_performances[iteration_id] = performance + + def is_successful(self) -> bool: + """Check a benchmark's success, only if all the iterations succeed.""" + return functools.reduce((lambda x, y: x and y), [ + iteration_performance.is_successful(self.expected_queries) + for iteration_performance in self.iteration_performances.values() + ]) + + def aggregated_query_status(self, query_name: Text) -> bool: + """Gets the status of query aggregated across all iterations. + + A query is considered successful only if + a. Query was executed in every iteration + b. Query was successful in every iteration + + Args: + query_name: Name of the query whose aggregated success is requested + + Returns: + A boolean value indicating if the query was successful in the benchmark. + """ + for performance in self.iteration_performances.values(): + if not performance.has_query_performance(query_name): + return False + if not performance.is_query_successful(query_name): + return False + return True + + def aggregated_query_execution_time(self, query_name: Text) -> float: + """Gets the execution time of query aggregated across all iterations. + + Args: + query_name: Name of the query whose aggregated performance is requested + + Returns: + A float value set to the query's aggregated execution time + + Raises: + EdwPerformanceAggregationError: If the query failed in one or more + iterations + """ + if not self.aggregated_query_status(query_name): + raise EdwPerformanceAggregationError('Cannot aggregate invalid / failed ' + 'query ' + query_name) + query_performances = [ + iteration_performance.get_query_performance(query_name) + for iteration_performance in self.iteration_performances.values() + ] + return sum(query_performances) / self.total_iterations + + def aggregated_query_metadata(self, query_name: Text) -> Dict[str, Any]: + """Gets the metadata of a query aggregated across all iterations. + + Args: + query_name: Name of the query whose aggregated performance is requested + + Returns: + A dictionary set to the query's aggregated metadata, accumulated from the + raw query runs. 
+ + Raises: + EdwPerformanceAggregationError: If the query failed in one or more + iterations + """ + if not self.aggregated_query_status(query_name): + raise EdwPerformanceAggregationError('Cannot aggregate invalid / failed ' + 'query ' + query_name) + result = {} + for iteration_id, iteration_performance in ( + self.iteration_performances.items()): + result.update({ + iteration_id + '_' + k: v + for (k, v) in iteration_performance.get_query_metadata( + query_name).items() + }) + return result + + def get_aggregated_query_performance_sample( + self, query_name: Text, metadata: Dict[str, str]) -> sample.Sample: + """Gets the performance of query aggregated across all iterations. + + Args: + query_name: Name of the query whose aggregated performance is requested + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of the query's aggregated execution time + """ + query_metadata = copy.copy(metadata) + query_metadata['query'] = query_name + query_metadata['aggregation_method'] = 'mean' + perf, exec_status, agg_md = -1.0, EdwQueryExecutionStatus.FAILED, {} + if self.aggregated_query_status(query_name): + perf = self.aggregated_query_execution_time(query_name=query_name) + exec_status = EdwQueryExecutionStatus.SUCCESSFUL + agg_md = self.aggregated_query_metadata(query_name=query_name) + query_metadata['execution_status'] = exec_status + query_metadata.update(agg_md) + return sample.Sample('edw_aggregated_query_time', perf, 'seconds', + query_metadata) + + def get_all_query_performance_samples(self, metadata: Dict[str, str]) -> List[ + sample.Sample]: + """Generates samples for all query performances. + + Benchmark relies on iteration runs to generate the raw query performance + samples + Benchmark appends the aggregated query performance sample + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A list of samples (raw and aggregated) + """ + results = [] + # Raw query performance samples + for iteration, performance in self.iteration_performances.items(): + iteration_metadata = copy.copy(metadata) + iteration_metadata['iteration'] = iteration + results.extend(performance.get_all_query_performance_samples( + iteration_metadata)) + # Aggregated query performance samples + for query in self.expected_queries: + results.append(self.get_aggregated_query_performance_sample( + query_name=query, metadata=metadata)) + return results + + def get_aggregated_wall_time_performance_sample(self, + metadata: Dict[str, str] + ) -> sample.Sample: + """Gets the wall time performance aggregated across all iterations. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of aggregated (averaged) wall time. + """ + wall_times = [ + iteration.get_wall_time() + for iteration in self.iteration_performances.values() + ] + aggregated_wall_time = sum(wall_times) / self.total_iterations + wall_time_metadata = copy.copy(metadata) + wall_time_metadata['aggregation_method'] = 'mean' + return sample.Sample('edw_aggregated_wall_time', aggregated_wall_time, + 'seconds', wall_time_metadata) + + def get_wall_time_performance_samples(self, metadata: Dict[str, str]): + """Generates samples for all wall time performances. 
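At the benchmark level the aggregation is a plain mean across iterations for each query (and for wall time), with the geomean computed on top of those means. An illustrative end-to-end use of the classes above, again assuming the module is importable as perfkitbenchmarker.edw_benchmark_results_aggregator; the timings are invented:

```
# Illustrative only; two fake power iterations over the same two queries.
from perfkitbenchmarker import edw_benchmark_results_aggregator as agg

benchmark = agg.EdwBenchmarkPerformance(
    total_iterations=2, expected_queries=['q1', 'q2'])
for iteration_id, timings in [('1', {'q1': 2.0, 'q2': 4.0}),
                              ('2', {'q1': 3.0, 'q2': 5.0})]:
  iteration = agg.EdwPowerIterationPerformance(iteration_id, total_queries=2)
  for name, secs in timings.items():
    iteration.add_query_performance(name, secs, {})
  benchmark.add_iteration_performance(iteration)

print(benchmark.is_successful())                         # True
print(benchmark.aggregated_query_execution_time('q1'))   # (2.0 + 3.0) / 2 = 2.5
```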
+ + Benchmark relies on iterations to generate the raw wall time performance + samples. + Benchmark appends the aggregated wall time performance sample + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A list of samples (raw and aggregated) + """ + results = [] + + for iteration, performance in self.iteration_performances.items(): + iteration_metadata = copy.copy(metadata) + iteration_metadata['iteration'] = iteration + results.append(performance.get_wall_time_performance_sample( + iteration_metadata)) + results.append(self.get_aggregated_wall_time_performance_sample( + metadata=metadata)) + return results + + def get_aggregated_geomean_performance_sample(self, + metadata: + Dict[str, + str]) -> sample.Sample: + """Gets the geomean performance aggregated across all iterations. + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A sample of aggregated geomean + + Raises: + EdwPerformanceAggregationError: If the benchmark conatins a failed query + execution. + """ + if not self.is_successful(): + raise EdwPerformanceAggregationError('Benchmark contains a failed query.') + aggregated_geo_mean = geometric_mean([ + self.aggregated_query_execution_time(query_name=query) + for query in self.expected_queries + ]) + + geomean_metadata = copy.copy(metadata) + geomean_metadata['intra_query_aggregation_method'] = 'mean' + geomean_metadata['inter_query_aggregation_method'] = 'geomean' + return sample.Sample('edw_aggregated_geomean', aggregated_geo_mean, + 'seconds', geomean_metadata) + + def get_queries_geomean_performance_samples(self, metadata: Dict[str, str] + ) -> List[sample.Sample]: + """Generates samples for all geomean performances. + + Benchmark relies on iteration runs to generate the raw geomean performance + samples + Benchmark appends the aggregated geomean performance sample + + Args: + metadata: A dictionary of execution attributes to be merged with the query + execution attributes, for eg. tpc suite, scale of dataset, etc. + + Returns: + A list of samples (raw and aggregated) + + Raises: + EdwPerformanceAggregationError: If the benchmark conatins a failed query + execution + """ + if not self.is_successful(): + raise EdwPerformanceAggregationError('Benchmark contains a failed query.') + results = [] + + for iteration, performance in self.iteration_performances.items(): + iteration_metadata = copy.copy(metadata) + iteration_metadata['iteration'] = iteration + results.append( + performance.get_queries_geomean_performance_sample( + self.expected_queries, iteration_metadata)) + + results.append(self.get_aggregated_geomean_performance_sample( + metadata=metadata)) + return results diff --git a/script/cumulus/pkb/perfkitbenchmarker/edw_service.py b/script/cumulus/pkb/perfkitbenchmarker/edw_service.py new file mode 100644 index 0000000..7fb1e42 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/edw_service.py @@ -0,0 +1,393 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Resource encapsulating provisioned Data Warehouse in the cloud Services. + +Classes to wrap specific backend services are in the corresponding provider +directory as a subclass of BaseEdwService. +""" +import os +from typing import Dict, List, Text, Tuple + +from absl import flags +from perfkitbenchmarker import resource + + +flags.DEFINE_integer('edw_service_cluster_concurrency', 5, + 'Number of queries to run concurrently on the cluster.') +flags.DEFINE_string('edw_service_cluster_snapshot', None, + 'If set, the snapshot to restore as cluster.') +flags.DEFINE_string('edw_service_cluster_identifier', None, + 'If set, the preprovisioned edw cluster.') +flags.DEFINE_string('edw_service_endpoint', None, + 'If set, the preprovisioned edw cluster endpoint.') +flags.DEFINE_string('edw_service_cluster_db', None, + 'If set, the db on cluster to use during the benchmark (' + 'only applicable when using snapshots).') +flags.DEFINE_string('edw_service_cluster_user', None, + 'If set, the user authorized on cluster (only applicable ' + 'when using snapshots).') +flags.DEFINE_string('edw_service_cluster_password', None, + 'If set, the password authorized on cluster (only ' + 'applicable when using snapshots).') +flags.DEFINE_string('snowflake_snowsql_config_override_file', None, + 'The SnowSQL configuration to use.' + 'https://docs.snowflake.net/manuals/user-guide/snowsql-config.html#snowsql-config-file') # pylint: disable=line-too-long +flags.DEFINE_string('snowflake_connection', None, + 'Named Snowflake connection defined in SnowSQL config file.' + 'https://docs.snowflake.net/manuals/user-guide/snowsql-start.html#using-named-connections') # pylint: disable=line-too-long +flags.DEFINE_integer('edw_suite_iterations', 1, 'Number of suite iterations to perform.') +# TODO(user): Revisit flags for accepting query lists. +flags.DEFINE_string('edw_simultaneous_queries', + None, 'CSV list of simultaneous queries to benchmark.') +flags.DEFINE_integer('edw_simultaneous_query_submission_interval', '0', + 'Simultaneous query submission interval in milliseconds.') +flags.DEFINE_string('edw_power_queries', None, + 'CSV list of power queries to benchmark.') +flags.DEFINE_multi_string( + 'concurrency_streams', [], 'List of all query streams to execute. Each ' + 'stream should be passed in separately and the queries should be comma ' + 'separated, e.g. --concurrency_streams=1,2,3 --concurrency_streams=3,2,1') +flags.DEFINE_string('snowflake_warehouse', None, + 'A virtual warehouse, often referred to simply as a - ' + 'warehouse, is a cluster of compute in Snowflake. 
' + 'https://docs.snowflake.com/en/user-guide/warehouses.html') # pylint: disable=line-too-long +flags.DEFINE_string( + 'snowflake_database', None, + 'The hosted snowflake database to use during the benchmark.') +flags.DEFINE_string( + 'snowflake_schema', None, + 'The schema of the hosted snowflake database to use during the benchmark.') +flags.DEFINE_enum( + 'snowflake_client_interface', 'JDBC', ['JDBC'], + 'The Runtime Interface used when interacting with Snowflake.') + + +FLAGS = flags.FLAGS + + +TYPE_2_PROVIDER = dict([('athena', 'aws'), ('redshift', 'aws'), + ('spectrum', 'aws'), ('snowflake_aws', 'aws'), + ('snowflakeexternal_aws', 'aws'), ('bigquery', 'gcp'), + ('endor', 'gcp'), ('endorazure', 'gcp'), + ('bqfederated', 'gcp'), + ('azuresqldatawarehouse', 'azure')]) +TYPE_2_MODULE = dict([ + ('athena', 'perfkitbenchmarker.providers.aws.athena'), + ('redshift', 'perfkitbenchmarker.providers.aws.redshift'), + ('spectrum', 'perfkitbenchmarker.providers.aws.spectrum'), + ('snowflake_aws', 'perfkitbenchmarker.providers.aws.snowflake'), + ('snowflakeexternal_aws', 'perfkitbenchmarker.providers.aws.snowflake'), + ('bigquery', 'perfkitbenchmarker.providers.gcp.bigquery'), + ('endor', 'perfkitbenchmarker.providers.gcp.bigquery'), + ('endorazure', 'perfkitbenchmarker.providers.gcp.bigquery'), + ('bqfederated', 'perfkitbenchmarker.providers.gcp.bigquery'), + ('azuresqldatawarehouse', 'perfkitbenchmarker.providers.azure.' + 'azure_sql_data_warehouse') +]) +DEFAULT_NUMBER_OF_NODES = 1 +# The order of stages is important to the successful lifecycle completion. +EDW_SERVICE_LIFECYCLE_STAGES = ['create', 'load', 'query', 'delete'] +SAMPLE_QUERY_PATH = '/tmp/sample.sql' +SAMPLE_QUERY = 'select * from INFORMATION_SCHEMA.TABLES;' + + +class EdwExecutionError(Exception): + """Encapsulates errors encountered during execution of a query.""" + + +class EdwClientInterface(object): + """Defines the interface for EDW service clients. + + Attributes: + client_vm: An instance of virtual_machine.BaseVirtualMachine used to + interface with the edw service. + whitelist_ip: The IP to whitelist. + """ + + def __init__(self): + self.client_vm = None + + # set by derived classes + self.whitelist_ip = None + + def SetProvisionedAttributes(self, benchmark_spec): + """Sets any attributes that were unknown during initialization.""" + self.client_vm = benchmark_spec.vms[0] + self.client_vm.RemoteCommand('echo "\nMaxSessions 100" | ' + 'sudo tee -a /etc/ssh/sshd_config') + + def Prepare(self, package_name: Text) -> None: + """Prepares the client vm to execute query. + + The default implementation raises an Error, to ensure client specific + installation and authentication of runner utilities. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + raise NotImplementedError + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. 
job_id + """ + raise NotImplementedError + + def ExecuteSimultaneous(self, submission_interval: int, + queries: List[str]) -> str: + """Executes queries simultaneously on client and return performance details. + + Simultaneous app expects queries as white space separated query file names. + Response format: + {"simultaneous_end":1601145943197,"simultaneous_start":1601145940113, + "stream_performance_array":[{"query_wall_time_in_secs":2.079, + "query_end":1601145942208,"job_id":"914682d9-4f64-4323-bad2-554267cbbd8d", + "query":"1","query_start":1601145940129},{"query_wall_time_in_secs":2.572, + "query_end":1601145943192,"job_id":"efbf93a1-614c-4645-a268-e3801ae994f1", + "query":"2","query_start":1601145940620}], + "simultaneous_wall_time_in_secs":3.084} + + Args: + submission_interval: Simultaneous query submission interval in milliseconds. + queries: List of string names of the queries to execute simultaneously. + + Returns: + performance_details: A serialized dictionary of execution details. + """ + raise NotImplementedError + + def ExecuteThroughput(self, concurrency_streams: List[List[str]]) -> str: + """Executes a throughput test and returns performance details. + + Response format: + {"throughput_start":1601666911596,"throughput_end":1601666916139, + "throughput_wall_time_in_secs":4.543, + "all_streams_performance_array":[ + {"stream_start":1601666911597,"stream_end":1601666916139, + "stream_wall_time_in_secs":4.542, + "stream_performance_array":[ + {"query_wall_time_in_secs":2.238,"query_end":1601666913849, + "query":"1","query_start":1601666911611, + "details":{"job_id":"438170b0-b0cb-4185-b733-94dd05b46b05"}}, + {"query_wall_time_in_secs":2.285,"query_end":1601666916139, + "query":"2","query_start":1601666913854, + "details":{"job_id":"371902c7-5964-46f6-9f90-1dd00137d0c8"}} + ]}, + {"stream_start":1601666911597,"stream_end":1601666916018, + "stream_wall_time_in_secs":4.421, + "stream_performance_array":[ + {"query_wall_time_in_secs":2.552,"query_end":1601666914163, + "query":"2","query_start":1601666911611, + "details":{"job_id":"5dcba418-d1a2-4a73-be70-acc20c1f03e6"}}, + {"query_wall_time_in_secs":1.855,"query_end":1601666916018, + "query":"1","query_start":1601666914163, + "details":{"job_id":"568c4526-ae26-4e9d-842c-03459c3a216d"}} + ]} + ]} + + Args: + concurrency_streams: List of streams to execute simultaneously, each of + which is a list of string names of queries. + + Returns: + A serialized dictionary of execution details. + """ + raise NotImplementedError + + def WarmUpQuery(self): + """Executes a service-agnostic query that can detect cold start issues.""" + with open(SAMPLE_QUERY_PATH, 'w+') as f: + f.write(SAMPLE_QUERY) + self.client_vm.PushFile(SAMPLE_QUERY_PATH) + query_name = os.path.basename(SAMPLE_QUERY_PATH) + self.ExecuteQuery(query_name) + + def GetMetadata(self) -> Dict[str, str]: + """Returns the client interface metadata.""" + raise NotImplementedError + + +class EdwService(resource.BaseResource): + """Object representing a EDW Service.""" + + def __init__(self, edw_service_spec): + """Initialize the edw service object. + + Args: + edw_service_spec: spec of the edw service. 
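Concrete providers plug in by subclassing EdwClientInterface and implementing at least Prepare, ExecuteQuery, and GetMetadata. A minimal toy sketch of that contract: it times a local no-op instead of talking to a real warehouse, fabricates the job id, and assumes edw_service is importable from the pkb tree.

```
# Toy client only: no real warehouse is contacted.
import time
from typing import Dict, Text, Tuple

from perfkitbenchmarker import edw_service


class SketchClientInterface(edw_service.EdwClientInterface):
  """Times a local no-op instead of a real warehouse query."""

  def Prepare(self, package_name: Text) -> None:
    pass  # nothing to install for the sketch

  def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]:
    start = time.time()
    # A real implementation would run query_name on the warehouse here.
    return time.time() - start, {'job_id': 'local-' + query_name}

  def GetMetadata(self) -> Dict[str, str]:
    return {'client': 'sketch'}


client = SketchClientInterface()
print(client.ExecuteQuery('q1.sql'))
```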
+ """ + # Hand over the actual creation to the resource module, which assumes the + # resource is pkb managed by default + is_user_managed = self.IsUserManaged(edw_service_spec) + # edw_service attribute + self.cluster_identifier = self.GetClusterIdentifier(edw_service_spec) + super(EdwService, self).__init__(user_managed=is_user_managed) + + # Provision related attributes + if edw_service_spec.snapshot: + self.snapshot = edw_service_spec.snapshot + else: + self.snapshot = None + + # Cluster related attributes + self.concurrency = edw_service_spec.concurrency + self.node_type = edw_service_spec.node_type + + if edw_service_spec.node_count: + self.node_count = edw_service_spec.node_count + else: + self.node_count = DEFAULT_NUMBER_OF_NODES + + # Interaction related attributes + if edw_service_spec.endpoint: + self.endpoint = edw_service_spec.endpoint + else: + self.endpoint = '' + self.db = edw_service_spec.db + self.user = edw_service_spec.user + self.password = edw_service_spec.password + # resource config attribute + self.spec = edw_service_spec + # resource workflow management + self.supports_wait_on_delete = True + self.client_interface = None + + def GetClientInterface(self) -> EdwClientInterface: + """Gets the active Client Interface.""" + return self.client_interface + + def IsUserManaged(self, edw_service_spec): + """Indicates if the edw service instance is user managed. + + Args: + edw_service_spec: spec of the edw service. + + Returns: + A boolean, set to True if the edw service instance is user managed, False + otherwise. + """ + return edw_service_spec.cluster_identifier is not None + + def GetClusterIdentifier(self, edw_service_spec): + """Returns a string name of the Cluster Identifier. + + Args: + edw_service_spec: spec of the edw service. + + Returns: + A string, set to the name of the cluster identifier. + """ + if self.IsUserManaged(edw_service_spec): + return edw_service_spec.cluster_identifier + else: + return 'pkb-' + FLAGS.run_uri + + def GetMetadata(self): + """Return a dictionary of the metadata for this edw service.""" + basic_data = {'edw_service_type': self.spec.type, + 'edw_cluster_identifier': self.cluster_identifier, + 'edw_cluster_node_type': self.node_type, + 'edw_cluster_node_count': self.node_count} + return basic_data + + def GenerateLifecycleStageScriptName(self, lifecycle_stage): + """Computes the default name for script implementing an edw lifecycle stage. + + Args: + lifecycle_stage: Stage for which the corresponding sql script is desired. + + Returns: + script name for implementing the argument lifecycle_stage. + """ + return os.path.basename( + os.path.normpath('database_%s.sql' % lifecycle_stage)) + + def Cleanup(self): + """Cleans up any temporary resources created for the service.""" + pass + + def GetDatasetLastUpdatedTime(self, dataset=None): + """Get the formatted last modified timestamp of the dataset.""" + raise NotImplementedError + + def ExtractDataset(self, + dest_bucket, + dataset=None, + tables=None, + dest_format='CSV'): + """Extract all tables in a dataset to object storage. + + Args: + dest_bucket: Name of the bucket to extract the data to. Should already + exist. + dataset: Optional name of the dataset. If none, will be determined by the + service. + tables: Optional list of table names to extract. If none, all tables in + the dataset will be extracted. + dest_format: Format to extract data in. + """ + raise NotImplementedError + + def RemoveDataset(self, dataset=None): + """Removes a dataset. 
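The user-managed switch is driven entirely by whether the spec carries a cluster identifier: if it does, the service is treated as user managed and that cluster is reused; otherwise a fresh identifier is derived from the run URI. A standalone sketch of just that decision; the spec class and run URI below are stand-ins, not real PKB objects:

```
# Stand-in spec; the real code reads edw_service_spec and FLAGS.run_uri.
class FakeEdwSpec:
  def __init__(self, cluster_identifier=None):
    self.cluster_identifier = cluster_identifier


def cluster_identifier(spec, run_uri):
  user_managed = spec.cluster_identifier is not None
  return spec.cluster_identifier if user_managed else 'pkb-' + run_uri


print(cluster_identifier(FakeEdwSpec('my-existing-cluster'), 'abc123'))
# my-existing-cluster
print(cluster_identifier(FakeEdwSpec(), 'abc123'))
# pkb-abc123
```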
+ + Args: + dataset: Optional name of the dataset. If none, will be determined by the + service. + """ + raise NotImplementedError + + def CreateDataset(self, dataset=None, description=None): + """Creates a new dataset. + + Args: + dataset: Optional name of the dataset. If none, will be determined by the + service. + description: Optional description of the dataset. + """ + raise NotImplementedError + + def LoadDataset(self, source_bucket, tables, dataset=None): + """Load all tables in a dataset to a database from object storage. + + Args: + source_bucket: Name of the bucket to load the data from. Should already + exist. Each table must have its own subfolder in the bucket named after + the table, containing one or more csv files that make up the table data. + tables: List of table names to load. + dataset: Optional name of the dataset. If none, will be determined by the + service. + """ + raise NotImplementedError + + def RequiresWarmUpSuite(self) -> bool: + """Verifies if the edw_service requires a warm up suite execution. + + Currently enabled for all service types, for parity. + + Returns: + A boolean value (True) if the warm suite is recommended. + """ + return True diff --git a/script/cumulus/pkb/perfkitbenchmarker/errors.py b/script/cumulus/pkb/perfkitbenchmarker/errors.py new file mode 100644 index 0000000..bf0fb52 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/errors.py @@ -0,0 +1,260 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""A common location for all perfkitbenchmarker-defined exceptions.""" + +import pprint + + +class Error(Exception): + pass + + +class Setup(object): + """Errors raised in setting up PKB.""" + + class PythonPackageRequirementUnfulfilled(Error): + """Error raised when a Python package requirement is unfulfilled.""" + pass + + class MissingExecutableError(Error): + """Error raised when we cannot find an executable we need.""" + pass + + class NoRunURIError(Error): + """Error raised when we were not given a run_uri and cannot infer it.""" + pass + + class BadRunURIError(Error): + """Error raised when the given run_uri is invalid.""" + pass + + class BadPreprovisionedDataError(Error): + """Error raised when the pre-provisioned data is invalid.""" + pass + + class InvalidSetupError(Error): + """Error raised when SetUpPKB was not called correctly.""" + pass + + class InvalidFlagConfigurationError(Error): + """Error raised when the set of command line flags is invalid.""" + pass + + class InvalidConfigurationError(Error): + """Error raised when configuration is invalid.""" + pass + + +class VirtualMachine(object): + """Errors raised by virtual_machine.py.""" + + class RemoteCommandError(Error): + """Error raised when a Remote Command or Remote Copy fails.""" + pass + + class RemoteExceptionError(Error): + pass + + class AuthError(Error): + """Error raised when one VM cannot access another VM.""" + pass + + class VirtualMachineError(Error): + """An error raised when VM is having an issue.""" + + @classmethod + def FromDebugInfo(cls, info, error_message): + """Create VirtualMachineError class from debug information. + + Args: + info: A dictionary containing debug information (such as traceroute + info). + error_message: the error message from the originating code. + + Returns: + a cls exception class + + Raises: + TypeError: if info is not an instance of dictionary. + """ + if isinstance(info, dict): + info = VirtualMachine.VirtualMachineError.FormatDebugInfo( + info, error_message) + return cls(info) + raise TypeError('The argument of FromDebugInfo should be an instance ' + 'of dictionary.') + + @staticmethod + def FormatDebugInfo(info, error_message): + """A function to return a string in human readable format. + + Args: + info: A dictionary containing debug information (such as traceroute + info). + error_message: the error message from the originating code. + + Returns: + A human readable string of debug information. 
+ """ + sep = '\n%s\n' % ('-' * 65) + + def AddHeader(error, header, message): + error += '{sep}{header}\n{message}\n'.format( + sep=sep, header=header, message=message) + return error + + def AddKeyIfExists(result, header, key): + if key in info: + result = AddHeader(result, header, info[key]) + del info[key] + return result + + result = AddHeader('', 'error_message:', + error_message) if error_message else '' + result = AddKeyIfExists(result, 'traceroute:', 'traceroute') + return AddHeader(result, 'Debug Info:', pprint.pformat(info)) + + class VmStateError(VirtualMachineError): + pass + + +class VmUtil(object): + """Errors raised by vm_util.py.""" + + class RestConnectionError(Error): + pass + + class IpParsingError(Error): + pass + + class UserSetupError(Error): + pass + + class ThreadException(Error): + pass + + class CalledProcessException(Error): + pass + + class IssueCommandError(Error): + pass + + class IssueCommandTimeoutError(Error): + pass + + +class Benchmarks(object): + """Errors raised by individual benchmark.""" + + class BucketCreationError(Error): + pass + + class PrepareException(Error): + pass + + class MissingObjectCredentialException(Error): + pass + + class RunError(Error): + pass + + class InsufficientCapacityCloudFailure(Error): + pass + + class QuotaFailure(Error): + """Errors that are related to insufficient quota on cloud provider.""" + + class RateLimitExceededError(Error): + pass + + class KnownIntermittentError(Error): + """Known intermittent failures of the benchmark. + + These are non-retryable, known failure modes of the benchmark. It is + recommended that the benchmark be completely re-run. + """ + + class UnsupportedConfigError(Error): + """Errors due to an unsupported configuration running.""" + pass + + +class Resource(object): + """Errors related to resource creation and deletion.""" + + class CreationError(Error): + """An error on creation which is not retryable.""" + pass + + class CleanupError(Error): + pass + + class RetryableCreationError(Error): + pass + + class RetryableDeletionError(Error): + pass + + class GetError(Error): + """An error on get which is not retryable.""" + pass + + class RetryableGetError(Error): + pass + + class UpdateError(Error): + """An error on update.""" + pass + + class SubclassNotFoundError(Error): + pass + + class RestoreError(Error): + """Errors while restoring a resource.""" + pass + + class FreezeError(Error): + """Errors while freezing a resource.""" + pass + + +class Config(object): + """Errors related to configs.""" + + class InvalidValue(Error): + """User provided an invalid value for a config option.""" + pass + + class MissingOption(Error): + """User did not provide a value for a required config option.""" + pass + + class ParseError(Error): + """Error raised when a config can't be loaded properly.""" + pass + + class UnrecognizedOption(Error): + """User provided a value for an unrecognized config option.""" + pass + + +class Juju(object): + """Errors related to the Juju OS_TYPE.""" + + class TimeoutException(Error): + pass + + class UnitErrorException(Error): + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/events.py b/script/cumulus/pkb/perfkitbenchmarker/events.py new file mode 100644 index 0000000..5d8055c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/events.py @@ -0,0 +1,182 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines observable events in PerfKitBenchmarker. + +All events are passed keyword arguments, and possibly a sender. See event +definitions below. + +Event handlers are run synchronously in an unspecified order; any exceptions +raised will be propagated. +""" +import logging +import os + +from absl import flags +import blinker +from perfkitbenchmarker import data +from perfkitbenchmarker import sample +from perfkitbenchmarker import stages + + +FLAGS = flags.FLAGS +_events = blinker.Namespace() + + +initialization_complete = _events.signal('system-ready', doc=""" +Signal sent once after the system is initialized (command-line flags +parsed, temporary directory initialized, run_uri set). + +Sender: None +Payload: parsed_flags, the parsed FLAGS object.""") + +provider_imported = _events.signal('provider-imported', doc=""" +Signal sent after a cloud provider's modules have been imported. + +Sender: string. Cloud provider name chosen from providers.VALID_CLOUDS.""") + +benchmark_start = _events.signal('benchmark-start', doc=""" +Signal sent at the beginning of a benchmark before any resources are +provisioned. + +Sender: None +Payload: benchmark_spec.""") + +on_vm_startup = _events.signal('on-vm-startup', doc=""" +Signal sent on vm startup. + +Sender: None +Payload: vm (VirtualMachine object).""") + + +benchmark_end = _events.signal('benchmark-end', doc=""" +Signal sent at the end of a benchmark after any resources have been +torn down (if run_stage includes teardown). + +Sender: None +Payload: benchmark_spec.""") + +PREPARE_PHASE = 'prepare' +RUN_PHASE = 'run' +CLEANUP_PHASE = 'cleanup' + +before_phase = _events.signal('before-phase', doc=""" +Signal sent immediately before a phase runs. + +Sender: the phase. Currently only RUN_PHASE, PREPARE_PHASE. +Payload: benchmark_spec.""") + +start_trace = _events.signal('start_trace', doc=""" +Signal sent to indicate that traces should begin collecting data + +Sender: the phase. Currently only RUN_PHASE +Payload: benchmark_spec.""") + +stop_trace = _events.signal('stop_trace', doc=""" +Signal sent to indicate that traces should stop collecting data + +Sender: the phase. Currently only RUN_PHASE +Payload: benchmark_spec.""") + +after_phase = _events.signal('after-phase', doc=""" +Signal sent immediately after a phase runs, regardless of whether it was +successful. + +Sender: the phase. Currently only RUN_PHASE, PREPARE_PHASE. +Payload: benchmark_spec.""") + +samples_created = _events.signal('samples-created', doc=""" +Called with samples list and benchmark spec. + +Signal sent immediately after a sample is created. +The samples' metadata is mutable, and may be updated by the subscriber. + +Sender: the phase. Currently only stages.RUN. +Payload: benchmark_spec (BenchmarkSpec), samples (list of sample.Sample).""") + +record_event = _events.signal('record-event', doc=""" +Signal sent when an event is recorded. + +Signal sent after an event occurred. Record start, end timestamp and metadata +of the event for analysis. 
+ +Sender: None +Payload: event (string), start_timestamp (float), end_timestamp (float), +metadata (dict).""") + + +def RegisterTracingEvents(): + record_event.connect(AddEvent, weak=False) + + +class TracingEvent(object): + """Represents an event object. + + Attributes: + sender: string. Name of the sending class/object. + event: string. Name of the event. + start_timestamp: float. Represents the start timestamp of the event. + end_timestamp: float. Represents the end timestamp of the event. + metadata: dict. Additional metadata of the event. + """ + + events = [] + + def __init__(self, sender, event, start_timestamp, end_timestamp, metadata): + self.sender = sender + self.event = event + self.start_timestamp = start_timestamp + self.end_timestamp = end_timestamp + self.metadata = metadata + + +def AddEvent(sender, event, start_timestamp, end_timestamp, metadata): + """Record a TracingEvent.""" + TracingEvent.events.append( + TracingEvent(sender, event, start_timestamp, end_timestamp, metadata)) + + +@on_vm_startup.connect +def _RunStartupScript(unused_sender, vm): + """Run startup script if necessary.""" + if FLAGS.startup_script: + vm.RemoteCopy(data.ResourcePath(FLAGS.startup_script)) + vm.startup_script_output = vm.RemoteCommand( + './%s' % os.path.basename(FLAGS.startup_script)) + + +@samples_created.connect +def _AddScriptSamples(unused_sender, benchmark_spec, samples): + def _ScriptResultToMetadata(out): + return {'stdout': out[0], 'stderr': out[1]} + for vm in benchmark_spec.vms: + if FLAGS.startup_script: + samples.append(sample.Sample( + 'startup', 0, '', _ScriptResultToMetadata(vm.startup_script_output))) + if FLAGS.postrun_script: + samples.append(sample.Sample( + 'postrun', 0, '', _ScriptResultToMetadata(vm.postrun_script_output))) + + +@after_phase.connect +def _RunPostRunScript(sender, benchmark_spec): + if sender != stages.RUN: + logging.info( + 'Receive after_phase signal from :%s, not ' + 'triggering _RunPostRunScript.', sender) + if FLAGS.postrun_script: + for vm in benchmark_spec.vms: + vm.RemoteCopy(FLAGS.postrun_script) + vm.postrun_script_output = vm.RemoteCommand( + './%s' % os.path.basename(FLAGS.postrun_script)) diff --git a/script/cumulus/pkb/perfkitbenchmarker/flag_util.py b/script/cumulus/pkb/perfkitbenchmarker/flag_util.py new file mode 100644 index 0000000..b6d0515 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/flag_util.py @@ -0,0 +1,556 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utility functions for working with user-supplied flags.""" + + +import logging +import os +import re + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import units + +import six +from six.moves import range +import yaml + +FLAGS = flags.FLAGS + +INTEGER_GROUP_REGEXP = re.compile(r'(\d+)(-(\d+))?(-(\d+))?$') +INTEGER_GROUP_REGEXP_COLONS = re.compile(r'(-?\d+)(:(-?\d+))?(:(-?\d+))?$') + + +class IntegerList(object): + """An immutable list of nonnegative integers. + + The list contains either single integers (ex: 5) or ranges (ex: + 8-12). Additionally, the user can provide a step to the range like so: + 8-24-2. The list can include as many elements as will fit in + memory. Furthermore, the memory required to hold a range will not + grow with the size of the range. + + Make a list with + lst = IntegerList(groups) + + where groups is a list whose elements are either single integers, + 2-tuples holding the low and high bounds of a range + (inclusive), or 3-tuples holding the low and high bounds, followed + by the step size. (Ex: [5, (8,12)] represents the integer list + 5,8,9,10,11,12, and [(8-14-2)] represents the list 8,10,12,14.) + + For negative number ranges use a colon separator (ex: "-2:1" is the integer + list -2, -1, 0, 1). + """ + + def __init__(self, groups): + self.groups = groups + + length = 0 + for elt in groups: + if isinstance(elt, six.integer_types): + length += 1 + if isinstance(elt, tuple): + length += len(self._CreateXrangeFromTuple(elt)) + + self.length = length + + def __len__(self): + return self.length + + def __getitem__(self, idx): + if not isinstance(idx, int): + raise TypeError() + if idx < 0 or idx >= self.length: + raise IndexError() + + group_idx = 0 + while idx > 0: + group = self.groups[group_idx] + + if not isinstance(group, tuple): + group_idx += 1 + idx -= 1 + else: + group_len = len(self._CreateXrangeFromTuple(group)) + if idx >= group_len: + group_idx += 1 + idx -= group_len + else: + step = 1 if len(group) == 2 else group[2] + return group[0] + idx * step + + if isinstance(self.groups[group_idx], tuple): + return self.groups[group_idx][0] + else: + return self.groups[group_idx] + + def __eq__(self, other): + if other is None: + return False + return tuple(self) == tuple(other) + + def __ne__(self, other): + if other is None: + return True + return tuple(self) != tuple(other) + + def __iter__(self): + for group in self.groups: + if isinstance(group, six.integer_types): + yield group + else: + for val in self._CreateXrangeFromTuple(group): + yield val + + def __str__(self): + return IntegerListSerializer().serialize(self) + + def __repr__(self): + return 'IntegerList([%s])' % self + + def _CreateXrangeFromTuple(self, input_tuple): + start = input_tuple[0] + step = 1 if len(input_tuple) == 2 else input_tuple[2] + stop_inclusive = input_tuple[1] + (1 if step > 0 else -1) + return range(start, stop_inclusive, step) + + +def _IsNonIncreasing(result, val): + """Determines if result would be non-increasing if val is appended. + + Args: + result: list integers and/or range tuples. + val: integer or range tuple to append. + Returns: + bool indicating if the appended list is non-increasing. 
+ """ + if result: + if isinstance(result[-1], tuple): + # extract high from previous tuple + prev = result[-1][1] + else: + # previous is int + prev = result[-1] + if val <= prev: + return True + return False + + +class IntegerListParser(flags.ArgumentParser): + """Parse a string containing a comma-separated list of nonnegative integers. + + The list may contain single integers and dash-separated ranges. For + example, "1,3,5-7" parses to [1,3,5,6,7] and "1-7-3" parses to + [1,4,7]. + + Can pass the flag on_nonincreasing to the constructor to tell it + what to do if the list is nonincreasing. Options are + - None: do nothing. + - IntegerListParser.WARN: log a warning. + - IntegerListParser.EXCEPTION: raise a ValueError. + + As a special case, instead of a string, can pass a list of integers + or an IntegerList. In these cases, the return value iterates over + the same integers as were in the argument. + + For negative number ranges use a colon separator, for example "-3:4:2" parses + to [-3, -1, 1, 3]. + """ + + syntactic_help = ('A comma-separated list of integers or integer ' + 'ranges. Ex: -1,3,5:7 is read as -1,3,5,6,7.') + + WARN = 'warn' + EXCEPTION = 'exception' + + def __init__(self, on_nonincreasing=None): + super(IntegerListParser, self).__init__() + + self.on_nonincreasing = on_nonincreasing + + def parse(self, inp): + """Parse an integer list. + + Args: + inp: a string, a list, or an IntegerList. + + Returns: + An iterable of integers. + + Raises: + ValueError: if inp doesn't follow a format it recognizes. + """ + + if isinstance(inp, IntegerList): + return inp + elif isinstance(inp, list): + return IntegerList(inp) + elif isinstance(inp, int): + return IntegerList([inp]) + + def HandleNonIncreasing(): + if self.on_nonincreasing == IntegerListParser.WARN: + logging.warning('Integer list %s is not increasing', inp) + elif self.on_nonincreasing == IntegerListParser.EXCEPTION: + raise ValueError('Integer list %s is not increasing' % inp) + + groups = inp.split(',') + result = [] + + for group in groups: + match = INTEGER_GROUP_REGEXP.match( + group) or INTEGER_GROUP_REGEXP_COLONS.match(group) + if match is None: + raise ValueError('Invalid integer list %s' % inp) + elif match.group(2) is None: + val = int(match.group(1)) + + if _IsNonIncreasing(result, val): + HandleNonIncreasing() + + result.append(val) + else: + low = int(match.group(1)) + high = int(match.group(3)) + step = int(match.group(5)) if match.group(5) is not None else 1 + step = -step if step > 0 and low > high else step + + if high <= low or (_IsNonIncreasing(result, low)): + HandleNonIncreasing() + + result.append((low, high, step)) + + return IntegerList(result) + + def flag_type(self): + return 'integer list' + + +class IntegerListSerializer(flags.ArgumentSerializer): + + def _SerializeRange(self, val): + separator = ':' if any(item < 0 for item in val) else '-' + return separator.join(str(item) for item in val) + + def serialize(self, il): + return ','.join([str(val) if isinstance(val, six.integer_types) + else self._SerializeRange(val) + for val in il.groups]) + + +def DEFINE_integerlist(name, default, help, on_nonincreasing=None, + flag_values=FLAGS, **kwargs): + """Register a flag whose value must be an integer list.""" + + parser = IntegerListParser(on_nonincreasing=on_nonincreasing) + serializer = IntegerListSerializer() + + flags.DEFINE(parser, name, default, help, flag_values, serializer, **kwargs) + + +class OverrideFlags(object): + """Context manager that applies any config_dict overrides to 
flag_values.""" + + def __init__(self, flag_values, config_dict): + """Initializes an OverrideFlags context manager. + + Args: + flag_values: FlagValues that is temporarily modified so that any options + in override_dict that are not 'present' in flag_values are applied to + flag_values. + Upon exit, flag_values will be restored to its original state. + config_dict: Merged config flags from the benchmark config and benchmark + configuration yaml file. + """ + self._flag_values = flag_values + self._config_dict = config_dict + self._flags_to_reapply = {} + + def __enter__(self): + """Overrides flag_values with options in override_dict.""" + if not self._config_dict: + return + + for key, value in six.iteritems(self._config_dict): + if key not in self._flag_values: + raise errors.Config.UnrecognizedOption( + 'Unrecognized option {0}.{1}. Each option within {0} must ' + 'correspond to a valid command-line flag.'.format('flags', key)) + if not self._flag_values[key].present: + self._flags_to_reapply[key] = self._flag_values[key].value + try: + self._flag_values[key].parse(value) # Set 'present' to True. + except flags.IllegalFlagValueError as e: + raise errors.Config.InvalidValue( + 'Invalid {0}.{1} value: "{2}" (of type "{3}").{4}{5}'.format( + 'flags', key, value, + value.__class__.__name__, os.linesep, e)) + + def __exit__(self, *unused_args, **unused_kwargs): + """Restores flag_values to its original state.""" + if not self._flags_to_reapply: + return + for key, value in six.iteritems(self._flags_to_reapply): + self._flag_values[key].value = value + self._flag_values[key].present = 0 + + +class UnitsParser(flags.ArgumentParser): + """Parse a flag containing a unit expression. + + Attributes: + convertible_to: list of units.Unit instances. A parsed expression must be + convertible to at least one of the Units in this list. For example, + if the parser requires that its inputs are convertible to bits, then + values expressed in KiB and GB are valid, but values expressed in meters + are not. + """ + + syntactic_help = ('A quantity with a unit. Ex: 12.3MB.') + + def __init__(self, convertible_to): + """Initialize the UnitsParser. + + Args: + convertible_to: Either an individual unit specification or a series of + unit specifications, where each unit specification is either a string + (e.g. 'byte') or a units.Unit. The parser input must be convertible to + at least one of the specified Units, or the parse() method will raise + a ValueError. + """ + if isinstance(convertible_to, (six.string_types, units.Unit)): + self.convertible_to = [units.Unit(convertible_to)] + else: + self.convertible_to = [units.Unit(u) for u in convertible_to] + + def parse(self, inp): + """Parse the input. + + Args: + inp: a string or a units.Quantity. If a string, it has the format + "", as in "12KB", or "2.5GB". + + Returns: + A units.Quantity. + + Raises: + ValueError: If the input cannot be parsed, or if it parses to a value with + improper units. + """ + if isinstance(inp, units.Quantity): + quantity = inp + else: + try: + quantity = units.ParseExpression(inp) + except Exception as e: + raise ValueError("Couldn't parse unit expression %r: %s" % + (inp, str(e))) + if not isinstance(quantity, units.Quantity): + raise ValueError('Expression %r evaluates to a unitless value.' 
% inp) + + for unit in self.convertible_to: + try: + quantity.to(unit) + break + except units.DimensionalityError: + pass + else: + raise ValueError( + 'Expression {0!r} is not convertible to an acceptable unit ' + '({1}).'.format(inp, ', '.join(str(u) for u in self.convertible_to))) + + return quantity + + +class UnitsSerializer(flags.ArgumentSerializer): + def serialize(self, units): + return str(units) + + +def DEFINE_units(name, default, help, convertible_to, + flag_values=flags.FLAGS, **kwargs): + """Register a flag whose value is a units expression. + + Args: + name: string. The name of the flag. + default: units.Quantity. The default value. + help: string. A help message for the user. + convertible_to: Either an individual unit specification or a series of unit + specifications, where each unit specification is either a string (e.g. + 'byte') or a units.Unit. The flag value must be convertible to at least + one of the specified Units to be considered valid. + flag_values: the absl.flags.FlagValues object to define the flag in. + """ + parser = UnitsParser(convertible_to=convertible_to) + serializer = UnitsSerializer() + flags.DEFINE(parser, name, default, help, flag_values, serializer, **kwargs) + + +def StringToBytes(string): + """Convert an object size, represented as a string, to bytes. + + Args: + string: the object size, as a string with a quantity and a unit. + + Returns: + an integer. The number of bytes in the size. + + Raises: + ValueError, if either the string does not represent an object size + or if the size does not contain an integer number of bytes. + """ + + try: + quantity = units.ParseExpression(string) + except Exception: + # Catching all exceptions is ugly, but we don't know what sort of + # exception pint might throw, and we want to turn any of them into + # ValueError. + raise ValueError("Couldn't parse size %s" % string) + + try: + bytes = quantity.m_as(units.byte) + except units.DimensionalityError: + raise ValueError("Quantity %s is not a size" % string) + + if bytes != int(bytes): + raise ValueError("Size %s has a non-integer number (%s) of bytes!" % + (string, bytes)) + + if bytes < 0: + raise ValueError("Size %s has a negative number of bytes!" % string) + + return int(bytes) + + +def StringToRawPercent(string): + """Convert a string to a raw percentage value. + + Args: + string: the percentage, with '%' on the end. + + Returns: + A floating-point number, holding the percentage value. + + Raises: + ValueError, if the string can't be read as a percentage. + """ + + if len(string) <= 1: + raise ValueError("String '%s' too short to be percentage." % string) + + if string[-1] != '%': + raise ValueError("Percentage '%s' must end with '%%'" % string) + + # This will raise a ValueError if it can't convert the string to a float. + val = float(string[:-1]) + + if val < 0.0 or val > 100.0: + raise ValueError('Quantity %s is not a valid percentage' % val) + + return val + + +# The YAML flag type is necessary because flags can be read either via +# the command line or from a config file. If they come from a config +# file, they will already be parsed as YAML, but if they come from the +# command line, they will be raw strings. The point of this flag is to +# guarantee a consistent representation to the rest of the program. +class YAMLParser(flags.ArgumentParser): + """Parse a flag containing YAML.""" + + syntactic_help = 'A YAML expression.' + + def parse(self, inp): + """Parse the input. + + Args: + inp: A string or the result of yaml.safe_load. 
If a string, should be + a valid YAML document. + """ + + if isinstance(inp, six.string_types): + # This will work unless the user writes a config with a quoted + # string that, if unquoted, would be parsed as a non-string + # Python type (example: '123'). In that case, the first + # yaml.safe_load() in the config system will strip away the quotation + # marks, and this second yaml.safe_load() will parse it as the + # non-string type. However, I think this is the best we can do + # without significant changes to the config system, and the + # problem is unlikely to occur in PKB. + try: + return yaml.safe_load(inp) + except yaml.YAMLError as e: + raise ValueError("Couldn't parse YAML string '%s': %s" % + (inp, str(e))) + else: + return inp + + +class YAMLSerializer(flags.ArgumentSerializer): + + def serialize(self, val): + return yaml.dump(val) + + +def DEFINE_yaml(name, default, help, flag_values=flags.FLAGS, **kwargs): + """Register a flag whose value is a YAML expression. + + Args: + name: string. The name of the flag. + default: object. The default value of the flag. + help: string. A help message for the user. + flag_values: the absl.flags.FlagValues object to define the flag in. + kwargs: extra arguments to pass to absl.flags.DEFINE(). + """ + + parser = YAMLParser() + serializer = YAMLSerializer() + + flags.DEFINE(parser, name, default, help, flag_values, serializer, **kwargs) + + +def ParseKeyValuePairs(strings): + """Parses colon separated key value pairs from a list of strings. + + Pairs should be separated by a comma and key and value by a colon, e.g., + ['k1:v1', 'k2:v2,k3:v3']. + + Args: + strings: A list of strings. + + Returns: + A dict populated with keys and values from the flag. + """ + pairs = {} + for pair in [kv for s in strings for kv in s.split(',')]: + try: + key, value = pair.split(':', 1) + pairs[key] = value + except ValueError: + logging.error('Bad key value pair format. Skipping "%s".', pair) + continue + + return pairs + + +def GetProvidedCommandLineFlags(): + """Return flag names and values that were specified on the command line. + + Returns: + A dictionary of provided flags in the form: {flag_name: flag_value}. + """ + return {k: FLAGS[k].value for k in FLAGS if FLAGS[k].present} diff --git a/script/cumulus/pkb/perfkitbenchmarker/hpc_util.py b/script/cumulus/pkb/perfkitbenchmarker/hpc_util.py new file mode 100644 index 0000000..0dd6f04 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/hpc_util.py @@ -0,0 +1,63 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""HPC utility functions.""" + +from absl import flags +from perfkitbenchmarker import vm_util + +flags.DEFINE_boolean('mpirun_allow_run_as_root', False, + 'Whether to allow mpirun to be run by the root user.') + + +def CreateMachineFile(vms, + num_slots=lambda vm: vm.NumCpusForBenchmark(), + remote_path='MACHINEFILE', + mpi_vendor='openmpi'): + """Create a file with the IP of each machine in the cluster on its own line. 
+ + The file is then pushed to the provided path on the master vm. + + Pass in "num_slots=lambda vm: 0" to create a machine file without a defined + number of slots. + + OpenMPI's format: " slots=" + https://www.open-mpi.org/faq/?category=running#mpirun-hostfile + IntelMPI's format: ":" + https://software.intel.com/content/www/us/en/develop/articles/controlling-process-placement-with-the-intel-mpi-library.html + + Args: + vms: The list of vms which will be in the cluster. + num_slots: The function to use to calculate the number of slots + for each vm. Defaults to vm.NumCpusForBenchmark() + remote_path: remote path of the machine file. Defaults to MACHINEFILE + mpi_vendor: Implementation of MPI. Can be openmpi or intel. + """ + + def Line(vm, vm_name=None): + vm_name = vm_name or vm.internal_ip + slots = num_slots(vm) + if not slots: + return vm_name + if mpi_vendor == 'intel': + return f'{vm_name}:{slots}' + return f'{vm_name} slots={slots}' + + with vm_util.NamedTemporaryFile(mode='w') as machine_file: + master_vm = vms[0] + machine_file.write(Line(master_vm, 'localhost') + '\n') + for vm in vms[1:]: + machine_file.write(Line(vm) + '\n') + machine_file.close() + master_vm.PushFile(machine_file.name, remote_path) diff --git a/script/cumulus/pkb/perfkitbenchmarker/import_util.py b/script/cumulus/pkb/perfkitbenchmarker/import_util.py new file mode 100644 index 0000000..197462a --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/import_util.py @@ -0,0 +1,43 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for dynamically importing python files.""" + +import importlib +import pkgutil + + +def LoadModulesForPath(path, package_prefix=None): + """Load all modules on 'path', with prefix 'package_prefix'. + + Example usage: + LoadModulesForPath(__path__, __name__) + + Args: + path: Path containing python modules. + package_prefix: prefix (e.g., package name) to prefix all modules. + 'path' and 'package_prefix' will be joined with a '.'. + Yields: + Imported modules. + """ + prefix = package_prefix + '.' if package_prefix else '' + # If iter_modules is invoked within a zip file, the zipimporter adds the + # prefix to the names of archived modules, but not archived packages. Because + # the prefix is necessary to correctly import a package, this behavior is + # undesirable, so do not pass the prefix to iter_modules. Instead, apply it + # explicitly afterward. + for _, modname, _ in pkgutil.iter_modules(path): + # Skip recursively listed modules (e.g. 'subpackage.module'). + if '.' not in modname: + yield importlib.import_module(prefix + modname) diff --git a/script/cumulus/pkb/perfkitbenchmarker/kubernetes_helper.py b/script/cumulus/pkb/perfkitbenchmarker/kubernetes_helper.py new file mode 100644 index 0000000..d8711c0 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/kubernetes_helper.py @@ -0,0 +1,112 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile +import time + +from absl import flags +import jinja2 +from perfkitbenchmarker import data +from perfkitbenchmarker import vm_util + +FLAGS = flags.FLAGS +flags.DEFINE_integer('k8s_get_retry_count', 18, + 'Maximum number of waits for getting LoadBalancer external IP') +flags.DEFINE_integer('k8s_get_wait_interval', 10, + 'Wait interval for getting LoadBalancer external IP') + + +def checkKubernetesFlags(): + if not FLAGS.kubectl: + raise Exception('Please provide path to kubectl tool using --kubectl ' + 'flag. Exiting.') + if not FLAGS.kubeconfig: + raise Exception('Please provide path to kubeconfig using --kubeconfig ' + 'flag. Exiting.') + + +def CreateFromFile(file_name): + checkKubernetesFlags() + create_cmd = [FLAGS.kubectl, '--kubeconfig=%s' % FLAGS.kubeconfig, 'create', + '-f', file_name] + vm_util.IssueRetryableCommand(create_cmd) + + +def DeleteFromFile(file_name): + checkKubernetesFlags() + delete_cmd = [FLAGS.kubectl, '--kubeconfig=%s' % FLAGS.kubeconfig, 'delete', + '-f', file_name, '--ignore-not-found'] + vm_util.IssueRetryableCommand(delete_cmd) + + +def DeleteAllFiles(file_list): + for file in file_list: + DeleteFromFile(file) + + +def CreateAllFiles(file_list): + for file in file_list: + CreateFromFile(file) + + +def Get(resource, resourceInstanceName, labelFilter, jsonSelector): + checkKubernetesFlags() + get_pod_cmd = [FLAGS.kubectl, '--kubeconfig=%s' % FLAGS.kubeconfig, + 'get', resource] + if len(resourceInstanceName) > 0: + get_pod_cmd.append(resourceInstanceName) + if len(labelFilter) > 0: + get_pod_cmd.append('-l ' + labelFilter) + get_pod_cmd.append('-ojsonpath={{{}}}'.format(jsonSelector)) + stdout, stderr, _ = vm_util.IssueCommand(get_pod_cmd, suppress_warning=True, + raise_on_failure=False) + if len(stderr) > 0: + raise Exception("Error received from kubectl get: " + stderr) + return stdout + + +def GetWithWaitForContents(resource, resourceInstanceName, filter, jsonFilter): + ret = Get(resource, resourceInstanceName, filter, jsonFilter) + numWaitsLeft = FLAGS.k8s_get_retry_count + while len(ret) == 0 and numWaitsLeft > 0: + time.sleep(FLAGS.k8s_get_wait_interval) + ret = Get(resource, resourceInstanceName, filter, jsonFilter) + numWaitsLeft -= 1 + return ret + + +def CreateResource(resource_body): + with vm_util.NamedTemporaryFile(mode='w') as tf: + tf.write(resource_body) + tf.close() + CreateFromFile(tf.name) + + +def DeleteResource(resource_body): + with vm_util.NamedTemporaryFile() as tf: + tf.write(resource_body) + tf.close() + DeleteFromFile(tf.name) + + +def CreateRenderedManifestFile(filename, config): + """Returns a file containing a rendered Jinja manifest (.j2) template.""" + manifest_filename = data.ResourcePath(filename) + environment = jinja2.Environment(undefined=jinja2.StrictUndefined) + with open(manifest_filename) as manifest_file: + manifest_template = environment.from_string(manifest_file.read()) + rendered_yaml = tempfile.NamedTemporaryFile(mode='w') + 
rendered_yaml.write(manifest_template.render(config)) + rendered_yaml.flush() + return rendered_yaml diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/__init__.py new file mode 100644 index 0000000..4b29b83 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/__init__.py @@ -0,0 +1,35 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains benchmark imports and a list of benchmarks. + +All modules within this package are considered benchmarks, and are loaded +dynamically. Add non-benchmark code to other packages. +""" + +import os +from perfkitbenchmarker import import_util + + +def _LoadBenchmarks(): + __path__.append(os.path.join(__path__[0], "intel_community")) + return list(import_util.LoadModulesForPath(__path__, __name__)) + +BENCHMARKS = _LoadBenchmarks() + +VALID_BENCHMARKS = {} +for module in BENCHMARKS: + if module.BENCHMARK_NAME in VALID_BENCHMARKS: + raise ValueError('There are multiple benchmarks with BENCHMARK_NAME "%s"' % + (module.BENCHMARK_NAME)) + VALID_BENCHMARKS[module.BENCHMARK_NAME] = module diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py b/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py new file mode 100644 index 0000000..164e7cd --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/cluster_boot_benchmark.py @@ -0,0 +1,252 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Records the time required to boot a cluster of VMs.""" + +import logging +import time +from typing import List +from absl import flags +from perfkitbenchmarker import configs +from perfkitbenchmarker import sample +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util + +BENCHMARK_NAME = 'cluster_boot' +BENCHMARK_CONFIG = """ +cluster_boot: + description: > + Create a cluster, record all times to boot. + Specify the cluster size with --num_vms. 
+ vm_groups: + default: + vm_spec: + AWS: + machine_type: m5.large + zone: us-east-1 + Azure: + machine_type: Standard_D2s_v3 + zone: eastus + boot_disk_type: StandardSSD_LRS + GCP: + machine_type: n1-standard-2 + zone: us-central1-a + boot_disk_type: pd-ssd + IBMCloud: + machine_type: cx2-2x4 + zone: us-south-1 + Kubernetes: + image: null + OpenStack: + machine_type: t1.small + zone: nova + vm_count: null + flags: + # We don't want boot time samples to be affected from retrying, so don't + # retry cluster_boot when rate limited. + retry_on_rate_limited: False +""" + +flags.DEFINE_boolean( + 'cluster_boot_time_reboot', False, + 'Whether to reboot the VMs during the cluster boot benchmark to measure ' + 'reboot performance.') +flags.DEFINE_boolean( + 'cluster_boot_test_port_listening', False, + 'Test the time it takes to successfully connect to the port that is used to run the remote command.' +) +FLAGS = flags.FLAGS + + +def GetConfig(user_config): + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def Prepare(unused_benchmark_spec): + pass + + +def GetTimeToBoot(vms): + """Creates Samples for the boot time of a list of VMs. + + The boot time is the time difference from before the VM is created to when + the VM is responsive to SSH commands. + + Args: + vms: List of BaseVirtualMachine subclasses. + + Returns: + List of Samples containing the boot times and an overall cluster boot time. + """ + if not vms: + return [] + + min_create_start_time = min(vm.create_start_time for vm in vms) + + max_create_delay_sec = 0 + max_boot_time_sec = 0 + max_port_listening_time_sec = 0 + max_rdp_port_listening_time_sec = 0 + samples = [] + os_types = set() + for i, vm in enumerate(vms): + assert vm.bootable_time + assert vm.create_start_time + assert vm.bootable_time >= vm.create_start_time + os_types.add(vm.OS_TYPE) + create_delay_sec = vm.create_start_time - min_create_start_time + max_create_delay_sec = max(max_create_delay_sec, create_delay_sec) + metadata = { + 'machine_instance': i, + 'num_vms': len(vms), + 'os_type': vm.OS_TYPE, + 'create_delay_sec': '%0.1f' % create_delay_sec + } + boot_time_sec = vm.bootable_time - min_create_start_time + max_boot_time_sec = max(max_boot_time_sec, boot_time_sec) + samples.append( + sample.Sample('Boot Time', boot_time_sec, 'seconds', metadata)) + if FLAGS.cluster_boot_test_port_listening: + assert vm.port_listening_time + assert vm.port_listening_time >= vm.create_start_time + port_listening_time_sec = vm.port_listening_time - min_create_start_time + max_port_listening_time_sec = max(max_port_listening_time_sec, + port_listening_time_sec) + samples.append( + sample.Sample('Port Listening Time', port_listening_time_sec, + 'seconds', metadata)) + # TODO(user): refactor so Windows specifics aren't in linux_benchmarks + if FLAGS.cluster_boot_test_rdp_port_listening: + assert vm.rdp_port_listening_time + assert vm.rdp_port_listening_time >= vm.create_start_time + rdp_port_listening_time_sec = ( + vm.rdp_port_listening_time - min_create_start_time) + max_rdp_port_listening_time_sec = max(max_rdp_port_listening_time_sec, + rdp_port_listening_time_sec) + samples.append( + sample.Sample('RDP Port Listening Time', rdp_port_listening_time_sec, + 'seconds', metadata)) + + # Add a total cluster boot sample as the maximum boot time. 
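+  # Each per-VM boot time above is measured from the earliest create_start_time
+  # in the cluster to that VM's bootable_time, so the cluster boot time is the
+  # largest of those per-VM values rather than a separately timed interval.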
+ metadata = { + 'num_vms': len(vms), + 'os_type': ','.join(sorted(os_types)), + 'max_create_delay_sec': '%0.1f' % max_create_delay_sec + } + samples.append( + sample.Sample('Cluster Boot Time', max_boot_time_sec, 'seconds', + metadata)) + if FLAGS.cluster_boot_test_port_listening: + samples.append( + sample.Sample('Cluster Port Listening Time', + max_port_listening_time_sec, 'seconds', metadata)) + if FLAGS.cluster_boot_test_rdp_port_listening: + samples.append( + sample.Sample('Cluster RDP Port Listening Time', + max_rdp_port_listening_time_sec, 'seconds', metadata)) + if max_create_delay_sec > 1: + logging.warning( + 'The maximum delay between starting VM creations is %0.1fs.', + max_create_delay_sec) + + return samples + + +def _MeasureReboot(vms): + """Measures the time to reboot the cluster of VMs. + + Args: + vms: List of BaseVirtualMachine subclasses. + + Returns: + List of Samples containing the reboot times and an overall cluster reboot + time. + """ + before_reboot_timestamp = time.time() + reboot_times = vm_util.RunThreaded(lambda vm: vm.Reboot(), vms) + cluster_reboot_time = time.time() - before_reboot_timestamp + return _GetVmOperationDataSamples(reboot_times, cluster_reboot_time, 'Reboot', + vms) + + +def MeasureDelete( + vms: List[virtual_machine.BaseVirtualMachine]) -> List[sample.Sample]: + """Measures the time to delete the cluster of VMs. + + Args: + vms: List of BaseVirtualMachine subclasses. + + Returns: + List of Samples containing the delete times and an overall cluster delete + time. + """ + before_delete_timestamp = time.time() + vm_util.RunThreaded(lambda vm: vm.Delete(), vms) + delete_times = [vm.delete_end_time - vm.delete_start_time for vm in vms] + max_delete_end_time = max([vm.delete_end_time for vm in vms]) + cluster_delete_time = max_delete_end_time - before_delete_timestamp + return _GetVmOperationDataSamples(delete_times, cluster_delete_time, 'Delete', + vms) + + +def _GetVmOperationDataSamples( + operation_times: List[int], cluster_time: int, operation: str, + vms: List[virtual_machine.BaseVirtualMachine]) -> List[sample.Sample]: + """Append samples from given data. + + Args: + operation_times: The list of times for each vms. + cluster_time: The cluster time for the benchmark. + operation: The benchmark operation being run, capitalized with no spaces. + vms: list of virtual machines. + + Returns: + List of samples constructed from data. + """ + samples = [] + metadata_list = [] + for i, vm in enumerate(vms): + metadata = { + 'machine_instance': i, + 'num_vms': len(vms), + 'os_type': vm.OS_TYPE + } + metadata_list.append(metadata) + for operation_time, metadata in zip(operation_times, metadata_list): + samples.append( + sample.Sample(f'{operation} Time', operation_time, 'seconds', metadata)) + os_types = set([vm.OS_TYPE for vm in vms]) + metadata = {'num_vms': len(vms), 'os_type': ','.join(sorted(os_types))} + samples.append( + sample.Sample(f'Cluster {operation} Time', cluster_time, 'seconds', + metadata)) + return samples + + +def Run(benchmark_spec): + """Measure the boot time for all VMs. + + Args: + benchmark_spec: The benchmark specification. + + Returns: + An empty list (all boot samples will be added later). 
+ """ + samples = [] + if FLAGS.cluster_boot_time_reboot: + samples.extend(_MeasureReboot(benchmark_spec.vms)) + return samples + + +def Cleanup(unused_benchmark_spec): + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/docker_passthrough.py b/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/docker_passthrough.py new file mode 100644 index 0000000..5ca5336 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_benchmarks/docker_passthrough.py @@ -0,0 +1,851 @@ +from absl import flags +from perfkitbenchmarker import configs, events, stages, vm_util, sample +from perfkitbenchmarker.linux_packages import k8s +from perfkitbenchmarker.linux_packages import skopeo +try: + from perfkitbenchmarker.linux_packages import archived_images +except: + archived_images = None +from perfkitbenchmarker.linux_packages import habana +from perfkitbenchmarker.linux_packages import runwith +from perfkitbenchmarker.linux_packages import docker_ce +from perfkitbenchmarker.linux_packages import INSTALL_DIR +from perfkitbenchmarker.linux_packages import docker_auth +from posixpath import join +from uuid import uuid4 +from yaml import safe_load_all, dump_all +import logging +import time +import os + +BENCHMARK_NAME = "docker_pt" +BENCHMARK_CONFIG = """ +docker_pt: + description: Docker Passthrough Benchmark + vm_groups: {} + flags: + dpt_docker_image: "" + dpt_docker_dataset: [] + dpt_docker_options: "" + dpt_kubernetes_yaml: "" + dpt_kubernetes_job: "" + dpt_namespace: "" + dpt_logs_dir: "" + dpt_timeout: "300" + dpt_name: "" + dpt_script_args: "" + dpt_cluster_yaml: "" + dpt_registry_map: [] + dpt_trace_mode: [] + dpt_params: "" + dpt_tunables: "" + dpt_debug: [] + dpt_vm_groups: "worker" + dpt_reuse_sut: false +""" + +run_seq = 0 +SUT_VM_CTR = "controller" +KUBERNETES_CONFIG = "kubernetes_config.yaml" +HUGEPAGE_NR = "/sys/kernel/mm/hugepages/hugepages-{}/nr_hugepages" +LOGS_TARFILE = "{}.tar" +ITERATION_DIR = "itr-{}" +KPISH = "kpi.sh" +SF_NS_LABEL = "cn-benchmarking.intel.com/sf_namespace=true" +EXPORT_LOGS_TARFILE = "~/export-logs.tar" +KUBELET_CONFIG = "kubelet-config.yaml" + +FLAGS = flags.FLAGS +flags.DEFINE_string("dpt_name", "", "Benchmark name") +flags.DEFINE_list("dpt_script_args", [], "The KPI and setup script args") +flags.DEFINE_string("dpt_docker_image", "", "Docker image name") +flags.DEFINE_list("dpt_docker_dataset", [], "Docker dataset images") +flags.DEFINE_string("dpt_docker_options", "", "Docker run options") +flags.DEFINE_string("dpt_kubernetes_yaml", "", "Kubernetes run yaml file") +flags.DEFINE_string("dpt_kubernetes_job", "benchmark", "Benchmark job name") +flags.DEFINE_string("dpt_namespace", str(uuid4()), "namespace") +flags.DEFINE_string("dpt_logs_dir", "", "The logs directory") +flags.DEFINE_string("dpt_timeout", "300", "Execution timeout") +flags.DEFINE_string("dpt_cluster_yaml", "", "The cluster configuration file") +flags.DEFINE_string("dpt_params", "", "The workload configuration parameters") +flags.DEFINE_string("dpt_tunables", "", "The workload tunable configuration parameters") +flags.DEFINE_list("dpt_registry_map", [], "Replace the registries") +flags.DEFINE_list("dpt_vm_groups", ["worker"], "Define the mapping of cluster-config groups to vm_groups") +flags.DEFINE_list("dpt_debug", [], "Set debug breakpoints") +flags.DEFINE_list("dpt_trace_mode", [], "Specify the trace mode triple") +flags.DEFINE_boolean("dpt_reuse_sut", False, "Enable when IWOS running in cloud") + + +def _GetTempDir(): + return join(INSTALL_DIR, FLAGS.dpt_namespace) + + 
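+# Invocation sketch (the values below are illustrative; the flag names match
+# the definitions above). A typical docker_pt run points PKB at the workload's
+# rendered Kubernetes manifest and a local logs directory:
+#
+#   ./pkb.py --benchmarks=docker_pt \
+#     --dpt_name="dummy workload" \
+#     --dpt_kubernetes_yaml=kubernetes-config.yaml \
+#     --dpt_kubernetes_job=benchmark \
+#     --dpt_logs_dir=/tmp/dpt-logs \
+#     --dpt_vm_groups=worker
+
+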
+def _MakeTempDir(vm): + tmp_dir = _GetTempDir() + vm.RemoteCommand("sudo mkdir -p {0} && bash -c 'sudo chown $(id -u):$(id -g) {0}'".format(tmp_dir)) + return tmp_dir + + +def _SetBreakPoint(breakpoint): + if breakpoint in FLAGS.dpt_debug: + try: + logging.info("Pause for debugging at %s", breakpoint) + while not os.path.exists(join(vm_util.GetTempDir(), "Resume" + breakpoint)): + time.sleep(5) + except: + pass + logging.info("Resume after debugging %s", breakpoint) + + +def _FormatKPI(line): + key, _, value = line.rpartition(":") + key = key.strip() + value = float(value.strip()) + if key.endswith(")"): + key, _, unit = key.rpartition("(") + unit = unit[0:-1].strip() + key = key.strip() + else: + unit = "-" + return key, value, unit + + +def _ParseKPI(metadata): + cmd = "cd {} && ./{} {}".format(join(FLAGS.dpt_logs_dir, ITERATION_DIR.format(run_seq)), KPISH, " ".join(FLAGS.dpt_script_args)) + stdout, _, retcode = vm_util.IssueCommand(["sh", "-c", cmd]) + + samples = [] + for line in stdout.split("\n"): + if line.startswith("##"): + k, _, v = line[2:].rpartition(":") + k = k.strip() + v = v.strip() + if k and v: + metadata[k]=v + else: + try: + k, v, u = _FormatKPI(line) + if k.startswith("*"): + samples.append(sample.Sample(k[1:], v, u, {"primary_sample": True})) + elif not k.startswith("#"): + samples.append(sample.Sample(k, v, u)) + except Exception: + pass + if len(samples) == 1: + samples[0].metadata["primary_sample"] = True + return samples + + +def GetConfig(user_config): + return configs.LoadConfig(BENCHMARK_CONFIG, user_config, BENCHMARK_NAME) + + +def CheckPrerequisites(benchmark_config): + pass + + +def _ReplaceImage(image): + if FLAGS.dpt_registry_map: + if FLAGS.dpt_registry_map[0]: + return image.replace(FLAGS.dpt_registry_map[0], FLAGS.dpt_registry_map[1]) + return FLAGS.dpt_registry_map[1] + image + return image + + +def _WalkTo(node, name): + try: + if name in node: + return node + for item1 in node: + node1 = _WalkTo(node[item1], name) + if node1: + return node1 + except Exception: + pass + return None + + +def _GetNodes(controller0): + nodes = {} + stdout, _ = controller0.RemoteCommand( + "kubectl get nodes -o='custom-columns=name:.metadata.name,ip:.status.addresses[?(@.type==\"InternalIP\")].address' --no-headers") + for line in stdout.split("\n"): + fields = line.strip().split(" ") + if fields[-1]: + nodes[fields[-1]] = fields[0] + return nodes + + +def _WriteKubeletConfigFile(options): + kc = [{ + "apiVersion": "kubelet.config.k8s.io/v1beta1", + "kind": "KubeletConfiguration", + }] + kc[0].update(options) + + kcf = f"{FLAGS.dpt_logs_dir}/{KUBELET_CONFIG}" + with open(kcf, "w") as fd: + dump_all(kc, fd) + return kcf + + +def _ParseClusterConfigs(vm_groups, nodes={}): + vimages = [] + workers = [] + nidx = {} + options = [] + + with open(FLAGS.dpt_cluster_yaml, "rt") as fd: + for doc in safe_load_all(fd): + if "cluster" in doc: + for i, cluster1 in enumerate(doc["cluster"]): + name = FLAGS.dpt_vm_groups[i % len(FLAGS.dpt_vm_groups)] + if name not in nidx: + nidx[name] = 0 + worker1 = { + "vm": vm_groups[name][nidx[name] % len(vm_groups[name])], + "labels": cluster1["labels"], + } + worker1["labels"]["VM_GROUP_" + name.upper()] = "required" + internal_ip = worker1["vm"].internal_ip + try: + worker1["name"] = nodes[internal_ip] + except Exception: + worker1["name"] = internal_ip + nidx[name] = nidx[name] + 1 + workers.append(worker1) + if "vm" in doc: + for vim1 in doc["vm"]: + for vm10 in vm_groups[vim1["name"]]: + vimages.append({ + "name": vim1["name"], + "vm": vm10, 
+ "env": vim1["env"], + }) + if "setup" in vim1: + vimages[-1]["setup"] = vim1["setup"] + options.extend(["-e", "{}={}".format(vim1["env"], vm10.internal_ip)]) + if "kubernetes" in doc: + dock8s = doc["kubernetes"] + if "cni" in dock8s: + FLAGS.k8s_cni = dock8s["cni"] + if "cni-options" in dock8s: + FLAGS.k8s_cni_options = dock8s["cni-options"] + if "kubevirt" in dock8s: + FLAGS.k8s_kubevirt = dock8s["kubevirt"] + if "kubelet-options" in dock8s: + option = "--config=" + _WriteKubeletConfigFile(dock8s["kubelet-options"]) + if option not in FLAGS.k8s_kubeadm_options: + FLAGS.k8s_kubeadm_options.append(option) + + return workers, vimages, options + + +def _AddNodeAffinity(spec, workers): + if "affinity" not in spec: + spec["affinity"] = {} + + if "nodeAffinity" not in spec["affinity"]: + spec["affinity"]["nodeAffinity"] = {} + + if "requiredDuringSchedulingIgnoredDuringExecution" not in spec["affinity"]["nodeAffinity"]: + spec["affinity"]["nodeAffinity"]["requiredDuringSchedulingIgnoredDuringExecution"] = {} + + if "nodeSelectorTerms" not in spec["affinity"]["nodeAffinity"]["requiredDuringSchedulingIgnoredDuringExecution"]: + spec["affinity"]["nodeAffinity"]["requiredDuringSchedulingIgnoredDuringExecution"]["nodeSelectorTerms"] = [] + + spec["affinity"]["nodeAffinity"]["requiredDuringSchedulingIgnoredDuringExecution"]["nodeSelectorTerms"].append({ + "matchExpressions": [{ + "key": "kubernetes.io/hostname", + "operator": "In", + "values": [x["name"] for x in workers], + }], + }) + + +def _ModifyImageRef(spec, images, ifcloud): + for c1 in spec: + if c1["image"] in images: + c1["image"] = FLAGS.dpt_registry_map[1] + images[c1["image"]][0] + if ifcloud: + c1["imagePullPolicy"] = "IfNotPresent" + + +def _ScanImages(docs): + images = {} + for doc in docs: + for c1 in ["containers", "initContainers"]: + spec = _WalkTo(doc, c1) + if spec and spec[c1]: + for c2 in spec[c1]: + if "image" in c2: + images[c2["image"]] = 1 + + registry_url = FLAGS.dpt_registry_map[0] if FLAGS.dpt_registry_map else "" + priv_images = {} + for image in images: + attr = skopeo.InspectImage(image, registry_url) + if attr: + priv_images[image] = attr + return priv_images + + +def _ModifyEnvs(spec, vimages, workers): + if vimages: + for c1 in spec: + if "env" not in c1: + c1["env"] = [] + elif not c1["env"]: + c1["env"] = [] + for vm1 in vimages: + c1["env"].append({ + "name": vm1["env"], + "value": vm1["vm"].internal_ip, + }) + for c1 in spec: + if "env" in c1: + if isinstance(c1["env"], list): + for e1 in c1["env"]: + if "name" in e1 and "value" in e1: + if e1["name"] == "CLUSTER_WORKERS" and e1["value"] == "": + e1["value"] = ",".join([w["name"] for w in workers]) + + +def _PullImages(vm, images): + registry_url = FLAGS.dpt_registry_map[0] if FLAGS.dpt_registry_map else "" + priv_images = {} + for image in images: + attr = skopeo.InspectImage(image, registry_url) + if attr: + priv_images[image] = attr + if priv_images: + priv_images_remaining = archived_images.CopyImagesToDocker(vm, priv_images) if archived_images else priv_images + if priv_images_remaining: + skopeo.CopyImagesToDocker(vm, priv_images_remaining) + + +def _UpdateK8sConfig(controller0, workers, vimages): + with open(FLAGS.dpt_kubernetes_yaml, "rt") as fd: + docs = [d for d in safe_load_all(fd) if d] + + images = _ScanImages(docs) + if controller0.CLOUD != "Static" or FLAGS.install_packages: + nworkers = len(_UniqueVms(workers)) + worker1 = workers[0]["vm"] + if nworkers == 1 and docker_ce.IsDocker(worker1): + images_remaining = 
archived_images.CopyImagesToDocker(worker1, images) if archived_images else images + if images_remaining: + skopeo.CopyImagesToDocker(worker1, images_remaining) + elif not skopeo.IsSUTAccessible(FLAGS.dpt_registry_map[0]): + vms = _UniqueVms(workers, controller0) + registry_url = k8s.CreateRegistry(controller0, vms) + images_remaining = archived_images.CopyImagesToRegistry(controller0 if nworkers > 1 else worker1, images, registry_url, nworkers > 1) if archived_images else images + if images_remaining: + skopeo.CopyImagesToRegistry(controller0, images_remaining, registry_url) + FLAGS.dpt_registry_map[1] = f"{registry_url}/" + logging.info(f"SUT/Info: registry {FLAGS.dpt_registry_map[1]}") + + modified_docs = [] + for doc in docs: + modified_docs.append(doc) + + spec = _WalkTo(doc, "containers") + if spec and spec["containers"]: + _AddNodeAffinity(spec, workers) + _ModifyImageRef(spec["containers"], images, controller0.CLOUD != "Static" or FLAGS.dpt_reuse_sut or FLAGS.install_packages) + _ModifyEnvs(spec["containers"], vimages, workers) + + spec = _WalkTo(doc, "initContainers") + if spec and spec["initContainers"]: + _ModifyImageRef(spec["initContainers"], images, controller0.CLOUD != "Static" or FLAGS.dpt_reuse_sut or FLAGS.install_packages) + _ModifyEnvs(spec["initContainers"], vimages, workers) + + modified_filename = FLAGS.dpt_kubernetes_yaml + ".mod.yaml" + with open(modified_filename, "wt") as fd: + dump_all(modified_docs, fd) + + return modified_filename, images + + +def _ParseParams(params, metadata): + for kv in params.split(";"): + k, p, v = kv.partition(":") + try: + v = float(v.strip()) + except Exception: + pass + metadata[k.strip()] = v + + +@vm_util.Retry() +def _AddNodeLabels(controller0, workers, vm): + node = None + labels = {} + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if worker["labels"][k] == "required": + labels[k] = "yes" + node = worker["name"] + if labels and node: + cmd = ["kubectl", "label", "--overwrite", "node", node] + [k + "=" + labels[k] for k in labels] + controller0.RemoteCommand(" ".join(cmd)) + + +def _GetController(vm_groups): + if SUT_VM_CTR in vm_groups: + return vm_groups[SUT_VM_CTR][0] + return vm_groups[FLAGS.dpt_vm_groups[0]][-1] + + +def _GetWorkers(vm_groups): + workers = [] + for g1 in vm_groups: + if (g1 != SUT_VM_CTR) and (g1 in FLAGS.dpt_vm_groups): + for vm1 in vm_groups[g1]: + if vm1 not in workers: + workers.append(vm1) + return workers + + +def _SetupHugePages(workers, vm): + reqs = {} + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-HUGEPAGE-") and worker["labels"][k] == "required": + req = k.split("-")[-2:] + if req[0] not in reqs: + reqs[req[0]] = 0 + if int(req[1]) > reqs[req[0]]: + reqs[req[0]] = int(req[1]) + + cmds = ["hugepagesz={} hugepages={}".format(sz.replace("B",""), reqs[sz]) for sz in reqs] + if cmds: + vm.AppendKernelCommandLine(" ".join(cmds), reboot=False) + vm._needs_reboot = True + + +def _ProbeModules(workers, vm): + modules = [] + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-MODULE-") and worker["labels"][k] == "required": + modules.append(k.replace("HAS-SETUP-MODULE-", "").lower()) + if modules: + cmd = ["sudo", "modprobe"] + modules + vm.RemoteCommand(" ".join(cmd), ignore_failure=True, suppress_warning=True) + + +# We use Habana Gaudi AMI. Assume the driver is already installed. 
+# Just need to refresh docker or containerd configurations after +# a new installation (k8s). +def _SetupHabanaWorker(controller0, workers, vm): + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-HABANA-") and worker["labels"][k] == "required": + vm.Install("habana") + if controller0: + habana.RegisterWithContainerD(vm) + else: + habana.RegisterWithDocker(vm) + return + + +# Use aws inferentia for ai workload, need to install neuron runtime driver in vm firstly. +def _SetupInferentiaWorker(controller0, workers, vm): + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-INFERENTIA-") and worker["labels"][k] == "required": + vm.Install("inferentia") + return + + +# Use aws nvidia for ai workload, need to install cuda driver in vm firstly. +def _SetupGPUWorker(controller0, workers, vm): + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-NVIDIA-") and worker["labels"][k] == "required": + vm.Install("nvidia_gpu") + return + + +def _SetupHabanaController(controller0, workers): + for worker in workers: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-HABANA-") and worker["labels"][k] == "required": + habana.RegisterKubernetesPlugins(controller0) + return + + +def _SetupWorker(controller0, workers, vm): + for worker in workers: + if worker["vm"] == vm: + for k in worker["labels"]: + if k.startswith("HAS-SETUP-") and worker["labels"][k] == "required": + pass + + +def _UniqueVms(workers, controller0=None): + vms = [] + for worker in workers: + if worker["vm"] != controller0 and worker["vm"] not in vms: + vms.append(worker["vm"]) + return vms + + +def _PrepareWorker(controller0, workers, vm): + _SetupHugePages(workers, vm) + _ProbeModules(workers, vm) + _SetupHabanaWorker(controller0, workers, vm) + _SetupInferentiaWorker(controller0, workers, vm) + _SetupGPUWorker(controller0, workers, vm) + if controller0: + _AddNodeLabels(controller0, workers, vm) + vm._RebootIfNecessary() + + +def _PrepareVM(vimage): + if "setup" in vimage: + setup = vimage["setup"] + vm1 = vimage["vm"] + + while True: + try: + vm1.RemoteCommand("sudo mkdir -p /opt/pkb/vmsetup && sudo chown -R {} /opt/pkb".format(vm1.user_name)) + vm1.PushFile("{}/{}.tgz".format(FLAGS.dpt_logs_dir, setup), "/opt/pkb") + vm1.RemoteCommand("cd /opt/pkb/vmsetup && sudo tar xfz ../{}.tgz && sudo ./setup.sh {}".format(setup, " ".join(FLAGS.dpt_script_args))) + break + except Exception as e: + logging.warning("VM Setup Exception: %s", str(e)) + vm1.RemoteCommand("sleep 10s", ignore_failure=True) + vm1.WaitForBootCompletion() + + +def _SaveConfigFiles(spec): + spec.s3_reports.append((FLAGS.dpt_cluster_yaml, 'text/yaml')) + cumulus_config_file = join(FLAGS.dpt_logs_dir, "cumulus-config.yaml") + spec.s3_reports.append((cumulus_config_file, 'text/yaml')) + + if FLAGS.dpt_kubernetes_yaml: + spec.s3_reports.append((FLAGS.dpt_kubernetes_yaml, 'text/yaml')) + + test_config_file = join(FLAGS.dpt_logs_dir, "test-config.yaml") + if os.path.exists(test_config_file): + spec.s3_reports.append((test_config_file, 'text/yaml')) + + +def Prepare(benchmark_spec): + _SetBreakPoint("PrepareStage") + + benchmark_spec.name = FLAGS.dpt_name.replace(" ", "_").lower() + benchmark_spec.workload_name = FLAGS.dpt_name + benchmark_spec.sut_vm_group = FLAGS.dpt_vm_groups[0] + benchmark_spec.always_call_cleanup = True + + benchmark_spec.control_traces = True + FLAGS.dpt_trace_mode = [x.strip() for x in 
FLAGS.dpt_trace_mode] + + _SaveConfigFiles(benchmark_spec) + _ParseParams(FLAGS.dpt_params, benchmark_spec.software_config_metadata) + _ParseParams(FLAGS.dpt_tunables, benchmark_spec.tunable_parameters_metadata) + + # export SUT Instrumentation + for group1 in benchmark_spec.vm_groups: + for worker1 in benchmark_spec.vm_groups[group1]: + logging.info(f"SUT/Info: {group1} {worker1.ip_address} {worker1.internal_ip} {worker1.OS_TYPE}") + + if FLAGS.dpt_docker_image: + controller0 = _GetWorkers(benchmark_spec.vm_groups)[0] + tmp_dir = _MakeTempDir(controller0) + if controller0.CLOUD != "Static" or FLAGS.install_packages: + controller0.Install('docker_ce') + docker_auth.CopyDockerConfig(controller0) + _PullImages(controller0, [FLAGS.dpt_docker_image] + FLAGS.dpt_docker_dataset) + + workers, vimages, options = _ParseClusterConfigs(benchmark_spec.vm_groups) + FLAGS.dpt_docker_options = " ".join(FLAGS.dpt_docker_options.split(" ") + options) + _PrepareWorker(None, workers, controller0) + if controller0.CLOUD != "Static" or FLAGS.install_packages: + _SetBreakPoint("SetupVM") + vm_util.RunThreaded(lambda vim1: _PrepareVM(vim1), vimages) + + if FLAGS.dpt_kubernetes_yaml: + controller0 = _GetController(benchmark_spec.vm_groups) + tmp_dir = _MakeTempDir(controller0) + + if controller0.CLOUD != "Static" or FLAGS.install_packages: + _ParseClusterConfigs(benchmark_spec.vm_groups) + workers = [vm1 for vm1 in _GetWorkers(benchmark_spec.vm_groups) if vm1 != controller0] + taint = SUT_VM_CTR in benchmark_spec.vm_groups + k8s.CreateCluster(controller0, workers, taint) + docker_auth.CopyDockerConfig(controller0) + + nodes = _GetNodes(controller0) + workers, vimages, _ = _ParseClusterConfigs(benchmark_spec.vm_groups, nodes) + k8s_config_yaml, images = _UpdateK8sConfig(controller0, workers, vimages) + + if controller0.CLOUD != "Static" or FLAGS.install_packages: + vm_util.RunThreaded(lambda vm1: _PrepareWorker(controller0, workers, vm1), _UniqueVms(workers)) + _SetupHabanaController(controller0, workers) + _SetBreakPoint("SetupVM") + vm_util.RunThreaded(lambda vim1: _PrepareVM(vim1), vimages) + + remote_yaml_file = join(tmp_dir, KUBERNETES_CONFIG) + controller0.PushFile(k8s_config_yaml, remote_yaml_file) + + kpish = f"{FLAGS.dpt_logs_dir}/{KPISH}" + for i in range(1, FLAGS.run_stage_iterations + 1): + local_logs_dir = join(FLAGS.dpt_logs_dir, ITERATION_DIR.format(i)) + vm_util.IssueCommand(["mkdir", "-p", local_logs_dir]) + vm_util.IssueCommand(["cp", "-f", kpish, local_logs_dir]) + + with open(kpish) as fd: + test_string = f"cd {ITERATION_DIR.format(1)}" + if test_string not in fd.read(): + vm_util.IssueCommand(["sh", "-c", "sed -i '1a[ -d {} ] && cd {}' {}". 
+ format(ITERATION_DIR.format(1), ITERATION_DIR.format(1), kpish)]) + + +def _PullExtractLogs(controller0, pods, remote_logs_dir): + estr = None + for pod1 in pods: + remote_logs_tarfile = join(remote_logs_dir, LOGS_TARFILE.format(pod1)) + local_logs_dir = join(FLAGS.dpt_logs_dir, join(ITERATION_DIR.format(run_seq), f"{pod1}")) + local_logs_tarfile = join(local_logs_dir, LOGS_TARFILE.format(pod1)) + try: + vm_util.IssueCommand(["mkdir", "-p", local_logs_dir]) + controller0.PullFile(local_logs_tarfile, remote_logs_tarfile) + vm_util.IssueCommand(["tar", "xf", local_logs_tarfile, "-C", local_logs_dir]) + vm_util.IssueCommand(["rm", "-f", local_logs_tarfile]) + except Exception as e: + estr = str(e) + if estr: + raise Exception("ExtractLogs Exception: " + estr) + + +def _TraceByTime(benchmark_spec, controller0): + controller0.RemoteCommand(f"sleep {FLAGS.dpt_trace_mode[1]}s", ignore_failure=True) + events.start_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + controller0.RemoteCommand(f"sleep {FLAGS.dpt_trace_mode[2]}s", ignore_failure=True) + events.stop_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + + +def _TraceByROI(benchmark_spec, controller0, timeout, cmds): + _, _, status = controller0.RemoteCommandWithReturnCode("timeout {}s bash -c 'while true; do ({}) | grep -q -F \"{}\" && exit 0 || sleep 1s; done'".format(timeout, cmds, FLAGS.dpt_trace_mode[1]), ignore_failure=True) + if status == 0: + events.start_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + controller0.RemoteCommand("timeout {}s bash -c 'while true; do ({}) | grep -q -F \"{}\" && exit 0 || sleep 1s; done'".format(timeout, cmds, FLAGS.dpt_trace_mode[2]), ignore_failure=True) + events.stop_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + + +@vm_util.Retry() +def _RobustGetLogs(vm, pod1, container, remote_logs_tarfile): + # copy with tarball validity check + vm.RemoteCommand(f"kubectl exec --namespace={FLAGS.dpt_namespace} -c {container} {pod1} -- sh -c \"cat {EXPORT_LOGS_TARFILE}\" > {remote_logs_tarfile} && tar xf {remote_logs_tarfile} -O > /dev/null") + + +def Run(benchmark_spec): + global run_seq + run_seq = run_seq + 1 + + _SetBreakPoint("RunStage") + + for vm in benchmark_spec.vms: + thread_count = vm.num_cpus + logging.debug(f"VM thread count: {thread_count}") + + tmp_dir = _GetTempDir() + timeout = list(map(int,FLAGS.dpt_timeout.split(","))) + if len(timeout)<2: + timeout.append(timeout[0]) + if len(timeout)<3: + timeout.append(timeout[0]/2) + + pull_logs = False + if FLAGS.dpt_docker_image: + controller0 = _GetWorkers(benchmark_spec.vm_groups)[0] + + options = FLAGS.dpt_docker_options.split(' ') + if controller0.CLOUD == "Static" and not FLAGS.dpt_reuse_sut and not FLAGS.install_packages and FLAGS.dpt_registry_map[0]: + options.extend(["--pull", "always"]) + + containers = [] + options1 = "--pull always" if controller0.CLOUD == "Static" and not FLAGS.install_packages and FLAGS.dpt_registry_map[0] else "" + for image1 in FLAGS.dpt_docker_dataset: + stdout, _ = controller0.RemoteCommand("sudo -E docker create {} {} -".format(options1, _ReplaceImage(image1))) + container_id = stdout.strip() + containers.append(container_id) + options.extend(["--volumes-from", container_id]) + + _SetBreakPoint("ScheduleExec") + container_id, pid = runwith.DockerRun(controller0, options, _ReplaceImage(FLAGS.dpt_docker_image)) + + if events.start_trace.receivers: + try: + if not FLAGS.dpt_trace_mode: + events.start_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + + elif FLAGS.dpt_trace_mode[0] == "roi": + 
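+            # ROI tracing: _TraceByROI polls the container logs for the start phrase (dpt_trace_mode[1]), starts the trace, then waits for the stop phrase (dpt_trace_mode[2]).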
_TraceByROI(benchmark_spec, controller0, timeout[2], + runwith.DockerLogsCmd(container_id)) + + elif FLAGS.dpt_trace_mode[0] == "time": + _TraceByTime(benchmark_spec, controller0) + except Exception as e: + logging.warning("Trace Exception: %s", str(e)) + _SetBreakPoint("TraceFailed") + + pods = [container_id] + try: + _SetBreakPoint("ExtractLogs") + runwith.DockerWaitForCompletion(controller0, container_id, timeout[0], join(tmp_dir, LOGS_TARFILE.format(container_id))) + pull_logs = True + except Exception as e: + logging.fatal("ExtractLogs Exception: %s", str(e)) + _SetBreakPoint("ExtractLogsFailed") + + if events.start_trace.receivers and (not FLAGS.dpt_trace_mode): + try: + events.stop_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + except: + pass + + controller0.RemoteCommand(runwith.DockerLogsCmd(container_id), + ignore_failure=True, should_log=True) + runwith.DockerRemove(controller0, containers, container_id, pid) + + if FLAGS.dpt_kubernetes_yaml: + controller0 = _GetController(benchmark_spec.vm_groups) + remote_yaml_file = join(tmp_dir, KUBERNETES_CONFIG) + + _SetBreakPoint("ScheduleExec") + controller0.RemoteCommand(f"kubectl create namespace {FLAGS.dpt_namespace}") + controller0.RemoteCommand(f"kubectl label namespace {FLAGS.dpt_namespace} {SF_NS_LABEL}") + docker_auth.InstallImagePullSecret(controller0, FLAGS.dpt_namespace) + try: + controller0.RemoteCommand(f"kubectl create --namespace={FLAGS.dpt_namespace} -f {remote_yaml_file}") + + try: + controller0.RemoteCommand("timeout {1}s bash -c 'q=0;until kubectl --namespace={0} wait pod --all --for=condition=Ready --timeout=1s 1>/dev/null 2>&1; do if kubectl --namespace={0} get pod -o json | grep -q Unschedulable; then q=1; break; fi; done; exit $q'".format(FLAGS.dpt_namespace, timeout[1])) + + pods, _ = controller0.RemoteCommand("kubectl get --namespace=" + FLAGS.dpt_namespace + " pod --selector=" + FLAGS.dpt_kubernetes_job + " '-o=jsonpath={.items[*].metadata.name}'") + pods = pods.strip(" \t\n").split(" ") + container = FLAGS.dpt_kubernetes_job.rpartition("=")[2] + + if events.start_trace.receivers: + try: + if not FLAGS.dpt_trace_mode: + events.start_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + + elif FLAGS.dpt_trace_mode[0] == "roi": + cmds = [] + for pod1 in pods: + cmds.append(f"kubectl logs --ignore-errors --prefix=false {pod1} -c {container} --namespace={FLAGS.dpt_namespace}") + _TraceByROI(benchmark_spec, controller0, timeout[2], ";".join(cmds)) + + elif FLAGS.dpt_trace_mode[0] == "time": + _TraceByTime(benchmark_spec, controller0) + except Exception as e: + logging.warning("Trace Exception: %s", str(e)) + _SetBreakPoint("TraceFailed") + + cmds = [] + for pod1 in pods: + cmds.append(f"kubectl exec --namespace={FLAGS.dpt_namespace} {pod1} -c {container} -- sh -c \"cat /export-logs > {EXPORT_LOGS_TARFILE}\";x=$?;test $x -ne 0 && r=$x") + + try: + _SetBreakPoint("ExtractLogs") + controller0.RemoteCommand("timeout {}s bash -c 'r=0;{};exit $r'".format(timeout[0], ";".join(cmds))) + + for pod1 in pods: + remote_logs_tarfile = join(tmp_dir, LOGS_TARFILE.format(pod1)) + _RobustGetLogs(controller0, pod1, container, remote_logs_tarfile) + + pull_logs = True + except Exception as e: + logging.fatal("ExtractLogs Exception: %s", str(e)) + _SetBreakPoint("ExtractLogsFailed") + + if events.start_trace.receivers and (not FLAGS.dpt_trace_mode): + try: + events.stop_trace.send(stages.RUN, benchmark_spec=benchmark_spec) + except: + pass + + except Exception as e: + logging.fatal("Schedule Exception: %s", str(e)) + 
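+        # Scheduling failed: record the breakpoint, then dump node/pod descriptions and all container logs below to aid debugging.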
_SetBreakPoint("ScheduleExecFailed") + + controller0.RemoteCommand(f"kubectl describe node --namespace={FLAGS.dpt_namespace}", ignore_failure=True, should_log=True) + controller0.RemoteCommand(f"kubectl describe pod --namespace={FLAGS.dpt_namespace}", ignore_failure=True, should_log=True) + controller0.RemoteCommand("bash -c 'for p in $(kubectl get pod -n {0} --no-headers -o custom-columns=:metadata.name);do echo \"pod $p:\";kubectl -n {0} logs --all-containers=true $p;done'".format(FLAGS.dpt_namespace), ignore_failure=True, should_log=True) + + except Exception as e: + logging.fatal("Failed to deploy test: %s", str(e)) + _SetBreakPoint("ScheduleExecFailed") + + if (controller0.CLOUD == "Static" and not FLAGS.install_packages) or (run_seq < FLAGS.run_stage_iterations): + controller0.RemoteCommand(f"kubectl delete --namespace={FLAGS.dpt_namespace} -f {remote_yaml_file} --ignore-not-found=true", ignore_failure=True) + try: + controller0.RemoteCommand(f"timeout 120s kubectl delete namespace {FLAGS.dpt_namespace} --timeout=0 --wait --ignore-not-found=true") + except: + # force namespace removal + controller0.RemoteCommand("bash -c 'kubectl replace --raw \"/api/v1/namespaces/{0}/finalize\" -f <(kubectl get ns {0} -o json | grep -v \"\\\"kubernetes\\\"\")'".format(FLAGS.dpt_namespace), ignore_failure=True) + + nodes = _GetNodes(controller0) + workers, _, _ = _ParseClusterConfigs(benchmark_spec.vm_groups, nodes) + vm_util.RunThreaded(lambda vm1: vm1.Reboot(), _UniqueVms(workers)) + controller0.RemoteCommand("kubectl wait --for=condition=Ready nodes --all") + + _SetBreakPoint("ExtractKPI") + + # pull the logs tarfile back + samples = [] + if pull_logs: + _PullExtractLogs(controller0, pods, tmp_dir) + samples = _ParseKPI(benchmark_spec.tunable_parameters_metadata) + + if not samples: + _SetBreakPoint("ExtractKPIFailed") + raise Exception("KPI Exception: No KPI data") + + return samples + + +def Cleanup(benchmark_spec): + _SetBreakPoint("CleanupStage") + tmp_dir = _GetTempDir() + + _, vimages, _ = _ParseClusterConfigs(benchmark_spec.vm_groups) + for i, vim1 in enumerate(vimages): + if "setup" in vim1: + local_dir = "{}/{}/{}".format(FLAGS.dpt_logs_dir, vim1["name"], i) + vm_util.IssueCommand(["mkdir", "-p", local_dir]) + try: + vm1 = vim1["vm"] + vm1.RemoteCommand("cd /opt/pkb/vmsetup && sudo ./cleanup.sh {}".format(" ".join(FLAGS.dpt_script_args))) + vm1.PullFile("{}/vmlogs.tgz".format(local_dir), "/opt/pkb/vmsetup/vmlogs.tgz") + except Exception as e: + logging.warning("Cleanup Exception: %s", str(e)) + + if FLAGS.dpt_docker_image: + controller0 = _GetWorkers(benchmark_spec.vm_groups)[0] + + if FLAGS.dpt_kubernetes_yaml: + controller0 = _GetController(benchmark_spec.vm_groups) + + # cleanup containers + if controller0.CLOUD == "Static" and not FLAGS.install_packages: + controller0.RemoteCommand(f"sudo rm -rf '{tmp_dir}'", ignore_failure=True) + + +def GetCmdLine(): + tcase = FLAGS.dpt_tunables[FLAGS.dpt_tunables.index(";testcase:")+10:] + tconfig = "TEST_CONFIG=$(pwd)/test-config.yaml " if os.path.exists("test-config.yaml") else "" + return f"{tconfig}ctest -R '^{tcase}$' -V" + diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/__init__.py new file mode 100644 index 0000000..9a53e22 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/__init__.py @@ -0,0 +1,68 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains package imports and a dictionary of package names and modules. + +All modules within this package are considered packages, and are loaded +dynamically. Add non-package code to other packages. + +Packages should, at a minimum, define install functions for each type of +package manager (e.g. YumInstall(vm) and AptInstall(vm)). +They may also define functions that return the path to a configuration file +(e.g. AptGetPathToConfig(vm)) and functions that return the linux service +name (e.g. YumGetServiceName(vm)). If the package only installs +packages through the package manager, then it does not need to define an +uninstall function. If the package manually places files in other locations +(e.g. /usr/bin, /opt/pkb), then it also needs to define uninstall functions +(e.g. YumUninstall(vm)). + +Package installation should persist across reboots. + +All functions in each package module should be prefixed with the type of package +manager, and all functions should accept a BaseVirtualMachine object as their +only arguments. + +See perfkitbenchmarker/package_managers.py for more information on how to use +packages in benchmarks. +""" + +import os +from perfkitbenchmarker import import_util + + +# Place to install stuff. Persists across reboots. +INSTALL_DIR = '/opt/pkb' + + +def _LoadPackages(): + packages = dict([(module.__name__.split('.')[-1], module) for module in + import_util.LoadModulesForPath(__path__, __name__)]) + #packages.update(packages['docker'].CreateImagePackages()) + return packages + + +PACKAGES = _LoadPackages() + + +def GetPipPackageVersion(vm, package_name): + """This function returns the version of a pip package installed on a vm. + + Args: + vm: the VM the package is installed on. + package_name: the name of the package. + + Returns: + The version string of the package. + """ + version, _ = vm.RemoteCommand('pip3 show %s |grep Version' % package_name) + return version.strip() diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/aws_credentials.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/aws_credentials.py new file mode 100644 index 0000000..42f9260 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/aws_credentials.py @@ -0,0 +1,163 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing AWS credential file installation and cleanup helpers. 
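+
+The --aws_credentials_local_path and --aws_credentials_remote_path flags
+control where the credential files are read on the PKB host and where they are
+written on the remote VM.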
+ +AWS credentials consist of a secret access key and its ID, stored in a single +file. Following PKB's AWS setup instructions (see +https://github.com/GoogleCloudPlatform/PerfKitBenchmarker#install-aws-cli-and-setup-authentication), +the default location of the file will be at ~/.aws/credentials + +This package copies the credentials file to the remote VM to make them available +for calls from the VM to other AWS services, such as SQS or Kinesis. +""" + +import configparser +import logging +import os +from absl import flags +from perfkitbenchmarker import data +from perfkitbenchmarker import errors +from subprocess import check_output + + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'aws_credentials_local_path', os.path.join('~', '.aws'), + 'Path where the AWS credential files can be found on the local machine.') + +flags.DEFINE_string( + 'aws_credentials_remote_path', '.aws', + 'Path where the AWS credential files will be written on remote machines.') + +flags.DEFINE_boolean( + 'aws_credentials_overwrite', False, + 'When set, if an AWS credential file already exists at the destination ' + 'specified by --aws_credentials_remote_path, it will be overwritten during ' + 'AWS credential file installation.') +flags.DEFINE_string('aws_s3_region', None, 'Region for the S3 bucket') + + +def _GetLocalPath(): + """Gets the expanded local path of the credential files. + + Returns: + string. Path to the credential files on the local machine. + """ + return os.path.expanduser(FLAGS.aws_credentials_local_path) + + +def GetCredentials(credentials_file_name='credentials'): + """Gets the credentials from the local credential file. + + AWS credentials file is expected to be called 'credentials'. + AWS credentials file looks like this, and ends with a newline: + [default] + aws_access_key_id = {access_key} + aws_secret_access_key = {secret_access_key} + + Args: + credentials_file_name: String name of the file containing the credentials. + + Returns: + A string, string tuple of access_key and secret_access_key + """ + config = configparser.ConfigParser() + config.read(os.path.join(_GetLocalPath(), credentials_file_name)) + key_id = config['default']['aws_access_key_id'] + key = config['default']['aws_secret_access_key'] + return key_id, key + + +def CheckPrerequisites(): + """Verifies that the required resources are present. + + Raises: + perfkitbenchmarker.data.ResourceNotFound: On missing resource. + """ + local_path = _GetLocalPath() + if not os.path.exists(local_path): + raise data.ResourceNotFound( + 'AWS credential files were not found at {0}'.format(local_path)) + + +def _IsCopyNeeded(vm, local_path, remote_path): + # return False: if the username, file_path, IP (or localhost) are the same for local and remote + local_ips = check_output(['hostname', '--all-ip-addresses']) + local_ip_list_str = local_ips.decode('utf-8') + remote_ip = vm.GetConnectionIp() + if remote_path == '.aws': + full_remote_path = '/home/{}/{}'.format(vm.user_name, remote_path) + else: + full_remote_path = remote_path + local_user = os.environ.get('USER') or os.environ.get('USERNAME') or '' + # remote_ip is the same as one of the local IP or localhost + if remote_ip in local_ip_list_str or remote_ip == 'localhost': + # same user and same path, do NOT copy + if local_user == vm.user_name and local_path == full_remote_path: + return False + return True + + +def Install(vm): + """Copies credential files to the specified VM. + + Args: + vm: BaseVirtualMachine. VM that receives the credential files. 
+ + Raises: + errors.Error: If the file destination on the VM already exists, and the + overwrite behavior is not specified via --aws_credentials_overwrite. + """ + local_path = _GetLocalPath() + remote_path = FLAGS.aws_credentials_remote_path + overwrite = FLAGS.aws_credentials_overwrite + try: + vm.RemoteCommand('[[ ! -e {0} ]]'.format(remote_path)) + except errors.VirtualMachine.RemoteCommandError: + err_msg = 'File {0} already exists on VM {1}.'.format(remote_path, vm) + if overwrite: + logging.info('%s Overwriting.', err_msg) + else: + raise errors.Error(err_msg) + remote_dir = os.path.dirname(remote_path) + if remote_dir: + vm.RemoteCommand('mkdir -p {0}'.format(remote_dir)) + if _IsCopyNeeded(vm, local_path, remote_path): + if FLAGS.enable_rsync: + vm.PushFile(f"{local_path}/", remote_path) + else: + vm.PushFile(local_path, remote_path) + + +def Uninstall(vm): + """Deletes the credential files from the specified VM. + + Args: + vm: BaseVirtualMachine. VM that has the credential files. + """ + vm.RemoveFile(FLAGS.aws_credentials_remote_path) + + +def IsInstalled(vm): + """Checks whether aws credentials is installed on the VM.""" + _, _, retVal = vm.RemoteCommandWithReturnCode('test -d {0}'.format(FLAGS.aws_credentials_remote_path), + ignore_failure=True, + suppress_warning=True) + # It is not installed + if retVal != 0: + return False + + return True diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/awscliv2.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/awscliv2.py new file mode 100644 index 0000000..d47dc05 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/awscliv2.py @@ -0,0 +1,37 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Package for installing the AWS CLI.""" + +AWSCLI_URL_FMT = "https://awscli.amazonaws.com/awscli-exe-linux-{arch}.zip" +AWSCLI_ZIP = "awscliv2.zip" + + +def Install(vm): + """Installs the awscli package on the VM.""" + uname = vm.RemoteCommand('uname -m')[0].strip() + if uname != 'x86_64' and uname != 'aarch64': + raise NotImplementedError("unsupported architecture: {}".format(uname)) + + vm.InstallPackages("unzip") + cli_url = AWSCLI_URL_FMT.format(arch=uname) + vm.RemoteCommand(f"curl {cli_url} -o {AWSCLI_ZIP} && unzip {AWSCLI_ZIP}") + vm.RemoteCommand("sudo ./aws/install") + # Clean up unused files + vm.RemoteCommand(f"rm -rf aws {AWSCLI_ZIP}") + + +def Uninstall(vm): + vm.RemoteCommand('sudo rm -rf /usr/local/aws-cli') + vm.RemoteCommand('sudo rm /usr/local/bin/aws') diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/azure_credentials.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/azure_credentials.py new file mode 100644 index 0000000..6e03012 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/azure_credentials.py @@ -0,0 +1,72 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Package for installing the Azure MSAL credentials for the azure-cli.""" + +import os + +from absl import logging +from packaging import version + +from perfkitbenchmarker import errors +from perfkitbenchmarker import object_storage_service +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure + +AZURE_CREDENTIAL_DIRECTORY = os.path.join('~', '.azure') +# Necessary for user login with MSAL +TOKENS_FILE = os.path.join(AZURE_CREDENTIAL_DIRECTORY, 'msal_token_cache.json') +# Necessary for service_principal login with MSAL +SERVICE_PRINCIPAL_FILE = os.path.join(AZURE_CREDENTIAL_DIRECTORY, + 'service_principal_entries.json') +# Determines which login is active +PROFILE_FILE = os.path.join(AZURE_CREDENTIAL_DIRECTORY, 'azureProfile.json') + +# See https://docs.microsoft.com/en-us/cli/azure/msal-based-azure-cli +_REQUIRED_MSAL_CLI_VERSION = '2.30.0' + + +def Install(vm): + """Copies Azure credentials to the VM.""" + # Validate local azure-cli uses MSAL + stdout, _, _ = vm_util.IssueCommand( + [azure.AZURE_PATH, 'version', '--query', '"azure-cli"']) + az_version = version.Version(stdout.strip('"\n')) + if az_version < version.Version(_REQUIRED_MSAL_CLI_VERSION): + raise errors.Benchmarks.MissingObjectCredentialException( + f'Local Azure CLI version must be at least {_REQUIRED_MSAL_CLI_VERSION}' + f' to copy credentials into a VM. Found version {az_version}. ' + 'The recent CLI on the VM will not be able to use your credentials.') + + # Install CLI to validate credentials + vm.Install('azure_cli') + + # Copy credentials to VM + vm.RemoteCommand('mkdir -p {0}'.format(AZURE_CREDENTIAL_DIRECTORY)) + vm.PushFile( + object_storage_service.FindCredentialFile(PROFILE_FILE), PROFILE_FILE) + for file in [SERVICE_PRINCIPAL_FILE, TOKENS_FILE]: + try: + vm.PushFile(object_storage_service.FindCredentialFile(file), file) + except errors.Benchmarks.MissingObjectCredentialException: + logging.info('Optional service account file %s not found.', file) + + # Validate azure-cli is now authenticating correctly. + try: + # This token is not used, it is simply used to prove that the CLI on the VM + # is authenticated. + vm.RemoteCommand('az account get-access-token') + except errors.VirtualMachine.RemoteExceptionError as e: + raise errors.Benchmarks.MissingObjectCredentialException( + 'Failed to install azure_credentials on VM.') from e diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/build_tools.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/build_tools.py new file mode 100644 index 0000000..a515e78 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/build_tools.py @@ -0,0 +1,146 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing build tools installation and cleanup functions.""" +import logging +from absl import flags + +FLAGS = flags.FLAGS +flags.DEFINE_string( + 'gcc_version', None, 'Version of gcc to use. Benchmarks ' + 'that utilize gcc compilation should ensure reinstallation ' + 'of GCC. Default is set by the OS package manager.') +flags.DEFINE_boolean('force_build_gcc_from_source', False, 'Whether to force ' + 'building GCC from source.') +flags.DEFINE_boolean( + 'build_fortran', False, 'Whether to build fortran ' + 'alongside c and c++ when building GCC.') + +GCC_TAR = 'gcc-{version}.tar.gz' +GCC_URL = 'https://ftp.gnu.org/gnu/gcc/gcc-{version}/' + GCC_TAR +PREPROVISIONED_DATA = { + GCC_TAR.format(version='9.2.0'): + 'a931a750d6feadacbeecb321d73925cd5ebb6dfa7eff0802984af3aef63759f4' +} +PACKAGE_DATA_URL = { + GCC_TAR.format(version='9.2.0'): GCC_URL.format(version='9.2.0') +} + + +def YumInstall(vm): + """Installs build tools on the VM.""" + vm.InstallPackageGroup('Development Tools') + if FLAGS.gcc_version: + Reinstall(vm, version=FLAGS.gcc_version) + + +def AptInstall(vm): + """Installs build tools on the VM.""" + vm.InstallPackages('build-essential git libtool autoconf automake') + if FLAGS.gcc_version: + Reinstall(vm, version=FLAGS.gcc_version) + + +def BuildGccFromSource(vm, gcc_version): + """Install a specific version of gcc by compiling from source. + + Args: + vm: VirtualMachine object. + gcc_version: string. GCC version. + Taken from: https://gist.github.com/nchaigne/ad06bc867f911a3c0d32939f1e930a11 + """ + if gcc_version == '9' or gcc_version == '9.2': + gcc_version = '9.2.0' + logging.info('Compiling GCC %s', gcc_version) + + # build GCC on scratch disks for speed if possible + build_dir = vm.GetScratchDir() if vm.scratch_disks else '~/' + gcc_tar = GCC_TAR.format(version=gcc_version) + if gcc_tar in PREPROVISIONED_DATA: + vm.InstallPreprovisionedPackageData('build_tools', + PREPROVISIONED_DATA.keys(), build_dir) + else: + vm.RemoteCommand(f'cd {build_dir} && ' + f'wget {GCC_URL.format(version=gcc_version)}') + vm.RemoteCommand(f'cd {build_dir} && tar xzvf {gcc_tar}') + vm.RemoteCommand(f'cd {build_dir} && mkdir -p obj.gcc-{gcc_version}') + vm.RemoteCommand(f'cd {build_dir}/gcc-{gcc_version} && ' + './contrib/download_prerequisites') + enable_languages = 'c,c++' + (',fortran' if FLAGS.build_fortran else '') + vm.RemoteCommand(f'cd {build_dir}/obj.gcc-{gcc_version} && ' + f'../gcc-{gcc_version}/configure ' + f'--disable-multilib --enable-languages={enable_languages}') + # TODO(user): Measure GCC compilation time as a benchmark. 
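+  # Build with every CPU available to the benchmark; "time" records how long the compile takes in the run log.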
+ vm.RemoteCommand(f'cd {build_dir}/obj.gcc-{gcc_version} && ' + f'time make -j {vm.NumCpusForBenchmark()}') + vm.RemoteCommand(f'cd {build_dir}/obj.gcc-{gcc_version} && sudo make install') + vm.RemoteCommand('sudo rm -rf /usr/bin/gcc && ' + 'sudo ln -s /usr/local/bin/gcc /usr/bin/gcc') + vm.RemoteCommand('sudo rm -rf /usr/bin/g++ && ' + 'sudo ln -s /usr/local/bin/g++ /usr/bin/g++') + if FLAGS.build_fortran: + vm.RemoteCommand('sudo rm -rf /usr/bin/gfortran && ' + 'sudo ln -s /usr/local/bin/gfortran /usr/bin/gfortran') + + if '11' in gcc_version: + # https://stackoverflow.com/a/65384705 + vm.RemoteCommand( + f'sudo cp {build_dir}/obj.gcc-{gcc_version}/x86_64-pc-linux-gnu/' + 'libstdc++-v3/src/.libs/* /usr/lib/x86_64-linux-gnu/', + ignore_failure=True) + vm.RemoteCommand( + f'sudo cp {build_dir}/obj.gcc-{gcc_version}/aarch64-unknown-linux-gnu/' + 'libstdc++-v3/src/.libs/* /usr/lib/aarch64-linux-gnu/', + ignore_failure=True) + + +def GetVersion(vm, pkg): + """Get version of package using -dumpversion.""" + out, _ = vm.RemoteCommand( + '{pkg} -dumpversion'.format(pkg=pkg), ignore_failure=True) + return out.rstrip() + + +def GetVersionInfo(vm, pkg): + """Get compiler version info for package using --version.""" + out, _ = vm.RemoteCommand( + '{pkg} --version'.format(pkg=pkg), ignore_failure=True) + # return first line of pkg --version + return out.splitlines()[0] if out else None + + +def Reinstall(vm, version: str): + """Install specific version of gcc. + + Args: + vm: VirtualMachine object. + version: string. GCC version. + """ + if 'ubuntu' not in vm.OS_TYPE or FLAGS.force_build_gcc_from_source: + BuildGccFromSource(vm, version) + logging.info('GCC info: %s', GetVersion(vm, 'gcc')) + return + vm.Install('ubuntu_toolchain') + for pkg in ('gcc', 'gfortran', 'g++'): + version_string = GetVersion(vm, pkg) + if version in version_string: + logging.info('Have expected version of %s: %s', pkg, version_string) + continue + else: + new_pkg = pkg + '-' + version + vm.InstallPackages(new_pkg) + vm.RemoteCommand('sudo rm -f /usr/bin/{pkg}'.format(pkg=pkg)) + vm.RemoteCommand('sudo ln -s /usr/bin/{new_pkg} /usr/bin/{pkg}'.format( + new_pkg=new_pkg, pkg=pkg)) + logging.info('Updated version of %s: Old: %s New: %s', pkg, + version_string, GetVersion(vm, pkg)) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/collectd.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/collectd.py new file mode 100644 index 0000000..ae166d0 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/collectd.py @@ -0,0 +1,186 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Builds collectd from source, installs to linux_packages.INSTALL_DIR. + +https://collectd.org/ +collectd is extremely configurable. We enable some basic monitoring, gathered +every 10s, saving in .csv format. See +perfkitbenchmarker/data/build_collectd.sh.j2 for configuration details. 
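+The --collectd_config, --collectd_depend, and --collectd_tar flags can override
+the default configuration file, the plugin dependency list, and the source
+tarball used for the build.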
+""" + +import posixpath +import os +import re +import logging +import csv + +from absl import flags +from perfkitbenchmarker import data + +flags.DEFINE_string('collectd_depend', None, + 'list collectd dependency files required to add plugin in configuration file.') +flags.DEFINE_string('collectd_config', None, + 'override the default collectd configuration file.') +flags.DEFINE_string('collectd_tar', None, + 'Path on PKB host from which user can upload collectd package to SUT') + +FLAGS = flags.FLAGS + +COLLECTD_DIR = '/opt/collectd' +BUILD_SCRIPT_NAME = 'build_collectd.sh.j2' +PLUGIN_DIR_NAME = 'collectd_plugins' +PATCHES_DIR_NAME = 'collectd_patches' +COLLECTD_URL = ('https://storage.googleapis.com/collectd-tarballs/collectd-5.12.0.tar.bz2') +BUILD_DIR = posixpath.join(COLLECTD_DIR, 'collectd-build') +CSV_DIR = posixpath.join(COLLECTD_DIR, 'collectd-csv') +PREFIX = posixpath.join(COLLECTD_DIR, 'collectd') +PLUGIN_DIR = posixpath.join(COLLECTD_DIR, PLUGIN_DIR_NAME) +PATCHES_DIR = posixpath.join(COLLECTD_DIR, PATCHES_DIR_NAME) +PID_FILE = posixpath.join(PREFIX, 'var', 'run', 'collectd.pid') +PYTHON_CONFIG = "/usr/bin/python3-config" + + +def _GetAbsPath(path): + absPath = os.path.abspath(os.path.expanduser(path)) + if not os.path.isfile(absPath): + raise RuntimeError('File (%s) does not exist.' % path) + return absPath + + +def _CollectdInstalled(vm): + ret, _ = vm.RemoteCommand(f"test -d {COLLECTD_DIR} && echo collectd_installed || echo collectd_not_found", + ignore_failure=True) + if "collectd_installed" in ret: + return True + return False + + +def Prepare(vm, benchmark_spec): + """Prepares collect on VM.""" + pass + + +def _Install(vm): + # if skip_install flag is true and collectd_dir exist, do nothing + if FLAGS.trace_skip_install and _CollectdInstalled(vm): + logging.info('Skip installing collectd') + return + + # first clean up the existing collectd_dir + vm.RemoteCommand(f"sudo rm -rf {COLLECTD_DIR}") + vm.RemoteCommand(f"sudo mkdir -p {COLLECTD_DIR}; sudo chown -R {vm.user_name} {COLLECTD_DIR}") + # copy config file + if FLAGS.collectd_config: + configFile_path = _GetAbsPath(FLAGS.collectd_config) + else: + configFile_path = data.ResourcePath('collectd.conf') + vm.RemoteCopy(configFile_path, COLLECTD_DIR, True) + # install user-defined package dependencies + if FLAGS.collectd_depend: + dependFile_path = _GetAbsPath(FLAGS.collectd_depend) + else: + dependFile_path = data.ResourcePath('collectdDepend.txt') + # this file may not be there or it could be empty + if posixpath.exists(dependFile_path) and posixpath.getsize(dependFile_path) > 0: + with open(dependFile_path) as fh: + for line in fh: + if re.match(r"^\w+", line): + vm.InstallPackages(line) + # upload patches + vm.RemoteCommand('bash -c "mkdir -p {0}"'.format(PATCHES_DIR)) + patches_path = data.ResourcePath(PATCHES_DIR_NAME) + for file in os.listdir(patches_path): + vm.RemoteCopy(posixpath.join(patches_path, file), posixpath.join(PATCHES_DIR, file), True) + # upload plugins + vm.RemoteCommand('bash -c "mkdir -p {0}"'.format(PLUGIN_DIR)) + plugins_path = data.ResourcePath(PLUGIN_DIR_NAME) + for file in os.listdir(plugins_path): + vm.RemoteCopy(posixpath.join(plugins_path, file), posixpath.join(PLUGIN_DIR, file), True) + + collectd_tar_package = posixpath.join(COLLECTD_DIR, 'collectd.tar.bz2') + if FLAGS.collectd_tar: + vm.RemoteCopy(data.ResourcePath(FLAGS.collectd_tar), '{0}'.format(collectd_tar_package)) + else: + vm.RemoteCommand("curl -L {0} --output {1}".format(COLLECTD_URL, collectd_tar_package)) + + if vm.OS_TYPE == 
"ubuntu2004": + python_config = PYTHON_CONFIG + " --embed" + else: + python_config = PYTHON_CONFIG + + # build collectd + context = { + 'collectd_package': collectd_tar_package, + 'build_dir': BUILD_DIR, + 'root_dir': PREFIX, + 'parent_dir': COLLECTD_DIR, + 'plugin_dir': PLUGIN_DIR, + 'patches_dir': PATCHES_DIR, + 'config_depd_file': os.path.basename(dependFile_path), + 'config_file': os.path.basename(configFile_path), + 'python_config': python_config} + remote_path = posixpath.join( + COLLECTD_DIR, + posixpath.splitext(posixpath.basename(BUILD_SCRIPT_NAME))[0]) + vm.RenderTemplate(data.ResourcePath(BUILD_SCRIPT_NAME), + remote_path, context=context) + vm.RemoteCommand('bash ' + remote_path) + + +def _Uninstall(vm): + pass + + +def Start(vm): + vm.RemoteCommand("%s/sbin/collectd -t" % PREFIX) # exception will be thrown if this fails + vm.RemoteCommand("%s/sbin/collectd" % PREFIX) + + +def Stop(vm): + vm.RemoteCommand('kill -9 $(cat {0})'.format(PID_FILE), ignore_failure=True) + + +def YumInstall(vm): + """Installs collectd on 'vm'.""" + vm.InstallPackages('libtool-ltdl-devel python3-devel python3-pip curl') + vm.InstallPackageGroup('Development Tools') + _Install(vm) + + +def AptInstall(vm): + """Installs collectd on 'vm'.""" + pkg_list = "autoconf bison flex libtool pkg-config build-essential python3 python3-dev curl" + vm.InstallPackages(pkg_list) + _Install(vm) + + +def AptUninstall(vm): + """Stops collectd on 'vm'.""" + _Uninstall(vm) + + +def YumUninstall(vm): + """Stops collectd on 'vm'.""" + _Uninstall(vm) + + +def SwupdInstall(vm): + """Installs collectd on 'vm'.""" + vm.InstallPackages('os-testsuite-phoronix-server') + _Install(vm) + + +def SwupdUninstall(vm): + """Stops collectd on 'vm'.""" + _Uninstall(vm) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/compiler.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/compiler.py new file mode 100644 index 0000000..c96b90a --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/compiler.py @@ -0,0 +1,30 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +"""Module containing compiler installation and cleanup functions.""" + +import logging +from absl import flags + +FLAGS = flags.FLAGS + + +def Install(vm): + try: + vm.Install('{}'.format(FLAGS.compiler)) + except: + """ If we can't figure it out, use gcc """ + logging.warn('Unable to determine desired compiler, using GCC instead!') + vm.Install('gcc') diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/containerd.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/containerd.py new file mode 100644 index 0000000..d37bf50 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/containerd.py @@ -0,0 +1,33 @@ +from perfkitbenchmarker.linux_packages import proxy +from absl import flags + +FLAGS = flags.FLAGS +flags.DEFINE_string("containerd_version", "1.5", + "Specify the containerd version") + +CONFIG_FILE = "/etc/containerd/config.toml" + + +def YumInstall(vm): + raise Exception("Not Implemented") + + +def AptInstall(vm): + vm.AptUpdate() + version, _ = vm.RemoteCommand(f"sudo apt-cache madison containerd | grep {FLAGS.containerd_version} | cut -f2 -d'|' | tr -d ' ' | sort -V -r | head -n 1") + version = version.strip() + vm.InstallPackages(f'containerd={version}') + _ConfigureContainerd(vm) + + +def _ConfigureContainerd(vm): + vm.RemoteCommand(f"sudo mkdir -p $(dirname {CONFIG_FILE})") + vm.RemoteCommand(f"containerd config default | sudo tee {CONFIG_FILE}") + vm.RemoteCommand(f"sudo sed -i 's/SystemdCgroup = .*/SystemdCgroup = true/' {CONFIG_FILE}") + proxy.AddProxy(vm, "containerd") + vm.RemoteCommand(f"sudo systemctl daemon-reload") + vm.RemoteCommand(f"sudo systemctl restart containerd") + + +def Uninstall(vm): + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/docker_auth.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/docker_auth.py new file mode 100644 index 0000000..898da2e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/docker_auth.py @@ -0,0 +1,38 @@ +from absl import flags +import posixpath +import tempfile +import logging +import json +import os + +FLAGS = flags.FLAGS +flags.DEFINE_boolean('docker_auth_reuse', False, 'SUT reuses the same docker auth info.') +flags.DEFINE_string('docker_auth_local_path', '~/.docker/config.json', 'The docker config file local path') +flags.DEFINE_string('docker_auth_remote_path', '.docker/config.json', 'The docker config file remote path') + +SECRET_NAME = "my-registry-secret" + +def CopyDockerConfig(vm): + if FLAGS.docker_auth_reuse: + try: + with open(os.path.expanduser(FLAGS.docker_auth_local_path),'r') as fdr: + auths = json.load(fdr)["auths"] + except Exception as e: + logging.warning("Exception: %s", str(e)) + return + + if auths: + handle, local_path = tempfile.mkstemp() + with os.fdopen(handle, "w") as fdw: + fdw.write(json.dumps({"auths": auths}) + "\n") + remote_path = f"/home/{vm.user_name}/{FLAGS.docker_auth_remote_path}" + vm.RemoteCommand("mkdir -p {}".format(posixpath.dirname(remote_path))) + vm.PushFile(local_path, remote_path) + os.unlink(local_path) + + +def InstallImagePullSecret(vm, namespace): + if FLAGS.docker_auth_reuse: + remote_path = f"/home/{vm.user_name}/{FLAGS.docker_auth_remote_path}" + vm.RemoteCommand(f"kubectl create secret docker-registry {SECRET_NAME} --from-file=.dockerconfigjson={remote_path} -n {namespace}", ignore_failure=True) + vm.RemoteCommand(f"kubectl patch serviceaccount default -p '{{\"imagePullSecrets\": [{{\"name\": \"{SECRET_NAME}\"}}]}}' -n {namespace}", ignore_failure=True) diff --git 
a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/docker_ce.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/docker_ce.py new file mode 100644 index 0000000..9043d32 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/docker_ce.py @@ -0,0 +1,89 @@ + +from perfkitbenchmarker.linux_packages import proxy +from absl import flags +import json + +FLAGS = flags.FLAGS +flags.DEFINE_string('docker_dist_repo', None, + 'Path to the dockerce repository.') +flags.DEFINE_string('docker_version', '20.10', + 'Specify the docker version.') +flags.DEFINE_list('docker_registry_mirrors', [], + 'Specify the docker mirrors.') + + +def YumInstall(vm): + repo = FLAGS.docker_dist_repo if FLAGS.docker_dist_repo else "https://download.docker.com/linux/centos" + vm.InstallPackages("yum-utils device-mapper-persistent-data lvm2") + # Package for RHEL8 containerd.io does not yet exist - this is a workaround + if vm.OS_TYPE == "centos8" or vm.OS_TYPE == "rhel8": + cmd = "sudo yum install -y " + repo + "/7/x86_64/stable/Packages/containerd.io-1.2.6-3.3.el7.x86_64.rpm" + cmd += " && sudo yum-config-manager --add-repo " + repo + "/docker-ce.repo" + else: + cmd = 'sudo yum-config-manager --add-repo ' + repo + '/docker-ce.repo' + vm.RemoteCommand(cmd) + vm.InstallPackages('docker-ce') + + proxy.AddProxy(vm, "docker") + AddConfig(vm) + _AddUserToDockerGroup(vm) + + +def AptInstall(vm): + repo = FLAGS.docker_dist_repo if FLAGS.docker_dist_repo else "https://download.docker.com/linux/ubuntu" + vm.InstallPackages("apt-transport-https ca-certificates curl gnupg-agent software-properties-common") + vm.RemoteCommand(f'curl -fsSL {repo}/gpg | sudo apt-key add -') + vm.RemoteCommand(f"bash -c 'sudo -E add-apt-repository \"deb [arch=$(dpkg --print-architecture)] {repo} $(grep CODENAME /etc/lsb-release | cut -f2 -d=) stable\"'") + vm.AptUpdate() + version, _ = vm.RemoteCommand(f"sudo apt-cache madison docker-ce | grep {FLAGS.docker_version} | cut -f2 -d'|' | tr -d ' ' | sort -V -r | head -n 1") + vm.InstallPackages(f'docker-ce={version.strip()} --allow-change-held-packages') + + proxy.AddProxy(vm, "docker") + AddConfig(vm) + _AddUserToDockerGroup(vm) + + +def SwupdInstall(vm): + vm.RemoteCommand("sudo swupd update") + vm.InstallPackages("containers-basic") + + proxy.AddProxy(vm, "docker") + AddConfig(vm) + _AddUserToDockerGroup(vm) + + +def AddConfig(vm, config={}): + config["exec-opts"] = ["native.cgroupdriver=systemd"] + if FLAGS.docker_registry_mirrors: + config["registry-mirrors"] = FLAGS.docker_registry_mirrors + + vm.RemoteCommand(f"echo '{json.dumps(config)}' | sudo tee /etc/docker/daemon.json") + vm.RemoteCommand(f"sudo systemctl daemon-reload") + vm.RemoteCommand(f"sudo systemctl restart docker") + + +def _AddUserToDockerGroup(vm): + """ + Add user to the docker group so docker commands can be executed without sudo + """ + vm.RemoteCommand("sudo usermod --append --groups docker {}".format(vm.user_name)) + vm.RemoteCommand("sudo systemctl restart docker") + + # SSH uses multiplexing to reuse connections without going through the SSH handshake + # for a remote host. Typically we need to logout / login after adding the user to + # the docker group as group memberships are evaluated at login. + # See: https://docs.docker.com/engine/install/linux-postinstall/ + # This requirement along with the multiplexing causes subsequent docker commands run in the + # reused session to fail with "permission denied" errors. 
+ # This command will cause the ssh multiplexing for this particular VM to stop causing the next + # SSH command to the VM to restart a multiplex session with ControlMaster=auto. This new session + # will start with docker group membership and will be able to execute docker commands without root. + vm.RemoteCommand('', ssh_args = ['-O', 'stop']) + + +def IsDocker(vm): + return "docker_ce" in vm._installed_packages + + +def Uninstall(vm): + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/emon.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/emon.py new file mode 100644 index 0000000..1a85d01 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/emon.py @@ -0,0 +1,413 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Builds and install emon from source. +""" + +import posixpath +import os +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import data +from perfkitbenchmarker import os_types +from perfkitbenchmarker import vm_util +try: + from perfkitbenchmarker.linux_packages import intel_s3_transfer +except: + intel_s3_transfer = None +from perfkitbenchmarker.linux_packages import INSTALL_DIR + +flags.DEFINE_string('emon_tarball', None, + 'Optional, path to emon package. eg --emon_tarball=/tmp/sep_private_5_19_linux_07062101c5153a9.tar.bz2') +flags.DEFINE_string('edp_events_file', None, + 'Optional, path to edp event list. present in config/edp') +flags.DEFINE_boolean('emon_post_process_skip', False, + 'Optional, no post processing will be done if supplied') +flags.DEFINE_enum('edp_script_type', 'python3', + ['ruby', 'python3'], 'Optional, default is python3. eg --edp_script_type=ruby if need to use ruby script') +flags.DEFINE_string('edp_config_file', None, + 'Optional, path to EDP config. eg --edp_config_file=/tmp/edp_config.txt') +flags.DEFINE_boolean('edp_publish', False, + 'Optional, EDP csv files will be published to zip file if provided and --intel-publish is applied') +flags.DEFINE_boolean('emon_debug', False, + 'Optional, for debugging EMON driver, build, collection, and post processing. 
eg --emon_debug') +flags.DEFINE_enum('emon_package_version', '5_34_linux_050122015feb2b5', + ['5_29_linux_09162200a7108a4', '5_33_linux_0316081130eb678', '5_34_linux_050122015feb2b5'], + 'Specify the internal emon version') +FLAGS = flags.FLAGS + + +UBUNTU_PKGS = ["linux-headers-`uname -r`", "build-essential"] +RHEL_PKGS = ["kernel-devel"] +EMON_SOURCE_TARBALL_DEFAULT = 'sep_private_linux_pkb.tar.bz2' +EMON_SOURCE_TARBALL_DEFAULT_LOCATION_S3_BUCKET = 'emon' +EMON_MAIN_DIR = '/opt/emon' +EMON_INSTALL_DIR = '/opt/emon/emon_files' +EMON_RESULT_TARBALL = 'emon_result.tar.gz' +EMON_EDP_TARBALL = 'emon_edp.tar.gz' +PKB_RUBY_FILE = '{0}/pkb_ruby_file'.format(EMON_MAIN_DIR) +PKB_POSTPROCESS_FILE = '{0}/pkb_postprocess_packages_file'.format(EMON_MAIN_DIR) + + +def _GetEmonTarball(): + if FLAGS.emon_package_version: + return "sep_private_" + FLAGS.emon_package_version + ".tar.bz2" + else: + return EMON_SOURCE_TARBALL_DEFAULT + + +def _GetAbsPath(path): + absPath = os.path.abspath(os.path.expanduser(path)) + if not os.path.isfile(absPath): + raise RuntimeError('File (%s) does not exist.' % path) + + return absPath + + +def _TransferEMONTarball(vm): + # get emon_tarball file name + emon_tarball = _GetEmonTarball() + + if FLAGS.emon_tarball: + logging.info("Copying local emon tarball ({}) to remote SUT location ({})" + .format(FLAGS.emon_tarball, INSTALL_DIR)) + tarFile_path = _GetAbsPath(FLAGS.emon_tarball) + _, emon_tarball = os.path.split(FLAGS.emon_tarball) + vm.RemoteCopy(tarFile_path, INSTALL_DIR, True) + else: + download_success = False + s3_image_path = posixpath.join(EMON_SOURCE_TARBALL_DEFAULT_LOCATION_S3_BUCKET, emon_tarball) + target = posixpath.join(INSTALL_DIR, emon_tarball) + download_success = intel_s3_transfer.GetFileFromS3(vm, s3_image_path, target) if intel_s3_transfer else None + + if not download_success: + raise RuntimeError(f'Failed to download EMON tarball ({emon_tarball}). 
Quit!') + + return emon_tarball + + +def _DoSanityCheck(vm): + """do a sanity check first""" + logging.info("Doing EMON sanity check") + sub_dir = CheckIfExternalVersion(vm) + cmds = ['source {0}/{1}/sep_vars.sh'.format(EMON_INSTALL_DIR, sub_dir), + 'emon -v > {0}/emon-v.dat'.format(INSTALL_DIR), + 'emon -M > {0}/emon-M.dat'.format(INSTALL_DIR)] + cmd = ' ; '.join(cmds) + vm.RemoteCommand("bash -c '{}'".format(cmd)) + emon_v_dat = posixpath.join(INSTALL_DIR, 'emon-v.dat') + emon_M_dat = posixpath.join(INSTALL_DIR, 'emon-M.dat') + logging.info("checking the contents in sanity checking output files ({}) and ({})".format(emon_M_dat, emon_v_dat)) + wc_M, stderr_M, ret_M = vm.RemoteCommandWithReturnCode('wc -c {}'.format(emon_M_dat), ignore_failure=False) + wc_v, stderr_v, ret_v = vm.RemoteCommandWithReturnCode('wc -c {}'.format(emon_v_dat), ignore_failure=False) + if ret_M != 0 or ret_v != 0: + logging.info("Failed to collect emon sanity checking data ({}) and data ({}) with stderr ({}) and ({})" + .format(emon_M_dat, emon_v_dat, stderr_M, stderr_v)) + raise RuntimeError('EMON sanity check failed, quit!') + else: + # check the str return with num > 0 from wc_M and wc_v + # "wc -c /opt/pkb/emon-M.dat" ==> "4255 /opt/pkb/emon-M.dat" + # sample output: wc_M = '4255 /opt/pkb/emon-M.dat' + # split into an array with multiple strings separated by space, + # and get the first string, which is 4255 before converting it to int + # if that int is zero, we didn't get any output + if int(wc_M.split()[0]) <= 0 or int(wc_v.split()[0]) <= 0: + err_str = ('EMON sanity check failed with invalid output ' + ' in ({}) and/or ({}), quit!').format(emon_M_dat, emon_v_dat) + raise RuntimeError(err_str) + + +def GetEmonVersion(vm): + sub_dir = 'sep' + if FLAGS.emon_tarball: + _, emon_version = os.path.split(FLAGS.emon_tarball) + else: + tar_file = vm.RemoteCommand("cd {0} && ls -ld {1}_* | head -n 1 | awk -F' ' '{{print $9}}'" + .format(EMON_MAIN_DIR, sub_dir))[0].rstrip("\n") + emon_version = tar_file.split('.tar')[0] + return emon_version + + +def GetEDPVersion(vm): + sub_dir = CheckIfExternalVersion(vm) + edp_version = vm.RemoteCommand("grep 'EDP_VERSION' {0}/{1}/config/edp/edp.rb | head -n 1" + " | awk -F' ' '{{print $3}}'" + .format(EMON_INSTALL_DIR, sub_dir))[0].rstrip("\n") + return edp_version.strip('"') + + +def CheckIfExternalVersion(vm): + use_dir = 'sep' + if FLAGS.emon_tarball and 'emon_nda' in FLAGS.emon_tarball: + use_dir = 'emon' + return use_dir + + +def _GetGroup(vm): + group = 'sudo' + if "centos" in vm.OS_TYPE: + group = 'wheel' + return group + + +def _AddUserToGroup(vm, group): + vm.RemoteCommand('sudo usermod -g {0} $USER'.format(group)) + # When we add pkb to the wheel group in CentOS, we need to exit and log in for new shells + # to load this new environment. 
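+  # Stopping the SSH ControlMaster here forces the next remote command to open a fresh session that picks up the new group membership.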
+ if "centos" in vm.OS_TYPE: + if FLAGS.ssh_reuse_connections: + vm.RemoteCommand('', ssh_args=['-O', 'stop']) + + +def _InstallCentosKernelDev(vm): + mirror_base = 'https://mirrors.portworx.com/mirrors/http/mirror.centos.org/centos/' + if os_types.CENTOS7 == vm.OS_TYPE: + os_repo = '7' + elif os_types.CENTOS8 == vm.OS_TYPE: + os_repo = '8' + elif os_types.CENTOS_STREAM8 == vm.OS_TYPE: + os_repo = '8-stream' + else: + return False + base_arq_pkg = '/BaseOS/x86_64/os/Packages/' + kernel_devel = 'kernel-devel-$(uname -r).rpm' + vm.InstallPackages('wget') + pkg_url = mirror_base + os_repo + base_arq_pkg + kernel_devel + _, _, wget_rc = vm.RemoteCommandWithReturnCode('wget {} -O /tmp/{}'.format(pkg_url, kernel_devel), + ignore_failure=True) + if wget_rc: + return False + + _, _, yum_rc = vm.RemoteCommandWithReturnCode('sudo yum -y install /tmp/{}'.format(kernel_devel), + ignore_failure=True) + + vm.RemoteCommand('rm /tmp/{}'.format(kernel_devel)) + if yum_rc: + return False + + return True + + +def _Install(vm): + # check input file exists asap, if supplied from command line as an optional flag + # error out if file does not exist + if FLAGS.emon_tarball: + _GetAbsPath(FLAGS.emon_tarball) + + if FLAGS.edp_events_file is not None: + _GetAbsPath(FLAGS.edp_events_file) + + if FLAGS.edp_config_file is not None: + _GetAbsPath(FLAGS.edp_config_file) + + """Installs emon on vm.""" + logging.info("Installing emon") + + # transfer tarball to the SUT + emon_tarball = _TransferEMONTarball(vm) + # install emon + vm.RemoteCommand('sudo rm -rf {0} && sudo mkdir -p {0}'.format(EMON_MAIN_DIR)) + vm.RemoteCommand('sudo mkdir -p {0}'.format(EMON_INSTALL_DIR)) + + vm.RemoteCommand('sudo tar -xf {}/{} -C {} --strip-components=1' + .format(INSTALL_DIR, emon_tarball, EMON_MAIN_DIR)) + + sub_dir = CheckIfExternalVersion(vm) + group = _GetGroup(vm) + _AddUserToGroup(vm, group) + + cmds = ['cd {0}'.format(EMON_MAIN_DIR), + './{0}-installer.sh -i -u -C {1} --accept-license -ni -g {2}' + .format(sub_dir, EMON_INSTALL_DIR, group)] + vm.RemoteCommand(' && '.join(cmds)) + + # quick sanity check + _DoSanityCheck(vm) + + +def Start(vm): + sub_dir = CheckIfExternalVersion(vm) + logging.info("Starting emon collection") + cmd = ('source {0}/{1}/sep_vars.sh; emon -collect-edp > {2}/emon.dat 2>&1 &' + .format(EMON_INSTALL_DIR, sub_dir, INSTALL_DIR)) + if FLAGS.edp_events_file: + edp_event_file_path = _GetAbsPath(FLAGS.edp_events_file) + _, edp_event_file_name = os.path.split(FLAGS.edp_events_file) + vm.RemoteCopy(edp_event_file_path, INSTALL_DIR, True) + cmd = ('source {0}/{1}/sep_vars.sh; cd {2};' + 'emon -collect-edp edp_file={3} > {2}/emon.dat 2>&1 &' + .format(EMON_INSTALL_DIR, sub_dir, INSTALL_DIR, edp_event_file_name)) + + stdout, stderr, retcode = vm.RemoteCommandWithReturnCode("bash -c '{}'".format(cmd)) + + +def Stop(vm): + """Stops emon collection on vm""" + logging.info("Stopping emon collection") + sub_dir = CheckIfExternalVersion(vm) + logging.info("Stopping emon") + cmds = ['source {0}/{1}/sep_vars.sh'.format(EMON_INSTALL_DIR, sub_dir), + 'emon -stop', + 'sleep 5', + 'pkill -9 -x emon'] + stdout, stderr, retcode = vm.RemoteCommandWithReturnCode("bash -c '{}'".format(' ; '.join(cmds)), ignore_failure=True) + if not FLAGS.emon_post_process_skip: + _PostProcess(vm) + + +def _CheckRubyFile(vm): + _, _, retcode = vm.RemoteCommandWithReturnCode('sudo file -f {0}'.format(PKB_RUBY_FILE), + ignore_failure=True, suppress_warning=True) + if retcode == 0: + return True + return False + + +def _PostProcessingPackagesExist(vm): + 
_, _, retcode = vm.RemoteCommandWithReturnCode('sudo file -f {0}'.format(PKB_POSTPROCESS_FILE), + ignore_failure=True, suppress_warning=True) + if retcode == 0: + return True + return False + + +def _PostProcess(vm): + logging.info("Starting emon post processing") + if FLAGS.trace_skip_install and _PostProcessingPackagesExist(vm): + logging.info("Post processing packages present. Skipping installation") + else: + if FLAGS.edp_script_type == 'ruby': + logging.info("Installing ruby ...") + vm.Install('ruby') + elif FLAGS.edp_script_type == 'python3': + vm.InstallPackages('python3-pip') + if 'centos' in vm.OS_TYPE: + vm.InstallPackages('python3-devel') + vm.RemoteCommand('sudo pip3 install xlsxwriter pandas numpy pytz defusedxml tdigest dataclasses') + vm.RemoteCommand('sudo touch {0}'.format(PKB_POSTPROCESS_FILE)) + + sub_dir = CheckIfExternalVersion(vm) + cmd = 'source {0}/{1}/sep_vars.sh; cd {2};'.format(EMON_INSTALL_DIR, sub_dir, INSTALL_DIR) + default_edp_config_file = " {0}/{1}/config/edp/edp_config.txt".format(EMON_INSTALL_DIR, sub_dir) + + if FLAGS.edp_script_type == 'ruby': + if _CheckRubyFile(vm): + cmd = cmd + 'rvm use ruby-{0}; '.format(FLAGS.ruby_version) + cmd = cmd + "emon -process-edp" + elif FLAGS.edp_script_type == 'python3': + cmd = cmd + "emon -process-pyedp" + default_edp_config_file = " {0}/{1}/config/edp/pyedp_config.txt".format(EMON_INSTALL_DIR, sub_dir) + + if FLAGS.edp_config_file: + edp_config_file_full_path = _GetAbsPath(FLAGS.edp_config_file) + _, edp_config_file_name = os.path.split(FLAGS.edp_config_file) + vm.RemoteCopy(edp_config_file_full_path, INSTALL_DIR, True) + cmd = cmd + ' {0}'.format(edp_config_file_name) + else: + cmd = cmd + default_edp_config_file + stdout, stderr, retcode = vm.RemoteCommandWithReturnCode("bash -c '{}'".format(cmd)) + + if FLAGS.emon_debug: + if stdout != '' or stderr != '': + logging.info("Emon post process generated stdout ({}) and stderr ({})" + .format(stdout, stderr)) + + +def FetchResults(vm): + """Copies emon data to PKB host.""" + logging.info('Fetching emon results') + + # TODO: tag vm with machine category, such as server, client, single_machine + # if vm.tag is not None and vm.tag is not '': + # local_dir = os.path.join(vm_util.GetTempDir(), vm.name + '-' + vm.tag + '-emon') + # e.g.: pkb-5c37bc7a-0-client-emon + # e.g.: pkb-5c37bc7a-1-server-emon + # else: + local_dir = os.path.join(vm_util.GetTempDir(), vm.name + '-emon') + # e.g.: pkb-5c37bc7a-0-emon + cmd = ['mkdir', '-p', local_dir] + vm_util.IssueCommand(cmd) + tar_pkgs = [('*emon*', EMON_RESULT_TARBALL)] + if not FLAGS.emon_post_process_skip: + edp_files = '*edp*.csv' + if FLAGS.edp_script_type == 'python3': + edp_files += ' summary.xlsx' + tar_pkgs.append((edp_files, EMON_EDP_TARBALL)) + # tar command below will cause an exception if edp fails to generate the output files as expected, + # and PKB process will be shutdown by the framework + # this is desired if edp post process fails, which could be due to multiple reasons, + # such as EMON data corruption, and we should quit + + for remote_output_files, remote_output_tarfile in tar_pkgs: + remote_output_tarfile = os.path.join("/tmp", remote_output_tarfile) + vm.RemoteCommand('cd {} && sudo -E tar cvzf {} {}' + .format(INSTALL_DIR, remote_output_tarfile, remote_output_files)) + vm.PullFile(local_dir, remote_output_tarfile) + + +def EmonCleanup(vm): + sub_dir = CheckIfExternalVersion(vm) + group = _GetGroup(vm) + cmds = ['cd {0}'.format(EMON_MAIN_DIR), + './{1}-installer.sh -u -C {0} --accept-license -ni -g {2} > 
/dev/null 2>&1 &' + .format(EMON_INSTALL_DIR, sub_dir, group)] + vm.RemoteCommand(' && '.join(cmds)) + if not FLAGS.emon_post_process_skip: + emon_edp_data = posixpath.join(INSTALL_DIR, '*edp*.csv') + if FLAGS.edp_script_type == 'python3': + emon_edp_data += ' summary.xlsx' + vm.RemoteCommand('sudo rm -f {}'.format(emon_edp_data), ignore_failure=False) + # rm emon shell scripts and output emon*.dat files + emon_all_files = posixpath.join(INSTALL_DIR, '*emon*') + vm.RemoteCommand('sudo rm -f {}'.format(emon_all_files), ignore_failure=False) + + # rm entire emon install folder + vm.RemoteCommand('sudo rm -fr {}'.format(EMON_MAIN_DIR)) + + +def EmonDirsExist(vm): + _, _, retVal = vm.RemoteCommandWithReturnCode('test -d {0}'.format(EMON_INSTALL_DIR), + ignore_failure=True) + # They do not exist + if retVal != 0: + return False + return True + + +def YumInstall(vm): + vm.InstallPackageGroup('Development Tools') + _, _, rc = vm.RemoteCommandWithReturnCode('test -d /usr/src/kernels/$(uname -r)', ignore_failure=True) + if rc != 0: + pkg_name = "kernel-devel-$(uname -r)" + if vm.HasPackage(pkg_name): + vm.InstallPackages(pkg_name) + elif _InstallCentosKernelDev(vm): + pass + else: + raise Exception("\n Could not find the kernel headers to match your kernel ! Please install it manually \n " + "There are two approaches to solve this :- \n" + "1) Get the kernel details with 'uname-r' and search for kernel headers on " + "CentOS repo and download them.\n" + "2)'sudo yum -y update' and then 'sudo reboot' - Please note that this may \n" + "update other packages on your system which may not be desirable to you\n") + vm.RemoteCommand('sudo yum -y install bzip2') + _Install(vm) + + +def AptInstall(vm): + vm.InstallPackages(' '.join(UBUNTU_PKGS)) + # since we always install the exact matching kernel headers by UBUNTU_PKGS + # there is no need to search for it like in YUM based kernel + _Install(vm) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/gcc.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/gcc.py new file mode 100644 index 0000000..f4178eb --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/gcc.py @@ -0,0 +1,77 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Module containing gcc installation and cleanup functions.""" + +""" This is a generic gcc installer and must offer installation + methods for various versions of gcc """ + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker.linux_packages import INSTALL_DIR +import logging + +FLAGS = flags.FLAGS + + +def _Install(vm): + """Installs the gcc package on the VM.""" + pass + + +def YumInstall(vm): + """Installs the gcc package on the VM.""" + # TODO: Figure out how to install gcc with yum + raise NotImplementedError + + +def AptInstall(vm): + """ Install gcc from the ppa ubuntu-toolchain repo """ + # On Ubuntu this is a symlink, save it for uninstall + vm.RemoteCommand('test -L /usr/bin/gcc && readlink /usr/bin/gcc > {0}/gcc-symlink-target'.format(INSTALL_DIR), + ignore_failure=True) + try: + # try first if distributions/versions have gcc-8 in their default repository + vm.AptUpdate() + vm.RemoteCommand('sudo apt-get -y install {0}-{1}' + .format(FLAGS.compiler, FLAGS.compiler_version)) + except errors.VirtualMachine.RemoteCommandError: + # try again if distributions/versions do NOT have gcc-8 in their default repository + vm.RemoteCommand('sudo add-apt-repository ppa:ubuntu-toolchain-r/test -y') + vm.AptUpdate() + vm.RemoteCommand('sudo apt-get -y install {0}-{1}' + .format(FLAGS.compiler, FLAGS.compiler_version)) + + try: + vm.RemoteCommand('if [[ -s {0}/gcc-symlink-target ]]; then cat {0}/gcc-symlink-target; fi'.format(INSTALL_DIR))[0].rstrip('\n') + if vm.RemoteCommandWithReturnCode('test -L /usr/bin/gcc')[2] == 0: + vm.RemoteCommand('sudo rm -f /usr/bin/gcc && sudo ln -s /usr/bin/gcc-{0} /usr/bin/gcc'.format(FLAGS.compiler_version)) + else: + vm.RemoteCommand('sudo ln -s /usr/bin/gcc-{0} /usr/bin/gcc'.format(FLAGS.compiler_version)) + except errors.VirtualMachine.RemoteCommandError: + # Install the distro default for gcc + # This should ALWAYS work, unless there are bigger issues + logging.warn("Falling back to your distro's default gcc!") + vm.InstallPackages('gcc') + + +def SwupdInstall(vm): + """ Installs a gcc containing bundle on the Clear Linux VM """ + raise NotImplementedError + + +def Uninstall(vm): + gcc = vm.RemoteCommand('if [[ -s {0}/gcc-symlink-target ]]; then cat {0}/gcc-symlink-target; fi'.format(INSTALL_DIR))[0].rstrip('\n') + if gcc != '': + vm.RemoteCommand('sudo rm /usr/bin/gcc && sudo ln -s /usr/bin/{0} /usr/bin/gcc'.format(gcc), ignore_failure=True) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/google_cloud_sdk.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/google_cloud_sdk.py new file mode 100644 index 0000000..c7d24a5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/google_cloud_sdk.py @@ -0,0 +1,43 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Module containing google cloud sdk installation function.""" + +import os + +from perfkitbenchmarker import vm_util + + +SDK_REPO = 'https://dl.google.com/dl/cloudsdk/release/google-cloud-sdk.tar.gz' +SDK_DIR = '%s/google-cloud-sdk' % vm_util.VM_TMP_DIR +SDK_INSTALL_FILE = '%s/install.sh' % SDK_DIR +GCLOUD_PATH = '%s/bin/gcloud' % SDK_DIR +GSUTIL_PATH = '%s/bin/gsutil' % SDK_DIR +KUBECTL_PATH = '%s/bin/kubectl' % SDK_DIR + + +def RunGcloud(vm, cmd): + return vm.RemoteCommand('export CLOUDSDK_CORE_DISABLE_PROMPTS=1 && %s %s ' + '--project %s --format json' % (GCLOUD_PATH, cmd, + vm.project)) + + +def Install(vm): + """Installs google cloud sdk on the VM.""" + vm.Install('wget') + vm.RemoteCommand('cd {0} && wget {1} && tar xzf {2} && rm {2}'.format( + vm_util.VM_TMP_DIR, SDK_REPO, os.path.basename(SDK_REPO))) + vm.RemoteCommand('%s --disable-installation-options --usage-report=false ' + '--path-update=false --bash-completion=false' + % SDK_INSTALL_FILE) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/habana.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/habana.py new file mode 100644 index 0000000..983acd5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/habana.py @@ -0,0 +1,143 @@ +from absl import flags +from perfkitbenchmarker import os_types + +FLAGS = flags.FLAGS +flags.DEFINE_string("habana_version", "1.3.0-499", "Specify the Habana driver version") + +TEST_HABANA_YAML = """ +apiVersion: batch/v1 +kind: Job +metadata: + name: test-habana +spec: + template: + spec: + containers: + - name: test-habana + image: busybox + command: [ "true" ] + resources: + limits: + habana.ai/gaudi: 1 + requests: + habana.ai/gaudi: 1 + restartPolicy: Never +""".replace('\n', '\\n') + +HABANA_REPO = '''[vault] +name=Habana Vault +baseurl=https://vault.habana.ai/artifactory/centos/8/8.3 +enabled=1 +gpgcheck=0 +gpgkey=https://vault.habana.ai/artifactory/centos/8/8.3/repodata/repomod.xml.key +repo_gpgcheck=0''' + +CONTAINERD_CONFIG = '''disabled_plugins = [] +version = 2 + + [plugins] + [plugins."io.containerd.grpc.v1.cri"] + [plugins."io.containerd.grpc.v1.cri".containerd] + default_runtime_name = "habana" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes] + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.habana] + runtime_type = "io.containerd.runc.v2" + [plugins."io.containerd.grpc.v1.cri".containerd.runtimes.habana.options] + BinaryName = "/usr/bin/habana-container-runtime" + [plugins."io.containerd.runtime.v1.linux"] + runtime = "habana-container-runtime"''' + +DOCKER_DAEMON = '''{ + "default-runtime": "habana", + "runtimes": { + "habana": { + "path": "/usr/bin/habana-container-runtime", + "runtimeArgs": [] + } + } +}''' + + +def RegisterKubernetesPlugins(vm): + vm.RemoteCommand("kubectl apply -f https://vault.habana.ai/artifactory/docker-k8s-device-plugin/habana-k8s-device-plugin.yaml") + vm.RemoteCommand(f"printf '{TEST_HABANA_YAML}' | kubectl apply -f -") + vm.RemoteCommand("timeout 300s bash -c 'until kubectl wait job test-habana --for=condition=complete; do sleep 1s; done'", ignore_failure=True) + vm.RemoteCommand(f"printf '{TEST_HABANA_YAML}' | kubectl delete -f -") + + +def RegisterWithContainerD(vm): + vm.RemoteCommand(f"echo '{CONTAINERD_CONFIG}' | sudo tee /etc/containerd/config.toml") + vm.RemoteCommand("sudo systemctl restart containerd") + vm.RemoteCommand("sleep 5s") + vm.RemoteCommand("sudo systemctl restart kubelet") + vm.RemoteCommand("sleep 5s") + + +def RegisterWithDocker(vm): + vm.RemoteCommand(f"echo 
'{DOCKER_DAEMON}' | sudo tee /etc/docker/daemon.json") + vm.RemoteCommand("sudo systemctl restart docker") + vm.RemoteCommand("sleep 5s") + + +def _InstallCentosKernelDev(vm): + mirror_base = 'https://mirrors.portworx.com/mirrors/http/mirror.centos.org/centos/' + if os_types.CENTOS7 == vm.OS_TYPE: + os_repo = '7' + elif os_types.CENTOS8 == vm.OS_TYPE: + os_repo = '8' + elif os_types.CENTOS_STREAM8 == vm.OS_TYPE: + os_repo = '8-stream' + else: + return False + + base_arq_pkg = '/BaseOS/x86_64/os/Packages/' + kernel_devel = 'kernel-devel-$(uname -r).rpm' + vm.InstallPackages('wget') + pkg_url = mirror_base + os_repo + base_arq_pkg + kernel_devel + _, _, wget_rc = vm.RemoteCommandWithReturnCode('wget {} -O /tmp/{}'.format(pkg_url, kernel_devel), + ignore_failure=True) + if wget_rc: + return False + + _, _, yum_rc = vm.RemoteCommandWithReturnCode('sudo yum -y install /tmp/{}'.format(kernel_devel), + ignore_failure=True) + vm.RemoteCommand('rm /tmp/{}'.format(kernel_devel)) + + if yum_rc: + return False + + return True + + +def YumInstall(vm): + if vm.OS_TYPE != os_types.CENTOS_STREAM8: + raise Exception("Only CentOS 8 Stream is supported!") + + # Install kernel devel for CentOS + if not _InstallCentosKernelDev(vm): + raise Exception("Failed to install kernel-devel for CentOS") + + habana_centos_version = FLAGS.habana_version + ".el8" + vm.RemoteCommand(f"echo '{HABANA_REPO}' | sudo tee /etc/yum.repos.d/Habana-Vault.repo") + vm.RemoteCommand("sudo yum makecache") + vm.InstallPackages("--enablerepo=extras epel-release") + vm.InstallPackages("dkms habanalabs-firmware-{0} habanalabs-firmware-tools-{0}". + format(habana_centos_version)) + vm.RemoteCommand("sudo yum install -y habanalabs-{0} habanalabs-graph-{0} habanalabs-container-runtime-{0}". + format(habana_centos_version)) + vm.RemoteCommand("sudo modprobe habanalabs_en") + vm.RemoteCommand("sudo modprobe habanalabs") + + +def AptInstall(vm): + vm.RemoteCommand('curl -X GET https://vault.habana.ai/artifactory/api/gpg/key/public | sudo apt-key add - && echo "deb https://vault.habana.ai/artifactory/debian $(grep VERSION_CODENAME= /etc/os-release | cut -f2 -d=) main" | sudo tee -a /etc/apt/sources.list.d/artifactory.list > /dev/null && sudo dpkg --configure -a') + vm.AptUpdate() + vm.InstallPackages("dkms libelf-dev") + vm.InstallPackages("habanalabs-firmware={0} habanalabs-firmware-tools={0}".format(FLAGS.habana_version)) + vm.InstallPackages("--allow-downgrades habanalabs-thunk={0} habanalabs-dkms={0} habanalabs-graph={0} habanalabs-container-runtime={0}".format(FLAGS.habana_version)) + vm.RemoteCommand("sudo modprobe habanalabs_en") + vm.RemoteCommand("sudo modprobe habanalabs") + + +def Uninstall(vm): + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/hadoop.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/hadoop.py new file mode 100644 index 0000000..c62b0f0 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/hadoop.py @@ -0,0 +1,406 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing Hadoop installation and cleanup functions. + +For documentation of commands to run at startup and shutdown, see: +http://hadoop.apache.org/docs/current/hadoop-project-dist/hadoop-common/ClusterSetup.html#Hadoop_Startup +""" +import functools +import logging +import os +import posixpath +import re +import time +from absl import flags +from perfkitbenchmarker import data +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import regex_util +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import aws_credentials + +FLAGS = flags.FLAGS + +flags.DEFINE_string('hadoop_version', '3.3.1', 'Version of Hadoop.') +flags.DEFINE_string('hadoop_bin_url', None, + 'Specify to override url from HADOOP_URL_BASE.') + +DATA_FILES = [ + 'hadoop/core-site.xml.j2', 'hadoop/yarn-site.xml.j2', + 'hadoop/hdfs-site.xml', 'hadoop/mapred-site.xml.j2', + 'hadoop/hadoop-env.sh.j2', 'hadoop/workers.j2' +] +START_HADOOP_SCRIPT = 'hadoop/start-hadoop.sh.j2' + +HADOOP_URL_BASE = 'https://downloads.apache.org/hadoop/common' + +HADOOP_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'hadoop') +HADOOP_BIN = posixpath.join(HADOOP_DIR, 'bin') +HADOOP_SBIN = posixpath.join(HADOOP_DIR, 'sbin') +HADOOP_CONF_DIR = posixpath.join(HADOOP_DIR, 'etc', 'hadoop') +HADOOP_PRIVATE_KEY = posixpath.join(HADOOP_CONF_DIR, 'hadoop_keyfile') +HADOOP_URL = 'https://archive.apache.org/dist/hadoop/core/hadoop-{0}/hadoop-{0}.tar.gz' +HADOOP_LOCAL_SCRATCH = posixpath.join(vm_util.VM_TMP_DIR, 'local_scratch', 'hadoop') + +PACKAGE_NAME = 'hadoop' +PREPROVISIONED_DATA = { + 'hadoop-{0}.tar.gz'.format('2.9.2'): + '3d2023c46b1156c1b102461ad08cbc17c8cc53004eae95dab40a1f659839f28a', + 'hadoop-{0}.tar.gz'.format('3.2.1'): + 'f66a3a4115b8f16c1077d1a198a06854dbef0e4233291712ed08d0a10629ed37' +} +PACKAGE_DATA_URL = { + 'hadoop-{0}.tar.gz'.format('2.9.2'): HADOOP_URL.format('2.9.2'), + 'hadoop-{0}.tar.gz'.format('3.2.1'): HADOOP_URL.format('3.2.1') +} +HADOOP_LIB_DIR = posixpath.join(HADOOP_DIR, 'share', 'hadoop', 'common', 'lib') +HADOOP_TOOLS_DIR = posixpath.join(HADOOP_DIR, 'share', 'hadoop', 'tools', 'lib') + +HADOOP_CMD = posixpath.join(HADOOP_BIN, 'hadoop') +HDFS_CMD = posixpath.join(HADOOP_BIN, 'hdfs') +YARN_CMD = posixpath.join(HADOOP_BIN, 'yarn') + + +def _GetHadoopURL(): + """Gets the Hadoop download url based on flags. + + The default is to look for the version `--hadoop_version` to download. + + Returns: + The Hadoop download url. + """ + + return '{0}/hadoop-{1}/hadoop-{1}.tar.gz'.format(HADOOP_URL_BASE, + FLAGS.hadoop_version) + + +def CheckPrerequisites(): + """Verifies that the required resources are present. + + Raises: + perfkitbenchmarker.data.ResourceNotFound: On missing resource. 
+ """ + for resource in DATA_FILES + [START_HADOOP_SCRIPT]: + data.ResourcePath(resource) + + +def _Install(vm): + vm.Install('openjdk') + vm.Install('curl') + hadoop_url = HADOOP_URL.format(FLAGS.hadoop_version) + hadoop_tar = hadoop_url.split('/')[-1] + if hadoop_tar not in PREPROVISIONED_DATA: + PREPROVISIONED_DATA[hadoop_tar] = '' # will only work with preprovision_ignore_checksum + PACKAGE_DATA_URL[hadoop_tar] = hadoop_url + vm.InstallPreprovisionedPackageData( + PACKAGE_NAME, + [hadoop_tar], + linux_packages.INSTALL_DIR + ) + hadoop_remote_path = posixpath.join(linux_packages.INSTALL_DIR, hadoop_tar) + # Intel - rather than overwrite, to ensure a pristine Hadoop, remove first then unpack + vm.RemoteCommand('test -d {0} && rm -rf {0}; mkdir {0} && tar -C {0} --strip-component=1 -xzf {1}'.format( + HADOOP_DIR, hadoop_remote_path)) + + +def YumInstall(vm): + """Installs Hadoop on the VM.""" + vm.InstallPackages('snappy') + _Install(vm) + + +def AptInstall(vm): + """Installs Hadoop on the VM.""" + libsnappy = 'libsnappy1' + if not vm.HasPackage(libsnappy): + # libsnappy's name on ubuntu16.04 is libsnappy1v5. Let's try that instead. + libsnappy = 'libsnappy1v5' + vm.InstallPackages(libsnappy) + _Install(vm) + + +def InstallGcsConnector(vm, install_dir=HADOOP_LIB_DIR): + """Install the GCS connector for Hadoop, which allows I/O to GCS.""" + connector_url = ('https://storage.googleapis.com/hadoop-lib/gcs/' + 'gcs-connector-hadoop{}-latest.jar'.format( + FLAGS.hadoop_version[0])) + vm.RemoteCommand('cd {0} && curl -O {1}'.format(install_dir, connector_url)) + + +# Scheduling constants. +# Give 90% of VM memory to YARN for scheduling. +# This is roguhly consistent with Dataproc 2.0+ +YARN_MEMORY_FRACTION = 0.9 +# Give 80% of the memory YARN schedules to the JVM Heap space. +# This is probably conservative on more memory mahcines, but is a traditonal +# rule of thumb. +HEAP_MEMORY_RATIO = 0.8 + +# Schedule slightly more tasks than vCPUs. This was found to be optimal for +# sorting 240 GB using standard GCE virtual machines with sufficient disk. +# Using a grid seach. +# TODO(pclay): Confirm results generalize to larger data sizes. +MAP_SLOTS_PER_CORE = 1.5 +REDUCE_SLOTS_PER_CORE = 4 / 3 + + +def _RenderConfig(vm, + master, + workers, + memory_fraction=YARN_MEMORY_FRACTION, + configure_s3=False, + extra_config={}): + """Load Hadoop Condfiguration on VM.""" + # Use first worker to get worker configuration + worker = workers[0] + num_workers = len(workers) + worker_cores = worker.NumCpusForBenchmark() + yarn_memory_mb = int((vm.total_memory_kb / 1024) * memory_fraction) + # Reserve 1 GB per worker for AppMaster containers. + usable_memory_mb = yarn_memory_mb - 1024 + + # YARN generally schedules based on memory (and ignores cores). We invert this + # by calculating memory in terms of cores. This means that changing + # machine memory will not change scheduling simply change the memory given to + # each task. + maps_per_node = int(worker_cores * MAP_SLOTS_PER_CORE) + map_memory_mb = usable_memory_mb // maps_per_node + map_heap_mb = int(map_memory_mb * HEAP_MEMORY_RATIO) + + reduces_per_node = int(worker_cores * REDUCE_SLOTS_PER_CORE) + reduce_memory_mb = usable_memory_mb // reduces_per_node + reduce_heap_mb = int(reduce_memory_mb * HEAP_MEMORY_RATIO) + + # This property is only used for generating data like teragen. + # Divide 2 to avoid tiny files on large clusters. 
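+  # Hypothetical sizing example: four 16-vCPU workers give maps_per_node = int(16 * 1.5) = 24,
+  # and therefore 24 * 4 = 96 map tasks for data generation.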
+ num_map_tasks = maps_per_node * num_workers + # This determines the number of reduce tasks in Terasort and is critical to + # scale with the cluster. + num_reduce_tasks = reduces_per_node * num_workers + + if vm.scratch_disks: + # TODO(pclay): support multiple scratch disks. A current suboptimal + # workaround is RAID0 local_ssds with --num_striped_disks. + scratch_dir = posixpath.join(vm.GetScratchDir(), 'hadoop') + else: + # Intel change + scratch_dir = HADOOP_LOCAL_SCRATCH + # End Intel change + + aws_access_key = None + aws_secret_key = None + optional_tools = None + if configure_s3: + aws_access_key, aws_secret_key = aws_credentials.GetCredentials() + optional_tools = 'hadoop-aws' + + context = { + 'master_ip': master.internal_ip, + 'worker_ips': [vm.internal_ip for vm in workers], + 'scratch_dir': scratch_dir, + 'worker_vcpus': worker_cores, + 'hadoop_private_key': HADOOP_PRIVATE_KEY, + 'user': vm.user_name, + 'yarn_memory_mb': yarn_memory_mb, + 'map_memory_mb': map_memory_mb, + 'map_heap_mb': map_heap_mb, + 'num_map_tasks': num_map_tasks, + 'reduce_memory_mb': reduce_memory_mb, + 'reduce_heap_mb': reduce_heap_mb, + 'num_reduce_tasks': num_reduce_tasks, + 'aws_access_key': aws_access_key, + 'aws_secret_key': aws_secret_key, + 'optional_tools': optional_tools + } + context.update(extra_config) + for file_name in DATA_FILES: + file_path = data.ResourcePath(file_name) + if (file_name == 'hadoop/workers.j2' and + FLAGS.hadoop_version.split('.')[0] < '3'): + file_name = 'hadoop/slaves.j2' + remote_path = posixpath.join(HADOOP_CONF_DIR, os.path.basename(file_name)) + if file_name.endswith('.j2'): + vm.RenderTemplate(file_path, os.path.splitext(remote_path)[0], context) + else: + vm.RemoteCopy(file_path, remote_path) + + +def _GetHDFSOnlineNodeCount(master): + cmd = HDFS_CMD + ' dfsadmin -report' + stdout = master.RemoteCommand(cmd)[0] + avail_str = regex_util.ExtractGroup(r'Live datanodes\s+\((\d+)\):', stdout) + return int(avail_str) + + +def _GetYARNOnlineNodeCount(master): + cmd = YARN_CMD + ' node -list -all' + stdout = master.RemoteCommand(cmd)[0] + return len(re.findall(r'RUNNING', stdout)) + + +def _WaitForNodes(vm, expected_nodes, GetJoinedNodesFunc): + num_tries = 5 + healthy = False + for _ in range(num_tries): + logging.info('Sleeping 5s to wait for nodes to join.') + time.sleep(5) + online_count = GetJoinedNodesFunc(vm) + if online_count == expected_nodes: + logging.info('Service running on all %d workers', expected_nodes) + healthy = True + break + else: + logging.info('Only {0} out of {1} nodes are up. 
Retrying'.format( + online_count, expected_nodes)) + if not healthy: + raise ValueError('Not all nodes running: {0} < {1}'.format( + online_count, expected_nodes)) + + +def _PopulateHostEntries(vm): + hostname = vm.RemoteCommand('hostname')[0] + host_entry = '{0} {1}'.format(vm.internal_ip, hostname.rstrip()) + cmd = "grep -qxF '{0}' {1} || echo '{0}' | sudo tee -a {1}".format( + host_entry, '/etc/hosts') + vm.RemoteCommand(cmd) + + +def _SetupHosts(vms): + vm_util.RunThreaded(lambda vm: _PopulateHostEntries(vm), vms) + + +def StartHadoop(master, workers, start_yarn=True, extra_config={}): + vms = [master] + workers + context = {'hadoop_dir': HADOOP_DIR, + 'vm_ips': [vm.internal_ip for vm in vms], + 'start_yarn': start_yarn} + + # make sure /etc/hosts is properly populated + _SetupHosts(vms) + + # HDFS setup and formatting, YARN startup + script_path = posixpath.join(HADOOP_DIR, 'start-hadoop.sh') + master.RenderTemplate(data.ResourcePath(START_HADOOP_SCRIPT), + script_path, context=context) + master.RemoteCommand('bash {0}'.format(script_path), should_log=True) + logging.info('Checking HDFS status.') + _WaitForNodes(master, len(workers), _GetHDFSOnlineNodeCount) + + if start_yarn: + logging.info('Checking YARN status.') + _WaitForNodes(master, len(workers), _GetYARNOnlineNodeCount) + + +def CleanHadoopTmp(vm, mountpoints): + """Delete Hadoop data from 'vm'.""" + for _, mountpoint in enumerate(mountpoints): + vm.RemoteCommand('rm -rf {0}'.format( + posixpath.join(mountpoint, 'hadoop_tmp'))) + + +def ConfigureAndStart(master, workers, start_yarn=True, configure_s3=False, extra_config={}): + """Configure hadoop on a cluster. + + Args: + master: VM. Master VM - will be the HDFS NameNode, YARN ResourceManager. + workers: List of VMs. Each VM will run an HDFS DataNode, YARN node. + start_yarn: bool. Start YARN and JobHistory server? Set to False if HDFS is + the only service required. Default: True. + configure_s3: Whether to configure Hadoop to access S3. 
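+    extra_config: dict. Extra key/value pairs merged into the template context
+        used when rendering the Hadoop configuration files.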
+ """ + vms = [master] + workers + fn = functools.partial( + _RenderConfig, master=master, workers=workers, configure_s3=configure_s3, extra_config=extra_config) + vm_util.RunThreaded(fn, vms) + + master.RemoteCommand("rm -f {0} && ssh-keygen -q -t rsa -N '' -f {0}".format( + HADOOP_PRIVATE_KEY)) + + public_key = master.RemoteCommand('cat {0}.pub'.format(HADOOP_PRIVATE_KEY))[0] + + def AddKey(vm): + vm.RemoteCommand('echo "{0}" >> ~/.ssh/authorized_keys'.format(public_key)) + + vm_util.RunThreaded(AddKey, vms) + + context = { + 'hadoop_dir': HADOOP_DIR, + 'vm_ips': [vm.internal_ip for vm in vms], + 'start_yarn': start_yarn + } + + # HDFS setup and formatting, YARN startup + script_path = posixpath.join(HADOOP_DIR, 'start-hadoop.sh') + master.RenderTemplate( + data.ResourcePath(START_HADOOP_SCRIPT), script_path, context=context) + master.RemoteCommand('bash {0}'.format(script_path), should_log=True) + + logging.info('Sleeping 10s for Hadoop nodes to join.') + time.sleep(10) + + logging.info('Checking HDFS status.') + hdfs_online_count = _GetHDFSOnlineNodeCount(master) + if hdfs_online_count != len(workers): + raise ValueError('Not all nodes running HDFS: {0} < {1}'.format( + hdfs_online_count, len(workers))) + else: + logging.info('HDFS running on all %d workers', len(workers)) + + if start_yarn: + logging.info('Checking YARN status.') + yarn_online_count = _GetYARNOnlineNodeCount(master) + if yarn_online_count != len(workers): + raise ValueError('Not all nodes running YARN: {0} < {1}'.format( + yarn_online_count, len(workers))) + else: + logging.info('YARN running on all %d workers', len(workers)) + try: + StartHadoop(master, workers, start_yarn, extra_config) + except: + raise Exception("Caught exception while starting Hadoop!") + + +def StopYARN(master): + """Stop YARN on all nodes.""" + master.RemoteCommand(posixpath.join(HADOOP_SBIN, 'stop-yarn.sh'), + ignore_failure=True) + + +def StopHDFS(master): + """Stop HDFS on all nodes.""" + master.RemoteCommand(posixpath.join(HADOOP_SBIN, 'stop-dfs.sh'), + ignore_failure=True) + + +def StopHistoryServer(master): + """Stop the MapReduce JobHistory daemon.""" + master.RemoteCommand('{0} stop historyserver'.format( + posixpath.join(HADOOP_SBIN, 'mr-jobhistory-daemon.sh')), + ignore_failure=True) + + +def StopAll(master): + """Stop HDFS and YARN. + + Args: + master: VM. HDFS NameNode/YARN ResourceManager. 
+ """ + StopHistoryServer(master) + StopYARN(master) + StopHDFS(master) + + +def CleanDatanode(vm): + """Delete Hadoop data from 'vm'.""" + vm.RemoteCommand('rm -rf {0}'.format( + posixpath.join(vm.GetScratchDir(), 'hadoop'))) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/k8s.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/k8s.py new file mode 100644 index 0000000..6f1b930 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/k8s.py @@ -0,0 +1,331 @@ +from absl import flags +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import os_types +from perfkitbenchmarker.linux_packages import docker_ce +from yaml import safe_load_all, dump_all +import logging +import posixpath +import uuid +import os + +FLAGS = flags.FLAGS +flags.DEFINE_string("k8s_repo_key_url", "https://packages.cloud.google.com/apt/doc/apt-key.gpg", + "Specify the installation repo GPG key url") +flags.DEFINE_string("k8s_repo_url", "http://apt.kubernetes.io/", + "Specify the installation repo url") +flags.DEFINE_string("k8s_version", "1.21", + "Specify the installation repo url") +flags.DEFINE_string("k8s_nfd_version", "0.10.1", + "Specify the node feature discovery version") +flags.DEFINE_boolean("k8s_kubevirt", False, + "Specify whether to install kubevirt") +flags.DEFINE_string("k8s_kubevirt_version", "v0.55.0", + "Specify the kubevert version") +flags.DEFINE_list("k8s_kubeadm_options", [], + "Specify the kubeadm options") +flags.DEFINE_list("k8s_nfd_scripts", [], + "Specify any extra node feature discovery scripts") +flags.DEFINE_list("k8s_image_mirrors", [], + "Specify docker image mirrors") +flags.DEFINE_enum("k8s_cni", "flannel", + ["flannel", "calico"], + "Specify the CNI") +flags.DEFINE_string("k8s_flannel_version", "v0.18.1", + "Specify the flannel CNI version") +flags.DEFINE_string("k8s_calico_version", "v3.23", + "Specify the calico CNI version") +flags.DEFINE_string("k8s_cni_options", "", + "Specify the CNI options") + + +NFD_SOURCE_D = "/etc/kubernetes/node-feature-discovery/source.d" +REGISTRY_CERTS_DIR = "registry-certs" +REGISTRY_YAML = """ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: registry + labels: + app: registry +spec: + replicas: 1 + selector: + matchLabels: + app: registry + template: + metadata: + labels: + app: registry + spec: + volumes: + - name: cert + secret: + secretName: registry-cert + containers: + - image: registry:2 + name: registry + imagePullPolicy: IfNotPresent + env: + - name: REGISTRY_HTTP_TLS_CERTIFICATE + value: "/certs/tls.crt" + - name: REGISTRY_HTTP_TLS_KEY + value: "/certs/tls.key" + ports: + - containerPort: 5000 + volumeMounts: + - name: cert + mountPath: /certs + tolerations: + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + - effect: NoSchedule + key: node-role.kubernetes.io/master + nodeSelector: + node-role.kubernetes.io/control-plane: "" +--- +apiVersion: v1 +kind: Service +metadata: + name: registry-service + labels: + app: registry-service +spec: + ports: + - port: {HOSTPORT} + targetPort: 5000 + externalIPs: + - "{HOSTIP}" + selector: + app: registry +""".replace("\n", "\\n") +POD_NETWORK_CIDR = "10.244.0.0/16" + + +def YumInstall(vm): + raise Exception("Not Implemented") + + +def AptInstall(vm): + vm.InstallPackages(f'apt-transport-https ca-certificates curl') + vm.RemoteCommand(f'curl {FLAGS.k8s_repo_key_url} | sudo apt-key add -') + vm.RemoteCommand(f"bash -c 'sudo -E add-apt-repository \"deb [arch=$(dpkg --print-architecture)] {FLAGS.k8s_repo_url} kubernetes-xenial 
main\"'") + vm.AptUpdate() + version, _ = vm.RemoteCommand(f"sudo apt-cache madison kubelet | grep {FLAGS.k8s_version} | cut -f2 -d'|' | tr -d ' ' | sort -V -r | head -n 1") + version = version.strip() + vm.InstallPackages(f'kubeadm={version} kubelet={version} kubectl={version}') + + +def _InstallNFDScripts(vm): + for script1 in FLAGS.k8s_nfd_scripts: + basename = os.path.basename(script1) + remote_file = posixpath.join(INSTALL_DIR, basename) + vm.RemoteCopy(script1, remote_file) + vm.RemoteCommand(f'sudo mv -f {remote_file} {NFD_SOURCE_D}') + vm.RemoteCommand(f'sudo chmod -R a+rx {NFD_SOURCE_D}') + vm.RemoteCommand('sudo systemctl restart kubelet') + + +@vm_util.Retry() +def _RetagImage(vm, image, image_retag): + if docker_ce.IsDocker(vm): + vm.RemoteCommand(f"sudo -E docker pull {image}") + vm.RemoteCommand(f"sudo -E docker tag {image} {image_retag}") + else: + vm.RemoteCommand(f"sudo ctr -n k8s.io i pull {image}") + vm.RemoteCommand(f"sudo ctr -n k8s.io i tag {image} {image_retag}") + + +def _PrepareSystem(vm): + vm.RemoteCommand("sudo systemctl stop named", ignore_failure=True) + vm.RemoteCommand("sudo systemctl disable named", ignore_failure=True) + + vm.RemoteCommand("sudo swapoff -a") + vm.RemoteCommand("sudo sed -ri 's/.*swap.*/#&/' /etc/fstab") + + vm.RemoteCommand("sudo modprobe overlay") + vm.RemoteCommand("sudo modprobe br_netfilter") + vm.RemoteCommand("printf 'overlay\\nbr_netfilter\\n' | sudo tee /etc/modules-load.d/k8s.conf") + + vm.RemoteCommand("printf 'net.bridge.bridge-nf-call-ip6tables=1\\nnet.bridge.bridge-nf-call-iptables=1\\nnet.ipv4.ip_forward=1\\nnet.netfilter.nf_conntrack_max=1000000' | sudo tee /etc/sysctl.d/k8s.conf") + vm.RemoteCommand("sudo sysctl --system") + + version = FLAGS.k8s_version.split(".") + if int(version[0]) <= 1 and int(version[1]) < 24: + vm.Install("docker_ce") + else: + vm.Install("containerd") + + for i in range(0,len(FLAGS.k8s_image_mirrors),2): + _RetagImage(vm, FLAGS.k8s_image_mirrors[i], FLAGS.k8s_image_mirrors[i+1]) + + vm.Install("k8s") + + +def _SetCNIOptions(): + if FLAGS.k8s_cni == "flannel": + FLAGS.k8s_kubeadm_options.append(f"--pod-network-cidr={POD_NETWORK_CIDR}") + elif FLAGS.k8s_cni == "calico": + FLAGS.k8s_kubeadm_options.append(f"--pod-network-cidr={POD_NETWORK_CIDR}") + + +def _InstallCNI(vm): + if FLAGS.k8s_cni == "flannel": + vm.RemoteCommand(f'kubectl create -f https://raw.githubusercontent.com/flannel-io/flannel/{FLAGS.k8s_flannel_version}/Documentation/kube-flannel.yml') + + elif FLAGS.k8s_cni == "calico": + if "vxlan" in FLAGS.k8s_cni_options: + manifest = f"https://projectcalico.docs.tigera.io/archive/{FLAGS.k8s_calico_version}/manifests/calico-vxlan.yaml" + manifest_mod = "-e \"/CALICO_IPV[4|6]POOL_VXLAN/{n;s|\\\"CrossSubnet\\\"|\\\"Always\\\"|}\"" + else: + manifest = f"https://projectcalico.docs.tigera.io/archive/{FLAGS.k8s_calico_version}/manifests/calico.yaml" + manifest_mod = "" + + vm.RemoteCommand(f"bash -c 'kubectl apply -f <(curl -o - {manifest} | sed -e \"s|^\\(\\s*\\)env:\\s*$|\\1env:\\n\\1 - name: CALICO_IPV4POOL_CIDR\\n\\1 value: \\\"{POD_NETWORK_CIDR}\\\"\\n\\1 - name: IP_AUTODETECTION_METHOD\\n\\1 value: \\\"can-reach={vm.internal_ip}\\\"|\" {manifest_mod})'") + + _RobustRemoteCommand(vm, "kubectl wait --namespace=kube-system pod --for=condition=ready -l k8s-app=calico-node") + else: + raise Exception(f"Kubernetes: Unsupported CNI {FLAGS.k8s_cni}") + + +def _InstallNFD(vm, all_vms): + _RobustRemoteCommand(vm, f"kubectl apply -k 
'https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref=v{FLAGS.k8s_nfd_version}'") + _RobustRemoteCommand(vm, "kubectl --namespace=node-feature-discovery wait pod --for=condition=ready -l app=nfd-worker") + if FLAGS.k8s_nfd_scripts: + vm_util.RunThreaded(lambda vm1: _InstallNFDScripts(vm1), all_vms) + + +def _InstallKubeVirt(vm): + if FLAGS.k8s_kubevirt: + vm.RemoteCommand(f"kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/{FLAGS.k8s_kubevirt_version}/kubevirt-operator.yaml") + vm.RemoteCommand(f"kubectl apply -f https://github.com/kubevirt/kubevirt/releases/download/{FLAGS.k8s_kubevirt_version}/kubevirt-cr.yaml") + _RobustRemoteCommand(vm, "kubectl -n kubevirt wait kv kubevirt --for condition=Available") + + +def _UpdateKubeadmConfigFile(): + cluster_config = { + "apiVersion": "kubeadm.k8s.io/v1beta2", + "kind": "ClusterConfiguration", + "networking": {}, + } + config_file = None + options = [] + for option1 in FLAGS.k8s_kubeadm_options: + if option1.startswith("--pod-network-cidr="): + _, _, cluster_config["networking"]["podSubnet"] = option1.partition("=") + elif option1.startswith("--image-repository="): + _, _, cluster_config["imageRepository"] = option1.partition("=") + elif option1.startswith("--config="): + _, _, config_file = option1.partition("=") + else: + options.append(option1) + + if config_file: + docs = [] + with open(config_file, "r") as fd: + cluster_config_doc = False + for doc1 in safe_load_all(fd): + if doc1 and "kind" in doc1: + if doc1["kind"] == "ClusterConfiguration": + doc1.update(cluster_config) + cluster_config_doc = True + docs.append(doc1) + if not cluster_config_doc: + docs.append(cluster_config) + + config_yaml = dump_all(docs).replace("\n","\\n").replace('"','\\"') + options.append("--config=<(printf \"{}\")".format(config_yaml)) + FLAGS.k8s_kubeadm_options = options + + +def CreateCluster(vm, workers, taint=True): + all_vms = list(set([vm] + workers)) + vm_util.RunThreaded(lambda vm1: _PrepareSystem(vm1), all_vms) + + _SetCNIOptions() + + _UpdateKubeadmConfigFile() + vm.RemoteCommand("sudo bash -c 'kubeadm init "+ ' '.join(FLAGS.k8s_kubeadm_options) + "'") + vm.RemoteCommand('mkdir -p $HOME/.kube') + vm.RemoteCommand('sudo cp -f /etc/kubernetes/admin.conf $HOME/.kube/config') + vm.RemoteCommand("bash -c 'sudo chown $(id -u):$(id -g) $HOME/.kube/config'") + + cmd, _ = vm.RemoteCommand('sudo kubeadm token create --print-join-command') + if not cmd.startswith("kubeadm join "): + raise Exception(f"Invalid kubeadm command: {cmd}") + vm_util.RunThreaded(lambda vm1: _RobustRemoteCommand(vm1, f'sudo {cmd}'), workers) + + _InstallCNI(vm) + _RobustRemoteCommand(vm, "kubectl --namespace=kube-system wait pod --all --for=condition=ready") + + _InstallNFD(vm, all_vms) + _InstallKubeVirt(vm) + + if not taint: + vm.RemoteCommand(f'kubectl taint node --all --overwrite node-role.kubernetes.io/master-') + vm.RemoteCommand(f'kubectl taint node --all --overwrite node-role.kubernetes.io/control-plane-') + + +def _OpenSSLConf(vm): + if vm.BASE_OS_TYPE == os_types.DEBIAN: + return "/etc/ssl/openssl.cnf" + if vm.BASE_OS_TYPE == os_types.RHEL: + return "/etc/pki/tls/openssl.conf" + raise Exception(f"{vm.BASE_OS_TYPE} Not Supported") + + +def _CopyCertsToWorker(vm, certs_dir): + vm.RemoteCommand(f"mkdir -p {certs_dir}") + vm.PushFile(f"{certs_dir}/client.cert", f"{certs_dir}/client.cert") + + if vm.BASE_OS_TYPE == os_types.DEBIAN: + vm.RemoteCommand(f"sudo cp -f {certs_dir}/client.cert 
/usr/local/share/ca-certificates/registry.crt") + vm.RemoteCommand("sudo update-ca-certificates") + elif vm.BASE_OS_TYPE == os_types.RHEL: + vm.RemoteCommand(f"sudo cp -f {certs_dir}/client.cert /etc/pki/ca-trust/source/anchors/registry.crt") + vm.RemoteCommand("sudo update-ca-trust") + else: + raise Exception(f"{vm.BASE_OS_TYPE} not supported") + + if docker_ce.IsDocker(vm): + vm.RemoteCommand("sudo systemctl restart docker") + else: + vm.RemoteCommand("sudo systemctl restart containerd") + vm.RemoteCommand("sudo systemctl restart kubelet") + + +def CreateRegistry(vm, workers, port=5000): + registry_url = f"{vm.internal_ip}:{port}" + certs_dir = vm_util.PrependTempDir(f"{REGISTRY_CERTS_DIR}/{registry_url}") + + vm_util.IssueCommand(["mkdir", "-p", certs_dir]) + vm_util.IssueCommand(["openssl", "req", "-newkey", "rsa:4096", "-nodes", "-sha256", "-keyout", f"{certs_dir}/client.key", "--addext", f"subjectAltName = IP:{vm.internal_ip}", "-x509", "-days", "365", "-out", f"{certs_dir}/client.cert", "-subj", f"/CN={vm.internal_ip}"]) + vm_util.IssueCommand(["chmod", "400", f"{certs_dir}/client.key"]) + #vm_util.IssueCommand(["cp", "-f", f"{certs_dir}/client.cert", f"{certs_dir}/ca.crt"]) + + vm_util.RunThreaded(lambda vm1: _CopyCertsToWorker(vm1, certs_dir), list(set(workers+[vm]))) + + vm.PushFile(f"{certs_dir}/client.key", f"{certs_dir}/client.key") + _RobustRemoteCommand(vm, f"kubectl create secret tls registry-cert --cert={certs_dir}/client.cert --key={certs_dir}/client.key") + + registry_yaml = REGISTRY_YAML.format(HOSTIP=vm.internal_ip, HOSTPORT=port).replace('"', '\\"') + vm.RemoteCommand(f"bash -c 'kubectl create -f <(printf \"{registry_yaml}\")'") + _RobustRemoteCommand(vm, f"kubectl wait pod --for=condition=ready -l app=registry") + return registry_url + + +@vm_util.Retry() +def _RobustRemoteCommand(vm, cmd): + vm.RemoteCommand(cmd) + + +def Uninstall(vm): + try: + vm.RemoteCommand("sudo kubeadm reset --force") + except: + vm.RemoteCommand("sudo rm -rf /etc/kubernetes $HOME/.config", ignore_failure=True) + vm.RemoteCommand("sudo kubeadm reset --force", ignore_failure=True) + vm.RemoteCommand("sudo ip link delete cni0", ignore_failure=True) + vm.RemoteCommand("sudo rm -rf /etc/cni /var/lib/cni", ignore_failure=True) + + if FLAGS.k8s_cni == "Flannel": + vm.RemoteCommand("sudo ip link delete flannel.1", ignore_failure=True) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/memcached_server.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/memcached_server.py new file mode 100644 index 0000000..b127f33 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/memcached_server.py @@ -0,0 +1,151 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +"""Module containing memcached server installation and cleanup functions.""" + +import logging +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import vm_util + +FLAGS = flags.FLAGS + +MEMCACHED_PORT = 11211 + +flags.DEFINE_integer('memcached_size_mb', 64, + 'Size of memcached cache in megabytes.') + +flags.DEFINE_integer('memcached_num_threads', 4, + 'Number of worker threads.') +flags.DEFINE_string('memcached_version', '1.6.9', + 'Memcached version to use.') + +DIR = linux_packages.INSTALL_DIR + + +def _Install(vm): + """Installs the memcached server on the VM.""" + vm.InstallPackages('wget') + vm.Install('build_tools') + vm.Install('event') + vm.RemoteCommand( + f'cd {DIR}; ' + 'wget https://www.memcached.org/files/' + f'memcached-{FLAGS.memcached_version}.tar.gz --no-check-certificate; ' + f'tar -zxvf memcached-{FLAGS.memcached_version}.tar.gz; ' + f'cd memcached-{FLAGS.memcached_version}; ' + './configure && make && sudo make install') + + +def YumInstall(vm): + """Installs the memcache package on the VM.""" + _Install(vm) + + +def AptInstall(vm): + """Installs the memcache package on the VM.""" + _Install(vm) + + +@vm_util.Retry(poll_interval=5, timeout=300, + retryable_exceptions=(errors.Resource.RetryableCreationError)) +def _WaitForServerUp(vm, port=MEMCACHED_PORT): + """Block until the memcached server is up and responsive. + + Will timeout after 5 minutes, and raise an exception. Before the timeout + expires any exceptions are caught and the status check is retried. + + We check the status of the server by issuing a 'stats' command. This should + return many lines of form 'STAT ' if the server is up and + running. + + Args: + vm: VirtualMachine memcached has been installed on. + port: int. Memcached port to use. + + Raises: + errors.Resource.RetryableCreationError when response is not as expected or + if there is an error connecting to the port or otherwise running the + remote check command. + """ + address = vm.internal_ip + + logging.info('Trying to connect to memcached at %s:%s', address, port) + try: + out, _ = vm.RemoteCommand( + '(echo -e "stats\n")| netcat -q 1 %s %s' % (address, port)) + if out.startswith('STAT '): + logging.info('memcached server stats received. Server up and running.') + return + except errors.VirtualMachine.RemoteCommandError as e: + raise errors.Resource.RetryableCreationError( + 'memcached server not up yet: %s.' % str(e)) + else: + raise errors.Resource.RetryableCreationError( + 'memcached server not up yet. Expected "STAT" but got "%s".' % out) + + +def ConfigureAndStart(vm, port=MEMCACHED_PORT, smp_affinity=False): + """Prepare the memcached server on a VM. + + Args: + vm: VirtualMachine to install and start memcached on. + port: int. Memcached port to use. + smp_affinity: Boolean. Whether or not to set smp_affinity. 
+ """ + vm.Install('memcached_server') + if smp_affinity: + vm.SetSmpAffinity() + + for scratch_disk in vm.scratch_disks: + vm.RemoteCommand('sudo umount %s' % scratch_disk.mount_point) + + vm.RemoteCommand( + 'ulimit -n 32768; ' + 'sudo nohup memcached ' + # Increase maximum memcached server connections + '-u $USER -c 32768 ' + f'-t {FLAGS.memcached_num_threads} ' + # update default port + f'-p {port} ' + # update memory size + f'-m {FLAGS.memcached_size_mb} ' + # update security config to allow incoming network + '-l 0.0.0.0 -v &> log &') + + _WaitForServerUp(vm, port) + logging.info('memcached server configured and started.') + + +def GetVersion(vm): + """Returns the version of the memcached server installed.""" + results, _ = vm.RemoteCommand('memcached -help |grep -m 1 "memcached"' + '| tr -d "\n"') + return results + + +def StopMemcached(vm): + vm.RemoteCommand('sudo pkill -9 memcached', ignore_failure=True) + + +def FlushMemcachedServer(ip, port): + vm_util.IssueCommand( + '(echo -e "flush_all\n" ; sleep 1)| netcat %s %s' % (ip, port)) + + +def AptUninstall(vm): + """Removes the memcache package on the VM.""" + del vm diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/proxy.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/proxy.py new file mode 100644 index 0000000..213f7a5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/proxy.py @@ -0,0 +1,5 @@ + +def AddProxy(vm, service): + vm.RemoteCommand(f'sudo mkdir -p /etc/systemd/system/{service}.service.d') + vm.RemoteCommand(f'printf "[Service]\\nEnvironment=\"HTTP_PROXY=$http_proxy\" \"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\\n" | sudo tee /etc/systemd/system/{service}.service.d/proxy.conf') + diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/ruby.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/ruby.py new file mode 100644 index 0000000..1bd4ecc --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/ruby.py @@ -0,0 +1,54 @@ +from perfkitbenchmarker import errors +from absl import flags +import logging + + +FLAGS = flags.FLAGS + +flags.DEFINE_string('ruby_version', '2.7', + 'Version of ruby to be installed') +EMON_MAIN_DIR = '/opt/emon' + + +def _Install(vm): + cmds = ['curl -sSL https://rvm.io/mpapis.asc | gpg2 --import -', + 'curl -sSL https://rvm.io/pkuczynski.asc | gpg2 --import -', + 'curl -L get.rvm.io | bash -s stable', + 'sed -i "1 i\source $HOME/.rvm/scripts/rvm" ~/.bashrc'] + vm.RemoteCommand(' && '.join(cmds)) + cmds = ['rvm reload', + 'rvm requirements run', + 'rvm install {0} '.format(FLAGS.ruby_version)] + vm.RemoteCommand(' && '.join(cmds)) + + +def _CompatibleRubyVersion(vm): + installed_ruby_version = vm.RemoteCommand("ruby -v | awk -F' ' '{{print $2}}'")[0].rstrip("\n") + # If apt or yum downloads a compatible version, then don't download rvm + logging.info("Installed Version: {}, Version Needed: {}" + .format(installed_ruby_version, FLAGS.ruby_version)) + if FLAGS.ruby_version in installed_ruby_version: + return True + vm.RemoteCommand('sudo touch {0}/pkb_ruby_file'.format(EMON_MAIN_DIR)) + return False + + +def YumInstall(vm): + """Installs the package on the VM.""" + vm.RemoteCommand('sudo yum install -y ruby') + if not _CompatibleRubyVersion(vm): + logging.info("Installing a newer compatible verison of ruby") + vm.InstallPackages('gcc-c++ patch readline readline-devel zlib zlib-devel libffi-devel ' + 'openssl-devel make bzip2 autoconf automake libtool bison sqlite-devel') + _Install(vm) + + +def AptInstall(vm): + 
"""Installs the package on the VM.""" + vm.RemoteCommand('sudo apt-get install -y ruby') + if not _CompatibleRubyVersion(vm): + logging.info("Installing a newer compatible verison of ruby") + vm.InstallPackages('gnupg2 curl g++ gcc autoconf automake bison libc6-dev libffi-dev ' + 'libgdbm-dev libncurses5-dev libsqlite3-dev libtool libyaml-dev make ' + 'pkg-config sqlite3 zlib1g-dev libgmp-dev libreadline-dev libssl-dev') + _Install(vm) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/runwith.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/runwith.py new file mode 100644 index 0000000..81364bf --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/runwith.py @@ -0,0 +1,95 @@ + +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import INSTALL_DIR +from absl import flags +import posixpath +import json + +FLAGS = flags.FLAGS +flags.DEFINE_boolean("run_with_vm", False, + "Whether to run with VM or docker") + + +def DockerRun(vm, options, image): + if FLAGS.run_with_vm: + stdout, _ = vm.RemoteCommand("sudo -E docker create {options} {image}".format(options=" ".join(options), image=image)) + container = stdout.strip() + + stdout, _ = vm.RemoteCommand(f"sudo -E docker container inspect {container}" + " -f '{{json .}}'") + inspect = json.loads(stdout) + + root_dir = posixpath.join(INSTALL_DIR, container) + vm.RemoteCommand(f"mkdir -p {root_dir}") + vm.RemoteCommand(f"sudo -E docker container export {container} | sudo tar xf - -C {root_dir}") + for mount1 in inspect["Mounts"]: + mount_path = posixpath.join(root_dir, mount1["Destination"][1:]) + vm.RemoteCommand(f"sudo mkdir -p {mount_path}") + vm.RemoteCommand("sudo mount --bind {src} {dst}".format(src=mount1["Source"], dst=mount_path)) + + mount_path = posixpath.join(root_dir, "proc") + vm.RemoteCommand(f"sudo mount -t proc /proc {mount_path}") + for mount1 in ["sys", "dev"]: + mount_path = posixpath.join(root_dir, mount1) + vm.RemoteCommand(f"sudo mount --rbind /{mount1} {mount_path}") + vm.RemoteCommand(f"sudo mount --make-rslave {mount_path}") + + cmds = [] + for cmd1 in inspect["Config"]["Cmd"]: + if " " in cmd1: + cmds.append("'{}'".format(cmd1.replace("'", "'\\''"))) + else: + cmds.append(cmd1) + + working_dir = inspect["Config"]["WorkingDir"] if inspect["Config"]["WorkingDir"] else "/" + logfile = posixpath.join(root_dir, ".logs") + user = inspect["Config"]["User"] if inspect["Config"]["User"] else "root" + stdout, _ = vm.RemoteCommand("sudo chroot --userspec={userspec} {root_dir} bash -c 'cd {dir};{envs} {cmd}' > {logfile} 2>&1 & echo $!".format(envs=" ".join(inspect["Config"]["Env"]), root_dir=root_dir, cmd=" ".join(cmds).replace("'", "'\\''"), dir=working_dir, logfile=logfile, userspec=user)) + pid = stdout.strip() + return (container, pid) + + stdout, _ = vm.RemoteCommand("sudo -E docker run --rm -d {options} {image}".format(options=" ".join(options), image=image)) + container = stdout.strip() + + # set perf cgroup + FLAGS.perf_options = FLAGS.perf_options.replace("%container%", f"docker/{container}") + + return (container, None) + + +def DockerWaitForCompletion(vm, container, timeout, logs_file): + if FLAGS.run_with_vm: + root_dir = posixpath.join(INSTALL_DIR, container) + export_logs = posixpath.join(root_dir, "export-logs") + vm.RemoteCommand(f"timeout {timeout}s cat {export_logs} > {logs_file}") + else: + vm.RemoteCommand(f"timeout {timeout}s sudo -E docker exec {container} cat /export-logs > {logs_file}") + + +def DockerLogsCmd(container, flags=""): + if 
FLAGS.run_with_vm: + root_dir = posixpath.join(INSTALL_DIR, container) + log_file = posixpath.join(root_dir, ".logs") + return f"sudo tail {flags} {log_file}" + + return f"sudo -E docker logs {flags} {container}" + + +def DockerRemove(vm, containers, container_id, pid): + if FLAGS.run_with_vm: + vm.RemoteCommand(f"sudo kill -9 {pid}", ignore_failure=True) + + stdout, _ = vm.RemoteCommand(f"sudo -E docker container inspect {container_id}" + " -f '{{json .}}'") + inspect = json.loads(stdout) + + root_dir = posixpath.join(INSTALL_DIR, container_id) + stdout, _ = vm.RemoteCommand(f"sudo mount | cut -f3 -d' ' | sort") + last_umounted = "na" + for mount_path in stdout.split("\n"): + if mount_path.startswith(root_dir + '/') and not mount_path.startswith(last_umounted): + vm.RemoteCommand(f"sudo umount -R {mount_path}", ignore_failure=True) + last_umounted = mount_path + + vm.RemoteCommand(f"sudo rm -rf {root_dir}") + + vm.RemoteCommand("sudo -E docker rm -v -f {}".format(" ".join(containers + [container_id])), ignore_failure = True) + diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/skopeo.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/skopeo.py new file mode 100644 index 0000000..313b726 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/skopeo.py @@ -0,0 +1,69 @@ + +from perfkitbenchmarker import vm_util +#from perfkitbenchmarker.linux_packages import k8s +from absl import flags + +FLAGS = flags.FLAGS +flags.DEFINE_list("skopeo_insecure_registries", [], + "Specify the skopeo command line options") +flags.DEFINE_list("skopeo_sut_accessible_registries", [], + "Specify the list of registries that the SUT has access.") +flags.DEFINE_string("skopeo_src_cert_dir", None, + "Specify the source certs directory") + + +def IsSUTAccessible(registry_url): + return registry_url in FLAGS.skopeo_sut_accessible_registries + + +def InspectImage(image, registry_url): + if registry_url: + if image.startswith(registry_url) and not IsSUTAccessible(registry_url): + options = [] + if FLAGS.skopeo_src_cert_dir: + options.append(f"--src-cert-dir={FLAGS.skopeo_src_cert_dir}") + for r1 in FLAGS.skopeo_insecure_registries: + r2 = r1 if r1.endswith("/") else r1 + "/" + if image.startswith(r2): + options.append("--src-tls-verify=false") + break + basename = image[len(registry_url):] + return (basename, options + [f"docker://{image}"]) + return None + + try: + vm_util.IssueCommand(["skopeo", "inspect", f"docker-daemon:{image}"]) + return (image, [f"docker-daemon:{image}"]) + except: + pass + return None + + +@vm_util.Retry() +def _RobustSkopeoCopy(cmd): + vm_util.IssueCommand(["sudo", "-E", "skopeo", "copy"] + cmd, timeout=None) + + +def CopyImagesToDocker(vm, images, port=12222): + daemon_url = f"localhost:{port}" + vm.RemoteCommand("", ssh_args = ["-fNL", f"{daemon_url}:/var/run/docker.sock"]) + for image1 in images: + _RobustSkopeoCopy([f"--dest-daemon-host=http://{daemon_url}"] + images[image1][1] + [f"docker-daemon:{image1}"]) + + # cancel forwaring + vm.RemoteCommand("", ssh_args = ["-O", "exit"]) + + +def CopyImagesToRegistry(vm, images, registry_url, port=16666): + local_registry_url = f"localhost:{port}" + vm.RemoteCommand("", ssh_args = ["-fNL", f"{local_registry_url}:{registry_url}"]) + + #certs_dir = vm_util.PrependTempDir(k8s.REGISTRY_CERTS_DIR) + for image1 in images: + local_image=f"{local_registry_url}/{images[image1][0]}" + #_RobustSkopeoCopy([f"--dest-cert-dir={certs_dir}", "--dest-tls-verify=false"] + images[image1][1] + [f"docker://{local_image}"]) + 
_RobustSkopeoCopy(["--dest-tls-verify=false"] + images[image1][1] + [f"docker://{local_image}"]) + + # cancel forwaring + vm.RemoteCommand("", ssh_args = ["-O", "exit"]) + diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/spark.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/spark.py new file mode 100644 index 0000000..8fe20a2 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/spark.py @@ -0,0 +1,237 @@ +# Copyright 2021 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing Apache Spark installation and configuration. + +For documentation of Spark Stalanone clusters, see: +https://spark.apache.org/docs/latest/spark-standalone.html +""" +import functools +import logging +import os +import posixpath +import time +from typing import Dict + +from absl import flags +from packaging import version +from perfkitbenchmarker import data +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import aws_credentials +from perfkitbenchmarker.linux_packages import hadoop + +FLAGS = flags.FLAGS + +SPARK_VERSION_FLAG = flags.DEFINE_string('spark_version', '3.1.2', + 'Version of spark.') + +DATA_FILES = [ + 'spark/spark-defaults.conf.j2', 'spark/spark-env.sh.j2', 'spark/workers.j2' +] + +SPARK_DIR = posixpath.join(linux_packages.INSTALL_DIR, 'spark') +SPARK_BIN = posixpath.join(SPARK_DIR, 'bin') +SPARK_SBIN = posixpath.join(SPARK_DIR, 'sbin') +SPARK_CONF_DIR = posixpath.join(SPARK_DIR, 'conf') +SPARK_PRIVATE_KEY = posixpath.join(SPARK_CONF_DIR, 'spark_keyfile') + +SPARK_SUBMIT = posixpath.join(SPARK_BIN, 'spark-submit') + + +def _SparkVersion() -> version.Version: + return version.Version(SPARK_VERSION_FLAG.value) + + +def _ScalaVersion() -> version.Version: + if _SparkVersion().major >= 3: + # https://spark.apache.org/docs/3.0.0/#downloading + return version.Version('2.12') + else: + # https://spark.apache.org/docs/2.4.0/#downloading + return version.Version('2.11') + + +def SparkExamplesJarPath() -> str: + return posixpath.join( + SPARK_DIR, 'examples/jars/', + f'spark-examples_{_ScalaVersion()}-{_SparkVersion()}.jar') + + +def CheckPrerequisites(): + """Verifies that the required resources are present. + + Raises: + perfkitbenchmarker.data.ResourceNotFound: On missing resource. + """ + for resource in DATA_FILES: + data.ResourcePath(resource) + + +def Install(vm): + vm.Install('openjdk') + vm.Install('python3') + vm.Install('curl') + # Needed for HDFS not as a dependency. + # Also used on Spark's classpath to support s3a client. + vm.Install('hadoop') + spark_url = ('https://downloads.apache.org/spark/spark-{0}/' + 'spark-{0}-bin-without-hadoop.tgz').format(FLAGS.spark_version) + vm.RemoteCommand( + ('mkdir {0} && curl -L {1} | ' + 'tar -C {0} --strip-components=1 -xzf -').format(SPARK_DIR, spark_url)) + + +# Scheduling constants. +# Give 90% of VM memory to Spark for scheduling. 
+# This is roughly consistent with Dataproc 2.0+ +SPARK_MEMORY_FRACTION = 0.9 +SPARK_DRIVER_MEMORY = 'spark.driver.memory' +SPARK_WORKER_MEMORY = 'spark.executor.memory' +SPARK_WORKER_VCPUS = 'spark.executor.cores' + + +def GetConfiguration(driver_memory_mb: int, + worker_memory_mb: int, + worker_cores: int, + num_workers: int, + configure_s3: bool = False) -> Dict[str, str]: + """Calculate Spark configuration. Shared between VMs and k8s.""" + conf = { + SPARK_DRIVER_MEMORY: f'{driver_memory_mb}m', + SPARK_WORKER_MEMORY: f'{worker_memory_mb}m', + SPARK_WORKER_VCPUS: str(worker_cores), + 'spark.executor.instances': str(num_workers), + # Tell spark not to run job if it can't schedule all workers. This would + # silently degrade performance. + 'spark.scheduler.minRegisteredResourcesRatio': '1' + } + if configure_s3: + # Configure S3A Hadoop's S3 filesystem + aws_access_key, aws_secret_key = aws_credentials.GetCredentials() + conf.update({ + # Use s3:// scheme to be consistent with EMR + 'spark.hadoop.fs.s3.impl': 'org.apache.hadoop.fs.s3a.S3AFileSystem', + 'spark.hadoop.fs.s3a.access.key': aws_access_key, + 'spark.hadoop.fs.s3a.secret.key': aws_secret_key, + }) + return conf + + +def _RenderConfig(vm, + leader, + workers, + memory_fraction=SPARK_MEMORY_FRACTION, + configure_s3=False): + """Load Spark Condfiguration on VM.""" + # Use first worker to get worker configuration + worker = workers[0] + worker_cores = worker.NumCpusForBenchmark() + worker_memory_mb = int((worker.total_memory_kb / 1024) * memory_fraction) + driver_memory_mb = int((leader.total_memory_kb / 1024) * memory_fraction) + + spark_conf = GetConfiguration( + driver_memory_mb=driver_memory_mb, + worker_memory_mb=worker_memory_mb, + worker_cores=worker_cores, + num_workers=len(workers), + configure_s3=configure_s3) + + if vm.scratch_disks: + # TODO(pclay): support multiple scratch disks. A current suboptimal + # workaround is RAID0 local_ssds with --num_striped_disks. + scratch_dir = posixpath.join(vm.GetScratchDir(), 'spark') + else: + scratch_dir = posixpath.join('/tmp/pkb/local_scratch', 'spark') + + optional_tools = None + if configure_s3: + optional_tools = 'hadoop-aws' + + context = { + 'spark_conf': spark_conf, + 'leader_ip': leader.internal_ip, + 'worker_ips': [vm.internal_ip for vm in workers], + 'scratch_dir': scratch_dir, + 'worker_vcpus': worker_cores, + 'spark_private_key': SPARK_PRIVATE_KEY, + 'worker_memory': spark_conf[SPARK_WORKER_MEMORY], + 'hadoop_cmd': hadoop.HADOOP_CMD, + 'python_cmd': 'python3', + 'optional_tools': optional_tools + } + + for file_name in DATA_FILES: + file_path = data.ResourcePath(file_name) + if file_name == 'spark/workers.j2': + # Spark calls its worker list slaves. + file_name = 'spark/slaves.j2' + remote_path = posixpath.join(SPARK_CONF_DIR, os.path.basename(file_name)) + if file_name.endswith('.j2'): + vm.RenderTemplate(file_path, os.path.splitext(remote_path)[0], context) + else: + vm.RemoteCopy(file_path, remote_path) + + +def _GetOnlineWorkerCount(leader): + """Curl Spark Master Web UI for worker status.""" + cmd = ('curl http://localhost:8080 ' + "| grep 'Alive Workers' " + "| grep -o '[0-9]\\+'") + stdout = leader.RemoteCommand(cmd)[0] + return int(stdout) + + +def ConfigureAndStart(leader, workers, configure_s3=False): + """Run Spark Standalone and HDFS on a cluster. + + Args: + leader: VM. leader VM - will be the HDFS NameNode, Spark Master. + workers: List of VMs. Each VM will run an HDFS DataNode, Spark Worker. + configure_s3: Whether to configure Spark to access S3. 
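+
+  Raises:
+    ValueError: If not all of the Spark workers register with the master.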
+ """ + # Start HDFS + hadoop.ConfigureAndStart(leader, workers, start_yarn=False) + + vms = [leader] + workers + # If there are no workers set up in pseudo-distributed mode, where the leader + # node runs the worker daemons. + workers = workers or [leader] + fn = functools.partial( + _RenderConfig, leader=leader, workers=workers, configure_s3=configure_s3) + vm_util.RunThreaded(fn, vms) + + leader.RemoteCommand("rm -f {0} && ssh-keygen -q -t rsa -N '' -f {0}".format( + SPARK_PRIVATE_KEY)) + + public_key = leader.RemoteCommand('cat {0}.pub'.format(SPARK_PRIVATE_KEY))[0] + + def AddKey(vm): + vm.RemoteCommand('echo "{0}" >> ~/.ssh/authorized_keys'.format(public_key)) + + vm_util.RunThreaded(AddKey, vms) + + # HDFS setup and formatting, Spark startup + leader.RemoteCommand( + 'bash {0}/start-all.sh'.format(SPARK_SBIN), should_log=True) + + logging.info('Sleeping 10s for Spark nodes to join.') + time.sleep(10) + + logging.info('Checking Spark status.') + worker_online_count = _GetOnlineWorkerCount(leader) + if worker_online_count != len(workers): + raise ValueError('Not all nodes running Spark: {0} < {1}'.format( + worker_online_count, len(workers))) + else: + logging.info('Spark running on all %d workers', len(workers)) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/storage_tools.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/storage_tools.py new file mode 100644 index 0000000..cedbc7c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/storage_tools.py @@ -0,0 +1,20 @@ +"""Module containing storage_tools installation. + +This provides nvme-cli among other useful storage tools. +""" + + +def _Install(vm): + vm.InstallPackages('nvme-cli') + + +def YumInstall(vm): + _Install(vm) + + +def AptInstall(vm): + _Install(vm) + + +def SwupdInstall(vm): + vm.InstallPackages('storage-utils') diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_packages/stream.py b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/stream.py new file mode 100644 index 0000000..685355e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_packages/stream.py @@ -0,0 +1,96 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +"""Module containing STREAM installation and cleanup functions.""" + +from perfkitbenchmarker.linux_packages import INSTALL_DIR +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import os_types +from perfkitbenchmarker import vm_util + +FLAGS = flags.FLAGS + +STREAM_DIR = '%s/STREAM' % INSTALL_DIR +STREAM_PATH = STREAM_DIR + '/stream' +GIT_REPO = 'https://github.com/jeffhammond/STREAM.git' + + +def GetStreamExec(vm): + if FLAGS.stream_omp_num_threads == 0: + num_threads = vm.num_cpus + else: + num_threads = FLAGS.stream_omp_num_threads + + if num_threads <= 1: + raise errors.Setup.InvalidSetupError( + 'Stream could not be run on machine with CPU/Threads number <= 1') + + if FLAGS.stream_binary_url: + source_psxe_vars = "source /opt/intel/psxe_runtime/linux/bin/psxevars.sh" + return 'export OMP_NUM_THREADS={0};export GOMP_CPU_AFFINITY="0-{1}:1";{2} && {3}'.format(num_threads, + num_threads - 1, + source_psxe_vars, + STREAM_PATH) + else: + return 'export OMP_NUM_THREADS={0};export GOMP_CPU_AFFINITY="0-{1}:1";{2}'.format(num_threads, + num_threads - 1, + STREAM_PATH) + + +def _GetInternalResources(vm, url): + stream_path = vm_util.PrependTempDir('stream') + vm_util.IssueCommand("curl -SL {0} -o {1}".format(url, stream_path).split(), timeout=None) + vm.RemoteCommand("mkdir -p {0}".format(STREAM_DIR)) + vm.RemoteCopy(stream_path, STREAM_PATH) + vm.RemoteCommand("sudo chmod +x {0}".format(STREAM_PATH)) + + +def _Install(vm): + """Installs the stream package on the VM.""" + if FLAGS.stream_binary_url: + vm.Install('intel_parallel_studio_runtime') + _GetInternalResources(vm, FLAGS.stream_binary_url) + else: + vm.RemoteCommand('git clone {0} {1}'.format(GIT_REPO, STREAM_DIR)) + vm.RemoteCommand('cd {0}; {1} {2} ' + '-DSTREAM_ARRAY_SIZE={3} -DNTIMES={4} -DOFFSET={5} ' + 'stream.c -o stream'.format(STREAM_DIR, + FLAGS.compiler, + FLAGS.stream_compiler_flags, + FLAGS.stream_array_size, + FLAGS.stream_ntimes, + FLAGS.stream_offset) + ) + + +def YumInstall(vm): + """Installs the stream package on the VM.""" + # for RHEL 7 + if vm.OS_TYPE == os_types.RHEL: + raise NotImplementedError + vm.Install('build_tools') + _Install(vm) + + +def AptInstall(vm): + """Installs the stream package on the VM.""" + vm.Install('build_tools') + vm.Install('compiler') + _Install(vm) + + +def Uninstall(vm): + vm.RemoteCommand('sudo rm -rf {0}'.format(STREAM_DIR)) diff --git a/script/cumulus/pkb/perfkitbenchmarker/linux_virtual_machine.py b/script/cumulus/pkb/perfkitbenchmarker/linux_virtual_machine.py new file mode 100644 index 0000000..cd15599 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/linux_virtual_machine.py @@ -0,0 +1,3035 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing mixin classes for linux virtual machines. + +These classes allow installation on both Debian and RHEL based linuxes. 
+They also handle some initial setup (especially on RHEL based linuxes +since by default sudo commands without a tty don't work) and +can restore the VM to the state it was in before packages were +installed. + +To install a package on a VM, just call vm.Install(package_name). +The package name is just the name of the package module (i.e. the +file name minus .py). The framework will take care of all cleanup +for you. +""" + +import abc +import collections +import copy +import logging +import os +import pipes +import posixpath +import re +import threading +import time +from typing import Dict, Set +import uuid + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import os_types +from perfkitbenchmarker import regex_util +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util + +import yaml + +FLAGS = flags.FLAGS + + +OS_PRETTY_NAME_REGEXP = r'PRETTY_NAME="(.*)"' +CLEAR_BUILD_REGEXP = r'Installed version:\s*(.*)\s*' +UPDATE_RETRIES = 5 +DEFAULT_SSH_PORT = 22 +REMOTE_KEY_PATH = '~/.ssh/id_rsa' +CONTAINER_MOUNT_DIR = '/mnt' +CONTAINER_WORK_DIR = '/root' + +# This pair of scripts used for executing long-running commands, which will be +# resilient in the face of SSH connection errors. +# EXECUTE_COMMAND runs a command, streaming stdout / stderr to a file, then +# writing the return code to a file. An exclusive lock is acquired on the return +# code file, so that other processes may wait for completion. +EXECUTE_COMMAND = 'execute_command.py' +# WAIT_FOR_COMMAND waits on the file lock created by EXECUTE_COMMAND, +# then copies the stdout and stderr, exiting with the status of the command run +# by EXECUTE_COMMAND. +WAIT_FOR_COMMAND = 'wait_for_command.py' + +_DEFAULT_DISK_FS_TYPE = 'ext4' +_DEFAULT_DISK_MOUNT_OPTIONS = 'discard' +_DEFAULT_DISK_FSTAB_OPTIONS = 'defaults' + +# regex for parsing lscpu and /proc/cpuinfo +_COLON_SEPARATED_RE = re.compile(r'^\s*(?P.*?)\s*:\s*(?P.*?)\s*$') + +flags.DEFINE_bool('setup_remote_firewall', False, + 'Whether PKB should configure the firewall of each remote' + 'VM to make sure it accepts all internal connections.') + +flags.DEFINE_list('sysctl', [], + 'Sysctl values to set. This flag should be a comma-separated ' + 'list of path=value pairs. Each pair will be appended to' + '/etc/sysctl.conf. The presence of any items in this list ' + 'will cause a reboot to occur after VM prepare. ' + 'For example, if you pass ' + '--sysctls=vm.dirty_background_ratio=10,vm.dirty_ratio=25, ' + 'PKB will append "vm.dirty_background_ratio=10" and' + '"vm.dirty_ratio=25" on separate lines to /etc/sysctrl.conf' + ' and then the machine will be rebooted before starting' + 'the benchmark.') + +flags.DEFINE_list( + 'set_files', + [], + 'Arbitrary filesystem configuration. This flag should be a ' + 'comma-separated list of path=value pairs. Each value will ' + 'be written to the corresponding path. For example, if you ' + 'pass --set_files=/sys/kernel/mm/transparent_hugepage/enabled=always, ' + 'then PKB will write "always" to ' + '/sys/kernel/mm/transparent_hugepage/enabled before starting ' + 'the benchmark.') + +flags.DEFINE_bool('network_enable_BBR', False, + 'A shortcut to enable BBR congestion control on the network. 
' + 'equivalent to appending to --sysctls the following values ' + '"net.core.default_qdisc=fq, ' + '"net.ipv4.tcp_congestion_control=bbr" ' + 'As with other sysctrls, will cause a reboot to happen.') + +flags.DEFINE_integer('num_disable_cpus', None, + 'Number of CPUs to disable on the virtual machine.' + 'If the VM has n CPUs, you can disable at most n-1.', + lower_bound=1) +flags.DEFINE_integer('disk_fill_size', 0, + 'Size of file to create in GBs.') +flags.DEFINE_enum('disk_fs_type', _DEFAULT_DISK_FS_TYPE, + [_DEFAULT_DISK_FS_TYPE, 'xfs'], + 'File system type used to format disk.') +flags.DEFINE_integer( + 'disk_block_size', None, 'Block size to format disk with.' + 'Defaults to 4096 for ext4.') + +flags.DEFINE_bool( + 'enable_transparent_hugepages', None, 'Whether to enable or ' + 'disable transparent hugepages. If unspecified, the setting ' + 'is unchanged from the default in the OS.') + +flags.DEFINE_integer( + 'ssh_retries', 10, 'Default number of times to retry SSH.', lower_bound=0) + +flags.DEFINE_integer( + 'scp_connect_timeout', 30, 'timeout for SCP connection.', lower_bound=0) + +flags.DEFINE_string( + 'append_kernel_command_line', None, + 'String to append to the kernel command line. The presence of any ' + 'non-empty string will cause a reboot to occur after VM prepare. ' + 'If unspecified, the kernel command line will be unmodified.') + +flags.DEFINE_integer( + 'tcp_max_receive_buffer', None, + 'Changes the third component of the sysctl value net.ipv4.tcp_rmem. ' + 'This sets the maximum receive buffer for TCP socket connections in bytes. ' + 'Increasing this value may increase single stream TCP throughput ' + 'for high latency connections') + +flags.DEFINE_integer( + 'tcp_max_send_buffer', None, + 'Changes the third component of the sysctl value net.ipv4.tcp_wmem. ' + 'This sets the maximum send buffer for TCP socket connections in bytes. ' + 'Increasing this value may increase single stream TCP throughput ' + 'for high latency connections') + +flags.DEFINE_integer( + 'rmem_max', None, + 'Sets the sysctl value net.core.rmem_max. This sets the max OS ' + 'receive buffer size in bytes for all types of connections') + +flags.DEFINE_integer( + 'wmem_max', None, + 'Sets the sysctl value net.core.wmem_max. This sets the max OS ' + 'send buffer size in bytes for all types of connections') + +flags.DEFINE_bool( + 'skip_package_restore', False, + 'Skip package restoring in the Teardown phase') + +flags.DEFINE_boolean('gce_hpc_tools', False, + 'Whether to apply the hpc-tools environment script.') + +GOVERNORS = ["conservative", "ondemand", "userspace", "powersave", "performance", "schedutil"] +flags.DEFINE_enum('scaling_governor', None, GOVERNORS, 'If specified, sets the cpufreq governor.') + +flags.DEFINE_boolean('disable_smt', False, + 'Whether to disable SMT (Simultaneous Multithreading) ' + 'in BIOS.') + +flags.DEFINE_boolean('enable_rsync', False, + 'Whether to enable rsync as the remote copy app') + +_DISABLE_YUM_CRON = flags.DEFINE_boolean( + 'disable_yum_cron', True, 'Whether to disable the cron-run yum service.') + +RETRYABLE_SSH_RETCODE = 255 + + +class CpuVulnerabilities: + """The 3 different vulnerablity statuses from vm.cpu_vulernabilities. + + Example input: + /sys/devices/system/cpu/vulnerabilities/itlb_multihit:KVM: Vulnerable + Is put into vulnerability with a key of "itlb_multihit" and value "KVM" + + Unparsed lines are put into the unknown dict. 
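+
+  asdict then reports this entry as "vulnerability_itlb_multihit": "KVM" and
+  lists "itlb_multihit" under the "vulnerabilities" key.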
+ """ + + def __init__(self): + self.mitigations: Dict[str, str] = {} + self.vulnerabilities: Dict[str, str] = {} + self.notaffecteds: Set[str] = set() + self.unknowns: Dict[str, str] = {} + + def AddLine(self, full_line: str) -> None: + """Parses a line of output from the cpu/vulnerabilities/* files.""" + if not full_line: + return + file_path, line = full_line.split(':', 1) + file_name = posixpath.basename(file_path) + if self._AddMitigation(file_name, line): + return + if self._AddVulnerability(file_name, line): + return + if self._AddNotAffected(file_name, line): + return + self.unknowns[file_name] = line + + def _AddMitigation(self, file_name, line): + match = re.match('^Mitigation: (.*)', line) or re.match( + '^([^:]+): Mitigation: (.*)$', line) + if match: + self.mitigations[file_name] = ':'.join(match.groups()) + return True + + def _AddVulnerability(self, file_name, line): + match = re.match('^Vulnerable: (.*)', line) or re.match( + '^Vulnerable$', line) or re.match('^([^:]+): Vulnerable$', line) + if match: + self.vulnerabilities[file_name] = ':'.join(match.groups()) + return True + + def _AddNotAffected(self, file_name, line): + match = re.match('^Not affected$', line) + if match: + self.notaffecteds.add(file_name) + return True + + @property + def asdict(self) -> Dict[str, str]: + """Returns the parsed CPU vulnerabilities as a dict.""" + ret = {} + if self.mitigations: + ret['mitigations'] = ','.join(sorted(self.mitigations)) + for key, value in self.mitigations.items(): + ret[f'mitigation_{key}'] = value + if self.vulnerabilities: + ret['vulnerabilities'] = ','.join(sorted(self.vulnerabilities)) + for key, value in self.vulnerabilities.items(): + ret[f'vulnerability_{key}'] = value + if self.unknowns: + ret['unknowns'] = ','.join(self.unknowns) + for key, value in self.unknowns.items(): + ret[f'unknown_{key}'] = value + if self.notaffecteds: + ret['notaffecteds'] = ','.join(sorted(self.notaffecteds)) + return ret + + +class BaseLinuxMixin(virtual_machine.BaseOsMixin): + """Class that holds Linux related VM methods and attributes.""" + + # If multiple ssh calls are made in parallel using -t it will mess + # the stty settings up and the terminal will become very hard to use. + # Serializing calls to ssh with the -t option fixes the problem. + _pseudo_tty_lock = threading.Lock() + + # TODO(user): Remove all uses of Python 2. + PYTHON_2_PACKAGE = 'python2' + + def __init__(self, *args, **kwargs): + super(BaseLinuxMixin, self).__init__(*args, **kwargs) + # N.B. If you override ssh_port you must override remote_access_ports and + # primary_remote_access_port. + self.ssh_port = DEFAULT_SSH_PORT + self.remote_access_ports = [self.ssh_port] + self.primary_remote_access_port = self.ssh_port + self.has_private_key = False + + self._remote_command_script_upload_lock = threading.Lock() + self._has_remote_command_script = False + self._needs_reboot = False + self._lscpu_cache = None + self._partition_table = {} + self._proccpu_cache = None + self._smp_affinity_script = None + + def _Suspend(self): + """Suspends a VM.""" + raise NotImplementedError() + + def _Resume(self): + """Resumes a VM.""" + raise NotImplementedError() + + def _BeforeSuspend(self): + pass + + def _CreateVmTmpDir(self): + self.RemoteCommand('mkdir -p %s' % vm_util.VM_TMP_DIR) + + def _SetTransparentHugepages(self): + """Sets transparent hugepages based on --enable_transparent_hugepages. + + If the flag is unset (None), this is a nop. 
+ """ + if FLAGS.enable_transparent_hugepages is None: + return + setting = 'always' if FLAGS.enable_transparent_hugepages else 'never' + self.RemoteCommand( + 'echo %s | sudo tee /sys/kernel/mm/transparent_hugepage/enabled' % + setting) + self.os_metadata['transparent_hugepage'] = setting + + def _SetupRobustCommand(self): + """Sets up the RobustRemoteCommand tooling. + + This includes installing python3 and pushing scripts required by + RobustRemoteCommand to this VM. There is a check to skip if previously + installed. + """ + with self._remote_command_script_upload_lock: + if not self._has_remote_command_script: + # Python3 is needed for RobustRemoteCommands + self.Install('python3') + + for f in (EXECUTE_COMMAND, WAIT_FOR_COMMAND): + remote_path = os.path.join(vm_util.VM_TMP_DIR, os.path.basename(f)) + if os.path.basename(remote_path): + self.RemoteCommand('sudo rm -f ' + remote_path) + self.PushDataFile(f, remote_path) + self._has_remote_command_script = True + + def RobustRemoteCommand(self, command, should_log=False, timeout=None, + ignore_failure=False): + """Runs a command on the VM in a more robust way than RemoteCommand. + + This is used for long-running commands that might experience network issues + that would normally interrupt a RemoteCommand and fail to provide results. + Executes a command via a pair of scripts on the VM: + + * EXECUTE_COMMAND, which runs 'command' in a nohupped background process. + * WAIT_FOR_COMMAND, which first waits on confirmation that EXECUTE_COMMAND + has acquired an exclusive lock on a file with the command's status. This + is done by waiting for the existence of a file written by EXECUTE_COMMAND + once it successfully acquires an exclusive lock. Once confirmed, + WAIT_COMMAND waits to acquire the file lock held by EXECUTE_COMMAND until + 'command' completes, then returns with the stdout, stderr, and exit status + of 'command'. + + Temporary SSH failures (where ssh returns a 255) while waiting for the + command to complete will be tolerated and safely retried. However, if + remote command actually returns 255, SSH will return 1 instead to bypass + retry behavior. + + Args: + command: The command to run. + should_log: Whether to log the command's output at the info level. The + output is always logged at the debug level. + timeout: The timeout for the command in seconds. + ignore_failure: Ignore any failure if set to true. + + Returns: + A tuple of stdout, stderr from running the command. + + Raises: + RemoteCommandError: If there was a problem establishing the connection, or + the command fails. 
+ """ + self._SetupRobustCommand() + + execute_path = os.path.join(vm_util.VM_TMP_DIR, + os.path.basename(EXECUTE_COMMAND)) + wait_path = os.path.join(vm_util.VM_TMP_DIR, + os.path.basename(WAIT_FOR_COMMAND)) + + uid = uuid.uuid4() + file_base = os.path.join(vm_util.VM_TMP_DIR, 'cmd%s' % uid) + wrapper_log = file_base + '.log' + stdout_file = file_base + '.stdout' + stderr_file = file_base + '.stderr' + status_file = file_base + '.status' + exclusive_file = file_base + '.exclusive' + + if not isinstance(command, str): + command = ' '.join(command) + + start_command = ['nohup', 'python3', execute_path, + '--stdout', stdout_file, + '--stderr', stderr_file, + '--status', status_file, + '--exclusive', exclusive_file, + '--command', pipes.quote(command)] # pyformat: disable + if timeout: + start_command.extend(['--timeout', str(timeout)]) + + start_command = '%s 1> %s 2>&1 &' % (' '.join(start_command), + wrapper_log) + self.RemoteCommand(start_command) + # sleep 1 to prevent _WaitForCommand from starting before start_command + time.sleep(1) + + def _WaitForCommand(): + wait_command = ['python3', wait_path, + '--status', status_file, + '--exclusive', exclusive_file] # pyformat: disable + stdout = '' + while 'Command finished.' not in stdout: + stdout, _ = self.RemoteCommand( + ' '.join(wait_command), should_log=should_log, timeout=1800) + wait_command.extend([ + '--stdout', stdout_file, + '--stderr', stderr_file, + '--delete', + ]) # pyformat: disable + return self.RemoteCommand(' '.join(wait_command), should_log=should_log, + ignore_failure=ignore_failure) + + try: + return _WaitForCommand() + except errors.VirtualMachine.RemoteCommandError: + # In case the error was with the wrapper script itself, print the log. + stdout, _ = self.RemoteCommand('cat %s' % wrapper_log, should_log=False) + if stdout.strip(): + logging.warning('Exception during RobustRemoteCommand. 
' + 'Wrapper script log:\n%s', stdout) + raise + + def SetupRemoteFirewall(self): + """Sets up IP table configurations on the VM.""" + self.RemoteHostCommand('sudo iptables -A INPUT -j ACCEPT') + self.RemoteHostCommand('sudo iptables -A OUTPUT -j ACCEPT') + + def SetupProxy(self): + """Sets up proxy configuration variables for the cloud environment.""" + env_file = '/etc/environment' + commands = [] + command_template = "grep -qxF '{0}' {1} || echo '{0}' | sudo tee -a {1}" + + if FLAGS.http_proxy: + commands.append(command_template.format( + 'http_proxy=' + FLAGS.http_proxy, env_file)) + + if FLAGS.https_proxy: + commands.append(command_template.format( + 'https_proxy=' + FLAGS.https_proxy, env_file)) + + if FLAGS.ftp_proxy: + commands.append(command_template.format( + 'ftp_proxy=' + FLAGS.ftp_proxy, env_file)) + + if FLAGS.no_proxy: + commands.append(command_template.format( + 'no_proxy=' + FLAGS.no_proxy, env_file)) + + if commands: + self.RemoteCommand(';'.join(commands)) + if FLAGS.ssh_reuse_connections: + # This will stop the existing connection once current commands + # exit, and will allow a new login and /etc/environment evaluation + self.RemoteCommand('', ssh_args=['-O', 'stop']) + + def SetupPackageManager(self): + """Specific Linux flavors should override this.""" + pass + + def OfflineModePrepare(self): + """Specific Linux flavors should override this.""" + pass + + def PrepareVMEnvironment(self): + super(BaseLinuxMixin, self).PrepareVMEnvironment() + self.SetupProxy() + self._CreateVmTmpDir() + self._SetTransparentHugepages() + if FLAGS.setup_remote_firewall: + self.SetupRemoteFirewall() + if self.install_packages: + self._CreateInstallDir() + if self.is_static: + self.SnapshotPackages() + self.SetupPackageManager() + if FLAGS.enable_rsync: + self.InstallPackages('rsync') + self.SetFiles() + self.DoSysctls() + self._DoAppendKernelCommandLine() + self.DoConfigureNetworkForBBR() + self.DoConfigureTCPWindow() + self.UpdateEnvironmentPath() + self._RebootIfNecessary() + self._ApplyScalingGovernor() + self._DisableCpus() + self._RebootIfNecessary() + self.RecordAdditionalMetadata() + self.BurnCpu() + self.FillDisk() + + def _CreateInstallDir(self): + self.RemoteCommand( + ('sudo mkdir -p {0}; ' + 'sudo chmod a+rwxt {0}').format(linux_packages.INSTALL_DIR)) + + # LinuxMixins do not implement _Start or _Stop + def _Start(self): + """Starts the VM.""" + raise NotImplementedError() + + def _Stop(self): + """Stops the VM.""" + raise NotImplementedError() + + def SetFiles(self): + """Apply --set_files to the VM.""" + + for pair in FLAGS.set_files: + path, value = pair.split('=') + self.RemoteCommand('echo "%s" | sudo tee %s' % + (value, path)) + + def _ApplyScalingGovernor(self): + """Applies selected scaling governor to all CPUs + + This setting does not persist if the VM is rebooted. + + Raises: + ValueError: if cpufreq is not available or specified governor cannot be used + """ + if not FLAGS.scaling_governor: + return + + _, _, rc = self.RemoteCommandWithReturnCode('test -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor', + ignore_failure=True) + + if rc != 0: + raise ValueError("cpufreq is not available in the system") + + stdout, _ = self.RemoteCommand('cat /sys/devices/system/cpu/cpu0/cpufreq/scaling_available_governors') + available_governors = stdout.split() + if FLAGS.scaling_governor not in available_governors: + msg = "Specified governor {} cannot be used. 
Available governors: {}".format(FLAGS.scaling_governor, + available_governors) + raise ValueError(msg) + + cmd = 'echo {} | sudo tee -a /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor'.format(FLAGS.scaling_governor) + self.RemoteCommand(cmd) + + def _DisableCpus(self): + """Apply num_disable_cpus to the VM. + + Raises: + ValueError: if num_disable_cpus is outside of (0 ... num_cpus-1) + inclusive + """ + if not FLAGS.num_disable_cpus: + return + + self.num_disable_cpus = FLAGS.num_disable_cpus + + if (self.num_disable_cpus <= 0 or + self.num_disable_cpus >= self.num_cpus): + raise ValueError('num_disable_cpus must be between 1 and ' + '(num_cpus - 1) inclusive. ' + 'num_disable_cpus: %i, num_cpus: %i' % + (self.num_disable_cpus, self.num_cpus)) + + # We can't disable cpu 0, starting from the last cpu in /proc/cpuinfo. + # On multiprocessor systems, we also attempt to disable cpus on each + # physical processor based on "physical id" in order to keep a similar + # number of cpus on each physical processor. + # In addition, for each cpu we disable, we will look for cpu with same + # "core id" in order to disable vcpu pairs. + cpus = copy.deepcopy(self.CheckProcCpu().mappings) + cpu_mapping = collections.defaultdict(list) + for cpu, info in cpus.items(): + numa = info.get('physical id') + cpu_mapping[int(numa)].append((cpu, int(info.get('core id')))) + + # Sort cpus based on 'core id' on each numa node + for numa in cpu_mapping: + cpu_mapping[numa] = sorted( + cpu_mapping[numa], + key=lambda cpu_info: (cpu_info[1], cpu_info[0])) + + def _GetNextCPUToDisable(num_disable_cpus): + """Get the next CPU id to disable.""" + numa_nodes = list(cpu_mapping) + while num_disable_cpus: + for numa in sorted(numa_nodes, reverse=True): + cpu_id, _ = cpu_mapping[numa].pop() + num_disable_cpus -= 1 + yield cpu_id + if not num_disable_cpus: + break + + for cpu_id in _GetNextCPUToDisable(self.num_disable_cpus): + self.RemoteCommand('sudo bash -c "echo 0 > ' + f'/sys/devices/system/cpu/cpu{cpu_id}/online"') + self._proccpu_cache = None + self._lscpu_cache = None + + def UpdateEnvironmentPath(self): + """Specific Linux flavors should override this.""" + pass + + def FillDisk(self): + """Fills the primary scratch disk with a zeros file.""" + if FLAGS.disk_fill_size: + out_file = posixpath.join(self.scratch_disks[0].mount_point, 'fill_file') + self.RobustRemoteCommand( + 'dd if=/dev/zero of={out_file} bs=1G count={fill_size}'.format( + out_file=out_file, fill_size=FLAGS.disk_fill_size)) + + def _ApplySysctlPersistent(self, sysctl_params): + """Apply "key=value" pairs to /etc/sysctl.conf and mark the VM for reboot. + + The reboot ensures the values take effect and remain persistent across + future reboots. + + Args: + sysctl_params: dict - the keys and values to write + """ + if not sysctl_params: + return + + for key, value in sysctl_params.items(): + self.RemoteCommand('sudo bash -c \'echo "%s=%s" >> /etc/sysctl.conf\'' + % (key, value)) + + self._needs_reboot = True + + def ApplySysctlPersistent(self, sysctl_params): + """Apply "key=value" pairs to /etc/sysctl.conf and reboot immediately. + + The reboot ensures the values take effect and remain persistent across + future reboots. + + Args: + sysctl_params: dict - the keys and values to write + """ + self._ApplySysctlPersistent(sysctl_params) + self._RebootIfNecessary() + + def DoSysctls(self): + """Apply --sysctl to the VM. + + The Sysctl pairs are written persistently so that if a reboot + occurs, the flags are not lost. 
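+
+    For example, --sysctl=vm.dirty_background_ratio=10,vm.dirty_ratio=25 is
+    parsed into {'vm.dirty_background_ratio': '10', 'vm.dirty_ratio': '25'}
+    and appended to /etc/sysctl.conf via _ApplySysctlPersistent.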
+ """ + sysctl_params = {} + for pair in FLAGS.sysctl: + key, value = pair.split('=') + sysctl_params[key] = value + self._ApplySysctlPersistent(sysctl_params) + + def DoConfigureNetworkForBBR(self): + """Apply --network_enable_BBR to the VM.""" + if not FLAGS.network_enable_BBR: + return + + if not KernelRelease(self.kernel_release).AtLeast(4, 9): + raise flags.ValidationError( + 'BBR requires a linux image with kernel 4.9 or newer') + + # if the current congestion control mechanism is already BBR + # then nothing needs to be done (avoid unnecessary reboot) + if self.TcpCongestionControl() == 'bbr': + return + + self._ApplySysctlPersistent({ + 'net.core.default_qdisc': 'fq', + 'net.ipv4.tcp_congestion_control': 'bbr' + }) + + def DoConfigureTCPWindow(self): + """Change TCP window parameters in sysctl.""" + + # Return if none of these flags are set + if all(x is None for x in [FLAGS.tcp_max_receive_buffer, + FLAGS.tcp_max_send_buffer, + FLAGS.rmem_max, + FLAGS.wmem_max]): + return + + # Get current values from VM + stdout, _ = self.RemoteCommand('cat /proc/sys/net/ipv4/tcp_rmem') + rmem_values = stdout.split() + stdout, _ = self.RemoteCommand('cat /proc/sys/net/ipv4/tcp_wmem') + wmem_values = stdout.split() + stdout, _ = self.RemoteCommand('cat /proc/sys/net/core/rmem_max') + rmem_max = int(stdout) + stdout, _ = self.RemoteCommand('cat /proc/sys/net/core/wmem_max') + wmem_max = int(stdout) + + # third number is max receive/send + max_receive = rmem_values[2] + max_send = wmem_values[2] + + # if flags are set, override current values from vm + if FLAGS.tcp_max_receive_buffer: + max_receive = FLAGS.tcp_max_receive_buffer + if FLAGS.tcp_max_send_buffer: + max_send = FLAGS.tcp_max_send_buffer + if FLAGS.rmem_max: + rmem_max = FLAGS.rmem_max + if FLAGS.wmem_max: + wmem_max = FLAGS.wmem_max + + # Add values to metadata + self.os_metadata['tcp_max_receive_buffer'] = max_receive + self.os_metadata['tcp_max_send_buffer'] = max_send + self.os_metadata['rmem_max'] = rmem_max + self.os_metadata['wmem_max'] = wmem_max + + rmem_string = '{} {} {}'.format(rmem_values[0], + rmem_values[1], + max_receive) + wmem_string = '{} {} {}'.format(wmem_values[0], + wmem_values[1], + max_send) + + self._ApplySysctlPersistent({ + 'net.ipv4.tcp_rmem': rmem_string, + 'net.ipv4.tcp_wmem': wmem_string, + 'net.core.rmem_max': rmem_max, + 'net.core.wmem_max': wmem_max + }) + + def _RebootIfNecessary(self): + """Will reboot the VM if self._needs_reboot has been set.""" + if self._needs_reboot: + self.Reboot() + self._needs_reboot = False + + def TcpCongestionControl(self): + """Return the congestion control used for tcp.""" + try: + resp, _ = self.RemoteCommand( + 'cat /proc/sys/net/ipv4/tcp_congestion_control') + return resp.rstrip('\n') + except errors.VirtualMachine.RemoteCommandError: + return 'unknown' + + def CheckLsCpu(self): + """Returns a LsCpuResults from the host VM.""" + if not self._lscpu_cache: + lscpu, _ = self.RemoteCommand('lscpu') + self._lscpu_cache = LsCpuResults(lscpu) + return self._lscpu_cache + + def CheckProcCpu(self): + """Returns a ProcCpuResults from the host VM.""" + if not self._proccpu_cache: + proccpu, _ = self.RemoteCommand('cat /proc/cpuinfo') + self._proccpu_cache = ProcCpuResults(proccpu) + return self._proccpu_cache + + def GetOsInfo(self): + """Returns information regarding OS type and version.""" + stdout, _ = self.RemoteCommand('grep PRETTY_NAME /etc/os-release') + return regex_util.ExtractGroup(OS_PRETTY_NAME_REGEXP, stdout) + + @property + def os_info(self): + """Get 
distribution-specific information.""" + if self.os_metadata.get('os_info'): + return self.os_metadata['os_info'] + else: + return self.GetOsInfo() + + @property + def kernel_release(self): + """Return kernel release number.""" + if self.os_metadata.get('kernel_release'): + return self.os_metadata.get('kernel_release') + else: + stdout, _ = self.RemoteCommand('uname -r') + return stdout.strip() + + @property + def kernel_command_line(self): + """Return the kernel command line.""" + return (self.os_metadata.get('kernel_command_line') or + self.RemoteCommand('cat /proc/cmdline')[0].strip()) + + @property + def partition_table(self): + """Return partition table information.""" + if not self._partition_table: + cmd = 'sudo fdisk -l' + partition_tables = self.RemoteCommand(cmd)[0] + try: + self._partition_table = { + dev: int(size) for (dev, size) in regex_util.ExtractAllMatches( + r'Disk\s*(.*):[\s\w\.]*,\s(\d*)\sbytes', partition_tables)} + except regex_util.NoMatchError: + # TODO(user): Use alternative methods to retrieve partition table. + logging.warning('Partition table not found with "%s".', cmd) + return self._partition_table + + @vm_util.Retry(log_errors=False, poll_interval=1) + def WaitForBootCompletion(self): + """Waits until the VM has booted.""" + # Test for listening on the port first, because this will happen strictly + # first. + if (FLAGS.cluster_boot_test_port_listening and + self.port_listening_time is None): + self.TestConnectRemoteAccessPort() + self.port_listening_time = time.time() + + self._WaitForSSH() + + if self.bootable_time is None: + self.bootable_time = time.time() + + @vm_util.Retry(log_errors=False, poll_interval=1) + def _WaitForSSH(self): + """Waits until the VM is ready.""" + # Always wait for remote host command to succeed, because it is necessary to + # run benchmarks + resp, _ = self.RemoteHostCommand('hostname', retries=1, + suppress_warning=True) + if self.hostname is None: + self.hostname = resp[:-1] + + def RecordAdditionalMetadata(self): + """After the VM has been prepared, store metadata about the VM.""" + self.tcp_congestion_control = self.TcpCongestionControl() + lscpu_results = self.CheckLsCpu() + self.numa_node_count = lscpu_results.numa_node_count + self.os_metadata['threads_per_core'] = lscpu_results.threads_per_core + self.os_metadata['os_info'] = self.os_info + self.os_metadata['kernel_release'] = self.kernel_release + self.os_metadata.update(self.partition_table) + if FLAGS.append_kernel_command_line: + self.os_metadata['kernel_command_line'] = self.kernel_command_line + self.os_metadata[ + 'append_kernel_command_line'] = FLAGS.append_kernel_command_line + + @vm_util.Retry(log_errors=False, poll_interval=1) + def VMLastBootTime(self): + """Returns the time the VM was last rebooted as reported by the VM. + + See + https://unix.stackexchange.com/questions/165002/how-to-reliably-get-timestamp-at-which-the-system-booted. 
+ """ + stdout, _ = self.RemoteHostCommand( + 'stat -c %z /proc/', retries=1, suppress_warning=True) + if stdout.startswith('1970-01-01'): + # Fix for ARM returning epochtime + date_fmt = '+%Y-%m-%d %H:%M:%S.%s %z' + date_cmd = "grep btime /proc/stat | awk '{print $2}'" + stdout, _ = self.RemoteHostCommand(f'date "{date_fmt}" -d@$({date_cmd})') + return stdout + + def SnapshotPackages(self): + """Grabs a snapshot of the currently installed packages.""" + pass + + def RestorePackages(self): + """Restores the currently installed packages to those snapshotted.""" + pass + + def ProxyCleanup(self): + """ Restore to a state before SetupProxy() executed """ + env_file = '/etc/environment' + commands = [] + command_template = "sudo sed -i '\#^{0}$#d' {1}" + + if FLAGS.http_proxy: + commands.append(command_template.format( + 'http_proxy=' + FLAGS.http_proxy, env_file)) + + if FLAGS.https_proxy: + commands.append(command_template.format( + 'https_proxy=' + FLAGS.https_proxy, env_file)) + + if FLAGS.ftp_proxy: + commands.append(command_template.format( + 'ftp_proxy=' + FLAGS.ftp_proxy, env_file)) + + if FLAGS.no_proxy: + commands.append(command_template.format( + 'no_proxy=' + FLAGS.no_proxy, env_file)) + + if commands: + self.RemoteCommand(';'.join(commands)) + + def PackageCleanup(self): + """Cleans up all installed packages. + + Deletes the temp directory, restores packages, and uninstalls all + PerfKit packages. + """ + for package_name in list(self._installed_packages): + try: + self.Uninstall(package_name) + except Exception as e: + logging.info('Got an exception ({0}) when uninstalling package {1}' + 'Attempting to continue to uninstall PerfKit package.'.format(e, package_name)) + + if not FLAGS.skip_package_restore: + self.RestorePackages() + self.RemoteCommand('sudo rm -rf %s' % linux_packages.INSTALL_DIR) + + def GetPathToConfig(self, package_name): + """Returns the path to the config file for PerfKit packages. + + This function is mostly useful when config files locations + don't match across distributions (such as mysql). Packages don't + need to implement it if this is not the case. + """ + pass + + def GetServiceName(self, package_name): + """Returns the service name of a PerfKit package. + + This function is mostly useful when service names don't + match across distributions (such as mongodb). Packages don't + need to implement it if this is not the case. + """ + pass + + @vm_util.Retry() + def FormatDisk(self, device_path, disk_type=None): + """Formats a disk attached to the VM.""" + # Some images may automount one local disk, but we don't + # want to fail if this wasn't the case. + if disk.NFS == disk_type: + return + if disk.SMB == disk_type: + return + umount_cmd = '[[ -d /mnt ]] && sudo umount /mnt; ' + # TODO(user): Allow custom disk formatting options. 
+ if FLAGS.disk_fs_type == 'xfs': + block_size = FLAGS.disk_block_size or 512 + fmt_cmd = ('sudo mkfs.xfs -f -i size={0} {1}'.format( + block_size, device_path)) + else: + block_size = FLAGS.disk_block_size or 4096 + fmt_cmd = ('sudo mke2fs -F -E lazy_itable_init=0,discard -O ' + '^has_journal -t ext4 -b {0} {1}'.format( + block_size, device_path)) + self.os_metadata['disk_filesystem_type'] = FLAGS.disk_fs_type + self.os_metadata['disk_filesystem_blocksize'] = block_size + self.RemoteHostCommand(umount_cmd + fmt_cmd) + + @vm_util.Retry( + timeout=vm_util.DEFAULT_TIMEOUT, + retryable_exceptions=(errors.VirtualMachine.RemoteCommandError,)) + def MountDisk(self, + device_path, + mount_path, + disk_type=None, + mount_options=disk.DEFAULT_MOUNT_OPTIONS, + fstab_options=disk.DEFAULT_FSTAB_OPTIONS): + """Mounts a formatted disk in the VM.""" + mount_options = '-o %s' % mount_options if mount_options else '' + if disk.NFS == disk_type: + mount_options = '-t nfs %s' % mount_options + fs_type = 'nfs' + elif disk.SMB == disk_type: + mount_options = '-t cifs %s' % mount_options + fs_type = 'smb' + else: + fs_type = FLAGS.disk_fs_type + fstab_options = fstab_options or '' + mnt_cmd = ('sudo mkdir -p {mount_path};' + 'sudo mount {mount_options} {device_path} {mount_path} && ' + 'sudo chown $USER:$USER {mount_path};').format( + mount_path=mount_path, + device_path=device_path, + mount_options=mount_options) + self.RemoteHostCommand(mnt_cmd) + # add to /etc/fstab to mount on reboot + mnt_cmd = ('echo "{device_path} {mount_path} {fs_type} {fstab_options}" ' + '| sudo tee -a /etc/fstab').format( + device_path=device_path, + mount_path=mount_path, + fs_type=fs_type, + fstab_options=fstab_options) + self.RemoteHostCommand(mnt_cmd) + + def LogVmDebugInfo(self): + """Logs the output of calling dmesg on the VM.""" + if FLAGS.log_dmesg: + self.RemoteCommand('hostname && dmesg', should_log=True) + + def BackupFiles(self, files): + """Creates a backup of the files in the files dict. A file + "f.txt" will be saved as "f.txt.". Works in tandem + with RestoreFiles. + + Args: + files: Dictionary containing 2 lists of files, one for "system" + and one for "user" files. The difference is mave because + you only need to use sudo for "system" files. + + example: FILES_TO_SAVE = { + "system_files": [ + "/etc/hosts", + "/etc/bash.bashrc" + ], + "user_files": [ + "~/.ssh/authorized_keys" + ] + } + """ + cmds = [] + for category in files: + for path in files[category]: + _, _, retcode = self.RemoteCommandWithReturnCode( + "test -f {0}.{1}".format(path, FLAGS.run_uri), + ignore_failure=True) + if retcode == 0: + logging.warn("File {} already backed up, SKIPPING".format(path)) + else: + cmd = "" + if category == "system_files": + cmd += "sudo " + cmd += "cp -f {0} {0}.{1} 2>/dev/null".format(path, FLAGS.run_uri) + cmds.append(cmd) + if len(cmds) > 0: + self.RemoteCommand(' ; '.join(cmds), ignore_failure=True) + + def RestoreFiles(self, files): + """Works in tandem with BackupFiles. Reverts files that were previously + backed up. + + Args: + files: Dictionary containing 2 lists of files, one for "system" + and one for "user" files. The difference is mave because + you only need to use sudo for "system" files. 
+ + example: FILES_TO_SAVE = { + "system_files": [ + "/etc/hosts", + "/etc/bash.bashrc" + ], + "user_files": [ + "~/.ssh/authorized_keys" + ] + } + """ + cmds = [] + for category in files: + for path in files[category]: + cmd = "" + if category == "system_files": + cmd += "sudo " + cmd += "mv -f {0}.{1} {0} 2>/dev/null".format(path, FLAGS.run_uri) + cmds.append(cmd) + if len(cmds) > 0: + self.RemoteCommand(' ; '.join(cmds), ignore_failure=True) + + def RemoteCopy(self, file_path, remote_path='', copy_to=True): + self.RemoteHostCopy(file_path, remote_path, copy_to) + + def RemoteHostCopy(self, file_path, remote_path='', copy_to=True): + """Copies a file to or from the VM. + + Args: + file_path: Local path to file. + remote_path: Optional path of where to copy file on remote host. + copy_to: True to copy to vm, False to copy from vm. + + Raises: + RemoteCommandError: If there was a problem copying the file. + """ + if vm_util.RunningOnWindows(): + if ':' in file_path: + # scp doesn't like colons in paths. + file_path = file_path.split(':', 1)[1] + # Replace the last instance of '\' with '/' to make scp happy. + file_path = '/'.join(file_path.rsplit('\\', 1)) + remote_ip = '[%s]' % self.GetConnectionIp() + remote_location = '%s@%s:%s' % ( + self.user_name, remote_ip, remote_path) + # An scp is not retried, so increase the connection timeout. + ssh_private_key = (self.ssh_private_key if self.is_static else + vm_util.GetPrivateKeyPath()) + + if FLAGS.enable_rsync: + remote_shell = ["ssh", "-p", str(self.ssh_port)] + remote_shell.extend(vm_util.GetSshOptions(ssh_private_key, connect_timeout=FLAGS.scp_connect_timeout)) + scp_env = {"RSYNC_RSH": " ".join(remote_shell)} + scp_cmd = ['rsync', '-aztpr'] + else: + scp_env = None + scp_cmd = ['scp', '-P', str(self.ssh_port), '-pr'] + scp_cmd.extend(vm_util.GetSshOptions( + ssh_private_key, connect_timeout=FLAGS.scp_connect_timeout)) + + if copy_to: + scp_cmd.extend([file_path, remote_location]) + else: + scp_cmd.extend([remote_location, file_path]) + + stdout, stderr, retcode = vm_util.IssueCommand(scp_cmd, timeout=None, env=scp_env, + raise_on_failure=False) + + if retcode: + full_cmd = ' '.join(scp_cmd) + error_text = ('Got non-zero return code (%s) executing %s\n' + 'STDOUT: %sSTDERR: %s' % + (retcode, full_cmd, stdout, stderr)) + raise errors.VirtualMachine.RemoteCommandError(error_text) + + def RemoteCommand(self, *args, **kwargs): + """Runs a command on the VM. + + Args: + *args: Arguments passed directly to RemoteCommandWithReturnCode. + **kwargs: Keyword arguments passed directly to + RemoteCommandWithReturnCode. + + Returns: + A tuple of stdout, stderr from running the command. + + Raises: + RemoteCommandError: If there was a problem establishing the connection. + """ + return self.RemoteCommandWithReturnCode(*args, **kwargs)[:2] + + def RemoteCommandWithReturnCode(self, *args, **kwargs): + """Runs a command on the VM. + + Args: + *args: Arguments passed directly to RemoteHostCommandWithReturnCode. + **kwargs: Keyword arguments passed directly to + RemoteHostCommandWithReturnCode. + + Returns: + A tuple of stdout, stderr, return_code from running the command. + + Raises: + RemoteCommandError: If there was a problem establishing the connection. + """ + return self.RemoteHostCommandWithReturnCode(*args, **kwargs) + + def RemoteHostCommandWithReturnCode(self, + command, + should_log=False, + retries=None, + ignore_failure=False, + login_shell=False, + suppress_warning=False, + timeout=None, + ssh_args=None): + """Runs a command on the VM. 
+ + This is guaranteed to run on the host VM, whereas RemoteCommand might run + within i.e. a container in the host VM. + + Args: + command: A valid bash command. + should_log: A boolean indicating whether the command result should be + logged at the info level. Even if it is false, the results will + still be logged at the debug level. + retries: The maximum number of times RemoteCommand should retry SSHing + when it receives a 255 return code. If None, it defaults to the value + of the flag ssh_retries. + ignore_failure: Ignore any failure if set to true. + login_shell: Run command in a login shell. + suppress_warning: Suppress the result logging from IssueCommand when the + return code is non-zero. + timeout: The timeout for IssueCommand. + + Returns: + A tuple of stdout, stderr, return_code from running the command. + + Raises: + RemoteCommandError: If there was a problem establishing the connection. + """ + if retries is None: + retries = FLAGS.ssh_retries + if vm_util.RunningOnWindows(): + # Multi-line commands passed to ssh won't work on Windows unless the + # newlines are escaped. + command = command.replace('\n', '\\n') + ip_address = self.GetConnectionIp() + user_host = '%s@%s' % (self.user_name, ip_address) + ssh_cmd = ['ssh', '-A', '-p', str(self.ssh_port), user_host] + ssh_private_key = (self.ssh_private_key if self.is_static else + vm_util.GetPrivateKeyPath()) + ssh_cmd.extend(vm_util.GetSshOptions(ssh_private_key)) + if ssh_args: + ssh_cmd.extend(ssh_args) + try: + if login_shell: + ssh_cmd.extend(['-t', '-t', 'bash -l -c "%s"' % command]) + self._pseudo_tty_lock.acquire() + else: + ssh_cmd.append(command) + + for _ in range(retries): + stdout, stderr, retcode = vm_util.IssueCommand( + ssh_cmd, force_info_log=should_log, + suppress_warning=suppress_warning, + timeout=timeout, raise_on_failure=False) + # Retry on 255 because this indicates an SSH failure + if retcode != RETRYABLE_SSH_RETCODE: + break + finally: + if login_shell: + self._pseudo_tty_lock.release() + + if retcode: + full_cmd = ' '.join(ssh_cmd) + error_text = ('Got non-zero return code (%s) executing %s\n' + 'Full command: %s\nSTDOUT: %sSTDERR: %s' % + (retcode, command, full_cmd, stdout, stderr)) + if not ignore_failure: + raise errors.VirtualMachine.RemoteCommandError(error_text) + + return (stdout, stderr, retcode) + + def RemoteHostCommand(self, *args, **kwargs): + """Runs a command on the VM. + + This is guaranteed to run on the host VM, whereas RemoteCommand might run + within i.e. a container in the host VM. + + Args: + *args: Arguments passed directly to RemoteHostCommandWithReturnCode. + **kwargs: Keyword arguments passed directly to + RemoteHostCommandWithReturnCode. + + Returns: + A tuple of stdout, stderr from running the command. + + Raises: + RemoteCommandError: If there was a problem establishing the connection. + """ + return self.RemoteHostCommandWithReturnCode(*args, **kwargs)[:2] + + def _CheckRebootability(self): + if not self.IS_REBOOTABLE: + raise errors.VirtualMachine.VirtualMachineError( + "Trying to reboot a VM that isn't rebootable.") + + def _Reboot(self): + """OS-specific implementation of reboot command.""" + self._CheckRebootability() + self.RemoteCommand('sudo reboot', ignore_failure=True) + + def _AfterReboot(self): + """Performs any OS-specific setup on the VM following reboot. + + This will be called after every call to Reboot(). 
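+
+    Re-creates the temp and install directories, re-applies the transparent
+    hugepage and disabled-CPU settings, and refreshes cached metadata such as
+    the lscpu results.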
+ """ + # clear out os_info and kernel_release as might have changed + previous_os_info = self.os_metadata.pop('os_info', None) + previous_kernel_release = self.os_metadata.pop('kernel_release', None) + previous_kernel_command = self.os_metadata.pop('kernel_command_line', None) + if previous_os_info or previous_kernel_release or previous_kernel_command: + self.RecordAdditionalMetadata() + if self._lscpu_cache: + self._lscpu_cache = None + self.CheckLsCpu() + if self.install_packages: + self._CreateInstallDir() + self._CreateVmTmpDir() + self._SetTransparentHugepages() + self._has_remote_command_script = False + self._DisableCpus() + + def MoveFile(self, target, source_path, remote_path=''): + self.MoveHostFile(target, source_path, remote_path) + + def MoveHostFile(self, target, source_path, remote_path=''): + """Copies a file from one VM to a target VM. + + Args: + target: The target BaseVirtualMachine object. + source_path: The location of the file on the REMOTE machine. + remote_path: The destination of the file on the TARGET machine, default + is the home directory. + """ + self.AuthenticateVm() + + # TODO(user): For security we may want to include + # -o UserKnownHostsFile=/dev/null in the scp command + # however for the moment, this has happy side effects + # ie: the key is added to know known_hosts which allows + # OpenMPI to operate correctly. + remote_location = '%s@%s:%s' % ( + target.user_name, target.ip_address, remote_path) + self.RemoteHostCommand('scp -P %s -o StrictHostKeyChecking=no -i %s %s %s' % + (target.ssh_port, REMOTE_KEY_PATH, source_path, + remote_location)) + + def AuthenticateVm(self): + """Authenticate a remote machine to access all peers.""" + if not self.is_static and not self.has_private_key: + self.RemoteHostCopy(vm_util.GetPrivateKeyPath(), + REMOTE_KEY_PATH) + self.RemoteCommand( + 'echo "Host *\n StrictHostKeyChecking no\n" > ~/.ssh/config') + self.RemoteCommand('chmod 600 ~/.ssh/config') + self.has_private_key = True + + def TestAuthentication(self, peer): + """Tests whether the VM can access its peer. + + Raises: + AuthError: If the VM cannot access its peer. + """ + if not self.TryRemoteCommand('ssh %s hostname' % peer.internal_ip): + raise errors.VirtualMachine.AuthError( + 'Authentication check failed. If you are running with Static VMs, ' + 'please make sure that %s can ssh into %s without supplying any ' + 'arguments except the ip address.' % (self, peer)) + + def CheckJavaVersion(self): + """Check the version of java on remote machine. + + Returns: + The version of Java installed on remote machine. + """ + version, _ = self.RemoteCommand('java -version 2>&1 >/dev/null | ' + 'grep version | ' + 'awk \'{print $3}\'') + return version[:-1] + + def RemoveFile(self, filename): + """Deletes a file on a remote machine. + + Args: + filename: Path to the file to delete. + """ + self.RemoteCommand('sudo rm -rf %s' % filename) + + def GetDeviceSizeFromPath(self, path): + """Gets the size of the a drive that contains the path specified. + + Args: + path: The function will return the amount of space on the file system + that contains this file name. + + Returns: + The size in 1K blocks of the file system containing the file. 
+ """ + df_command = "df -k -P %s | tail -n +2 | awk '{ print $2 }'" % path + stdout, _ = self.RemoteCommand(df_command) + return int(stdout) + + def DropCaches(self): + """Drops the VM's caches.""" + drop_caches_command = 'sync; sudo /sbin/sysctl vm.drop_caches=3' + self.RemoteCommand(drop_caches_command) + + def _GetNumCpus(self): + """Returns the number of logical CPUs on the VM. + + This method does not cache results (unlike "num_cpus"). + """ + stdout, _ = self.RemoteCommand( + 'cat /proc/cpuinfo | grep processor | wc -l') + return int(stdout) + + def _GetTotalFreeMemoryKb(self): + """Calculate amount of free memory in KB of the given vm. + + Free memory is calculated as sum of free, cached, and buffers + as output from /proc/meminfo. + + Args: + vm: vm to check + + Returns: + free memory on the vm in KB + """ + stdout, _ = self.RemoteCommand(""" + awk ' + BEGIN {total =0} + /MemFree:/ {total += $2} + /Cached:/ {total += $2} + /Buffers:/ {total += $2} + END {print total} + ' /proc/meminfo + """) + return int(stdout) + + def _GetTotalMemoryKb(self): + """Returns the amount of physical memory on the VM in Kilobytes. + + This method does not cache results (unlike "total_memory_kb"). + """ + meminfo_command = 'cat /proc/meminfo | grep MemTotal | awk \'{print $2}\'' + stdout, _ = self.RemoteCommand(meminfo_command) + return int(stdout) + + def _TestReachable(self, ip): + """Returns True if the VM can reach the ip address and False otherwise.""" + return self.TryRemoteCommand('ping -c 1 %s' % ip) + + def SetupLocalDisks(self): + """Performs Linux specific setup of local disks.""" + pass + + def CreateRamDisk(self, disk_spec): + """Performs Linux specific setup of ram disk.""" + assert disk_spec.mount_point + ramdisk = self.RamDisk(disk_spec) + ramdisk.Mount(self) + self.scratch_disks.append(ramdisk) + + class RamDisk(disk.MountableDisk): + """Linux specific setup of ram disk.""" + + def Mount(self, vm): + logging.info('Mounting and creating Ram Disk %s, %s', + self.mount_point, self.disk_size) + mnt_cmd = ('sudo mkdir -p {0};sudo mount -t tmpfs -o size={1}g tmpfs {0};' + 'sudo chown -R $USER:$USER {0};').format( + self.mount_point, self.disk_size) + vm.RemoteHostCommand(mnt_cmd) + + def _CreateScratchDiskFromDisks(self, disk_spec, disks): + """Helper method to prepare data disks. + + Given a list of BaseDisk objects, this will do most of the work creating, + attaching, striping, formatting, and mounting them. If multiple BaseDisk + objects are passed to this method, it will stripe them, combining them + into one 'logical' data disk (it will be treated as a single disk from a + benchmarks perspective). This is intended to be called from within a cloud + specific VM's CreateScratchDisk method. + + Args: + disk_spec: The BaseDiskSpec object corresponding to the disk. + disks: A list of the disk(s) to be created, attached, striped, + formatted, and mounted. If there is more than one disk in + the list, then they will be striped together. + """ + if len(disks) > 1: + # If the disk_spec called for a striped disk, create one. 
+ disk_spec.device_path = '/dev/md%d' % len(self.scratch_disks) + data_disk = disk.StripedDisk(disk_spec, disks) + else: + data_disk = disks[0] + + self.scratch_disks.append(data_disk) + + if data_disk.disk_type != disk.LOCAL: + data_disk.Create() + data_disk.Attach(self) + + if data_disk.is_striped: + device_paths = [d.GetDevicePath() for d in data_disk.disks] + self.StripeDisks(device_paths, data_disk.GetDevicePath()) + + if disk_spec.mount_point: + if isinstance(data_disk, disk.MountableDisk): + data_disk.Mount(self) + else: + self.FormatDisk(data_disk.GetDevicePath(), disk_spec.disk_type) + self.MountDisk(data_disk.GetDevicePath(), disk_spec.mount_point, + disk_spec.disk_type, data_disk.mount_options, + data_disk.fstab_options) + + @vm_util.Retry(max_retries=3, poll_interval=5) + def StripeDisks(self, devices, striped_device): + """Raids disks together using mdadm. + + Args: + devices: A list of device paths that should be striped together. + striped_device: The path to the device that will be created. + """ + self.Install('mdadm') + stripe_cmd = ('yes | sudo mdadm --create %s --level=stripe --raid-devices=' + '%s %s' % (striped_device, len(devices), ' '.join(devices))) + self.RemoteHostCommand(stripe_cmd) + + # Save the RAID layout on the disk + cmd = ('sudo mdadm --detail --scan | ' + + 'sudo tee -a /etc/mdadm/mdadm.conf') + self.RemoteHostCommand(cmd) + + # Make the disk available during reboot + cmd = 'sudo update-initramfs -u' + self.RemoteHostCommand(cmd) + + # Automatically mount the disk after reboot + cmd = ('echo \'/dev/md0 /mnt/md0 ext4 defaults,nofail' + ',discard 0 0\' | sudo tee -a /etc/fstab') + self.RemoteHostCommand(cmd) + + def BurnCpu(self, burn_cpu_threads=None, burn_cpu_seconds=None): + """Burns vm cpu for some amount of time and dirty cache. + + Args: + burn_cpu_threads: Number of threads to burn cpu. + burn_cpu_seconds: Amount of time in seconds to burn cpu. + """ + burn_cpu_threads = burn_cpu_threads or FLAGS.burn_cpu_threads + burn_cpu_seconds = burn_cpu_seconds or FLAGS.burn_cpu_seconds + if burn_cpu_seconds: + self.Install('sysbench') + end_time = time.time() + burn_cpu_seconds + self.RemoteCommand( + 'nohup sysbench --num-threads=%s --test=cpu --cpu-max-prime=10000000 ' + 'run 1> /dev/null 2> /dev/null &' % burn_cpu_threads) + if time.time() < end_time: + time.sleep(end_time - time.time()) + self.RemoteCommand('pkill -9 sysbench') + + def SetSmpAffinity(self): + """Set SMP IRQ affinity.""" + if self._smp_affinity_script: + self.PushDataFile(self._smp_affinity_script) + self.RemoteCommand('sudo bash %s' % self._smp_affinity_script) + else: + raise NotImplementedError() + + def SetReadAhead(self, num_sectors, devices): + """Set read-ahead value for block devices. + + Args: + num_sectors: int. Number of sectors of read ahead. + devices: list of strings. A list of block devices. + """ + self.RemoteCommand( + 'sudo blockdev --setra {0} {1}; sudo blockdev --setfra {0} {1};'.format( + num_sectors, ' '.join(devices))) + + def GetSha256sum(self, path, filename): + """Gets the sha256sum hash for a filename in a path on the VM. + + Args: + path: string; Path on the VM. + filename: string; Name of the file in the path. + + Returns: + string; The sha256sum hash. + """ + stdout, _ = self.RemoteCommand( + 'sha256sum %s' % posixpath.join(path, filename)) + sha256sum, _ = stdout.split() + return sha256sum + + def _GetSmbService(self): + """Returns the SmbService created in the benchmark spec. 
+ + Before calling this method check that the disk.disk_type is equal to + disk.SMB or else an exception will be raised. + + Returns: + The smb_service.BaseSmbService service for this cloud. + + Raises: + CreationError: If no SMB service was created. + """ + smb = getattr(context.GetThreadBenchmarkSpec(), 'smb_service') + if smb is None: + raise errors.Resource.CreationError('No SMB Service created') + return smb + + def AppendKernelCommandLine(self, command_line, reboot=True): + """Appends the provided command-line to the VM and reboots by default. + + This method should be overwritten by the desired Linux flavor to be useful. + Most (all?) Linux flavors modify the kernel command line by updating the + GRUB configuration files and rebooting. + + Args: + command_line: The string to append to the kernel command line. + reboot: Whether or not to reboot to have the change take effect. + """ + raise NotImplementedError( + 'Kernel command-line appending for given Linux flavor not implemented.') + + def _DoAppendKernelCommandLine(self): + """If the flag is set, attempts to append the provided kernel command line. + + In addition, to consolidate reboots during VM prepare, this method sets the + needs reboot bit instead of immediately rebooting. + """ + if FLAGS.disable_smt and self.CheckLsCpu().threads_per_core != 1: + FLAGS.append_kernel_command_line = ' '.join( + (FLAGS.append_kernel_command_line, + 'nosmt')) if FLAGS.append_kernel_command_line else 'nosmt' + if FLAGS.append_kernel_command_line: + self.AppendKernelCommandLine( + FLAGS.append_kernel_command_line, reboot=False) + self._needs_reboot = True + + @abc.abstractmethod + def InstallPackages(self, packages: str) -> None: + """Installs packages using the OS's package manager.""" + pass + + def _IsSmtEnabled(self): + """Whether simultaneous multithreading (SMT) is enabled on the vm. + + Looks for the "nosmt" attribute in the booted linux kernel command line + parameters. + + Returns: + Whether SMT is enabled on the vm. + """ + return not bool(re.search(r'\bnosmt\b', self.kernel_command_line)) + + @property + def cpu_vulnerabilities(self) -> CpuVulnerabilities: + """Returns a CpuVulnerabilities of CPU vulnerabilities. + + Output of "grep . .../cpu/vulnerabilities/*" looks like this: + /sys/devices/system/cpu/vulnerabilities/itlb_multihit:KVM: Vulnerable + /sys/devices/system/cpu/vulnerabilities/l1tf:Mitigation: PTE Inversion + Which gets turned into + CpuVulnerabilities(vulnerabilities={'itlb_multihit': 'KVM'}, + mitigations= {'l1tf': 'PTE Inversion'}) + """ + text, _ = self.RemoteCommand( + 'sudo grep . /sys/devices/system/cpu/vulnerabilities/*', + ignore_failure=True) + vuln = CpuVulnerabilities() + if not text: + logging.warning('No text response when getting CPU vulnerabilities') + return vuln + for line in text.splitlines(): + vuln.AddLine(line) + return vuln + + +class ClearMixin(BaseLinuxMixin): + """Class holding Clear Linux specific VM methods and attributes.""" + + OS_TYPE = os_types.CLEAR + BASE_OS_TYPE = os_types.CLEAR + PYTHON_2_PACKAGE = 'python-basic' + + def OnStartup(self): + """Eliminates the need to have a tty to run sudo commands.""" + super(ClearMixin, self).OnStartup() + self.RemoteHostCommand('sudo swupd autoupdate --disable') + self.RemoteHostCommand('sudo mkdir -p /etc/sudoers.d') + self.RemoteHostCommand('echo \'Defaults:{0} !requiretty\' | ' + 'sudo tee /etc/sudoers.d/pkb'.format(self.user_name), + login_shell=True) + + def PackageCleanup(self): + """Cleans up all installed packages. 
+ + Performs the normal package cleanup, then deletes the file + added to the /etc/sudoers.d directory during startup. + """ + super(ClearMixin, self).PackageCleanup() + self.RemoteCommand('sudo rm /etc/sudoers.d/pkb') + + def SnapshotPackages(self): + """See base class.""" + self.RemoteCommand('sudo swupd bundle-list > {0}/bundle_list'.format( + linux_packages.INSTALL_DIR)) + + def RestorePackages(self): + """See base class.""" + self.RemoteCommand( + 'sudo swupd bundle-list | grep --fixed-strings --line-regexp --invert-match --file ' + '{0}/bundle_list | xargs --no-run-if-empty sudo swupd bundle-remove' + .format(linux_packages.INSTALL_DIR), + ignore_failure=True) + + def HasPackage(self, package): + """Returns True iff the package is available for installation.""" + return self.TryRemoteCommand( + 'sudo swupd bundle-list --all | grep {0}'.format(package), + suppress_warning=True) + + def InstallPackages(self, packages: str) -> None: + """Installs packages using the swupd bundle manager.""" + self.RemoteCommand('sudo swupd bundle-add {0}'.format(packages)) + + def Install(self, package_name): + """Installs a PerfKit package on the VM.""" + if not self.install_packages: + return + if package_name not in self._installed_packages: + package = linux_packages.PACKAGES[package_name] + if hasattr(package, 'SwupdInstall'): + package.SwupdInstall(self) + elif hasattr(package, 'Install'): + package.Install(self) + else: + raise KeyError( + 'Package {0} has no install method for Clear Linux.'.format( + package_name)) + self._installed_packages.add(package_name) + + def Uninstall(self, package_name): + """Uninstalls a PerfKit package on the VM.""" + package = linux_packages.PACKAGES[package_name] + if hasattr(package, 'SwupdUninstall'): + package.SwupdUninstall(self) + elif hasattr(package, 'Uninstall'): + package.Uninstall(self) + + def GetPathToConfig(self, package_name): + """See base class.""" + package = linux_packages.PACKAGES[package_name] + return package.SwupdGetPathToConfig(self) + + def GetServiceName(self, package_name): + """See base class.""" + package = linux_packages.PACKAGES[package_name] + return package.SwupdGetServiceName(self) + + def GetOsInfo(self): + """See base class.""" + stdout, _ = self.RemoteCommand('swupd info | grep Installed') + return 'Clear Linux build: {0}'.format( + regex_util.ExtractGroup(CLEAR_BUILD_REGEXP, stdout)) + + def SetupProxy(self): + """Sets up proxy configuration variables for the cloud environment.""" + super(ClearMixin, self).SetupProxy() + profile_file = '/etc/profile' + commands = [] + + if FLAGS.http_proxy: + commands.append("echo 'export http_proxy=%s' | sudo tee -a %s" % ( + FLAGS.http_proxy, profile_file)) + + if FLAGS.https_proxy: + commands.append("echo 'https_proxy=%s' | sudo tee -a %s" % ( + FLAGS.https_proxy, profile_file)) + + if FLAGS.ftp_proxy: + commands.append("echo 'ftp_proxy=%s' | sudo tee -a %s" % ( + FLAGS.ftp_proxy, profile_file)) + + if FLAGS.no_proxy: + commands.append("echo 'export no_proxy=%s' | sudo tee -a %s" % ( + FLAGS.no_proxy, profile_file)) + if commands: + self.RemoteCommand(';'.join(commands)) + + def RemoteCommand(self, command, **kwargs): + """Runs a command inside the container. + + Args: + command: Arguments passed directly to RemoteHostCommandWithReturnCode. + **kwargs: Keyword arguments passed directly to + RemoteHostCommandWithReturnCode. + + Returns: + A tuple of stdout and stderr from running the command. + """ + # Escapes bash sequences + command = '. 
/etc/profile; %s' % (command) + return self.RemoteHostCommand(command, **kwargs)[:2] + + +class BaseContainerLinuxMixin(BaseLinuxMixin): + """Class holding VM methods for minimal container-based OSes like Core OS. + + These operating systems have SSH like other Linux OSes, but no package manager + to run Linux benchmarks without Docker. + + Because they cannot install packages, they only support VM life cycle + benchmarks like cluster_boot. + """ + + def InstallPackages(self, package_name): + raise NotImplementedError('Container OSes have no package managers.') + + def HasPackage(self, package: str) -> bool: + return False + + # Install could theoretically be supported. A hermetic architecture + # appropriate binary could be copied into the VM and run. + # However because curl, wget, and object store clients cannot be installed and + # may or may not be present, copying the binary is non-trivial so simply + # block trying. + + def Install(self, package_name): + raise NotImplementedError('Container OSes have no package managers.') + + def Uninstall(self, package_name): + raise NotImplementedError('Container OSes have no package managers.') + + def PrepareVMEnvironment(self): + # Don't try to install packages as normal, because it will fail. + pass + + +class BaseRhelMixin(BaseLinuxMixin): + """Class holding RHEL/CentOS specific VM methods and attributes.""" + + # OS_TYPE = os_types.RHEL + BASE_OS_TYPE = os_types.RHEL + + def OnStartup(self): + """Eliminates the need to have a tty to run sudo commands.""" + super(BaseRhelMixin, self).OnStartup() + self.RemoteHostCommand('echo \'Defaults:%s !requiretty\' | ' + 'sudo tee /etc/sudoers.d/pkb' % self.user_name, + login_shell=True) + if FLAGS.gce_hpc_tools: + self.InstallGcpHpcTools() + if _DISABLE_YUM_CRON.value: + # yum cron can stall causing yum commands to hang + self.RemoteHostCommand('sudo systemctl disable yum-cron.service', + ignore_failure=True) + + def InstallGcpHpcTools(self): + """Installs the GCP HPC tools.""" + self.Install('gce_hpc_tools') + + def InstallEpelRepo(self): + """Installs the Extra Packages for Enterprise Linux repository.""" + self.Install('epel_release') + + def PackageCleanup(self): + """Cleans up all installed packages. + + Performs the normal package cleanup, then deletes the file + added to the /etc/sudoers.d directory during startup. 
+ """ + super(BaseRhelMixin, self).PackageCleanup() + self.RemoteCommand('sudo rm -f /etc/sudoers.d/pkb') + + def SnapshotPackages(self): + """Grabs a snapshot of the currently installed packages.""" + self.RemoteCommand('rpm -qa > %s/rpm_package_list' + % linux_packages.INSTALL_DIR) + + def RestorePackages(self): + """Restores the currently installed packages to those snapshotted.""" + self.RemoteCommand( + 'rpm -qa | grep --fixed-strings --line-regexp --invert-match --file ' + '%s/rpm_package_list | xargs --no-run-if-empty sudo rpm -e' % + linux_packages.INSTALL_DIR, + ignore_failure=True) + + def HasPackage(self, package): + """Returns True iff the package is available for installation.""" + return self.TryRemoteCommand('sudo yum info %s' % package, + suppress_warning=True) + + # yum talks to the network on each request so transient issues may fix + # themselves on retry + @vm_util.Retry(max_retries=UPDATE_RETRIES) + def InstallPackages(self, packages): + """Installs packages using the yum package manager.""" + self.RemoteCommand('sudo yum install -y %s' % packages) + + @vm_util.Retry() + def InstallPackageGroup(self, package_group): + """Installs a 'package group' using the yum package manager.""" + self.RemoteCommand('sudo yum groupinstall -y "%s"' % package_group) + + def Install(self, package_name): + """Installs a PerfKit package on the VM.""" + if not self.install_packages: + return + if package_name not in self._installed_packages: + package = linux_packages.PACKAGES[package_name] + if hasattr(package, 'YumInstall'): + package.YumInstall(self) + elif hasattr(package, 'Install'): + package.Install(self) + else: + raise KeyError('Package %s has no install method for RHEL.' % + package_name) + self._installed_packages.add(package_name) + + def Uninstall(self, package_name): + """Uninstalls a PerfKit package on the VM.""" + package = linux_packages.PACKAGES[package_name] + if hasattr(package, 'YumUninstall'): + package.YumUninstall(self) + elif hasattr(package, 'Uninstall'): + package.Uninstall(self) + + def GetPathToConfig(self, package_name): + """Returns the path to the config file for PerfKit packages. + + This function is mostly useful when config files locations + don't match across distributions (such as mysql). Packages don't + need to implement it if this is not the case. + """ + package = linux_packages.PACKAGES[package_name] + return package.YumGetPathToConfig(self) + + def GetServiceName(self, package_name): + """Returns the service name of a PerfKit package. + + This function is mostly useful when service names don't + match across distributions (such as mongodb). Packages don't + need to implement it if this is not the case. 
+ """ + package = linux_packages.PACKAGES[package_name] + return package.YumGetServiceName(self) + + def SetupProxy(self): + """Sets up proxy configuration variables for the cloud environment.""" + super(BaseRhelMixin, self).SetupProxy() + yum_proxy_file = '/etc/yum.conf' + + if FLAGS.http_proxy: + self.RemoteCommand("grep -qx 'proxy={0}' {1} || echo -e 'proxy={0}' |\ + sudo tee -a {1}".format(FLAGS.http_proxy, yum_proxy_file)) + + def ProxyCleanup(self): + """ Restore to a state before SetupProxy() executed """ + super(BaseRhelMixin, self).ProxyCleanup() + yum_proxy_file = '/etc/yum.conf' + command = "sudo sed -i '\#^proxy={0}$#d' {1}" + + if FLAGS.http_proxy: + self.RemoteCommand(command.format(FLAGS.http_proxy, yum_proxy_file)) + + def AppendKernelCommandLine(self, command_line, reboot=True): + """Appends the provided command-line to the VM and reboots by default.""" + self.RemoteCommand( + r'echo GRUB_CMDLINE_LINUX_DEFAULT=\"\${GRUB_CMDLINE_LINUX_DEFAULT} %s\"' + ' | sudo tee -a /etc/default/grub' % command_line) + self.RemoteCommand('sudo grub2-mkconfig -o /boot/grub2/grub.cfg') + self.RemoteCommand('sudo grub2-mkconfig -o /etc/grub2.cfg') + if reboot: + self.Reboot() + + def AllowPortOsFirewall(self, start_port, end_port=None): + out, stderr, retcode = self.RemoteCommandWithReturnCode('sudo firewall-cmd --state', ignore_failure=True) + if retcode == 1: + logging.info("Firewalld does not appear to be installed, " + "skipping opening port(s) {}-{}.".format(start_port, end_port)) + else: + if out.startswith('running'): + cmd = 'sudo firewall-cmd --permanent --add-port {0}/tcp && ' \ + 'sudo firewall-cmd --permanent --add-port {0}/udp && ' \ + 'sudo firewall-cmd --reload'.format(start_port) + if end_port: + cmd = 'sudo firewall-cmd --permanent --add-port {0}-{1}/tcp && ' \ + 'sudo firewall-cmd --permanent --add-port {0}-{1}/udp && ' \ + 'sudo firewall-cmd --reload'.format(start_port, end_port) + self.RemoteCommand(cmd) + else: + logging.info("Firewalld does not appear to be running, " + "skipping opening port(s) {}-{}.".format(start_port, end_port)) + + def _AssignAdditionalPrivateIpAddresses(self, netmask, gateway): + self.InstallPackages("net-tools iproute") + private_addresses = self.additional_private_ip_addresses + stderr = "" + route_cmd = "{0} | awk '/^default/{{print $NF}}' | sort | head -n 1" + route_paths = ["/usr/sbin/route", "route"] + for route_path in route_paths: + out, stderr, retcode = self.RemoteCommandWithReturnCode(route_cmd.format(route_path), + ignore_failure=True) + if retcode == 0: + break + else: + raise errors.VirtualMachine.RemoteCommandError(stderr) + interface_name = str(out.strip()) + out, _ = self.RemoteCommand("cat /sys/class/net/{0}/address".format(interface_name)) + mac_address = str(out.strip()) + dir_path = posixpath.join("/etc", "sysconfig", "network-scripts") + append_to_file = 'echo "{{0}}" | sudo tee -a {0}' + for i, addr in enumerate(private_addresses): + interface_num = i + 1 + subinterface_name = "{0}:{1}".format(interface_name, interface_num) + file_path = posixpath.join(dir_path, "ifcfg-{0}".format(subinterface_name)) + append_to_local_file = append_to_file.format(file_path) + cmds = [ + append_to_local_file.format('DEVICE="{0}"'.format(subinterface_name)), + append_to_local_file.format("BOOTPROTO=static"), + append_to_local_file.format("ONBOOT=YES"), + append_to_local_file.format("TYPE=Ethernet"), + append_to_local_file.format("IPADDR={0}".format(addr)), + append_to_local_file.format("NETMASK={0}".format(netmask)), + 
append_to_local_file.format("GATEWAY={0}".format(gateway)), + append_to_local_file.format("HWADDR={0}".format(mac_address)), + "sudo ifup {0}".format(subinterface_name) + ] + self.RemoteCommand(' && '.join(cmds)) + + +class AmazonLinux2Mixin(BaseRhelMixin): + """Class holding Amazon Linux 2 VM methods and attributes.""" + OS_TYPE = os_types.AMAZONLINUX2 + + +class Rhel7Mixin(BaseRhelMixin): + """Class holding RHEL 7 specific VM methods and attributes.""" + OS_TYPE = os_types.RHEL7 + + +class Rhel8Mixin(BaseRhelMixin): + """Class holding RHEL 8 specific VM methods and attributes.""" + OS_TYPE = os_types.RHEL8 + + +class CentOs7Mixin(BaseRhelMixin): + """Class holding CentOS 7 specific VM methods and attributes.""" + OS_TYPE = os_types.CENTOS7 + + +class CentOs8Mixin(BaseRhelMixin, virtual_machine.DeprecatedOsMixin): + """Class holding CentOS 8 specific VM methods and attributes.""" + OS_TYPE = os_types.CENTOS8 + END_OF_LIFE = '2021-12-31' + ALTERNATIVE_OS = f'{os_types.CENTOS_STREAM8} or {os_types.CENTOS_STREAM9}' + + +class CentOsStream8Mixin(BaseRhelMixin): + """Class holding CentOS Stream 8 specific VM methods and attributes.""" + OS_TYPE = os_types.CENTOS_STREAM8 + + +class CentOsStream9Mixin(BaseRhelMixin): + """Class holding CentOS Stream 9 specific VM methods and attributes.""" + OS_TYPE = os_types.CENTOS_STREAM9 + + +class RockyLinux8Mixin(BaseRhelMixin): + """Class holding Rocky Linux 8 specific VM methods and attributes.""" + OS_TYPE = os_types.ROCKY_LINUX8 + + +class ContainerOptimizedOsMixin(BaseContainerLinuxMixin): + """Class holding COS specific VM methods and attributes.""" + OS_TYPE = os_types.COS + BASE_OS_TYPE = os_types.CORE_OS + + def PrepareVMEnvironment(self): + super(ContainerOptimizedOsMixin, self).PrepareVMEnvironment() + # COS mounts /home and /tmp with -o noexec, which blocks running benchmark + # binaries. + # TODO(user): Support reboots + self.RemoteCommand('sudo mount -o remount,exec /home') + self.RemoteCommand('sudo mount -o remount,exec /tmp') + + +class CoreOsMixin(BaseContainerLinuxMixin): + """Class holding CoreOS Container Linux specific VM methods and attributes.""" + OS_TYPE = os_types.CORE_OS + BASE_OS_TYPE = os_types.CORE_OS + + +class BaseDebianMixin(BaseLinuxMixin): + """Class holding Debian specific VM methods and attributes.""" + + OS_TYPE = 'base-only' + BASE_OS_TYPE = os_types.DEBIAN + _PACKAGES = {} + + def __init__(self, *args, **kwargs): + super(BaseDebianMixin, self).__init__(*args, **kwargs) + + # Whether or not apt-get update has been called. + # We defer running apt-get update until the first request to install a + # package. + self._apt_updated = False + + @vm_util.Retry(max_retries=UPDATE_RETRIES) + def AptUpdate(self): + """Updates the package lists on VMs using apt.""" + try: + # setting the timeout on the apt-get to 5 minutes because + # it is known to get stuck. In a normal update this + # takes less than 30 seconds. + self.RemoteCommand('sudo apt-get update', timeout=300) + except errors.VirtualMachine.RemoteCommandError as e: + # If there is a problem, remove the lists in order to get rid of + # "Hash Sum mismatch" errors (the files will be restored when + # apt-get update is run again). 
+ self.RemoteCommand('sudo rm -r /var/lib/apt/lists/*') + raise e + + def SnapshotPackages(self): + """Grabs a snapshot of the currently installed packages.""" + initial_packages, _ = self.RemoteCommand('dpkg-query --show') + initial_packages = str(initial_packages.strip()) + initial_packages = initial_packages.splitlines() + package_infos = '(?:\s+)?(.*?)(?::(.*?))?\s+(.*)' + + for line in initial_packages: + package = re.search(package_infos, line).group(1) + architecture = re.search(package_infos, line).group(2) + version = re.search(package_infos, line).group(3) + package_pair = (version, architecture) + self._PACKAGES[package] = package_pair + + def RestorePackages(self): + """Restores the currently installed packages to those snapshotted.""" + initial_package_names = self._PACKAGES.keys() + # If snapshots are not available, do nothing + if len(initial_package_names) == 0: + logging.info("No package snapshot information available, could not restore system to its original state") + return + + unavailable_package_names = [] + MAX_PACKAGE_PER_CMD = 50 + + def _PurgePackages(current_packages, initial_package_names): + '''Purge unwanted packages that are in addition over default.''' + remove_packages = 'sudo DEBIAN_FRONTEND=noninteractive apt-get --purge remove -y' + package_idx = 0 + commands_list = [] + checked_packages = {} + validate_package = False + madison_package_infos = '(?:\s+)?(.*?)(?::.*?)?\s+\|\s+(.*?)(?=\s+\|)\s+\|\s+(?:.*?)\s+(?:.*?)\s+(?:(.*?)\s+)?' + current_package_infos = '(?:\s+)?(.*?)(?::(.*?))?\s+(.*)' + while package_idx < len(current_packages): + cmd = 'apt-cache madison' + # Batching packages is needed because SSH have a limit of how long the string can be + for count in range(MAX_PACKAGE_PER_CMD): + if package_idx < len(current_packages): + package = re.search(current_package_infos, current_packages[package_idx]).group(1) + cmd += " " + package + package_idx += 1 + else: + break + commands_list.append(cmd) + for madison_pack in commands_list: + check_package, _ = self.RemoteCommand(madison_pack) + check_package = str(check_package.strip()) + check_package = check_package.splitlines() + for line in check_package: + if not line or "|" not in line: + continue + found = re.search(madison_package_infos, line) + if found: + package = found.group(1) + version = found.group(2) + architecture = found.group(3) + package_pair = (version, architecture) + if package not in checked_packages.keys(): + checked_packages[package] = [package_pair] + else: + checked_packages[package].append(package_pair) + else: + logging.warn('Unmatched package found: [%s] ', line) + for element in reversed(current_packages): + package_name = re.search(current_package_infos, element).group(1) + # for packages do not have apt-cache madison info + if package_name not in checked_packages: + continue + # remove packages that do not belong to initial setup + if package_name not in initial_package_names: + remove_packages += " " + package_name + unavailable_package_names.append(package_name) + current_packages.remove(element) + del checked_packages[package_name] + continue + for pair in checked_packages[package_name]: + try: + if self._PACKAGES[package_name][0] == pair[0] and self._PACKAGES[package_name][1] == pair[1]: + validate_package = True + except TypeError: + if self._PACKAGES[package_name][0] == pair[0]: + validate_package = True + if validate_package is False: + unavailable_package_names.append(package_name) + else: + validate_package = False + self.RemoteCommand(remove_packages, 
ignore_failure=True, suppress_warning=True) + return current_packages + + def _GetAvailablePackages(current_packages): + '''Create commands batches to downgrade all default packages to their initial state''' + package_idx = 0 + commands_list = [] + while package_idx < len(current_packages): + cmd = 'sudo DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-downgrades' + # Batching packages is needed because SSH have a limit of how long the string can be + for count in range(MAX_PACKAGE_PER_CMD): + if package_idx < len(current_packages): + package = re.search(r'(?:\s+)?(.*?)(?::(.*?))?\s+(.*)', current_packages[package_idx]).group(1) + if package not in unavailable_package_names and package in self._PACKAGES: + cmd += " " + package + "=" + self._PACKAGES[package][0] + package_idx += 1 + else: + break + commands_list.append(cmd) + return commands_list + current_packages, _ = self.RemoteCommand('dpkg-query --show') + current_packages = str(current_packages.strip()) + current_packages = current_packages.splitlines() + current_packages = _PurgePackages(current_packages, initial_package_names) + commands_list = _GetAvailablePackages(current_packages) + for pack in commands_list: + self.RemoteCommand(pack, ignore_failure=True, suppress_warning=True) + self.RemoteCommand('sudo apt-get -y autoremove', ignore_failure=True, suppress_warning=True) + + def HasPackage(self, package): + """Returns True iff the package is available for installation.""" + return self.TryRemoteCommand('apt-get install --just-print %s' % package, + suppress_warning=True) + + @vm_util.Retry() + def InstallPackages(self, packages): + """Installs packages using the apt package manager.""" + if not self._apt_updated: + self.AptUpdate() + self._apt_updated = True + try: + install_command = ('sudo DEBIAN_FRONTEND=\'noninteractive\' ' + '/usr/bin/apt-get -y install %s' % (packages)) + self.RemoteCommand(install_command) + except errors.VirtualMachine.RemoteCommandError as e: + # TODO(user): Remove code below after Azure fix their package repository, + # or add code to recover the sources.list + self.RemoteCommand( + 'sudo sed -i.bk "s/azure.archive.ubuntu.com/archive.ubuntu.com/g" ' + '/etc/apt/sources.list') + logging.info('Installing "%s" failed on %s. This may be transient. ' + 'Updating package list.', packages, self) + self.AptUpdate() + raise e + + def Install(self, package_name): + """Installs a PerfKit package on the VM.""" + if not self.install_packages: + return + + if not self._apt_updated: + self.AptUpdate() + self._apt_updated = True + + if package_name not in self._installed_packages: + package = linux_packages.PACKAGES[package_name] + if hasattr(package, 'AptInstall'): + package.AptInstall(self) + elif hasattr(package, 'Install'): + package.Install(self) + else: + raise KeyError('Package %s has no install method for Debian.' % + package_name) + self._installed_packages.add(package_name) + + def Uninstall(self, package_name): + """Uninstalls a PerfKit package on the VM.""" + package = linux_packages.PACKAGES[package_name] + if hasattr(package, 'AptUninstall'): + package.AptUninstall(self) + elif hasattr(package, 'Uninstall'): + package.Uninstall(self) + self._installed_packages.discard(package_name) + + def GetPathToConfig(self, package_name): + """Returns the path to the config file for PerfKit packages. + + This function is mostly useful when config files locations + don't match across distributions (such as mysql). Packages don't + need to implement it if this is not the case. 
+ + Args: + package_name: the name of the package. + """ + package = linux_packages.PACKAGES[package_name] + return package.AptGetPathToConfig(self) + + def GetServiceName(self, package_name): + """Returns the service name of a PerfKit package. + + This function is mostly useful when service names don't + match across distributions (such as mongodb). Packages don't + need to implement it if this is not the case. + + Args: + package_name: the name of the package. + """ + package = linux_packages.PACKAGES[package_name] + return package.AptGetServiceName(self) + + def AllowPortOsFirewall(self, start_port, end_port=None): + out, stderr, retcode = self.RemoteCommandWithReturnCode('sudo ufw status', ignore_failure=True) + if retcode == 1: + logging.info("Ufw does not appear to be installed, " + "skipping opening port(s) {}-{}.".format(start_port, end_port)) + else: + if out.startswith('Status: active'): + cmd = 'sudo ufw allow {}'.format(start_port) + if end_port: + cmd = 'sudo ufw allow {}:{}'.format(start_port, end_port) + self.RemoteCommand(cmd) + else: + logging.info("Ufw does not appear to be running, " + "skipping opening port(s) {}-{}.".format(start_port, end_port)) + + def SetupProxy(self): + """Sets up proxy configuration variables for the cloud environment.""" + super(BaseDebianMixin, self).SetupProxy() + apt_proxy_file = '/etc/apt/apt.conf' + commands = [] + + if FLAGS.http_proxy: + commands.append("egrep -q 'http::proxy\s+\"{0}\";$' {1}" + "|| echo -e 'Acquire::http::proxy \"{0}\";' |" + "sudo tee -a {1}".format(FLAGS.http_proxy, apt_proxy_file)) + + if FLAGS.https_proxy: + commands.append("egrep -q 'https::proxy\s+\"{0}\";$' {1}" + "|| echo -e 'Acquire::https::proxy \"{0}\";' |" + "sudo tee -a {1}".format(FLAGS.https_proxy, apt_proxy_file)) + + if commands: + self.RemoteCommand(";".join(commands)) + + def ProxyCleanup(self): + """ Restore to a state before SetupProxy() executed """ + super(BaseDebianMixin, self).ProxyCleanup() + apt_proxy_file = '/etc/apt/apt.conf' + commands = [] + command_template = "sudo sed -i '\#{0}\";$#d' {1}" + + if FLAGS.http_proxy: + commands.append(command_template.format(FLAGS.http_proxy, apt_proxy_file)) + + if FLAGS.https_proxy: + commands.append(command_template.format(FLAGS.https_proxy, apt_proxy_file)) + + if commands: + self.RemoteCommand(';'.join(commands)) + + def IncreaseSSHConnection(self, target): + """Increase maximum number of ssh connections on vm. + + Args: + target: int. The max number of ssh connection. 
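+
+ Illustrative example: IncreaseSSHConnection(100) rewrites any MaxStartups
+ line in /etc/ssh/sshd_config to 'MaxStartups 100' and then restarts the
+ ssh service so the new limit takes effect.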
+ """ + self.RemoteCommand(r'sudo sed -i -e "s/.*MaxStartups.*/MaxStartups {0}/" ' + '/etc/ssh/sshd_config'.format(target)) + self.RemoteCommand('sudo service ssh restart') + + def AppendKernelCommandLine(self, command_line, reboot=True): + """Appends the provided command-line to the VM and reboots by default.""" + self.RemoteCommand( + r'echo GRUB_CMDLINE_LINUX_DEFAULT=\"\${GRUB_CMDLINE_LINUX_DEFAULT} %s\"' + r' | sudo tee -a /etc/default/grub' % command_line) + self.RemoteCommand('sudo update-grub') + if reboot: + self.Reboot() + + +class Debian9Mixin(BaseDebianMixin): + """Class holding Debian9 specific VM methods and attributes.""" + OS_TYPE = os_types.DEBIAN9 + # https://packages.debian.org/stretch/python + PYTHON_2_PACKAGE = 'python' + + +class Debian10Mixin(BaseDebianMixin): + """Class holding Debian 10 specific VM methods and attributes.""" + OS_TYPE = os_types.DEBIAN10 + + +class Debian11Mixin(BaseDebianMixin): + """Class holding Debian 11 specific VM methods and attributes.""" + OS_TYPE = os_types.DEBIAN11 + + def PrepareVMEnvironment(self): + # Missing in some images. Required by PrepareVMEnvironment to determine + # partitioning. + self.InstallPackages('fdisk') + super().PrepareVMEnvironment() + + +class BaseUbuntuMixin(BaseDebianMixin): + """Class holding Ubuntu specific VM methods and attributes.""" + + def AppendKernelCommandLine(self, command_line, reboot=True): + """Appends the provided command-line to the VM and reboots by default.""" + self.RemoteCommand( + r'echo GRUB_CMDLINE_LINUX_DEFAULT=\"\${GRUB_CMDLINE_LINUX_DEFAULT} %s\"' + r' | sudo tee -a /etc/default/grub.d/50-cloudimg-settings.cfg' % + command_line) + self.RemoteCommand('sudo update-grub') + if reboot: + self.Reboot() + + def _AssignAdditionalPrivateIpAddresses(self, netmask, gateway): + if self.OS_TYPE > os_types.UBUNTU1604: + if self.HasPackage("ifupdown"): + self.InstallPackages("ifupdown") + if self.OS_TYPE == os_types.UBUNTU2004: + if self.HasPackage("net-tools"): + self.InstallPackages('net-tools') + out, _ = self.RemoteCommand("route | awk '/^default/{{print $NF}}' | sort | head -n 1") + interface_name = str(out.strip()) + private_addresses = self.additional_private_ip_addresses + pkb_net_cfg = posixpath.join("/etc", "network", "interfaces") + append_to_file = 'echo "{{0}}" | sudo tee -a {0}'.format(pkb_net_cfg) + subinterface_names = [] + for i, addr in enumerate(private_addresses): + interface_num = i + 1 + subinterface_name = "{0}:{1}".format(interface_name, interface_num) + subinterface_names.append(subinterface_name) + cmds = [ + append_to_file.format("auto {0}".format(subinterface_name)), + append_to_file.format("iface {0}:{1} inet static".format(interface_name, interface_num)), + append_to_file.format("address {0}".format(addr)), + append_to_file.format("netmask {0}".format(netmask)), + append_to_file.format(""), + ] + self.RemoteCommand(' && '.join(cmds)) + cmds = ["sudo ifup {0}".format(subiface) for subiface in subinterface_names] + self.RemoteCommand(' && '.join(cmds)) + + +class Ubuntu1604Mixin(BaseUbuntuMixin, virtual_machine.DeprecatedOsMixin): + """Class holding Ubuntu1604 specific VM methods and attributes.""" + OS_TYPE = os_types.UBUNTU1604 + PYTHON_2_PACKAGE = 'python' + END_OF_LIFE = '2021-05-01' + ALTERNATIVE_OS = os_types.UBUNTU1804 + + +class Ubuntu1804Mixin(BaseUbuntuMixin): + """Class holding Ubuntu1804 specific VM methods and attributes.""" + OS_TYPE = os_types.UBUNTU1804 + # https://packages.ubuntu.com/bionic/python + PYTHON_2_PACKAGE = 'python' + + +class 
Ubuntu1804EfaMixin(Ubuntu1804Mixin): + """Class holding EFA specific VM methods and attributes.""" + OS_TYPE = os_types.UBUNTU1804_EFA + + +# Inherit Ubuntu 18's idiosyncracies. +# Note https://bugs.launchpad.net/snappy/+bug/1659719 is also marked not fix in +# focal. +class Ubuntu2004Mixin(Ubuntu1804Mixin): + """Class holding Ubuntu2004 specific VM methods and attributes.""" + OS_TYPE = os_types.UBUNTU2004 + # https://packages.ubuntu.com/focal/python2 + PYTHON_2_PACKAGE = 'python2' + + +class Ubuntu2204Mixin(BaseUbuntuMixin): + """Class holding Ubuntu2204 specific VM methods and attributes.""" + OS_TYPE = os_types.UBUNTU2204 + + +class Ubuntu1604Cuda9Mixin(Ubuntu1604Mixin): + """Class holding NVIDIA CUDA specific VM methods and attributes.""" + OS_TYPE = os_types.UBUNTU1604_CUDA9 + + +class ContainerizedDebianMixin(BaseDebianMixin): + """Class representing a Containerized Virtual Machine. + + A Containerized Virtual Machine is a VM that runs remote commands + within a Docker Container. + Any call to RemoteCommand() will be run within the container + whereas any call to RemoteHostCommand() will be run in the VM itself. + """ + + OS_TYPE = os_types.UBUNTU_CONTAINER + BASE_DOCKER_IMAGE = 'ubuntu:xenial' + + def __init__(self, *args, **kwargs): + super(ContainerizedDebianMixin, self).__init__(*args, **kwargs) + self.docker_id = None + + def _CheckDockerExists(self): + """Returns whether docker is installed or not.""" + resp, _ = self.RemoteHostCommand('command -v docker', ignore_failure=True, + suppress_warning=True) + if resp.rstrip() == '': + return False + return True + + def PrepareVMEnvironment(self): + """Initializes docker before proceeding with preparation.""" + if not self._CheckDockerExists(): + self.Install('docker') + # We need to explicitly create VM_TMP_DIR in the host because + # otherwise it will be implicitly created by Docker in InitDocker() + # (because of the -v option) and owned by root instead of perfkit, + # causing permission problems. + self.RemoteHostCommand('mkdir -p %s' % vm_util.VM_TMP_DIR) + self.InitDocker() + # This will create the VM_TMP_DIR in the container. + # Has to be done after InitDocker() because it needs docker_id. + self._CreateVmTmpDir() + + super(ContainerizedDebianMixin, self).PrepareVMEnvironment() + + def InitDocker(self): + """Initializes the docker container daemon.""" + init_docker_cmd = ['sudo docker run -d ' + '--rm ' + '--net=host ' + '--workdir=%s ' + '-v %s:%s ' % (CONTAINER_WORK_DIR, + vm_util.VM_TMP_DIR, + CONTAINER_MOUNT_DIR)] + for sd in self.scratch_disks: + init_docker_cmd.append('-v %s:%s ' % (sd.mount_point, sd.mount_point)) + init_docker_cmd.append('%s sleep infinity ' % self.BASE_DOCKER_IMAGE) + init_docker_cmd = ''.join(init_docker_cmd) + + resp, _ = self.RemoteHostCommand(init_docker_cmd) + self.docker_id = resp.rstrip() + return self.docker_id + + def RemoteCommand(self, command, **kwargs): + """Runs a command inside the container. + + Args: + command: A valid bash command. + **kwargs: Keyword arguments passed directly to RemoteHostCommand. + + Returns: + A tuple of stdout and stderr from running the command. + """ + # Escapes bash sequences + command = command.replace("'", r"'\''") + + logging.info('Docker running: %s', command) + command = "sudo docker exec %s bash -c '%s'" % (self.docker_id, command) + return self.RemoteHostCommand(command, **kwargs) + + def ContainerCopy(self, file_name, container_path='', copy_to=True): + """Copies a file to or from container_path to the host's vm_util.VM_TMP_DIR. 
+ + Args: + file_name: Name of the file in the host's vm_util.VM_TMP_DIR. + container_path: Optional path of where to copy file on container. + copy_to: True to copy to container, False to copy from container. + Raises: + RemoteExceptionError: If the source container_path is blank. + """ + if copy_to: + if container_path == '': + container_path = CONTAINER_WORK_DIR + + # Everything in vm_util.VM_TMP_DIR is directly accessible + # both in the host and in the container + source_path = posixpath.join(CONTAINER_MOUNT_DIR, file_name) + command = 'cp %s %s' % (source_path, container_path) + self.RemoteCommand(command) + else: + if container_path == '': + raise errors.VirtualMachine.RemoteExceptionError('Cannot copy ' + 'from blank target') + destination_path = posixpath.join(CONTAINER_MOUNT_DIR, file_name) + command = 'cp %s %s' % (container_path, destination_path) + self.RemoteCommand(command) + + @vm_util.Retry( + poll_interval=1, max_retries=3, + retryable_exceptions=(errors.VirtualMachine.RemoteCommandError,)) + def RemoteCopy(self, file_path, remote_path='', copy_to=True): + """Copies a file to or from the container in the remote VM. + + Args: + file_path: Local path to file. + remote_path: Optional path of where to copy file inside the container. + copy_to: True to copy to VM, False to copy from VM. + """ + if copy_to: + file_name = os.path.basename(file_path) + tmp_path = posixpath.join(vm_util.VM_TMP_DIR, file_name) + self.RemoteHostCopy(file_path, tmp_path, copy_to) + self.ContainerCopy(file_name, remote_path, copy_to) + else: + file_name = posixpath.basename(remote_path) + tmp_path = posixpath.join(vm_util.VM_TMP_DIR, file_name) + self.ContainerCopy(file_name, remote_path, copy_to) + self.RemoteHostCopy(file_path, tmp_path, copy_to) + + def MoveFile(self, target, source_path, remote_path=''): + """Copies a file from one VM to a target VM. + + Copies a file from a container in the source VM to a container + in the target VM. + + Args: + target: The target ContainerizedVirtualMachine object. + source_path: The location of the file on the REMOTE machine. + remote_path: The destination of the file on the TARGET machine, default + is the root directory. + """ + file_name = posixpath.basename(source_path) + + # Copies the file to vm_util.VM_TMP_DIR in source + self.ContainerCopy(file_name, source_path, copy_to=False) + + # Moves the file to vm_util.VM_TMP_DIR in target + source_host_path = posixpath.join(vm_util.VM_TMP_DIR, file_name) + target_host_dir = vm_util.VM_TMP_DIR + self.MoveHostFile(target, source_host_path, target_host_dir) + + # Copies the file to its final destination in the container + target.ContainerCopy(file_name, remote_path) + + def SnapshotPackages(self): + """Grabs a snapshot of the currently installed packages.""" + pass + + def PackageCleanup(self): + """Cleans up all installed packages. + + Stop the docker container launched with --rm. + """ + if self.docker_id: + self.RemoteHostCommand('docker stop %s' % self.docker_id) + + +class KernelRelease(object): + """Holds the contents of the linux kernel version returned from uname -r.""" + + def __init__(self, uname): + """KernelVersion Constructor. 
+ + Args: + uname: A string in the format of "uname -r" command + """ + + # example format would be: "4.5.0-96-generic" + # or "3.10.0-514.26.2.el7.x86_64" for centos + # major.minor.Rest + # in this example, major = 4, minor = 5 + major_string, minor_string, _ = uname.split('.', 2) + self.major = int(major_string) + self.minor = int(minor_string) + + def AtLeast(self, major, minor): + """Check If the kernel version meets a minimum bar. + + The kernel version needs to be at least as high as the major.minor + specified in args. + + Args: + major: The major number to test, as an integer + minor: The minor number to test, as an integer + + Returns: + True if the kernel version is at least as high as major.minor, + False otherwise + """ + if self.major < major: + return False + if self.major > major: + return True + return self.minor >= minor + + +def _ParseTextProperties(text): + """Parses raw text that has lines in "key:value" form. + + When comes across an empty line will return a dict of the current values. + + Args: + text: Text of lines in "key:value" form. + + Yields: + Dict of [key,value] values for a section. + """ + current_data = {} + for line in (line.strip() for line in text.splitlines()): + if line: + m = _COLON_SEPARATED_RE.match(line) + if m: + current_data[m.group('key')] = m.group('value') + else: + logging.debug('Ignoring bad line "%s"', line) + else: + # Hit a section break + if current_data: + yield current_data + current_data = {} + if current_data: + yield current_data + + +class LsCpuResults(object): + """Holds the contents of the command lscpu.""" + + def __init__(self, lscpu): + """LsCpuResults Constructor. + + The lscpu command on Ubuntu 16.04 does *not* have the "--json" option for + json output, so keep on using the text format. + + Args: + lscpu: A string in the format of "lscpu" command + + Raises: + ValueError: if the format of lscpu isnt what was expected for parsing + + Example value of lscpu is: + Architecture: x86_64 + CPU op-mode(s): 32-bit, 64-bit + Byte Order: Little Endian + CPU(s): 12 + On-line CPU(s) list: 0-11 + Thread(s) per core: 2 + Core(s) per socket: 6 + Socket(s): 1 + NUMA node(s): 1 + Vendor ID: GenuineIntel + CPU family: 6 + Model: 79 + Stepping: 1 + CPU MHz: 1202.484 + BogoMIPS: 7184.10 + Virtualization: VT-x + L1d cache: 32K + L1i cache: 32K + L2 cache: 256K + L3 cache: 15360K + NUMA node0 CPU(s): 0-11 + """ + self.data = {} + for stanza in _ParseTextProperties(lscpu): + self.data.update(stanza) + + def GetInt(key): + if key in self.data and self.data[key].isdigit(): + return int(self.data[key]) + raise ValueError('Could not find integer "{}" in {}'.format( + key, sorted(self.data))) + + if 'NUMA node(s)' in self.data: + self.numa_node_count = GetInt('NUMA node(s)') + else: + self.numa_node_count = None + self.cores_per_socket = GetInt('Core(s) per socket') + self.socket_count = GetInt('Socket(s)') + self.threads_per_core = GetInt('Thread(s) per core') + + +class ProcCpuResults(object): + """Parses /proc/cpuinfo text into grouped values. + + Most of the cpuinfo is repeated per processor. Known ones that change per + processor are listed in _PER_CPU_KEYS and are processed separately to make + reporting easier. 
+ + Example metadata for metric='proccpu': + |bugs:spec_store_bypass spectre_v1 spectre_v2 swapgs|, + |cache size:25344 KB| + + Example metadata for metric='proccpu_mapping': + |proc_0:apicid=0;core id=0;initial apicid=0;physical id=0|, + |proc_1:apicid=2;core id=1;initial apicid=2;physical id=0| + + Attributes: + text: The /proc/cpuinfo text. + mappings: Dict of [processor id: dict of values that change with cpu] + attributes: Dict of /proc/cpuinfo entries that are not in mappings. + """ + + # known attributes that vary with the processor id + _PER_CPU_KEYS = ['core id', 'initial apicid', 'apicid', 'physical id'] + # attributes that should be sorted, for example turning the 'flags' value + # of "popcnt avx512bw" to "avx512bw popcnt" + _SORT_VALUES = ['flags', 'bugs'] + + def __init__(self, text): + self.mappings = {} + self.attributes = collections.defaultdict(set) + for stanza in _ParseTextProperties(text): + processor_id, single_values, multiple_values = self._ParseStanza(stanza) + if processor_id is None: # can be 0 + continue + if processor_id in self.mappings: + logging.warning('Processor id %s seen twice in %s', processor_id, text) + continue + self.mappings[processor_id] = single_values + for key, value in multiple_values.items(): + self.attributes[key].add(value) + + def GetValues(self): + """Dict of cpuinfo keys to its values. + + Multiple values are joined by semicolons. + + Returns: + Dict of [cpuinfo key:value string] + """ + cpuinfo = { + key: ';'.join(sorted(values)) + for key, values in self.attributes.items() + } + cpuinfo['proccpu'] = ','.join(sorted(self.attributes.keys())) + return cpuinfo + + def _ParseStanza(self, stanza): + """Parses the cpuinfo section for an individual CPU. + + Args: + stanza: Dict of the /proc/cpuinfo results for an individual CPU. + + Returns: + Tuple of (processor_id, dict of values that are known to change with + each CPU, dict of other cpuinfo results). + """ + singles = {} + if 'processor' not in stanza: + return None, None, None + processor_id = int(stanza.pop('processor')) + for key in self._PER_CPU_KEYS: + if key in stanza: + singles[key] = stanza.pop(key) + for key in self._SORT_VALUES: + if key in stanza: + stanza[key] = ' '.join(sorted(stanza[key].split())) + return processor_id, singles, stanza + + +class JujuMixin(BaseDebianMixin): + """Class to allow running Juju-deployed workloads. + + Bootstraps a Juju environment using the manual provider: + https://jujucharms.com/docs/stable/config-manual + """ + + # TODO: Add functionality to tear down and uninstall Juju + # (for pre-provisioned) machines + JujuUninstall for packages using charms. + + OS_TYPE = os_types.JUJU + + is_controller = False + + # A reference to the juju controller, useful when operations occur against + # a unit's VM but need to be preformed from the controller. + controller = None + + vm_group = None + + machines = {} + units = [] + + installation_lock = threading.Lock() + + environments_yaml = """ + default: perfkit + + environments: + perfkit: + type: manual + bootstrap-host: {0} + """ + + def _Bootstrap(self): + """Bootstrap a Juju environment.""" + resp, _ = self.RemoteHostCommand('juju bootstrap') + + def JujuAddMachine(self, unit): + """Adds a manually-created virtual machine to Juju. + + Args: + unit: An object representing the unit's BaseVirtualMachine. + """ + resp, _ = self.RemoteHostCommand('juju add-machine ssh:%s' % + unit.internal_ip) + + # We don't know what the machine's going to be used for yet, + # but track it's placement for easier access later. 
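+ # (Illustrative assumption about the CLI output: a confirmation such as
+ # 'created machine 3' would make the slicing below store this unit under
+ # machine id '3'.)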
+ # We're looking for the output: created machine %d + machine_id = _[_.rindex(' '):].strip() + self.machines[machine_id] = unit + + def JujuConfigureEnvironment(self): + """Configure a bootstrapped Juju environment.""" + if self.is_controller: + resp, _ = self.RemoteHostCommand('mkdir -p ~/.juju') + + with vm_util.NamedTemporaryFile() as tf: + tf.write(self.environments_yaml.format(self.internal_ip)) + tf.close() + self.PushFile(tf.name, '~/.juju/environments.yaml') + + def JujuEnvironment(self): + """Get the name of the current environment.""" + output, _ = self.RemoteHostCommand('juju switch') + return output.strip() + + def JujuRun(self, cmd): + """Run a command on the virtual machine. + + Args: + cmd: The command to run. + """ + output, _ = self.RemoteHostCommand(cmd) + return output.strip() + + def JujuStatus(self, pattern=''): + """Return the status of the Juju environment. + + Args: + pattern: Optionally match machines/services with a pattern. + """ + output, _ = self.RemoteHostCommand('juju status %s --format=json' % + pattern) + return output.strip() + + def JujuVersion(self): + """Return the Juju version.""" + output, _ = self.RemoteHostCommand('juju version') + return output.strip() + + def JujuSet(self, service, params=[]): + """Set the configuration options on a deployed service. + + Args: + service: The name of the service. + params: A list of key=values pairs. + """ + output, _ = self.RemoteHostCommand( + 'juju set %s %s' % (service, ' '.join(params))) + return output.strip() + + @vm_util.Retry(poll_interval=30, timeout=3600) + def JujuWait(self): + """Wait for all deployed services to be installed, configured, and idle.""" + status = yaml.safe_load(self.JujuStatus()) + for service in status['services']: + ss = status['services'][service]['service-status']['current'] + + # Accept blocked because the service may be waiting on relation + if ss not in ['active', 'unknown']: + raise errors.Juju.TimeoutException( + 'Service %s is not ready; status is %s' % (service, ss)) + + if ss in ['error']: + # The service has failed to deploy. + debuglog = self.JujuRun('juju debug-log --limit 200') + logging.warning(debuglog) + raise errors.Juju.UnitErrorException( + 'Service %s is in an error state' % service) + + for unit in status['services'][service]['units']: + unit_data = status['services'][service]['units'][unit] + ag = unit_data['agent-state'] + if ag != 'started': + raise errors.Juju.TimeoutException( + 'Service %s is not ready; agent-state is %s' % (service, ag)) + + ws = unit_data['workload-status']['current'] + if ws not in ['active', 'unknown']: + raise errors.Juju.TimeoutException( + 'Service %s is not ready; workload-state is %s' % (service, ws)) + + def JujuDeploy(self, charm, vm_group): + """Deploy (and scale) this service to the machines in its vm group. + + Args: + charm: The charm to deploy, i.e., cs:trusty/ubuntu. + vm_group: The name of vm_group the unit(s) should be deployed to. 
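+
+ Illustrative example: JujuDeploy('cs:trusty/mysql', 'servers') deploys the
+ charm to one machine previously registered for the 'servers' vm_group and
+ then adds a unit on each remaining machine in that group ('cs:trusty/mysql'
+ and 'servers' are hypothetical values).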
+ """ + + # Find the already-deployed machines belonging to this vm_group + machines = [] + for machine_id, unit in self.machines.items(): + if unit.vm_group == vm_group: + machines.append(machine_id) + + # Deploy the first machine + resp, _ = self.RemoteHostCommand( + 'juju deploy %s --to %s' % (charm, machines.pop())) + + # Get the name of the service + service = charm[charm.rindex('/') + 1:] + + # Deploy to the remaining machine(s) + for machine in machines: + resp, _ = self.RemoteHostCommand( + 'juju add-unit %s --to %s' % (service, machine)) + + def JujuRelate(self, service1, service2): + """Create a relation between two services. + + Args: + service1: The first service to relate. + service2: The second service to relate. + """ + resp, _ = self.RemoteHostCommand( + 'juju add-relation %s %s' % (service1, service2)) + + def Install(self, package_name): + """Installs a PerfKit package on the VM.""" + package = linux_packages.PACKAGES[package_name] + try: + # Make sure another unit doesn't try + # to install the charm at the same time + with self.controller.installation_lock: + if package_name not in self.controller._installed_packages: + package.JujuInstall(self.controller, self.vm_group) + self.controller._installed_packages.add(package_name) + except AttributeError as e: + logging.warning('Failed to install package %s, falling back to Apt (%s)', + package_name, e) + if package_name not in self._installed_packages: + if hasattr(package, 'AptInstall'): + package.AptInstall(self) + elif hasattr(package, 'Install'): + package.Install(self) + else: + raise KeyError('Package %s has no install method for Juju machines.' % + package_name) + self._installed_packages.add(package_name) + + def SetupPackageManager(self): + if self.is_controller: + resp, _ = self.RemoteHostCommand( + 'sudo add-apt-repository ppa:juju/stable' + ) + super(JujuMixin, self).SetupPackageManager() + + def PrepareVMEnvironment(self): + """Install and configure a Juju environment.""" + super(JujuMixin, self).PrepareVMEnvironment() + if self.is_controller: + self.InstallPackages('juju') + + self.JujuConfigureEnvironment() + + self.AuthenticateVm() + + self._Bootstrap() + + # Install the Juju agent on the other VMs + for unit in self.units: + unit.controller = self + self.JujuAddMachine(unit) + + +class BaseLinuxVirtualMachine(BaseLinuxMixin, + virtual_machine.BaseVirtualMachine): + """Linux VM for use with pytyping.""" diff --git a/script/cumulus/pkb/perfkitbenchmarker/log_util.py b/script/cumulus/pkb/perfkitbenchmarker/log_util.py new file mode 100644 index 0000000..54493d3 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/log_util.py @@ -0,0 +1,178 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
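+# A minimal usage sketch of the per-thread label machinery defined below
+# (the label values here are hypothetical):
+#
+#   SetThreadLogContext(ThreadLogContext())
+#   ctx = GetThreadLogContext()
+#   with ctx.ExtendLabel('run_uri=abc123'):
+#     with ctx.ExtendLabel('vm_1'):
+#       logging.info('booting')
+#
+# With PkbLogFilter attached (see ConfigureLogging), each record emitted
+# inside those blocks carries pkb_label 'run_uri=abc123 vm_1 '.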
+"""Utilities related to loggers and logging.""" + +from contextlib import contextmanager +import logging +import sys +import threading + +try: + import colorlog +except ImportError: + colorlog = None + + +DEBUG = 'debug' +INFO = 'info' +WARNING = 'warning' +ERROR = 'error' +LOG_LEVELS = { + DEBUG: logging.DEBUG, + INFO: logging.INFO, + WARNING: logging.WARNING, + ERROR: logging.ERROR +} + + +class ThreadLogContext(object): + """Per-thread context for log message prefix labels.""" + def __init__(self, thread_log_context=None): + """Constructs a ThreadLogContext by copying a previous ThreadLogContext. + + Args: + thread_log_context: A ThreadLogContext for an existing thread whose state + will be copied to initialize a ThreadLogContext for a new thread. + """ + if thread_log_context: + self._label_list = thread_log_context._label_list[:] + else: + self._label_list = [] + self._RecalculateLabel() + + @property + def label(self): + return self._label + + def _RecalculateLabel(self): + """Recalculate the string label used to to prepend log messages. + + The label is the concatenation of all non-empty strings in the _label_list. + """ + non_empty_string_list = [s for s in self._label_list if s] + if len(non_empty_string_list): + self._label = ' '.join(non_empty_string_list) + ' ' + else: + self._label = '' + + @contextmanager + def ExtendLabel(self, label_extension): + """Extends the string label used to prepend log messages. + + Args: + label_extension: A string appended to the end of the current label. + """ + self._label_list.append(label_extension) + self._RecalculateLabel() + yield + self._label_list.pop() + self._RecalculateLabel() + + +class _ThreadData(threading.local): + def __init__(self): + self.pkb_thread_log_context = ThreadLogContext() + +thread_local = _ThreadData() + + +def SetThreadLogContext(thread_log_context): + """Set the current thread's ThreadLogContext object. + + Args: + thread_log_context: A ThreadLogContext to be written to thread local + storage. + """ + thread_local.pkb_thread_log_context = thread_log_context + + +def GetThreadLogContext(): + """Get the current thread's ThreadLogContext object. + + Returns: + The ThreadLogContext previously written via SetThreadLogContext. + """ + return thread_local.pkb_thread_log_context + + +class PkbLogFilter(logging.Filter): + """Filter that injects a thread's ThreadLogContext label into log messages. + + Sets the LogRecord's pkb_label attribute with the ThreadLogContext label. + """ + def filter(self, record): + record.pkb_label = GetThreadLogContext().label + return True + + +def ConfigureBasicLogging(): + """Initializes basic python logging before a log file is available.""" + logging.basicConfig(format='%(levelname)-8s %(message)s', level=logging.INFO) + + +def ConfigureLogging(stderr_log_level, log_path, run_uri, + file_log_level=logging.DEBUG): + """Configure logging. + + Note that this will destroy existing logging configuration! + + This configures python logging to emit messages to stderr and a log file. + + Args: + stderr_log_level: Messages at this level and above are emitted to stderr. + log_path: Path to the log file. + run_uri: A string containing the run_uri to be appended to the log prefix + labels. + file_log_level: Messages at this level and above are written to the log + file. + """ + # Build the format strings for the stderr and log file message formatters. 
+ stderr_format = ('%(asctime)s {} %(threadName)s %(pkb_label)s' + '%(levelname)-8s %(message)s').format(run_uri) + stderr_color_format = ('%(log_color)s%(asctime)s {} %(threadName)s ' + '%(pkb_label)s%(levelname)-8s%(reset)s ' + '%(message)s').format(run_uri) + file_format = ('%(asctime)s {} %(threadName)s %(pkb_label)s' + '%(filename)s:%(lineno)d %(levelname)-8s %(message)s') + file_format = file_format.format(run_uri) + + # Reset root logger settings. + logger = logging.getLogger() + logger.handlers = [] + logger.setLevel(logging.DEBUG) + + # Initialize the main thread's ThreadLogContext. This object must be + # initialized to use the PkbLogFilter, and it is used to derive the + # ThreadLogContext of other threads started through vm_util.RunThreaded. + SetThreadLogContext(ThreadLogContext()) + + # Add handler to output to stderr. + handler = logging.StreamHandler() + handler.addFilter(PkbLogFilter()) + handler.setLevel(stderr_log_level) + if colorlog is not None and sys.stderr.isatty(): + formatter = colorlog.ColoredFormatter(stderr_color_format, reset=True) + handler.setFormatter(formatter) + else: + handler.setFormatter(logging.Formatter(stderr_format)) + logger.addHandler(handler) + + # Add handler for output to log file. + logging.info('Verbose logging to: %s', log_path) + handler = logging.FileHandler(filename=log_path) + handler.addFilter(PkbLogFilter()) + handler.setLevel(file_log_level) + handler.setFormatter(logging.Formatter(file_format)) + logger.addHandler(handler) + logging.getLogger('requests').setLevel(logging.ERROR) diff --git a/script/cumulus/pkb/perfkitbenchmarker/managed_memory_store.py b/script/cumulus/pkb/perfkitbenchmarker/managed_memory_store.py new file mode 100644 index 0000000..8477981 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/managed_memory_store.py @@ -0,0 +1,144 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for cloud managed memory stores.""" + +import abc +import logging +from absl import flags +from perfkitbenchmarker import resource + +# List of memory store types +REDIS = 'REDIS' +MEMCACHED = 'MEMCACHED' + +FLAGS = flags.FLAGS + + +class Failover(object): + """Enum for redis failover options.""" + FAILOVER_NONE = 'failover_none' + FAILOVER_SAME_ZONE = 'failover_same_zone' + FAILOVER_SAME_REGION = 'failover_same_region' + + +flags.DEFINE_enum( + 'redis_failover_style', Failover.FAILOVER_NONE, [ + Failover.FAILOVER_NONE, Failover.FAILOVER_SAME_ZONE, + Failover.FAILOVER_SAME_REGION + ], 'Failover behavior of cloud redis cluster. Acceptable values are:' + 'failover_none, failover_same_zone, and failover_same_region') + +# List of redis versions +REDIS_3_2 = 'redis_3_2' +REDIS_4_0 = 'redis_4_0' +REDIS_5_0 = 'redis_5_0' +REDIS_6_X = 'redis_6_x' +REDIS_VERSIONS = [REDIS_3_2, REDIS_4_0, REDIS_5_0, REDIS_6_X] + +flags.DEFINE_string( + 'managed_memory_store_version', None, + 'The version of managed memory store to use. 
This flag ' + 'overrides Redis or Memcached version defaults that is set ' + 'in benchmark config. Defaults to None so that benchmark ' + 'config defaults are used.') +flags.DEFINE_string( + 'cloud_redis_region', 'us-central1', 'The region to spin up cloud redis in.' + 'Defaults to the GCP region of us-central1.') + +MEMCACHED_NODE_COUNT = 1 + + +def GetManagedMemoryStoreClass(cloud, memory_store): + """Gets the cloud managed memory store class corresponding to 'cloud'. + + Args: + cloud: String. Name of cloud to get the class for. + memory_store: String. Type of memory store to get the class for. + + Returns: + Implementation class corresponding to the argument cloud + + Raises: + Exception: An invalid cloud was provided + """ + return resource.GetResourceClass( + BaseManagedMemoryStore, CLOUD=cloud, MEMORY_STORE=memory_store) + + +def ParseReadableVersion(version): + """Parses Redis major and minor version number. + + Used for Azure and AWS versions. + + Args: + version: String. Version string to get parsed. + + Returns: + Parsed version + """ + if version.count('.') < 1: + logging.info( + 'Could not parse version string correctly,' + 'full Redis version returned: %s', version) + return version + return '.'.join(version.split('.', 2)[:2]) + + +class BaseManagedMemoryStore(resource.BaseResource): + """Object representing a cloud managed memory store.""" + + REQUIRED_ATTRS = ['CLOUD', 'MEMORY_STORE'] + RESOURCE_TYPE = 'BaseManagedMemoryStore' + + def __init__(self, spec): + """Initialize the cloud managed memory store object. + + Args: + spec: spec of the managed memory store. + """ + super(BaseManagedMemoryStore, self).__init__() + self.spec = spec + self.name = 'pkb-%s' % FLAGS.run_uri + self._ip = None + self._port = None + self._password = None + + def GetMemoryStoreIp(self): + """Returns the Ip address of the managed memory store.""" + if not self._ip: + self._PopulateEndpoint() + return self._ip + + def GetMemoryStorePort(self): + """Returns the port number of the managed memory store.""" + if not self._port: + self._PopulateEndpoint() + return self._port + + @abc.abstractmethod + def _PopulateEndpoint(self): + """Populates the endpoint information for the managed memory store.""" + raise NotImplementedError() + + def GetMemoryStorePassword(self): + """Returns the access password of the managed memory store, if any.""" + return self._password + + def MeasureCpuUtilization(self): + """Measures the CPU utilization of an instance using the cloud's API.""" + return NotImplementedError() + + def GetInstanceSize(self): + """Returns size of instance in gigabytes.""" + return NotImplementedError() diff --git a/script/cumulus/pkb/perfkitbenchmarker/memcache_service.py b/script/cumulus/pkb/perfkitbenchmarker/memcache_service.py new file mode 100644 index 0000000..a61d786 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/memcache_service.py @@ -0,0 +1,35 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +class MemcacheService(object): + CLOUD = None + + def __init__(self): + pass + + def Create(self): + raise NotImplementedError + + def Destroy(self): + raise NotImplementedError + + def Flush(self): + raise NotImplementedError + + def GetHosts(self): + raise NotImplementedError + + def GetMetadata(self): + raise NotImplementedError diff --git a/script/cumulus/pkb/perfkitbenchmarker/messaging_service.py b/script/cumulus/pkb/perfkitbenchmarker/messaging_service.py new file mode 100644 index 0000000..4049276 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/messaging_service.py @@ -0,0 +1,138 @@ +"""Common interface for messaging services resources. + +MessagingService class offers a common interface to provision resources, and to +run different phases of the benchmark [Prepare, Run, Cleanup]. The messaging +service benchmark uses the specific instance (from +messaging_service_util.py file) to run the phases of the benchmark on it. +Prepare and Cleanup phases runs from the benchmark VM, on the run phase the +benchmark VM send commands to the Client VM. Client VM's implementations that +runs the benchmark can be found on: /data/messaging_service. +""" + +import abc +import os +from typing import Any, Dict +from perfkitbenchmarker import resource + +MESSAGING_SERVICE_SCRIPTS_VM_PKB = os.path.join('~', 'perfkitbenchmarker') +MESSAGING_SERVICE_SCRIPTS_VM_BIN_DIR = '~' +MESSAGING_SERVICE_SCRIPTS_VM_LIB_DIR = os.path.join( + '~', 'perfkitbenchmarker', 'scripts', 'messaging_service_scripts') +MESSAGING_SERVICE_SCRIPTS_VM_COMMON_DIR = os.path.join( + MESSAGING_SERVICE_SCRIPTS_VM_LIB_DIR, 'common') +MESSAGING_SERVICE_SCRIPTS_COMMON_PREFIX = 'messaging_service_scripts/common/' +MESSAGING_SERVICE_SCRIPTS_COMMON_FILES = [ + '__init__.py', + 'app.py', + 'client.py', + 'errors.py', + 'runners.py', + 'e2e/__init__.py', + 'e2e/latency_runner.py', + 'e2e/main_process.py', + 'e2e/protocol.py', + 'e2e/publisher.py', + 'e2e/receiver.py', + 'e2e/worker_utils.py', +] + + +def GetMessagingServiceClass(cloud, delivery): + """Gets the underlying Messaging Service class.""" + return resource.GetResourceClass( + BaseMessagingService, CLOUD=cloud, DELIVERY=delivery) + + +class BaseMessagingService(resource.BaseResource): + """Common interface of a messaging service resource. + + Attributes: + client: The client virtual machine that runs the benchmark. 
+ """ + + REQUIRED_ATTRS = ['CLOUD', 'DELIVERY'] + RESOURCE_TYPE = 'BaseMessagingService' + + # TODO(odiego): Move DELIVERY down to child classes when adding more options + DELIVERY = 'pull' + + END_TO_END_LATENCY = 'end_to_end_latency' + PUBLISH_LATENCY = 'publish_latency' + PULL_LATENCY = 'pull_latency' + + @classmethod + def FromSpec(cls, messaging_service_spec): + return cls() + + def setVms(self, vm_groups): + self.client_vm = vm_groups['clients' if 'clients' in + vm_groups else 'default'][0] + + def PrepareClientVm(self): + self._InstallCommonClientPackages() + self._InstallCloudClients() + + def _InstallCommonClientPackages(self): + """Installs common software for running benchmarks on the client VM.""" + # Install commom packages + self.client_vm.Install('python3') + self.client_vm.Install('pip3') + self.client_vm.RemoteCommand('sudo pip3 install absl-py numpy') + + # Upload common scripts + self.client_vm.RemoteCommand( + f'mkdir -p {MESSAGING_SERVICE_SCRIPTS_VM_LIB_DIR}') + self.client_vm.RemoteCommand(' '.join([ + 'find', MESSAGING_SERVICE_SCRIPTS_VM_PKB, '-type', 'd', '-exec', + 'touch', "'{}/__init__.py'", '\\;' + ])) + self._CopyFiles( + MESSAGING_SERVICE_SCRIPTS_COMMON_PREFIX, + MESSAGING_SERVICE_SCRIPTS_COMMON_FILES, + MESSAGING_SERVICE_SCRIPTS_VM_COMMON_DIR) + + def _CopyFiles(self, prefix, data_srcs, vm_dest_dir): + for subpath in data_srcs: + dirname = os.path.dirname(os.path.join(vm_dest_dir, subpath)) + self.client_vm.RemoteCommand(f'mkdir -p {dirname}') + self.client_vm.PushDataFile( + os.path.join(prefix, subpath), + os.path.join(vm_dest_dir, subpath)) + + @abc.abstractmethod + def _InstallCloudClients(self): + """Installs software for running benchmarks on the client VM. + + This method should be overriden by subclasses to install software specific + to the flavor of MessagingService they provide. + """ + raise NotImplementedError + + @abc.abstractmethod + def Run(self, benchmark_scenario: str, number_of_messages: str, + message_size: str) -> Dict[str, Any]: + """Runs remote commands on client VM - benchmark's run phase. + + Runs a benchmark that consists of first publishing messages and then + pulling messages from messaging service, based on the configuration + specified through the FLAGS: benchmark_scenario, number_of_messages, and + message_size. Specific implementations should override this method. + Different providers needs different info to run the benchmark - for GCP we + need 'topic_name' and 'subscription_name', while for AWS 'queue_name' + suffices. + + Args: + benchmark_scenario: Specifies which benchmark scenario to run. + number_of_messages: Number of messages to use on the benchmark. + message_size: Size of the messages that will be used on the benchmark. It + specifies the number of characters in those messages. + + Returns: + Dictionary with metric_name (mean_latency, p50_latency...) as key and the + results from the benchmark as the value: + results = { + 'mean_latency': 0.3423443... + ... + } + """ + raise NotImplementedError diff --git a/script/cumulus/pkb/perfkitbenchmarker/network.py b/script/cumulus/pkb/perfkitbenchmarker/network.py new file mode 100644 index 0000000..027fe2c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/network.py @@ -0,0 +1,332 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing abstract classes related to VM networking. + +The Firewall class provides a way of opening VM ports. The Network class allows +VMs to communicate via internal ips and isolates PerfKitBenchmarker VMs from +others in the +same project. +""" + +import abc +import enum + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import regex_util +from perfkitbenchmarker import resource + + +flags.DEFINE_integer('mtu', None, + 'Network MTU to set, if any. Only enabled for GCP.') + + +class NetType(enum.Enum): + DEFAULT = 'default' + SINGLE = 'single' + MULTI = 'multi' + + +class BaseFirewall(object): + """An object representing the Base Firewall.""" + + CLOUD = None + + @classmethod + def GetFirewall(cls): + """Returns a BaseFirewall. + + This method is used instead of directly calling the class's constructor. + It creates BaseFirewall instances and registers them. + If a BaseFirewall object has already been registered, that object + will be returned rather than creating a new one. This enables multiple + VMs to call this method and all share the same BaseFirewall object. + """ + if cls.CLOUD is None: + raise errors.Error('Firewalls should have CLOUD attributes.') + benchmark_spec = context.GetThreadBenchmarkSpec() + if benchmark_spec is None: + raise errors.Error('GetFirewall called in a thread without a ' + 'BenchmarkSpec.') + with benchmark_spec.firewalls_lock: + key = cls.CLOUD + if key not in benchmark_spec.firewalls: + benchmark_spec.firewalls[key] = cls() + return benchmark_spec.firewalls[key] + + def AllowIcmp(self, vm): + """Opens the ICMP protocol on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the ICMP protocol for. + """ + pass + + def AllowPort(self, vm, start_port, end_port=None): + """Opens a port on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the port for. + start_port: The first local port in a range of ports to open. + end_port: The last port in a range of ports to open. If None, only + start_port will be opened. + """ + pass + + def DisallowAllPorts(self): + """Closes all ports on the firewall.""" + pass + + +class BaseNetworkSpec(object): + """Object containing all information needed to create a Network.""" + + def __init__(self, zone=None, cidr=None): + """Initializes the BaseNetworkSpec. + + Args: + zone: The zone in which to create the network. + cidr: The subnet this network belongs to in CIDR notation + """ + self.zone = zone + self.cidr = cidr + + def __repr__(self): + return '%s(%r)' % (self.__class__, self.__dict__) + + +class BaseVpnGateway(object, metaclass=abc.ABCMeta): + """An object representing the Base VPN Gateway.""" + CLOUD = None + + def __init__(self, zone=None, cidr=None): + """Initializes the BaseVpnGateway. + + Args: + zone: The zone in which to create the VpnGateway. + cidr: The cidr for the VpnGateway. 
+ """ + self.zone = zone + self.cidr: str = cidr + # Set to True if we need target Gateway up front (AWS) + self.require_target_to_init = False + + @abc.abstractmethod + def IsTunnelReady(self, tunnel_id): + """Returns True if the tunnel is ready. + + Args: + tunnel_id: The id of the tunnel to check. + + Returns: + boolean. + """ + raise NotImplementedError() + + @abc.abstractmethod + def ConfigureTunnel(self, tunnel_config): + """Updates the tunnel_config object with new information. + + Each provider may require different information to setup a VPN tunnel, + and all information needed to configure the tunnel may not be available + up front. Incremental updates to tunnel_config are made by calling this + function on each endpoint until either both endpoint tunnels are configured + or no more updates can be made. + + Args: + tunnel_config: The tunnel_config object of the tunnel to configure. + """ + raise NotImplementedError() + + @abc.abstractmethod + def Create(self): + """Creates the actual VPN Gateway.""" + raise NotImplementedError() + + @abc.abstractmethod + def Delete(self): + """Deletes the actual VPN Gateway.""" + raise NotImplementedError() + + +class BaseNetwork(object): + """Object representing a Base Network.""" + + CLOUD = None + + def __init__(self, spec): + self.zone = spec.zone + self.cidr = spec.cidr + + @staticmethod + def _GetNetworkSpecFromVm(vm): + """Returns a BaseNetworkSpec created from VM attributes.""" + return BaseNetworkSpec(zone=vm.zone, cidr=vm.cidr) + + @classmethod + def _GetKeyFromNetworkSpec(cls, spec): + """Returns a key used to register Network instances.""" + if cls.CLOUD is None: + raise errors.Error('Networks should have CLOUD attributes.') + return (cls.CLOUD, spec.zone) + + @classmethod + def GetNetwork(cls, vm): + """Returns a BaseNetwork. + + This method is used instead of directly calling the class's constructor. + It creates BaseNetwork instances and registers them. If a BaseNetwork + object has already been registered with the same key, that object + will be returned rather than creating a new one. This enables multiple + VMs to call this method and all share the same BaseNetwork object. + + Args: + vm: The VM for which the Network is being created. + """ + return cls.GetNetworkFromNetworkSpec(cls._GetNetworkSpecFromVm(vm)) + + @staticmethod + def FormatCidrString(cidr_raw): + """Format CIDR string for use in resource name. + + Removes or replaces illegal characters from CIDR. + eg '10.128.0.0/9' -> '10-128-0-0-9' + + Args: + cidr_raw: The unformatted CIDR string. + Returns: + A CIDR string suitable for use in resource names. + Raises: + Error: Invalid CIDR format + """ + + delim = r'-' # Safe delimiter for most providers + int_regex = r'[0-9]+' + octets_mask = regex_util.ExtractAllMatches(int_regex, str(cidr_raw)) + if len(octets_mask) != 5: # expecting 4 octets plus 1 prefix mask. + raise ValueError('Invalid CIDR format: "{0}"'.format(cidr_raw)) + return delim.join(octets_mask) + + @classmethod + def GetNetworkFromNetworkSpec(cls, spec): + """Returns a BaseNetwork. + + This method is used instead of directly calling the class's constructor. + It creates BaseNetwork instances and registers them. If a BaseNetwork + object has already been registered with the same key, that object + will be returned rather than creating a new one. This enables multiple + VMs to call this method and all share the same BaseNetwork object. + + Args: + spec: The network spec for the network. 
+ """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if benchmark_spec is None: + raise errors.Error('GetNetwork called in a thread without a ' + 'BenchmarkSpec.') + key = cls._GetKeyFromNetworkSpec(spec) + + # Grab the list of other networks to setup firewalls, forwarding, etc. + if not hasattr(spec, 'custom_subnets'): + spec.__setattr__('custom_subnets', benchmark_spec.custom_subnets) + + with benchmark_spec.networks_lock: + if key not in benchmark_spec.networks: + benchmark_spec.networks[key] = cls(spec) + return benchmark_spec.networks[key] + + def Create(self): + """Creates the actual network.""" + pass + + def Delete(self): + """Deletes the actual network.""" + pass + + def Peer(self, peering_network): + """Peers the network with the peering_network. + + This method is used for VPC peering. It will connect 2 VPCs together. + + Args: + peering_network: BaseNetwork. The network to peer with. + """ + pass + + +class BaseVPCPeeringSpec(object): + """Object containing all information needed to create a VPC Peering Object.""" + + def __init__(self, network_a=None, network_b=None): + """Initializes BaseVPCPeeringSpec. + + Args: + network_a: BaseNetwork. The network initiating the peering. + network_b: BaseNetwork. The network to be peered to. + """ + self.network_a = network_a + self.network_b = network_b + + def __repr__(self): + return '%s(%r)' % (self.__class__, self.__dict__) + + +class BaseVPCPeering(resource.BaseResource): + """Base class for VPC Peering. + + This class holds VPC Peering methods and attributes relating to the + VPC Peering as a cloud resource. + + Attributes: + network_a: BaseNetwork. The network initiating the peering. + network_b: BaseNetwork. The network to be peered to. + """ + + RESOURCE_TYPE = 'BaseVPCPeering' + + def __init__(self, vpc_peering_spec): + """Initialize BaseVPCPeering class. + + Args: + vpc_peering_spec: BaseVPCPeeringSpec. Spec for VPC peering object. + """ + super(BaseVPCPeering, self).__init__() + self.network_a = vpc_peering_spec.network_a + self.network_b = vpc_peering_spec.network_b + + +def GetCidrBlock(regional_index=0, subnet_index=0, mask_size=24): + """Returns a Cidr Block. + + Each cloud region should be assigned a unique IP Address Space. And each + Subnet within a regional cloud network should also have an unique space. This + function returns the IP Address allocation based on the regional and subnet + index given. It is expected that each cloud regional network will have a + unique regional index and each of its subnets will also have a unique index. + Regional cidr blocks should be large enough to cover the subnet cidr blocks. + Chose a mask_size for regional cidr block accordingly. For example, a + mask_size of 16 with regional starting block 10.0.0.0 will cover a subnet of + 10.0.1.0/24. + + Args: + regional_index: Int. The IP Address allocation dependent on the region. + Default index is 0. + subnet_index: Int. The IP Address section dependent on the subnet. + Default index is 0. + mask_size: Int. Mask size to request from cidr block. + Default index is 24. + """ + return '10.{}.{}.0/{}'.format(regional_index, subnet_index, mask_size) diff --git a/script/cumulus/pkb/perfkitbenchmarker/nfs_service.py b/script/cumulus/pkb/perfkitbenchmarker/nfs_service.py new file mode 100644 index 0000000..6c730ac --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/nfs_service.py @@ -0,0 +1,260 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Resource encapsulating provisioned cloud NFS services. + +Defines a resource for use in other benchmarks such as SpecSFS2014 and FIO. + +Example --benchmark_config_file: + +nfs_10_tb: &nfs_10_tb + AWS: + disk_type: nfs + mount_point: /scratch + +specsfs: + name: specsfs2014 + flags: + specsfs2014_num_runs: 1 + specsfs2014_load: 1 + vm_groups: + clients: + disk_spec: *nfs_10_tb + vm_count: 1 + os_type: rhel + gluster_servers: + vm_count: 0 +""" + +import abc +import logging +import re +from typing import Optional + +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import os_types +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util + +flags.DEFINE_string('nfs_tier', None, 'NFS Mode') +flags.DEFINE_string('nfs_version', None, 'NFS Version') + +FLAGS = flags.FLAGS + +_MOUNT_NFS_RE = re.compile(r'.*type nfs \((.*?)\)', re.MULTILINE) + +UNMANAGED = 'Unmanaged' + + +def GetNfsServiceClass(cloud): + """Get the NFS service corresponding to the cloud. + + Args: + cloud: The name of the cloud to supply the NFS service. + + Returns: + The NFS service class for this cloud. + + Raises: + NotImplementedError: No service found for this cloud. + """ + return resource.GetResourceClass(BaseNfsService, CLOUD=cloud) + + +class BaseNfsService(resource.BaseResource): + """Object representing an NFS Service.""" + + # subclasses must override this with a list or tuple for acceptable + # "nfs_tier" values if applicable. 
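+  # For example, a hypothetical provider subclass could declare:
+  #   NFS_TIERS = ('STANDARD', 'PREMIUM')
+  #   DEFAULT_TIER = 'STANDARD'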
+ CLOUD = 'Unknown' + NFS_TIERS = None + RESOURCE_TYPE = 'BaseNfsService' + DEFAULT_NFS_VERSION = None + DEFAULT_TIER = None + + def __init__(self, disk_spec: disk.BaseDiskSpec, zone): + super(BaseNfsService, self).__init__() + self.disk_spec = disk_spec + self.zone = zone + self.server_directory = '/' + self.nfs_tier = FLAGS.nfs_tier or self.DEFAULT_TIER + if self.nfs_tier and self.NFS_TIERS and self.nfs_tier not in self.NFS_TIERS: + # NFS service does not have to have a list of nfs_tiers nor does it have + # to be implemented by a provider + raise errors.Config.InvalidValue( + ('nfs_tier "%s" not in acceptable list "%s" ' + 'for cloud %s') % (self.nfs_tier, self.NFS_TIERS, self.CLOUD)) + logging.debug('%s NFS service with nfs_tier %s zone %s default version %s', + self.CLOUD, self.nfs_tier, self.zone, + self.DEFAULT_NFS_VERSION) + + def CreateNfsDisk(self): + mount_point = '%s:%s' % (self.GetRemoteAddress(), self.server_directory) + return disk.NfsDisk(self.disk_spec, mount_point, self.DEFAULT_NFS_VERSION, + self.nfs_tier) + + @abc.abstractmethod + def _IsReady(self): + """Boolean function to determine if disk is NFS mountable.""" + pass + + @abc.abstractmethod + def GetRemoteAddress(self): + """The NFS server's address.""" + pass + + +class StaticNfsService(BaseNfsService): + """Object allowing VMs to connect to a preprovisioned NFS endpoint.""" + CLOUD = 'Static' + + def __init__(self, disk_spec): + super(StaticNfsService, self).__init__(disk_spec, None) + self.ip_address = disk_spec.nfs_ip_address + self.server_directory = disk_spec.nfs_directory or '/' + + def _Create(self): + pass + + def _Delete(self): + pass + + def CreateNfsDisk(self): + mount_point = '%s:/%s' % (self.GetRemoteAddress(), self.server_directory) + return disk.NfsDisk(self.disk_spec, mount_point, None, None) + + def _IsReady(self): + """Boolean function to determine if disk is NFS mountable.""" + return True + + def GetRemoteAddress(self): + """The NFS server's address.""" + return self.ip_address + + +class UnmanagedNfsService(BaseNfsService): + """Object allowing VMs to connect to a local NFS disk.""" + CLOUD = UNMANAGED + + # Allows anybody to write to the NFS mount. + _EXPORT_FS_COMMAND = ' && '.join([ + 'sudo mkdir -p {export_dir}', + 'sudo chown $USER:$USER {export_dir}', + 'sudo chmod 777 {export_dir}', + ('echo "{export_dir} *(rw,sync,no_subtree_check,no_root_squash)" | ' + 'sudo tee -a /etc/exports'), + 'sudo exportfs -a' + ]) + + _NFS_NAME = { + os_types.RHEL: 'nfs-server', + os_types.DEBIAN: 'nfs-kernel-server', + } + _NFS_RESTART_CMD = 'sudo systemctl restart {nfs_name}' + + def __init__(self, + disk_spec: Optional[disk.BaseDiskSpec], + server_vm, + check_export_not_same_mount=True, + server_directory=None): + super(UnmanagedNfsService, self).__init__(disk_spec, None) + self.server_vm = server_vm + # Path on the server to export. Must be different from mount_point. 
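+    # Precedence: the explicit server_directory argument wins, then the disk
+    # spec's device_path, then a default directory under /.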
+ if server_directory: + self.server_directory = server_directory + elif disk_spec and disk_spec.device_path: + self.server_directory = disk_spec.device_path + else: + self.server_directory = '/pkb-nfs-server-directory' + logging.info('Exporting server directory %s', self.server_directory) + if check_export_not_same_mount and disk_spec: + assert self.server_directory != disk_spec.mount_point, ( + 'export server directory must be different from mount point') + + def GetRemoteAddress(self): + """The NFS server's address.""" + return self.server_vm.internal_ip + + def _ExportNfsDir(self, export_dir_path): + """Export a directory on the NFS server to be shared with NFS clients. + + Args: + export_dir_path: Path to the directory to export. + """ + if self.server_vm.TryRemoteCommand( + f'grep "^{export_dir_path} " /etc/exports'): + logging.info('Already NFS exported directory %s', export_dir_path) + else: + self.server_vm.RemoteCommand( + self._EXPORT_FS_COMMAND.format(export_dir=export_dir_path)) + nfs_name = self._NFS_NAME[self.server_vm.BASE_OS_TYPE] + self.server_vm.RemoteCommand( + self._NFS_RESTART_CMD.format(nfs_name=nfs_name)) + + def _Create(self): + assert self.server_vm, 'NFS server VM not created.' + self.server_vm.Install('nfs_server') + self._ExportNfsDir(self.server_directory) + # Restart NFS service upon reboot if required (Centos7) + self.server_vm.RemoteCommand( + 'sudo systemctl enable nfs', ignore_failure=True) + + def _Delete(self): + pass + + def _IsReady(self): + """Boolean function to determine if disk is NFS mountable.""" + return True + + +def NfsExport(server_vm, local_disk_path): + """NFS exports the directory on the VM.""" + service = UnmanagedNfsService(None, server_vm, False, local_disk_path) + service.Create() + + +def NfsMount(server_ip, client_vm, client_path, server_path=None) -> None: + """NFS mounts the server's path on the client. + + Args: + server_ip: IP address of the NFS server. + client_vm: The VM that will mount the NFS server's exported directory. + client_path: The mount point on the client. + server_path: The NFS exported directory on the server. Defaults to the same + as the client_path. + """ + client_vm.Install('nfs_utils') + fstab_line = (f'{server_ip}:{server_path or client_path} ' + f'{client_path} nfs defaults 0 0') + client_vm.RemoteCommand(f'sudo mkdir -p {client_path}; ' + f'sudo chown {client_vm.user_name} {client_path}; ' + f'echo "{fstab_line}\n" | sudo tee -a /etc/fstab; ' + 'sudo mount -a') + + +def NfsExportAndMount(vms, client_path, server_path=None) -> None: + """NFS exports from the first VM to the others. + + Args: + vms: List of VMs. First is the NFS server, the others will mount it. + client_path: The path on the client to mount the NFS export. + server_path: The path on the server to export. Default is the same as the + client_path + """ + nfs_server, clients = vms[0], vms[1:] + NfsExport(nfs_server, server_path or client_path) + vm_util.RunThreaded( + lambda vm: NfsMount(nfs_server.internal_ip, vm, client_path, server_path), + clients) diff --git a/script/cumulus/pkb/perfkitbenchmarker/non_relational_db.py b/script/cumulus/pkb/perfkitbenchmarker/non_relational_db.py new file mode 100644 index 0000000..24f3075 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/non_relational_db.py @@ -0,0 +1,87 @@ +# Copyright 2020 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing base class for non-relational databases.""" + +from typing import Dict, Optional + +from absl import flags +from perfkitbenchmarker import resource +from perfkitbenchmarker.configs import freeze_restore_spec +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec + +# List of nonrelational database types +DYNAMODB = 'dynamodb' +BIGTABLE = 'bigtable' + + +FLAGS = flags.FLAGS + + +class BaseNonRelationalDbSpec(freeze_restore_spec.FreezeRestoreSpec): + """Configurable options of a nonrelational database service.""" + + # Needed for registering the spec class and its subclasses. See BaseSpec. + SPEC_TYPE = 'BaseNonRelationalDbSpec' + SPEC_ATTRS = ['SERVICE_TYPE'] + + def __init__(self, + component_full_name: str, + flag_values: Optional[Dict[str, flags.FlagValues]] = None, + **kwargs): + super().__init__(component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super()._GetOptionDecoderConstructions() + result.update({ + 'service_type': ( + option_decoders.EnumDecoder, + { + 'default': + None, + 'valid_values': [ + DYNAMODB, + BIGTABLE, + ], + }), + }) + return result + + +class BaseNonRelationalDb(resource.BaseResource): + """Object representing a nonrelational database.""" + REQUIRED_ATTRS = ['SERVICE_TYPE'] + RESOURCE_TYPE = 'BaseNonRelationalDb' + SERVICE_TYPE = 'Base' + + +def GetNonRelationalDbSpecClass( + service_type: str) -> Optional[spec.BaseSpecMetaClass]: + """Gets the non-relational db spec class corresponding to 'service_type'.""" + return spec.GetSpecClass(BaseNonRelationalDbSpec, SERVICE_TYPE=service_type) + + +def GetNonRelationalDbClass( + service_type: str) -> Optional[resource.AutoRegisterResourceMeta]: + """Gets the non-relational database class corresponding to 'service_type'.""" + return resource.GetResourceClass(BaseNonRelationalDb, + SERVICE_TYPE=service_type) diff --git a/script/cumulus/pkb/perfkitbenchmarker/num_gpus_map_util.py b/script/cumulus/pkb/perfkitbenchmarker/num_gpus_map_util.py new file mode 100644 index 0000000..52a058a --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/num_gpus_map_util.py @@ -0,0 +1,31 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""A map of machine types to their number of K80 GPUs""" + +gpus_per_vm = { + 'n1-standard-4-k80x1': 1, + 'n1-standard-8-k80x1': 1, + 'n1-standard-8-k80x2': 2, + 'n1-standard-16-k80x2': 2, + 'n1-standard-16-k80x4': 4, + 'n1-standard-32-k80x4': 4, + 'n1-standard-32-k80x8': 8, + 'p2.xlarge': 1, + 'p2.8xlarge': 8, + 'p2.16xlarge': 16, + 'Standard_NC6': 1, + 'Standard_NC12': 2, + 'Standard_NC24': 4 +} diff --git a/script/cumulus/pkb/perfkitbenchmarker/object_storage_service.py b/script/cumulus/pkb/perfkitbenchmarker/object_storage_service.py new file mode 100644 index 0000000..a308951 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/object_storage_service.py @@ -0,0 +1,383 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""An interface to object storage services.""" + + +import abc +import logging +import os +import pathlib +from typing import Optional + +from absl import flags +from perfkitbenchmarker import errors +import six + +flags.DEFINE_string('object_storage_credential_file', None, + 'Directory of credential file.') +flags.DEFINE_string('boto_file_location', None, + 'The location of the boto file.') + +FLAGS = flags.FLAGS + +DEFAULT_BOTO_LOCATION_USER = '~/.boto' +DEFAULT_BOTO_LOCATION_MACHINE = '/etc/boto.cfg' +BOTO_LIB_VERSION = 'boto_lib_version' + +_OBJECT_STORAGE_REGISTRY = {} + + +class AutoRegisterObjectStorageMeta(abc.ABCMeta): + """Metaclass for auto registration.""" + STORAGE_NAME = None + + def __init__(cls, name, bases, dct): + super(AutoRegisterObjectStorageMeta, cls).__init__(name, bases, dct) + if cls.STORAGE_NAME in _OBJECT_STORAGE_REGISTRY: + logging.info( + "Duplicate storage implementations for name '%s'. " + 'Replacing %s with %s', cls.STORAGE_NAME, + _OBJECT_STORAGE_REGISTRY[cls.STORAGE_NAME].__name__, cls.__name__) + _OBJECT_STORAGE_REGISTRY[cls.STORAGE_NAME] = cls + + +class ObjectStorageService( + six.with_metaclass(AutoRegisterObjectStorageMeta, object)): + """Base class for ObjectStorageServices.""" + + # Keeping the location in the service object is not very clean, but + # a nicer solution would be more complex, and we only use different + # locations in a very limited way. Also, true multi-location + # providers would require another abstraction for Azure service + # accounts, which would add more complexity. + + # Service object lifecycle + + def PrepareService(self, location): + """Get ready to use object storage. + + This method should be called before any other method of this + class. Once it is called, all of this class' methods should work + with data in the given location. + + Args: + location: where to place our data. + """ + pass + + def CleanupService(self): + """Clean up what we did. + + No other method of this class should be called after + CleanupProvider. 
+ """ + pass + + # Bucket management + + @abc.abstractmethod + def MakeBucket(self, bucket, raise_on_failure=True): + """Make an object storage bucket. + + Args: + bucket: the name of the bucket to create. + raise_on_failure: Whether to raise errors.Benchmarks.BucketCreationError + if the bucket fails to be created. + """ + pass + + @abc.abstractmethod + def Copy(self, src_url, dst_url, recursive=False): + """Copy files, objects and directories. + + Note: Recursive copy behavior mimics gsutil cp -r where: + Copy(/foo/bar, /baz, True) copies the directory bar into /baz/bar whereas + aws s3 cp --recursive would copy the contents of bar into /baz. + + Args: + src_url: string, the source url path. + dst_url: string, the destination url path. + recursive: whether to copy directories. + """ + pass + + @abc.abstractmethod + def CopyToBucket(self, src_path, bucket, object_path): + """Copy a local file to a bucket. + + Args: + src_path: string, the local source path. + bucket: string, the destination bucket. + object_path: string, the object's path in the bucket. + """ + pass + + @abc.abstractmethod + def MakeRemoteCliDownloadUrl(self, bucket, object_path): + """Creates a download url for an object in a bucket. + + This is used by GenerateCliDownloadFileCommand(). + + Args: + bucket: string, the name of the bucket. + object_path: string, the path of the object in the bucket. + """ + pass + + @abc.abstractmethod + def GenerateCliDownloadFileCommand(self, src_url, local_path): + """Generates a CLI command to copy src_url to local_path. + + This is suitable for use in scripts e.g. startup scripts. + + Args: + src_url: string, the source url path. + local_path: string, the local path. + """ + pass + + @abc.abstractmethod + def List(self, bucket): + """List providers, buckets, or objects. + + Args: + bucket: the name of the bucket to list the contents of. + """ + pass + + def ListTopLevelSubfolders(self, bucket): + """Lists the top level folders (not files) in a bucket. + + Args: + bucket: Name of the bucket to list the top level subfolders of. + + Returns: + A list of top level subfolder names. Can be empty if there are no folders. + """ + return [] + + @abc.abstractmethod + def DeleteBucket(self, bucket): + """Delete an object storage bucket. + + This method should succeed even if bucket contains objects. + + Args: + bucket: the name of the bucket to delete. + """ + pass + + @abc.abstractmethod + def EmptyBucket(self, bucket): + """Empty an object storage bucket. + + Args: + bucket: the name of the bucket to empty. + """ + pass + + # Working with a VM + + def PrepareVM(self, vm): + """Prepare a VM to use object storage. + + Args: + vm: the VM to prepare. + """ + pass + + def CleanupVM(self, vm): + """Clean up a VM that was used in this benchmark. + + Args: + vm: the VM to clean up. + """ + pass + + # CLI commands + + @abc.abstractmethod + def CLIUploadDirectory(self, vm, directory, file_names, bucket): + """Upload directory contents to a bucket through the CLI. + + The VM must have had PrepareVM called on it first. The command + will be wrapped in 'time ...'. + + The caller must ensure that file_names is a full list of files in + the directory, so the provider implementation can either use a + generic "upload directory" command or use the file names. This + method *must* pass all file names to the CLI at once if possible, + not in a loop, to give it the chance to share connections and + overlap uploads. 
+ + Args: + vm: the VM to run commands on directory: the directory to + directory: the directory to upload files from + file_names: a list of paths (relative to directory) to upload + bucket: the bucket to upload the file to + + Returns: + A tuple of the (stdout, stderr) of the command. + """ + pass + + @abc.abstractmethod + def CLIDownloadBucket(self, vm, bucket, objects, dest): + """Download bucket contents to a folder. + + The VM must have had PrepareVM called on it first. The command + will be wrapped in 'time ...'. + + The caller must ensure that objects is a full list of objects in + the bucket, so the provider implementation can either use a + generic "download bucket" command or use the object names. This + method *must* pass all object names to the CLI at once if + possible, not in a loop, to give it the chance to share + connections and overlap downloads. + + Args: + vm: the VM to run commands on + bucket: the name of the bucket to download from + objects: a list of names of objects to download + dest: the name of the folder to download to + + Returns: + A tuple of the (stdout, stderr) of the command. + """ + pass + + # General methods + + def Metadata(self, vm): + """Provider-specific metadata for collected samples. + + Args: + vm: the VM we're running on. + + Returns: + A dict of key, value pairs to add to our sample metadata. + """ + + return {} + + def UpdateSampleMetadata(self, samples): + """Updates metadata of samples with provider specific information. + + Args: + samples: the samples that need the metadata to be updated with provider + specific information. + """ + pass + + def GetDownloadUrl(self, + bucket: str, + object_name: str, + use_https=True) -> str: + """Get the URL to download objects over HTTP(S). + + Args: + bucket: name of bucket + object_name: name of object + use_https: whether to use HTTPS or else HTTP + + Returns: + The URL to download objects over. + """ + raise NotImplementedError + + def GetUploadUrl(self, bucket: str, object_name: str, use_https=True) -> str: + """Get the URL to upload objects over HTTP(S). + + Args: + bucket: name of bucket + object_name: name of object + use_https: whether to use HTTPS or else HTTP + + Returns: + The URL to upload objects over. + """ + return self.GetDownloadUrl(bucket, object_name, use_https) + + # Different services require uploads to be POST or PUT. + UPLOAD_HTTP_METHOD: Optional[str] = None + + def MakeBucketPubliclyReadable(self, bucket: str, also_make_writable=False): + """Make a bucket readable and optionally writable by everyone.""" + raise NotImplementedError + + def APIScriptArgs(self): + """Extra arguments for the API test script. + + The service implementation has two parts - one that runs in the + PKB controller, and one that runs on worker VMs. This method is + how the controller communicates service-specific information to + the workers. + + Returns: + A list of strings, which will be passed as arguments to the API + test script. + """ + + return [] + + @classmethod + def APIScriptFiles(cls): + """Files to upload for the API test script. + + Returns: + A list of file names. These files will be uploaded to the remote + VM if this service's API is being benchmarked. 
+ """ + + return [] + + +def GetObjectStorageClass(storage_name) -> type(ObjectStorageService): + """Return the ObjectStorageService subclass corresponding to storage_name.""" + + return _OBJECT_STORAGE_REGISTRY[storage_name] + + +# TODO(user): Move somewhere more generic +def FindCredentialFile(default_location): + """Return the path to the credential file.""" + + credential_file = ( + FLAGS.object_storage_credential_file or default_location) + credential_file = os.path.expanduser(credential_file) + if not (os.path.isfile(credential_file) or + os.path.isdir(credential_file)): + raise errors.Benchmarks.MissingObjectCredentialException( + 'Credential cannot be found in %s' % credential_file) + + return credential_file + + +def FindBotoFile(): + """Return the path to the boto file.""" + paths_to_check = [ + FLAGS.boto_file_location, + DEFAULT_BOTO_LOCATION_USER, + DEFAULT_BOTO_LOCATION_MACHINE, + ] + + for path in paths_to_check: + if not path: + continue + if pathlib.Path(path).exists(): + return path + + raise errors.Benchmarks.MissingObjectCredentialException( + 'Boto file cannot be found in %s.' % paths_to_check) diff --git a/script/cumulus/pkb/perfkitbenchmarker/os_types.py b/script/cumulus/pkb/perfkitbenchmarker/os_types.py new file mode 100644 index 0000000..3e05c79 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/os_types.py @@ -0,0 +1,112 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
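+# Example membership check a caller might perform (hypothetical):
+#   if FLAGS.os_type in os_types.LINUX_OS_TYPES:
+#     ...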
+"""Supported types of operating systems that a VM may host.""" + +from absl import flags + +AMAZONLINUX2 = 'amazonlinux2' +CENTOS7 = 'centos7' +CENTOS8 = 'centos8' # deprecated +CENTOS_STREAM8 = 'centos_stream8' +CENTOS_STREAM9 = 'centos_stream9' +CLEAR = 'clear' +COS = 'cos' +CORE_OS = 'core_os' +DEBIAN9 = 'debian9' +DEBIAN10 = 'debian10' +DEBIAN11 = 'debian11' +JUJU = 'juju' +RHEL7 = 'rhel7' +RHEL8 = 'rhel8' +ROCKY_LINUX8 = 'rocky_linux8' +UBUNTU_CONTAINER = 'ubuntu_container' +UBUNTU1604 = 'ubuntu1604' # deprecated +UBUNTU1604_CUDA9 = 'ubuntu1604_cuda9' +UBUNTU1804 = 'ubuntu1804' +UBUNTU1804_EFA = 'ubuntu1804_efa' +UBUNTU2004 = 'ubuntu2004' +UBUNTU2204 = 'ubuntu2204' +WINDOWS2012_CORE = 'windows2012_core' +WINDOWS2016_CORE = 'windows2016_core' +WINDOWS2019_CORE = 'windows2019_core' +WINDOWS2022_CORE = 'windows2022_core' +WINDOWS2012_DESKTOP = 'windows2012_desktop' +WINDOWS2016_DESKTOP = 'windows2016_desktop' +WINDOWS2019_DESKTOP = 'windows2019_desktop' +WINDOWS2022_DESKTOP = 'windows2022_desktop' +WINDOWS2019_SQLSERVER_2017_STANDARD = 'windows2019_desktop_sqlserver_2017_standard' +WINDOWS2019_SQLSERVER_2017_ENTERPRISE = 'windows2019_desktop_sqlserver_2017_enterprise' +WINDOWS2019_SQLSERVER_2019_STANDARD = 'windows2019_desktop_sqlserver_2019_standard' +WINDOWS2019_SQLSERVER_2019_ENTERPRISE = 'windows2019_desktop_sqlserver_2019_enterprise' +WINDOWS2022_SQLSERVER_2019_STANDARD = 'windows2022_desktop_sqlserver_2019_standard' +WINDOWS2022_SQLSERVER_2019_ENTERPRISE = 'windows2022_desktop_sqlserver_2019_enterprise' +# Base-only OS types +DEBIAN = 'debian' +RHEL = 'rhel' +WINDOWS = 'windows' + +# These operating systems have SSH like other Linux OSes, but no package manager +# to run Linux benchmarks without Docker. +# Because they cannot install packages, they only support VM life cycle +# benchmarks like cluster_boot. +CONTAINER_OS_TYPES = [ + CORE_OS, + COS, +] + +LINUX_OS_TYPES = CONTAINER_OS_TYPES + [ + AMAZONLINUX2, + CENTOS7, + CENTOS8, + CENTOS_STREAM8, + CENTOS_STREAM9, + CLEAR, + DEBIAN9, + DEBIAN10, + DEBIAN11, + JUJU, + RHEL7, + RHEL8, + ROCKY_LINUX8, + UBUNTU_CONTAINER, + UBUNTU1604, # deprecated + UBUNTU1604_CUDA9, + UBUNTU1804, + UBUNTU1804_EFA, + UBUNTU2004, + UBUNTU2204, +] +WINDOWS_OS_TYPES = [ + WINDOWS2012_CORE, + WINDOWS2016_CORE, + WINDOWS2019_CORE, + WINDOWS2022_CORE, + WINDOWS2012_DESKTOP, + WINDOWS2016_DESKTOP, + WINDOWS2019_DESKTOP, + WINDOWS2022_DESKTOP, + WINDOWS2019_SQLSERVER_2017_STANDARD, + WINDOWS2019_SQLSERVER_2017_ENTERPRISE, + WINDOWS2019_SQLSERVER_2019_STANDARD, + WINDOWS2019_SQLSERVER_2019_ENTERPRISE, + WINDOWS2022_SQLSERVER_2019_STANDARD, + WINDOWS2022_SQLSERVER_2019_ENTERPRISE, +] +ALL = LINUX_OS_TYPES + WINDOWS_OS_TYPES +BASE_OS_TYPES = [CLEAR, CORE_OS, DEBIAN, RHEL, WINDOWS] + +# May change from time to time. +DEFAULT = UBUNTU1804 + +flags.DEFINE_enum('os_type', DEFAULT, ALL, 'The VM\'s OS type.') diff --git a/script/cumulus/pkb/perfkitbenchmarker/package_lookup.py b/script/cumulus/pkb/perfkitbenchmarker/package_lookup.py new file mode 100644 index 0000000..f204bc3 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/package_lookup.py @@ -0,0 +1,47 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Function to lookup modules from package names. + +PackageModule: Returns a package module given its name. + +This module works around a circular import issue where we cannot import +benchmark_sets.py directly into virtual_machine.py. After SetUpPKB is called, +package_lookup.PackageModule is equivalent to benchmark_sets.PackageModule. +""" + +from perfkitbenchmarker import errors + +_global_package_module_function = None + + +def SetPackageModuleFunction(function): + """Sets the function called by PackageModule; See benchmark_sets.py.""" + global _global_package_module_function + _global_package_module_function = function + + +def PackageModule(package_name): + """Finds the module for a benchmark by name. + + Args: + package_name: The name of the package. + + Returns: + The package's module, or None if the package is invalid. + """ + if not _global_package_module_function: + raise errors.Setup.InvalidSetupError( + 'Cannot call package_lookup.py; Was SetUpPKB called?') + return _global_package_module_function(package_name) diff --git a/script/cumulus/pkb/perfkitbenchmarker/pkb.py b/script/cumulus/pkb/perfkitbenchmarker/pkb.py new file mode 100644 index 0000000..99885b1 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/pkb.py @@ -0,0 +1,1903 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Runs all benchmarks in PerfKitBenchmarker. + +All benchmarks in PerfKitBenchmarker export the following interface: + +GetConfig: this returns, the name of the benchmark, the number of machines + required to run one instance of the benchmark, a detailed description + of the benchmark, and if the benchmark requires a scratch disk. +Prepare: this function takes a list of VMs as an input parameter. The benchmark + will then get all binaries required to run the benchmark and, if + required, create data files. +Run: this function takes a list of VMs as an input parameter. The benchmark will + then run the benchmark upon the machines specified. The function will + return a dictonary containing the results of the benchmark. +Cleanup: this function takes a list of VMs as an input parameter. The benchmark + will then return the machine to the state it was at before Prepare + was called. + +PerfKitBenchmarker has the following run stages: provision, prepare, + run, cleanup, teardown, and all. 
+ +provision: Read command-line flags, decide what benchmarks to run, and + create the necessary resources for each benchmark, including + networks, VMs, disks, and keys, and generate a run_uri, which can + be used to resume execution at later stages. +prepare: Execute the Prepare function of each benchmark to install + necessary software, upload datafiles, etc. +run: Execute the Run function of each benchmark and collect the + generated samples. The publisher may publish these samples + according to PKB's settings. The Run stage can be called multiple + times with the run_uri generated by the provision stage. +cleanup: Execute the Cleanup function of each benchmark to uninstall + software and delete data files. +teardown: Delete VMs, key files, networks, and disks created in the + 'provision' stage. + +all: PerfKitBenchmarker will run all of the above stages (provision, + prepare, run, cleanup, teardown). Any resources generated in the + provision stage will be automatically deleted in the teardown + stage, even if there is an error in an earlier stage. When PKB is + running in this mode, the run cannot be repeated or resumed using + the run_uri. +""" + + +import collections +import copy +import getpass +import itertools +import json +import logging +import multiprocessing +from os.path import isfile +import pickle +import random +import secrets +import re +import sys +import threading +import time +import types +from typing import Any, Dict, List, Optional, Sequence, Set, Tuple +import uuid +from threading import Timer, Lock + +from absl import flags +from perfkitbenchmarker import archive +from perfkitbenchmarker import background_tasks +from perfkitbenchmarker import benchmark_lookup +from perfkitbenchmarker import benchmark_sets +from perfkitbenchmarker import benchmark_spec as bm_spec +from perfkitbenchmarker import benchmark_status +from perfkitbenchmarker import configs +from perfkitbenchmarker import context +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import events +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import linux_benchmarks +from perfkitbenchmarker import log_util +from perfkitbenchmarker import os_types +from perfkitbenchmarker import package_lookup +from perfkitbenchmarker import providers +from perfkitbenchmarker import publisher +from perfkitbenchmarker import requirements +from perfkitbenchmarker import sample +from perfkitbenchmarker import spark_service +from perfkitbenchmarker import stages +from perfkitbenchmarker import static_virtual_machine +from perfkitbenchmarker import timing_util +from perfkitbenchmarker import traces +from perfkitbenchmarker import version +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import windows_benchmarks +try: + from perfkitbenchmarker import intel_publisher +except: + intel_publisher = None +from perfkitbenchmarker.configs import benchmark_config_spec +from perfkitbenchmarker.linux_benchmarks import cluster_boot_benchmark +from perfkitbenchmarker.linux_packages import build_tools +import six +from six.moves import zip + +LOG_FILE_NAME = 'pkb.log' +COMPLETION_STATUS_FILE_NAME = 'completion_statuses.json' +REQUIRED_INFO = ['scratch_disk', 'num_machines'] +REQUIRED_EXECUTABLES = frozenset(['ssh', 'ssh-keygen', 'scp', 'openssl']) +MAX_RUN_URI_LENGTH = 12 +EMON_EDP_TARBALL = 'emon_edp.tar.gz' +FLAGS = flags.FLAGS + +# Define patterns for help text processing. 
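As a concrete illustration of the benchmark interface described in the module docstring above (GetConfig, Prepare, Run, Cleanup), a skeleton benchmark module might look roughly like the sketch below. It is a simplified stand-alone outline, not a drop-in PKB benchmark: the `BENCHMARK_CONFIG` string and the tuple returned from `Run()` are illustrative stand-ins for a real config and for `sample.Sample` objects.

```python
# Sketch of the benchmark interface described above; simplified, illustrative.
import time

BENCHMARK_NAME = 'example'
BENCHMARK_CONFIG = """
example:
  description: Minimal illustration of the GetConfig/Prepare/Run/Cleanup hooks.
"""


def GetConfig(user_config):
    # PKB merges the module's config with user-supplied overrides; the sketch
    # simply hands the user config back to stay self-contained.
    return dict(user_config)


def Prepare(benchmark_spec):
    # Install packages and upload data files onto benchmark_spec.vms here.
    del benchmark_spec


def Run(benchmark_spec):
    # Execute the workload and return measurements; a real module would build
    # sample.Sample(metric, value, unit, metadata) objects instead of tuples.
    del benchmark_spec
    return [('dummy_latency', 1.23, 'seconds', {'timestamp': time.time()})]


def Cleanup(benchmark_spec):
    # Undo whatever Prepare() did.
    del benchmark_spec
```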
+BASE_RELATIVE = '../' # Relative path from markdown output to PKB home for link writing. +MODULE_REGEX = r'^\s+?(.*?):.*' # Pattern that matches module names. +FLAGS_REGEX = r'(^\s\s--.*?(?=^\s\s--|\Z))+?' # Pattern that matches each flag. +FLAGNAME_REGEX = r'^\s+?(--.*?)(:.*\Z)' # Pattern that matches flag name in each flag. +DOCSTRING_REGEX = r'"""(.*?|$)"""' # Pattern that matches triple quoted comments. + +flags.DEFINE_list('ssh_options', [], 'Additional options to pass to ssh.') +flags.DEFINE_boolean('use_ipv6', False, 'Whether to use ipv6 for ssh/scp.') +flags.DEFINE_list('benchmarks', [benchmark_sets.STANDARD_SET], + 'Benchmarks and/or benchmark sets that should be run. The ' + 'default is the standard set. For more information about ' + 'benchmarks and benchmark sets, see the README and ' + 'benchmark_sets.py.') +flags.DEFINE_boolean('multi_os_benchmark', False, 'Whether is benchmark will ' + 'involve multiple os types.') +flags.DEFINE_string('archive_bucket', None, + 'Archive results to the given S3/GCS bucket.') +flags.DEFINE_string('project', None, 'GCP project ID under which ' + 'to create the virtual machines') +flags.DEFINE_multi_string( + 'zone', [], + 'Similar to the --zones flag, but allows the flag to be specified ' + 'multiple times on the commandline. For example, --zone=a --zone=b is ' + 'equivalent to --zones=a,b. Furthermore, any values specified by --zone ' + 'will be appended to those specfied by --zones.') +flags.DEFINE_list( + 'zones', [], + 'A list of zones within which to run PerfKitBenchmarker. ' + 'This is specific to the cloud provider you are running on. ' + 'If multiple zones are given, PerfKitBenchmarker will create 1 VM in ' + 'zone, until enough VMs are created as specified in each ' + 'benchmark. The order in which this flag is applied to VMs is ' + 'undefined.') +flags.DEFINE_list( + 'extra_zones', [], + 'Zones that will be appended to the "zones" list. This is functionally ' + 'the same, but allows flag matrices to have two zone axes.') +# TODO(user): note that this is currently very GCE specific. Need to create a +# module which can translate from some generic types to provider specific +# nomenclature. +flags.DEFINE_string('machine_type', None, 'Machine ' + 'types that will be created for benchmarks that don\'t ' + 'require a particular type.') +flags.DEFINE_integer('num_vms', 1, 'For benchmarks which can make use of a ' + 'variable number of machines, the number of VMs to use.') +flags.DEFINE_string('image', None, 'Default image that will be ' + 'linked to the VM') +flags.DEFINE_string('run_uri', None, 'Name of the Run. If provided, this ' + 'should be alphanumeric and less than or equal to %d ' + 'characters in length.' % MAX_RUN_URI_LENGTH) +flags.DEFINE_boolean('use_pkb_logging', True, 'Whether to use PKB-specific ' + 'logging handlers. Disabling this will use the standard ' + 'ABSL logging directly.') +flags.DEFINE_boolean('log_dmesg', False, 'Whether to log dmesg from ' + 'each VM to the PKB log file before the VM is deleted.') +# Added by Cumulus +flags.DEFINE_boolean('get_benchmark_usage', False, 'Whether to display additional ' + 'usage information for the benchmark.') +# End Added by Cumulus +flags.DEFINE_boolean('always_teardown_on_exception', False, 'Whether to tear ' + 'down VMs when there is exception during the PKB run. 
If' + 'enabled, VMs will be torn down even if FLAGS.run_stage ' + 'does not specify teardown.') +_RESTORE_PATH = flags.DEFINE_string('restore', None, + 'Path to restore resources from.') +_FREEZE_PATH = flags.DEFINE_string('freeze', None, + 'Path to freeze resources to.') +_COLLECT_MEMINFO = flags.DEFINE_bool('collect_meminfo', False, + 'Whether to collect /proc/meminfo stats.') + + +def GetCurrentUser(): + """Get the current user name. + + On some systems the current user information may be unavailable. In these + cases we just need a string to tag the created resources with. It should + not be a fatal error. + + Returns: + User name OR default string if user name not available. + """ + try: + return getpass.getuser() + except KeyError: + return 'user_unknown' + + +flags.DEFINE_string( + 'owner', GetCurrentUser(), 'Owner name. ' + 'Used to tag created resources and performance records.') +flags.DEFINE_enum( + 'log_level', log_util.INFO, + list(log_util.LOG_LEVELS.keys()), + 'The log level to run at.') +flags.DEFINE_enum( + 'file_log_level', log_util.DEBUG, list(log_util.LOG_LEVELS.keys()), + 'Anything logged at this level or higher will be written to the log file.') +flags.DEFINE_integer('duration_in_seconds', None, + 'duration of benchmarks. ' + '(only valid for mesh_benchmark)') +flags.DEFINE_string('static_vm_file', None, + 'The file path for the Static Machine file. See ' + 'static_virtual_machine.py for a description of this file.') +flags.DEFINE_boolean('version', False, 'Display the version and exit.', + allow_override_cpp=True) +flags.DEFINE_boolean('time_commands', False, 'Times each command issued.') +flags.DEFINE_enum( + 'scratch_disk_type', None, + [disk.STANDARD, disk.REMOTE_SSD, disk.PIOPS, disk.LOCAL], + 'Type for all scratch disks. The default is standard') +flags.DEFINE_string( + 'data_disk_type', None, + 'Type for all data disks. If a provider keeps the operating system and ' + 'user data on separate disks, this only affects the user data disk(s).' + 'If the provider has OS and user data on the same disk, this flag affects' + 'that disk.') +flags.DEFINE_integer('scratch_disk_size', None, 'Size, in gb, for all scratch ' + 'disks.') +flags.DEFINE_list( + 'data_disk_zones', [], + 'The zone of the data disk. This is only used to provision regional pd with' + ' multiple zones on GCP.') +flags.DEFINE_integer('data_disk_size', None, 'Size, in gb, for all data disks.') +flags.DEFINE_integer('scratch_disk_iops', None, + 'IOPS for Provisioned IOPS (SSD) volumes in AWS.') +flags.DEFINE_integer('scratch_disk_throughput', None, + 'Throughput (MB/s) for volumes in AWS.') +flags.DEFINE_integer('num_striped_disks', None, + 'The number of data disks to stripe together to form one ' + '"logical" data disk. This defaults to 1 ' + '(except with local disks), which means no striping. ' + 'When using local disks, they default to striping ' + 'all disks together. The striped disks will appear as ' + 'one disk (data_disk_0) in the metadata.', + lower_bound=1) +flags.DEFINE_bool('install_packages', None, + 'Override for determining whether packages should be ' + 'installed. If this is false, no packages will be installed ' + 'on any VMs. This option should probably only ever be used ' + 'if you have already created an image with all relevant ' + 'packages installed.') +flags.DEFINE_bool( + 'stop_after_benchmark_failure', False, + 'Determines response when running multiple benchmarks serially and a ' + 'benchmark run fails. When True, no further benchmarks are scheduled, and ' + 'execution ends. 
When False, benchmarks continue to be scheduled. Does not ' + 'apply to keyboard interrupts, which will always prevent further ' + 'benchmarks from being scheduled.') +flags.DEFINE_boolean( + 'ignore_package_requirements', False, + 'Disables Python package requirement runtime checks.') +flags.DEFINE_enum('spark_service_type', None, + [spark_service.PKB_MANAGED, spark_service.PROVIDER_MANAGED], + 'Type of spark service to use') +flags.DEFINE_boolean( + 'publish_after_run', False, + 'If true, PKB will publish all samples available immediately after running ' + 'each benchmark. This may be useful in scenarios where the PKB run time ' + 'for all benchmarks is much greater than a single benchmark.') +flags.DEFINE_integer( + 'publish_period', None, + 'The period in seconds to publish samples from repeated run stages. ' + 'This will only publish samples if publish_after_run is True.') +flags.DEFINE_integer( + 'run_stage_time', 0, + 'PKB will run/re-run the run stage of each benchmark until it has spent ' + 'at least this many seconds. It defaults to 0, so benchmarks will only ' + 'be run once unless some other value is specified. This flag and ' + 'run_stage_iterations are mutually exclusive.') +flags.DEFINE_integer( + 'run_stage_iterations', 1, + 'PKB will run/re-run the run stage of each benchmark this many times. ' + 'It defaults to 1, so benchmarks will only be run once unless some other ' + 'value is specified. This flag and run_stage_time are mutually exclusive.') +flags.DEFINE_integer( + 'run_stage_retries', 0, + 'The number of allowable consecutive failures during the run stage. After ' + 'this number of failures any exceptions will cause benchmark termination. ' + 'If run_stage_time is exceeded, the run stage will not be retried even if ' + 'the number of failures is less than the value of this flag.') +_MAX_RETRIES = flags.DEFINE_integer( + 'retries', 0, 'The amount of times PKB should retry each benchmark.' + 'Use with --retry_substatuses to specify which failure substatuses to ' + 'retry on. Defaults to all valid substatuses.') +_RETRY_SUBSTATUSES = flags.DEFINE_multi_enum( + 'retry_substatuses', benchmark_status.FailedSubstatus.RETRYABLE_SUBSTATUSES, + benchmark_status.FailedSubstatus.RETRYABLE_SUBSTATUSES, + 'The failure substatuses to retry on. By default, failed runs are run with ' + 'the same previous config.') +_RETRY_DELAY_SECONDS = flags.DEFINE_integer( + 'retry_delay_seconds', 0, 'The time to wait in between retries.') +# Retries could also allow for a dict of failed_substatus: 'zone'|'region' +# retry method which would make the retry functionality more customizable. +_SMART_QUOTA_RETRY = flags.DEFINE_bool( + 'smart_quota_retry', False, + 'If True, causes the benchmark to rerun in a zone in a different region ' + 'in the same geo on a quota exception. Currently only works for benchmarks ' + 'that specify a single zone (via --zone or --zones). The zone is selected ' + 'at random and overrides the --zones flag or the --zone flag, depending on ' + 'which is provided. QUOTA_EXCEEDED must be in the list of retry ' + 'substatuses for this to work.') +_SMART_CAPACITY_RETRY = flags.DEFINE_bool( + 'smart_capacity_retry', False, + 'If True, causes the benchmark to rerun in a different zone in the same ' + 'region on a capacity/config exception. Currently only works for ' + 'benchmarks that specify a single zone (via --zone or --zones). The zone ' + 'is selected at random and overrides the --zones flag or the --zone flag, ' + 'depending on which is provided. 
INSUFFICIENT_CAPACITY and UNSUPPORTED ' + 'must be in the list of retry substatuses for this to work.') +flags.DEFINE_boolean( + 'boot_samples', False, + 'Whether to publish boot time samples for all tests.') +_MEASURE_DELETE = flags.DEFINE_boolean( + 'delete_samples', False, + 'Whether to publish delete time samples for all tests.') +flags.DEFINE_boolean( + 'gpu_samples', False, + 'Whether to publish GPU memcpy bandwidth samples for GPU tests.') +flags.DEFINE_integer( + 'run_processes', None, + 'The number of parallel processes to use to run benchmarks.', + lower_bound=1) +flags.DEFINE_float( + 'run_processes_delay', None, + 'The delay in seconds between parallel processes\' invocation. ' + 'Increasing this value may reduce provider throttling issues.', + lower_bound=0) +flags.DEFINE_string( + 'completion_status_file', None, + 'If specified, this file will contain the completion status of each ' + 'benchmark that ran (SUCCEEDED, FAILED, or SKIPPED). The file has one json ' + 'object per line, each with the following format:\n' + '{ "name": , "flags": , ' + '"status": }') +flags.DEFINE_string( + 'helpmatch', '', + 'Shows only flags defined in a module whose name matches the given regex.', + allow_override_cpp=True) +flags.DEFINE_string( + 'helpmatchmd', '', + 'helpmatch query with markdown friendly output. ' + 'Shows only flags defined in a module whose name matches the given regex.', + allow_override_cpp=True) +flags.DEFINE_boolean( + 'create_failed_run_samples', False, + 'If true, PKB will create a sample specifying that a run stage failed. ' + 'This sample will include metadata specifying the run stage that ' + 'failed, the exception that occurred, as well as all the flags that ' + 'were provided to PKB on the command line.') +_CREATE_STARTED_RUN_SAMPLE = flags.DEFINE_boolean( + 'create_started_run_sample', False, + 'Whether PKB will create a sample at the start of the provision phase of ' + 'the benchmark run.') +_CREATE_STARTED_STAGE_SAMPLES = flags.DEFINE_boolean( + 'create_started_stage_samples', False, + 'Whether PKB will create a sample at the start of the each stage of ' + 'the benchmark run.') +flags.DEFINE_integer( + 'failed_run_samples_error_length', 10240, + 'If create_failed_run_samples is true, PKB will truncate any error ' + 'messages at failed_run_samples_error_length.') +flags.DEFINE_boolean( + 'dry_run', False, + 'If true, PKB will print the flags configurations to be run and exit. ' + 'The configurations are generated from the command line flags, the ' + 'flag_matrix, and flag_zip.') +flags.DEFINE_string( + 'skip_pending_runs_file', None, + 'If file exists, any pending runs will be not be executed.') +flags.DEFINE_boolean( + 'use_vpn', False, + 'Creates VPN tunnels between vm_groups') +flags.DEFINE_integer( + 'after_prepare_sleep_time', 0, + 'The time in seconds to sleep after the prepare phase. This can be useful ' + 'for letting burst tokens accumulate.') +flags.DEFINE_integer( + 'after_run_sleep_time', 0, + 'The time in seconds to sleep after the run phase. This can be useful ' + 'for letting the VM sit idle after the bechmarking phase is complete.') +flags.DEFINE_bool( + 'before_run_pause', False, + 'If true, wait for command line input before executing the run phase. ' + 'This is useful for debugging benchmarks during development.') +flags.DEFINE_bool( + 'before_cleanup_pause', False, + 'If true, wait for command line input before executing the cleanup phase. 
' + 'This is useful for debugging benchmarks during development.') +flags.DEFINE_integer( + 'timeout_minutes', 240, + 'An upper bound on the time in minutes that the benchmark is expected to ' + 'run. This time is annotated or tagged on the resources of cloud ' + 'providers. Note that for retries, this applies to each individual retry.') +flags.DEFINE_integer( + 'persistent_timeout_minutes', 240, + 'An upper bound on the time in minutes that resources left behind by the ' + 'benchmark. Some benchmarks purposefully create resources for other ' + 'benchmarks to use. Persistent timeout specifies how long these shared ' + 'resources should live.') + +flags.DEFINE_integer( + 'trace_start_delay', 0, + 'delay in seconds before the trace will start collecting data', + lower_bound=0) +flags.DEFINE_integer( + 'trace_duration', None, + 'duration in seconds for which the trace will collect data') +flags.DEFINE_boolean( + 'trace_allow_benchmark_control', False, + 'Specifies to allow benchmark-level traces control. ' + 'Traces will not be started/stopped surround run phase and instead ' + 'benchmarks which are designed to start/stop traces themselves will do so.') + +flags.DEFINE_bool('disable_interrupt_moderation', False, + 'Turn off the interrupt moderation networking feature') +flags.DEFINE_bool('disable_rss', False, + 'Whether or not to disable the Receive Side Scaling feature.') +flags.DEFINE_boolean('record_lscpu', False, + 'Whether to record the lscpu output in a sample') +flags.DEFINE_boolean('record_proccpu', False, + 'Whether to record the /proc/cpuinfo output in a sample') +flags.DEFINE_boolean('record_cpu_vuln', False, + 'Whether to record the CPU vulnerabilities on linux VMs') +flags.DEFINE_boolean('record_gcc', False, + 'Whether to record the gcc version in a sample') +flags.DEFINE_boolean('record_glibc', False, + 'Whether to record the glibc version in a sample') +# Support for using a proxy in the cloud environment. +flags.DEFINE_string('http_proxy', '', + 'Specify a proxy for HTTP in the form ' + '[user:passwd@]proxy.server:port.') +flags.DEFINE_string('https_proxy', '', + 'Specify a proxy for HTTPS in the form ' + '[user:passwd@]proxy.server:port.') +flags.DEFINE_string('ftp_proxy', '', + 'Specify a proxy for FTP in the form ' + '[user:passwd@]proxy.server:port.') +flags.DEFINE_string('no_proxy', '', + 'Specify host(s) to exclude from proxy, e.g. ' + '--no_proxy=localhost,.example.com,192.168.0.1') +flags.DEFINE_bool('randomize_run_order', False, + 'When running with more than one benchmarks, ' + 'randomize order of the benchmarks.') +flags.DEFINE_string('compiler', 'gcc', + 'Specify compiler to use for workload:' + '--compiler=[gcc, aocc, icc]') +flags.DEFINE_string('compiler_version', '8', + 'Use specific version of compiler:' + '--compiler_version=2.1') +flags.DEFINE_string('compiler_path', '', + 'Specify compiler path prefix' + '--compiler_path=/usr/local/bin') +flags.DEFINE_string('compiler_flags', '', + 'override flags to pass to the compiler command line, e.g. 
' + '-O3 -mcmodel=medium -fopenmp -march=skylake-avx512') + +_TEARDOWN_EVENT = multiprocessing.Event() +_ANY_ZONE = 'any' + +events.initialization_complete.connect(traces.RegisterAll) + + +@flags.multi_flags_validator( + ['smart_quota_retry', 'smart_capacity_retry', 'retries', 'zones', 'zone'], + message='Smart zone retries requires exactly one single zone from --zones ' + 'or --zone, as well as retry count > 0.') +def ValidateSmartZoneRetryFlags(flags_dict): + """Validates smart zone retry flags.""" + if flags_dict['smart_quota_retry'] or flags_dict['smart_capacity_retry']: + if flags_dict['retries'] == 0: + return False + return (len(flags_dict['zones']) == 1 and + not flags_dict['zone']) or (len(flags_dict['zone']) == 1 and + not flags_dict['zones']) + return True + + +@flags.multi_flags_validator( + ['retries', 'run_stage'], + message='Retries requires running all stages of the benchmark.') +def ValidateRetriesAndRunStages(flags_dict): + if flags_dict['retries'] > 0 and flags_dict['run_stage'] != stages.STAGES: + return False + return True + + +def _InjectBenchmarkInfoIntoDocumentation(): + """Appends each benchmark's information to the main module's docstring.""" + # TODO: Verify if there is other way of appending additional help + # message. + # Inject more help documentation + # The following appends descriptions of the benchmarks and descriptions of + # the benchmark sets to the help text. + benchmark_sets_list = [ + '%s: %s' % + (set_name, benchmark_sets.BENCHMARK_SETS[set_name]['message']) + for set_name in benchmark_sets.BENCHMARK_SETS] + sys.modules['__main__'].__doc__ = ( + 'PerfKitBenchmarker version: {version}\n\n{doc}\n' + 'Benchmarks (default requirements):\n' + '\t{benchmark_doc}').format( + version=version.VERSION, + doc=__doc__, + benchmark_doc=_GenerateBenchmarkDocumentation()) + sys.modules['__main__'].__doc__ += ('\n\nBenchmark Sets:\n\t%s' + % '\n\t'.join(benchmark_sets_list)) + + +def _ParseFlags(argv=sys.argv): + """Parses the command-line flags.""" + try: + argv = FLAGS(argv) + except flags.Error as e: + logging.error(e) + logging.info('For usage instructions, use --helpmatch={module_name}') + logging.info('For example, ./pkb.py --helpmatch=benchmarks.fio') + sys.exit(1) + + +def _PrintHelp(matches=None): + """Prints help for flags defined in matching modules. + + Args: + matches: regex string or None. Filters help to only those whose name + matched the regex. If None then all flags are printed. + """ + if not matches: + print(FLAGS) + else: + flags_by_module = FLAGS.flags_by_module_dict() + modules = sorted(flags_by_module) + regex = re.compile(matches) + for module_name in modules: + if regex.search(module_name): + print(FLAGS.module_help(module_name)) + + +def _PrintHelpMD(matches=None): + """Prints markdown formatted help for flags defined in matching modules. + + Works just like --helpmatch. + + Args: + matches: regex string or None. Filters help to only those whose name matched + the regex. If None then all flags are printed. + Raises: + RuntimeError: If unable to find module help. 
+ Eg: + * all flags: `./pkb.py --helpmatchmd .*` > testsuite_docs/all.md + * linux benchmarks: `./pkb.py --helpmatchmd linux_benchmarks.*` > + testsuite_docs/linux_benchmarks.md * specific modules `./pkb.py + --helpmatchmd iperf` > testsuite_docs/iperf.md * windows packages + `./pkb.py --helpmatchmd windows_packages.*` > + testsuite_docs/windows_packages.md + * GCP provider: `./pkb.py --helpmatchmd providers.gcp.* > + testsuite_docs/providers_gcp.md` + """ + + flags_by_module = FLAGS.flags_by_module_dict() + modules = sorted(flags_by_module) + regex = re.compile(matches) + for module_name in modules: + if regex.search(module_name): + # Compile regex patterns. + module_regex = re.compile(MODULE_REGEX) + flags_regex = re.compile(FLAGS_REGEX, re.MULTILINE | re.DOTALL) + flagname_regex = re.compile(FLAGNAME_REGEX, re.MULTILINE | re.DOTALL) + docstring_regex = re.compile(DOCSTRING_REGEX, re.MULTILINE | re.DOTALL) + # Retrieve the helpmatch text to format. + helptext_raw = FLAGS.module_help(module_name) + + # Converts module name to github linkable string. + # eg: perfkitbenchmarker.linux_benchmarks.iperf_vpn_benchmark -> + # perfkitbenchmarker/linux_benchmarks/iperf_vpn_benchmark.py + match = re.search( + module_regex, + helptext_raw, + ) + if not match: + raise RuntimeError( + f'Unable to find "{module_regex}" in "{helptext_raw}"') + module = match.group(1) + module_link = module.replace('.', '/') + '.py' + # Put flag name in a markdown code block for visibility. + flags = re.findall(flags_regex, helptext_raw) + flags[:] = [flagname_regex.sub(r'`\1`\2', flag) for flag in flags] + # Get the docstring for the module without importing everything into our + # namespace. Probably a better way to do this + docstring = 'No description available' + # Only pull doststrings from inside pkb source files. + if isfile(module_link): + with open(module_link, 'r') as f: + source = f.read() + # Get the triple quoted matches. + docstring_match = re.search(docstring_regex, source) + # Some modules don't have docstrings. + # eg perfkitbenchmarker/providers/alicloud/flags.py + if docstring_match is not None: + docstring = docstring_match.group(1) + # Format output and print here. + if isfile(module_link): # Only print links for modules we can find. + print('### [' + module, '](' + BASE_RELATIVE + module_link + ')\n') + else: + print('### ' + module + '\n') + print('#### Description:\n\n' + docstring + '\n\n#### Flags:\n') + print('\n'.join(flags) + '\n') + + +def CheckVersionFlag(): + """If the --version flag was specified, prints the version and exits.""" + if FLAGS.version: + print(version.VERSION) + sys.exit(0) + + +def _InitializeRunUri(): + """Determines the PKB run URI and sets FLAGS.run_uri.""" + if FLAGS.run_uri is None: + if stages.PROVISION in FLAGS.run_stage: + FLAGS.run_uri = str(uuid.uuid4())[-8:] + else: + # Attempt to get the last modified run directory. + run_uri = vm_util.GetLastRunUri() + if run_uri: + FLAGS.run_uri = run_uri + logging.warning( + 'No run_uri specified. Attempting to run the following stages with ' + '--run_uri=%s: %s', FLAGS.run_uri, ', '.join(FLAGS.run_stage)) + else: + raise errors.Setup.NoRunURIError( + 'No run_uri specified. Could not run the following stages: %s' % + ', '.join(FLAGS.run_stage)) + elif not FLAGS.run_uri.isalnum() or len(FLAGS.run_uri) > MAX_RUN_URI_LENGTH: + raise errors.Setup.BadRunURIError('run_uri must be alphanumeric and less ' + 'than or equal to %d characters in ' + 'length.' 
% MAX_RUN_URI_LENGTH) + + +def _CreateBenchmarkSpecs(): + """Create a list of BenchmarkSpecs for each benchmark run to be scheduled. + + Returns: + A list of BenchmarkSpecs. + """ + specs = [] + benchmark_tuple_list = benchmark_sets.GetBenchmarksFromFlags() + benchmark_counts = collections.defaultdict(itertools.count) + for benchmark_module, user_config in benchmark_tuple_list: + # Construct benchmark config object. + name = benchmark_module.BENCHMARK_NAME + expected_os_types = None if FLAGS.multi_os_benchmark else ( + os_types.WINDOWS_OS_TYPES if FLAGS.os_type in os_types.WINDOWS_OS_TYPES + else os_types.LINUX_OS_TYPES) + with flag_util.OverrideFlags(FLAGS, user_config.get('flags')): + config_dict = benchmark_module.GetConfig(user_config) + config_spec_class = getattr( + benchmark_module, 'BENCHMARK_CONFIG_SPEC_CLASS', + benchmark_config_spec.BenchmarkConfigSpec) + config = config_spec_class(name, expected_os_types=expected_os_types, + flag_values=FLAGS, **config_dict) + + # Assign a unique ID to each benchmark run. This differs even between two + # runs of the same benchmark within a single PKB run. + uid = name + str(next(benchmark_counts[name])) + + # Optional step to check flag values and verify files exist. + check_prereqs = getattr(benchmark_module, 'CheckPrerequisites', None) + if check_prereqs: + try: + with config.RedirectFlags(FLAGS): + check_prereqs(config) + except: + logging.exception('Prerequisite check failed for %s', name) + raise + + specs.append( + bm_spec.BenchmarkSpec.GetBenchmarkSpec(benchmark_module, config, uid)) + + return specs + + +def _WriteCompletionStatusFile(benchmark_specs, status_file): + """Writes a completion status file. + + The file has one json object per line, each with the following format: + + { + "name": , + "status": , + "failed_substatus": , + "status_detail": , + "flags": + } + + Args: + benchmark_specs: The list of BenchmarkSpecs that ran. + status_file: The file object to write the json structures to. + """ + for spec in benchmark_specs: + # OrderedDict so that we preserve key order in json file + status_dict = collections.OrderedDict() + status_dict['name'] = spec.name + status_dict['status'] = spec.status + if spec.failed_substatus: + status_dict['failed_substatus'] = spec.failed_substatus + if spec.status_detail: + status_dict['status_detail'] = spec.status_detail + status_dict['flags'] = spec.config.flags + status_file.write(json.dumps(status_dict) + '\n') + + +def _SetRestoreSpec(spec: bm_spec.BenchmarkSpec) -> None: + """Unpickles the spec to restore resources from, if provided.""" + restore_path = _RESTORE_PATH.value + if restore_path: + logging.info('Using restore spec at path: %s', restore_path) + with open(restore_path, 'rb') as spec_file: + spec.restore_spec = pickle.load(spec_file) + + +def _SetFreezePath(spec: bm_spec.BenchmarkSpec) -> None: + """Sets the path to freeze resources to if provided.""" + if _FREEZE_PATH.value: + spec.freeze_path = _FREEZE_PATH.value + logging.info('Using freeze path, %s', spec.freeze_path) + + +def DoProvisionPhase(spec, timer): + """Performs the Provision phase of benchmark execution. + + Args: + spec: The BenchmarkSpec created for the benchmark. + timer: An IntervalTimer that measures the start and stop times of resource + provisioning. 
+ """ + logging.info('Provisioning resources for benchmark %s', spec.name) + events.before_phase.send(stages.PROVISION, benchmark_spec=spec) + spec.ConstructContainerCluster() + spec.ConstructContainerRegistry() + # spark service needs to go first, because it adds some vms. + spec.ConstructSparkService() + spec.ConstructDpbService() + spec.ConstructVirtualMachines() + spec.ConstructRelationalDb() + spec.ConstructSpanner() + spec.ConstructNonRelationalDb() + spec.ConstructMessagingService() + # CapacityReservations need to be constructed after VirtualMachines because + # it needs information about the VMs (machine type, count, zone, etc). The + # CapacityReservations will be provisioned before VMs. + spec.ConstructCapacityReservations() + spec.ConstructTpu() + spec.ConstructEdwService() + spec.ConstructVPNService() + spec.ConstructNfsService() + spec.ConstructSmbService() + spec.ConstructDataDiscoveryService() + # Pickle the spec before we try to create anything so we can clean + # everything up on a second run if something goes wrong. + spec.Pickle() + events.benchmark_start.send(benchmark_spec=spec) + try: + with timer.Measure('Resource Provisioning'): + spec.Provision() + finally: + # Also pickle the spec after the resources are created so that + # we have a record of things like AWS ids. Otherwise we won't + # be able to clean them up on a subsequent run. + spec.Pickle() + events.after_phase.send(stages.PROVISION, benchmark_spec=spec) + + +class InterruptChecker(): + """An class that check interrupt on VM.""" + + def __init__(self, vms): + """Start check interrupt thread. + + Args: + vms: A list of virtual machines. + """ + self.vms = vms + self.check_threads = [] + self.phase_status = threading.Event() + for vm in vms: + if vm.IsInterruptible(): + check_thread = threading.Thread(target=self.CheckInterrupt, args=(vm,)) + check_thread.start() + self.check_threads.append(check_thread) + + def CheckInterrupt(self, vm): + """Check interrupt. + + Args: + vm: the virtual machine object. + + Returns: + None + """ + while not self.phase_status.is_set(): + vm.UpdateInterruptibleVmStatus(use_api=False) + if vm.WasInterrupted(): + return + else: + self.phase_status.wait(vm.GetInterruptableStatusPollSeconds()) + + def EndCheckInterruptThread(self): + """End check interrupt thread.""" + self.phase_status.set() + + for check_thread in self.check_threads: + check_thread.join() + + def EndCheckInterruptThreadAndRaiseError(self): + """End check interrupt thread and raise error. + + Raises: + InsufficientCapacityCloudFailure when it catches interrupt. + + Returns: + None + """ + self.EndCheckInterruptThread() + if any(vm.IsInterruptible() and vm.WasInterrupted() for vm in self.vms): + raise errors.Benchmarks.InsufficientCapacityCloudFailure('Interrupt') + + +def DoPreparePhase(spec, timer): + """Performs the Prepare phase of benchmark execution. + + Args: + spec: The BenchmarkSpec created for the benchmark. + timer: An IntervalTimer that measures the start and stop times of the + benchmark module's Prepare function. 
+ """ + # ### Added by Cumulus for svrinfo b0 #### + interrupt_checker = InterruptChecker(spec.vms) + # ### End Cumulus svrinfo block 0 #### + logging.info('Preparing benchmark %s', spec.name) + events.before_phase.send(stages.PREPARE, benchmark_spec=spec) + with timer.Measure('BenchmarkSpec Prepare'): + spec.Prepare() + with timer.Measure('Benchmark Prepare'): + spec.BenchmarkPrepare(spec) + spec.StartBackgroundWorkload() + if FLAGS.after_prepare_sleep_time: + logging.info('Sleeping for %s seconds after the prepare phase.', + FLAGS.after_prepare_sleep_time) + time.sleep(FLAGS.after_prepare_sleep_time) + events.after_phase.send(stages.PREPARE, benchmark_spec=spec) + + interrupt_checker.EndCheckInterruptThread() + + +def DoRunPhase(spec, collector, timer): + """Performs the Run phase of benchmark execution. + + Args: + spec: The BenchmarkSpec created for the benchmark. + collector: The SampleCollector object to add samples to. + timer: An IntervalTimer that measures the start and stop times of the + benchmark module's Run function. + """ + if FLAGS.before_run_pause: + six.moves.input('Hit enter to begin Run.') + deadline = time.time() + FLAGS.run_stage_time + run_number = 0 + consecutive_failures = 0 + last_publish_time = time.time() + + def _IsRunStageFinished(): + if FLAGS.run_stage_time > 0: + return time.time() > deadline + else: + return run_number >= FLAGS.run_stage_iterations + + trace_start_delay = FLAGS.trace_start_delay + trace_duration = FLAGS.trace_duration + benchmark_control_traces = spec.control_traces and FLAGS.trace_allow_benchmark_control + + # Use mutable list under a lock as an atomic indicator of the traces being stopped + traces_stopped = [] + lock = Lock() + + def _StopTraces(): + """ + Stops all traces by sending on the after_phase signal from + the RUN_PHASE sender. RUN_PHASE is the sender all the modules in + perfkitbenchmarker/traces/ except svrinfo are listening to. 
+ """ + with lock: + if traces_stopped: + logging.info("traces already stopped") + return + events.stop_trace.send(stages.RUN, benchmark_spec=spec) + traces_stopped.append(1) + + while True: + samples = [] + + if not traces_stopped: + logging.info("Installing traces...") + events.before_phase.send(stages.RUN, benchmark_spec=spec) + trace_stop_thread = None + if benchmark_control_traces: + if trace_start_delay: + logging.warning('Benchmark {} controls it\'s own traces behavior and is ' + 'incompatible with --trace_start_delay.'.format(spec.name)) + elif trace_start_delay: + logging.info('Traces installed & will start after {0} seconds'.format(trace_start_delay)) + trace_start_thread = Timer(trace_start_delay, events.start_trace.send, [stages.RUN], + {"benchmark_spec": spec}) + trace_start_thread.start() + if trace_duration is not None: + logging.info('Traces will run for {0} seconds'.format(trace_duration)) + trace_stop_delay = trace_start_delay + trace_duration + trace_stop_thread = Timer(trace_stop_delay, _StopTraces) + trace_stop_thread.start() + else: + logging.info('Traces will run for the duration of the benchmark') + else: + events.start_trace.send(stages.RUN, benchmark_spec=spec) + + try: + logging.info('Running benchmark %s', spec.name) + with timer.Measure('Benchmark Run'): + samples = spec.BenchmarkRun(spec) + except Exception: + consecutive_failures += 1 + if consecutive_failures > FLAGS.run_stage_retries: + raise + logging.exception('Run failed (consecutive_failures=%s); retrying.', + consecutive_failures) + else: + consecutive_failures = 0 + finally: + if not benchmark_control_traces: + _StopTraces() + if trace_stop_thread is not None: + # This is to cover the case when the benchmark finishes running + # before the traces are stopped by the thread delayed by trace_duration seconds. + # Not cancelling the thread will not cause the traces to stop since we ensure + # the stop signal is only sent once in _StopTraces(). This just servers to + # clean up after the thread that is still waiting on the Timer. + trace_stop_thread.cancel() + + if not traces_stopped: + events.after_phase.send(stages.RUN, benchmark_spec=spec) + + if FLAGS.run_stage_time or FLAGS.run_stage_iterations: + for s in samples: + s.metadata['run_number'] = run_number + + # Add boot time metrics on the first run iteration. 
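+    # (They are gated on run_number == 0, so repeated run stages via
+    # --run_stage_time/--run_stage_iterations emit boot samples only once.)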
+ if run_number == 0 and (FLAGS.boot_samples or + spec.name == cluster_boot_benchmark.BENCHMARK_NAME): + samples.extend(cluster_boot_benchmark.GetTimeToBoot(spec.vms)) + + if FLAGS.record_lscpu: + samples.extend(_CreateLscpuSamples(spec.vms)) + + if FLAGS.record_proccpu: + samples.extend(_CreateProcCpuSamples(spec.vms)) + if FLAGS.record_cpu_vuln and run_number == 0: + samples.extend(_CreateCpuVulnerabilitySamples(spec.vms)) + + # Add workload uid to metadata + for s in samples: + s.metadata['uid'] = spec.uid + if FLAGS.record_gcc: + samples.extend(_CreateGccSamples(spec.vms)) + if FLAGS.record_glibc: + samples.extend(_CreateGlibcSamples(spec.vms)) + + events.samples_created.send( + stages.RUN, benchmark_spec=spec, samples=samples) + collector.AddSamples(samples, spec.name, spec) + if (FLAGS.publish_after_run and FLAGS.publish_period is not None and + FLAGS.publish_period < (time.time() - last_publish_time)): + collector.PublishSamples() + last_publish_time = time.time() + run_number += 1 + if _IsRunStageFinished(): + if FLAGS.after_run_sleep_time: + logging.info('Sleeping for %s seconds after the run phase.', + FLAGS.after_run_sleep_time) + time.sleep(FLAGS.after_run_sleep_time) + break + + +def DoCleanupPhase(spec, timer): + """Performs the Cleanup phase of benchmark execution. + + Cleanup phase work should be delegated to spec.BenchmarkCleanup to allow + non-PKB based cleanup if needed. + + Args: + spec: The BenchmarkSpec created for the benchmark. + timer: An IntervalTimer that measures the start and stop times of the + benchmark module's Cleanup function. + """ + if FLAGS.before_cleanup_pause: + six.moves.input('Hit enter to begin Cleanup.') + logging.info('Cleaning up benchmark %s', spec.name) + events.before_phase.send(stages.CLEANUP, benchmark_spec=spec) + if (spec.always_call_cleanup or any([vm.is_static for vm in spec.vms]) or + spec.dpb_service is not None): + events.before_phase.send(stages.CLEANUP, benchmark_spec=spec) + spec.StopBackgroundWorkload() + with timer.Measure('Benchmark Cleanup'): + spec.BenchmarkCleanup(spec) + events.after_phase.send(stages.CLEANUP, benchmark_spec=spec) + + +def DoTeardownPhase(spec, collector, timer): + """Performs the Teardown phase of benchmark execution. + + Teardown phase work should be delegated to spec.Delete to allow non-PKB based + teardown if needed. + + Args: + spec: The BenchmarkSpec created for the benchmark. + collector: The SampleCollector object to add samples to + (if collecting delete samples) + timer: An IntervalTimer that measures the start and stop times of + resource teardown. + """ + logging.info('Tearing down resources for benchmark %s', spec.name) + events.before_phase.send(stages.TEARDOWN, benchmark_spec=spec) + # Add delete time metrics after metadeta collected + if _MEASURE_DELETE.value: + samples = cluster_boot_benchmark.MeasureDelete(spec.vms) + collector.AddSamples(samples, spec.name, spec) + + with timer.Measure('Resource Teardown'): + spec.Delete() + events.after_phase.send(stages.TEARDOWN, benchmark_spec=spec) + + +def _SkipPendingRunsFile(): + if FLAGS.skip_pending_runs_file and isfile(FLAGS.skip_pending_runs_file): + logging.warning('%s exists. Skipping benchmark.', + FLAGS.skip_pending_runs_file) + return True + else: + return False + +_SKIP_PENDING_RUNS_CHECKS = [] + + +def RegisterSkipPendingRunsCheck(func): + """Registers a function to skip pending runs. + + Args: + func: A function which returns True if pending runs should be skipped. 
+ """ + _SKIP_PENDING_RUNS_CHECKS.append(func) + + +@events.before_phase.connect +def _PublishStageStartedSamples( + sender: str, + benchmark_spec: bm_spec.BenchmarkSpec): + """Publish the start of each stage.""" + if sender == stages.PROVISION and _CREATE_STARTED_RUN_SAMPLE.value: + _PublishRunStartedSample(benchmark_spec) + if _CREATE_STARTED_STAGE_SAMPLES.value: + _PublishEventSample( + benchmark_spec, + f'{sender.capitalize()} Stage Started') + + +def _PublishRunStartedSample(spec): + """Publishes a sample indicating that a run has started. + + This sample is published immediately so that there exists some metric for any + run (even if the process dies). + + Args: + spec: The BenchmarkSpec object with run information. + """ + metadata = { + 'flags': str(flag_util.GetProvidedCommandLineFlags()) + } + _PublishEventSample(spec, 'Run Started', metadata) + + +def _PublishEventSample(spec: bm_spec.BenchmarkSpec, + event: str, + metadata: Optional[Dict[str, Any]] = None, + collector: Optional[publisher.SampleCollector] = None): + """Publishes a sample indicating the progress of the benchmark. + + Value of sample is time of event in unix seconds + + Args: + spec: The BenchmarkSpec object with run information. + event: The progress event to publish. + metadata: optional metadata to publish about the event. + collector: the SampleCollector to use. + """ + # N.B. SampleCollector seems stateless so re-using vs creating a new one seems + # to have no effect. + if not collector: + collector = publisher.SampleCollector() + collector.AddSamples( + [sample.Sample(event, time.time(), 'seconds', metadata or {})], + spec.name, spec) + collector.PublishSamples() + + +def RunBenchmark(spec, collector): + """Runs a single benchmark and adds the results to the collector. + + Args: + spec: The BenchmarkSpec object with run information. + collector: The SampleCollector object to add samples to. + """ + + # Since there are issues with the handling SIGINT/KeyboardInterrupt (see + # further discussion in _BackgroundProcessTaskManager) this mechanism is + # provided for defense in depth to force skip pending runs after SIGINT. + for f in _SKIP_PENDING_RUNS_CHECKS: + if f(): + logging.warning('Skipping benchmark.') + return + + # Optional display of additional benchmark usage information, such as + # listing available benchmark tests. + if FLAGS.get_benchmark_usage is True: + if hasattr(spec, 'BenchmarkGetUsage'): + spec.BenchmarkGetUsage(spec) + else: + logging.info('Benchmark does not have additional usage information') + return + + spec.status = benchmark_status.FAILED + current_run_stage = stages.PROVISION + # Modify the logger prompt for messages logged within this function. 
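+  # The label has the form '<name>(<sequence>/<total>)', which keeps log lines
+  # attributable when several benchmarks run in the same PKB invocation.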
+ label_extension = '{}({}/{})'.format( + spec.name, spec.sequence_number, spec.total_benchmarks) + context.SetThreadBenchmarkSpec(spec) + log_context = log_util.GetThreadLogContext() + with log_context.ExtendLabel(label_extension): + with spec.RedirectGlobalFlags(): + end_to_end_timer = timing_util.IntervalTimer() + detailed_timer = timing_util.IntervalTimer() + interrupt_checker = None + try: + with end_to_end_timer.Measure('End to End'): + + _SetRestoreSpec(spec) + _SetFreezePath(spec) + + if stages.PROVISION in FLAGS.run_stage: + DoProvisionPhase(spec, detailed_timer) + + if stages.PREPARE in FLAGS.run_stage: + current_run_stage = stages.PREPARE + interrupt_checker = InterruptChecker(spec.vms) + DoPreparePhase(spec, detailed_timer) + interrupt_checker.EndCheckInterruptThreadAndRaiseError() + interrupt_checker = None + + if stages.RUN in FLAGS.run_stage: + current_run_stage = stages.RUN + interrupt_checker = InterruptChecker(spec.vms) + DoRunPhase(spec, collector, detailed_timer) + interrupt_checker.EndCheckInterruptThreadAndRaiseError() + interrupt_checker = None + + if stages.CLEANUP in FLAGS.run_stage: + current_run_stage = stages.CLEANUP + interrupt_checker = InterruptChecker(spec.vms) + DoCleanupPhase(spec, detailed_timer) + interrupt_checker.EndCheckInterruptThreadAndRaiseError() + interrupt_checker = None + + if stages.TEARDOWN in FLAGS.run_stage: + current_run_stage = stages.TEARDOWN + DoTeardownPhase(spec, collector, detailed_timer) + + # Add timing samples. + if (FLAGS.run_stage == stages.STAGES and + timing_util.EndToEndRuntimeMeasurementEnabled()): + collector.AddSamples( + end_to_end_timer.GenerateSamples(), spec.name, spec) + events.samples_created.send( + stages.RUN, benchmark_spec=spec, samples=end_to_end_timer.GenerateSamples()) + if timing_util.RuntimeMeasurementsEnabled(): + collector.AddSamples( + detailed_timer.GenerateSamples(), spec.name, spec) + + # Add resource related samples. + collector.AddSamples(spec.GetSamples(), spec.name, spec) + + # except block will clean up benchmark specific resources on exception. It + # may also clean up generic resources based on + # FLAGS.always_teardown_on_exception. + except (Exception, KeyboardInterrupt) as e: + # Log specific type of failure, if known + # TODO(dlott) Move to exception chaining with Python3 support + if (isinstance(e, errors.Benchmarks.InsufficientCapacityCloudFailure) + or 'InsufficientCapacityCloudFailure' in str(e)): + spec.failed_substatus = ( + benchmark_status.FailedSubstatus.INSUFFICIENT_CAPACITY) + elif (isinstance(e, errors.Benchmarks.QuotaFailure) + or 'QuotaFailure' in str(e)): + spec.failed_substatus = benchmark_status.FailedSubstatus.QUOTA + elif isinstance(e, errors.Benchmarks.KnownIntermittentError): + spec.failed_substatus = ( + benchmark_status.FailedSubstatus.KNOWN_INTERMITTENT) + elif (isinstance(e, errors.Benchmarks.UnsupportedConfigError) or + 'UnsupportedConfigError' in str(e)): + spec.failed_substatus = benchmark_status.FailedSubstatus.UNSUPPORTED + elif isinstance(e, errors.Resource.RestoreError): + spec.failed_substatus = ( + benchmark_status.FailedSubstatus.RESTORE_FAILED) + elif isinstance(e, errors.Resource.FreezeError): + spec.failed_substatus = ( + benchmark_status.FailedSubstatus.FREEZE_FAILED) + else: + spec.failed_substatus = ( + benchmark_status.FailedSubstatus.UNCATEGORIZED) + spec.status_detail = str(e) + + # Resource cleanup (below) can take a long time. Log the error to give + # immediate feedback, then re-throw. 
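+      # The failed_substatus assigned above also feeds _ShouldRetry(), which
+      # decides whether RunBenchmarkTask schedules another attempt.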
+ logging.exception('Error during benchmark %s', spec.name) + if FLAGS.create_failed_run_samples: + PublishFailedRunSample(spec, str(e), current_run_stage, collector) + + # If the particular benchmark requests us to always call cleanup, do it + # here. + if stages.CLEANUP in FLAGS.run_stage and spec.always_call_cleanup: + DoCleanupPhase(spec, detailed_timer) + + if (FLAGS.always_teardown_on_exception and + stages.TEARDOWN not in FLAGS.run_stage): + # Note that if TEARDOWN is specified, it will happen below. + DoTeardownPhase(spec, collector, detailed_timer) + raise + # finally block will only clean up generic resources if teardown is + # included in FLAGS.run_stage. + finally: + if interrupt_checker: + interrupt_checker.EndCheckInterruptThread() + # Deleting resources should happen first so any errors with publishing + # don't prevent teardown. + if stages.TEARDOWN in FLAGS.run_stage: + spec.Delete() + if FLAGS.publish_after_run: + collector.PublishSamples() + events.benchmark_end.send(benchmark_spec=spec) + # Pickle spec to save final resource state. + spec.Pickle() + spec.status = benchmark_status.SUCCEEDED + + +def PublishFailedRunSample( + spec: bm_spec.BenchmarkSpec, + error_message: str, + run_stage_that_failed: str, + collector: publisher.SampleCollector): + """Publish a sample.Sample representing a failed run stage. + + The sample metric will have the name 'Run Failed'; + the value will be the timestamp in Unix Seconds, and the unit will be + 'seconds'. + + The sample metadata will include the error message from the + Exception, the run stage that failed, as well as all PKB + command line flags that were passed in. + + Args: + spec: benchmark_spec + error_message: error message that was caught, resulting in the + run stage failure. + run_stage_that_failed: run stage that failed by raising an Exception + collector: the collector to publish to. + """ + # Note: currently all provided PKB command line flags are included in the + # metadata. We may want to only include flags specific to the benchmark that + # failed. This can be acomplished using gflag's FlagsByModuleDict(). 
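+  # Illustrative shape of the published sample (values are examples only):
+  #   metric='Run Failed', value=<unix seconds>, unit='seconds',
+  #   metadata={'error_message': ..., 'run_stage': ..., 'flags': ...}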
+ metadata = { + 'error_message': error_message[0:FLAGS.failed_run_samples_error_length], + 'run_stage': run_stage_that_failed, + 'flags': str(flag_util.GetProvidedCommandLineFlags()) + } + vm_util.RunThreaded(lambda vm: vm.UpdateInterruptibleVmStatus(use_api=True), + spec.vms) + + interruptible_vm_count = 0 + interrupted_vm_count = 0 + vm_status_codes = [] + for vm in spec.vms: + if vm.IsInterruptible(): + interruptible_vm_count += 1 + if vm.WasInterrupted(): + interrupted_vm_count += 1 + spec.failed_substatus = ( + benchmark_status.FailedSubstatus.INTERRUPTED) + status_code = vm.GetVmStatusCode() + if status_code: + vm_status_codes.append(status_code) + + if spec.failed_substatus: + metadata['failed_substatus'] = spec.failed_substatus + + if interruptible_vm_count: + metadata.update({'interruptible_vms': interruptible_vm_count, + 'interrupted_vms': interrupted_vm_count, + 'vm_status_codes': vm_status_codes}) + if interrupted_vm_count: + logging.error( + '%d interruptible VMs were interrupted in this failed PKB run.', + interrupted_vm_count) + _PublishEventSample(spec, 'Run Failed', metadata, collector) + + +def _ShouldRetry(spec: bm_spec.BenchmarkSpec) -> bool: + """Returns whether the benchmark run should be retried.""" + return (spec.status == benchmark_status.FAILED and + spec.failed_substatus in _RETRY_SUBSTATUSES.value) + + +def RunBenchmarkTask( + spec: bm_spec.BenchmarkSpec +) -> Tuple[Sequence[bm_spec.BenchmarkSpec], List[sample.SampleDict]]: + """Task that executes RunBenchmark. + + This is designed to be used with RunParallelProcesses. Note that + for retries only the last run has its samples published. + + Arguments: + spec: BenchmarkSpec. The spec to call RunBenchmark with. + + Returns: + A BenchmarkSpec for each run iteration and a list of samples from the + last run. + """ + # Many providers name resources using run_uris. When running multiple + # benchmarks in parallel, this causes name collisions on resources. + # By modifying the run_uri, we avoid the collisions. + if FLAGS.run_processes and FLAGS.run_processes > 1: + spec.config.flags['run_uri'] = FLAGS.run_uri + str(spec.sequence_number) + # Unset run_uri so the config value takes precedence. + FLAGS['run_uri'].present = 0 + + # Start Intel Contribution + collector = intel_publisher.IntelSampleCollector() if intel_publisher else publisher.SampleCollector() + # End Intel Contribution + + zone_retry_manager = ZoneRetryManager() + # Set the run count. + max_run_count = 1 + _MAX_RETRIES.value + + # Useful format string for debugging. + benchmark_info = ( + f'{spec.sequence_number}/{spec.total_benchmarks} ' + f'{spec.name} (UID: {spec.uid})' + ) + + result_specs = [] + for current_run_count in range(max_run_count): + # Attempt to return the most recent results. + if _TEARDOWN_EVENT.is_set(): + if result_specs and collector: + return result_specs, collector.samples + return [spec], [] + + run_start_msg = ('\n' + '-' * 85 + '\n' + + 'Starting benchmark %s attempt %s of %s' + '\n' + '-' * 85) + logging.info(run_start_msg, benchmark_info, current_run_count + 1, + max_run_count) + + # Start Intel Contribution + collector = intel_publisher.IntelSampleCollector() if intel_publisher else publisher.SampleCollector() + # End Intel Contribution + + # Make a new copy of the benchmark_spec for each run since currently a + # benchmark spec isn't compatible with multiple runs. In particular, the + # benchmark_spec doesn't correctly allow for a provision of resources + # after tearing down. 
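+    # A fresh deep copy per attempt also means state mutated by a failed run
+    # is discarded before any retry.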
+ spec_for_run = copy.deepcopy(spec) + result_specs.append(spec_for_run) + try: + RunBenchmark(spec_for_run, collector) + if intel_publisher: + # Added by Intel + collector.IntelPublishSamples() + # End added by Intel + except BaseException as e: # pylint: disable=broad-except + logging.exception('Exception running benchmark') + msg = f'Benchmark {benchmark_info} failed.' + if isinstance(e, KeyboardInterrupt) or FLAGS.stop_after_benchmark_failure: + logging.error('%s Execution will not continue.', msg) + _TEARDOWN_EVENT.set() + break + + + # Don't retry on the last run. + if _ShouldRetry(spec_for_run) and current_run_count != max_run_count - 1: + logging.info( + 'Benchmark should be retried. Waiting %s seconds before running.', + _RETRY_DELAY_SECONDS.value) + time.sleep(_RETRY_DELAY_SECONDS.value) + + # Handle smart retries if specified. + zone_retry_manager.HandleSmartRetries(spec_for_run) + + else: + logging.info( + 'Benchmark should not be retried. ' + 'Finished %s runs of %s', current_run_count + 1, max_run_count) + break + + # We need to return both the spec and samples so that we know + # the status of the test and can publish any samples that + # haven't yet been published. + return result_specs, collector.samples + + +class ZoneRetryManager(): + """Encapsulates state and functions for zone retries. + + Attributes: + original_zone: If specified, the original zone provided to the benchmark. + zones_tried: Zones that have already been tried in previous runs. + """ + + def __init__(self): + self._CheckFlag() + if not _SMART_CAPACITY_RETRY.value and not _SMART_QUOTA_RETRY.value: + return + self._zones_tried: Set[str] = set() + self._regions_tried: Set[str] = set() + self._utils: types.ModuleType = providers.LoadProviderUtils(FLAGS.cloud) + self._SetOriginalZoneAndFlag() + + def _GetCurrentZoneFlag(self): + return FLAGS[self._zone_flag].value[0] + + def _CheckFlag(self) -> None: + for zone_flag in ['zone', 'zones']: + if FLAGS[zone_flag].value: + self._zone_flag = zone_flag + if self._GetCurrentZoneFlag() == _ANY_ZONE: + FLAGS['smart_capacity_retry'].parse(True) + FLAGS['smart_quota_retry'].parse(True) + + def _SetOriginalZoneAndFlag(self) -> None: + """Records the flag name and zone value that the benchmark started with.""" + # This is guaranteed to set values due to flag validator. + self._supported_zones = self._utils.GetZonesFromMachineType() + if self._GetCurrentZoneFlag() == _ANY_ZONE: + if _MAX_RETRIES.value < 1: + FLAGS['retries'].parse(len(self._supported_zones)) + self._AssignNewZone() + self._original_zone = self._GetCurrentZoneFlag() + self._original_region = self._utils.GetRegionFromZone(self._original_zone) + + def HandleSmartRetries(self, spec: bm_spec.BenchmarkSpec) -> None: + """Handles smart zone retry flags if provided.""" + if (_SMART_QUOTA_RETRY.value and spec.failed_substatus + == benchmark_status.FailedSubstatus.QUOTA): + self._AssignZoneToNewRegion() + elif (_SMART_CAPACITY_RETRY.value and spec.failed_substatus in { + benchmark_status.FailedSubstatus.UNSUPPORTED, + benchmark_status.FailedSubstatus.INSUFFICIENT_CAPACITY + }): + self._AssignNewZone() + + def _AssignZoneToNewRegion(self) -> None: + """Changes zone to be a new zone in the different region.""" + region = self._utils.GetRegionFromZone(self._GetCurrentZoneFlag()) + self._regions_tried.add(region) + regions_to_try = set( + self._utils.GetRegionFromZone(zone) + for zone in self._supported_zones) - self._regions_tried + # Restart from empty if we've exhausted all alternatives. 
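+    # (falling back to the region originally requested by the benchmark)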
+ if not regions_to_try: + self._regions_tried.clear() + new_region = self._original_region + else: + new_region = secrets.choice(tuple(regions_to_try)) + logging.info('Retry using new region %s', new_region) + self._ChooseAndSetNewZone(self._utils.GetZonesInRegion(new_region)) + + def _AssignNewZone(self) -> None: + """Changes zone to be a new zone.""" + self._ChooseAndSetNewZone(self._supported_zones) + + def _ChooseAndSetNewZone(self, possible_zones: Set[str]) -> None: + """Saves the current _zone_flag and sets it to a new zone. + + Args: + possible_zones: The set of zones to choose from. + """ + current_zone = self._GetCurrentZoneFlag() + if current_zone != _ANY_ZONE: + self._zones_tried.add(current_zone) + zones_to_try = possible_zones - self._zones_tried + # Restart from empty if we've exhausted all alternatives. + if not zones_to_try: + self._zones_tried.clear() + new_zone = self._original_zone + else: + new_zone = secrets.choice(tuple(zones_to_try)) + logging.info('Retry using new zone %s', new_zone) + FLAGS[self._zone_flag].unparse() + FLAGS[self._zone_flag].parse([new_zone]) + + +def _LogCommandLineFlags(): + result = [] + for name in FLAGS: + flag = FLAGS[name] + if flag.present: + result.append(flag.serialize()) + logging.info('Flag values:\n%s', '\n'.join(result)) + + +def SetUpPKB(): + """Set globals and environment variables for PKB. + + After SetUpPKB() returns, it should be possible to call PKB + functions, like benchmark_spec.Prepare() or benchmark_spec.Run(). + + SetUpPKB() also modifies the local file system by creating a temp + directory and storing new SSH keys. + """ + try: + _InitializeRunUri() + except errors.Error as e: + logging.error(e) + sys.exit(1) + + # Initialize logging. + vm_util.GenTempDir() + if FLAGS.use_pkb_logging: + log_util.ConfigureLogging( + stderr_log_level=log_util.LOG_LEVELS[FLAGS.log_level], + log_path=vm_util.PrependTempDir(LOG_FILE_NAME), + run_uri=FLAGS.run_uri, + file_log_level=log_util.LOG_LEVELS[FLAGS.file_log_level]) + logging.info('PerfKitBenchmarker version: %s', version.VERSION) + + # Translate deprecated flags and log all provided flag values. + disk.WarnAndTranslateDiskFlags() + _LogCommandLineFlags() + + # Register skip pending runs functionality. + RegisterSkipPendingRunsCheck(_SkipPendingRunsFile) + + # Check environment. + if not FLAGS.ignore_package_requirements: + requirements.CheckBasicRequirements() + + for executable in REQUIRED_EXECUTABLES: + if not vm_util.ExecutableOnPath(executable): + raise errors.Setup.MissingExecutableError( + 'Could not find required executable "%s"' % executable) + + # Check mutually exclusive flags + if FLAGS.run_stage_iterations > 1 and FLAGS.run_stage_time > 0: + raise errors.Setup.InvalidFlagConfigurationError( + 'Flags run_stage_iterations and run_stage_time are mutually exclusive') + + vm_util.SSHKeyGen() + vm_util.InstallRsync() + + if FLAGS.static_vm_file: + with open(FLAGS.static_vm_file) as fp: + static_virtual_machine.StaticVirtualMachine.ReadStaticVirtualMachineFile( + fp) + + events.initialization_complete.send(parsed_flags=FLAGS) + + benchmark_lookup.SetBenchmarkModuleFunction(benchmark_sets.BenchmarkModule) + package_lookup.SetPackageModuleFunction(benchmark_sets.PackageModule) + + # Update max_concurrent_threads to use at least as many threads as VMs. This + # is important for the cluster_boot benchmark where we want to launch the VMs + # in parallel. 
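+  # For example, when --max_concurrent_threads is left unset and --num_vms is
+  # larger than background_tasks.MAX_CONCURRENT_THREADS, the pool below grows
+  # to num_vms threads so no VM waits behind the default cap while booting.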
+ if not FLAGS.max_concurrent_threads: + FLAGS.max_concurrent_threads = max( + background_tasks.MAX_CONCURRENT_THREADS, + FLAGS.num_vms) + logging.info('Setting --max_concurrent_threads=%d.', + FLAGS.max_concurrent_threads) + + +def RunBenchmarkTasksInSeries(tasks): + """Runs benchmarks in series. + + Arguments: + tasks: list of tuples of task: [(RunBenchmarkTask, (spec,), {}),] + + Returns: + list of tuples of func results + """ + return [func(*args, **kwargs) for func, args, kwargs in tasks] + + +def RunBenchmarks(): + """Runs all benchmarks in PerfKitBenchmarker. + + Returns: + Exit status for the process. + """ + benchmark_specs = _CreateBenchmarkSpecs() + if FLAGS.randomize_run_order: + random.shuffle(benchmark_specs) + if FLAGS.dry_run: + print('PKB will run with the following configurations:') + for spec in benchmark_specs: + print(spec) + print('') + return 0 + + benchmark_spec_lists = None + collector = publisher.SampleCollector() + try: + tasks = [(RunBenchmarkTask, (spec,), {}) + for spec in benchmark_specs] + if FLAGS.run_processes is None: + spec_sample_tuples = RunBenchmarkTasksInSeries(tasks) + else: + spec_sample_tuples = background_tasks.RunParallelProcesses( + tasks, FLAGS.run_processes, FLAGS.run_processes_delay) + benchmark_spec_lists, sample_lists = list(zip(*spec_sample_tuples)) + for sample_list in sample_lists: + collector.samples.extend(sample_list) + + finally: + if collector.samples: + collector.PublishSamples() + # Use the last run in the series of runs. + if benchmark_spec_lists: + benchmark_specs = [spec_list[-1] for spec_list in benchmark_spec_lists] + if benchmark_specs: + logging.info(benchmark_status.CreateSummary(benchmark_specs)) + + logging.info('Complete logs can be found at: %s', + vm_util.PrependTempDir(LOG_FILE_NAME)) + logging.info('Completion statuses can be found at: %s', + vm_util.PrependTempDir(COMPLETION_STATUS_FILE_NAME)) + + if stages.TEARDOWN not in FLAGS.run_stage: + logging.info( + 'To run again with this setup, please use --run_uri=%s', FLAGS.run_uri) + + if FLAGS.archive_bucket: + archive.ArchiveRun(vm_util.GetTempDir(), FLAGS.archive_bucket, + gsutil_path=FLAGS.gsutil_path, + prefix=FLAGS.run_uri + '_') + + # Start Intel Contribution + if intel_publisher: + if intel_publisher.IntelSampleCollector.PublishEnabled() and FLAGS.intel_publisher_s3_archive_bucket_url: + patterns_to_ignore = [] + # If emon is enabled and user selects not to publish EDP data, add it to the patterns_to_ignore list + if not FLAGS.edp_publish: + patterns_to_ignore.append(EMON_EDP_TARBALL) + intel_publisher.ArchiveToS3(vm_util.GetTempDir(), FLAGS.run_uri + '.zip', patterns_to_ignore) + # End Intel Contribution + + # Write completion status file(s) + completion_status_file_name = ( + vm_util.PrependTempDir(COMPLETION_STATUS_FILE_NAME)) + with open(completion_status_file_name, 'w') as status_file: + _WriteCompletionStatusFile(benchmark_specs, status_file) + if FLAGS.completion_status_file: + with open(FLAGS.completion_status_file, 'w') as status_file: + _WriteCompletionStatusFile(benchmark_specs, status_file) + + all_benchmarks_succeeded = all(spec.status == benchmark_status.SUCCEEDED + for spec in benchmark_specs) + + return 0 if all_benchmarks_succeeded else 1 + + +def _GenerateBenchmarkDocumentation(): + """Generates benchmark documentation to show in --help.""" + benchmark_docs = [] + for benchmark_module in (linux_benchmarks.BENCHMARKS + + windows_benchmarks.BENCHMARKS): + benchmark_config = configs.LoadMinimalConfig( + benchmark_module.BENCHMARK_CONFIG, 
benchmark_module.BENCHMARK_NAME) + vm_groups = benchmark_config.get('vm_groups', {}) + total_vm_count = 0 + vm_str = '' + scratch_disk_str = '' + for group in six.itervalues(vm_groups): + group_vm_count = group.get('vm_count', 1) + if group_vm_count is None: + vm_str = 'variable' + else: + total_vm_count += group_vm_count + if group.get('disk_spec'): + scratch_disk_str = ' with scratch volume(s)' + + name = benchmark_module.BENCHMARK_NAME + if benchmark_module in windows_benchmarks.BENCHMARKS: + name += ' (Windows)' + benchmark_docs.append('%s: %s (%s VMs%s)' % + (name, + benchmark_config['description'], + vm_str or total_vm_count, + scratch_disk_str)) + return '\n\t'.join(benchmark_docs) + + +def _CreateLscpuSamples(vms): + """Creates samples from linux VMs of lscpu output.""" + samples = [] + for vm in vms: + if vm.OS_TYPE in os_types.LINUX_OS_TYPES: + metadata = {'node_name': vm.name} + metadata.update(vm.CheckLsCpu().data) + samples.append(sample.Sample('lscpu', 0, '', metadata)) + return samples + + +def _CreateProcCpuSamples(vms): + """Creates samples from linux VMs of lscpu output.""" + samples = [] + for vm in vms: + if vm.OS_TYPE not in os_types.LINUX_OS_TYPES: + continue + data = vm.CheckProcCpu() + metadata = {'node_name': vm.name} + metadata.update(data.GetValues()) + samples.append(sample.Sample('proccpu', 0, '', metadata)) + metadata = {'node_name': vm.name} + for processor_id, raw_values in data.mappings.items(): + values = ['%s=%s' % item for item in raw_values.items()] + metadata['proc_{}'.format(processor_id)] = ';'.join(sorted(values)) + samples.append(sample.Sample('proccpu_mapping', 0, '', metadata)) + return samples + + +def _CreateCpuVulnerabilitySamples(vms) -> List[sample.Sample]: + """Returns samples of the VMs' CPU vulernabilites.""" + + def CreateSample(vm) -> Optional[sample.Sample]: + metadata = {'vm_name': vm.name} + metadata.update(vm.cpu_vulnerabilities.asdict) + return sample.Sample('cpu_vuln', 0, '', metadata) + + linux_vms = [vm for vm in vms if vm.OS_TYPE in os_types.LINUX_OS_TYPES] + return vm_util.RunThreaded(CreateSample, linux_vms) + + +def _CreateGccSamples(vms): + """Creates samples from linux VMs of gcc version output.""" + + def _GetGccMetadata(vm): + return { + 'name': vm.name, + 'versiondump': build_tools.GetVersion(vm, 'gcc'), + 'versioninfo': build_tools.GetVersionInfo(vm, 'gcc') + } + + return [ + sample.Sample('gcc_version', 0, '', metadata) + for metadata in vm_util.RunThreaded(_GetGccMetadata, vms) + ] + + +def _CreateGlibcSamples(vms): + """Creates glibc samples from linux VMs of ldd output.""" + + def _GetGlibcVersionInfo(vm): + out, _ = vm.RemoteCommand('ldd --version', ignore_failure=True) + # return first line + return out.splitlines()[0] if out else None + + def _GetGlibcMetadata(vm): + return { + 'name': vm.name, + # TODO(user): Add glibc versiondump. + 'versioninfo': _GetGlibcVersionInfo(vm) + } + + return [ + sample.Sample('glibc_version', 0, '', metadata) + for metadata in vm_util.RunThreaded(_GetGlibcMetadata, vms) + ] + + +def _ParseMeminfo(meminfo_txt: str) -> Tuple[Dict[str, int], List[str]]: + """Returns the parsed /proc/meminfo data. + + Response has entries such as {'MemTotal' : 32887056, 'Inactive': 4576524}. If + the /proc/meminfo entry has two values such as + MemTotal: 32887056 kB + checks that the last value is 'kB' If it is not then adds that line to the + 2nd value in the tuple. 
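+
+  For example, "MemTotal:  32887056 kB" parses to {'MemTotal': 32887056}, and
+  a single-value line such as "HugePages_Total:  0" parses to
+  {'HugePages_Total': 0}; a line whose trailing unit is anything other than
+  'kB' is reported in the malformed list instead.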
+ + Args: + meminfo_txt: contents of /proc/meminfo + + Returns: + Tuple where the first entry is a dict of the parsed keys and the second + are unparsed lines. + """ + data: Dict[str, int] = {} + malformed: List[str] = [] + for line in meminfo_txt.splitlines(): + try: + key, full_value = re.split(r':\s+', line) + parts = full_value.split() + if len(parts) == 1 or (len(parts) == 2 and parts[1] == 'kB'): + data[key] = int(parts[0]) + else: + malformed.append(line) + except ValueError: + # If the line does not match "key: value" or if the value is not an int + malformed.append(line) + return data, malformed + + +@events.samples_created.connect +def _CollectMeminfoHandler(sender: str, benchmark_spec: bm_spec.BenchmarkSpec, + samples: List[sample.Sample]) -> None: + """Optionally creates /proc/meminfo samples. + + If the flag --collect_meminfo is set appends a sample.Sample of /proc/meminfo + data for every VM in the run. + + Parameter names cannot be changed as the method is called by events.send with + keyword arguments. + + Args: + sender: Unused sender. + benchmark_spec: The benchmark spec. + samples: Generated samples that can be appended to. + """ + del sender # Unused as appending to samples with VMs from benchmark_spec + if not _COLLECT_MEMINFO.value: + return + + def CollectMeminfo(vm): + txt, _ = vm.RemoteCommand('cat /proc/meminfo') + meminfo, malformed = _ParseMeminfo(txt) + meminfo.update({ + 'meminfo_keys': ','.join(sorted(meminfo)), + 'meminfo_vmname': vm.name, + 'meminfo_machine_type': vm.machine_type, + 'meminfo_os_type': vm.OS_TYPE, + }) + if malformed: + meminfo['meminfo_malformed'] = ','.join(sorted(malformed)) + return sample.Sample('meminfo', 0, '', meminfo) + + linux_vms = [ + vm for vm in benchmark_spec.vms if vm.OS_TYPE in os_types.LINUX_OS_TYPES + ] + + samples.extend(vm_util.RunThreaded(CollectMeminfo, linux_vms)) + + +def Main(): + log_util.ConfigureBasicLogging() + _InjectBenchmarkInfoIntoDocumentation() + _ParseFlags() + if FLAGS.helpmatch: + _PrintHelp(FLAGS.helpmatch) + return 0 + if FLAGS.helpmatchmd: + _PrintHelpMD(FLAGS.helpmatchmd) + return 0 + CheckVersionFlag() + SetUpPKB() + return RunBenchmarks() diff --git a/script/cumulus/pkb/perfkitbenchmarker/placement_group.py b/script/cumulus/pkb/perfkitbenchmarker/placement_group.py new file mode 100644 index 0000000..81257ce --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/placement_group.py @@ -0,0 +1,121 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to represent Placement Group Object. + +Top-level Placement Group implementation. +Cloud specific implementations of Placement Group needed. 
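+
+Callers resolve the concrete classes at runtime via
+GetPlacementGroupSpecClass(cloud) and GetPlacementGroupClass(cloud); a
+cloud-specific subclass makes itself discoverable by setting its CLOUD
+attribute (for example CLOUD = 'AWS').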
+""" + + +from absl import flags +from perfkitbenchmarker import resource +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec + +FLAGS = flags.FLAGS + +PLACEMENT_GROUP_CLUSTER = 'cluster' +PLACEMENT_GROUP_SUPERCLUSTER = 'supercluster' +PLACEMENT_GROUP_SPREAD = 'spread' +PLACEMENT_GROUP_NONE = 'none' +PLACEMENT_GROUP_OPTIONS = frozenset([ + PLACEMENT_GROUP_CLUSTER, + PLACEMENT_GROUP_SPREAD, + PLACEMENT_GROUP_NONE +]) + +# Default placement group style is specified by Cloud Specific Placement Group. +flags.DEFINE_enum( + 'placement_group_style', None, + list(PLACEMENT_GROUP_OPTIONS) + [PLACEMENT_GROUP_SUPERCLUSTER], + 'The vm placement group option to use. Default set by cloud.') + + +def GetPlacementGroupSpecClass(cloud): + """Returns the PlacementGroupSpec class corresponding to 'cloud'.""" + return spec.GetSpecClass(BasePlacementGroupSpec, CLOUD=cloud) + + +def GetPlacementGroupClass(cloud): + """Returns the PlacementGroup class corresponding to 'cloud'.""" + return resource.GetResourceClass(BasePlacementGroup, + CLOUD=cloud) + + +class BasePlacementGroupSpec(spec.BaseSpec): + """Storing various data about a placement group. + + Attributes: + zone: The zone the in which the placement group will launch. + """ + + SPEC_TYPE = 'BasePlacementGroupSpec' + CLOUD = None + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(BasePlacementGroupSpec, cls)._ApplyFlags(config_values, flag_values) + if FLAGS.placement_group_style: + config_values['placement_group_style'] = FLAGS.placement_group_style + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Can be overridden by derived classes to add options or impose additional + requirements on existing options. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(BasePlacementGroupSpec, cls)._GetOptionDecoderConstructions() + result.update({'zone': (option_decoders.StringDecoder, {'none_ok': True})}) + return result + + +class BasePlacementGroup(resource.BaseResource): + """Base class for Placement Groups. + + This class holds Placement Group methods and attributes relating to the + Placement Groups as a cloud + resource. + + Attributes: + zone: The zone the Placement Group was launched in. + """ + + RESOURCE_TYPE = 'BasePlacementGroup' + + def __init__(self, placement_group_spec): + """Initialize BasePlacementGroup class. + + Args: + placement_group_spec: placement_group.BasePlacementGroupSpec object of the + placement group. + """ + super(BasePlacementGroup, self).__init__() + self.zone = placement_group_spec.zone diff --git a/script/cumulus/pkb/perfkitbenchmarker/provider_info.py b/script/cumulus/pkb/perfkitbenchmarker/provider_info.py new file mode 100644 index 0000000..44cd972 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/provider_info.py @@ -0,0 +1,54 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing class for provider data. + +This contains the BaseProviderInfo class which is +used for IsBenchmarkSupported + +""" + +import six + + +_PROVIDER_INFO_REGISTRY = {} + + +def GetProviderInfoClass(cloud): + """Returns the provider info class corresponding to the cloud.""" + return _PROVIDER_INFO_REGISTRY.get(cloud, BaseProviderInfo) + + +class AutoRegisterProviderInfoMeta(type): + """Metaclass which allows ProviderInfos to automatically be registered.""" + + def __init__(cls, name, bases, dct): + super(AutoRegisterProviderInfoMeta, cls).__init__(name, bases, dct) + if hasattr(cls, 'CLOUD') and cls.CLOUD is not None: + _PROVIDER_INFO_REGISTRY[cls.CLOUD] = cls + + +class BaseProviderInfo(six.with_metaclass(AutoRegisterProviderInfoMeta)): + """Class that holds provider-related data.""" + + CLOUD = None + + UNSUPPORTED_BENCHMARKS = [] + + @classmethod + def IsBenchmarkSupported(cls, benchmark): + if benchmark in cls.UNSUPPORTED_BENCHMARKS: + return False + else: + return True diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/providers/__init__.py new file mode 100644 index 0000000..58d0bf8 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/__init__.py @@ -0,0 +1,119 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Providers for PKB.""" +import importlib +import logging +import os +import types + +from perfkitbenchmarker import events +from perfkitbenchmarker import import_util +from perfkitbenchmarker import requirements +from perfkitbenchmarker.providers import aws +from perfkitbenchmarker.providers import azure +try: + from perfkitbenchmarker.providers import ibmcloud +except: + pass + +GCP = 'GCP' +AZURE = 'Azure' +AWS = 'AWS' +IBMCLOUD = 'IBMCloud' +ALICLOUD = 'AliCloud' +KUBERNETES = 'Kubernetes' +DIGITALOCEAN = 'DigitalOcean' +OPENSTACK = 'OpenStack' +CLOUDSTACK = 'CloudStack' +RACKSPACE = 'Rackspace' +MESOS = 'Mesos' +PROFITBRICKS = 'ProfitBricks' +TENCENT = 'Tencent' +DOCKER = 'Docker' +YANDEX = "Yandex" + +VALID_CLOUDS = (GCP, AZURE, AWS, ALICLOUD, TENCENT) + +_imported_providers = set() + + +def _GetProviderPackageName(cloud: str) -> str: + """Gets the name of the provider package that corresponds to the cloud.""" + return cloud.lower() + + +def LoadProviderFlags(providers): + """Imports just the flags module for each provider. 
+ + This allows PKB to load flag definitions from each provider to include in the + help text without actually loading any other provider-specific modules. + + Args: + providers: series of strings. Each element is a value from VALID_CLOUDS + indicating a cloud provider for which to import the flags module. + """ + for provider_name in providers: + normalized_name = _GetProviderPackageName(provider_name) + flags_module_name = '.'.join((__name__, normalized_name, 'flags')) + importlib.import_module(flags_module_name) + + +# Import flag definitions for all cloud providers. +LoadProviderFlags(VALID_CLOUDS) + + +def LoadProviderUtils(cloud: str) -> types.ModuleType: + util_module_name = '.'.join( + (__name__, _GetProviderPackageName(cloud), 'util')) + return importlib.import_module(util_module_name) + + +def LoadProvider(provider_name, ignore_package_requirements=True): + """Loads the all modules in the 'provider_name' package. + + This function first checks the specified provider's Python package + requirements file, if one exists, and verifies that all requirements are met. + Next, it loads all modules in the specified provider's package. By loading + these modules, relevant classes (e.g. VMs) will register themselves. + + Args: + provider_name: string chosen from VALID_CLOUDS. The name of the provider + whose modules should be loaded. + ignore_package_requirements: boolean. If True, the provider's Python package + requirements file is ignored. + """ + if provider_name in _imported_providers: + return + + # Check package requirements from the provider's pip requirements file. + normalized_name = _GetProviderPackageName(provider_name) + if not ignore_package_requirements: + requirements.CheckProviderRequirements(normalized_name) + + # Load all modules in the provider's directory. Simply loading those modules + # will cause relevant classes (e.g. VM and disk classes) to register + # themselves so that they can be instantiated during resource provisioning. + provider_package_path = os.path.join(__path__[0], normalized_name) + try: + modules = tuple(import_util.LoadModulesForPath( + [provider_package_path], __name__ + '.' + normalized_name)) + if not modules: + raise ImportError('No modules found for provider %s.' % provider_name) + except Exception: + logging.error('Unable to load provider %s.', provider_name) + raise + + # Signal that the provider's modules have been imported. + _imported_providers.add(provider_name) + events.provider_imported.send(provider_name) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/__init__.py new file mode 100644 index 0000000..d90275b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_disk.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_disk.py new file mode 100644 index 0000000..692d7af --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_disk.py @@ -0,0 +1,145 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing classes related to AliCloud disks. +""" + +import json +import logging +import string +import threading +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.alicloud import util + +FLAGS = flags.FLAGS + +DISK_TYPE = { + disk.STANDARD: 'cloud', + disk.REMOTE_SSD: 'cloud_ssd', + disk.PIOPS: 'cloud_efficiency', + disk.LOCAL: 'ephemeral_ssd', + disk.REMOTE_ESSD: 'cloud_essd', +} + + +class AliDisk(disk.BaseDisk): + """Object representing an AliCloud Disk.""" + + _lock = threading.Lock() + vm_devices = {} + + def __init__(self, disk_spec, zone): + super(AliDisk, self).__init__(disk_spec) + self.id = None + self.zone = zone + self.region = util.GetRegionByZone(self.zone) + self.attached_vm_id = None + + def _Create(self): + """Creates the disk.""" + create_cmd = util.ALI_PREFIX + [ + 'ecs', + 'CreateDisk', + '--RegionId %s' % self.region, + '--ZoneId %s' % self.zone, + '--Size %s' % self.disk_size, + '--DiskCategory %s' % DISK_TYPE[self.disk_type]] + if self.disk_type == disk.REMOTE_ESSD: + create_cmd.append('--PerformanceLevel %s' % FLAGS.ali_essd_performance_level) + + if FLAGS.ali_resource_group_id: + create_cmd.append('--ResourceGroupId %s' % FLAGS.ali_resource_group_id) + + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False) + response = json.loads(stdout) + self.id = response['DiskId'] + + def _Delete(self): + """Deletes the disk.""" + delete_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DeleteDisk', + '--DiskId %s' % self.id] + logging.info('Deleting AliCloud disk %s. This may fail if the disk is not ' + 'yet detached, but will be retried.', self.id) + delete_cmd = util.GetEncodedCmd(delete_cmd) + vm_util.IssueRetryableCommand(delete_cmd) + + def Attach(self, vm): + """Attaches the disk to a VM. + Args: + vm: The AliVirtualMachine instance to which the disk will be attached. 
+ """ + with self._lock: + self.attached_vm_id = vm.id + if self.attached_vm_id not in AliDisk.vm_devices: + AliDisk.vm_devices[self.attached_vm_id] = set( + string.ascii_lowercase[1:]) + self.device_letter = min(AliDisk.vm_devices[self.attached_vm_id]) + AliDisk.vm_devices[self.attached_vm_id].remove(self.device_letter) + + attach_cmd = util.ALI_PREFIX + [ + 'ecs', + 'AttachDisk', + '--InstanceId %s' % self.attached_vm_id, + '--DiskId %s' % self.id, + '--Device %s' % self.GetVirtualDevicePath()] + attach_cmd = util.GetEncodedCmd(attach_cmd) + vm_util.IssueRetryableCommand(attach_cmd) + + def Detach(self): + """Detaches the disk from a VM.""" + detach_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DetachDisk', + '--InstanceId %s' % self.attached_vm_id, + '--DiskId %s' % self.id] + detach_cmd = util.GetEncodedCmd(detach_cmd) + vm_util.IssueRetryableCommand(detach_cmd) + + with self._lock: + assert self.attached_vm_id in AliDisk.vm_devices + AliDisk.vm_devices[self.attached_vm_id].add(self.device_letter) + self.attached_vm_id = None + self.device_letter = None + + def GetDevicePath(self): + """Returns the path to the device inside the VM.""" + return '/dev/vd%s' % self.device_letter + + def GetVirtualDevicePath(self): + """Returns the path to the device visible to console users.""" + return '/dev/xvd%s' % self.device_letter + + @vm_util.Retry(poll_interval=5, max_retries=30, log_errors=False) + def WaitForDiskStatus(self, status_list): + """Waits until disk is attach to the instance""" + logging.info('Waits until the disk\'s status is one of statuses: %s', + status_list) + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeDisks', + '--RegionId %s' % self.region, + '--ZoneId %s' % self.zone, + '--DiskIds \'["%s"]\'' % self.id] + attach_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(attach_cmd) + response = json.loads(stdout) + disk = response['Disks']['Disk'] + assert len(disk) == 1 + status = disk[0]['Status'] + assert status in status_list diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_network.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_network.py new file mode 100644 index 0000000..79a9db3 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_network.py @@ -0,0 +1,337 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to AliCloud VM networking. + +The Firewall class provides a way of opening VM ports. The Network class allows +VMs to communicate via internal ips and isolates PerfKitBenchmarker VMs from +others in the +same project. See https://developers.google.com/compute/docs/networking for +more information about AliCloud VM networking. 
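+
+In outline, when --ali_use_vpc is true (the default) AliNetwork provisions an
+AliVpc, waits for it to become Available, then creates an AliVSwitch inside it
+and a VPC-scoped AliSecurityGroup; with --ali_use_vpc=false only a
+classic-network security group is created.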
+""" + +import json +import logging +import threading +import uuid + +from absl import flags +from perfkitbenchmarker import network +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.alicloud import util +from six.moves import range + +FLAGS = flags.FLAGS +MAX_NAME_LENGTH = 128 + + +class AliVpc(resource.BaseResource): + """An object representing an AliCloud VPC.""" + + def __init__(self, name, region): + super(AliVpc, self).__init__() + self.region = region + self.id = None + self.name = name + + def _Create(self): + """Creates the VPC.""" + create_cmd = util.ALI_PREFIX + [ + 'ecs', + 'CreateVpc', + '--VpcName %s' % self.name, + '--RegionId %s' % self.region, + '--CidrBlock 10.0.0.0/16'] + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False) + response = json.loads(stdout) + self.id = response['VpcId'] + + def _Exists(self): + """Returns true if the VPC exists.""" + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeVpcs', + '--RegionId %s' % self.region, + '--VpcId %s' % self.id] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + vpcs = response['Vpcs']['Vpc'] + assert len(vpcs) < 2, 'Too many VPCs.' + return len(vpcs) > 0 + + @vm_util.Retry(poll_interval=5, max_retries=30, log_errors=False) + def _WaitForVpcStatus(self, status_list): + """Waits until VPC's status is in status_list""" + logging.info('Waits until the status of VPC is in status_list: %s', + status_list) + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeVpcs', + '--RegionId %s' % self.region, + '--VpcId %s' % self.id] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + vpcs = response['Vpcs']['Vpc'] + assert len(vpcs) == 1 + vpc_status = response['Vpcs']['Vpc'][0]['Status'] + assert vpc_status in status_list + + def _Delete(self): + """Delete's the VPC.""" + delete_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DeleteVpc', + '--RegionId %s' % self.region, + '--VpcId %s' % self.id] + delete_cmd = util.GetEncodedCmd(delete_cmd) + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + +class AliVSwitch(resource.BaseResource): + """An object representing an AliCloud VSwitch.""" + + def __init__(self, name, zone, vpc_id): + super(AliVSwitch, self).__init__() + self.region = util.GetRegionByZone(zone) + self.id = None + self.vpc_id = vpc_id + self.zone = zone + self.name = name + + def _Create(self): + """Creates the VSwitch.""" + create_cmd = util.ALI_PREFIX + [ + 'ecs', + 'CreateVSwitch', + '--VSwitchName %s' % self.name, + '--ZoneId %s' % self.zone, + '--RegionId %s' % self.region, + '--CidrBlock 10.0.0.0/24', + '--VpcId %s' % self.vpc_id, + ] + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _, _ = vm_util.IssueCommand(create_cmd, raise_on_failure=False) + response = json.loads(stdout) + self.id = response['VSwitchId'] + + def _Delete(self): + """Deletes the VSwitch.""" + delete_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DeleteVSwitch', + '--RegionId %s' % self.region, + '--VSwitchId %s' % self.id] + delete_cmd = util.GetEncodedCmd(delete_cmd) + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the VSwitch exists.""" + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeVSwitches', + '--RegionId %s' % self.region, + 
'--VpcId %s' % self.vpc_id, + '--ZoneId %s' % self.zone] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + vswitches = response['VSwitches']['VSwitch'] + assert len(vswitches) < 2, 'Too many VSwitches.' + return len(vswitches) > 0 + + +class AliSecurityGroup(resource.BaseResource): + """Object representing an AliCloud Security Group.""" + + def __init__(self, name, region, use_vpc=True, vpc_id=None): + super(AliSecurityGroup, self).__init__() + self.name = name + self.region = region + self.use_vpc = use_vpc + self.vpc_id = vpc_id + + def _Create(self): + """Creates the security group.""" + create_cmd = util.ALI_PREFIX + [ + 'ecs', + 'CreateSecurityGroup', + '--SecurityGroupName %s' % self.name, + '--RegionId %s' % self.region] + if self.use_vpc: + create_cmd.append('--VpcId %s' % self.vpc_id) + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + self.group_id = json.loads(stdout)['SecurityGroupId'] + + def _Delete(self): + """Deletes the security group.""" + delete_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DeleteSecurityGroup', + '--RegionId %s' % self.region, + '--SecurityGroupId %s' % self.group_id] + delete_cmd = util.GetEncodedCmd(delete_cmd) + vm_util.IssueRetryableCommand(delete_cmd) + + def _Exists(self): + """Returns true if the security group exists.""" + show_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeSecurityGroups', + '--RegionId %s' % self.region, + '--SecurityGroupId %s' % self.group_id] + show_cmd = util.GetEncodedCmd(show_cmd) + stdout, _ = vm_util.IssueRetryableCommand(show_cmd) + response = json.loads(stdout) + securityGroups = response['SecurityGroups']['SecurityGroup'] + assert len(securityGroups) < 2, 'Too many securityGroups.' + if not securityGroups: + return False + return True + + +class AliFirewall(network.BaseFirewall): + """An object representing the AliCloud Firewall.""" + + CLOUD = providers.ALICLOUD + + def __init__(self): + self.firewall_set = set() + self._lock = threading.Lock() + + def AllowIcmp(self, vm): + """Opens the ICMP protocol on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the ICMP protocol for. + """ + if vm.is_static: + return + with self._lock: + authorize_cmd = util.ALI_PREFIX + [ + 'ecs', + 'AuthorizeSecurityGroup', + '--IpProtocol ICMP', + '--PortRange -1/-1', + '--SourceCidrIp 0.0.0.0/0', + '--RegionId %s' % vm.region, + '--SecurityGroupId %s' % vm.group_id] + if FLAGS.ali_use_vpc: + authorize_cmd.append('--NicType intranet') + authorize_cmd = util.GetEncodedCmd(authorize_cmd) + vm_util.IssueRetryableCommand(authorize_cmd) + + def AllowPort(self, vm, start_port, end_port=None, source_range=None): + """Opens a port on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the port for. + start_port: The first local port in a range of ports to open. + end_port: The last port in a range of ports to open. If None, only + start_port will be opened. + source_range: unsupported at present. + """ + + if not end_port: + end_port = start_port + + for port in range(start_port, end_port + 1): + self._AllowPort(vm, port) + + def _AllowPort(self, vm, port): + """Opens a port on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the port for. + port: The local port to open. 
+ """ + if vm.is_static: + return + entry = (port, vm.group_id) + if entry in self.firewall_set: + return + with self._lock: + if entry in self.firewall_set: + return + for protocol in ('tcp', 'udp'): + authorize_cmd = util.ALI_PREFIX + [ + 'ecs', + 'AuthorizeSecurityGroup', + '--IpProtocol %s' % protocol, + '--PortRange %s/%s' % (port, port), + '--SourceCidrIp 0.0.0.0/0', + '--RegionId %s' % vm.region, + '--SecurityGroupId %s' % vm.group_id] + if FLAGS.ali_use_vpc: + authorize_cmd.append('--NicType intranet') + authorize_cmd = util.GetEncodedCmd(authorize_cmd) + vm_util.IssueRetryableCommand(authorize_cmd) + self.firewall_set.add(entry) + + def DisallowAllPorts(self): + """Closes all ports on the firewall.""" + pass + + +class AliNetwork(network.BaseNetwork): + """Object representing a AliCloud Network.""" + + CLOUD = providers.ALICLOUD + + def __init__(self, spec): + super(AliNetwork, self).__init__(spec) + self.name = ( + 'perfkit-%s-%s' % (FLAGS.run_uri, str(uuid.uuid4())[-12:])) + self.region = util.GetRegionByZone(spec.zone) + self.use_vpc = FLAGS.ali_use_vpc + if self.use_vpc: + self.vpc = AliVpc(self.name, self.region) + self.vswitch = None + self.security_group = None + else: + self.security_group = \ + AliSecurityGroup(self.name, self.region, use_vpc=False) + + @vm_util.Retry() + def Create(self): + """Creates the network.""" + if self.use_vpc: + self.vpc.Create() + self.vpc._WaitForVpcStatus(['Available']) + if self.vswitch is None: + self.vswitch = AliVSwitch(self.name, self.zone, self.vpc.id) + self.vswitch.Create() + + if self.security_group is None: + self.security_group = AliSecurityGroup(self.name, + self.region, + use_vpc=True, + vpc_id=self.vpc.id) + self.security_group.Create() + else: + self.security_group.Create() + + def Delete(self): + """Deletes the network.""" + if self.use_vpc: + self.security_group.Delete() + self.vswitch.Delete() + self.security_group.Delete() + self.vpc.Delete() + else: + self.security_group.Delete() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_virtual_machine.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_virtual_machine.py new file mode 100644 index 0000000..e7120ef --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/ali_virtual_machine.py @@ -0,0 +1,458 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to represent an Ali Virtual Machine object. +All VM specifics are self-contained and the class provides methods to +operate on the VM: boot, shutdown, etc. 
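+
+The lifecycle PKB drives is roughly: _CreateDependencies (import the SSH key
+pair), _Create (CreateInstance, allocate a public or EIP address,
+StartInstance), _PostCreate (wait for Running, open the SSH port, tag the
+instance), then _Delete (StopInstance, DeleteInstance, release the EIP when
+VPC is used) and _DeleteDependencies on teardown.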
+""" + +import base64 +import json +import logging +import threading +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import linux_virtual_machine +from perfkitbenchmarker import providers +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.alicloud import ali_disk +from perfkitbenchmarker.providers.alicloud import ali_network +from perfkitbenchmarker.providers.alicloud import util +import six + +FLAGS = flags.FLAGS +NON_HVM_PREFIXES = ['t1', 's1', 's2', 's3', 'm1'] + +DRIVE_START_LETTER = 'b' +DEFAULT_DISK_SIZE = 500 +INSTANCE = 'instance' +IMAGE = 'image' +SNAPSHOT = 'snapshot' +DISK = 'disk' +NONE = 'none' +IO_OPTIMIZED = 'io_optimized' +RESOURCE_TYPE = { + INSTANCE: 'instance', + IMAGE: 'image', + SNAPSHOT: 'snapshot', + DISK: 'disk', +} +SSH_PORT = 22 + + +NUM_LOCAL_VOLUMES = { + 'ecs.t1.small': 4, + 'ecs.s1.small': 4, + 'ecs.s1.medium': 4, + 'ecs.s2.small': 4, + 'ecs.s2.large': 4, + 'ecs.s2.xlarge': 4, + 'ecs.s3.medium': 4, + 'ecs.s3.large': 4, + 'ecs.m1.medium': 4, +} +INSTANCE_EXISTS_STATUSES = frozenset( + ['Starting', 'Running', 'Stopping', 'Stopped']) +INSTANCE_DELETED_STATUSES = frozenset([]) +INSTANCE_KNOWN_STATUSES = INSTANCE_EXISTS_STATUSES | INSTANCE_DELETED_STATUSES + + +class AliVirtualMachine(virtual_machine.BaseVirtualMachine): + """Object representing an AliCloud Virtual Machine.""" + + CLOUD = providers.ALICLOUD + DEFAULT_ZONE = 'cn-hangzhou-d' + DEFAULT_MACHINE_TYPE = 'ecs.s3.large' + + _lock = threading.Lock() + imported_keyfile_set = set() + deleted_keyfile_set = set() + + def __init__(self, vm_spec): + """Initialize a AliCloud virtual machine. + Args: + vm_spec: virtual_machine.BaseVirtualMachineSpec object of the VM. 
+ """ + super(AliVirtualMachine, self).__init__(vm_spec) + self.image = FLAGS.image + self.user_name = FLAGS.ali_user_name + self.key_pair_name = None + self.region = util.GetRegionByZone(self.zone) + self.bandwidth_in = FLAGS.ali_bandwidth_in + self.bandwidth_out = FLAGS.ali_bandwidth_out + self.scratch_disk_size = FLAGS.scratch_disk_size or DEFAULT_DISK_SIZE + self.system_disk_type = FLAGS.ali_system_disk_type + self.system_disk_size = FLAGS.ali_system_disk_size + self.eip_address_bandwidth = FLAGS.ali_eip_address_bandwidth + self.network = ali_network.AliNetwork.GetNetwork(self) + self.firewall = ali_network.AliFirewall.GetFirewall() + + @vm_util.Retry(poll_interval=1, log_errors=False) + def _WaitForInstanceStatus(self, status_list): + """Waits until the instance's status is in status_list.""" + logging.info('Waits until the instance\'s status is one of statuses: %s', + status_list) + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeInstances', + '--RegionId %s' % self.region, + '--InstanceIds \'["%s"]\'' % self.id] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + instances = response['Instances']['Instance'] + assert len(instances) == 1 + status = instances[0]['Status'] + assert status in status_list + + @vm_util.Retry(poll_interval=5, max_retries=30, log_errors=False) + def _WaitForEipStatus(self, status_list): + """Waits until the instance's status is in status_list.""" + logging.info('Waits until the eip\'s status is one of statuses: %s', + status_list) + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeEipAddresses', + '--RegionId %s' % self.region, + '--AllocationId %s' % self.eip_id] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + EipAddresses = response['EipAddresses']['EipAddress'] + assert len(EipAddresses) == 1 + status = EipAddresses[0]['Status'] + assert status in status_list + + def _AllocatePubIp(self, region, instance_id): + """Allocate a public ip address and associate it to the instance.""" + if FLAGS.ali_use_vpc: + allocatip_cmd = util.ALI_PREFIX + [ + 'ecs', + 'AllocateEipAddress', + '--RegionId %s' % region, + '--InternetChargeType PayByTraffic', + '--Bandwidth %s' % self.eip_address_bandwidth] + allocatip_cmd = util.GetEncodedCmd(allocatip_cmd) + stdout, _ = vm_util.IssueRetryableCommand(allocatip_cmd) + response = json.loads(stdout) + self.ip_address = response['EipAddress'] + self.eip_id = response['AllocationId'] + + self._WaitForInstanceStatus(['Stopped', 'Running']) + + associate_cmd = util.ALI_PREFIX + [ + 'ecs', + 'AssociateEipAddress', + '--RegionId %s' % region, + '--AllocationId %s' % self.eip_id, + '--InstanceId %s' % instance_id, + '--InstanceType EcsInstance'] + associate_cmd = util.GetEncodedCmd(associate_cmd) + vm_util.IssueRetryableCommand(associate_cmd) + + else: + allocatip_cmd = util.ALI_PREFIX + [ + 'ecs', + 'AllocatePublicIpAddress', + '--RegionId %s' % region, + '--InstanceId %s' % instance_id] + allocatip_cmd = util.GetEncodedCmd(allocatip_cmd) + stdout, _ = vm_util.IssueRetryableCommand(allocatip_cmd) + response = json.loads(stdout) + self.ip_address = response['IpAddress'] + + @classmethod + def _GetDefaultImage(cls, region): + """Returns the default image given the machine type and region. + If no default is configured, this will return None. 
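+
+    For example, Ubuntu2004BasedAliVirtualMachine sets IMAGE_NAME_FILTER to
+    'ubuntu_20_04_x64*alibase*.vhd'; DescribeImages is filtered by that
+    pattern and the match with the lexicographically largest ImageName (i.e.
+    the most recent build date) is returned.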
+ """ + if cls.IMAGE_NAME_FILTER is None: + return None + + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeImages', + '--RegionId %s' % region, + '--ImageName \'%s\'' % cls.IMAGE_NAME_FILTER] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + + if not stdout: + return None + + images = json.loads(stdout)['Images']['Image'] + # We want to return the latest version of the image, and since the wildcard + # portion of the image name is the image's creation date, we can just take + # the image with the 'largest' name. + return max(images, key=lambda image: image['ImageName'])['ImageId'] + + @vm_util.Retry() + def _PostCreate(self): + """Get the instance's data and tag it.""" + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeInstances', + '--RegionId %s' % self.region, + '--InstanceIds \'["%s"]\'' % self.id] + logging.info('Getting instance %s public IP. This will fail until ' + 'a public IP is available, but will be retried.', self.id) + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + instance = response['Instances']['Instance'][0] + if self.network.use_vpc: + pub_ip_address = instance['EipAddress']['IpAddress'] + self.internal_ip = \ + instance['VpcAttributes']['PrivateIpAddress']['IpAddress'][0] + else: + pub_ip_address = instance['PublicIpAddress']['IpAddress'][0] + self.internal_ip = instance['InnerIpAddress']['IpAddress'][0] + assert self.ip_address == pub_ip_address + self.group_id = instance['SecurityGroupIds']['SecurityGroupId'][0] + + self._WaitForInstanceStatus(['Running']) + + self.firewall.AllowPort(self, SSH_PORT) + tags = {} + tags.update(self.vm_metadata) + util.AddTags(self.id, RESOURCE_TYPE[INSTANCE], self.region, **tags) + util.AddDefaultTags(self.id, RESOURCE_TYPE[INSTANCE], self.region) + + def _CreateDependencies(self): + """Create VM dependencies.""" + self.key_pair_name = AliCloudKeyFileManager.ImportKeyfile(self.region) + + def _DeleteDependencies(self): + """Delete VM dependencies.""" + if self.key_pair_name: + AliCloudKeyFileManager.DeleteKeyfile(self.region, self.key_pair_name) + + def _Create(self): + """Create a VM instance.""" + + if self.image is None: + # This is here and not in the __init__ method bceauese _GetDefaultImage + # does a nontrivial amount of work (it calls the aliyuncli). 
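+      # Deferring the lookup also means a run that passes --image explicitly
+      # never pays for the extra DescribeImages round trip.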
+ self.image = self._GetDefaultImage(self.region) + + create_cmd = util.ALI_PREFIX + [ + 'ecs', + 'CreateInstance', + '--InstanceName perfkit-%s' % FLAGS.run_uri, + '--RegionId %s' % self.region, + '--ZoneId %s' % self.zone, + '--ImageId %s' % self.image, + '--InstanceType %s' % self.machine_type, + '--SecurityGroupId %s' % self.network.security_group.group_id, + '--KeyPairName %s' % self.key_pair_name, + '--SystemDisk.Category %s' % self.system_disk_type, + '--SystemDisk.Size %s' % self.system_disk_size] + + if FLAGS.scratch_disk_type == disk.LOCAL: + disk_cmd = [ + '--DataDisk1Category ephemeral_ssd', + '--DataDisk1Size %s' % self.scratch_disk_size, + '--DataDisk1Device %s%s' % (util.GetDrivePathPrefix(), + DRIVE_START_LETTER)] + create_cmd.extend(disk_cmd) + + if FLAGS.ali_io_optimized is not None: + create_cmd.extend(['--IoOptimized optimized']) + + if FLAGS.ali_use_vpc: + create_cmd.extend(['--VSwitchId %s' % self.network.vswitch.id]) + else: + create_cmd.extend([ + '--InternetChargeType PayByTraffic', + '--InternetMaxBandwidthIn %s' % self.bandwidth_in, + '--InternetMaxBandwidthOut %s' % self.bandwidth_out]) + + # Create user and add SSH key + public_key = AliCloudKeyFileManager.GetPublicKey() + user_data = util.ADD_USER_TEMPLATE.format(user_name=self.user_name, + public_key=public_key) + logging.debug('encoding startup script: %s', user_data) + create_cmd.extend(['--UserData', six.ensure_str( + base64.b64encode(user_data.encode('utf-8')))]) + + if FLAGS.ali_resource_group_id: + create_cmd.extend(['--ResourceGroupId %s' % FLAGS.ali_resource_group_id]) + + create_cmd = util.GetEncodedCmd(create_cmd) + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + response = json.loads(stdout) + self.id = response['InstanceId'] + + self._AllocatePubIp(self.region, self.id) + + start_cmd = util.ALI_PREFIX + [ + 'ecs', + 'StartInstance', + '--InstanceId %s' % self.id] + start_cmd = util.GetEncodedCmd(start_cmd) + vm_util.IssueRetryableCommand(start_cmd) + + def _Delete(self): + """Delete a VM instance.""" + stop_cmd = util.ALI_PREFIX + [ + 'ecs', + 'StopInstance', + '--InstanceId %s' % self.id] + stop_cmd = util.GetEncodedCmd(stop_cmd) + vm_util.IssueRetryableCommand(stop_cmd) + + self._WaitForInstanceStatus(['Stopped']) + + delete_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DeleteInstance', + '--InstanceId %s' % self.id] + delete_cmd = util.GetEncodedCmd(delete_cmd) + vm_util.IssueRetryableCommand(delete_cmd) + + if FLAGS.ali_use_vpc: + self._WaitForEipStatus(['Available']) + release_eip_cmd = util.ALI_PREFIX + [ + 'ecs', + 'ReleaseEipAddress', + '--RegionId %s' % self.region, + '--AllocationId %s' % self.eip_id] + release_eip_cmd = util.GetEncodedCmd(release_eip_cmd) + vm_util.IssueRetryableCommand(release_eip_cmd) + + def _Exists(self): + """Returns true if the VM exists.""" + describe_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DescribeInstances', + '--RegionId %s' % self.region, + '--InstanceIds \'["%s"]\'' % str(self.id)] + describe_cmd = util.GetEncodedCmd(describe_cmd) + stdout, _ = vm_util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + instances = response['Instances']['Instance'] + assert len(instances) < 2, 'Too many instances.' + if not instances: + return False + assert len(instances) == 1, 'Wrong number of instances.' + status = instances[0]['Status'] + assert status in INSTANCE_KNOWN_STATUSES, status + return status in INSTANCE_EXISTS_STATUSES + + def CreateScratchDisk(self, disk_spec): + """Create a VM's scratch disk. 
+ Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. + """ + data_disk = ali_disk.AliDisk(disk_spec, self.zone) + self.scratch_disks.append(data_disk) + + if disk_spec.disk_type != disk.LOCAL: + data_disk.Create() + data_disk.Attach(self) + data_disk.WaitForDiskStatus(['In_use']) + else: + data_disk.device_letter = DRIVE_START_LETTER + + self.FormatDisk(data_disk.GetDevicePath(), disk_spec.disk_type) + self.MountDisk(data_disk.GetDevicePath(), disk_spec.mount_point, + disk_spec.disk_type, data_disk.mount_options, + data_disk.fstab_options) + + def AddMetadata(self, **kwargs): + """Adds metadata to the VM.""" + util.AddTags(self.id, RESOURCE_TYPE[INSTANCE], self.region, **kwargs) + + +class AliCloudKeyFileManager(object): + """Object for managing AliCloud Keyfiles.""" + _lock = threading.Lock() + imported_keyfile_set = set() + deleted_keyfile_set = set() + run_uri_key_names = {} + + @classmethod + def ImportKeyfile(cls, region): + """Imports the public keyfile to AliCloud.""" + with cls._lock: + if FLAGS.run_uri in cls.run_uri_key_names: + return cls.run_uri_key_names[FLAGS.run_uri] + public_key = cls.GetPublicKey() + key_name = cls.GetKeyNameForRun() + import_cmd = util.ALI_PREFIX + [ + 'ecs', + 'ImportKeyPair', + '--RegionId', region, + '--KeyPairName', key_name, + '--PublicKeyBody', json.dumps(public_key)] + vm_util.IssueRetryableCommand(import_cmd) + cls.run_uri_key_names[FLAGS.run_uri] = key_name + return key_name + + @classmethod + def DeleteKeyfile(cls, region, key_name): + """Deletes the imported KeyPair for a run_uri.""" + with cls._lock: + if FLAGS.run_uri not in cls.run_uri_key_names: + return + delete_cmd = util.ALI_PREFIX + [ + 'ecs', + 'DeleteKeyPairs', + '--RegionId', region, + '--KeyPairNames', json.dumps([key_name])] + vm_util.IssueRetryableCommand(delete_cmd) + del cls.run_uri_key_names[FLAGS.run_uri] + + @classmethod + def GetKeyNameForRun(cls): + return 'perfkit_key_{0}'.format(FLAGS.run_uri) + + @classmethod + def GetPublicKey(cls): + cat_cmd = ['cat', + vm_util.GetPublicKeyPath()] + keyfile, _ = vm_util.IssueRetryableCommand(cat_cmd) + return keyfile.strip() + + +class Ubuntu1604BasedAliVirtualMachine(AliVirtualMachine, + linux_virtual_machine.Ubuntu1604Mixin): + IMAGE_NAME_FILTER = 'ubuntu_16_04_64*alibase*.vhd' + + +class Ubuntu1804BasedAliVirtualMachine(AliVirtualMachine, + linux_virtual_machine.Ubuntu1804Mixin): + IMAGE_NAME_FILTER = 'ubuntu_18_04_x64*alibase*.vhd' + + +class Ubuntu2004BasedAliVirtualMachine(AliVirtualMachine, + linux_virtual_machine.Ubuntu2004Mixin): + IMAGE_NAME_FILTER = 'ubuntu_20_04_x64*alibase*.vhd' + + +# TODO to be verified +class Ubuntu2204BasedAliVirtualMachine(AliVirtualMachine, + linux_virtual_machine.Ubuntu2204Mixin): + IMAGE_NAME_FILTER = 'ubuntu_22_04_x64*alibase*.vhd' + + +class CentOs7BasedAliVirtualMachine(AliVirtualMachine, + linux_virtual_machine.CentOs7Mixin): + IMAGE_NAME_FILTER = 'centos_7_09_x64*alibase*.vhd' + + +class CentOs8BasedAliVirtualMachine(AliVirtualMachine, + linux_virtual_machine.CentOs8Mixin): + IMAGE_NAME_FILTER = 'centos_8_2_x64*alibase*.vhd' diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/flags.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/flags.py new file mode 100644 index 0000000..4b5f966 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/flags.py @@ -0,0 +1,44 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl import flags + +flags.DEFINE_string('ali_user_name', 'ubuntu', + 'This determines the user name that Perfkit will ' + 'attempt to use. This must be changed in order to ' + 'use any image other than ubuntu.') +flags.DEFINE_integer('ali_bandwidth_in', 100, 'Inbound Bandwidth') +flags.DEFINE_integer('ali_bandwidth_out', 100, 'Outbound Bandwidth') +flags.DEFINE_string('ali_io_optimized', None, + 'IO optimized for disk in AliCloud. The default is ' + 'None which means no IO optimized ' + '"optimized" means use IO optimized. If you ' + 'choose optimized, you must specify the system disk type') +flags.DEFINE_string('ali_system_disk_type', 'cloud_ssd', + 'System disk category for AliCloud. The default is ' + '"cloud" for General cloud disk, ' + '"cloud_ssd" for cloud ssd disk, ' + '"cloud_essd" for enhanced cloud ssd disk, ' + '"cloud_efficiency" for efficiency cloud disk, ' + '"ephemeral_ssd" for local ssd disk') +flags.DEFINE_integer('ali_system_disk_size', 50, + 'System disk size in GB. Default is 50 GB.') +flags.DEFINE_boolean('ali_use_vpc', True, + 'Use VPC to create networks') +flags.DEFINE_integer('ali_eip_address_bandwidth', 100, + 'The rate limit of the EIP in Mbps.') +flags.DEFINE_enum('ali_essd_performance_level', 'PL1', ['PL1', 'PL2', 'PL3'], + 'Performance level for disk when using --data_disk_type=remote_essd.') +flags.DEFINE_string('ali_resource_group_id', '', + 'Specify the resource group id.') diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/provider_info.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/provider_info.py new file mode 100644 index 0000000..c5a3507 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/provider_info.py @@ -0,0 +1,24 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Provider info for AliCloud.""" + +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers + + +class AliCloudProviderInfo(provider_info.BaseProviderInfo): + + UNSUPPORTED_BENCHMARKS = ['mysql_service'] + CLOUD = providers.ALICLOUD diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/requirements.txt b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/requirements.txt new file mode 100644 index 0000000..be9b05e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/requirements.txt @@ -0,0 +1,15 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Requirements for running PerfKit Benchmarker on AliCloud. diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/util.py b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/util.py new file mode 100644 index 0000000..9362234 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/alicloud/util.py @@ -0,0 +1,107 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for working with AliCloud Web Services resources.""" + + +import shlex + +from absl import flags +from perfkitbenchmarker import vm_util +import six + +ALI_PREFIX = ['aliyun'] +ROOT = 'root' +FLAGS = flags.FLAGS +PASSWD_LEN = 20 + + +REGION_HZ = 'cn-hangzhou' + + +ADD_USER_TEMPLATE = """#!/bin/bash +echo "{user_name} ALL = NOPASSWD: ALL" >> /etc/sudoers +useradd {user_name} --home /home/{user_name} --shell /bin/bash -m +mkdir /home/{user_name}/.ssh +echo "{public_key}" >> /home/{user_name}/.ssh/authorized_keys +chown -R {user_name}:{user_name} /home/{user_name}/.ssh +chmod 700 /home/{user_name}/.ssh +chmod 600 /home/{user_name}/.ssh/authorized_keys +""" + + +def GetEncodedCmd(cmd): + cmd_line = ' '.join(cmd) + cmd_args = shlex.split(cmd_line) + return cmd_args + + +def GetRegionByZone(zone): + if zone.find(REGION_HZ) != -1: + return REGION_HZ + s = zone.split('-') + if s[0] == 'cn': + s.pop() + return '-'.join(s) + else: + return zone[:-1] + + +def AddTags(resource_id, resource_type, region, **kwargs): + """Adds tags to an AliCloud resource created by PerfKitBenchmarker. + + Args: + resource_id: An extant AliCloud resource to operate on. + resource_type: The type of the resource. + region: The AliCloud region 'resource_id' was created in. + **kwargs: dict. Key-value pairs to set on the instance. 
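+
+  Illustrative only: a hypothetical call might look like
+  AddTags('i-xxxxxxxx', 'instance', 'cn-hangzhou', owner='pkb').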
+ """ + if not kwargs: + return + + tag_cmd = ALI_PREFIX + [ + 'ecs', 'AddTags', + '--RegionId', region, + '--ResourceId', resource_id, + '--ResourceType', resource_type + ] + for index, (key, value) in enumerate(six.iteritems(kwargs)): + tag_cmd.extend([ + '--Tag.{0}.Key'.format(index + 1), str(key), + '--Tag.{0}.Value'.format(index + 1), str(value) + ]) + vm_util.IssueRetryableCommand(tag_cmd) + + +def AddDefaultTags(resource_id, resource_type, region): + """Adds tags to an AliCloud resource created by PerfKitBenchmarker. + + By default, resources are tagged with "owner" and "perfkitbenchmarker-run" + key-value + pairs. + + Args: + resource_id: An extant AliCloud resource to operate on. + resource_type: The type of the 'resource_id' + region: The AliCloud region 'resource_id' was created in. + """ + tags = {'owner': FLAGS.owner, 'perfkitbenchmarker-run': FLAGS.run_uri} + AddTags(resource_id, resource_type, region, **tags) + + +def GetDrivePathPrefix(): + if FLAGS.ali_io_optimized is None: + return '/dev/xvd' + elif FLAGS.ali_io_optimized: + return '/dev/vd' diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/__init__.py new file mode 100644 index 0000000..8955062 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Provider for AWS.""" diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/athena.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/athena.py new file mode 100644 index 0000000..5a59711 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/athena.py @@ -0,0 +1,382 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Module containing class for AWS's Athena EDW service.""" + +import copy +import datetime +import json +import logging +import re +from typing import Dict, Text, Tuple + +from absl import flags +from perfkitbenchmarker import data +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import s3 +from perfkitbenchmarker.providers.aws import util + +LATEST_CLIENT_JAR = 'athena-java-client-2.1.jar' + +AWS_ATHENA_CMD_PREFIX = ['aws', 'athena'] +AWS_ATHENA_CMD_POSTFIX = ['--output', 'json'] +# TODO(user): Derive the full table set from the TPC suite. +TPC_H_TABLES = [ + 'customer', 'lineitem', 'nation', 'orders', 'part', 'partsupp', 'region', + 'supplier' +] +TPC_DS_TABLES = [ + 'call_center', 'catalog_page', 'catalog_returns', 'catalog_sales', + 'customer', 'customer_address', 'customer_demographics', 'date_dim', + 'dbgen_version', 'household_demographics', 'income_band', 'inventory', + 'item', 'promotion', 'reason', 'ship_mode', 'store', 'store_returns', + 'store_sales', 'time_dim', 'warehouse', 'web_page', 'web_returns', + 'web_sales', 'web_site' +] + +FLAGS = flags.FLAGS + + +class AthenaQueryError(RuntimeError): + pass + + +def GetAthenaClientInterface(database: str, output_bucket: str, + region: str) -> edw_service.EdwClientInterface: + """Builds and Returns the requested Athena client Interface. + + Args: + database: Name of the Athena database to execute queries against. + output_bucket: String name of the S3 bucket to store query output. + region: String aws region in which the database exists and client operations + are performed. + + Returns: + A concrete Client Interface object (subclass of EdwClientInterface) + + Raises: + RuntimeError: if an unsupported athena_client_interface is requested + """ + if FLAGS.athena_client_interface == 'JAVA': + return JavaClientInterface(database, output_bucket, region) + raise RuntimeError('Unknown Athena Client Interface requested.' + + FLAGS.athena_client_interface) + + +class GenericClientInterface(edw_service.EdwClientInterface): + """Generic Client Interface class for Athena. + + Attributes: + database: String name of the Athena database to execute queries against. + output_bucket: String name of the S3 bucket to store query output. + region: String aws region in which the database exists and client operations + are performed. + """ + + def __init__(self, database: str, output_bucket: str, region: str): + super(GenericClientInterface, self).__init__() + self.database = database + self.output_bucket = 's3://%s' % output_bucket + self.region = region + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + client_workgroup = FLAGS.athena_workgroup or 'dynamic' + return { + 'client': f'{FLAGS.athena_client_interface}_{client_workgroup}', + 'client_region': self.region + } + + +class JavaClientInterface(GenericClientInterface): + """Java Client Interface class for Athena. + """ + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the Java Execution Environment and a uber jar with + a) Athena Java client libraries, + b) An application to execute a query and gather execution details, and + collect CW metrics + c) their dependencies. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. 
+ """ + self.client_vm.Install('openjdk') + # Push the executable jar to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData( + package_name, [LATEST_CLIENT_JAR], '') + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, run_metadata) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + run_metadata: A dictionary of query execution attributes eg. script name + """ + query_command = (f'java -cp {LATEST_CLIENT_JAR} ' + 'com.google.cloud.performance.edw.Single ' + f'--region {self.region} ' + f'--database {self.database} ' + f'--output_location {self.output_bucket} ' + f'--query_file {query_name} ' + f'--query_timeout_secs {FLAGS.athena_query_timeout} ' + f'--collect_metrics {FLAGS.athena_metrics_collection}') + + if not FLAGS.athena_metrics_collection: + # execute the query in requested persistent workgroup + query_command = f'{query_command} --workgroup {FLAGS.athena_workgroup} ' + query_command = f'{query_command} --delete_workgroup False' + else: + # the dynamic workgroup may have to live beyond the benchmark + query_command = (f'{query_command} ' + f'--delete_workgroup {FLAGS.athena_workgroup_delete}') + + stdout, _ = self.client_vm.RemoteCommand(query_command) + details = copy.copy(self.GetMetadata()) # Copy the base metadata + details.update(json.loads(stdout)['details']) + details['query_start'] = json.loads(stdout)['query_start'] + details['query_end'] = json.loads(stdout)['query_end'] + performance = json.loads(stdout)['query_wall_time_in_secs'] + return performance, details + + +def ReadScript(script_uri): + """Method to read a sql script based on its local path. + + Arguments: + script_uri: Local URI of file containing SQL query. + + Returns: + Query String contents of the URI location. + + Raises: + IOError: If the script cannot be read. + """ + with open(script_uri) as fp: + return fp.read() + + +def PrepareQueryString(query_string_template, substitutions): + """Method to read a template Athena script and substitute placeholders. + + Args: + query_string_template: Template version of the Athena query. + substitutions: A dictionary of string placeholder keys and corresponding + string values. + + Returns: + Materialized Athena query as a string. + """ + for key, value in substitutions.items(): + query_string = query_string_template.replace(key, value) + return query_string + + +def RunScriptCommand(script_command): + """Method to execute an AWS Athena cli command. + + Args: + script_command: Fully compiled AWS Athena cli command. + + Returns: + String stdout result of executing the query. + Script Command execution duration in seconds (rounded). + + Raises: + AthenaQueryError: If the return code does not indicate success. 
+ """ + start_time = datetime.datetime.now() + stdout, _, retcode = vm_util.IssueCommand( + script_command, raise_on_failure=False) + if retcode: + raise AthenaQueryError + end_time = datetime.datetime.now() + return stdout, int((end_time - start_time).total_seconds()) + + +class Athena(edw_service.EdwService): + """Object representing a Athena data warehouse.""" + + CLOUD = providers.AWS + SERVICE_TYPE = 'athena' + + def __init__(self, edw_service_spec): + super(Athena, self).__init__(edw_service_spec) + self.region = util.GetRegionFromZone(FLAGS.zones[0]) + self.output_bucket = '-'.join( + [FLAGS.athena_output_location_prefix, self.region, FLAGS.run_uri]) + self.client_interface = GetAthenaClientInterface(self.cluster_identifier, + self.output_bucket, + self.region) + self.s3_service = s3.S3Service() + self.s3_service.PrepareService(self.region) + self.s3_service.MakeBucket(self.output_bucket) + if FLAGS.provision_athena: + self.data_bucket = 'pkb' + self.cluster_identifier.replace('_', '') + self.tables = ( + TPC_H_TABLES if FLAGS.edw_tpc_dsb_type == 'tpc_h' else TPC_DS_TABLES) + self.athena_db_create_time = 0 + self.athena_table_create_time = 0 + + def BuildAthenaCommand(self, query_string, database=None): + """Method to compile a AWS Athena cli command. + + Arguments: + query_string: A string with the query that needs to be executed on Athena. + database: The Athena database against which the query should be executed. + + Returns: + Fully compiled AWS Athena cli command. + """ + cmd = [] + cmd.extend(AWS_ATHENA_CMD_PREFIX) + cmd.extend([ + '--region', self.region, + 'start-query-execution', + '--query-string', query_string + ]) + if database: + cmd.extend(['--query-execution-context', ('Database=%s' % database)]) + cmd.extend([ + '--result-configuration', + ('OutputLocation=s3://%s' % self.output_bucket) + ]) + cmd.extend(AWS_ATHENA_CMD_POSTFIX) + return cmd + + def _Create(self): + """Create a Athena data warehouse.""" + + def _EmptyDatabase(): + """Remove tables, if they exist, so they can be refreshed. + + If the database and/or tables don't already exist, the drop commands + will simply fail, which won't raise errors. + """ + drop_script_path = data.ResourcePath('edw/athena/%s/ddl/drop.sql' % + FLAGS.edw_tpc_dsb_type) + drop_script_contents = ReadScript(drop_script_path) + # Drop all tables so the database can be dropped. + for table in self.tables: + # Remove the folder backing each parquet table so they can be refreshed. + vm_util.IssueCommand([ + 'aws', 's3', 'rm', + 's3://%s/%s_parquet' % (self.data_bucket, table), '--recursive' + ], raise_on_failure=False) + # The parquet tables don't have the type suffix so that the queries can + # run as written without having to change the table names. 
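+        # For example, for the 'lineitem' table this drops both 'lineitem_csv'
+        # and the suffix-less parquet table 'lineitem'.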
+ for suffix in ['_csv', '']: + script_contents = PrepareQueryString(drop_script_contents, + {'{table}': table + suffix}) + script_command = self.BuildAthenaCommand( + script_contents, database=self.cluster_identifier) + RunScriptCommand(script_command) + + drop_database_query_string = PrepareQueryString( + 'drop database database_name', + {'database_name': self.cluster_identifier}) + script_command = self.BuildAthenaCommand(drop_database_query_string) + RunScriptCommand(script_command) + + def _CreateDatabase(): + create_database_query_string = PrepareQueryString( + 'create database database_name', + {'database_name': self.cluster_identifier}) + script_command = self.BuildAthenaCommand(create_database_query_string) + return RunScriptCommand(script_command) + + def _CreateTable(table_create_sql_template): + template_script_path = data.ResourcePath(table_create_sql_template) + template_script_contents = ReadScript(template_script_path) + script_contents = PrepareQueryString(template_script_contents, + {'{bucket}': self.data_bucket}) + script_command = self.BuildAthenaCommand( + script_contents, database=self.cluster_identifier) + return RunScriptCommand(script_command) + + def _CreateAllTables(): + """Create all TPC benchmarking tables.""" + cumulative_table_create_time = 0 + for table in self.tables: + for suffix in ['_csv', '_parquet']: + script = 'edw/athena/%s/ddl/%s.sql' % (FLAGS.edw_tpc_dsb_type, + table + suffix) + _, table_create_time = _CreateTable(script) + cumulative_table_create_time += table_create_time + return cumulative_table_create_time + + _EmptyDatabase() + _, self.athena_db_create_time = _CreateDatabase() + self.athena_table_create_time = _CreateAllTables() + + def _Exists(self): + """Method to validate the existence of a Athena data warehouse. + + Returns: + Boolean value indicating the existence of a Athena data warehouse. + """ + raise NotImplementedError + + def _Delete(self): + """Delete a Athena data warehouse.""" + if not FLAGS.teardown_athena: + logging.info('The current resource is requested to be long living.') + return + raise NotImplementedError + + def Cleanup(self): + # Direct cleanup is used instead of _DeleteDependencies because the Athena + # warehouse resource isn't created/deleted each time. + self.s3_service.DeleteBucket(self.output_bucket) + + def GetDataDetails(self) -> Dict[str, str]: + """Returns a dictionary with underlying data details. + + cluster_identifier = + Data details are extracted from the dataset_id that follows the format: + ___ + eg. + tpch100_parquet_uncompressed_unpartitoned + + Returns: + A dictionary set to underlying data's details (format, etc.) + """ + data_details = {} + # If the information isn't in the cluster identifier, skip collecting it. 
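+    # Otherwise an identifier such as 'tpch100_parquet_uncompressed_unpartitoned'
+    # yields format='parquet', compression='uncompressed',
+    # partitioning='unpartitoned'.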
+    if '_' not in self.cluster_identifier:
+      return data_details
+    parsed_id = re.split(r'_', self.cluster_identifier)
+    data_details['format'] = parsed_id[1]
+    data_details['compression'] = parsed_id[2]
+    data_details['partitioning'] = parsed_id[3]
+    return data_details
+
+  def GetMetadata(self):
+    """Return a dictionary of the metadata for the Athena data warehouse."""
+    basic_data = super(Athena, self).GetMetadata()
+    basic_data.update({'database': self.cluster_identifier})
+    basic_data.update(self.GetDataDetails())
+    basic_data.update(self.client_interface.GetMetadata())
+    return basic_data
diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_capacity_reservation.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_capacity_reservation.py
new file mode 100644
index 0000000..ccc9097
--- /dev/null
+++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_capacity_reservation.py
@@ -0,0 +1,200 @@
+# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""CapacityReservation for AWS virtual machines.
+
+AWS EC2 has the concept of capacity reservations which allow the
+user to request a reservation for a given number of VMs of a
+specified shape (machine type and os type) in a given zone, for
+an optionally-supplied duration. This module implements this functionality.
+
+A useful feature of using AwsCapacityReservation is that it allows the
+user to specify a region instead of a zone, and this module will automatically
+pick a zone that has capacity, and the VM(s) will then be launched in that zone.
+
+AwsCapacityReservation modifies all the VMs in a given vm_group in the
+following way:
+  1. The capacity_reservation_id attribute on the VM is set after the
+     reservation is created. The VM needs to reference this id during
+     creation.
+  2. If the user supplied a region instead of a zone, then this module
+     will update the zone attribute on the VM, as well as the zone
+     attribute on the VM's network instance.
+
+A run of PKB may have several capacity reservations; there is a 1:1 mapping
+from AWS vm_groups to AwsCapacityReservation instances. This is because all
+VMs in a VM group share the same shape and zone.
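+
+For example (illustrative values): a vm_group of two m5.large Linux VMs whose
+zone is given as the region us-east-1 results in one CapacityReservation with
+an instance count of 2, created in whichever us-east-1 availability zone has
+capacity; that zone is then written back to each VM and its network instance.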
+""" + +import datetime +import json +import logging + +from absl import flags +from perfkitbenchmarker import capacity_reservation +from perfkitbenchmarker import errors +from perfkitbenchmarker import os_types +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS +_INSUFFICIENT_CAPACITY = 'InsufficientInstanceCapacity' + + +class InvalidVmGroupSizeError(Exception): + pass + + +class UnsupportedOsTypeError(Exception): + pass + + +class CreationError(Exception): + pass + + +class AwsCapacityReservation(capacity_reservation.BaseCapacityReservation): + """An object representing an AWS EC2 CapacityReservation.""" + CLOUD = providers.AWS + + def __init__(self, vm_group): + if not vm_group: + raise InvalidVmGroupSizeError( + 'AwsCapacityReservation must be initialized with at least one ' + 'VM in the vm_group.') + + super(AwsCapacityReservation, self).__init__(vm_group) + self.zone_or_region = vm_group[0].zone + self.region = util.GetRegionFromZone(self.zone_or_region) + self.machine_type = vm_group[0].machine_type + self.os_type = vm_group[0].OS_TYPE + self.vm_count = len(vm_group) + + def _Create(self): + """Creates the AWS CapacaityReservation. + + A reservation will be created given the VM shape in self.vm_groups. + Count is determined by the number of VMs in said group. The reservation + will have a lifetime determined by the general PKB concept of + timeout_minutes. If the reservation exceeds this timeout, AWS will + cancel it automatically. The VMs in the reservation will not be deleted. + Note that an empty capacity reservation will encur costs for the + VM shape / count, even if no VMs are using it. + + After the reservation is created, this method updates all the VMs + in self.vm_groups by setting the capacity_reservation_id, as well + as the zone attributes on the VM, and the VM's network instance. + + Raises: + UnsupportedOsTypeError: If creating a capacity reservation for the + given os type is not supported. + CreationError: If a capacity reservation cannot be created in the + region (typically indicates a stockout). + """ + if self.os_type in os_types.LINUX_OS_TYPES: + instance_platform = 'Linux/UNIX' + elif self.os_type in os_types.WINDOWS_OS_TYPES: + instance_platform = 'Windows' + else: + raise UnsupportedOsTypeError( + 'Unsupported os_type for AWS CapacityReservation: %s.' + % self.os_type) + + # If the user did not specify an AZ, we need to try to create the + # CapacityReservation in a specifc AZ until it succeeds. + # Then update the zone attribute on all the VMs in the group, + # as well as the zone attribute on the VMs' network instance. + if util.IsRegion(self.zone_or_region): + zones_to_try = util.GetZonesInRegion(self.region) + else: + zones_to_try = [self.zone_or_region] + + end_date = ( + datetime.datetime.utcnow() + + datetime.timedelta(minutes=FLAGS.timeout_minutes)) + for zone in zones_to_try: + cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-capacity-reservation', + '--instance-type=%s' % self.machine_type, + '--instance-platform=%s' % instance_platform, + '--availability-zone=%s' % zone, + '--instance-count=%s' % self.vm_count, + '--instance-match-criteria=targeted', + '--region=%s' % self.region, + '--end-date-type=limited', + '--end-date=%s' % end_date.isoformat(), + ] + stdout, stderr, retcode = vm_util.IssueCommand(cmd, + raise_on_failure=False) + if retcode: + logging.info('Unable to create CapacityReservation in %s. ' + 'This may be retried. 
Details: %s', zone, stderr) + if _INSUFFICIENT_CAPACITY in stderr: + logging.error(util.STOCKOUT_MESSAGE) + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + util.STOCKOUT_MESSAGE + ' CapacityReservation in ' + zone) + continue + json_output = json.loads(stdout) + self.capacity_reservation_id = ( + json_output['CapacityReservation']['CapacityReservationId']) + self._UpdateVmsInGroup(self.capacity_reservation_id, zone) + return + raise CreationError('Unable to create CapacityReservation in any of the ' + 'following zones: %s.' % zones_to_try) + + def _Delete(self): + """Deletes the capacity reservation.""" + cmd = util.AWS_PREFIX + [ + 'ec2', + 'cancel-capacity-reservation', + '--capacity-reservation-id=%s' % self.capacity_reservation_id, + '--region=%s' % self.region, + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the underlying reservation exists and is active.""" + cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-capacity-reservations', + '--capacity-reservation-id=%s' % self.capacity_reservation_id, + '--region=%s' % self.region, + ] + stdout, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + return False + + json_output = json.loads(stdout) + return json_output['CapacityReservations'][0]['State'] == 'active' + + def _UpdateVmsInGroup(self, capacity_reservation_id, zone): + """Updates the VMs in a group with necessary reservation details. + + AWS virtual machines need to reference the capacity reservation id + during creation, so it is set on all VMs in the group. Additionally, + this class may determine which zone to run in, so that needs to be + updated too (on the VM, and the VM's network instance). + + Args: + capacity_reservation_id: ID of the reservation created by this instance. + zone: Zone chosen by this class, or if it was supplied, the zone + provided by the user. In the latter case, setting the zone is equivalent + to a no-op. + """ + for vm in self.vm_group: + vm.capacity_reservation_id = capacity_reservation_id + vm.zone = zone + vm.network.zone = zone diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_cluster_parameter_group.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_cluster_parameter_group.py new file mode 100644 index 0000000..d02290b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_cluster_parameter_group.py @@ -0,0 +1,49 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS's Redshift Cluster Parameter Group.""" + +from absl import flags +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util + +FLAGS = flags.FLAGS + + +class RedshiftClusterParameterGroup(resource.BaseResource): + """Cluster Parameter Group associated with a Redshift cluster. + + Attributes: + name: A string name of the cluster parameter group. 
+ """ + + def __init__(self, cmd_prefix): + super(RedshiftClusterParameterGroup, self).__init__(user_managed=False) + self.cmd_prefix = cmd_prefix + self.name = 'pkb-' + FLAGS.run_uri + + def _Create(self): + cmd = self.cmd_prefix + [ + 'redshift', 'create-cluster-parameter-group', '--parameter-group-name', + self.name, '--parameter-group-family', 'redshift-1.0', '--description', + 'Cluster Parameter group for run uri {}'.format(FLAGS.run_uri) + ] + vm_util.IssueCommand(cmd) + + def _Delete(self): + """Delete a redshift cluster parameter group.""" + cmd = self.cmd_prefix + [ + 'redshift', 'delete-cluster-parameter-group', '--parameter-group-name', + self.name + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_cluster_subnet_group.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_cluster_subnet_group.py new file mode 100644 index 0000000..abdaed9 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_cluster_subnet_group.py @@ -0,0 +1,55 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS's Redshift Cluster Subnet Group.""" + +from absl import flags +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util + +FLAGS = flags.FLAGS + + +class RedshiftClusterSubnetGroup(resource.BaseResource): + """Cluster Subnet Group associated with a Redshift cluster launched in a vpc. + + A cluster subnet group allows you to specify a set of subnets in your VPC. + + + Attributes: + name: A string name of the cluster subnet group. + subnet_id: A string name of the subnet id associated with the group. + """ + + def __init__(self, cmd_prefix): + super(RedshiftClusterSubnetGroup, self).__init__(user_managed=False) + self.cmd_prefix = cmd_prefix + self.name = 'pkb-' + FLAGS.run_uri + self.subnet_id = '' + + def _Create(self): + cmd = self.cmd_prefix + [ + 'redshift', 'create-cluster-subnet-group', + '--cluster-subnet-group-name', self.name, '--description', + 'Cluster Subnet Group for run uri {}'.format( + FLAGS.run_uri), '--subnet-ids', self.subnet_id + ] + vm_util.IssueCommand(cmd) + + def _Delete(self): + """Delete a redshift cluster subnet group.""" + cmd = self.cmd_prefix + [ + 'redshift', 'delete-cluster-subnet-group', + '--cluster-subnet-group-name', self.name + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_container_service.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_container_service.py new file mode 100755 index 0000000..7575d99 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_container_service.py @@ -0,0 +1,575 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains classes/functions related to AWS container clusters.""" + +import json +import os +import uuid + +from absl import flags +from perfkitbenchmarker import container_service +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_load_balancer +from perfkitbenchmarker.providers.aws import aws_logs +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import s3 +from perfkitbenchmarker.providers.aws import util +import requests +import six +import yaml + +FLAGS = flags.FLAGS +_ECS_NOT_READY = frozenset(['PROVISIONING', 'PENDING']) + + +class EcrRepository(resource.BaseResource): + """Class representing an Elastic Container Registry image repository.""" + + def __init__(self, name, region): + super(EcrRepository, self).__init__() + self.name = name + self.region = region + + def _Create(self): + """Creates the image repository.""" + if self._Exists(): + self.user_managed = True + return + create_cmd = util.AWS_PREFIX + [ + 'ecr', 'create-repository', '--region', self.region, + '--repository-name', self.name + ] + _, stderr, retcode = vm_util.IssueCommand( + create_cmd, raise_on_failure=False) + if retcode: + if 'InsufficientInstanceCapacity' in stderr: + raise errors.Benchmarks.InsufficientCapacityCloudFailure(stderr) + if 'InstanceLimitExceeded' in stderr or 'VpcLimitExceeded' in stderr: + raise errors.Benchmarks.QuotaFailure(stderr) + raise errors.Resource.CreationError( + 'Failed to create EKS Cluster: {} return code: {}'.format( + retcode, stderr)) + + def _Exists(self): + """Returns True if the repository exists.""" + describe_cmd = util.AWS_PREFIX + [ + 'ecr', 'describe-repositories', '--region', self.region, + '--repository-names', self.name + ] + stdout, _, _ = vm_util.IssueCommand( + describe_cmd, suppress_warning=True, raise_on_failure=False) + if not stdout or not json.loads(stdout)['repositories']: + return False + return True + + def _Delete(self): + """Deletes the repository.""" + delete_cmd = util.AWS_PREFIX + [ + 'ecr', 'delete-repository', '--region', self.region, + '--repository-name', self.name, '--force' + ] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + +class ElasticContainerRegistry(container_service.BaseContainerRegistry): + """Class for building and storing container images on AWS.""" + + CLOUD = providers.AWS + + def __init__(self, registry_spec): + super(ElasticContainerRegistry, self).__init__(registry_spec) + self.account = self.project or util.GetAccount() + self.region = util.GetRegionFromZone(self.zone.split(',')[0]) + self.repositories = [] + + def _Delete(self): + """Deletes the repositories.""" + for repository in self.repositories: + repository.Delete() + + def PrePush(self, image): + """Prepares registry to push a given image.""" + repository_name = '{namespace}/{name}'.format( + namespace=self.name, name=image.name) + repository = EcrRepository(repository_name, self.region) + 
self.repositories.append(repository) + repository.Create() + + def GetFullRegistryTag(self, image): + """Gets the full tag of the image.""" + tag = '{account}.dkr.ecr.{region}.amazonaws.com/{namespace}/{name}'.format( + account=self.account, + region=self.region, + namespace=self.name, + name=image) + return tag + + def Login(self): + """Logs in to the registry.""" + get_login_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecr', 'get-login', '--no-include-email' + ] + stdout, _, _ = vm_util.IssueCommand(get_login_cmd) + login_cmd = stdout.split() + vm_util.IssueCommand(login_cmd) + + def RemoteBuild(self, image): + """Build the image remotely.""" + # TODO(ehankland) use AWS codebuild to build the image. + raise NotImplementedError() + + +class TaskDefinition(resource.BaseResource): + """Class representing an AWS task definition.""" + + def __init__(self, name, container_spec, cluster): + super(TaskDefinition, self).__init__() + self.name = name + self.cpus = container_spec.cpus + self.memory = container_spec.memory + self.image = container_spec.image + self.container_port = container_spec.container_port + self.region = cluster.region + self.arn = None + self.log_group = aws_logs.LogGroup(self.region, 'pkb') + + def _CreateDependencies(self): + """Create the log group if it doesn't exist.""" + if not self.log_group.Exists(): + self.log_group.Create() + + def _Create(self): + """Create the task definition.""" + register_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'register-task-definition', '--family', + self.name, '--execution-role-arn', 'ecsTaskExecutionRole', + '--network-mode', 'awsvpc', '--requires-compatibilities=FARGATE', + '--cpu', + str(int(1024 * self.cpus)), '--memory', + str(self.memory), '--container-definitions', + self._GetContainerDefinitions() + ] + stdout, _, _ = vm_util.IssueCommand(register_cmd) + response = json.loads(stdout) + self.arn = response['taskDefinition']['taskDefinitionArn'] + + def _Delete(self): + """Deregister the task definition.""" + if self.arn is None: + return + deregister_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'deregister-task-definition', + '--task-definition', self.arn + ] + vm_util.IssueCommand(deregister_cmd) + + def _GetContainerDefinitions(self): + """Returns a JSON representation of the container definitions.""" + definitions = [{ + 'name': self.name, + 'image': self.image, + 'essential': True, + 'portMappings': [{ + 'containerPort': self.container_port, + 'protocol': 'TCP' + }], + 'logConfiguration': { + 'logDriver': 'awslogs', + 'options': { + 'awslogs-group': 'pkb', + 'awslogs-region': self.region, + 'awslogs-stream-prefix': 'pkb' + } + } + }] + return json.dumps(definitions) + + +class EcsTask(container_service.BaseContainer): + """Class representing an ECS/Fargate task.""" + + def __init__(self, name, container_spec, cluster): + super(EcsTask, self).__init__(container_spec) + self.name = name + self.task_def = cluster.task_defs[name] + self.arn = None + self.region = cluster.region + self.cluster_name = cluster.name + self.subnet_id = cluster.network.subnet.id + self.ip_address = None + self.security_group_id = ( + cluster.network.regional_network.vpc.default_security_group_id) + + def _GetNetworkConfig(self): + network_config = { + 'awsvpcConfiguration': { + 'subnets': [self.subnet_id], + 'securityGroups': [self.security_group_id], + 'assignPublicIp': 'ENABLED', + } + } + return json.dumps(network_config) + + def _GetOverrides(self): + """Returns a JSON representaion of task overrides. 
+ + While the container level resources can be overridden, they have no + effect on task level resources for Fargate tasks. This means + that modifying a container spec will only affect the command of any + new containers launched from it and not cpu/memory. + """ + overrides = { + 'containerOverrides': [{ + 'name': self.name, + }] + } + if self.command: + overrides['containerOverrides'][0]['command'] = self.command + return json.dumps(overrides) + + def _Create(self): + """Creates the task.""" + run_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'run-task', '--cluster', + self.cluster_name, '--task-definition', self.task_def.arn, + '--launch-type', 'FARGATE', '--network-configuration', + self._GetNetworkConfig(), '--overrides', + self._GetOverrides() + ] + stdout, _, _ = vm_util.IssueCommand(run_cmd) + response = json.loads(stdout) + self.arn = response['tasks'][0]['taskArn'] + + def _PostCreate(self): + """Gets the tasks IP address.""" + container = self._GetTask()['containers'][0] + self.ip_address = container['networkInterfaces'][0]['privateIpv4Address'] + + def _DeleteDependencies(self): + """Delete the task def.""" + self.task_def.Delete() + + def _Delete(self): + """Deletes the task.""" + if self.arn is None: + return + stop_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'stop-task', '--cluster', + self.cluster_name, '--task', self.arn + ] + vm_util.IssueCommand(stop_cmd) + + def _GetTask(self): + """Returns a dictionary representation of the task.""" + describe_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'describe-tasks', '--cluster', + self.cluster_name, '--tasks', self.arn + ] + stdout, _, _ = vm_util.IssueCommand(describe_cmd) + response = json.loads(stdout) + return response['tasks'][0] + + def _IsReady(self): + """Returns true if the task has stopped pending.""" + return self._GetTask()['lastStatus'] not in _ECS_NOT_READY + + def WaitForExit(self, timeout=None): + """Waits until the task has finished running.""" + + @vm_util.Retry( + timeout=timeout, + retryable_exceptions=(container_service.RetriableContainerException,)) + def _WaitForExit(): + task = self._GetTask() + if task['lastStatus'] != 'STOPPED': + raise container_service.RetriableContainerException( + 'Task is not STOPPED.') + return task + + return _WaitForExit() + + def GetLogs(self): + """Returns the logs from the container.""" + task_id = self.arn.split('/')[-1] + log_stream = 'pkb/{name}/{task_id}'.format(name=self.name, task_id=task_id) + return six.text_type( + aws_logs.GetLogStreamAsString(self.region, log_stream, 'pkb')) + + +class EcsService(container_service.BaseContainerService): + """Class representing an ECS/Fargate service.""" + + def __init__(self, name, container_spec, cluster): + super(EcsService, self).__init__(container_spec) + self.client_token = str(uuid.uuid4())[:32] + self.name = name + self.task_def = cluster.task_defs[name] + self.arn = None + self.region = cluster.region + self.cluster_name = cluster.name + self.subnet_id = cluster.network.subnet.id + self.security_group_id = ( + cluster.network.regional_network.vpc.default_security_group_id) + self.load_balancer = aws_load_balancer.LoadBalancer( + [cluster.network.subnet]) + self.target_group = aws_load_balancer.TargetGroup( + cluster.network.regional_network.vpc, self.container_port) + self.port = 80 + + def _CreateDependencies(self): + """Creates the load balancer for the service.""" + self.load_balancer.Create() + self.target_group.Create() + listener = 
aws_load_balancer.Listener(self.load_balancer, self.target_group, + self.port) + listener.Create() + self.ip_address = self.load_balancer.dns_name + + def _DeleteDependencies(self): + """Deletes the service's load balancer.""" + self.task_def.Delete() + self.load_balancer.Delete() + self.target_group.Delete() + + # TODO(ferneyhough): Consider supporting the flag container_cluster_version. + def _Create(self): + """Creates the service.""" + create_cmd = util.AWS_PREFIX + [ + '--region', + self.region, + 'ecs', + 'create-service', + '--desired-count', + '1', + '--client-token', + self.client_token, + '--cluster', + self.cluster_name, + '--service-name', + self.name, + '--task-definition', + self.task_def.arn, + '--launch-type', + 'FARGATE', + '--network-configuration', + self._GetNetworkConfig(), + '--load-balancers', + self._GetLoadBalancerConfig(), + ] + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """Deletes the service.""" + update_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'update-service', '--cluster', + self.cluster_name, '--service', self.name, '--desired-count', '0' + ] + vm_util.IssueCommand(update_cmd) + delete_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'delete-service', '--cluster', + self.cluster_name, '--service', self.name + ] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _GetNetworkConfig(self): + network_config = { + 'awsvpcConfiguration': { + 'subnets': [self.subnet_id], + 'securityGroups': [self.security_group_id], + 'assignPublicIp': 'ENABLED', + } + } + return json.dumps(network_config) + + def _GetLoadBalancerConfig(self): + """Returns the JSON representation of the service load balancers.""" + load_balancer_config = [{ + 'targetGroupArn': self.target_group.arn, + 'containerName': self.name, + 'containerPort': self.container_port, + }] + return json.dumps(load_balancer_config) + + def _IsReady(self): + """Returns True if the Service is ready.""" + url = 'http://%s' % self.ip_address + try: + r = requests.get(url) + except requests.ConnectionError: + return False + if r.status_code == 200: + return True + return False + + +class FargateCluster(container_service.BaseContainerCluster): + """Class representing an AWS Fargate cluster.""" + + CLOUD = providers.AWS + CLUSTER_TYPE = 'Fargate' + + def __init__(self, cluster_spec): + super(FargateCluster, self).__init__(cluster_spec) + self.region = util.GetRegionFromZone(self.zone) + self.network = aws_network.AwsNetwork.GetNetwork(self) + self.firewall = aws_network.AwsFirewall.GetFirewall() + self.name = 'pkb-%s' % FLAGS.run_uri + self.task_defs = {} + self.arn = None + + def _Create(self): + """Creates the cluster.""" + create_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'create-cluster', '--cluster-name', + self.name + ] + stdout, _, _ = vm_util.IssueCommand(create_cmd) + response = json.loads(stdout) + self.arn = response['cluster']['clusterArn'] + + def _Exists(self): + """Returns True if the cluster exists.""" + if not self.arn: + return False + describe_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'describe-clusters', '--clusters', + self.arn + ] + stdout, _, _ = vm_util.IssueCommand(describe_cmd) + response = json.loads(stdout) + clusters = response['clusters'] + if not clusters or clusters[0]['status'] == 'INACTIVE': + return False + return True + + def _Delete(self): + """Deletes the cluster.""" + delete_cmd = util.AWS_PREFIX + [ + '--region', self.region, 'ecs', 'delete-cluster', '--cluster', self.name + ] + 
vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def DeployContainer(self, name, container_spec): + """Deploys the container according to the spec.""" + if name not in self.task_defs: + task_def = TaskDefinition(name, container_spec, self) + self.task_defs[name] = task_def + task_def.Create() + task = EcsTask(name, container_spec, self) + self.containers[name].append(task) + task.Create() + + def DeployContainerService(self, name, container_spec): + """Deploys the container service according to the spec.""" + if name not in self.task_defs: + task_def = TaskDefinition(name, container_spec, self) + self.task_defs[name] = task_def + task_def.Create() + service = EcsService(name, container_spec, self) + self.services[name] = service + self.firewall.AllowPortInSecurityGroup(service.region, + service.security_group_id, + service.container_port) + service.Create() + + +class AwsKopsCluster(container_service.KubernetesCluster): + """Class representing a kops based Kubernetes cluster.""" + + CLOUD = providers.AWS + CLUSTER_TYPE = 'kops' + + def __init__(self, spec): + super(AwsKopsCluster, self).__init__(spec) + self.name += '.k8s.local' + self.config_bucket = 'kops-%s-%s' % (FLAGS.run_uri, str(uuid.uuid4())) + self.region = util.GetRegionFromZone(self.zone) + self.s3_service = s3.S3Service() + self.s3_service.PrepareService(self.region) + + def _CreateDependencies(self): + """Create the bucket to store cluster config.""" + self.s3_service.MakeBucket(self.config_bucket) + + def _DeleteDependencies(self): + """Delete the bucket that stores cluster config.""" + self.s3_service.DeleteBucket(self.config_bucket) + + def _Create(self): + """Creates the cluster.""" + # Create the cluster spec but don't provision any resources. + create_cmd = [ + FLAGS.kops, 'create', 'cluster', + '--name=%s' % self.name, + '--zones=%s' % self.zone, + '--node-count=%s' % self.num_nodes, + '--node-size=%s' % self.machine_type + ] + env = os.environ.copy() + env['KUBECONFIG'] = FLAGS.kubeconfig + env['KOPS_STATE_STORE'] = 's3://%s' % self.config_bucket + vm_util.IssueCommand(create_cmd, env=env) + + # Download the cluster spec and modify it. + get_cmd = [FLAGS.kops, 'get', 'cluster', self.name, '--output=yaml'] + stdout, _, _ = vm_util.IssueCommand(get_cmd, env=env) + spec = yaml.safe_load(stdout) + spec['metadata']['creationTimestamp'] = None + spec['spec']['api']['loadBalancer']['idleTimeoutSeconds'] = 3600 + benchmark_spec = context.GetThreadBenchmarkSpec() + spec['spec']['cloudLabels'] = { + 'owner': FLAGS.owner, + 'perfkitbenchmarker-run': FLAGS.run_uri, + 'benchmark': benchmark_spec.name, + 'perfkit_uuid': benchmark_spec.uuid, + 'benchmark_uid': benchmark_spec.uid + } + + # Replace the cluster spec. + with vm_util.NamedTemporaryFile() as tf: + yaml.dump(spec, tf) + tf.close() + replace_cmd = [FLAGS.kops, 'replace', '--filename=%s' % tf.name] + vm_util.IssueCommand(replace_cmd, env=env) + + # Create the actual cluster. 
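+    # 'kops update cluster --yes' applies the spec stored in the S3 state store
+    # (KOPS_STATE_STORE) and provisions the corresponding AWS resources.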
+ update_cmd = [FLAGS.kops, 'update', 'cluster', self.name, '--yes'] + vm_util.IssueCommand(update_cmd, env=env) + + def _Delete(self): + """Deletes the cluster.""" + super()._Delete() + delete_cmd = [ + FLAGS.kops, 'delete', 'cluster', + '--name=%s' % self.name, + '--state=s3://%s' % self.config_bucket, '--yes' + ] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _IsReady(self): + """Returns True if the cluster is ready, else False.""" + validate_cmd = [ + FLAGS.kops, 'validate', 'cluster', + '--name=%s' % self.name, + '--state=s3://%s' % self.config_bucket + ] + env = os.environ.copy() + env['KUBECONFIG'] = FLAGS.kubeconfig + _, _, retcode = vm_util.IssueCommand( + validate_cmd, env=env, suppress_warning=True, raise_on_failure=False) + return not retcode diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dax.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dax.py new file mode 100644 index 0000000..0104821 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dax.py @@ -0,0 +1,173 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS' DAX cluster. + +DAX cluster can be created and deleted. +""" + + +import json +import logging +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_iam_role +from perfkitbenchmarker.providers.aws import util + +_DAX_CLUSTER_NAME_TEMPLATE = 'pkb-dax-{uid}' +_DAX_SUBNET_GROUP_TEMPLATE = 'pkb-subnet-group-{uid}' +_DAX_SERVICE = 'dax.amazonaws.com' +_DAX_ACTION = 'dynamodb:*' +_DAX_TCP_PORT = 8011 +_DAX_ROLE_NAME_TEMPLATE = 'PkbDaxServiceRole{uid}' +_DAX_POLICY_NAME_TEMPLATE = 'PolicyForPkbDaxServiceRole{uid}' +_DAX_STATUS_AVAILABLE = 'available' +_DYNAMODB_RESOURCE_TEMPLATE = 'arn:aws:dynamodb:{region}:{account}:*' + +FLAGS = flags.FLAGS + + +class AwsDax(resource.BaseResource): + """Class representing an AWS Dax cluster.""" + + def __init__(self, benchmark_uid, zone, network): + super(AwsDax, self).__init__() + self.benchmark_uid = benchmark_uid + self.zone = zone + self.region = util.GetRegionFromZone(self.zone) + self.vpc = network.regional_network.vpc + self.subnet_id = network.subnet.id + self.account = util.GetAccount() + self.iam_role = aws_iam_role.AwsIamRole( + self.account, _DAX_ROLE_NAME_TEMPLATE.format(uid=self.benchmark_uid), + _DAX_POLICY_NAME_TEMPLATE.format(uid=self.benchmark_uid), _DAX_SERVICE, + _DAX_ACTION, + _DYNAMODB_RESOURCE_TEMPLATE.format( + region=self.region, account=self.account)) + self.cluster_endpoint = None + self.subnet_group_name = _DAX_SUBNET_GROUP_TEMPLATE.format( + uid=self.benchmark_uid) + self.cluster_name = _DAX_CLUSTER_NAME_TEMPLATE.format( + uid=self.benchmark_uid) + + def _CreateDependencies(self): + """See base class. + + Creates the IAM role and subnet group used by the DAX cluster. 
+ """ + + self.iam_role.Create() + cmd = util.AWS_PREFIX + [ + 'dax', 'create-subnet-group', '--subnet-group-name', + self.subnet_group_name, '--subnet-ids', self.subnet_id + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=True) + if retcode != 0: + logging.warning('Failed to create subnet group! %s', stderror) + + def _Create(self): + """See base class.""" + cmd = util.AWS_PREFIX + [ + 'dax', 'create-cluster', '--cluster-name', self.cluster_name, + '--node-type', FLAGS.aws_dax_node_type, '--replication-factor', + str(FLAGS.aws_dax_replication_factor), '--iam-role-arn', + self.iam_role.GetRoleArn(), '--subnet-group', self.subnet_group_name, + '--sse-specification', 'Enabled=true', '--region', self.region + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=True) + if retcode != 0: + logging.warning('Failed to create dax cluster! %s', stderror) + + def _DeleteDependencies(self): + """See base class.""" + cmd = util.AWS_PREFIX + [ + 'dax', 'delete-subnet-group', '--subnet-group-name', + self.subnet_group_name + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.warning('Failed to delete subnet group! %s', stderror) + + self.iam_role.Delete() + + def _Delete(self): + """See base class.""" + cmd = util.AWS_PREFIX + [ + 'dax', 'delete-cluster', '--cluster-name', self.cluster_name + ] + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.warning('Failed to delete dax cluster! %s', stderror) + + def _Exists(self): + """See base class.""" + cmd = util.AWS_PREFIX + [ + 'dax', 'describe-clusters', '--cluster-names', self.cluster_name + ] + _, _, retcode = vm_util.IssueCommand( + cmd, suppress_warning=True, raise_on_failure=False) + return not retcode + + def _IsReady(self): + """See base class. + + Returns: + True if the DAX cluster is ready. + """ + cmd = util.AWS_PREFIX + [ + 'dax', 'describe-clusters', '--cluster-names', self.cluster_name + ] + + stdout, _, retcode = vm_util.IssueCommand( + cmd, suppress_warning=True, raise_on_failure=False) + if retcode != 0 or not stdout: + return False + result = json.loads(stdout) + status = result['Clusters'][0]['Status'] + if not status: + return False + + if status == _DAX_STATUS_AVAILABLE and not self.cluster_endpoint: + endpoint = result['Clusters'][0]['ClusterDiscoveryEndpoint'] + self.cluster_endpoint = '{}:{}'.format(endpoint['Address'], + endpoint['Port']) + return status == _DAX_STATUS_AVAILABLE + + def _PostCreate(self): + """See base class. + + Enables the Dax Port on the security group's inbound rule. + """ + for security_group in self.vpc.GetSecurityGroups(): + if security_group['GroupName'] == 'default': + cmd = util.AWS_PREFIX + [ + 'ec2', 'authorize-security-group-ingress', '--group-id', + security_group['GroupId'], '--protocol', 'tcp', '--port', + str(_DAX_TCP_PORT) + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=True) + if retcode != 0: + logging.warning('Failed to config Dax port! 
%s', stderror) + + def GetClusterEndpoint(self): + """Returns the DAX cluster's endpoint.""" + if not self._IsReady(): + raise errors.Benchmarks.PrepareException( + 'GetEndpoint when preparing dax cluster: cluster not ready yet.') + return self.cluster_endpoint diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_disk.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_disk.py new file mode 100644 index 0000000..5fe8720 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_disk.py @@ -0,0 +1,565 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing classes related to AWS disks. + +Disks can be created, deleted, attached to VMs, and detached from VMs. +See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/EBSVolumeTypes.html to +determine valid disk types. +See http://aws.amazon.com/ebs/details/ for more information about AWS (EBS) +disks. +""" + +import json +import logging +import string +import threading + +from perfkitbenchmarker import disk +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.aws import util + + +class AwsStateRetryableError(Exception): + """Error for retrying when an AWS disk is in a transitional state.""" + +VOLUME_EXISTS_STATUSES = frozenset(['creating', 'available', 'in-use', 'error']) +VOLUME_DELETED_STATUSES = frozenset(['deleting', 'deleted']) +VOLUME_KNOWN_STATUSES = VOLUME_EXISTS_STATUSES | VOLUME_DELETED_STATUSES + +STANDARD = 'standard' +GP2 = 'gp2' +GP3 = 'gp3' +IO1 = 'io1' +IO2 = 'io2' +ST1 = 'st1' +SC1 = 'sc1' + +DISK_TYPE = { + disk.STANDARD: STANDARD, + disk.REMOTE_SSD: GP2, + disk.PIOPS: IO1 +} + +DISK_METADATA = { + STANDARD: { + disk.MEDIA: disk.HDD, + disk.REPLICATION: disk.ZONE, + }, + GP2: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + GP3: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + IO1: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + IO2: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + ST1: { + disk.MEDIA: disk.HDD, + disk.REPLICATION: disk.ZONE + }, + SC1: { + disk.MEDIA: disk.HDD, + disk.REPLICATION: disk.ZONE + } +} + +LOCAL_SSD_METADATA = { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.NONE, +} + +LOCAL_HDD_METADATA = { + disk.MEDIA: disk.HDD, + disk.REPLICATION: disk.NONE, +} + +LOCAL_HDD_PREFIXES = ['d2', 'hs1', 'h1', 'c1', 'cc2', 'm1', 'm2'] +# Following lists based on +# https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/instance-types.html +NON_EBS_NVME_TYPES = [ + 'c4', + 'd2', + 'f1', + 'g3', + 'h1', + 'i3', + 'm4', + 'p2', + 'p3', + 'r4', + 't2', + 'x1', + 'x1e', + 'm1', + 'm3', + 'c1', + 'cc2', + 'c3', + 'm2', + 'cr1', + 'r3', + 'hs1', + 'i2', + 'g2', + 't1', +] +NON_LOCAL_NVME_TYPES = LOCAL_HDD_PREFIXES + [ + 'c3', 'cr1', 'g2', 'i2', 'm3', 'r3', 'x1', 'x1e'] + +# Following dictionary based on +# 
https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/InstanceStorage.html +NUM_LOCAL_VOLUMES = { + 'c1.medium': 1, + 'c1.xlarge': 4, + 'c3.large': 2, + 'c3.xlarge': 2, + 'c3.2xlarge': 2, + 'c3.4xlarge': 2, + 'c3.8xlarge': 2, + 'cc2.8xlarge': 4, + 'cg1.4xlarge': 2, + 'cr1.8xlarge': 2, + 'g2.2xlarge': 1, + 'hi1.4xlarge': 2, + 'hs1.8xlarge': 24, + 'i2.xlarge': 1, + 'i2.2xlarge': 2, + 'i2.4xlarge': 4, + 'i2.8xlarge': 8, + 'm1.small': 1, + 'm1.medium': 1, + 'm1.large': 2, + 'm1.xlarge': 4, + 'm2.xlarge': 1, + 'm2.2xlarge': 1, + 'm2.4xlarge': 2, + 'm3.medium': 1, + 'm3.large': 1, + 'm3.xlarge': 2, + 'm3.2xlarge': 2, + 'r3.large': 1, + 'r3.xlarge': 1, + 'r3.2xlarge': 1, + 'r3.4xlarge': 1, + 'r3.8xlarge': 2, + 'd2.xlarge': 3, + 'd2.2xlarge': 6, + 'd2.4xlarge': 12, + 'd2.8xlarge': 24, + 'd3.xlarge': 3, + 'd3.2xlarge': 6, + 'd3.4xlarge': 12, + 'd3.8xlarge': 24, + 'd3en.large': 1, + 'd3en.xlarge': 2, + 'd3en.2xlarge': 4, + 'd3en.4xlarge': 8, + 'd3en.6xlarge': 12, + 'd3en.8xlarge': 16, + 'd3en.12xlarge': 24, + 'i3.large': 1, + 'i3.xlarge': 1, + 'i3.2xlarge': 1, + 'i3.4xlarge': 2, + 'i3.8xlarge': 4, + 'i3.16xlarge': 8, + 'i3.metal': 8, + 'i4i.large': 1, + 'i4i.xlarge': 1, + 'i4i.2xlarge': 1, + 'i4i.4xlarge': 1, + 'i4i.8xlarge': 2, + 'i4i.16xlarge': 4, + 'i4i.32xlarge': 8, + 'is4gen.medium': 1, + 'is4gen.large': 1, + 'is4gen.xlarge': 1, + 'is4gen.2xlarge': 1, + 'is4gen.4xlarge': 2, + 'is4gen.8xlarge': 4, + 'im4gn.large': 1, + 'im4gn.xlarge': 1, + 'im4gn.2xlarge': 1, + 'im4gn.4xlarge': 1, + 'im4gn.8xlarge': 2, + 'im4gn.16xlarge': 4, + 'i3en.large': 1, + 'i3en.xlarge': 1, + 'i3en.2xlarge': 2, + 'i3en.3xlarge': 1, + 'i3en.6xlarge': 2, + 'i3en.12xlarge': 4, + 'i3en.24xlarge': 8, + 'i3en.metal': 8, + 'c5ad.large': 1, + 'c5ad.xlarge': 1, + 'c5ad.2xlarge': 1, + 'c5ad.4xlarge': 2, + 'c5ad.8xlarge': 2, + 'c5ad.12xlarge': 2, + 'c5ad.16xlarge': 2, + 'c5ad.24xlarge': 2, + 'c5d.large': 1, + 'c5d.xlarge': 1, + 'c5d.2xlarge': 1, + 'c5d.4xlarge': 1, + 'c5d.9xlarge': 1, + 'c5d.12xlarge': 2, + 'c5d.18xlarge': 2, + 'c5d.24xlarge': 4, + 'c5d.metal': 4, + 'c6gd.large': 1, + 'c6gd.xlarge': 1, + 'c6gd.2xlarge': 1, + 'c6gd.4xlarge': 1, + 'c6gd.8xlarge': 1, + 'c6gd.12xlarge': 2, + 'c6gd.16xlarge': 2, + 'c6gd.metal': 2, + 'm5d.large': 1, + 'm5d.xlarge': 1, + 'm5d.2xlarge': 1, + 'm5d.4xlarge': 2, + 'm5d.8xlarge': 2, + 'm5d.12xlarge': 2, + 'm5d.24xlarge': 4, + 'm5d.metal': 4, + 'm5ad.large': 1, + 'm5ad.xlarge': 1, + 'm5ad.2xlarge': 1, + 'm5ad.4xlarge': 2, + 'm5ad.8xlarge': 2, + 'm5ad.12xlarge': 2, + 'm5ad.16xlarge': 4, + 'm5ad.24xlarge': 4, + 'm6gd.large': 1, + 'm6gd.xlarge': 1, + 'm6gd.2xlarge': 1, + 'm6gd.4xlarge': 1, + 'm6gd.8xlarge': 1, + 'm6gd.12xlarge': 2, + 'm6gd.16xlarge': 2, + 'm6gd.metal': 2, + 'r5d.large': 1, + 'r5d.xlarge': 1, + 'r5d.2xlarge': 1, + 'r5d.4xlarge': 2, + 'r5d.12xlarge': 2, + 'r5d.24xlarge': 4, + 'z1d.large': 1, + 'z1d.xlarge': 1, + 'z1d.2xlarge': 1, + 'z1d.3xlarge': 2, + 'z1d.6xlarge': 1, + 'z1d.12xlarge': 2, + 'x1.16xlarge': 1, + 'x1.32xlarge': 2, + 'x1e.xlarge': 1, + 'x1e.2xlarge': 1, + 'x1e.4xlarge': 1, + 'x1e.8xlarge': 1, + 'x1e.16xlarge': 1, + 'x1e.32xlarge': 2, + 'f1.2xlarge': 1, + 'f1.4xlarge': 1, + 'f1.16xlarge': 4, + 'p3dn.24xlarge': 2, + 'm5d.metal': 4, + 'c5d.metal': 4, + 'm6gd.metal': 2, + 'm5dn.large': 1, + 'm5dn.xlarge': 1, + 'm5dn.2xlarge': 1, + 'm5dn.4xlarge': 2, + 'm5dn.8xlarge': 2, + 'm5dn.12xlarge': 2, + 'm5dn.16xlarge': 4, + 'm5dn.24xlarge': 4, + 'm5dn.metal': 4, + 'p4d.24xlarge': 8 +} + + +def LocalDiskIsHDD(machine_type): + """Check whether the local disks use spinning magnetic 
storage.""" + return machine_type.split('.')[0].lower() in LOCAL_HDD_PREFIXES + + +def LocalDriveIsNvme(machine_type): + """Check if the machine type uses NVMe driver.""" + return machine_type.split('.')[0].lower() not in NON_LOCAL_NVME_TYPES + + +def EbsDriveIsNvme(machine_type): + """Check if the machine type uses NVMe driver.""" + instance_family = machine_type.split('.')[0].lower() + return (instance_family not in NON_EBS_NVME_TYPES or + 'metal' in machine_type) + + +AWS = 'AWS' +disk.RegisterDiskTypeMap(AWS, DISK_TYPE) + + +class AwsDiskSpec(disk.BaseDiskSpec): + """Object holding the information needed to create an AwsDisk. + + Attributes: + iops: None or int. IOPS for Provisioned IOPS (SSD) volumes in AWS. + throughput: None or int. Throughput for (SSD) volumes in AWS. + """ + + CLOUD = providers.AWS + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(AwsDiskSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['aws_provisioned_iops'].present: + config_values['iops'] = flag_values.aws_provisioned_iops + if flag_values['aws_provisioned_throughput'].present: + config_values['throughput'] = flag_values.aws_provisioned_throughput + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. 
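+
+        For illustration (shape only; these are the AWS-specific entries
+        added by this override), an entry looks like:
+
+          'iops': (option_decoders.IntDecoder,
+                   {'default': None, 'none_ok': True})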
+ """ + result = super(AwsDiskSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'iops': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }) + }) + result.update({ + 'throughput': (option_decoders.IntDecoder, { + 'default': None, + 'none_ok': True + }) + }) + return result + + +class AwsDisk(disk.BaseDisk): + """Object representing an Aws Disk.""" + + _lock = threading.Lock() + vm_devices = {} + + def __init__(self, disk_spec, zone, machine_type): + super(AwsDisk, self).__init__(disk_spec) + self.iops = disk_spec.iops + self.throughput = disk_spec.throughput + self.id = None + self.zone = zone + self.region = util.GetRegionFromZone(zone) + self.device_letter = None + self.attached_vm_id = None + self.machine_type = machine_type + if self.disk_type != disk.LOCAL: + self.metadata.update(DISK_METADATA.get(self.disk_type, {})) + else: + self.metadata.update((LOCAL_HDD_METADATA + if LocalDiskIsHDD(machine_type) + else LOCAL_SSD_METADATA)) + if self.iops: + self.metadata['iops'] = self.iops + if self.throughput: + self.metadata['throughput'] = self.throughput + + def AssignDeviceLetter(self, letter_suggestion, nvme_boot_drive_index): + if (LocalDriveIsNvme(self.machine_type) and + EbsDriveIsNvme(self.machine_type)): + first_device_letter = 'b' + local_drive_number = ord(letter_suggestion) - ord(first_device_letter) + logging.info('local drive number is: %d', local_drive_number) + if local_drive_number < nvme_boot_drive_index: + self.device_letter = letter_suggestion + else: + # skip the boot drive + self.device_letter = chr(ord(letter_suggestion) + 1) + else: + self.device_letter = letter_suggestion + + def _Create(self): + """Creates the disk.""" + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-volume', + '--region=%s' % self.region, + '--size=%s' % self.disk_size, + '--volume-type=%s' % self.disk_type] + if not util.IsRegion(self.zone): + create_cmd.append('--availability-zone=%s' % self.zone) + if self.disk_type in [IO1, IO2]: + create_cmd.append('--iops=%s' % self.iops) + if self.disk_type == GP3 and self.iops: + create_cmd.append('--iops=%s' % self.iops) + if self.disk_type == GP3 and self.throughput: + create_cmd.append('--throughput=%s' % self.throughput) + stdout, _, _ = vm_util.IssueCommand(create_cmd) + response = json.loads(stdout) + self.id = response['VolumeId'] + util.AddDefaultTags(self.id, self.region) + + def _Delete(self): + """Deletes the disk.""" + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'delete-volume', + '--region=%s' % self.region, + '--volume-id=%s' % self.id] + logging.info('Deleting AWS volume %s. This may fail if the disk is not ' + 'yet detached, but will be retried.', self.id) + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the disk exists.""" + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-volumes', + '--region=%s' % self.region, + '--filter=Name=volume-id,Values=%s' % self.id] + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + volumes = response['Volumes'] + assert len(volumes) < 2, 'Too many volumes.' + if not volumes: + return False + status = volumes[0]['State'] + assert status in VOLUME_KNOWN_STATUSES, status + return status in VOLUME_EXISTS_STATUSES + + @vm_util.Retry( + poll_interval=0.5, + log_errors=True, + retryable_exceptions=(AwsStateRetryableError,)) + def _WaitForAttachedState(self): + """Returns if the state of the disk is attached. + + Returns: + Whether the disk is in an attached state. 
If not, raises an + error. + + Raises: + AwsUnknownStatusError: If an unknown status is returned from AWS. + AwsStateRetryableError: If the disk attach is pending. This is retried. + """ + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-volumes', + '--region=%s' % self.region, + '--volume-ids=%s' % self.id, + ] + + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + status = response['Volumes'][0]['Attachments'][0]['State'] + if status.lower() != 'attached': + logging.info('Disk (id:%s) attaching to ' + 'VM (id:%s) has status %s.', + self.id, self.attached_vm_id, status) + + raise AwsStateRetryableError() + + def Attach(self, vm): + """Attaches the disk to a VM. + + Args: + vm: The AwsVirtualMachine instance to which the disk will be attached. + """ + with self._lock: + self.attached_vm_id = vm.id + if self.attached_vm_id not in AwsDisk.vm_devices: + AwsDisk.vm_devices[self.attached_vm_id] = set( + string.ascii_lowercase) + self.device_letter = min(AwsDisk.vm_devices[self.attached_vm_id]) + AwsDisk.vm_devices[self.attached_vm_id].remove(self.device_letter) + + device_name = '/dev/xvdb%s' % self.device_letter + attach_cmd = util.AWS_PREFIX + [ + 'ec2', + 'attach-volume', + '--region=%s' % self.region, + '--instance-id=%s' % self.attached_vm_id, + '--volume-id=%s' % self.id, + '--device=%s' % device_name] + logging.info('Attaching AWS volume %s. This may fail if the disk is not ' + 'ready, but will be retried.', self.id) + util.IssueRetryableCommand(attach_cmd) + self._WaitForAttachedState() + + def Detach(self): + """Detaches the disk from a VM.""" + detach_cmd = util.AWS_PREFIX + [ + 'ec2', + 'detach-volume', + '--region=%s' % self.region, + '--instance-id=%s' % self.attached_vm_id, + '--volume-id=%s' % self.id] + util.IssueRetryableCommand(detach_cmd) + + with self._lock: + assert self.attached_vm_id in AwsDisk.vm_devices + AwsDisk.vm_devices[self.attached_vm_id].add(self.device_letter) + self.attached_vm_id = None + self.device_letter = None + + def GetDevicePath(self): + """Returns the path to the device inside the VM.""" + if self.disk_type == disk.LOCAL: + if LocalDriveIsNvme(self.machine_type): + first_device_letter = 'b' + return '/dev/nvme%sn1' % str( + ord(self.device_letter) - ord(first_device_letter)) + return '/dev/xvd%s' % self.device_letter + else: + if EbsDriveIsNvme(self.machine_type): + first_device_letter = 'a' + # Modified by Cumulus - upstream code breaks on instances with EBS-only NVMe drives + if self.machine_type in NUM_LOCAL_VOLUMES: + return '/dev/nvme%sn1' % ( + 1 + NUM_LOCAL_VOLUMES[self.machine_type] + + ord(self.device_letter) - ord(first_device_letter)) + else: + return '/dev/nvme%sn1' % ( + 1 + ord(self.device_letter) - ord(first_device_letter)) + # End Cumulus mods + else: + return '/dev/xvdb%s' % self.device_letter diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dpb_emr.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dpb_emr.py new file mode 100644 index 0000000..87e90eb --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dpb_emr.py @@ -0,0 +1,409 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS's EMR service. + +Clusters can be created and deleted. +""" + +import collections +import json +import logging +from typing import Optional + +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import dpb_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_disk +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import aws_virtual_machine +from perfkitbenchmarker.providers.aws import s3 +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS +flags.DEFINE_string('dpb_emr_release_label', None, + 'DEPRECATED use dpb_service.version.') + +INVALID_STATES = ['TERMINATED_WITH_ERRORS', 'TERMINATED'] +READY_CHECK_SLEEP = 30 +READY_CHECK_TRIES = 60 +READY_STATE = 'WAITING' +JOB_WAIT_SLEEP = 30 +EMR_TIMEOUT = 14400 + +disk_to_hdfs_map = { + aws_disk.ST1: 'HDD (ST1)', + aws_disk.GP2: 'SSD (GP2)', + disk.LOCAL: 'Local SSD', +} + +DATAPROC_TO_EMR_CONF_FILES = { + # https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html + 'core': 'core-site', + 'hdfs': 'hdfs-site', + # https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-spark-configure.html + 'spark': 'spark-defaults', +} + + +def _GetClusterConfiguration(): + """Return a JSON string containing dpb_cluster_properties.""" + properties = collections.defaultdict(lambda: {}) + for entry in FLAGS.dpb_cluster_properties: + file, kv = entry.split(':') + key, value = kv.split('=') + if file not in DATAPROC_TO_EMR_CONF_FILES: + raise errors.Config.InvalidValue( + 'Unsupported EMR configuration file "{}". '.format(file) + + 'Please add it to aws_dpb_emr.DATAPROC_TO_EMR_CONF_FILES.') + properties[DATAPROC_TO_EMR_CONF_FILES[file]][key] = value + json_conf = [] + for file, props in properties.items(): + json_conf.append({ + # https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-configure-apps.html + 'Classification': file, + 'Properties': props, + }) + return json.dumps(json_conf) + + +class EMRRetryableException(Exception): + pass + + +class AwsDpbEmr(dpb_service.BaseDpbService): + """Object representing a AWS EMR cluster. + + Attributes: + cluster_id: ID of the cluster. + project: ID of the project in which the cluster is being launched. + dpb_service_type: Set to 'emr'. + cmd_prefix: Setting default prefix for the emr commands (region optional). + network: Dedicated network for the EMR cluster + storage_service: Region specific instance of S3 for bucket management. + bucket_to_delete: Cluster associated bucket to be cleaned up. + dpb_version: EMR version to use. 
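+
+  The release label used for provisioning resolves from the deprecated
+  --dpb_emr_release_label flag, falling back to dpb_service.version
+  (illustrative form: an 'emr-x.y.z' style label).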
+ """ + + CLOUD = providers.AWS + SERVICE_TYPE = 'emr' + + def __init__(self, dpb_service_spec): + super(AwsDpbEmr, self).__init__(dpb_service_spec) + self.dpb_service_type = AwsDpbEmr.SERVICE_TYPE + self.project = None + self.cmd_prefix = list(util.AWS_PREFIX) + if self.dpb_service_zone: + self.region = util.GetRegionFromZone(self.dpb_service_zone) + else: + raise errors.Setup.InvalidSetupError( + 'dpb_service_zone must be provided, for provisioning.') + self.cmd_prefix += ['--region', self.region] + self.network = aws_network.AwsNetwork.GetNetworkFromNetworkSpec( + aws_network.AwsNetworkSpec(zone=self.dpb_service_zone)) + self.storage_service = s3.S3Service() + self.storage_service.PrepareService(self.region) + self.persistent_fs_prefix = 's3://' + self.bucket_to_delete = None + self.dpb_version = FLAGS.dpb_emr_release_label or self.dpb_version + self._cluster_create_time = None + if not self.dpb_version: + raise errors.Setup.InvalidSetupError( + 'dpb_service.version must be provided.') + + def GetClusterCreateTime(self) -> Optional[float]: + """Returns the cluster creation time. + + On this implementation, the time returned is based on the timestamps + reported by the EMR API (which is stored in the _cluster_create_time + attribute). + + Returns: + A float representing the creation time in seconds or None. + """ + return self._cluster_create_time + + @staticmethod + def CheckPrerequisites(benchmark_config): + del benchmark_config # Unused + + @property + def security_group_id(self): + """Returns the security group ID of this Cluster.""" + return self.network.regional_network.vpc.default_security_group_id + + def _CreateDependencies(self): + """Set up the ssh key.""" + super(AwsDpbEmr, self)._CreateDependencies() + aws_virtual_machine.AwsKeyFileManager.ImportKeyfile(self.region) + + def _Create(self): + """Creates the cluster.""" + name = 'pkb_' + FLAGS.run_uri + + # Set up ebs details if disk_spec is present in the config + ebs_configuration = None + if self.spec.worker_group.disk_spec: + # Make sure nothing we are ignoring is included in the disk spec + assert self.spec.worker_group.disk_spec.device_path is None + assert self.spec.worker_group.disk_spec.disk_number is None + assert self.spec.worker_group.disk_spec.iops is None + self.dpb_hdfs_type = disk_to_hdfs_map[ + self.spec.worker_group.disk_spec.disk_type] + if self.spec.worker_group.disk_spec.disk_type != disk.LOCAL: + ebs_configuration = {'EbsBlockDeviceConfigs': [ + {'VolumeSpecification': { + 'SizeInGB': self.spec.worker_group.disk_spec.disk_size, + 'VolumeType': self.spec.worker_group.disk_spec.disk_type}, + 'VolumesPerInstance': self.spec.worker_group.disk_count}]} + + # Create the specification for the master and the worker nodes + instance_groups = [] + core_instances = {'InstanceCount': self.spec.worker_count, + 'InstanceGroupType': 'CORE', + 'InstanceType': + self.spec.worker_group.vm_spec.machine_type} + if ebs_configuration: + core_instances.update({'EbsConfiguration': ebs_configuration}) + + master_instance = {'InstanceCount': 1, + 'InstanceGroupType': 'MASTER', + 'InstanceType': + self.spec.worker_group.vm_spec.machine_type} + if ebs_configuration: + master_instance.update({'EbsConfiguration': ebs_configuration}) + + instance_groups.append(core_instances) + instance_groups.append(master_instance) + + # Spark SQL needs to access Hive + cmd = self.cmd_prefix + ['emr', 'create-cluster', '--name', name, + '--release-label', self.dpb_version, + '--use-default-roles', + '--instance-groups', + json.dumps(instance_groups), 
+ '--application', 'Name=Spark', + 'Name=Hadoop', 'Name=Hive', + '--log-uri', self.base_dir] + + ec2_attributes = [ + 'KeyName=' + aws_virtual_machine.AwsKeyFileManager.GetKeyNameForRun(), + 'SubnetId=' + self.network.subnet.id, + # Place all VMs in default security group for simplicity and speed of + # provisioning + 'EmrManagedMasterSecurityGroup=' + self.security_group_id, + 'EmrManagedSlaveSecurityGroup=' + self.security_group_id, + ] + cmd += ['--ec2-attributes', ','.join(ec2_attributes)] + + if FLAGS.dpb_cluster_properties: + cmd += ['--configurations', _GetClusterConfiguration()] + + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + self.cluster_id = result['ClusterId'] + logging.info('Cluster created with id %s', self.cluster_id) + for tag_key, tag_value in util.MakeDefaultTags().items(): + self._AddTag(tag_key, tag_value) + + def _AddTag(self, key, value): + cmd = self.cmd_prefix + ['emr', 'add-tags', + '--resource-id', self.cluster_id, + '--tag', + '{}={}'.format(key, value)] + vm_util.IssueCommand(cmd) + + def _Delete(self): + if self.cluster_id: + delete_cmd = self.cmd_prefix + ['emr', + 'terminate-clusters', + '--cluster-ids', + self.cluster_id] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _DeleteDependencies(self): + super(AwsDpbEmr, self)._DeleteDependencies() + aws_virtual_machine.AwsKeyFileManager.DeleteKeyfile(self.region) + + def _Exists(self): + """Check to see whether the cluster exists.""" + if not self.cluster_id: + return False + cmd = self.cmd_prefix + ['emr', + 'describe-cluster', + '--cluster-id', + self.cluster_id] + stdout, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + return False + result = json.loads(stdout) + if result['Cluster']['Status']['State'] in INVALID_STATES: + return False + else: + return True + + def _IsReady(self): + """Check to see if the cluster is ready.""" + logging.info('Checking _Ready cluster: %s', self.cluster_id) + cmd = self.cmd_prefix + ['emr', + 'describe-cluster', '--cluster-id', + self.cluster_id] + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + # TODO(saksena): Handle error outcomees when spinning up emr clusters + is_ready = result['Cluster']['Status']['State'] == READY_STATE + if is_ready: + self._cluster_create_time = self._ParseClusterCreateTime(result) + return is_ready + + @classmethod + def _ParseClusterCreateTime(cls, data) -> Optional[float]: + """Parses the cluster create time from an API response dict.""" + creation_ts = None + ready_ts = None + try: + creation_ts = data['Cluster']['Status']['Timeline']['CreationDateTime'] + ready_ts = data['Cluster']['Status']['Timeline']['ReadyDateTime'] + return ready_ts - creation_ts + except (LookupError, TypeError): + return None + + def _GetCompletedJob(self, job_id): + """See base class.""" + cmd = self.cmd_prefix + [ + 'emr', 'describe-step', '--cluster-id', self.cluster_id, '--step-id', + job_id + ] + stdout, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode: + if 'ThrottlingException' in stderr: + logging.warning('Rate limited while polling EMR step:\n%s\nRetrying.', + stderr) + return None + else: + raise errors.VmUtil.IssueCommandError( + f'Getting step status failed:\n{stderr}') + result = json.loads(stdout) + state = result['Step']['Status']['State'] + if state == 'FAILED': + raise dpb_service.JobSubmissionError( + result['Step']['Status']['FailureDetails']) + if state == 'COMPLETED': + pending_time = 
result['Step']['Status']['Timeline']['CreationDateTime'] + start_time = result['Step']['Status']['Timeline']['StartDateTime'] + end_time = result['Step']['Status']['Timeline']['EndDateTime'] + return dpb_service.JobResult( + run_time=end_time - start_time, + pending_time=start_time - pending_time) + + def SubmitJob(self, + jarfile=None, + classname=None, + pyspark_file=None, + query_file=None, + job_poll_interval=5, + job_arguments=None, + job_files=None, + job_jars=None, + job_stdout_file=None, + job_type=None, + properties=None): + """See base class.""" + if job_arguments: + # Escape commas in arguments + job_arguments = (arg.replace(',', '\\,') for arg in job_arguments) + + all_properties = self.GetJobProperties() + all_properties.update(properties or {}) + + if job_type == 'hadoop': + if not (jarfile or classname): + raise ValueError('You must specify jarfile or classname.') + if jarfile and classname: + raise ValueError('You cannot specify both jarfile and classname.') + arg_list = [] + # Order is important + if classname: + # EMR does not support passing classnames as jobs. Instead manually + # invoke `hadoop CLASSNAME` using command-runner.jar + jarfile = 'command-runner.jar' + arg_list = ['hadoop', classname] + # Order is important + arg_list += ['-D{}={}'.format(k, v) for k, v in all_properties.items()] + if job_arguments: + arg_list += job_arguments + arg_spec = 'Args=[' + ','.join(arg_list) + ']' + step_list = ['Jar=' + jarfile, arg_spec] + elif job_type == self.SPARK_JOB_TYPE: + arg_list = [] + if job_files: + arg_list += ['--files', ','.join(job_files)] + if job_jars: + arg_list += ['--jars', ','.join(job_jars)] + for k, v in all_properties.items(): + arg_list += ['--conf', '{}={}'.format(k, v)] + # jarfile must be last before args + arg_list += ['--class', classname, jarfile] + if job_arguments: + arg_list += job_arguments + arg_spec = '[' + ','.join(arg_list) + ']' + step_type_spec = 'Type=Spark' + step_list = [step_type_spec, 'Args=' + arg_spec] + elif job_type == self.PYSPARK_JOB_TYPE: + arg_list = [] + if job_files: + arg_list += ['--files', ','.join(job_files)] + if job_jars: + arg_list += ['--jars', ','.join(job_jars)] + for k, v in all_properties.items(): + arg_list += ['--conf', '{}={}'.format(k, v)] + # pyspark_file must be last before args + arg_list += [pyspark_file] + if job_arguments: + arg_list += job_arguments + arg_spec = 'Args=[{}]'.format(','.join(arg_list)) + step_list = ['Type=Spark', arg_spec] + elif job_type == self.SPARKSQL_JOB_TYPE: + assert not job_arguments + arg_list = [query_file] + jar_spec = 'Jar="command-runner.jar"' + for k, v in all_properties.items(): + arg_list += ['--conf', '{}={}'.format(k, v)] + arg_spec = 'Args=[spark-sql,-f,{}]'.format(','.join(arg_list)) + step_list = [jar_spec, arg_spec] + + step_string = ','.join(step_list) + + step_cmd = self.cmd_prefix + ['emr', + 'add-steps', + '--cluster-id', + self.cluster_id, + '--steps', + step_string] + stdout, _, _ = vm_util.IssueCommand(step_cmd) + result = json.loads(stdout) + step_id = result['StepIds'][0] + return self._WaitForJob(step_id, EMR_TIMEOUT, job_poll_interval) + + def DistributedCopy(self, source, destination): + """Method to copy data using a distributed job on the cluster.""" + job_arguments = ['s3-dist-cp'] + job_arguments.append('--src={}'.format(source)) + job_arguments.append('--dest={}'.format(destination)) + return self.SubmitJob( + 'command-runner.jar', + job_arguments=job_arguments, + job_type=dpb_service.BaseDpbService.HADOOP_JOB_TYPE) diff --git 
a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dynamodb.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dynamodb.py new file mode 100644 index 0000000..35282ea --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_dynamodb.py @@ -0,0 +1,494 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS' dynamodb tables. + +Tables can be created and deleted. +""" + +import json +import logging +from typing import Any, Collection, Dict, List, Optional, Tuple, Sequence + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import non_relational_db +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS +flags.DEFINE_string( + 'aws_dynamodb_primarykey', None, + 'The primaryKey of dynamodb table. This switches to sortkey if using sort.' + 'If testing GSI/LSI, use the range keyname of the index you want to test.' + 'Defaults to primary_key') +flags.DEFINE_boolean( + 'aws_dynamodb_use_sort', None, + 'Determine whether to use sort key or not. Defaults to False.') +flags.DEFINE_string( + 'aws_dynamodb_sortkey', None, + 'The sortkey of dynamodb table. This switches to primarykey if using sort.' + 'If testing GSI/LSI, use the primary keyname of the index you want to test.' + 'Defaults to sort_key.') +flags.DEFINE_enum( + 'aws_dynamodb_attributetype', None, ['S', 'N', 'B'], + 'The type of attribute, default to S (String).' + 'Alternates are N (Number) and B (Binary).' + 'Defaults to S.') +flags.DEFINE_integer('aws_dynamodb_read_capacity', None, + 'Set RCU for dynamodb table. Defaults to 25.') +flags.DEFINE_integer('aws_dynamodb_write_capacity', None, + 'Set WCU for dynamodb table. Defaults to 25.') +flags.DEFINE_integer('aws_dynamodb_lsi_count', None, + 'Set amount of Local Secondary Indexes. Only set 0-5.' + 'Defaults to 0.') +flags.DEFINE_integer('aws_dynamodb_gsi_count', None, + 'Set amount of Global Secondary Indexes. Only set 0-5.' 
+ 'Defaults to 0.') + +# Throughput constants +_FREE_TIER_RCU = 25 +_FREE_TIER_WCU = 25 + +_DEFAULT_ZONE = 'us-east-1b' + + +class DynamoDbSpec(non_relational_db.BaseNonRelationalDbSpec): + """Configurable options of a DynamoDB instance.""" + + SERVICE_TYPE = non_relational_db.DYNAMODB + + table_name: str + zone: str + rcu: int + wcu: int + primary_key: str + sort_key: str + attribute_type: str + lsi_count: int + gsi_count: int + use_sort: bool + + def __init__(self, component_full_name, flag_values, **kwargs): + super().__init__(component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes / constructor args for each configurable option.""" + result = super()._GetOptionDecoderConstructions() + none_ok = {'default': None, 'none_ok': False} + result.update({ + 'table_name': (option_decoders.StringDecoder, none_ok), + 'zone': (option_decoders.StringDecoder, none_ok), + 'rcu': (option_decoders.IntDecoder, none_ok), + 'wcu': (option_decoders.IntDecoder, none_ok), + 'primary_key': (option_decoders.StringDecoder, none_ok), + 'sort_key': (option_decoders.StringDecoder, none_ok), + 'attribute_type': (option_decoders.StringDecoder, none_ok), + 'lsi_count': (option_decoders.IntDecoder, none_ok), + 'gsi_count': (option_decoders.IntDecoder, none_ok), + 'use_sort': (option_decoders.BooleanDecoder, none_ok), + }) + return result + + @classmethod + def _ValidateConfig(cls, config_values) -> None: + if 'lsi_count' in config_values: + if not -1 < config_values['lsi_count'] < 6: + raise errors.Config.InvalidValue('lsi_count must be from 0-5') + if (not config_values.get('use_sort', False) and + config_values['lsi_count'] != 0): + raise errors.Config.InvalidValue('lsi_count requires use_sort=True') + if not -1 < config_values.get('gsi_count', 0) < 6: + raise errors.Config.InvalidValue('gsi_count must be from 0-5') + + @classmethod + def _ApplyFlags(cls, config_values, flag_values) -> None: + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super()._ApplyFlags(config_values, flag_values) + option_name_from_flag = { + 'aws_dynamodb_read_capacity': 'rcu', + 'aws_dynamodb_write_capacity': 'wcu', + 'aws_dynamodb_primarykey': 'primary_key', + 'aws_dynamodb_sortkey': 'sort_key', + 'aws_dynamodb_attributetype': 'attribute_type', + 'aws_dynamodb_lsi_count': 'lsi_count', + 'aws_dynamodb_gsi_count': 'gsi_count', + 'aws_dynamodb_use_sort': 'use_sort', + } + for flag_name, option_name in option_name_from_flag.items(): + if flag_values[flag_name].present: + config_values[option_name] = flag_values[flag_name].value + + # Handle the zone flag. 
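+    # Note: only the first zone is used; e.g. --zones=us-east-1a,us-east-1b
+    # yields config_values['zone'] == 'us-east-1a'.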
+ for zone_flag_name in ['zone', 'zones']: + if flag_values[zone_flag_name].present: + config_values['zone'] = flag_values[zone_flag_name].value[0] + + cls._ValidateConfig(config_values) + + def __repr__(self) -> str: + return str(self.__dict__) + + +class AwsDynamoDBInstance(non_relational_db.BaseNonRelationalDb): + """Class for working with DynamoDB.""" + SERVICE_TYPE = non_relational_db.DYNAMODB + + def __init__(self, + table_name: Optional[str] = None, + zone: Optional[str] = None, + rcu: Optional[int] = None, + wcu: Optional[int] = None, + primary_key: Optional[str] = None, + sort_key: Optional[str] = None, + attribute_type: Optional[str] = None, + lsi_count: Optional[int] = None, + gsi_count: Optional[int] = None, + use_sort: Optional[bool] = None, + **kwargs): + super(AwsDynamoDBInstance, self).__init__(**kwargs) + self.table_name = table_name or f'pkb-{FLAGS.run_uri}' + self.zone = zone or _DEFAULT_ZONE + self.region = util.GetRegionFromZone(self.zone) + self.resource_arn: str = None # Set during the _Exists() call. + + self.rcu = rcu or _FREE_TIER_RCU + self.wcu = wcu or _FREE_TIER_WCU + self.throughput = ( + f'ReadCapacityUnits={self.rcu},WriteCapacityUnits={self.wcu}') + + self.primary_key = primary_key or 'primary_key' + self.sort_key = sort_key or 'sort_key' + self.use_sort = use_sort or False + self.attribute_type = attribute_type or 'S' + + self.lsi_count = lsi_count or 0 + self.lsi_indexes = self._CreateLocalSecondaryIndex() + self.gsi_count = gsi_count or 0 + self.gsi_indexes = self._CreateGlobalSecondaryIndex() + + @classmethod + def FromSpec(cls, spec: DynamoDbSpec) -> 'AwsDynamoDBInstance': + return cls( + table_name=spec.table_name, + zone=spec.zone, + rcu=spec.rcu, + wcu=spec.wcu, + primary_key=spec.primary_key, + sort_key=spec.sort_key, + attribute_type=spec.attribute_type, + lsi_count=spec.lsi_count, + gsi_count=spec.gsi_count, + use_sort=spec.use_sort, + enable_freeze_restore=spec.enable_freeze_restore, + create_on_restore_error=spec.create_on_restore_error, + delete_on_freeze_error=spec.delete_on_freeze_error) + + def _CreateLocalSecondaryIndex(self) -> List[str]: + """Used to create local secondary indexes.""" + lsi_items = [] + lsi_entry = [] + attr_list = [] + for lsi in range(0, self.lsi_count): + lsi_item = json.dumps({ + 'IndexName': f'lsiidx{str(lsi)}', + 'KeySchema': [{ + 'AttributeName': self.primary_key, + 'KeyType': 'HASH' + }, { + 'AttributeName': f'lattr{str(lsi)}', + 'KeyType': 'RANGE' + }], + 'Projection': { + 'ProjectionType': 'KEYS_ONLY' + } + }) + lsi_entry.append(lsi_item) + attr_list.append( + json.dumps({ + 'AttributeName': f'lattr{str(lsi)}', + 'AttributeType': self.attribute_type + })) + lsi_items.append('[' + ','.join(lsi_entry) + ']') + lsi_items.append(','.join(attr_list)) + return lsi_items + + def _CreateGlobalSecondaryIndex(self) -> List[str]: + """Used to create global secondary indexes.""" + gsi_items = [] + gsi_entry = [] + attr_list = [] + for gsi in range(0, self.gsi_count): + gsi_item = json.dumps({ + 'IndexName': f'gsiidx{str(gsi)}', + 'KeySchema': [{ + 'AttributeName': f'gsikey{str(gsi)}', + 'KeyType': 'HASH' + }, { + 'AttributeName': f'gattr{str(gsi)}', + 'KeyType': 'RANGE' + }], + 'Projection': { + 'ProjectionType': 'KEYS_ONLY' + }, + 'ProvisionedThroughput': { + 'ReadCapacityUnits': 5, + 'WriteCapacityUnits': 5 + } + }) + gsi_entry.append(gsi_item) + attr_list.append( + json.dumps({ + 'AttributeName': f'gattr{str(gsi)}', + 'AttributeType': self.attribute_type + })) + attr_list.append( + json.dumps({ + 'AttributeName': 
f'gsikey{str(gsi)}', + 'AttributeType': self.attribute_type + })) + gsi_items.append('[' + ','.join(gsi_entry) + ']') + gsi_items.append(','.join(attr_list)) + return gsi_items + + def _SetAttrDefnArgs(self, cmd: List[str], args: Sequence[str]) -> None: + attr_def_args = _MakeArgs(args) + cmd[10] = f'[{attr_def_args}]' + logging.info('adding to --attribute-definitions') + + def _SetKeySchemaArgs(self, cmd: List[str], args: Sequence[str]) -> None: + key_schema_args = _MakeArgs(args) + cmd[12] = f'[{key_schema_args}]' + logging.info('adding to --key-schema') + + def _PrimaryKeyJson(self) -> str: + return json.dumps({'AttributeName': self.primary_key, 'KeyType': 'HASH'}) + + def _PrimaryAttrsJson(self) -> str: + return json.dumps({ + 'AttributeName': self.primary_key, + 'AttributeType': self.attribute_type + }) + + def _SortAttrsJson(self) -> str: + return json.dumps({ + 'AttributeName': self.sort_key, + 'AttributeType': self.attribute_type + }) + + def _SortKeyJson(self) -> str: + return json.dumps({'AttributeName': self.sort_key, 'KeyType': 'RANGE'}) + + def _Create(self) -> None: + """Creates the dynamodb table.""" + cmd = util.AWS_PREFIX + [ + 'dynamodb', + 'create-table', + '--region', self.region, + '--table-name', self.table_name, + '--attribute-definitions', self._PrimaryAttrsJson(), + '--key-schema', self._PrimaryKeyJson(), + '--provisioned-throughput', self.throughput, + '--tags' + ] + util.MakeFormattedDefaultTags() + if self.lsi_count > 0 and self.use_sort: + self._SetAttrDefnArgs(cmd, [ + self._PrimaryAttrsJson(), + self._SortAttrsJson(), self.lsi_indexes[1] + ]) + cmd.append('--local-secondary-indexes') + cmd.append(self.lsi_indexes[0]) + self._SetKeySchemaArgs( + cmd, [self._PrimaryKeyJson(), + self._SortKeyJson()]) + elif self.use_sort: + self._SetAttrDefnArgs( + cmd, [self._PrimaryAttrsJson(), + self._SortAttrsJson()]) + self._SetKeySchemaArgs( + cmd, [self._PrimaryKeyJson(), + self._SortKeyJson()]) + if self.gsi_count > 0: + self._SetAttrDefnArgs( + cmd, cmd[10].strip('[]').split(',') + [self.gsi_indexes[1]]) + cmd.append('--global-secondary-indexes') + cmd.append(self.gsi_indexes[0]) + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.warning('Failed to create table! 
%s', stderror) + + def _Delete(self) -> None: + """Deletes the dynamodb table.""" + cmd = util.AWS_PREFIX + [ + 'dynamodb', + 'delete-table', + '--region', self.region, + '--table-name', self.table_name] + logging.info('Attempting deletion: ') + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _IsReady(self) -> bool: + """Check if dynamodb table is ready.""" + logging.info('Getting table ready status for %s', self.table_name) + cmd = util.AWS_PREFIX + [ + 'dynamodb', + 'describe-table', + '--region', self.region, + '--table-name', self.table_name] + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + return result['Table']['TableStatus'] == 'ACTIVE' + + def _Exists(self) -> bool: + """Returns true if the dynamodb table exists.""" + logging.info('Checking if table %s exists', self.table_name) + result = self._DescribeTable() + if not result: + return False + if not self.resource_arn: + self.resource_arn = result['TableArn'] + return True + + def _DescribeTable(self) -> Dict[Any, Any]: + """Calls describe on dynamodb table.""" + cmd = util.AWS_PREFIX + [ + 'dynamodb', + 'describe-table', + '--region', self.region, + '--table-name', self.table_name] + stdout, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.info('Could not find table %s, %s', self.table_name, stderr) + return {} + return json.loads(stdout)['Table'] + + def GetEndPoint(self) -> str: + return f'http://dynamodb.{self.region}.amazonaws.com' + + def GetResourceMetadata(self) -> Dict[str, Any]: + """Returns a dict containing metadata about the dynamodb instance. + + Returns: + dict mapping string property key to value. + """ + return { + 'aws_dynamodb_primarykey': self.primary_key, + 'aws_dynamodb_use_sort': self.use_sort, + 'aws_dynamodb_sortkey': self.sort_key, + 'aws_dynamodb_attributetype': self.attribute_type, + 'aws_dynamodb_read_capacity': self.rcu, + 'aws_dynamodb_write_capacity': self.wcu, + 'aws_dynamodb_lsi_count': self.lsi_count, + 'aws_dynamodb_gsi_count': self.gsi_count, + } + + def SetThroughput(self, + rcu: Optional[int] = None, + wcu: Optional[int] = None) -> None: + """Updates the table's rcu and wcu.""" + if not rcu: + rcu = self.rcu + if not wcu: + wcu = self.wcu + cmd = util.AWS_PREFIX + [ + 'dynamodb', 'update-table', + '--table-name', self.table_name, + '--region', self.region, + '--provisioned-throughput', + f'ReadCapacityUnits={rcu},WriteCapacityUnits={wcu}', + ] + logging.info('Setting %s table provisioned throughput to %s rcu and %s wcu', + self.table_name, rcu, wcu) + util.IssueRetryableCommand(cmd) + while not self._IsReady(): + continue + + def _GetThroughput(self) -> Tuple[int, int]: + """Returns the current (rcu, wcu) of the table.""" + output = self._DescribeTable()['ProvisionedThroughput'] + return output['ReadCapacityUnits'], output['WriteCapacityUnits'] + + @vm_util.Retry(poll_interval=1, max_retries=3, + retryable_exceptions=(errors.Resource.CreationError)) + def _GetTagResourceCommand(self, tags: Sequence[str]) -> Sequence[str]: + """Returns the tag-resource command with the provided tags. + + This function will retry up to max_retries to allow for instance creation to + finish. + + Args: + tags: List of formatted tags to append to the instance. + + Returns: + A list of arguments for the 'tag-resource' command. + + Raises: + errors.Resource.CreationError: If the current instance does not exist. 
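+
+    Example (illustrative; mirrors how UpdateWithDefaultTags and
+    UpdateTimeout below use it):
+
+      tags = util.MakeFormattedDefaultTags()
+      cmd = self._GetTagResourceCommand(tags)
+      util.IssueRetryableCommand(cmd)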
+ """ + if not self._Exists(): + raise errors.Resource.CreationError( + f'Cannot get resource arn of non-existent instance {self.table_name}') + return util.AWS_PREFIX + [ + 'dynamodb', 'tag-resource', '--resource-arn', self.resource_arn, + '--region', self.region, '--tags' + ] + list(tags) + + def UpdateWithDefaultTags(self) -> None: + """Adds default tags to the table.""" + tags = util.MakeFormattedDefaultTags() + cmd = self._GetTagResourceCommand(tags) + logging.info('Setting default tags on table %s', self.table_name) + util.IssueRetryableCommand(cmd) + + def UpdateTimeout(self, timeout_minutes: int) -> None: + """Updates the timeout associated with the table.""" + tags = util.MakeFormattedDefaultTags(timeout_minutes) + cmd = self._GetTagResourceCommand(tags) + logging.info('Updating timeout tags on table %s with timeout minutes %s', + self.table_name, timeout_minutes) + util.IssueRetryableCommand(cmd) + + def _Freeze(self) -> None: + """See base class. + + Lowers provisioned throughput to free-tier levels. There is a limit to how + many times throughput on a table may by lowered per day. See: + https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/ProvisionedThroughput.html. + """ + # Check that we actually need to lower before issuing command. + rcu, wcu = self._GetThroughput() + if rcu > _FREE_TIER_RCU or wcu > _FREE_TIER_WCU: + logging.info('(rcu=%s, wcu=%s) is higher than free tier.', rcu, wcu) + self.SetThroughput(rcu=_FREE_TIER_RCU, wcu=_FREE_TIER_WCU) + + def _Restore(self) -> None: + """See base class. + + Restores provisioned throughput back to benchmarking levels. + """ + self.SetThroughput(self.rcu, self.wcu) + + +def _MakeArgs(args: Collection[str]) -> str: + return ','.join(args) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py new file mode 100644 index 0000000..78d51eb --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_elasticache_redis.py @@ -0,0 +1,212 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS' elasticache redis clusters. + +Clusters can be created and deleted. 
+""" +import json +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS +REDIS_VERSION_MAPPING = {'redis_3_2': '3.2.10', + 'redis_4_0': '4.0.10', + 'redis_5_0': '5.0.6', + 'redis_6_x': '6.x'} + + +class ElastiCacheRedis(managed_memory_store.BaseManagedMemoryStore): + """Object representing a AWS Elasticache redis instance.""" + + CLOUD = providers.AWS + MEMORY_STORE = managed_memory_store.REDIS + + def __init__(self, spec): + super(ElastiCacheRedis, self).__init__(spec) + self.subnet_group_name = 'subnet-%s' % self.name + self.version = REDIS_VERSION_MAPPING[spec.config.cloud_redis.redis_version] + self.node_type = FLAGS.cache_node_type + self.redis_region = FLAGS.cloud_redis_region + self.failover_zone = FLAGS.aws_elasticache_failover_zone + self.failover_subnet = None + self.failover_style = FLAGS.redis_failover_style + + @staticmethod + def CheckPrerequisites(benchmark_config): + if (FLAGS.managed_memory_store_version and + FLAGS.managed_memory_store_version not in + managed_memory_store.REDIS_VERSIONS): + raise errors.Config.InvalidValue('Invalid Redis version.') + if FLAGS.redis_failover_style in [ + managed_memory_store.Failover.FAILOVER_NONE, + managed_memory_store.Failover.FAILOVER_SAME_ZONE]: + if FLAGS.aws_elasticache_failover_zone: + raise errors.Config.InvalidValue( + 'The aws_elasticache_failover_zone flag is ignored. ' + 'There is no need for a failover zone when there is no failover. ' + 'Same zone failover will fail over to the same zone.') + else: + if (not FLAGS.aws_elasticache_failover_zone or + FLAGS.aws_elasticache_failover_zone[:-1] != FLAGS.cloud_redis_region): + raise errors.Config.InvalidValue( + 'Invalid failover zone. ' + 'A failover zone in %s must be specified. ' % + FLAGS.cloud_redis_region) + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the instance. + + Returns: + dict mapping string property key to value. 
+ """ + result = { + 'cloud_redis_failover_style': + self.failover_style, + 'cloud_redis_version': + managed_memory_store.ParseReadableVersion(self.version), + 'cloud_redis_node_type': + self.node_type, + 'cloud_redis_region': + self.redis_region, + 'cloud_redis_primary_zone': + self.spec.vms[0].zone, + 'cloud_redis_failover_zone': + self.failover_zone, + } + return result + + def _CreateDependencies(self): + """Create the subnet dependencies.""" + subnet_id = self.spec.vms[0].network.subnet.id + cmd = ['aws', 'elasticache', 'create-cache-subnet-group', + '--region', self.redis_region, + '--cache-subnet-group-name', self.subnet_group_name, + '--cache-subnet-group-description', '"PKB redis benchmark subnet"', + '--subnet-ids', subnet_id] + + if self.failover_style == ( + managed_memory_store.Failover.FAILOVER_SAME_REGION): + regional_network = self.spec.vms[0].network.regional_network + vpc_id = regional_network.vpc.id + cidr = regional_network.vpc.NextSubnetCidrBlock() + self.failover_subnet = aws_network.AwsSubnet( + self.failover_zone, vpc_id, cidr_block=cidr) + self.failover_subnet.Create() + cmd += [self.failover_subnet.id] + + vm_util.IssueCommand(cmd) + + def _DeleteDependencies(self): + """Delete the subnet dependencies.""" + cmd = ['aws', 'elasticache', 'delete-cache-subnet-group', + '--region=%s' % self.redis_region, + '--cache-subnet-group-name=%s' % self.subnet_group_name] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + if self.failover_subnet: + self.failover_subnet.Delete() + + def _Create(self): + """Creates the cluster.""" + cmd = ['aws', 'elasticache', 'create-replication-group', + '--engine', 'redis', + '--engine-version', self.version, + '--replication-group-id', self.name, + '--replication-group-description', self.name, + '--region', self.redis_region, + '--cache-node-type', self.node_type, + '--cache-subnet-group-name', self.subnet_group_name, + '--preferred-cache-cluster-a-zs', self.spec.vms[0].zone] + + if self.failover_style == managed_memory_store.Failover.FAILOVER_SAME_REGION: + cmd += [self.failover_zone] + + elif self.failover_style == managed_memory_store.Failover.FAILOVER_SAME_ZONE: + cmd += [self.spec.vms[0].zone] + + if self.failover_style != managed_memory_store.Failover.FAILOVER_NONE: + cmd += ['--automatic-failover-enabled', + '--num-cache-clusters', '2'] + + cmd += ['--tags'] + cmd += util.MakeFormattedDefaultTags() + _, stderr, _ = vm_util.IssueCommand(cmd, raise_on_failure=False) + + if 'InsufficientCacheClusterCapacity' in stderr: + raise errors.Benchmarks.InsufficientCapacityCloudFailure(stderr) + + def _Delete(self): + """Deletes the cluster.""" + cmd = ['aws', 'elasticache', 'delete-replication-group', + '--region', self.redis_region, + '--replication-group-id', self.name] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _IsDeleting(self): + """Returns True if cluster is being deleted and false otherwise.""" + cluster_info = self.DescribeInstance() + return cluster_info.get('Status', '') == 'deleting' + + def _IsReady(self): + """Returns True if cluster is ready and false otherwise.""" + cluster_info = self.DescribeInstance() + return cluster_info.get('Status', '') == 'available' + + def _Exists(self): + """Returns true if the cluster exists and is not being deleted.""" + cluster_info = self.DescribeInstance() + return ('Status' in cluster_info and + cluster_info['Status'] not in ['deleting', 'create-failed']) + + def DescribeInstance(self): + """Calls describe on cluster. 
+ + Returns: + dict mapping string cluster_info property key to value. + """ + cmd = ['aws', 'elasticache', 'describe-replication-groups', + '--region', self.redis_region, + '--replication-group-id', self.name] + stdout, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.info('Could not find cluster %s, %s', self.name, stderr) + return {} + for cluster_info in json.loads(stdout)['ReplicationGroups']: + if cluster_info['ReplicationGroupId'] == self.name: + return cluster_info + return {} + + @vm_util.Retry(max_retries=5) + def _PopulateEndpoint(self): + """Populates address and port information from cluster_info. + + Raises: + errors.Resource.RetryableGetError: + Failed to retrieve information on cluster + """ + cluster_info = self.DescribeInstance() + if not cluster_info: + raise errors.Resource.RetryableGetError( + 'Failed to retrieve information on %s', self.name) + + primary_endpoint = cluster_info['NodeGroups'][0]['PrimaryEndpoint'] + self._ip = primary_endpoint['Address'] + self._port = primary_endpoint['Port'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py new file mode 100644 index 0000000..b8f8478 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_elasticached_memcached.py @@ -0,0 +1,159 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS' elasticache memcached clusters.""" + + +import json +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + + +MEMCACHED_VERSIONS = ['1.5.10', '1.5.16', '1.6.6'] +FLAGS = flags.FLAGS + + +class ElastiCacheMemcached(managed_memory_store.BaseManagedMemoryStore): + """Object representing a AWS Elasticache memcached instance.""" + + CLOUD = providers.AWS + MEMORY_STORE = managed_memory_store.MEMCACHED + + def __init__(self, spec): + super(ElastiCacheMemcached, self).__init__(spec) + self.subnet_group_name = 'subnet-%s' % self.name + self.zone = self.spec.vms[0].zone + self.region = util.GetRegionFromZone(self.zone) + self.node_type = FLAGS.cache_node_type + self.version = FLAGS.managed_memory_store_version + + @staticmethod + def CheckPrerequisites(benchmark_config): + if (FLAGS.managed_memory_store_version and + FLAGS.managed_memory_store_version not in MEMCACHED_VERSIONS): + raise errors.Config.InvalidValue('Invalid Memcached version.') + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the cache cluster. + + Returns: + dict mapping string property key to value. 
+ """ + result = { + 'cloud_memcached_version': self.version, + 'cloud_memcached_node_type': self.node_type, + } + return result + + def _CreateDependencies(self): + """Create the subnet dependencies.""" + subnet_id = self.spec.vms[0].network.subnet.id + cmd = ['aws', 'elasticache', 'create-cache-subnet-group', + '--region', self.region, + '--cache-subnet-group-name', self.subnet_group_name, + '--cache-subnet-group-description', '"memcached benchmark subnet"', + '--subnet-ids', subnet_id] + + vm_util.IssueCommand(cmd) + + def _DeleteDependencies(self): + """Delete the subnet dependencies.""" + cmd = ['aws', 'elasticache', 'delete-cache-subnet-group', + '--region', self.region, + '--cache-subnet-group-name', self.subnet_group_name] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Create(self): + """Creates the cache cluster.""" + cmd = ['aws', 'elasticache', 'create-cache-cluster', + '--engine', 'memcached', + '--region', self.region, + '--cache-cluster-id', self.name, + '--preferred-availability-zone', self.zone, + '--num-cache-nodes', str(managed_memory_store.MEMCACHED_NODE_COUNT), + '--cache-node-type', self.node_type, + '--cache-subnet-group-name', self.subnet_group_name] + + if self.version: + cmd += ['--engine-version', self.version] + + cmd += ['--tags'] + cmd += util.MakeFormattedDefaultTags() + vm_util.IssueCommand(cmd) + + def _Delete(self): + """Deletes the cache cluster.""" + cmd = ['aws', 'elasticache', 'delete-cache-cluster', + '--region', self.region, + '--cache-cluster-id', self.name] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _IsDeleting(self): + """Returns True if cluster is being deleted and false otherwise.""" + cluster_info = self._DescribeInstance() + return cluster_info.get('CacheClusterStatus', '') == 'deleting' + + def _IsReady(self): + """Returns True if cluster is ready and false otherwise.""" + cluster_info = self._DescribeInstance() + if cluster_info.get('CacheClusterStatus', '') == 'available': + self.version = cluster_info.get('EngineVersion') + return True + return False + + def _Exists(self): + """Returns true if the cluster exists and is not being deleted.""" + cluster_info = self._DescribeInstance() + return cluster_info.get('CacheClusterStatus', '') not in [ + '', 'deleting', 'create-failed'] + + def _DescribeInstance(self): + """Calls describe on cluster. + + Returns: + dict mapping string cluster_info property key to value. + """ + cmd = ['aws', 'elasticache', 'describe-cache-clusters', + '--region', self.region, + '--cache-cluster-id', self.name] + stdout, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.info('Could not find cluster %s, %s', self.name, stderr) + return {} + for cluster_info in json.loads(stdout)['CacheClusters']: + if cluster_info['CacheClusterId'] == self.name: + return cluster_info + return {} + + @vm_util.Retry(max_retries=5) + def _PopulateEndpoint(self): + """Populates address and port information from cluster_info. 
+ + Raises: + errors.Resource.RetryableGetError: + Failed to retrieve information on cluster + """ + cluster_info = self._DescribeInstance() + if not cluster_info: + raise errors.Resource.RetryableGetError( + 'Failed to retrieve information on {0}.'.format(self.name)) + + endpoint = cluster_info['ConfigurationEndpoint'] + self._ip = endpoint['Address'] + self._port = endpoint['Port'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_emr.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_emr.py new file mode 100644 index 0000000..7469a8d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_emr.py @@ -0,0 +1,405 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS's spark service. + +Spark clusters can be created and deleted. +""" + +import json +import logging + +from absl import flags +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import spark_service +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import util + + +FLAGS = flags.FLAGS + +DEFAULT_MACHINE_TYPE = 'm3.xlarge' +RELEASE_LABEL = 'emr-5.23.0' +READY_CHECK_SLEEP = 30 +READY_CHECK_TRIES = 60 +READY_STATE = 'WAITING' + +JOB_WAIT_SLEEP = 30 + +DELETED_STATES = ['TERMINATED_WITH_ERRORS', 'TERMINATED'] + +MANAGER_SG = 'EmrManagedMasterSecurityGroup' +WORKER_SG = 'EmrManagedSlaveSecurityGroup' + +# Certain machine types require a subnet. +NEEDS_SUBNET = ['m4', 'c4', 'm5', 'c5'] + + +class AwsSecurityGroup(resource.BaseResource): + """Object representing a AWS Security Group. + + A security group is created automatically when an Amazon EMR cluster + is created. It is not deleted automatically, and the subnet and VPN + cannot be deleted until the security group is deleted. + + Because of this, there's no _Create method, only a _Delete and an + _Exists method. + """ + + def __init__(self, cmd_prefix, group_id): + super(AwsSecurityGroup, self).__init__() + self.created = True + self.group_id = group_id + self.cmd_prefix = cmd_prefix + + def _Delete(self): + cmd = self.cmd_prefix + ['ec2', 'delete-security-group', + '--group-id=' + self.group_id] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Exists(self): + cmd = self.cmd_prefix + ['ec2', 'describe-security-groups', + '--group-id=' + self.group_id] + _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + # if the security group doesn't exist, the describe command gives an error. + return retcode == 0 + + def _Create(self): + if not self.created: + raise NotImplementedError() + + +class AwsEMR(spark_service.BaseSparkService): + """Object representing a AWS EMR cluster. + + Attributes: + cluster_id: Cluster identifier, set in superclass. + project: Enclosing project for the cluster. 
+ cmd_prefix: emr prefix, including region + network: network to use; set if needed by machine type + bucket_to_delete: bucket name to delete when cluster is + terminated. + """ + + CLOUD = providers.AWS + SPARK_SAMPLE_LOCATION = '/usr/lib/spark/lib/spark-examples.jar' + SERVICE_NAME = 'emr' + + def __init__(self, spark_service_spec): + super(AwsEMR, self).__init__(spark_service_spec) + # TODO(hildrum) use availability zone when appropriate + worker_machine_type = self.spec.worker_group.vm_spec.machine_type + leader_machine_type = self.spec.master_group.vm_spec.machine_type + self.cmd_prefix = list(util.AWS_PREFIX) + + if self.zone: + region = util.GetRegionFromZone(self.zone) + self.cmd_prefix += ['--region', region] + + # Certain machine types require subnets. + if (self.spec.static_cluster_id is None and + (worker_machine_type[0:2] in NEEDS_SUBNET or + leader_machine_type[0:2] in NEEDS_SUBNET)): + # GetNetwork is supposed to take a VM, but all it uses + # from the VM is the zone attribute, which self has. + self.network = aws_network.AwsNetwork.GetNetwork(self) + else: + self.network = None + self.bucket_to_delete = None + + def _CreateLogBucket(self): + bucket_name = 's3://pkb-{0}-emr'.format(FLAGS.run_uri) + cmd = self.cmd_prefix + ['s3', 'mb', bucket_name] + _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + raise Exception('Error creating logs bucket') + self.bucket_to_delete = bucket_name + return bucket_name + + def _Create(self): + """Creates the cluster.""" + name = 'pkb_' + FLAGS.run_uri + logs_bucket = FLAGS.aws_emr_loguri or self._CreateLogBucket() + + instance_groups = [] + for group_type, group_spec in [ + ('CORE', self.spec.worker_group), + ('MASTER', self.spec.master_group)]: + instance_properties = {'InstanceCount': group_spec.vm_count, + 'InstanceGroupType': group_type, + 'InstanceType': group_spec.vm_spec.machine_type, + 'Name': group_type + ' group'} + if group_spec.disk_spec: + # Make sure nothing we are ignoring is included in the disk spec + assert group_spec.disk_spec.device_path is None + assert group_spec.disk_spec.disk_number is None + assert group_spec.disk_spec.mount_point is None + assert group_spec.disk_spec.iops is None + ebs_configuration = {'EbsBlockDeviceConfigs': [ + {'VolumeSpecification': + {'SizeInGB': group_spec.disk_spec.disk_size, + 'VolumeType': group_spec.disk_spec.disk_type}, + 'VolumesPerInstance': + group_spec.disk_spec.num_striped_disks}]} + instance_properties.update({'EbsConfiguration': ebs_configuration}) + instance_groups.append(instance_properties) + + # we need to store the cluster id. 
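+    # For reference, the command assembled below is roughly equivalent to
+    # (illustrative; actual values depend on the run and flags):
+    #   aws emr create-cluster --name pkb_<run_uri> \
+    #     --release-label emr-5.23.0 --use-default-roles \
+    #     --instance-groups '<instance_groups JSON>' \
+    #     --application Name=Spark Name=Hadoop --log-uri <logs_bucket>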
+ cmd = self.cmd_prefix + ['emr', 'create-cluster', '--name', name, + '--release-label', RELEASE_LABEL, + '--use-default-roles', + '--instance-groups', + json.dumps(instance_groups), + '--application', 'Name=Spark', + 'Name=Hadoop', + '--log-uri', logs_bucket] + if self.network: + cmd += ['--ec2-attributes', 'SubnetId=' + self.network.subnet.id] + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + self.cluster_id = result['ClusterId'] + logging.info('Cluster created with id %s', self.cluster_id) + for tag_key, tag_value in util.MakeDefaultTags().items(): + self._AddTag(tag_key, tag_value) + + def _AddTag(self, key, value): + """Add the key value pair as a tag to the emr cluster.""" + cmd = self.cmd_prefix + ['emr', 'add-tags', + '--resource-id', self.cluster_id, + '--tag', + '{}={}'.format(key, value)] + vm_util.IssueCommand(cmd) + + def _DeleteSecurityGroups(self): + """Delete the security groups associated with this cluster.""" + cmd = self.cmd_prefix + ['emr', 'describe-cluster', + '--cluster-id', self.cluster_id] + stdout, _, _ = vm_util.IssueCommand(cmd) + cluster_desc = json.loads(stdout) + sec_object = cluster_desc['Cluster']['Ec2InstanceAttributes'] + manager_sg = sec_object[MANAGER_SG] + worker_sg = sec_object[WORKER_SG] + + # the manager group and the worker group reference each other, so neither + # can be deleted. First we delete the references to the manager group in + # the worker group. Then we delete the manager group, and then, finally the + # worker group. + + # remove all references to the manager group from the worker group. + for proto, port in [('tcp', '0-65535'), ('udp', '0-65535'), ('icmp', '-1')]: + for group1, group2 in [(worker_sg, manager_sg), (manager_sg, worker_sg)]: + cmd = self.cmd_prefix + ['ec2', 'revoke-security-group-ingress', + '--group-id=' + group1, + '--source-group=' + group2, + '--protocol=' + proto, + '--port=' + port] + vm_util.IssueCommand(cmd) + + # Now we need to delete the manager, then the worker. + for group in manager_sg, worker_sg: + sec_group = AwsSecurityGroup(self.cmd_prefix, group) + sec_group.Delete() + + def _Delete(self): + """Deletes the cluster.""" + + cmd = self.cmd_prefix + ['emr', 'terminate-clusters', '--cluster-ids', + self.cluster_id] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _DeleteDependencies(self): + if self.network: + self._DeleteSecurityGroups() + if self.bucket_to_delete: + bucket_del_cmd = self.cmd_prefix + ['s3', 'rb', '--force', + self.bucket_to_delete] + vm_util.IssueCommand(bucket_del_cmd) + + def _Exists(self): + """Check to see whether the cluster exists.""" + cmd = self.cmd_prefix + ['emr', 'describe-cluster', + '--cluster-id', self.cluster_id] + stdout, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + return False + result = json.loads(stdout) + if result['Cluster']['Status']['State'] in DELETED_STATES: + return False + else: + return True + + def _IsReady(self): + """Check to see if the cluster is ready.""" + cmd = self.cmd_prefix + ['emr', 'describe-cluster', '--cluster-id', + self.cluster_id] + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + if result['Cluster']['Status']['State'] == 'TERMINATED_WITH_ERRORS': + reason = result['Cluster']['Status']['StateChangeReason']['Message'] + message = reason + if reason.startswith('Subnet is required'): + message = ('Cluster creation failed because this machine type requires ' + 'a subnet. 
To ensure PKB creates a subnet for this machine '
+                   'type, update the NEEDS_SUBNET variable of '
+                   'providers/aws/aws_emr.py to contain prefix of this machine '
+                   'type. Raw AWS message={0}'.format(reason))
+      raise Exception(message)
+    return result['Cluster']['Status']['State'] == READY_STATE
+
+  def _GetLogBase(self):
+    """Gets the base uri for the logs."""
+    cmd = self.cmd_prefix + ['emr', 'describe-cluster', '--cluster-id',
+                             self.cluster_id]
+    stdout, _, _ = vm_util.IssueCommand(cmd)
+    result = json.loads(stdout)
+    if 'LogUri' in result['Cluster']:
+      self.logging_enabled = True
+      log_uri = result['Cluster']['LogUri']
+      if log_uri.startswith('s3n'):
+        log_uri = 's3' + log_uri[3:]
+      return log_uri
+    else:
+      return None
+
+  def _CheckForFile(self, filename):
+    """Wait for file to appear on s3."""
+    cmd = self.cmd_prefix + ['s3', 'ls', filename]
+    _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False)
+    return retcode == 0
+
+  def _IsStepDone(self, step_id):
+    """Determine whether the step is done.
+
+    Args:
+      step_id: The step id to query.
+    Returns:
+      A dictionary describing the step if the step is complete,
+      None otherwise.
+    """
+
+    cmd = self.cmd_prefix + ['emr', 'describe-step', '--cluster-id',
+                             self.cluster_id, '--step-id', step_id]
+    stdout, _, _ = vm_util.IssueCommand(cmd)
+    result = json.loads(stdout)
+    state = result['Step']['Status']['State']
+    if state == 'COMPLETED' or state == 'FAILED':
+      return result
+    else:
+      return None
+
+  def _MakeHadoopStep(self, jarfile, classname, job_arguments):
+    """Construct an EMR step with a type CUSTOM_JAR."""
+    step_list = ['Type=CUSTOM_JAR', 'Jar=' + jarfile]
+    if classname:
+      step_list.append('MainClass=' + classname)
+    if job_arguments:
+      arg_string = '[' + ','.join(job_arguments) + ']'
+      step_list.append('Args=' + arg_string)
+    return step_list
+
+  def _MakeSparkStep(self, jarfile, classname, job_arguments):
+    arg_list = ['--class', classname, jarfile]
+    if job_arguments:
+      arg_list += job_arguments
+    arg_string = '[' + ','.join(arg_list) + ']'
+    step_list = ['Type=Spark', 'Args=' + arg_string]
+    return step_list
+
+  def SubmitJob(self, jarfile, classname, job_poll_interval=JOB_WAIT_SLEEP,
+                job_arguments=None, job_stdout_file=None,
+                job_type=spark_service.SPARK_JOB_TYPE):
+    """Submit the job.
+
+    Submit the job and wait for it to complete. If job_stdout_file is not
+    None, also wait for the job's stdout to appear and put that in
+    job_stdout_file.
+
+    Args:
+      jarfile: Jar file containing the class to submit.
+      classname: Name of the class.
+      job_poll_interval: Submit job will poll until the job is done; this is
+        the time between checks.
+      job_arguments: Arguments to pass to the job.
+      job_stdout_file: Name of a file in which to put the job's standard out.
+        If there is data here already, it will be overwritten.
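+      job_type: Type of job to submit; spark_service.SPARK_JOB_TYPE or
+        spark_service.HADOOP_JOB_TYPE.
+
+    Returns:
+      A dict of metrics for the step (waiting time, runtime, and success).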
+ + """ + + @vm_util.Retry(poll_interval=job_poll_interval, fuzz=0) + def WaitForFile(filename): + if not self._CheckForFile(filename): + raise Exception('File not found yet') + + @vm_util.Retry(timeout=FLAGS.aws_emr_job_wait_time, + poll_interval=job_poll_interval, fuzz=0) + def WaitForStep(step_id): + result = self._IsStepDone(step_id) + if result is None: + raise Exception('Step {0} not complete.'.format(step_id)) + return result + + if job_type == spark_service.SPARK_JOB_TYPE: + step_list = self._MakeSparkStep(jarfile, classname, job_arguments) + elif job_type == spark_service.HADOOP_JOB_TYPE: + step_list = self._MakeHadoopStep(jarfile, classname, job_arguments) + else: + raise Exception('Job type %s unsupported for EMR' % job_type) + step_string = ','.join(step_list) + cmd = self.cmd_prefix + ['emr', 'add-steps', '--cluster-id', + self.cluster_id, '--steps', step_string] + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + step_id = result['StepIds'][0] + metrics = {} + + result = WaitForStep(step_id) + pending_time = result['Step']['Status']['Timeline']['CreationDateTime'] + start_time = result['Step']['Status']['Timeline']['StartDateTime'] + end_time = result['Step']['Status']['Timeline']['EndDateTime'] + metrics[spark_service.WAITING] = start_time - pending_time + metrics[spark_service.RUNTIME] = end_time - start_time + step_state = result['Step']['Status']['State'] + metrics[spark_service.SUCCESS] = step_state == 'COMPLETED' + + # Now we need to take the standard out and put it in the designated path, + # if appropriate. + if job_stdout_file: + log_base = self._GetLogBase() + if log_base is None: + logging.warning('SubmitJob requested output, but EMR cluster was not ' + 'created with logging') + return metrics + + # log_base ends in a slash. + s3_stdout = '{0}{1}/steps/{2}/stdout.gz'.format(log_base, + self.cluster_id, + step_id) + WaitForFile(s3_stdout) + dest_file = '{0}.gz'.format(job_stdout_file) + cp_cmd = ['aws', 's3', 'cp', s3_stdout, dest_file] + _, _, retcode = vm_util.IssueCommand(cp_cmd, raise_on_failure=False) + if retcode == 0: + uncompress_cmd = ['gunzip', '-f', dest_file] + vm_util.IssueCommand(uncompress_cmd) + return metrics + + def SetClusterProperty(self): + pass + + def ExecuteOnMaster(self, script_path, script_args): + raise NotImplementedError() + + def CopyFromMaster(self, remote_path, local_path): + raise NotImplementedError() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_glue_crawler.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_glue_crawler.py new file mode 100644 index 0000000..f4052c4 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_glue_crawler.py @@ -0,0 +1,203 @@ +"""Module containing class for AWS's Glue Crawler.""" + +import json +from typing import Any, Dict, Optional, Tuple + +from absl import flags +from perfkitbenchmarker import data_discovery_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + + +class CrawlNotCompletedError(Exception): + """Used to signal a crawl is still running.""" + + +class CrawlFailedError(Exception): + """Used to signal a crawl has failed.""" + + +class AwsGlueCrawler(data_discovery_service.BaseDataDiscoveryService): + """AWS Glue Crawler Resource Class. + + Attributes: + db_name: Name of the Glue database that will be provisioned. + crawler_name: Name of the crawler that will be provisioned. 
+ role: Role the crawler will use. Refer to aws_glue_crawler_role flag for + more info. + sample_size: How many files will be crawled in each leaf directory. Refer to + aws_glue_crawler_sample_size flag for more info. + """ + + CLOUD = providers.AWS + SERVICE_TYPE = 'glue' + READY = 'READY' + FAILED = 'FAILED' + CRAWL_TIMEOUT = 21600 + CRAWL_POLL_INTERVAL = 5 + + def __init__(self): + super().__init__() + self.db_name = f'pkb-db-{FLAGS.run_uri}' + self.crawler_name = f'pkb-crawler-{FLAGS.run_uri}' + self.role = FLAGS.aws_glue_crawler_role + self.sample_size = FLAGS.aws_glue_crawler_sample_size + + def _Create(self) -> None: + # creating database + database_input = { + 'Name': self.db_name, + 'Description': '\n'.join( + f'{k}={v}' for k, v in util.MakeDefaultTags().items()), + } + cmd = util.AWS_PREFIX + [ + 'glue', + 'create-database', + '--database-input', json.dumps(database_input), + f'--region={self.region}', + ] + vm_util.IssueCommand(cmd) + + targets = {'S3Targets': [{'Path': self.data_discovery_path}]} + if self.sample_size is not None: + targets['S3Targets'][0]['SampleSize'] = self.sample_size + + # creating crawler + cmd = util.AWS_PREFIX + [ + 'glue', + 'create-crawler', + '--name', self.crawler_name, + '--role', self.role, + '--database-name', self.db_name, + '--targets', json.dumps(targets), + '--region', self.region, + '--tags', ','.join( + f'{k}={v}' for k, v in util.MakeDefaultTags().items()), + ] + vm_util.IssueCommand(cmd) + + def _Exists(self) -> bool: + return self._DbExists() and self._CrawlerExists() + + def _IsReady(self, raise_on_crawl_failure=False) -> bool: + stdout, _, _ = self._GetCrawler() + data = json.loads(stdout) + if (data['Crawler'].get('LastCrawl', {}).get('Status') == self.FAILED and + raise_on_crawl_failure): + raise CrawlFailedError( + data['Crawler'].get('LastCrawl', {}).get('ErrorMessage', '')) + return data['Crawler']['State'] == self.READY + + def _Delete(self) -> None: + # deleting database + cmd = util.AWS_PREFIX + [ + 'glue', + 'delete-database', + '--name', self.db_name, + '--region', self.region, + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + # deleting crawler + cmd = util.AWS_PREFIX + [ + 'glue', + 'delete-crawler', + '--name', self.crawler_name, + '--region', self.region, + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _IsDeleting(self) -> bool: + crawler_exists = self._CrawlerExists() + db_exists = self._DbExists() + if db_exists is None or crawler_exists is None: + return True + return self._DbExists() or self._CrawlerExists() + + def DiscoverData(self) -> float: + """Runs the AWS Glue Crawler. 
Returns the time elapsed in secs.""" + + cmd = util.AWS_PREFIX + [ + 'glue', + 'start-crawler', + '--name', self.crawler_name, + '--region', self.region, + ] + vm_util.IssueCommand(cmd) + self._WaitUntilCrawlerReady() + cmd = util.AWS_PREFIX + [ + 'glue', + 'get-crawler-metrics', + '--crawler-name-list', self.crawler_name, + '--region', self.region, + ] + output, _, _ = vm_util.IssueCommand(cmd) + data = json.loads(output) + assert (isinstance(data['CrawlerMetricsList'], list) and + len(data['CrawlerMetricsList']) == 1) + return data['CrawlerMetricsList'][0]['LastRuntimeSeconds'] + + def GetMetadata(self) -> Dict[str, Any]: + """Return a dictionary of the metadata for this service.""" + metadata = super().GetMetadata() + metadata.update( + aws_glue_crawler_sample_size=self.sample_size, + aws_glue_db_name=self.db_name, + aws_glue_crawler_name=self.crawler_name, + ) + return metadata + + @vm_util.Retry( + timeout=CRAWL_TIMEOUT, + poll_interval=CRAWL_POLL_INTERVAL, + fuzz=0, + retryable_exceptions=CrawlNotCompletedError,) + def _WaitUntilCrawlerReady(self): + if not self._IsReady(raise_on_crawl_failure=True): + raise CrawlNotCompletedError( + f'Crawler {self.crawler_name} still running.') + + def _DbExists(self) -> Optional[bool]: + """Whether the database exists or not. + + It might return None if the API call failed with an unknown error. + + Returns: + A bool or None. + """ + cmd = util.AWS_PREFIX + [ + 'glue', + 'get-database', + '--name', self.db_name, + '--region', self.region, + ] + _, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if not retcode: + return True + return False if 'EntityNotFoundException' in stderr else None + + def _CrawlerExists(self) -> Optional[bool]: + """Whether the crawler exists or not. + + It might return None if the API call failed with an unknown error. + + Returns: + A bool or None. + """ + _, stderr, retcode = self._GetCrawler(raise_on_failure=False) + if not retcode: + return True + return False if 'EntityNotFoundException' in stderr else None + + def _GetCrawler(self, raise_on_failure=True) -> Tuple[str, str, int]: + """Calls the AWS CLI to retrieve a crawler.""" + cmd = util.AWS_PREFIX + [ + 'glue', + 'get-crawler', + '--name', self.crawler_name, + '--region', self.region, + ] + return vm_util.IssueCommand(cmd, raise_on_failure=raise_on_failure) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_iam_role.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_iam_role.py new file mode 100644 index 0000000..7161320 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_iam_role.py @@ -0,0 +1,184 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS' dynamodb tables. + +Tables can be created and deleted. 
+""" + + +import json +import logging +import time + +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + +# https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_version.html +_POLICY_VERSION = '2012-10-17' + +_ROLE_ARN_TEMPLATE = 'arn:aws:iam::{account}:role/{role_name}' +_POLICY_ARN_TEMPLATE = 'arn:aws:iam::{account}:policy/{policy_name}' + +_TRUST_RELATIONSHIP_FILE = 'service-trust-relationship.json' +_ROLE_POLICY_FILE = 'service-role-policy.json' +_ROLE_CREATION_DELAY = 30 + +_TRUST_RELATIONSHIP_TEMPLATE = """{{ + "Version": "{version}", + "Statement": [ + {{ + "Effect": "Allow", + "Principal": {{ + "Service": "{service}" + }}, + "Action": "sts:AssumeRole" + }} + ] +}}""" + +_ROLE_POLICY_TEMPLATE = """{{ + "Version": "{version}", + "Statement": [ + {{ + "Action": [ + "{action}" + ], + "Effect": "Allow", + "Resource": [ + "{resource_arn}" + ] + }} + ] +}}""" + + +class AwsIamRole(resource.BaseResource): + """Class representing an AWS IAM role.""" + + def __init__(self, + account, + role_name, + policy_name, + service, + action, + resource_arn, + policy_version=None): + super(AwsIamRole, self).__init__() + self.account = account + self.role_name = role_name + self.policy_name = policy_name + self.service = service + self.action = action + self.resource_arn = resource_arn + self.policy_version = policy_version or _POLICY_VERSION + self.role_arn = _ROLE_ARN_TEMPLATE.format( + account=self.account, role_name=self.role_name) + self.policy_arn = _POLICY_ARN_TEMPLATE.format( + account=self.account, policy_name=self.policy_name) + + def _Create(self): + """See base class.""" + if not self._RoleExists(): + with open(_TRUST_RELATIONSHIP_FILE, 'w+') as relationship_file: + relationship_file.write( + _TRUST_RELATIONSHIP_TEMPLATE.format( + version=self.policy_version, service=self.service)) + + cmd = util.AWS_PREFIX + [ + 'iam', 'create-role', '--role-name', self.role_name, + '--assume-role-policy-document', + 'file://{}'.format(_TRUST_RELATIONSHIP_FILE) + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=True) + if retcode != 0: + logging.warning('Failed to create role! %s', stderror) + + if not self._PolicyExists(): + with open(_ROLE_POLICY_FILE, 'w+') as policy_file: + policy_file.write( + _ROLE_POLICY_TEMPLATE.format( + version=self.policy_version, + action=self.action, + resource_arn=self.resource_arn)) + cmd = util.AWS_PREFIX + [ + 'iam', 'create-policy', '--policy-name', 'PolicyFor' + self.role_name, + '--policy-document', 'file://{}'.format(_ROLE_POLICY_FILE) + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=True) + if retcode != 0: + logging.warning('Failed to create policy! %s', stderror) + + cmd = util.AWS_PREFIX + [ + 'iam', 'attach-role-policy', '--role-name', self.role_name, + '--policy-arn', self.policy_arn + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=True) + if retcode != 0: + logging.warning('Failed to attach role policy! %s', stderror) + + # Make sure the role is available for the downstream users (e.g., DAX). + # Without this, the step of creating DAX cluster may fail. + # TODO(user): use a more robust way to handle this. 
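+    # IAM changes propagate with eventual consistency, so the new role and
+    # policy may not yet be visible to other services right after creation;
+    # the fixed 30-second _ROLE_CREATION_DELAY below papers over that
+    # (see the TODO above for the longer-term fix).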
+ time.sleep(_ROLE_CREATION_DELAY) + + def _Delete(self): + """See base class.""" + cmd = util.AWS_PREFIX + [ + 'iam', 'detach-role-policy', '--role-name', self.role_name, + '--policy-arn', self.policy_arn + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.warning('Failed to delete role policy! %s', stderror) + + cmd = util.AWS_PREFIX + [ + 'iam', 'delete-policy', '--policy-arn', self.policy_arn + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.warning('Failed to delete policy! %s', stderror) + + cmd = util.AWS_PREFIX + [ + 'iam', 'delete-role', '--role-name', self.role_name + ] + + _, stderror, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + logging.warning('Failed to delete role! %s', stderror) + + def GetRoleArn(self): + """Returns the role's Amazon Resource Name (ARN).""" + return self.role_arn + + def _RoleExists(self): + """Returns true if the IAM role exists.""" + cmd = util.AWS_PREFIX + ['iam', 'get-role', '--role-name', self.role_name] + stdout, _, retcode = vm_util.IssueCommand( + cmd, suppress_warning=True, raise_on_failure=False) + return retcode == 0 and stdout and json.loads(stdout)['Role'] + + def _PolicyExists(self): + """Returns true if the IAM policy used by the role exists.""" + cmd = util.AWS_PREFIX + [ + 'iam', 'get-policy', '--policy-arn', self.policy_arn + ] + stdout, _, retcode = vm_util.IssueCommand( + cmd, suppress_warning=True, raise_on_failure=False) + return retcode == 0 and stdout and json.loads(stdout)['Policy'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_load_balancer.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_load_balancer.py new file mode 100644 index 0000000..cdf33a2 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_load_balancer.py @@ -0,0 +1,150 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to AWS's load balancers.""" + +import json + +from absl import flags +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + + +class TargetGroup(resource.BaseResource): + """Class represeting an AWS target group.""" + + def __init__(self, vpc, port): + """Initializes the TargetGroup object. + + Args: + vpc: AwsVpc object which contains the targets for load balancing. + port: The internal port that the load balancer connects to. 
+ """ + super(TargetGroup, self).__init__() + self.arn = None + self.region = vpc.region + self.name = 'pkb-%s' % FLAGS.run_uri + self.protocol = 'TCP' + self.port = port + self.vpc_id = vpc.id + + def _Create(self): + """Create the target group.""" + create_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'elbv2', 'create-target-group', + '--target-type', 'ip', + '--name', self.name, + '--protocol', self.protocol, + '--port', str(self.port), + '--vpc-id', self.vpc_id + ] + stdout, _, _ = vm_util.IssueCommand(create_cmd) + response = json.loads(stdout) + self.arn = response['TargetGroups'][0]['TargetGroupArn'] + + def _Delete(self): + """Delete the target group.""" + if self.arn is None: + return + delete_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'elbv2', 'delete-target-group', + '--target-group-arn', self.arn + ] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + +class LoadBalancer(resource.BaseResource): + """Class representing an AWS load balancer.""" + + def __init__(self, subnets): + """Initializes the LoadBalancer object. + + Args: + subnets: List of AwsSubnet objects. + """ + super(LoadBalancer, self).__init__() + self.region = subnets[0].region + self.name = 'pkb-%s' % FLAGS.run_uri + self.subnet_ids = [subnet.id for subnet in subnets] + self.type = 'network' + self.arn = None + self.dns_name = None + + def _Create(self): + """Create the load balancer.""" + create_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'elbv2', 'create-load-balancer', + '--name', self.name, + '--type', self.type, + '--tags'] + util.MakeFormattedDefaultTags() + # Add --subnets argument to the command. + create_cmd.append('--subnets') + create_cmd.extend(self.subnet_ids) + + stdout, _, _ = vm_util.IssueCommand(create_cmd) + load_balancer = json.loads(stdout)['LoadBalancers'][0] + self.arn = load_balancer['LoadBalancerArn'] + self.dns_name = load_balancer['DNSName'] + + def _Delete(self): + """Delete the load balancer.""" + if self.arn is None: + return + delete_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'elbv2', 'delete-load-balancer', + '--load-balancer-arn', self.arn + ] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + +class Listener(resource.BaseResource): + """Class representing an AWS listener.""" + + def __init__(self, load_balancer, target_group, port): + super(Listener, self).__init__() + self.load_balancer_arn = load_balancer.arn + self.target_group_arn = target_group.arn + self.port = port + self.protocol = target_group.protocol + self.region = target_group.region + + def _GetDefaultActions(self): + """Returns a JSON representation of the default actions for the listener.""" + actions = [{ + 'Type': 'forward', + 'TargetGroupArn': self.target_group_arn + }] + return json.dumps(actions) + + def _Create(self): + """Create the listener.""" + create_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'elbv2', 'create-listener', + '--load-balancer-arn', self.load_balancer_arn, + '--protocol', self.protocol, + '--port', str(self.port), + '--default-actions', self._GetDefaultActions() + ] + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """Listeners will be deleted along with their associated load balancers.""" + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_logs.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_logs.py new file mode 100644 index 0000000..cf9c8cd --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_logs.py @@ -0,0 +1,101 @@ +# Copyright 2018 
PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to AWS CloudWatch Logs.""" + +import json + +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + + +class LogGroup(resource.BaseResource): + """Class representing a CloudWatch log group.""" + + def __init__(self, region, name, retention_in_days=7): + super(LogGroup, self).__init__() + self.region = region + self.name = name + self.retention_in_days = retention_in_days + + def _Create(self): + """Create the log group.""" + create_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'logs', 'create-log-group', + '--log-group-name', self.name + ] + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """Delete the log group.""" + delete_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'logs', 'delete-log-group', + '--log-group-name', self.name + ] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def Exists(self): + """Returns True if the log group exists.""" + describe_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'logs', 'describe-log-groups', + '--log-group-name-prefix', self.name, + '--no-paginate' + ] + stdout, _, _ = vm_util.IssueCommand(describe_cmd) + log_groups = json.loads(stdout)['logGroups'] + group = next((group for group in log_groups + if group['logGroupName'] == self.name), None) + return bool(group) + + def _PostCreate(self): + """Set the retention policy.""" + put_cmd = util.AWS_PREFIX + [ + '--region', self.region, + 'logs', 'put-retention-policy', + '--log-group-name', self.name, + '--retention-in-days', str(self.retention_in_days) + ] + vm_util.IssueCommand(put_cmd) + + +def GetLogs(region, stream_name, group_name, token=None): + """Fetches the JSON formatted log stream starting at the token.""" + get_cmd = util.AWS_PREFIX + [ + '--region', region, + 'logs', 'get-log-events', + '--start-from-head', + '--log-group-name', group_name, + '--log-stream-name', stream_name, + ] + if token: + get_cmd.extend(['--next-token', token]) + stdout, _, _ = vm_util.IssueCommand(get_cmd) + return json.loads(stdout) + + +def GetLogStreamAsString(region, stream_name, log_group): + """Returns the messages of the log stream as a string.""" + log_lines = [] + token = None + events = [] + while token is None or events: + response = GetLogs(region, stream_name, log_group, token) + events = response['events'] + token = response['nextForwardToken'] + for event in events: + log_lines.append(event['message']) + return '\n'.join(log_lines) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_network.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_network.py new file mode 100644 index 0000000..61574da --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_network.py @@ -0,0 +1,957 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing classes related to AWS VM networking. + +The Firewall class provides a way of opening VM ports. The Network class allows +VMs to communicate via internal ips and isolates PerfKitBenchmarker VMs from +others in +the same project. See https://aws.amazon.com/documentation/vpc/ +for more information about AWS Virtual Private Clouds. +""" + +import json +import logging +import threading + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import network +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_placement_group +from perfkitbenchmarker.providers.aws import aws_vpc_endpoint +from perfkitbenchmarker.providers.aws import util + +_AWS_VPC = flags.DEFINE_string( + 'aws_vpc', None, + 'The static AWS VPC id to use. If unset, creates a new VPC.') +_AWS_SUBNET = flags.DEFINE_string( + 'aws_subnet', None, + 'The static AWS subnet id to use. Set value to "default" to use ' + 'default subnet. If unset, creates a new subnet.') +flags.DEFINE_bool('aws_efa', False, 'Whether to use an Elastic Fiber Adapter.') +flags.DEFINE_string('aws_efa_version', '1.12.1', + 'Version of AWS EFA to use (must also pass in --aws_efa).') +flags.DEFINE_integer('aws_efa_count', 1, 'The number of EFAs per instance.') +flags.DEFINE_multi_enum('aws_endpoint', [], ['s3'], + 'List of AWS endpoints to create') + +FLAGS = flags.FLAGS + + +REGION = 'region' +ZONE = 'zone' + + +class AwsFirewall(network.BaseFirewall): + """An object representing the AWS Firewall.""" + + CLOUD = providers.AWS + + def __init__(self): + self.firewall_set = set() + self.firewall_icmp_set = set() + self._lock = threading.Lock() + + def AllowIcmp(self, vm): + """Opens the ICMP protocol on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the ICMP protocol for. + """ + source = '0.0.0.0/0' + + # region, group_id, source + entry = (vm.region, vm.group_id, source) + with self._lock: + if entry in self.firewall_icmp_set: + return + # When defining ICMP firewall rules using the aws cli, + # port specifies the type of ICMP traffic allowed, + # with -1 meaning all ICMP types + # https://docs.aws.amazon.com/cli/latest/reference/ec2/authorize-security-group-ingress.html + authorize_cmd = util.AWS_PREFIX + [ + 'ec2', + 'authorize-security-group-ingress', + '--region=%s' % vm.region, + '--group-id=%s' % vm.group_id, + '--protocol=icmp', + '--port=-1', + '--cidr=%s' % source] + util.IssueRetryableCommand( + authorize_cmd) + self.firewall_icmp_set.add(entry) + + def AllowPort(self, vm, start_port, end_port=None, source_range=None): + """Opens a port on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the port for. + start_port: The first local port to open in a range. + end_port: The last local port to open in a range. 
If None, only start_port + will be opened. + source_range: List of source CIDRs to allow for this port. If None, all + sources are allowed. i.e. ['0.0.0.0/0'] + """ + if vm.is_static or vm.network.is_static: + return + self.AllowPortInSecurityGroup(vm.region, vm.group_id, start_port, end_port, + source_range) + + def AllowPortInSecurityGroup(self, + region, + security_group, + start_port, + end_port=None, + source_range=None): + """Opens a port on the firewall for a security group. + + Args: + region: The region of the security group + security_group: The security group in which to open the ports + start_port: The first local port to open in a range. + end_port: The last local port to open in a range. If None, only start_port + will be opened. + source_range: List of source CIDRs to allow for this port. + """ + end_port = end_port or start_port + source_range = source_range or ['0.0.0.0/0'] + for source in source_range: + entry = (start_port, end_port, region, security_group, source) + if entry in self.firewall_set: + continue + if self._RuleExists(region, security_group, start_port, end_port, source): + self.firewall_set.add(entry) + continue + with self._lock: + if entry in self.firewall_set: + continue + authorize_cmd = util.AWS_PREFIX + [ + 'ec2', + 'authorize-security-group-ingress', + '--region=%s' % region, + '--group-id=%s' % security_group, + '--port=%s-%s' % (start_port, end_port), + '--cidr=%s' % source, + ] + util.IssueRetryableCommand(authorize_cmd + ['--protocol=tcp']) + util.IssueRetryableCommand(authorize_cmd + ['--protocol=udp']) + self.firewall_set.add(entry) + + def _RuleExists(self, region, security_group, start_port, end_port, source): + """Whether the firewall rule exists in the VPC.""" + query_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-security-groups', + '--region=%s' % region, + '--group-ids=%s' % security_group, + '--filters', + 'Name=ip-permission.cidr,Values={}'.format(source), + 'Name=ip-permission.from-port,Values={}'.format(start_port), + 'Name=ip-permission.to-port,Values={}'.format(end_port), + ] + stdout, _ = util.IssueRetryableCommand(query_cmd) + # "groups" will be an array of all the matching firewall rules + groups = json.loads(stdout)['SecurityGroups'] + return bool(groups) + + def DisallowAllPorts(self): + """Closes all ports on the firewall.""" + pass + + +class AwsVpc(resource.BaseResource): + """An object representing an Aws VPC.""" + + def __init__(self, region, vpc_id=None, regional_network_index=0): + super(AwsVpc, self).__init__(vpc_id is not None) + self.region = region + self.regional_network_index = regional_network_index + self.cidr = network.GetCidrBlock(self.regional_network_index, 0, 16) + self.id = vpc_id + # Subnets are assigned per-AZ. + # _subnet_index tracks the next unused 10.x.y.0/24 block. 
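+    # Illustrative example: for a VPC whose CIDR above is 10.0.0.0/16,
+    # NextSubnetCidrBlock() hands out 10.0.0.0/24, 10.0.1.0/24, ... in order
+    # (actual values come from network.GetCidrBlock).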
+ self._subnet_index = 0 + # Lock protecting _subnet_index + self._subnet_index_lock = threading.Lock() + self.default_security_group_id = None + if self.id: + self._SetSecurityGroupId() + self._endpoints = [ + aws_vpc_endpoint.CreateEndpointService(service, self) + for service in set(FLAGS.aws_endpoint) + ] + + def _Create(self): + """Creates the VPC.""" + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-vpc', + '--region=%s' % self.region, + '--cidr-block=%s' % self.cidr] + stdout, stderr, retcode = vm_util.IssueCommand( + create_cmd, raise_on_failure=False) + if 'VpcLimitExceeded' in stderr: + raise errors.Benchmarks.QuotaFailure(stderr) + if retcode: + raise errors.Resource.CreationError( + 'Failed to create Vpc: %s return code: %s' % (retcode, stderr)) + + response = json.loads(stdout) + self.id = response['Vpc']['VpcId'] + self._EnableDnsHostnames() + util.AddDefaultTags(self.id, self.region) + + def _PostCreate(self): + self._SetSecurityGroupId() + for endpoint in self._endpoints: + endpoint.Create() + + def _SetSecurityGroupId(self): + """Looks up the VPC default security group.""" + groups = self.GetSecurityGroups('default') + if len(groups) != 1: + raise ValueError('Expected one security group, got {} in {}'.format( + len(groups), groups)) + self.default_security_group_id = groups[0]['GroupId'] + logging.info('Default security group ID: %s', + self.default_security_group_id) + if FLAGS.aws_efa: + self._AllowSelfOutBound() + + def GetSecurityGroups(self, group_name=None): + cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-security-groups', + '--region', self.region, + '--filters', + 'Name=vpc-id,Values=' + self.id] + if group_name: + cmd.append('Name=group-name,Values={}'.format(group_name)) + stdout, _, _ = vm_util.IssueCommand(cmd) + return json.loads(stdout)['SecurityGroups'] + + def _Exists(self): + """Returns true if the VPC exists.""" + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-vpcs', + '--region=%s' % self.region, + '--filter=Name=vpc-id,Values=%s' % self.id] + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + vpcs = response['Vpcs'] + assert len(vpcs) < 2, 'Too many VPCs.' + return len(vpcs) > 0 + + def _EnableDnsHostnames(self): + """Sets the enableDnsHostnames attribute of this VPC to True. + + By default, instances launched in non-default VPCs are assigned an + unresolvable hostname. This breaks the hadoop benchmark. Setting the + enableDnsHostnames attribute to 'true' on the VPC resolves this. See: + http://docs.aws.amazon.com/AmazonVPC/latest/UserGuide/VPC_DHCP_Options.html + """ + enable_hostnames_command = util.AWS_PREFIX + [ + 'ec2', + 'modify-vpc-attribute', + '--region=%s' % self.region, + '--vpc-id', self.id, + '--enable-dns-hostnames', + '{ "Value": true }'] + + util.IssueRetryableCommand(enable_hostnames_command) + + def _PreDelete(self): + """See base class. + + Deletes the AWS endpoints if created. + """ + for endpoint in self._endpoints: + endpoint.Delete() + + def _Delete(self): + """Deletes the VPC.""" + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'delete-vpc', + '--region=%s' % self.region, + '--vpc-id=%s' % self.id] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def NextSubnetCidrBlock(self): + """Returns the next available /24 CIDR block in this VPC. + + Each VPC has a 10.0.0.0/16 CIDR block. + Each subnet is assigned a /24 within this allocation. + Calls to this method return the next unused /24. + + Returns: + A string representing the next available /24 block, in CIDR notation. 
+ Raises: + ValueError: when no additional subnets can be created. + """ + with self._subnet_index_lock: + if self._subnet_index >= (1 << 8) - 1: + raise ValueError('Exceeded subnet limit ({0}).'.format( + self._subnet_index)) + cidr = network.GetCidrBlock(self.regional_network_index, + self._subnet_index) + self._subnet_index += 1 + return cidr + + @vm_util.Retry() + def _AllowSelfOutBound(self): + """Allow outbound connections on all ports in the default security group. + + Details: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html + """ + cmd = util.AWS_PREFIX + [ + 'ec2', 'authorize-security-group-egress', + '--region', self.region, '--group-id', self.default_security_group_id, + '--protocol', 'all', '--source-group', self.default_security_group_id + ] + try: + vm_util.IssueCommand(cmd) + except errors.VmUtil.IssueCommandError as ex: + # do not retry if this rule already exists + if ex.message.find('InvalidPermission.Duplicate') == -1: + raise ex + + def AllowVpcPeerInBound(self, peer_vpc): + """Allow inbound connections on all ports in the default security group from peer vpc. + + Args: + peer_vpc: AwsVpc. Peer vpc to allow inbound traffic from. + """ + cmd = util.AWS_PREFIX + [ + 'ec2', 'authorize-security-group-ingress', + '--region=%s' % self.region, + '--group-id=%s' % self.default_security_group_id, + '--protocol=%s' % 'all', + '--cidr=%s' % peer_vpc.cidr + ] + vm_util.IssueRetryableCommand(cmd) + + +class AwsSubnet(resource.BaseResource): + """An object representing an Aws subnet.""" + + def __init__(self, zone, vpc_id, cidr_block='10.0.0.0/24', subnet_id=None): + super(AwsSubnet, self).__init__(subnet_id is not None) + self.zone = zone + self.region = util.GetRegionFromZone(zone) + self.vpc_id = vpc_id + self.id = subnet_id + self.cidr_block = cidr_block + + def _Create(self): + """Creates the subnet.""" + + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-subnet', + '--region=%s' % self.region, + '--vpc-id=%s' % self.vpc_id, + '--cidr-block=%s' % self.cidr_block] + if not util.IsRegion(self.zone): + create_cmd.append('--availability-zone=%s' % self.zone) + + stdout, _, _ = vm_util.IssueCommand(create_cmd) + response = json.loads(stdout) + self.id = response['Subnet']['SubnetId'] + util.AddDefaultTags(self.id, self.region) + + def _Delete(self): + """Deletes the subnet.""" + logging.info('Deleting subnet %s. This may fail if all instances in the ' + 'subnet have not completed termination, but will be retried.', + self.id) + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'delete-subnet', + '--region=%s' % self.region, + '--subnet-id=%s' % self.id] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the subnet exists.""" + return bool(self.GetDict()) + + def GetDict(self): + """The 'aws ec2 describe-subnets' for this VPC / subnet id. + + Returns: + A dict of the single subnet or an empty dict if there are no subnets. + + Raises: + AssertionError: If there is more than one subnet. + """ + describe_cmd = util.AWS_PREFIX + [ + 'ec2', 'describe-subnets', + '--region=%s' % self.region, + '--filter=Name=vpc-id,Values=%s' % self.vpc_id + ] + if self.id: + describe_cmd.append('--filter=Name=subnet-id,Values=%s' % self.id) + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + subnets = response['Subnets'] + assert len(subnets) < 2, 'Too many subnets.' 
+ return subnets[0] if subnets else {} + + +class AwsInternetGateway(resource.BaseResource): + """An object representing an Aws Internet Gateway.""" + + def __init__(self, region, vpc_id=None): + super(AwsInternetGateway, self).__init__(vpc_id is not None) + self.region = region + self.vpc_id = None + self.id = None + self.attached = False + if vpc_id: + self.vpc_id = vpc_id + self.id = self.GetDict().get('InternetGatewayId') + # if a gateway was found then it is attached to this VPC + self.attached = bool(self.id) + + def _Create(self): + """Creates the internet gateway.""" + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-internet-gateway', + '--region=%s' % self.region] + stdout, _, _ = vm_util.IssueCommand(create_cmd) + response = json.loads(stdout) + self.id = response['InternetGateway']['InternetGatewayId'] + util.AddDefaultTags(self.id, self.region) + + def _Delete(self): + """Deletes the internet gateway.""" + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'delete-internet-gateway', + '--region=%s' % self.region, + '--internet-gateway-id=%s' % self.id] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the internet gateway exists.""" + return bool(self.GetDict()) + + def GetDict(self): + """The 'aws ec2 describe-internet-gateways' for this VPC / gateway id. + + Returns: + A dict of the single gateway or an empty dict if there are no gateways. + + Raises: + AssertionError: If there is more than one internet gateway. + """ + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-internet-gateways', + '--region=%s' % self.region, + ] + if self.id: + describe_cmd.append('--filter=Name=internet-gateway-id,Values=%s' % + self.id) + elif self.vpc_id: + # Only query with self.vpc_id if the self.id is NOT set -- after calling + # Detach() this object will set still have a vpc_id but will be filtered + # out in a query if using attachment.vpc-id. + # Using self.vpc_id instead of self.attached as the init phase always + # sets it to False. + describe_cmd.append('--filter=Name=attachment.vpc-id,Values=%s' % + self.vpc_id) + else: + raise errors.Error('Must have a VPC id or a gateway id') + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + internet_gateways = response['InternetGateways'] + assert len(internet_gateways) < 2, 'Too many internet gateways.' 
+ return internet_gateways[0] if internet_gateways else {} + + def Attach(self, vpc_id): + """Attaches the internet gateway to the VPC.""" + if not self.attached: + self.vpc_id = vpc_id + attach_cmd = util.AWS_PREFIX + [ + 'ec2', + 'attach-internet-gateway', + '--region=%s' % self.region, + '--internet-gateway-id=%s' % self.id, + '--vpc-id=%s' % self.vpc_id] + util.IssueRetryableCommand(attach_cmd) + self.attached = True + + def Detach(self): + """Detaches the internet gateway from the VPC.""" + + def _suppress_failure(stdout, stderr, retcode): + """Suppresses Detach failure when internet gateway is in a bad state.""" + del stdout # unused + if retcode and ('InvalidInternetGatewayID.NotFound' in stderr or + 'Gateway.NotAttached' in stderr): + return True + return False + + if self.attached and not self.user_managed: + detach_cmd = util.AWS_PREFIX + [ + 'ec2', + 'detach-internet-gateway', + '--region=%s' % self.region, + '--internet-gateway-id=%s' % self.id, + '--vpc-id=%s' % self.vpc_id] + util.IssueRetryableCommand(detach_cmd, suppress_failure=_suppress_failure) + self.attached = False + + +class AwsRouteTable(resource.BaseResource): + """An object representing a route table.""" + + def __init__(self, region, vpc_id): + super(AwsRouteTable, self).__init__() + self.region = region + self.vpc_id = vpc_id + self.id: str = None # set by _PostCreate + + def _Create(self): + """Creates the route table. + + This is a no-op since every VPC has a default route table. + """ + pass + + def _Delete(self): + """Deletes the route table. + + This is a no-op since the default route table gets deleted with the VPC. + """ + pass + + @vm_util.Retry() + def _PostCreate(self): + """Gets data about the route table.""" + self.id = self.GetDict()[0]['RouteTableId'] + + def GetDict(self): + """Returns an array of the currently existing routes for this VPC.""" + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-route-tables', + '--region=%s' % self.region, + '--filters=Name=vpc-id,Values=%s' % self.vpc_id] + stdout, _ = util.IssueRetryableCommand(describe_cmd) + return json.loads(stdout)['RouteTables'] + + def RouteExists(self): + """Returns true if the 0.0.0.0/0 route already exists.""" + route_tables = self.GetDict() + if not route_tables: + return False + for route in route_tables[0].get('Routes', []): + if route.get('DestinationCidrBlock') == '0.0.0.0/0': + return True + return False + + def CreateRoute(self, internet_gateway_id): + """Adds a route to the internet gateway.""" + if self.RouteExists(): + logging.info('Internet route already exists.') + return + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-route', + '--region=%s' % self.region, + '--route-table-id=%s' % self.id, + '--gateway-id=%s' % internet_gateway_id, + '--destination-cidr-block=0.0.0.0/0'] + util.IssueRetryableCommand(create_cmd) + + def CreateVpcPeeringRoute(self, vpc_peering_id, destination_cidr): + """Adds a route to peer VPC.""" + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-route', + '--region=%s' % self.region, + '--route-table-id=%s' % self.id, + '--vpc-peering-connection-id=%s' % vpc_peering_id, + '--destination-cidr-block=%s' % destination_cidr] + util.IssueRetryableCommand(create_cmd) + + +class _AwsRegionalNetwork(network.BaseNetwork): + """Object representing regional components of an AWS network. + + The benchmark spec contains one instance of this class per region, which an + AwsNetwork may retrieve or create via _AwsRegionalNetwork.GetForRegion. + + Attributes: + region: string. The AWS region. 
+ vpc: an AwsVpc instance. + internet_gateway: an AwsInternetGateway instance. + route_table: an AwsRouteTable instance. The default route table. + """ + + _regional_network_count = 0 + _regional_network_lock = threading.Lock() + + CLOUD = providers.AWS + + def __repr__(self): + return '%s(%r)' % (self.__class__, self.__dict__) + + def __init__(self, region, vpc_id=None): + self.region = region + self.internet_gateway = AwsInternetGateway(region, vpc_id) + self.route_table = None + self.created = False + + # Locks to ensure that a single thread creates / deletes the instance. + self._create_lock = threading.Lock() + + # Tracks the number of AwsNetworks using this _AwsRegionalNetwork. + # Incremented by Create(); decremented by Delete(); + # When a Delete() call decrements _reference_count to 0, the RegionalNetwork + # is destroyed. + self._reference_count = 0 + self._reference_count_lock = threading.Lock() + + # Each regional network needs unique cidr_block for VPC peering. + with _AwsRegionalNetwork._regional_network_lock: + self.vpc = AwsVpc(self.region, vpc_id, + _AwsRegionalNetwork._regional_network_count) + self.cidr_block = network.GetCidrBlock( + _AwsRegionalNetwork._regional_network_count) + _AwsRegionalNetwork._regional_network_count += 1 + + @classmethod + def GetForRegion(cls, region, vpc_id=None): + """Retrieves or creates an _AwsRegionalNetwork. + + Args: + region: string. AWS region name. + vpc_id: string. AWS VPC id. + + Returns: + _AwsRegionalNetwork. If an _AwsRegionalNetwork for the same region already + exists in the benchmark spec, that instance is returned. Otherwise, a new + _AwsRegionalNetwork is created and returned. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if benchmark_spec is None: + raise errors.Error('GetNetwork called in a thread without a ' + 'BenchmarkSpec.') + key = cls.CLOUD, REGION, region + # Because this method is only called from the AwsNetwork constructor, which + # is only called from AwsNetwork.GetNetwork, we already hold the + # benchmark_spec.networks_lock. + if key not in benchmark_spec.regional_networks: + benchmark_spec.regional_networks[key] = cls(region, vpc_id) + return benchmark_spec.regional_networks[key] + + def Create(self): + """Creates the network.""" + with self._reference_count_lock: + assert self._reference_count >= 0, self._reference_count + self._reference_count += 1 + + # Access here must be synchronized. The first time the block is executed, + # the network will be created. Subsequent attempts to create the + # network block until the initial attempt completes, then return. + with self._create_lock: + if self.created: + return + + self.vpc.Create() + + self.internet_gateway.Create() + self.internet_gateway.Attach(self.vpc.id) + + if self.route_table is None: + self.route_table = AwsRouteTable(self.region, self.vpc.id) + self.route_table.Create() + self.route_table.CreateRoute(self.internet_gateway.id) + + self.created = True + + def Delete(self): + """Deletes the network.""" + # Only actually delete if there are no more references. 
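+    # For example, if three AwsNetwork instances share this regional network,
+    # only the third call to Delete() (reference count dropping to 0) tears
+    # down the internet gateway and VPC below.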
+ with self._reference_count_lock: + assert self._reference_count >= 1, self._reference_count + self._reference_count -= 1 + if self._reference_count: + return + + if self.created: + self.internet_gateway.Detach() + self.internet_gateway.Delete() + self.vpc.Delete() + + +class AwsNetworkSpec(network.BaseNetworkSpec): + """Configuration for creating an AWS network.""" + + def __init__(self, zone, vpc_id=None, subnet_id=None): + super(AwsNetworkSpec, self).__init__(zone) + if vpc_id or subnet_id: + logging.info('Confirming vpc (%s) and subnet (%s) selections', vpc_id, + subnet_id) + my_subnet = AwsSubnet(self.zone, vpc_id, subnet_id=subnet_id).GetDict() + self.vpc_id = my_subnet['VpcId'] + self.subnet_id = my_subnet['SubnetId'] + self.cidr_block = my_subnet['CidrBlock'] + logging.info('Using vpc %s subnet %s cidr %s', self.vpc_id, + self.subnet_id, self.cidr_block) + else: + self.vpc_id = None + self.subnet_id = None + self.cidr_block = None + + +def _get_default_vpc_id(region: str) -> str: + """Returns the default VPC ID for the region. + + Creates a default VPC if one did not exist previously + + + Args: + region: Region of the default VPC. + + Returns: Default VPC ID + + Raises: + UnsupportedConfigError: When default VPC does not exist and cannot be + created. + """ + vpc_cmd = util.AWS_PREFIX + [ + 'ec2', 'describe-vpcs', + '--region', region, + '--filters', 'Name=isDefault,Values=true' + ] + stdout, _ = vm_util.IssueRetryableCommand(vpc_cmd) + vpcs = json.loads(stdout)['Vpcs'] + if vpcs: + return vpcs[0]['VpcId'] + create_cmd = util.AWS_PREFIX + [ + 'ec2', 'create-default-vpc', '--region', region, + ] + stdout, _, ret = vm_util.IssueCommand(create_cmd, raise_on_failure=False) + if ret: + raise errors.Benchmarks.UnsupportedConfigError( + f'AWS default VPC does not exist for region {region}.') + return json.loads(stdout)['Vpc']['VpcId'] + + +def _get_default_subnet_id(zone: str) -> str: + """Returns the default subnet ID for the zone. + + Creates a default subnet if one did not exist previously + + + Args: + zone: Zone of the default subnet. + + Returns: Default Subnet ID + + Raises: + UnsupportedConfigError: When default subnet does not exist and cannot be + created. + """ + region = util.GetRegionFromZone(zone) + subnet_cmd = util.AWS_PREFIX + [ + 'ec2', 'describe-subnets', + '--region', region, '--filter', + f'Name=availabilityZone,Values={zone}', + 'Name=defaultForAz,Values=true' + ] + stdout, _ = vm_util.IssueRetryableCommand(subnet_cmd) + subnets = json.loads(stdout)['Subnets'] + if subnets: + return subnets[0]['SubnetId'] + create_cmd = util.AWS_PREFIX + [ + 'ec2', 'create-default-subnet', + '--region', region, + '--availability-zone', zone + ] + stdout, _, ret = vm_util.IssueCommand(create_cmd, raise_on_failure=False) + if ret: + raise errors.Benchmarks.UnsupportedConfigError( + f'AWS default subnet does not exist for zone {zone}.') + return json.loads(stdout)['Subnet']['SubnetId'] + + +class AwsNetwork(network.BaseNetwork): + """Object representing an AWS Network. + + Attributes: + region: The AWS region the Network is in. + regional_network: The AwsRegionalNetwork for 'region'. + subnet: the AwsSubnet for this zone. + placement_group: An AwsPlacementGroup instance. + """ + + CLOUD = providers.AWS + + def __repr__(self): + return '%s(%r)' % (self.__class__, self.__dict__) + + def __init__(self, spec): + """Initializes AwsNetwork instances. + + Args: + spec: An AwsNetworkSpec object. 
+ """ + super(AwsNetwork, self).__init__(spec) + self.region = util.GetRegionFromZone(spec.zone) + self.regional_network = _AwsRegionalNetwork.GetForRegion( + self.region, spec.vpc_id) + self.subnet = None + self.vpc_peering = None + if (FLAGS.placement_group_style == + placement_group.PLACEMENT_GROUP_NONE): + self.placement_group = None + else: + placement_group_spec = aws_placement_group.AwsPlacementGroupSpec( + 'AwsPlacementGroupSpec', flag_values=FLAGS, zone=spec.zone) + self.placement_group = aws_placement_group.AwsPlacementGroup( + placement_group_spec) + self.is_static = False + if spec.vpc_id: + self.is_static = True + self.subnet = AwsSubnet( + self.zone, + spec.vpc_id, + cidr_block=self.regional_network.cidr_block, + subnet_id=spec.subnet_id) + + @staticmethod + def _GetNetworkSpecFromVm(vm): + """Returns an AwsNetworkSpec created from VM attributes and flags.""" + if _AWS_SUBNET.value == 'default': + vpc_id = _get_default_vpc_id(vm.region) + subnet_id = _get_default_subnet_id(vm.zone) + else: + vpc_id = _AWS_VPC.value + subnet_id = _AWS_SUBNET.value + return AwsNetworkSpec(vm.zone, vpc_id, subnet_id) + + def Create(self): + """Creates the network.""" + self.regional_network.Create() + + if self.subnet is None: + cidr = self.regional_network.vpc.NextSubnetCidrBlock() + self.subnet = AwsSubnet(self.zone, self.regional_network.vpc.id, + cidr_block=cidr) + self.subnet.Create() + if self.placement_group: + self.placement_group.Create() + + def Delete(self): + """Deletes the network.""" + if self.subnet: + self.subnet.Delete() + if self.placement_group: + self.placement_group.Delete() + if hasattr(self, 'vpc_peering') and self.vpc_peering: + self.vpc_peering.Delete() + self.regional_network.Delete() + + def Peer(self, peering_network): + """Peers the network with the peering_network. + + This method is used for VPC peering. It will connect 2 VPCs together. + + Args: + peering_network: BaseNetwork. The network to peer with. + """ + + # Skip Peering if the networks are the same + if self.regional_network is peering_network.regional_network: + return + + spec = network.BaseVPCPeeringSpec(self.regional_network, + peering_network.regional_network) + self.vpc_peering = AwsVpcPeering(spec) + peering_network.vpc_peering = self.vpc_peering + self.vpc_peering.Create() + + @classmethod + def _GetKeyFromNetworkSpec(cls, spec): + """Returns a key used to register Network instances.""" + return (cls.CLOUD, ZONE, spec.zone) + + +class AwsVpcPeering(network.BaseVPCPeering): + """Object containing all information needed to create a VPC Peering Object.""" + + def _Create(self): + """Creates the peering object. 
+ + Documentation on creating a vpc object: + https://docs.aws.amazon.com/vpc/latest/peering/vpc-pg.pdf + """ + # Creates Peering Connection + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-vpc-peering-connection', + '--region=%s' % self.network_a.region, + '--peer-region=%s' % self.network_b.region, + '--vpc-id=%s' % self.network_a.vpc.id, + '--peer-vpc-id=%s' % self.network_b.vpc.id] + + stdout, _ = vm_util.IssueRetryableCommand(create_cmd) + response = json.loads(stdout) + + self.id = response['VpcPeeringConnection'][ + 'VpcPeeringConnectionId'] + + # Accepts Peering Connection + accept_cmd = util.AWS_PREFIX + [ + 'ec2', + 'accept-vpc-peering-connection', + '--region=%s' % self.network_b.region, + '--vpc-peering-connection-id=%s' % self.id] + vm_util.IssueRetryableCommand(accept_cmd) + + util.AddDefaultTags(self.id, self.network_a.region) + logging.info('Creating VPC peering between %s and %s', + self.network_a.vpc.cidr, self.network_b.vpc.cidr) + + # Adds VPC peering to both networks' route tables + self.network_a.route_table.CreateVpcPeeringRoute(self.id, + self.network_b.vpc.cidr) + self.network_b.route_table.CreateVpcPeeringRoute(self.id, + self.network_a.vpc.cidr) + + # Updates security group to allow inbound traffic from peering networks + self.network_a.vpc.AllowVpcPeerInBound(self.network_b.vpc) + self.network_b.vpc.AllowVpcPeerInBound(self.network_a.vpc) + + def _Delete(self): + """Creates the deletes the peering object.""" + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'delete-vpc-peering-connection', + '--region=%s' % self.network_a.region, + '--vpc-peering-connection-id=%s' % self.id] + vm_util.IssueCommand(delete_cmd) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_nfs_service.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_nfs_service.py new file mode 100644 index 0000000..001733d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_nfs_service.py @@ -0,0 +1,256 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""AWS NFS implementation. + +See https://aws.amazon.com/efs/ + +This launches an EFS instance and creates a mount point. Individual AwsDisks +will then mount the share. + +The AfsNfsService object is a resource.BaseResource that has two resources +underneath it: +1. A resource to connect to the filer. +2. A resource to connect to the mount point on the filer. + +Lifecycle: +1. EFS service created and blocks until it is available, as it is needed to + make the mount point. +2. Issues a non-blocking call to create the mount point. Does not block as the + NfsDisk will block on it being available. +3. The NfsDisk then mounts the mount point and uses the disk like normal. +4. On teardown the mount point is first deleted. Blocks on that returning. +5. The EFS service is then deleted. Does not block as can take some time. 
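Editor's note: the lifecycle described above maps onto a handful of EFS API calls. A rough CLI-level sketch of steps 1 and 2, using placeholder region, subnet, and security-group IDs rather than anything PKB computes:

```python
import json
import subprocess
import time

REGION = 'us-west-2'          # placeholder
SUBNET_ID = 'subnet-0123'     # placeholder
SECURITY_GROUP = 'sg-0123'    # placeholder


def efs(*args):
    """Runs an 'aws efs' subcommand and returns its parsed JSON output."""
    cmd = ['aws', '--region', REGION, 'efs', *args]
    return json.loads(subprocess.check_output(cmd))


# Step 1: create the file system (filer) with an idempotency token and block
# until it is available, since the mount target needs it.
fs_id = efs('create-file-system', '--creation-token', 'nfs-demo')['FileSystemId']
while efs('describe-file-systems', '--file-system-id', fs_id)[
        'FileSystems'][0]['LifeCycleState'] != 'available':
    time.sleep(5)

# Step 2: create the mount target in one subnet; clients later mount
# <fs_id>.efs.<region>.amazonaws.com over NFS 4.1 (step 3 above).
mt_id = efs('create-mount-target', '--file-system-id', fs_id,
            '--subnet-id', SUBNET_ID,
            '--security-groups', SECURITY_GROUP)['MountTargetId']
print('mount target', mt_id, 'starting up')
```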
+""" + +import json +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import nfs_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + + +class AwsNfsService(nfs_service.BaseNfsService): + """An AWS NFS resource. + + Creates the AWS EFS file system and mount point for use with NFS clients. + + See https://aws.amazon.com/efs/ + """ + + CLOUD = providers.AWS + NFS_TIERS = ('generalPurpose', 'maxIO') + DEFAULT_NFS_VERSION = '4.1' + DEFAULT_TIER = 'generalPurpose' + + def __init__(self, disk_spec, zone): + super(AwsNfsService, self).__init__(disk_spec, zone) + self.region = util.GetRegionFromZone(self.zone) + self.aws_commands = AwsEfsCommands(self.region) + self.disk_spec.disk_size = 0 + self.filer_id = None + self.mount_id = None + self.throughput_mode = FLAGS.efs_throughput_mode + self.provisioned_throughput = FLAGS.efs_provisioned_throughput + + @property + def network(self): + network_spec = aws_network.AwsNetworkSpec(self.zone) + return aws_network.AwsNetwork.GetNetworkFromNetworkSpec(network_spec) + + @property + def subnet_id(self): + if hasattr(self.network, 'subnet'): + return self.network.subnet.id + else: + raise errors.Config.InvalidValue('No subnet in network %s' % self.network) + + @property + def security_group(self): + if hasattr(self.network, 'vpc'): + return self.network.vpc.default_security_group_id + # not required when making the mount target + return None + + def _Create(self): + logging.info('Creating NFS resource, subnet: %s, security group: %s', + self.subnet_id, self.security_group) + self._CreateFiler() + logging.info('Waiting for filer to start up') + self.aws_commands.WaitUntilFilerAvailable(self.filer_id) + # create the mount point but do not wait for it, superclass will call the + # _IsReady() method. 
+ self._CreateMount() + + def _Delete(self): + # deletes on the file-system and mount-target are immediate + self._DeleteMount() + if not FLAGS.aws_delete_file_system: + return + self._DeleteFiler() + + def GetRemoteAddress(self): + if self.filer_id is None: + raise errors.Resource.RetryableGetError('Filer not created') + return '{name}.efs.{region}.amazonaws.com'.format( + name=self.filer_id, region=self.region) + + def _IsReady(self): + return self.aws_commands.IsMountAvailable(self.mount_id) + + def _CreateFiler(self): + """Creates the AWS EFS service.""" + if self.filer_id: + logging.warning('_CreateFiler() already called for %s', self.filer_id) + return + if FLAGS.aws_efs_token: + filer = self.aws_commands.GetFiler(FLAGS.aws_efs_token) + if filer: + self.nfs_tier = filer['PerformanceMode'] + self.filer_id = filer['FileSystemId'] + self.disk_spec.disk_size = int( + round(filer['SizeInBytes']['Value'] / 10.0 ** 9)) + return + token = FLAGS.aws_efs_token or 'nfs-token-%s' % FLAGS.run_uri + self.filer_id = self.aws_commands.CreateFiler( + token, self.nfs_tier, self.throughput_mode, self.provisioned_throughput) + self.aws_commands.AddTagsToFiler(self.filer_id) + logging.info('Created filer %s with address %s', self.filer_id, + self.GetRemoteAddress()) + + def _CreateMount(self): + """Creates an NFS mount point on an EFS service.""" + if self.mount_id: + logging.warning('_CreateMount() already called for %s', self.mount_id) + return + if not self.filer_id: + raise errors.Resource.CreationError('Did not create a filer first') + logging.info('Creating NFS mount point') + self.mount_id = self.aws_commands.CreateMount( + self.filer_id, self.subnet_id, self.security_group) + logging.info('Mount target %s starting up', self.mount_id) + + def _DeleteMount(self): + """Deletes the EFS mount point. + """ + if not self.mount_id: + return + logging.info('Deleting NFS mount mount %s', self.mount_id) + self.aws_commands.DeleteMount(self.mount_id) + self.mount_id = None + + def _DeleteFiler(self): + """Deletes the EFS service. + + Raises: + RetryableDeletionError: If the mount point exists. + """ + if not self.filer_id: + return + if self.mount_id: + # this isn't retryable as the mount point wasn't deleted + raise errors.Resource.RetryableDeletionError( + 'Did not delete mount point first') + logging.info('Deleting NFS filer %s', self.filer_id) + self.aws_commands.DeleteFiler(self.filer_id) + self.filer_id = None + + +class AwsEfsCommands(object): + """Commands for interacting with AWS EFS. + + Args: + region: AWS region for the NFS service. + """ + + def __init__(self, region): + self.efs_prefix = util.AWS_PREFIX + ['--region', region, 'efs'] + + def GetFiler(self, token): + """Returns the filer using the creation token or None.""" + args = ['describe-file-systems', '--creation-token', token] + response = self._IssueAwsCommand(args) + file_systems = response['FileSystems'] + if not file_systems: + return None + assert len(file_systems) < 2, 'Too many file systems.' 
+ return file_systems[0] + + def CreateFiler(self, token, nfs_tier, throughput_mode, + provisioned_throughput): + args = ['create-file-system', '--creation-token', token] + if nfs_tier is not None: + args += ['--performance-mode', nfs_tier] + args += ['--throughput-mode', throughput_mode] + if throughput_mode == 'provisioned': + args += ['--provisioned-throughput-in-mibps', provisioned_throughput] + return self._IssueAwsCommand(args)['FileSystemId'] + + def AddTagsToFiler(self, filer_id): + tags = util.MakeFormattedDefaultTags() + args = ['create-tags', '--file-system-id', filer_id, '--tags'] + tags + self._IssueAwsCommand(args, False) + + @vm_util.Retry() + def WaitUntilFilerAvailable(self, filer_id): + if not self._IsAvailable('describe-file-systems', '--file-system-id', + 'FileSystems', filer_id): + raise errors.Resource.RetryableCreationError( + '{} not ready'.format(filer_id)) + + @vm_util.Retry() + def DeleteFiler(self, file_system_id): + args = self.efs_prefix + [ + 'delete-file-system', '--file-system-id', file_system_id] + _, stderr, retcode = vm_util.IssueCommand(args, raise_on_failure=False) + if retcode and 'FileSystemInUse' in stderr: + raise Exception('Mount Point hasn\'t finished deleting.') + + def CreateMount(self, file_system_id, subnet_id, security_group=None): + args = [ + 'create-mount-target', '--file-system-id', file_system_id, + '--subnet-id', subnet_id + ] + if security_group: + args += ['--security-groups', security_group] + return self._IssueAwsCommand(args)['MountTargetId'] + + def IsMountAvailable(self, mount_target_id): + if mount_target_id is None: + # caller called _IsReady() before the mount point was created + return False + return self._IsAvailable('describe-mount-targets', '--mount-target-id', + 'MountTargets', mount_target_id) + + def DeleteMount(self, mount_target_id): + self._IssueAwsCommand( + ['delete-mount-target', '--mount-target-id', mount_target_id], False) + + def _IsAvailable(self, describe_cmd, id_attr, response_attribute, id_value): + describe = self._IssueAwsCommand([describe_cmd, id_attr, id_value]) + status = describe[response_attribute][0].get('LifeCycleState') + return status == 'available' + + def _IssueAwsCommand(self, args, return_json=True): + args = self.efs_prefix + [str(arg) for arg in args] + stdout, _, retcode = vm_util.IssueCommand(args, raise_on_failure=False) + if retcode: + return None + return json.loads(stdout) if return_json else stdout diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_placement_group.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_placement_group.py new file mode 100644 index 0000000..08a00a2 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_placement_group.py @@ -0,0 +1,117 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to represent an AWS Placement Group object. + +Cloud specific implementations of Placement Group. 
+""" + +import json +import uuid + +from absl import flags +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.aws import util + + +FLAGS = flags.FLAGS + + +class AwsPlacementGroupSpec(placement_group.BasePlacementGroupSpec): + """Object containing the information needed to create an AwsPlacementGroup. + + Attributes: + zone: The AWS zone the Placement Group is in. + """ + + CLOUD = providers.AWS + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(AwsPlacementGroupSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'placement_group_style': (option_decoders.EnumDecoder, { + 'valid_values': placement_group.PLACEMENT_GROUP_OPTIONS, + 'default': placement_group.PLACEMENT_GROUP_CLUSTER, + }) + }) + return result + + +class AwsPlacementGroup(placement_group.BasePlacementGroup): + """Object representing an AWS Placement Group.""" + + CLOUD = providers.AWS + + def __init__(self, aws_placement_group_spec): + """Init method for AwsPlacementGroup. + + Args: + aws_placement_group_spec: Object containing the + information needed to create an AwsPlacementGroup. + """ + super(AwsPlacementGroup, self).__init__(aws_placement_group_spec) + self.name = ( + 'perfkit-%s-%s' % (FLAGS.run_uri, str(uuid.uuid4())[-12:])) + self.region = util.GetRegionFromZone(self.zone) + self.strategy = aws_placement_group_spec.placement_group_style + + def _Create(self): + """Creates the Placement Group.""" + formatted_tags = util.FormatTagSpecifications('placement-group', + util.MakeDefaultTags()) + + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'create-placement-group', + '--region=%s' % self.region, + '--group-name=%s' % self.name, + '--strategy=%s' % self.strategy, + '--tag-specifications=%s' % formatted_tags + ] + + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """Deletes the Placement Group.""" + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'delete-placement-group', + '--region=%s' % self.region, + '--group-name=%s' % self.name] + # Failed deletes are ignorable (probably already deleted). + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the Placement Group exists.""" + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-placement-groups', + '--region=%s' % self.region, + '--filter=Name=group-name,Values=%s' % self.name] + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + placement_groups = response['PlacementGroups'] + assert len(placement_groups) < 2, 'Too many placement groups.' + return bool(placement_groups) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_relational_db.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_relational_db.py new file mode 100644 index 0000000..2697785 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_relational_db.py @@ -0,0 +1,847 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Managed relational database provisioning and teardown for AWS RDS.""" + + +import datetime +import json +import logging +import time + +from absl import flags +from perfkitbenchmarker import providers +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import sql_engine_utils +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_disk +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + + +DEFAULT_MYSQL_VERSION = '5.7.16' +DEFAULT_POSTGRES_VERSION = '9.6.9' + +DEFAULT_MYSQL_AURORA_VERSION = '5.7.12' +DEFAULT_MYSQL56_AURORA_VERSION = '5.6.10a' +DEFAULT_POSTGRES_AURORA_VERSION = '9.6.9' +DEFAULT_SQLSERVER_VERSION = '14.00.3223.3.v1' + +IS_READY_TIMEOUT = 60 * 60 * 1 # 1 hour (RDS HA takes a long time to prepare) + +_MAP_ENGINE_TO_DEFAULT_VERSION = { + sql_engine_utils.MYSQL: DEFAULT_MYSQL_VERSION, + sql_engine_utils.AURORA_MYSQL: DEFAULT_MYSQL_AURORA_VERSION, + sql_engine_utils.AURORA_MYSQL56: DEFAULT_MYSQL56_AURORA_VERSION, + sql_engine_utils.POSTGRES: DEFAULT_POSTGRES_VERSION, + sql_engine_utils.AURORA_POSTGRES: DEFAULT_POSTGRES_AURORA_VERSION, + sql_engine_utils.SQLSERVER_EXPRESS: DEFAULT_SQLSERVER_VERSION, + sql_engine_utils.SQLSERVER_STANDARD: DEFAULT_SQLSERVER_VERSION, + sql_engine_utils.SQLSERVER_ENTERPRISE: DEFAULT_SQLSERVER_VERSION, +} + +_AURORA_ENGINES = ( + sql_engine_utils.AURORA_MYSQL56, sql_engine_utils.AURORA_MYSQL, + sql_engine_utils.AURORA_POSTGRES) + +_SQL_SERVER_ENGINES = ( + sql_engine_utils.SQLSERVER_EXPRESS, + sql_engine_utils.SQLSERVER_STANDARD, + sql_engine_utils.SQLSERVER_ENTERPRISE) + +_RDS_ENGINES = ( + sql_engine_utils.MYSQL, + sql_engine_utils.POSTGRES, + sql_engine_utils.SQLSERVER_EXPRESS, + sql_engine_utils.SQLSERVER_STANDARD, + sql_engine_utils.SQLSERVER_ENTERPRISE) + +MYSQL_SUPPORTED_MAJOR_VERSIONS = ['5.7', '8.0'] +POSTGRES_SUPPORTED_MAJOR_VERSIONS = ['9.6', '10', '11', '12', '13'] + + + +class AwsRelationalDbCrossRegionError(Exception): + pass + + +class AwsRelationalDbParameterError(Exception): + """Exceptions for invalid Db parameters.""" + pass + + +class AwsRelationalDb(relational_db.BaseRelationalDb): + """An object representing an AWS RDS managed relational database. + + Currenty MySQL and Postgres are supported. This class requires that a + client vm be available as an attribute on the instance before Create() is + called, which is the current behavior of PKB. This is necessary to setup the + networking correctly. The following steps are performed to provision the + database: + 1. get the client's VPC + 2. get the client's zone + 3. create a new subnet in the VPC's region that is different from the + client's zone + 4. create a new db subnet group using the client's zone, and the newly + created zone + 5. authorize Postgres traffic on the VPC's default security group + 6. create the RDS instance in the requested region using the new db + subnet group and VPC security group. + + On teardown, all resources are deleted. 
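Editor's note: a condensed CLI-level sketch of steps 4 through 6 in the list above, with placeholder identifiers (the class below issues the equivalent commands using values derived from the client VM's network):

```python
import subprocess

REGION = 'us-west-2'                      # placeholder
SUBNETS = ['subnet-aaa', 'subnet-bbb']    # one per availability zone, placeholders
SECURITY_GROUP = 'sg-0123'                # VPC default security group, placeholder

# Step 4: a DB subnet group spanning two availability zones.
subprocess.check_call([
    'aws', 'rds', 'create-db-subnet-group',
    '--region', REGION,
    '--db-subnet-group-name', 'demo-db-subnet-group',
    '--db-subnet-group-description', 'demo',
    '--subnet-ids', *SUBNETS,
])

# Step 5: allow DB traffic (Postgres port shown) within the default security group.
subprocess.check_call([
    'aws', 'ec2', 'authorize-security-group-ingress',
    '--region', REGION,
    '--group-id', SECURITY_GROUP,
    '--source-group', SECURITY_GROUP,
    '--protocol', 'tcp', '--port', '5432',
])

# Step 6: the RDS instance itself, attached to both resources above.
subprocess.check_call([
    'aws', 'rds', 'create-db-instance',
    '--region', REGION,
    '--db-instance-identifier', 'demo-postgres',
    '--engine', 'postgres',
    '--db-instance-class', 'db.m5.large',
    '--allocated-storage', '100',
    '--master-username', 'demo',
    '--master-user-password', 'change-me-please',
    '--db-subnet-group-name', 'demo-db-subnet-group',
    '--vpc-security-group-ids', SECURITY_GROUP,
])
```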
+ + Note that the client VM's region and the region requested for the database + must be the same. + + At the moment there is no way to specify the primary zone when creating a + high availability instance, which means that the client and server may + be launched in different zones, which hurts network performance. + In other words, the 'zone' attribute on the relational_db db_spec + has no effect, and is only used to specify the region. + + To filter out runs that cross zones, be sure to check the sample metadata for + 'zone' (client's zone), 'relational_db_zone' (primary RDS zone), + and 'relational_db_secondary_zone' (secondary RDS zone). + + If the instance was NOT launched in the high availability configuration, the + server will be launched in the zone requested, and + relational_db_secondary_zone will not exist in the metadata. + """ + CLOUD = providers.AWS + + def __init__(self, relational_db_spec): + super(AwsRelationalDb, self).__init__(relational_db_spec) + self.cluster_id = None + self.all_instance_ids = [] + self.primary_zone = None + self.secondary_zone = None + self.parameter_group = None + + if hasattr(self.spec, 'zones') and self.spec.zones is not None: + self.zones = self.spec.zones + else: + self.zones = [self.spec.db_spec.zone] + + self.region = util.GetRegionFromZones(self.zones) + self.subnets_owned_by_db = [] + self.subnets_used_by_db = [] + + self.unmanaged_db_exists = None if self.is_managed_db else False + + # dependencies which will be created + self.db_subnet_group_name: str = None + self.security_group_id: str = None + + def GetResourceMetadata(self): + """Returns the metadata associated with the resource. + + All keys will be prefaced with relational_db before + being published (done in publisher.py). + + Returns: + metadata: dict of AWS Managed DB metadata. + """ + metadata = super(AwsRelationalDb, self).GetResourceMetadata() + metadata.update({ + 'zone': self.primary_zone, + }) + + if self.spec.high_availability: + metadata.update({ + 'secondary_zone': self.secondary_zone, + }) + + if hasattr(self.spec.db_disk_spec, 'iops'): + metadata.update({ + 'disk_iops': self.spec.db_disk_spec.iops, + }) + + return metadata + + @staticmethod + def GetDefaultEngineVersion(engine): + """Returns the default version of a given database engine. + + Args: + engine (string): type of database (my_sql or postgres). + Returns: + (string): Default engine version. + Raises: + Exception: If unrecognized engine is specified. + """ + if engine not in _MAP_ENGINE_TO_DEFAULT_VERSION: + raise Exception('Unspecified default version for {0}'.format(engine)) + return _MAP_ENGINE_TO_DEFAULT_VERSION[engine] + + def _GetNewZones(self): + """Returns a list of zones, excluding the one that the client VM is in.""" + all_zones = util.GetZonesInRegion(self.region) + for zone in self.zones: + all_zones.remove(zone) + return all_zones + + def _CreateSubnetInZone(self, new_subnet_zone): + """Creates a new subnet in the same region as the client VM. + + Args: + new_subnet_zone: The zone for the subnet to be created. 
+ Must be in the same region as the client + + Returns: + the new subnet resource + """ + cidr = self.client_vm.network.regional_network.vpc.NextSubnetCidrBlock() + logging.info('Attempting to create a subnet in zone %s', new_subnet_zone) + new_subnet = ( + aws_network.AwsSubnet( + new_subnet_zone, + self.client_vm.network.regional_network.vpc.id, + cidr)) + new_subnet.Create() + logging.info('Successfully created a new subnet, subnet id is: %s', + new_subnet.id) + + # save for cleanup + self.subnets_used_by_db.append(new_subnet) + self.subnets_owned_by_db.append(new_subnet) + return new_subnet + + def _CreateSubnetInAllZonesAssumeClientZoneExists(self): + client_zone = self.client_vm.network.subnet.zone + for zone in self.zones: + if zone != client_zone: + self._CreateSubnetInZone(zone) + else: + self.subnets_used_by_db.append(self.client_vm.network.subnet) + + def _CreateSubnetInAdditionalZone(self): + """Creates a new subnet in the same region as the client VM. + + The zone will be different from the client's zone (but in the same region). + + Returns: + the new subnet resource + + Raises: + Exception: if unable to create a subnet in any zones in the region. + """ + new_subnet_zones = self._GetNewZones() + while len(new_subnet_zones) >= 1: + new_subnet_zone = new_subnet_zones.pop() + try: + new_subnet = self._CreateSubnetInZone(new_subnet_zone) + return new_subnet + except: + logging.info('Unable to create subnet in zone %s', new_subnet_zone) + raise Exception('Unable to create subnet in any availability zones') + + def _CreateDbSubnetGroup(self, subnets): + """Creates a new db subnet group. + + Args: + subnets: a list of strings. + The db subnet group will consit of all subnets in this list. + """ + db_subnet_group_name = 'pkb-db-subnet-group-{0}'.format(FLAGS.run_uri) + + create_db_subnet_group_cmd = util.AWS_PREFIX + ( + ['rds', + 'create-db-subnet-group', + '--db-subnet-group-name', db_subnet_group_name, + '--db-subnet-group-description', 'pkb_subnet_group_for_db', + '--region', self.region, + '--subnet-ids'] + [subnet.id for subnet in subnets] + + ['--tags'] + util.MakeFormattedDefaultTags()) + + vm_util.IssueCommand(create_db_subnet_group_cmd) + + # save for cleanup + self.db_subnet_group_name = db_subnet_group_name + self.security_group_id = (self.client_vm.network.regional_network. 
+ vpc.default_security_group_id) + + def _SetupNetworking(self): + """Sets up the networking required for the RDS database.""" + if self.spec.engine in _RDS_ENGINES: + self.subnets_used_by_db.append(self.client_vm.network.subnet) + self._CreateSubnetInAdditionalZone() + elif self.spec.engine in _AURORA_ENGINES: + self._CreateSubnetInAllZonesAssumeClientZoneExists() + else: + raise Exception('Unknown how to create network for {0}'.format( + self.spec.engine)) + + self._CreateDbSubnetGroup(self.subnets_used_by_db) + + open_port_cmd = util.AWS_PREFIX + [ + 'ec2', + 'authorize-security-group-ingress', + '--group-id', self.security_group_id, + '--source-group', self.security_group_id, + '--protocol', 'tcp', + '--port={0}'.format(self.port), + '--region', self.region] + stdout, stderr, _ = vm_util.IssueCommand(open_port_cmd) + logging.info('Granted DB port ingress, stdout is:\n%s\nstderr is:\n%s', + stdout, stderr) + + def _TeardownNetworking(self): + """Tears down all network resources that were created for the database.""" + if hasattr(self, 'db_subnet_group_name'): + delete_db_subnet_group_cmd = util.AWS_PREFIX + [ + 'rds', + 'delete-db-subnet-group', + '--db-subnet-group-name', self.db_subnet_group_name, + '--region', self.region] + vm_util.IssueCommand(delete_db_subnet_group_cmd, raise_on_failure=False) + + for subnet_for_db in self.subnets_owned_by_db: + subnet_for_db.Delete() + + def _TeardownParameterGroup(self): + """Tears down all parameter group that were created for the database.""" + if self.parameter_group: + delete_db_parameter_group_cmd = util.AWS_PREFIX + [ + 'rds', 'delete-db-parameter-group', '--db-parameter-group-name', + self.parameter_group, '--region', self.region + ] + vm_util.IssueCommand( + delete_db_parameter_group_cmd, raise_on_failure=False) + + def _CreateAwsSqlInstance(self): + if self.spec.engine in _RDS_ENGINES: + instance_identifier = self.instance_id + self.all_instance_ids.append(instance_identifier) + cmd = util.AWS_PREFIX + [ + 'rds', 'create-db-instance', + '--db-instance-identifier=%s' % instance_identifier, + '--engine=%s' % self.spec.engine, + '--master-username=%s' % self.spec.database_username, + '--master-user-password=%s' % self.spec.database_password, + '--allocated-storage=%s' % self.spec.db_disk_spec.disk_size, + '--storage-type=%s' % self.spec.db_disk_spec.disk_type, + '--db-instance-class=%s' % self.spec.db_spec.machine_type, + '--no-auto-minor-version-upgrade', + '--region=%s' % self.region, + '--engine-version=%s' % self.spec.engine_version, + '--db-subnet-group-name=%s' % self.db_subnet_group_name, + '--vpc-security-group-ids=%s' % self.security_group_id, + '--availability-zone=%s' % self.spec.db_spec.zone, '--tags' + ] + util.MakeFormattedDefaultTags() + + if self.spec.engine in _SQL_SERVER_ENGINES: + cmd = cmd + ['--license-model=license-included'] + + if self.spec.db_disk_spec.disk_type == aws_disk.IO1: + cmd.append('--iops=%s' % self.spec.db_disk_spec.iops) + # TODO(ferneyhough): add backup_enabled and backup_window + + vm_util.IssueCommand(cmd) + + elif self.spec.engine in _AURORA_ENGINES: + zones_needed_for_high_availability = len(self.zones) > 1 + if zones_needed_for_high_availability != self.spec.high_availability: + raise Exception('When db_high_availability is true, multiple ' + 'zones must be specified. When ' + 'db_high_availability is false, one zone ' + 'should be specified. 
' + 'db_high_availability: {0} ' + 'zone count: {1} '.format( + zones_needed_for_high_availability, + len(self.zones))) + + cluster_identifier = 'pkb-db-cluster-' + FLAGS.run_uri + # Create the cluster. + cmd = util.AWS_PREFIX + [ + 'rds', 'create-db-cluster', + '--db-cluster-identifier=%s' % cluster_identifier, + '--engine=%s' % self.spec.engine, + '--engine-version=%s' % self.spec.engine_version, + '--master-username=%s' % self.spec.database_username, + '--master-user-password=%s' % self.spec.database_password, + '--region=%s' % self.region, + '--db-subnet-group-name=%s' % self.db_subnet_group_name, + '--vpc-security-group-ids=%s' % self.security_group_id, + '--availability-zones=%s' % self.spec.zones[0], + '--tags'] + util.MakeFormattedDefaultTags() + + self.cluster_id = cluster_identifier + vm_util.IssueCommand(cmd) + + for zone in self.zones: + + # The first instance is assumed to be writer - + # and so use the instance_id for that id. + if zone == self.zones[0]: + instance_identifier = self.instance_id + else: + instance_identifier = self.instance_id + '-' + zone + + self.all_instance_ids.append(instance_identifier) + + cmd = util.AWS_PREFIX + [ + 'rds', 'create-db-instance', + '--db-instance-identifier=%s' % instance_identifier, + '--db-cluster-identifier=%s' % cluster_identifier, + '--engine=%s' % self.spec.engine, + '--engine-version=%s' % self.spec.engine_version, + '--no-auto-minor-version-upgrade', + '--db-instance-class=%s' % self.spec.db_spec.machine_type, + '--region=%s' % self.region, + '--availability-zone=%s' % zone, '--tags' + ] + util.MakeFormattedDefaultTags() + vm_util.IssueCommand(cmd) + + else: + raise Exception('Unknown how to create AWS data base engine {0}'.format( + self.spec.engine)) + + def _Create(self): + """Creates the AWS RDS instance. + + Raises: + Exception: if unknown how to create self.spec.engine. + + """ + if self.is_managed_db: + self._CreateAwsSqlInstance() + else: + self.endpoint = self.server_vm.ip_address + self._SetupUnmanagedDatabase() + self.firewall = aws_network.AwsFirewall() + self.firewall.AllowPortInSecurityGroup( + self.server_vm.region, + self.server_vm.network.regional_network.vpc.default_security_group_id, + self.port, + self.port, + ['%s/32' % self.client_vm.ip_address]) + self.unmanaged_db_exists = True + + def _IsDeleting(self): + """See Base class BaseResource in perfkitbenchmarker.resource.py.""" + + for instance_id in self.all_instance_ids: + json_output = self._DescribeInstance(instance_id) + if json_output: + state = json_output['DBInstances'][0]['DBInstanceStatus'] + if state == 'deleting': + return True + + return False + + def _Delete(self): + """Deletes the underlying resource. + + Implementations of this method should be idempotent since it may + be called multiple times, even if the resource has already been + deleted. 
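Editor's note: in the Aurora branch above, the cluster and its member instances are separate API objects: the cluster is created once, then one instance per zone is attached to it, with the first acting as the writer. A trimmed sketch of that two-step flow with placeholder identifiers:

```python
import subprocess

REGION = 'us-east-1'                  # placeholder
ZONES = ['us-east-1a', 'us-east-1b']  # first zone hosts the writer

subprocess.check_call([
    'aws', 'rds', 'create-db-cluster',
    '--region', REGION,
    '--db-cluster-identifier', 'demo-aurora-cluster',
    '--engine', 'aurora-mysql',
    '--master-username', 'demo',
    '--master-user-password', 'change-me-please',
    '--availability-zones', ZONES[0],
])

# One DB instance per zone; the first is the writer, the rest are replicas.
for zone in ZONES:
    subprocess.check_call([
        'aws', 'rds', 'create-db-instance',
        '--region', REGION,
        '--db-cluster-identifier', 'demo-aurora-cluster',
        '--db-instance-identifier', f'demo-aurora-{zone}',
        '--db-instance-class', 'db.r5.large',
        '--engine', 'aurora-mysql',
        '--availability-zone', zone,
    ])
```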
+ """ + if not self.is_managed_db: + if hasattr(self, 'firewall'): + self.firewall.DisallowAllPorts() + self.unmanaged_db_exists = False + self.PrintUnmanagedDbStats() + return + + for current_instance_id in self.all_instance_ids: + cmd = util.AWS_PREFIX + [ + 'rds', + 'delete-db-instance', + '--db-instance-identifier=%s' % current_instance_id, + '--skip-final-snapshot', + '--region', self.region, + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + if self.cluster_id is not None: + cmd = util.AWS_PREFIX + [ + 'rds', + 'delete-db-cluster', + '--db-cluster-identifier=%s' % self.cluster_id, + '--skip-final-snapshot', + '--region', self.region, + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the underlying resource exists. + + Supplying this method is optional. If it is not implemented then the + default is to assume success when _Create and _Delete do not raise + exceptions. + """ + if not self.is_managed_db: + return self.unmanaged_db_exists + for current_instance_id in self.all_instance_ids: + json_output = self._DescribeInstance(current_instance_id) + if not json_output: + return False + + return True + + def _ParseEndpointFromInstance(self, describe_instance_json): + """Parses the json output from the CLI and returns the endpoint. + + Args: + describe_instance_json: output in json format from calling + 'aws rds describe-db-instances' + + Returns: + endpoint of the server as a string + """ + return describe_instance_json['DBInstances'][0]['Endpoint']['Address'] + + def _ParsePortFromInstance(self, describe_instance_json): + """Parses the json output from the CLI and returns the port. + + Args: + describe_instance_json: output in json format from calling + 'aws rds describe-db-instances' + + Returns: + port on which the server is listening, as an int + """ + if describe_instance_json is None: + return None + return int(describe_instance_json['DBInstances'][0]['Endpoint']['Port']) + + def _ParseEndpointFromCluster(self, describe_cluster_json): + """Parses the json output from the CLI and returns the endpoint. + + Args: + describe_cluster_json: output in json format from calling + 'aws rds describe-db-clusters' + + Returns: + endpoint of the server as a string + """ + return describe_cluster_json['DBClusters'][0]['Endpoint'] + + def _SavePrimaryAndSecondaryZones(self, describe_instance_json): + """Saves the primary, and secondary (only if HA) zone of the server. + + Args: + describe_instance_json: output in json format from calling + 'aws rds describe-db-instances' + """ + + if self.spec.engine in _AURORA_ENGINES: + self.primary_zone = self.zones[0] + if len(self.zones) > 1: + self.secondary_zone = ','.join(self.zones[1:]) + else: + db_instance = describe_instance_json['DBInstances'][0] + self.primary_zone = ( + db_instance['AvailabilityZone']) + if self.spec.high_availability: + if 'SecondaryAvailabilityZone' in db_instance: + self.secondary_zone = db_instance['SecondaryAvailabilityZone'] + else: + # the secondary DB for RDS is in the second subnet. + self.secondary_zone = self.subnets_used_by_db[1].zone + + def _IsReady(self, timeout=IS_READY_TIMEOUT): + """Return true if the underlying resource is ready. + + This method will query all of the instance every 5 seconds until + its instance state is 'available', or until a timeout occurs. + + Args: + timeout: timeout in seconds + + Returns: + True if the resource was ready in time, False if the wait timed out + or an Exception occurred. 
+ """ + if not self.is_managed_db: + return self._IsReadyUnmanaged() + + if not self.all_instance_ids: + return False + + for instance_id in self.all_instance_ids: + if not self._IsInstanceReady(instance_id, timeout): + return False + + return True + + def _PostCreate(self): + """Perform general post create operations on the cluster. + + Raises: + Exception: If could not ready the instance after modification to + multi-az. + """ + super()._PostCreate() + + if not self.is_managed_db: + self.client_vm_query_tools.InstallPackages() + else: + need_ha_modification = self.spec.engine in _RDS_ENGINES + + if self.spec.high_availability and need_ha_modification: + # When extending the database to be multi-az, the second region + # is picked by where the second subnet has been created. + cmd = util.AWS_PREFIX + [ + 'rds', + 'modify-db-instance', + '--db-instance-identifier=%s' % self.instance_id, + '--multi-az', + '--apply-immediately', + '--region=%s' % self.region + ] + vm_util.IssueCommand(cmd) + + if not self._IsInstanceReady( + self.instance_id, timeout=IS_READY_TIMEOUT): + raise Exception('Instance could not be set to ready after ' + 'modification for high availability') + + json_output = self._DescribeInstance(self.instance_id) + self._SavePrimaryAndSecondaryZones(json_output) + if self.cluster_id: + self._GetPortsForClusterInstance(self.cluster_id) + else: + self._GetPortsForWriterInstance(self.all_instance_ids[0]) + + self.client_vm_query_tools.InstallPackages() + + def _IsInstanceReady(self, instance_id, timeout=IS_READY_TIMEOUT): + """Return true if the instance is ready. + + This method will query the instance every 5 seconds until + its instance state is 'available', or until a timeout occurs. + + Args: + instance_id: string of the instance to check is ready + timeout: timeout in seconds + + Returns: + True if the resource was ready in time, False if the wait timed out + or an Exception occurred. + """ + start_time = datetime.datetime.now() + + while True: + if (datetime.datetime.now() - start_time).seconds >= timeout: + logging.exception('Timeout waiting for sql instance to be ready') + return False + json_output = self._DescribeInstance(instance_id) + if json_output: + try: + state = json_output['DBInstances'][0]['DBInstanceStatus'] + pending_values = ( + json_output['DBInstances'][0]['PendingModifiedValues']) + waiting_param = json_output['DBInstances'][0]['DBParameterGroups'][0][ + 'ParameterApplyStatus'] == 'applying' + logging.info('Instance state: %s', state) + if pending_values: + logging.info('Pending values: %s', (str(pending_values))) + + if waiting_param: + logging.info('Applying parameter') + + if state == 'available' and not pending_values and not waiting_param: + break + except: + logging.exception( + 'Error attempting to read stdout. 
Creation failure.') + return False + time.sleep(5) + + return True + + def _DescribeInstance(self, instance_id): + cmd = util.AWS_PREFIX + [ + 'rds', + 'describe-db-instances', + '--db-instance-identifier=%s' % instance_id, + '--region=%s' % self.region + ] + stdout, _, retcode = vm_util.IssueCommand(cmd, suppress_warning=True, + raise_on_failure=False) + if retcode != 0: + return None + json_output = json.loads(stdout) + return json_output + + def _DescribeCluster(self, cluster_id): + cmd = util.AWS_PREFIX + [ + 'rds', + 'describe-db-clusters', + '--db-cluster-identifier=%s' % cluster_id, + '--region=%s' % self.region + ] + stdout, _, _ = vm_util.IssueCommand(cmd, suppress_warning=True) + json_output = json.loads(stdout) + return json_output + + def _Reboot(self): + """Reboot the database and wait until the database is in ready state.""" + # Can only reboot when the instance is in ready state + if not self._IsInstanceReady(self.instance_id, timeout=IS_READY_TIMEOUT): + raise Exception('Instance is not in a state that can reboot') + + cmd = util.AWS_PREFIX + [ + 'rds', 'reboot-db-instance', + '--db-instance-identifier=%s' % self.instance_id, + '--region=%s' % self.region + ] + + vm_util.IssueCommand(cmd, suppress_warning=True) + + if not self._IsInstanceReady(self.instance_id, timeout=IS_READY_TIMEOUT): + raise Exception('Instance could not be set to ready after ' + 'reboot') + + def _ApplyManagedDbFlags(self): + """Apply managed flags on RDS.""" + if self.spec.db_flags: + if self.spec.engine == 'postgres': + cumulus_default_paramter_group = 'cumulus-default-postgres' + elif self.spec.engine == 'mysql': + cumulus_default_paramter_group = 'cumulus-default-mysql' + + self.parameter_group = 'pkb-parameter-group-' + FLAGS.run_uri + cmd = util.AWS_PREFIX + [ + 'rds', 'copy-db-parameter-group', + '--source-db-parameter-group-identifier=%s' % cumulus_default_paramter_group, + '--target-db-parameter-group-identifier=%s' % self.parameter_group, + '--target-db-parameter-group-description="Configuration options chosen by user"', + '--region=%s' % self.region + ] + + vm_util.IssueCommand(cmd, suppress_warning=True) + + cmd = util.AWS_PREFIX + [ + 'rds', 'modify-db-instance', + '--db-instance-identifier=%s' % self.instance_id, + '--db-parameter-group-name=%s' % self.parameter_group, + '--region=%s' % self.region, '--apply-immediately' + ] + + vm_util.IssueCommand(cmd, suppress_warning=True, raise_on_failure=False) + + for flag in self.spec.db_flags: + key_value_pair = flag.split('=') + if len(key_value_pair) != 2: + raise AwsRelationalDbParameterError('Malformed parameter %s' % flag) + cmd = util.AWS_PREFIX + [ + 'rds', 'modify-db-parameter-group', + '--db-parameter-group-name=%s' % self.parameter_group, + '--parameters=ParameterName=%s,ParameterValue=%s,ApplyMethod=pending-reboot' + % (key_value_pair[0], key_value_pair[1]), + '--region=%s' % self.region + ] + + vm_util.IssueCommand(cmd, suppress_warning=True) + + self._Reboot() + + + def _GetParameterGroupFamily(self): + """Get the parameter group family string. + + Parameter group family is formatted as engine type plus version. + + Returns: + ParameterGroupFamiliy name of rds resources. + + Raises: + NotImplementedError: If there is no supported ParameterGroupFamiliy. 
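Editor's note: the flag-application path above copies a pre-existing parameter group. Without such a source group, a similar effect can be approximated by creating one for the engine family (the engine-plus-major-version string that _GetParameterGroupFamily derives). A sketch with placeholder names and one example static parameter:

```python
import subprocess

REGION = 'us-west-2'            # placeholder
INSTANCE_ID = 'demo-postgres'   # placeholder
GROUP = 'demo-parameter-group'  # placeholder

# Family is engine name plus major version, e.g. 'postgres13' or 'mysql8.0'.
subprocess.check_call([
    'aws', 'rds', 'create-db-parameter-group',
    '--region', REGION,
    '--db-parameter-group-name', GROUP,
    '--db-parameter-group-family', 'postgres13',
    '--description', 'demo settings',
])

# Set one parameter; pending-reboot means it takes effect after a reboot.
subprocess.check_call([
    'aws', 'rds', 'modify-db-parameter-group',
    '--region', REGION,
    '--db-parameter-group-name', GROUP,
    '--parameters',
    'ParameterName=max_connections,ParameterValue=500,ApplyMethod=pending-reboot',
])

# Attach the group to the instance, then reboot so the value is picked up.
subprocess.check_call([
    'aws', 'rds', 'modify-db-instance',
    '--region', REGION,
    '--db-instance-identifier', INSTANCE_ID,
    '--db-parameter-group-name', GROUP,
    '--apply-immediately',
])
subprocess.check_call([
    'aws', 'rds', 'reboot-db-instance',
    '--region', REGION,
    '--db-instance-identifier', INSTANCE_ID,
])
```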
+ """ + all_supported_versions = ( + MYSQL_SUPPORTED_MAJOR_VERSIONS + POSTGRES_SUPPORTED_MAJOR_VERSIONS) + for version in all_supported_versions: + if self.spec.engine_version.startswith(version): + return self.spec.engine + version + + raise NotImplementedError('The parameter group of engine %s,' + ' version %s is not supported' % + (self.spec.engine, self.spec.engine_version)) + + def _GetPortsForWriterInstance(self, instance_id): + """Assigns the ports and endpoints from the instance_id to self. + + These will be used to communicate with the data base. + """ + json_output = self._DescribeInstance(instance_id) + self.endpoint = self._ParseEndpointFromInstance(json_output) + + def _GetPortsForClusterInstance(self, cluster_id): + """Assigns the ports and endpoints from the cluster_id to self. + + These will be used to communicate with the data base. + """ + json_output = self._DescribeCluster(cluster_id) + self.endpoint = self._ParseEndpointFromCluster(json_output) + + def _AssertClientAndDbInSameRegion(self): + """Asserts that the client vm is in the same region requested by the server. + + Raises: + AwsRelationalDbCrossRegionError: if the client vm is in a + different region that is requested by the server. + """ + if self.client_vm.region != self.region: + raise AwsRelationalDbCrossRegionError( + ('client_vm and relational_db server ' + 'must be in the same region')) + + def _CreateDependencies(self): + """Method that will be called once before _CreateResource() is called. + + Supplying this method is optional. It is intended to allow additional + flexibility in creating resource dependencies separately from _Create(). + """ + if self.is_managed_db: + self._AssertClientAndDbInSameRegion() + self._SetupNetworking() + + def _DeleteDependencies(self): + """Method that will be called once after _DeleteResource() is called. + + Supplying this method is optional. It is intended to allow additional + flexibility in deleting resource dependencies separately from _Delete(). + """ + if self.is_managed_db: + self._TeardownNetworking() + self._TeardownParameterGroup() + + def _FailoverHA(self): + """Fail over from master to replica.""" + + if self.spec.engine in _RDS_ENGINES: + cmd = util.AWS_PREFIX + [ + 'rds', + 'reboot-db-instance', + '--db-instance-identifier=%s' % self.instance_id, + '--force-failover', + '--region=%s' % self.region + ] + vm_util.IssueCommand(cmd) + elif self.spec.engine in _AURORA_ENGINES: + new_primary_id = self.all_instance_ids[1] + cmd = util.AWS_PREFIX + [ + 'rds', + 'failover-db-cluster', + '--db-cluster-identifier=%s' % self.cluster_id, + '--target-db-instance-identifier=%s' % new_primary_id, + '--region=%s' % self.region + ] + vm_util.IssueCommand(cmd) + else: + raise Exception('Unknown how to failover {0}'.format( + self.spec.engine)) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_sqs.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_sqs.py new file mode 100644 index 0000000..939724c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_sqs.py @@ -0,0 +1,116 @@ +"""AWS SQS interface for resources. + +This class handles resource creation/cleanup for SQS benchmark on AWS. 
+""" + +import json +import os + +from absl import flags +from perfkitbenchmarker import messaging_service as msgsvc +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS +MESSAGING_SERVICE_SCRIPTS_VM_AWS_DIR = os.path.join( + msgsvc.MESSAGING_SERVICE_SCRIPTS_VM_LIB_DIR, 'aws') +MESSAGING_SERVICE_SCRIPTS_AWS_PREFIX = 'messaging_service_scripts/aws' +MESSAGING_SERVICE_SCRIPTS_AWS_FILES = ['__init__.py', 'aws_sqs_client.py'] +MESSAGING_SERVICE_SCRIPTS_AWS_BIN = 'messaging_service_scripts/aws_benchmark.py' + + +class AwsSqs(msgsvc.BaseMessagingService): + """AWS SQS Interface Class.""" + + CLOUD = providers.AWS + + def __init__(self): + super().__init__() + self.queue_name = 'pkb-queue-{0}'.format(FLAGS.run_uri) + + def _Create(self): + """Handles AWS resources provision. + + It creates an AWS SQS queue. + """ + cmd = util.AWS_PREFIX + [ + 'sqs', + 'create-queue', + '--queue-name', self.queue_name, + '--region', self.region + ] + vm_util.IssueCommand(cmd) + + def _Exists(self) -> bool: + """Checks whether SQS queue already exists.""" + cmd = util.AWS_PREFIX + [ + 'sqs', + 'get-queue-url', + '--queue-name', self.queue_name, + '--region', self.region + ] + _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + return retcode == 0 + + def _Delete(self): + """Handle SQS queue deletion.""" + cmd = util.AWS_PREFIX + [ + 'sqs', + 'delete-queue', + '--queue-url', self._GetQueue(), + '--region', self.region + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _IsDeleting(self): + """Overrides BaseResource._IsDeleting. + + Used internally while deleting to check if the deletion is still in + progress. + + Returns: + A bool. True if the resource is not yet deleted, else False. + """ + return self._Exists() + + def _InstallCloudClients(self): + self.client_vm.RemoteCommand( + 'sudo pip3 install boto3', ignore_failure=False) + + self._CopyFiles( + MESSAGING_SERVICE_SCRIPTS_AWS_PREFIX, + MESSAGING_SERVICE_SCRIPTS_AWS_FILES, + MESSAGING_SERVICE_SCRIPTS_VM_AWS_DIR) + self.client_vm.PushDataFile(MESSAGING_SERVICE_SCRIPTS_AWS_BIN) + + # copy AWS creds + self.client_vm.Install('aws_credentials') + + def Run(self, benchmark_scenario: str, number_of_messages: str, + message_size: str): + """Runs remote commands on client VM - benchmark's run phase.""" + command = (f'python3 -m aws_benchmark ' + f'--queue_name={self.queue_name} ' + f'--region={self.region} ' + f'--benchmark_scenario={benchmark_scenario} ' + f'--number_of_messages={number_of_messages} ' + f'--message_size={message_size}') + stdout, _ = self.client_vm.RemoteCommand(command) + results = json.loads(stdout) + return results + + @property + def region(self): + return util.GetRegionFromZone(self.client_vm.zone) + + def _GetQueue(self) -> str: + """Get SQS queue URL from AWS.""" + cmd = util.AWS_PREFIX + [ + 'sqs', + 'get-queue-url', + '--queue-name', self.queue_name, + '--region', self.region + ] + stdout, _, _ = vm_util.IssueCommand(cmd) + return json.loads(stdout)['QueueUrl'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_virtual_machine.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_virtual_machine.py new file mode 100644 index 0000000..1a6032a --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_virtual_machine.py @@ -0,0 +1,1655 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Class to represent an AWS Virtual Machine object. + +Images: aws ec2 describe-images --owners self amazon +All VM specifics are self-contained and the class provides methods to +operate on the VM: boot, shutdown, etc. +""" + + +import base64 +import collections +import json +import logging +import posixpath +import re +import threading +import time +import uuid +# Added by Intel +import ipaddress +# End Added by Intel + +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_virtual_machine +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import windows_virtual_machine +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.aws import aws_disk +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import util +from six.moves import range + +# Added by Intel +try: + unicode +except NameError: + unicode = str +# End added by Intel + +FLAGS = flags.FLAGS + +HVM = 'hvm' +PV = 'paravirtual' +NON_HVM_PREFIXES = ['m1', 'c1', 't1', 'm2'] +NON_PLACEMENT_GROUP_PREFIXES = frozenset(['t2', 'm3', 't3', 't3a']) +DRIVE_START_LETTER = 'b' +TERMINATED = 'terminated' +SHUTTING_DOWN = 'shutting-down' +INSTANCE_EXISTS_STATUSES = frozenset(['running', 'stopping', 'stopped']) +INSTANCE_DELETED_STATUSES = frozenset([SHUTTING_DOWN, TERMINATED]) +INSTANCE_TRANSITIONAL_STATUSES = frozenset(['pending']) +INSTANCE_KNOWN_STATUSES = (INSTANCE_EXISTS_STATUSES | INSTANCE_DELETED_STATUSES + | INSTANCE_TRANSITIONAL_STATUSES) +HOST_EXISTS_STATES = frozenset( + ['available', 'under-assessment', 'permanent-failure']) +HOST_RELEASED_STATES = frozenset(['released', 'released-permanent-failure']) +KNOWN_HOST_STATES = HOST_EXISTS_STATES | HOST_RELEASED_STATES + +AWS_INITIATED_SPOT_TERMINATING_TRANSITION_STATUSES = frozenset( + ['marked-for-termination', 'marked-for-stop']) + +AWS_INITIATED_SPOT_TERMINAL_STATUSES = frozenset( + ['instance-terminated-by-price', 'instance-terminated-by-service', + 'instance-terminated-no-capacity', + 'instance-terminated-capacity-oversubscribed', + 'instance-terminated-launch-group-constraint']) + +USER_INITIATED_SPOT_TERMINAL_STATUSES = frozenset( + ['request-canceled-and-instance-running', 'instance-terminated-by-user']) + +# These are the project numbers of projects owning common images. +# Some numbers have corresponding owner aliases, but they are not used here. 
+AMAZON_LINUX_IMAGE_PROJECT = [ + '137112412989', # alias amazon most regions + '210953353124', # alias amazon for af-south-1 + '910595266909', # alias amazon for ap-east-1 + '071630900071', # alias amazon for eu-south-1 +] +# From https://wiki.debian.org/Cloud/AmazonEC2Image/Stretch +# Marketplace AMI exists, but not in all regions +DEBIAN_9_IMAGE_PROJECT = ['379101102735'] +# From https://wiki.debian.org/Cloud/AmazonEC2Image/Buster +# From https://wiki.debian.org/Cloud/AmazonEC2Image/Bullseye +DEBIAN_IMAGE_PROJECT = ['136693071363'] +# Owns AMIs lists here: +# https://wiki.centos.org/Cloud/AWS#Official_CentOS_Linux_:_Public_Images +# Also owns the AMIS listed in +# https://builds.coreos.fedoraproject.org/streams/stable.json +CENTOS_IMAGE_PROJECT = ['125523088429'] +MARKETPLACE_IMAGE_PROJECT = ['679593333241'] # alias aws-marketplace +# https://access.redhat.com/articles/2962171 +RHEL_IMAGE_PROJECT = ['309956199498'] +# https://help.ubuntu.com/community/EC2StartersGuide#Official_Ubuntu_Cloud_Guest_Amazon_Machine_Images_.28AMIs.29 +UBUNTU_IMAGE_PROJECT = ['099720109477'] # Owned by canonical +# Some Windows images are also available in marketplace project, but this is the +# one selected by the AWS console. +WINDOWS_IMAGE_PROJECT = ['801119661308'] # alias amazon +UBUNTU_EFA_IMAGE_PROJECT = ['898082745236'] + +# Processor architectures +ARM = 'arm64' +X86 = 'x86_64' + +# Machine type to ARM architecture. +_MACHINE_TYPE_PREFIX_TO_ARM_ARCH = { + 'a1': 'cortex-a72', + 'c6g': 'graviton2', + 'c7g': 'graviton3', + 'g5g': 'graviton2', + 'm6g': 'graviton2', + 'r6g': 'graviton2', + 't4g': 'graviton2', + 'im4g': 'graviton2', + 'is4ge': 'graviton2', + 'x2g': 'graviton2', +} + +# Parameters for use with Elastic Fiber Adapter +_EFA_PARAMS = { + 'InterfaceType': 'efa', + 'DeviceIndex': 0, + 'NetworkCardIndex': 0, + 'Groups': '', + 'SubnetId': '' +} +# Location of EFA installer +_EFA_URL = ('https://s3-us-west-2.amazonaws.com/aws-efa-installer/' + 'aws-efa-installer-{version}.tar.gz') + + +class AwsTransitionalVmRetryableError(Exception): + """Error for retrying _Exists when an AWS VM is in a transitional state.""" + + +class AwsDriverDoesntSupportFeatureError(Exception): + """Raised if there is an attempt to set a feature not supported.""" + + +class AwsUnexpectedWindowsAdapterOutputError(Exception): + """Raised when querying the status of a windows adapter failed.""" + + +class AwsUnknownStatusError(Exception): + """Error indicating an unknown status was encountered.""" + + +class AwsImageNotFoundError(Exception): + """Error indicating no appropriate AMI could be found.""" + + +def GetRootBlockDeviceSpecForImage(image_id, region): + """Queries the CLI and returns the root block device specification as a dict. + + Args: + image_id: The EC2 image id to query + region: The EC2 region in which the image resides + + Returns: + The root block device specification as returned by the AWS cli, + as a Python dict. If the image is not found, or if the response + is malformed, an exception will be raised. 
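Editor's note: the owner account IDs listed above are what AMI lookups get filtered on. As an illustration, the newest Ubuntu 20.04 image in a region can be located as follows; the name pattern is a commonly used Canonical naming convention and an assumption here, not something taken from this module:

```python
import json
import subprocess

UBUNTU_OWNER = '099720109477'   # Canonical, as listed above
REGION = 'us-east-1'            # placeholder

cmd = [
    'aws', 'ec2', 'describe-images',
    '--region', REGION,
    '--owners', UBUNTU_OWNER,
    '--filters',
    'Name=name,Values=ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*',
    'Name=state,Values=available',
    '--query', 'Images[]',
]
images = json.loads(subprocess.check_output(cmd))

# Pick the most recently created image matching the name filter.
latest = max(images, key=lambda image: image['CreationDate'])
print(latest['ImageId'], latest['Name'])
```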
+ """ + command = util.AWS_PREFIX + [ + 'ec2', + 'describe-images', + '--region=%s' % region, + '--image-ids=%s' % image_id, + '--query', 'Images[]'] + stdout, _ = util.IssueRetryableCommand(command) + images = json.loads(stdout) + assert images + assert len(images) == 1, ( + 'Expected to receive only one image description for %s' % image_id) + image_spec = images[0] + root_device_name = image_spec['RootDeviceName'] + block_device_mappings = image_spec['BlockDeviceMappings'] + root_block_device_dict = next((x for x in block_device_mappings if + x['DeviceName'] == root_device_name)) + return root_block_device_dict + + +def GetBlockDeviceMap(machine_type, root_volume_size_gb=None, + image_id=None, region=None): + """Returns the block device map to expose all devices for a given machine. + + Args: + machine_type: The machine type to create a block device map for. + root_volume_size_gb: The desired size of the root volume, in GiB, + or None to the default provided by AWS. + image_id: The image id (AMI) to use in order to lookup the default + root device specs. This is only required if root_volume_size + is specified. + region: The region which contains the specified image. This is only + required if image_id is specified. + + Returns: + The json representation of the block device map for a machine compatible + with the AWS CLI, or if the machine type has no local disks, it will + return None. If root_volume_size_gb and image_id are provided, the block + device map will include the specification for the root volume. + + Raises: + ValueError: If required parameters are not passed. + """ + mappings = [] + if root_volume_size_gb is not None: + if image_id is None: + raise ValueError( + 'image_id must be provided if root_volume_size_gb is specified') + if region is None: + raise ValueError( + 'region must be provided if image_id is specified') + root_block_device = GetRootBlockDeviceSpecForImage(image_id, region) + root_block_device['Ebs']['VolumeSize'] = root_volume_size_gb + root_block_device['Ebs']['DeleteOnTermination'] = True + # The 'Encrypted' key must be removed or the CLI will complain + if not FLAGS.aws_vm_hibernate: + root_block_device['Ebs'].pop('Encrypted') + else: + root_block_device['Ebs']['Encrypted'] = True + mappings.append(root_block_device) + + if (machine_type in aws_disk.NUM_LOCAL_VOLUMES and + not aws_disk.LocalDriveIsNvme(machine_type)): + for i in range(aws_disk.NUM_LOCAL_VOLUMES[machine_type]): + od = collections.OrderedDict() + od['VirtualName'] = 'ephemeral%s' % i + od['DeviceName'] = '/dev/xvd%s' % chr(ord(DRIVE_START_LETTER) + i) + mappings.append(od) + if mappings: + return json.dumps(mappings) + return None + + +def IsPlacementGroupCompatible(machine_type): + """Returns True if VMs of 'machine_type' can be put in a placement group.""" + prefix = machine_type.split('.')[0] + return prefix not in NON_PLACEMENT_GROUP_PREFIXES + + +def GetArmArchitecture(machine_type): + """Returns the specific ARM processor architecture of the VM.""" + # c6g.medium -> c6g, m6gd.large -> m6g, c5n.18xlarge -> c5 + prefix = re.split(r'[dn]?\.', machine_type)[0] + return _MACHINE_TYPE_PREFIX_TO_ARM_ARCH.get(prefix) + + +def GetProcessorArchitecture(machine_type): + """Returns the processor architecture of the VM.""" + if GetArmArchitecture(machine_type): + return ARM + else: + return X86 + + +class AwsDedicatedHost(resource.BaseResource): + """Object representing an AWS host. + + Attributes: + region: The AWS region of the host. + zone: The AWS availability zone of the host. 
+ machine_type: The machine type of VMs that may be created on the host. + client_token: A uuid that makes the creation request idempotent. + id: The host_id of the host. + """ + + def __init__(self, machine_type, zone): + super(AwsDedicatedHost, self).__init__() + self.machine_type = machine_type + self.zone = zone + self.region = util.GetRegionFromZone(self.zone) + self.client_token = str(uuid.uuid4()) + self.id = None + self.fill_fraction = 0.0 + + def _Create(self): + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'allocate-hosts', + '--region=%s' % self.region, + '--client-token=%s' % self.client_token, + '--instance-type=%s' % self.machine_type, + '--availability-zone=%s' % self.zone, + '--auto-placement=off', + '--quantity=1'] + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + if self.id: + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'release-hosts', + '--region=%s' % self.region, + '--host-ids=%s' % self.id] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + @vm_util.Retry() + def _Exists(self): + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-hosts', + '--region=%s' % self.region, + '--filter=Name=client-token,Values=%s' % self.client_token] + stdout, _, _ = vm_util.IssueCommand(describe_cmd) + response = json.loads(stdout) + hosts = response['Hosts'] + assert len(hosts) < 2, 'Too many hosts.' + if not hosts: + return False + host = hosts[0] + self.id = host['HostId'] + state = host['State'] + assert state in KNOWN_HOST_STATES, state + return state in HOST_EXISTS_STATES + + +class AwsVmSpec(virtual_machine.BaseVmSpec): + """Object containing the information needed to create an AwsVirtualMachine. + + Attributes: + use_dedicated_host: bool. Whether to create this VM on a dedicated host. + """ + + CLOUD = providers.AWS + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(AwsVmSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['aws_boot_disk_size'].present: + config_values['boot_disk_size'] = flag_values.aws_boot_disk_size + if flag_values['aws_spot_instances'].present: + config_values['use_spot_instance'] = flag_values.aws_spot_instances + if flag_values['aws_spot_price'].present: + config_values['spot_price'] = flag_values.aws_spot_price + if flag_values['aws_spot_block_duration_minutes'].present: + config_values['spot_block_duration_minutes'] = int( + flag_values.aws_spot_block_duration_minutes) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. 
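+      For example, this spec maps 'use_spot_instance' to
+      (option_decoders.BooleanDecoder, {'default': False}), as constructed in
+      the method body below.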
+ """ + result = super(AwsVmSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'use_spot_instance': (option_decoders.BooleanDecoder, { + 'default': False + }), + 'spot_price': (option_decoders.FloatDecoder, { + 'default': None + }), + 'spot_block_duration_minutes': (option_decoders.IntDecoder, { + 'default': None + }), + 'boot_disk_size': (option_decoders.IntDecoder, { + 'default': None + }) + }) + + return result + + +def _GetKeyfileSetKey(region): + """Returns a key to use for the keyfile set. + + This prevents other runs in the same process from reusing the key. + + Args: + region: The region the keyfile is in. + """ + return (region, FLAGS.run_uri) + + +class AwsKeyFileManager(object): + """Object for managing AWS Keyfiles.""" + _lock = threading.Lock() + imported_keyfile_set = set() + deleted_keyfile_set = set() + + @classmethod + def ImportKeyfile(cls, region): + """Imports the public keyfile to AWS.""" + with cls._lock: + if _GetKeyfileSetKey(region) in cls.imported_keyfile_set: + return + cat_cmd = ['cat', + vm_util.GetPublicKeyPath()] + keyfile, _ = vm_util.IssueRetryableCommand(cat_cmd) + formatted_tags = util.FormatTagSpecifications('key-pair', + util.MakeDefaultTags()) + import_cmd = util.AWS_PREFIX + [ + 'ec2', '--region=%s' % region, + 'import-key-pair', + '--key-name=%s' % cls.GetKeyNameForRun(), + '--public-key-material=%s' % keyfile, + '--tag-specifications=%s' % formatted_tags, + ] + _, stderr, retcode = vm_util.IssueCommand( + import_cmd, raise_on_failure=False) + if retcode: + if 'KeyPairLimitExceeded' in stderr: + raise errors.Benchmarks.QuotaFailure( + 'KeyPairLimitExceeded in %s: %s' % (region, stderr)) + else: + raise errors.Benchmarks.PrepareException(stderr) + + cls.imported_keyfile_set.add(_GetKeyfileSetKey(region)) + if _GetKeyfileSetKey(region) in cls.deleted_keyfile_set: + cls.deleted_keyfile_set.remove(_GetKeyfileSetKey(region)) + + @classmethod + def DeleteKeyfile(cls, region): + """Deletes the imported keyfile for a region.""" + with cls._lock: + if _GetKeyfileSetKey(region) in cls.deleted_keyfile_set: + return + delete_cmd = util.AWS_PREFIX + [ + 'ec2', '--region=%s' % region, + 'delete-key-pair', + '--key-name=%s' % cls.GetKeyNameForRun()] + util.IssueRetryableCommand(delete_cmd) + cls.deleted_keyfile_set.add(_GetKeyfileSetKey(region)) + if _GetKeyfileSetKey(region) in cls.imported_keyfile_set: + cls.imported_keyfile_set.remove(_GetKeyfileSetKey(region)) + + @classmethod + def GetKeyNameForRun(cls): + return 'perfkit-key-{0}'.format(FLAGS.run_uri) + + +class AwsVirtualMachine(virtual_machine.BaseVirtualMachine): + """Object representing an AWS Virtual Machine.""" + + CLOUD = providers.AWS + + # The IMAGE_NAME_FILTER is passed to the AWS CLI describe-images command to + # filter images by name. This must be set by subclasses, but may be overridden + # by the aws_image_name_filter flag. + IMAGE_NAME_FILTER = None + + # The IMAGE_NAME_REGEX can be used to further filter images by name. It + # applies after the IMAGE_NAME_FILTER above. Note that before this regex is + # applied, Python's string formatting is used to replace {virt_type} and + # {disk_type} by the respective virtualization type and root disk type of the + # VM, allowing the regex to contain these strings. This regex supports + # arbitrary Python regular expressions to further narrow down the set of + # images considered. + IMAGE_NAME_REGEX = None + + # List of projects that own the AMIs of this OS type. Default to + # AWS Marketplace official image project. 
Note that opt-in regions may have a + # different image owner than default regions. + IMAGE_OWNER = MARKETPLACE_IMAGE_PROJECT + + # Some AMIs use a project code to find the latest (in addition to owner, and + # filter) + IMAGE_PRODUCT_CODE_FILTER = None + + # CoreOS only distinguishes between stable and testing images in the + # description + IMAGE_DESCRIPTION_FILTER = None + + DEFAULT_ROOT_DISK_TYPE = 'gp2' + DEFAULT_ROOT_DISK_SIZE_GB = 16 + DEFAULT_USER_NAME = 'ec2-user' + + _lock = threading.Lock() + deleted_hosts = set() + host_map = collections.defaultdict(list) + + def __init__(self, vm_spec): + """Initialize a AWS virtual machine. + + Args: + vm_spec: virtual_machine.BaseVirtualMachineSpec object of the vm. + + Raises: + ValueError: If an incompatible vm_spec is passed. + """ + super(AwsVirtualMachine, self).__init__(vm_spec) + self.region = util.GetRegionFromZone(self.zone) + self.user_name = FLAGS.aws_user_name or self.DEFAULT_USER_NAME + if self.machine_type in aws_disk.NUM_LOCAL_VOLUMES: + self.max_local_disks = aws_disk.NUM_LOCAL_VOLUMES[self.machine_type] + self.user_data = None + self.network = aws_network.AwsNetwork.GetNetwork(self) + self.placement_group = getattr(vm_spec, 'placement_group', + self.network.placement_group) + self.firewall = aws_network.AwsFirewall.GetFirewall() + self.use_dedicated_host = vm_spec.use_dedicated_host + self.num_vms_per_host = vm_spec.num_vms_per_host + self.use_spot_instance = vm_spec.use_spot_instance + self.spot_price = vm_spec.spot_price + self.spot_block_duration_minutes = vm_spec.spot_block_duration_minutes + self.boot_disk_size = vm_spec.boot_disk_size + self.client_token = str(uuid.uuid4()) + self.host = None + self.id = None + self.metadata.update({ + 'spot_instance': + self.use_spot_instance, + 'spot_price': + self.spot_price, + 'spot_block_duration_minutes': + self.spot_block_duration_minutes, + 'placement_group_strategy': + self.placement_group.strategy + if self.placement_group else placement_group.PLACEMENT_GROUP_NONE, + 'aws_credit_specification': + FLAGS.aws_credit_specification + if FLAGS.aws_credit_specification else 'none' + }) + self.spot_early_termination = False + self.spot_status_code = None + # See: + # https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/enhanced-networking-os.html + self._smp_affinity_script = 'smp_affinity.sh' + + if self.use_dedicated_host and util.IsRegion(self.zone): + raise ValueError( + 'In order to use dedicated hosts, you must specify an availability ' + 'zone, not a region ("zone" was %s).' % self.zone) + + if self.use_dedicated_host and self.use_spot_instance: + raise ValueError( + 'Tenancy=host is not supported for Spot Instances') + self.allocation_id = None + self.association_id = None + self.aws_tags = {} + + @property + def host_list(self): + """Returns the list of hosts that are compatible with this VM.""" + return self.host_map[(self.machine_type, self.zone)] + + @property + def group_id(self): + """Returns the security group ID of this VM.""" + return self.network.regional_network.vpc.default_security_group_id + + @classmethod + def GetDefaultImage(cls, machine_type, region): + """Returns the default image given the machine type and region. + + If specified, the aws_image_name_filter and aws_image_name_regex flags will + override os_type defaults. + + Args: + machine_type: The machine_type of the VM, used to determine virtualization + type. + region: The region of the VM, as images are region specific. + + Raises: + AwsImageNotFoundError: If a default image cannot be found. 
+
+    Returns:
+      The ID of the latest image, or None if no default image is configured or
+      none can be found.
+    """
+
+    # These cannot be REQUIRED_ATTRS, because nesting REQUIRED_ATTRS breaks.
+    if not cls.IMAGE_OWNER:
+      raise NotImplementedError('AWS OSMixins require IMAGE_OWNER')
+    if not cls.IMAGE_NAME_FILTER:
+      raise NotImplementedError('AWS OSMixins require IMAGE_NAME_FILTER')
+
+    if FLAGS.aws_image_name_filter:
+      cls.IMAGE_NAME_FILTER = FLAGS.aws_image_name_filter
+
+    if FLAGS.aws_image_name_regex:
+      cls.IMAGE_NAME_REGEX = FLAGS.aws_image_name_regex
+
+    prefix = machine_type.split('.')[0]
+    virt_type = PV if prefix in NON_HVM_PREFIXES else HVM
+    processor_architecture = GetProcessorArchitecture(machine_type)
+
+    describe_cmd = util.AWS_PREFIX + [
+        '--region=%s' % region,
+        'ec2',
+        'describe-images',
+        '--query', ('Images[*].{Name:Name,ImageId:ImageId,'
+                    'CreationDate:CreationDate}'),
+        '--filters',
+        'Name=name,Values=%s' % cls.IMAGE_NAME_FILTER,
+        'Name=block-device-mapping.volume-type,Values=%s' %
+        cls.DEFAULT_ROOT_DISK_TYPE,
+        'Name=virtualization-type,Values=%s' % virt_type,
+        'Name=architecture,Values=%s' % processor_architecture]
+    if cls.IMAGE_PRODUCT_CODE_FILTER:
+      describe_cmd.extend(['Name=product-code,Values=%s' %
+                           cls.IMAGE_PRODUCT_CODE_FILTER])
+    if cls.IMAGE_DESCRIPTION_FILTER:
+      describe_cmd.extend(['Name=description,Values=%s' %
+                           cls.IMAGE_DESCRIPTION_FILTER])
+    describe_cmd.extend(['--owners'] + cls.IMAGE_OWNER)
+    stdout, _ = util.IssueRetryableCommand(describe_cmd)
+
+    if not stdout:
+      raise AwsImageNotFoundError('aws describe-images did not produce valid '
+                                  'output.')
+
+    if cls.IMAGE_NAME_REGEX:
+      # Further filter images by the IMAGE_NAME_REGEX filter.
+      image_name_regex = cls.IMAGE_NAME_REGEX.format(
+          virt_type=virt_type, disk_type=cls.DEFAULT_ROOT_DISK_TYPE,
+          architecture=processor_architecture)
+      images = []
+      excluded_images = []
+      for image in json.loads(stdout):
+        if re.search(image_name_regex, image['Name']):
+          images.append(image)
+        else:
+          excluded_images.append(image)
+
+      if excluded_images:
+        logging.debug('Excluded the following images with regex "%s": %s',
+                      image_name_regex,
+                      sorted(image['Name'] for image in excluded_images))
+    else:
+      images = json.loads(stdout)
+
+    if not images:
+      raise AwsImageNotFoundError('No AMIs with given filters found.')
+
+    return max(images, key=lambda image: image['CreationDate'])['ImageId']
+
+  @vm_util.Retry(max_retries=2)
+  def _PostCreate(self):
+    """Get the instance's data and tag it."""
+    describe_cmd = util.AWS_PREFIX + [
+        'ec2',
+        'describe-instances',
+        '--region=%s' % self.region,
+        '--instance-ids=%s' % self.id]
+    logging.info('Getting instance %s public IP. This will fail until '
+                 'a public IP is available, but will be retried.', self.id)
+    stdout, _ = util.IssueRetryableCommand(describe_cmd)
+    response = json.loads(stdout)
+    instance = response['Reservations'][0]['Instances'][0]
+    self.internal_ip = instance['PrivateIpAddress']
+    if util.IsRegion(self.zone):
+      self.zone = str(instance['Placement']['AvailabilityZone'])
+
+    assert self.group_id == instance['SecurityGroups'][0]['GroupId'], (
+        self.group_id, instance['SecurityGroups'][0]['GroupId'])
+    if FLAGS.aws_efa:
+      self._ConfigureEfa(instance)
+    elif 'PublicIpAddress' in instance:
+      self.ip_address = instance['PublicIpAddress']
+    else:
+      raise errors.Resource.RetryableCreationError('Public IP not ready.')
+
+  def _ConfigureEfa(self, instance):
+    """Configure EFA and associate an Elastic IP.
+ + Args: + instance: dict which contains instance info. + """ + if FLAGS.aws_efa_count > 1: + self._ConfigureElasticIp(instance) + else: + self.ip_address = instance['PublicIpAddress'] + if FLAGS.aws_efa_version: + # Download EFA then call InstallEfa method so that subclass can override + self.InstallPackages('curl') + url = _EFA_URL.format(version=FLAGS.aws_efa_version) + tarfile = posixpath.basename(url) + self.RemoteCommand(f'curl -O {url}; tar -xzf {tarfile}') + self._InstallEfa() + # Run test program to confirm EFA working + self.RemoteCommand('cd aws-efa-installer; ' + 'PATH=${PATH}:/opt/amazon/efa/bin ./efa_test.sh') + + def _ConfigureElasticIp(self, instance): + """Create and associate Elastic IP. + + Args: + instance: dict which contains instance info. + """ + network_interface_id = None + for network_interface in instance['NetworkInterfaces']: + # The primary network interface (eth0) for the instance. + if network_interface['Attachment']['DeviceIndex'] == 0: + network_interface_id = network_interface['NetworkInterfaceId'] + break + assert network_interface_id is not None + + stdout, _, _ = vm_util.IssueCommand(util.AWS_PREFIX + + ['ec2', 'allocate-address', + f'--region={self.region}', + '--domain=vpc']) + response = json.loads(stdout) + self.ip_address = response['PublicIp'] + self.allocation_id = response['AllocationId'] + + util.AddDefaultTags(self.allocation_id, self.region) + + stdout, _, _ = vm_util.IssueCommand( + util.AWS_PREFIX + ['ec2', 'associate-address', + f'--region={self.region}', + f'--allocation-id={self.allocation_id}', + f'--network-interface-id={network_interface_id}']) + response = json.loads(stdout) + self.association_id = response['AssociationId'] + + def _InstallEfa(self): + """Installs AWS EFA packages. + + See https://aws.amazon.com/hpc/efa/ + """ + if not self.TryRemoteCommand('ulimit -l | grep unlimited'): + self.RemoteCommand(f'echo "{self.user_name} - memlock unlimited" | ' + 'sudo tee -a /etc/security/limits.conf') + self.RemoteCommand('cd aws-efa-installer; sudo ./efa_installer.sh -y') + if not self.TryRemoteCommand('ulimit -l | grep unlimited'): + # efa_installer.sh should reboot enabling this change, reboot if necessary + self.Reboot() + + def _CreateDependencies(self): + """Create VM dependencies.""" + AwsKeyFileManager.ImportKeyfile(self.region) + # GetDefaultImage calls the AWS CLI. 
+ self.image = self.image or self.GetDefaultImage(self.machine_type, + self.region) + self.AllowRemoteAccessPorts() + + if self.use_dedicated_host: + with self._lock: + if (not self.host_list or (self.num_vms_per_host and + self.host_list[-1].fill_fraction + + 1.0 / self.num_vms_per_host > 1.0)): + host = AwsDedicatedHost(self.machine_type, self.zone) + self.host_list.append(host) + host.Create() + self.host = self.host_list[-1] + if self.num_vms_per_host: + self.host.fill_fraction += 1.0 / self.num_vms_per_host + + def _DeleteDependencies(self): + """Delete VM dependencies.""" + AwsKeyFileManager.DeleteKeyfile(self.region) + if self.host: + with self._lock: + if self.host in self.host_list: + self.host_list.remove(self.host) + if self.host not in self.deleted_hosts: + self.host.Delete() + self.deleted_hosts.add(self.host) + + def _Create(self): + """Create a VM instance.""" + placement = [] + if not util.IsRegion(self.zone): + placement.append('AvailabilityZone=%s' % self.zone) + if self.use_dedicated_host: + placement.append('Tenancy=host,HostId=%s' % self.host.id) + num_hosts = len(self.host_list) + elif self.placement_group: + if IsPlacementGroupCompatible(self.machine_type): + placement.append('GroupName=%s' % self.placement_group.name) + else: + logging.warning( + 'VM not placed in Placement Group. VM Type %s not supported', + self.machine_type) + placement = ','.join(placement) + block_device_map = GetBlockDeviceMap(self.machine_type, + self.boot_disk_size, + self.image, + self.region) + if not self.aws_tags: + # Set tags for the AWS VM. If we are retrying the create, we have to use + # the same tags from the previous call. + self.aws_tags.update(self.vm_metadata) + self.aws_tags.update(util.MakeDefaultTags()) + create_cmd = util.AWS_PREFIX + [ + 'ec2', + 'run-instances', + '--region=%s' % self.region, + '--client-token=%s' % self.client_token, + '--image-id=%s' % self.image, + '--instance-type=%s' % self.machine_type, + '--key-name=%s' % AwsKeyFileManager.GetKeyNameForRun(), + '--tag-specifications=%s' % + util.FormatTagSpecifications('instance', self.aws_tags)] + + if FLAGS.aws_vm_hibernate: + create_cmd.extend([ + '--hibernation-options=Configured=true', + ]) + + if FLAGS.disable_smt: + query_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-instance-types', + '--instance-types', + self.machine_type, + '--query', + 'InstanceTypes[0].VCpuInfo.DefaultCores' + ] + stdout, _, retcode = vm_util.IssueCommand(query_cmd) + cores = int(json.loads(stdout)) + create_cmd.append(f'--cpu-options=CoreCount={cores},ThreadsPerCore=1') + if FLAGS.aws_efa: + efas = ['--network-interfaces'] + for device_index in range(FLAGS.aws_efa_count): + efa_params = _EFA_PARAMS.copy() + efa_params.update({ + 'NetworkCardIndex': device_index, + 'DeviceIndex': device_index, + 'Groups': self.group_id, + 'SubnetId': self.network.subnet.id + }) + if FLAGS.aws_efa_count == 1: + efa_params['AssociatePublicIpAddress'] = True + efas.append(','.join(f'{key}={value}' for key, value in + sorted(efa_params.items()))) + create_cmd.extend(efas) + else: + create_cmd.append('--associate-public-ip-address') + create_cmd.append(f'--subnet-id={self.network.subnet.id}') + if block_device_map: + create_cmd.append('--block-device-mappings=%s' % block_device_map) + if placement: + create_cmd.append('--placement=%s' % placement) + if FLAGS.aws_credit_specification: + create_cmd.append('--credit-specification=%s' % + FLAGS.aws_credit_specification) + if self.user_data: + create_cmd.append('--user-data=%s' % self.user_data) + if 
self.capacity_reservation_id: + create_cmd.append( + '--capacity-reservation-specification=CapacityReservationTarget=' + '{CapacityReservationId=%s}' % self.capacity_reservation_id) + if self.use_spot_instance: + instance_market_options = collections.OrderedDict() + spot_options = collections.OrderedDict() + spot_options['SpotInstanceType'] = 'one-time' + spot_options['InstanceInterruptionBehavior'] = 'terminate' + if self.spot_price: + spot_options['MaxPrice'] = str(self.spot_price) + if self.spot_block_duration_minutes: + spot_options['BlockDurationMinutes'] = self.spot_block_duration_minutes + instance_market_options['MarketType'] = 'spot' + instance_market_options['SpotOptions'] = spot_options + create_cmd.append( + '--instance-market-options=%s' % json.dumps(instance_market_options)) + _, stderr, retcode = vm_util.IssueCommand(create_cmd, + raise_on_failure=False) + + arm_arch = GetArmArchitecture(self.machine_type) + if arm_arch: + self.host_arch = arm_arch + + if self.use_dedicated_host and 'InsufficientCapacityOnHost' in stderr: + if self.num_vms_per_host: + raise errors.Resource.CreationError( + 'Failed to create host: %d vms of type %s per host exceeds ' + 'memory capacity limits of the host' % + (self.num_vms_per_host, self.machine_type)) + else: + logging.warning( + 'Creation failed due to insufficient host capacity. A new host will ' + 'be created and instance creation will be retried.') + with self._lock: + if num_hosts == len(self.host_list): + host = AwsDedicatedHost(self.machine_type, self.zone) + self.host_list.append(host) + host.Create() + self.host = self.host_list[-1] + self.client_token = str(uuid.uuid4()) + raise errors.Resource.RetryableCreationError() + if 'InsufficientInstanceCapacity' in stderr: + if self.use_spot_instance: + self.spot_status_code = 'InsufficientSpotInstanceCapacity' + self.spot_early_termination = True + raise errors.Benchmarks.InsufficientCapacityCloudFailure(stderr) + if 'SpotMaxPriceTooLow' in stderr: + self.spot_status_code = 'SpotMaxPriceTooLow' + self.spot_early_termination = True + raise errors.Resource.CreationError(stderr) + if 'InstanceLimitExceeded' in stderr or 'VcpuLimitExceeded' in stderr: + raise errors.Benchmarks.QuotaFailure(stderr) + if 'RequestLimitExceeded' in stderr: + if FLAGS.retry_on_rate_limited: + raise errors.Resource.RetryableCreationError(stderr) + else: + raise errors.Benchmarks.QuotaFailure(stderr) + + # When launching more than 1 VM into the same placement group, there is an + # occasional error that the placement group has already been used in a + # separate zone. Retrying fixes this error. + if 'InvalidPlacementGroup.InUse' in stderr: + raise errors.Resource.RetryableCreationError(stderr) + if 'Unsupported' in stderr: + raise errors.Benchmarks.UnsupportedConfigError(stderr) + if retcode: + raise errors.Resource.CreationError( + 'Failed to create VM: %s return code: %s' % (retcode, stderr)) + + @vm_util.Retry( + poll_interval=0.5, + log_errors=True, + retryable_exceptions=(AwsTransitionalVmRetryableError,)) + def _WaitForStoppedStatus(self): + """Returns the status of the VM. + + Returns: + Whether the VM is suspended i.e. in a stopped status. If not, raises an + error + + Raises: + AwsUnknownStatusError: If an unknown status is returned from AWS. + AwsTransitionalVmRetryableError: If the VM is pending. This is retried. 
+ """ + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-instance-status', + '--region=%s' % self.region, + '--instance-ids=%s' % self.id, + '--include-all-instances', + ] + + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + status = response['InstanceStatuses'][0]['InstanceState']['Name'] + if status.lower() != 'stopped': + logging.info('VM has status %s.', status) + + raise AwsTransitionalVmRetryableError() + + def _BeforeSuspend(self): + """Prepares the instance for suspend by having the VM sleep for a given duration. + + This ensures the VM is ready for hibernation + """ + # Add a timer that waits for a given duration after vm instance is + # created before calling suspend on the vm to ensure that the vm is + # ready for hibernation in aws. + time.sleep(600) + + def _PostSuspend(self): + self._WaitForStoppedStatus() + + def _Suspend(self): + """Suspends a VM instance.""" + suspend_cmd = util.AWS_PREFIX + [ + 'ec2', + 'stop-instances', + '--region=%s' % self.region, + '--instance-ids=%s' % self.id, + '--hibernate', + ] + try: + vm_util.IssueCommand(suspend_cmd) + except: + raise errors.Benchmarks.KnownIntermittentError( + 'Instance is still not ready to hibernate') + + self._PostSuspend() + + @vm_util.Retry( + poll_interval=0.5, + retryable_exceptions=(AwsTransitionalVmRetryableError,)) + def _WaitForNewIP(self): + """Checks for a new IP address, waiting if the VM is still pending. + + Raises: + AwsTransitionalVmRetryableError: If VM is pending. This is retried. + """ + status_cmd = util.AWS_PREFIX + [ + 'ec2', 'describe-instances', f'--region={self.region}', + f'--instance-ids={self.id}' + ] + stdout, _, _ = vm_util.IssueCommand(status_cmd) + response = json.loads(stdout) + instance = response['Reservations'][0]['Instances'][0] + if 'PublicIpAddress' in instance: + self.ip_address = instance['PublicIpAddress'] + else: + logging.info('VM is pending.') + raise AwsTransitionalVmRetryableError() + + def _PostResume(self): + self._WaitForNewIP() + + def _Resume(self): + """Resumes a VM instance.""" + resume_cmd = util.AWS_PREFIX + [ + 'ec2', + 'start-instances', + '--region=%s' % self.region, + '--instance-ids=%s' % self.id, + ] + vm_util.IssueCommand(resume_cmd) + self._PostResume() + + def _Delete(self): + """Delete a VM instance.""" + if self.id: + delete_cmd = util.AWS_PREFIX + [ + 'ec2', + 'terminate-instances', + '--region=%s' % self.region, + '--instance-ids=%s' % self.id] + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + if hasattr(self, 'spot_instance_request_id'): + cancel_cmd = util.AWS_PREFIX + [ + '--region=%s' % self.region, + 'ec2', + 'cancel-spot-instance-requests', + '--spot-instance-request-ids=%s' % self.spot_instance_request_id] + vm_util.IssueCommand(cancel_cmd, raise_on_failure=False) + + if FLAGS.aws_efa: + if self.association_id: + vm_util.IssueCommand(util.AWS_PREFIX + + ['ec2', 'disassociate-address', + f'--region={self.region}', + f'--association-id={self.association_id}']) + + if self.allocation_id: + vm_util.IssueCommand(util.AWS_PREFIX + + ['ec2', 'release-address', + f'--region={self.region}', + f'--allocation-id={self.allocation_id}']) + + # _Start or _Stop not yet implemented for AWS + def _Start(self): + """Starts the VM.""" + if not self.id: + raise errors.Benchmarks.RunError( + 'Expected VM id to be non-null. 
Please make sure the VM exists.') + start_cmd = util.AWS_PREFIX + [ + 'ec2', 'start-instances', + f'--region={self.region}', + f'--instance-ids={self.id}' + ] + vm_util.IssueCommand(start_cmd) + + def _PostStart(self): + self._WaitForNewIP() + + def _Stop(self): + """Stops the VM.""" + if not self.id: + raise errors.Benchmarks.RunError( + 'Expected VM id to be non-null. Please make sure the VM exists.') + stop_cmd = util.AWS_PREFIX + [ + 'ec2', 'stop-instances', + f'--region={self.region}', + f'--instance-ids={self.id}' + ] + vm_util.IssueCommand(stop_cmd) + + def _PostStop(self): + self._WaitForStoppedStatus() + + def _UpdateInterruptibleVmStatusThroughApi(self): + if hasattr(self, 'spot_instance_request_id'): + describe_cmd = util.AWS_PREFIX + [ + '--region=%s' % self.region, + 'ec2', + 'describe-spot-instance-requests', + '--spot-instance-request-ids=%s' % self.spot_instance_request_id] + stdout, _, _ = vm_util.IssueCommand(describe_cmd) + sir_response = json.loads(stdout)['SpotInstanceRequests'] + self.spot_status_code = sir_response[0]['Status']['Code'] + self.spot_early_termination = ( + self.spot_status_code in AWS_INITIATED_SPOT_TERMINAL_STATUSES) + + @vm_util.Retry( + poll_interval=1, + log_errors=False, + retryable_exceptions=(AwsTransitionalVmRetryableError,)) + def _Exists(self): + """Returns whether the VM exists. + + This method waits until the VM is no longer pending. + + Returns: + Whether the VM exists. + + Raises: + AwsUnknownStatusError: If an unknown status is returned from AWS. + AwsTransitionalVmRetryableError: If the VM is pending. This is retried. + """ + describe_cmd = util.AWS_PREFIX + [ + 'ec2', + 'describe-instances', + '--region=%s' % self.region, + '--filter=Name=client-token,Values=%s' % self.client_token] + + stdout, _ = util.IssueRetryableCommand(describe_cmd) + response = json.loads(stdout) + reservations = response['Reservations'] + assert len(reservations) < 2, 'Too many reservations.' + if not reservations: + if not self.create_start_time: + return False + if self.delete_start_time and not self.created: + return False + logging.info('No reservation returned by describe-instances. This ' + 'sometimes shows up immediately after a successful ' + 'run-instances command. Retrying describe-instances ' + 'command.') + raise AwsTransitionalVmRetryableError() + instances = reservations[0]['Instances'] + assert len(instances) == 1, 'Wrong number of instances.' + status = instances[0]['State']['Name'] + self.id = instances[0]['InstanceId'] + if self.use_spot_instance: + self.spot_instance_request_id = instances[0]['SpotInstanceRequestId'] + + if status not in INSTANCE_KNOWN_STATUSES: + raise AwsUnknownStatusError('Unknown status %s' % status) + if status in INSTANCE_TRANSITIONAL_STATUSES: + logging.info('VM has status %s; retrying describe-instances command.', + status) + raise AwsTransitionalVmRetryableError() + # In this path run-instances succeeded, a pending instance was created, but + # not fulfilled so it moved to terminated. + if (status == TERMINATED and + instances[0]['StateReason']['Code'] == + 'Server.InsufficientInstanceCapacity'): + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + instances[0]['StateReason']['Message']) + # In this path run-instances succeeded, a pending instance was created, but + # instance is shutting down due to internal server error. This is a + # retryable command for run-instance. + # Client token needs to be refreshed for idempotency. 
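+    # (EC2 run-instances is idempotent per client token, so retrying with the
+    # old token would keep resolving to the same failed request; the fresh
+    # uuid below lets the retried call create a new instance.)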
+ if (status == SHUTTING_DOWN and + instances[0]['StateReason']['Code'] == 'Server.InternalError'): + self.client_token = str(uuid.uuid4()) + return status in INSTANCE_EXISTS_STATUSES + + def _GetNvmeBootIndex(self): + if aws_disk.LocalDriveIsNvme(self.machine_type) and \ + aws_disk.EbsDriveIsNvme(self.machine_type): + self.Install('storage_tools') + # identify boot drive + # If this command ever fails consider 'findmnt -nM / -o source' + cmd = ('realpath /dev/disk/by-label/cloudimg-rootfs ' + '| grep --only-matching "nvme[0-9]*"') + boot_drive = self.RemoteCommand(cmd, ignore_failure=True)[0].strip() + if boot_drive: + # get the boot drive index by dropping the nvme prefix + boot_idx = int(boot_drive[4:]) + logging.info('found boot drive at nvme index %d', boot_idx) + return boot_idx + else: + logging.warning('Failed to identify NVME boot drive index. Assuming 0.') + return 0 + + def CreateScratchDisk(self, disk_spec): + """Create a VM's scratch disk. + + Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. + + Raises: + CreationError: If an NFS disk is listed but the NFS service not created. + """ + # Instantiate the disk(s) that we want to create. + disks = [] + nvme_boot_drive_index = self._GetNvmeBootIndex() + for _ in range(disk_spec.num_striped_disks): + if disk_spec.disk_type == disk.NFS: + data_disk = self._GetNfsService().CreateNfsDisk() + else: + data_disk = aws_disk.AwsDisk(disk_spec, self.zone, self.machine_type) + if disk_spec.disk_type == disk.LOCAL: + device_letter = chr(ord(DRIVE_START_LETTER) + self.local_disk_counter) + data_disk.AssignDeviceLetter(device_letter, nvme_boot_drive_index) + # Local disk numbers start at 1 (0 is the system disk). + data_disk.disk_number = self.local_disk_counter + 1 + self.local_disk_counter += 1 + if self.local_disk_counter > self.max_local_disks: + raise errors.Error('Not enough local disks.') + elif disk_spec.disk_type == disk.NFS: + pass + else: + # Remote disk numbers start at 1 + max_local disks (0 is the system disk + # and local disks occupy [1, max_local_disks]). + data_disk.disk_number = (self.remote_disk_counter + + 1 + self.max_local_disks) + self.remote_disk_counter += 1 + disks.append(data_disk) + + self._CreateScratchDiskFromDisks(disk_spec, disks) + + def AddMetadata(self, **kwargs): + """Adds metadata to the VM.""" + util.AddTags(self.id, self.region, **kwargs) + if self.use_spot_instance: + util.AddDefaultTags(self.spot_instance_request_id, self.region) + + def InstallCli(self): + """Installs the AWS cli and credentials on this AWS vm.""" + self.Install('awscli') + self.Install('aws_credentials') + + def DownloadPreprovisionedData(self, install_path, module_name, filename): + """Downloads a data file from an AWS S3 bucket with pre-provisioned data. + + Use --aws_preprovisioned_data_bucket to specify the name of the bucket. + + Args: + install_path: The install path on this VM. + module_name: Name of the module associated with this data file. + filename: The name of the file that was downloaded. + """ + self.InstallCli() + # TODO(deitz): Add retry logic. + self.RemoteCommand(GenerateDownloadPreprovisionedDataCommand( + install_path, module_name, filename)) + + def ShouldDownloadPreprovisionedData(self, module_name, filename): + """Returns whether or not preprovisioned data is available.""" + # MSW - There is something wrong with this function, it fails miserably when + # FLAGS.aws_preprovisioned_data_bucket is not set. 
It appears that when we + # attempt to install epel, it looks for it in an S3 bucket instead of d/l + # directly. This might be okay in some cases, but is not the typical use + # case. This if statement is a quick and dirty workaround until I can + # think of a better solution in the context of the framework. + if (FLAGS.aws_preprovisioned_data_bucket): + self.Install('aws_credentials') + self.Install('awscli') + return FLAGS.aws_preprovisioned_data_bucket and self.TryRemoteCommand( + GenerateStatPreprovisionedDataCommand(module_name, filename)) + else: + return False + + def IsInterruptible(self): + """Returns whether this vm is an interruptible vm (spot vm). + + Returns: True if this vm is an interruptible vm (spot vm). + """ + return self.use_spot_instance + + def WasInterrupted(self): + """Returns whether this spot vm was terminated early by AWS. + + Returns: True if this vm was terminated early by AWS. + """ + return self.spot_early_termination + + def GetVmStatusCode(self): + """Returns the early termination code if any. + + Returns: Early termination code. + """ + return self.spot_status_code + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the VM. + + Returns: + dict mapping string property key to value. + """ + result = super(AwsVirtualMachine, self).GetResourceMetadata() + result['boot_disk_type'] = self.DEFAULT_ROOT_DISK_TYPE + result['boot_disk_size'] = self.boot_disk_size + if self.use_dedicated_host: + result['num_vms_per_host'] = self.num_vms_per_host + result['efa'] = FLAGS.aws_efa + if FLAGS.aws_efa: + result['efa_version'] = FLAGS.aws_efa_version + result['efa_count'] = FLAGS.aws_efa_count + result['preemptible'] = self.use_spot_instance + return result + + +class ClearBasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.ClearMixin): + IMAGE_NAME_FILTER = 'clear/images/*/clear-*' + DEFAULT_USER_NAME = 'clear' + + +class CoreOsBasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.CoreOsMixin): + IMAGE_NAME_FILTER = 'fedora-coreos-*-hvm' + # CoreOS only distinguishes between stable and testing in the description + IMAGE_DESCRIPTION_FILTER = 'Fedora CoreOS stable *' + IMAGE_OWNER = CENTOS_IMAGE_PROJECT + DEFAULT_USER_NAME = 'core' + + +class Debian9BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.Debian9Mixin): + # From https://wiki.debian.org/Cloud/AmazonEC2Image/Stretch + IMAGE_NAME_FILTER = 'debian-stretch-*64-*' + IMAGE_OWNER = DEBIAN_9_IMAGE_PROJECT + DEFAULT_USER_NAME = 'admin' + + def _BeforeSuspend(self): + """Prepares the aws vm for hibernation.""" + raise NotImplementedError() + + +class Debian10BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.Debian10Mixin): + # From https://wiki.debian.org/Cloud/AmazonEC2Image/Buster + IMAGE_NAME_FILTER = 'debian-10-*64*' + IMAGE_OWNER = DEBIAN_IMAGE_PROJECT + DEFAULT_USER_NAME = 'admin' + + +class Debian11BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.Debian11Mixin): + # From https://wiki.debian.org/Cloud/AmazonEC2Image/Buster + IMAGE_NAME_FILTER = 'debian-11-*64*' + IMAGE_OWNER = DEBIAN_IMAGE_PROJECT + DEFAULT_USER_NAME = 'admin' + + +class UbuntuBasedAwsVirtualMachine(AwsVirtualMachine): + IMAGE_OWNER = UBUNTU_IMAGE_PROJECT + DEFAULT_USER_NAME = 'ubuntu' + + +class Ubuntu1604BasedAwsVirtualMachine(UbuntuBasedAwsVirtualMachine, + linux_virtual_machine.Ubuntu1604Mixin): + IMAGE_NAME_FILTER = 'ubuntu/images/*/ubuntu-xenial-16.04-*64-server-20*' + + def _InstallEfa(self): + super(Ubuntu1604BasedAwsVirtualMachine, 
self)._InstallEfa() + self.Reboot() + self.WaitForBootCompletion() + + +class Ubuntu1804BasedAwsVirtualMachine(UbuntuBasedAwsVirtualMachine, + linux_virtual_machine.Ubuntu1804Mixin): + IMAGE_NAME_FILTER = 'ubuntu/images/*/ubuntu-bionic-18.04-*64-server-20*' + + +class Ubuntu1804EfaBasedAwsVirtualMachine( + UbuntuBasedAwsVirtualMachine, linux_virtual_machine.Ubuntu1804EfaMixin): + IMAGE_OWNER = UBUNTU_EFA_IMAGE_PROJECT + IMAGE_NAME_FILTER = 'Deep Learning AMI (Ubuntu 18.04) Version *' + + +class Ubuntu2004BasedAwsVirtualMachine(UbuntuBasedAwsVirtualMachine, + linux_virtual_machine.Ubuntu2004Mixin): + IMAGE_NAME_FILTER = 'ubuntu/images/*/ubuntu-focal-20.04-*64-server-20*' + + +class Ubuntu2204BasedAwsVirtualMachine(UbuntuBasedAwsVirtualMachine, + linux_virtual_machine.Ubuntu2204Mixin): + IMAGE_NAME_FILTER = 'ubuntu/images/*/ubuntu-jammy-22.04-*64-server-20*' + + +class JujuBasedAwsVirtualMachine(UbuntuBasedAwsVirtualMachine, + linux_virtual_machine.JujuMixin): + """Class with configuration for AWS Juju virtual machines.""" + IMAGE_NAME_FILTER = 'ubuntu/images/*/ubuntu-trusty-14.04-*64-server-20*' + + +class AmazonLinux2BasedAwsVirtualMachine( + AwsVirtualMachine, linux_virtual_machine.AmazonLinux2Mixin): + """Class with configuration for AWS Amazon Linux 2 virtual machines.""" + IMAGE_NAME_FILTER = 'amzn2-ami-*-*-*' + IMAGE_OWNER = AMAZON_LINUX_IMAGE_PROJECT + + +class Rhel7BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.Rhel7Mixin): + """Class with configuration for AWS RHEL 7 virtual machines.""" + # Documentation on finding RHEL images: + # https://access.redhat.com/articles/2962171 + IMAGE_NAME_FILTER = 'RHEL-7*_GA*' + IMAGE_OWNER = RHEL_IMAGE_PROJECT + + +class Rhel8BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.Rhel8Mixin): + """Class with configuration for AWS RHEL 8 virtual machines.""" + # Documentation on finding RHEL images: + # https://access.redhat.com/articles/2962181 + # All RHEL AMIs are HVM. HVM- blocks HVM_BETA. 
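+  # Illustrative example (assumed name format, not taken from this repo): the
+  # filter below is expected to match AMI names such as
+  # 'RHEL-8.6.0_HVM-<date>-x86_64-...', and GetDefaultImage() then picks the
+  # newest match by CreationDate.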
+ IMAGE_NAME_FILTER = 'RHEL-8*_HVM-*' + IMAGE_OWNER = RHEL_IMAGE_PROJECT + + +class CentOs7BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.CentOs7Mixin): + """Class with configuration for AWS CentOS 7 virtual machines.""" + # Documentation on finding the CentOS 7 image: + # https://wiki.centos.org/Cloud/AWS#x86_64 + IMAGE_NAME_FILTER = 'CentOS 7*' + IMAGE_OWNER = CENTOS_IMAGE_PROJECT + DEFAULT_USER_NAME = 'centos' + + def _InstallEfa(self): + logging.info('Upgrading Centos7 kernel, installing kernel headers and ' + 'rebooting before installing EFA.') + self.RemoteCommand('sudo yum upgrade -y kernel') + self.InstallPackages('kernel-devel') + self.Reboot() + self.WaitForBootCompletion() + super(CentOs7BasedAwsVirtualMachine, self)._InstallEfa() + + +class CentOs8BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.CentOs8Mixin): + """Class with configuration for AWS CentOS 8 virtual machines.""" + # This describes the official AMIs listed here: + # https://wiki.centos.org/Cloud/AWS#Official_CentOS_Linux_:_Public_Images + IMAGE_OWNER = CENTOS_IMAGE_PROJECT + IMAGE_NAME_FILTER = 'CentOS 8*' + DEFAULT_USER_NAME = 'centos' + + +class CentOsStream8BasedAwsVirtualMachine( + AwsVirtualMachine, linux_virtual_machine.CentOsStream8Mixin): + """Class with configuration for AWS CentOS Stream 8 virtual machines.""" + # This describes the official AMIs listed here: + # https://wiki.centos.org/Cloud/AWS#Official_CentOS_Linux_:_Public_Images + IMAGE_OWNER = CENTOS_IMAGE_PROJECT + IMAGE_NAME_FILTER = 'CentOS Stream 8*' + DEFAULT_USER_NAME = 'centos' + + +class RockyLinux8BasedAwsVirtualMachine(AwsVirtualMachine, + linux_virtual_machine.RockyLinux8Mixin): + """Class with configuration for AWS Rocky Linux 8 virtual machines.""" + IMAGE_OWNER = MARKETPLACE_IMAGE_PROJECT + IMAGE_PRODUCT_CODE_FILTER = 'cotnnspjrsi38lfn8qo4ibnnm' + IMAGE_NAME_FILTER = 'Rocky-8-*' + DEFAULT_USER_NAME = 'rocky' + + +class CentOsStream9BasedAwsVirtualMachine( + AwsVirtualMachine, linux_virtual_machine.CentOsStream9Mixin): + """Class with configuration for AWS CentOS Stream 9 virtual machines.""" + # This describes the official AMIs listed here: + # https://wiki.centos.org/Cloud/AWS#Official_CentOS_Linux_:_Public_Images + IMAGE_OWNER = CENTOS_IMAGE_PROJECT + IMAGE_NAME_FILTER = 'CentOS Stream 9*' + + +class BaseWindowsAwsVirtualMachine(AwsVirtualMachine, + windows_virtual_machine.BaseWindowsMixin): + """Support for Windows machines on AWS.""" + DEFAULT_USER_NAME = 'Administrator' + IMAGE_OWNER = WINDOWS_IMAGE_PROJECT + + def __init__(self, vm_spec): + super(BaseWindowsAwsVirtualMachine, self).__init__(vm_spec) + self.user_data = ('%s' % + windows_virtual_machine.STARTUP_SCRIPT) + + @vm_util.Retry() + def _GetDecodedPasswordData(self): + # Retrieve a base64 encoded, encrypted password for the VM. + get_password_cmd = util.AWS_PREFIX + [ + 'ec2', + 'get-password-data', + '--region=%s' % self.region, + '--instance-id=%s' % self.id] + stdout, _ = util.IssueRetryableCommand(get_password_cmd) + response = json.loads(stdout) + password_data = response['PasswordData'] + + # AWS may not populate the password data until some time after + # the VM shows as running. Simply retry until the data shows up. + if not password_data: + raise ValueError('No PasswordData in response.') + + # Decode the password data. 
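+    # (The base64-decoded bytes are still encrypted with the run's key pair;
+    # _PostCreate below decrypts them with the private key via openssl.)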
+ return base64.b64decode(password_data) + + def _PostCreate(self): + """Retrieve generic VM info and then retrieve the VM's password.""" + super(BaseWindowsAwsVirtualMachine, self)._PostCreate() + + # Get the decoded password data. + decoded_password_data = self._GetDecodedPasswordData() + + # Write the encrypted data to a file, and use openssl to + # decrypt the password. + with vm_util.NamedTemporaryFile() as tf: + tf.write(decoded_password_data) + tf.close() + decrypt_cmd = ['openssl', + 'rsautl', + '-decrypt', + '-in', + tf.name, + '-inkey', + vm_util.GetPrivateKeyPath()] + password, _ = vm_util.IssueRetryableCommand(decrypt_cmd) + self.password = password + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the VM. + + Returns: + dict mapping metadata key to value. + """ + result = super(BaseWindowsAwsVirtualMachine, self).GetResourceMetadata() + result['disable_interrupt_moderation'] = self.disable_interrupt_moderation + return result + + @vm_util.Retry( + max_retries=10, + retryable_exceptions=(AwsUnexpectedWindowsAdapterOutputError, + errors.VirtualMachine.RemoteCommandError)) + def DisableInterruptModeration(self): + """Disable the networking feature 'Interrupt Moderation'.""" + + # First ensure that the driver supports interrupt moderation + net_adapters, _ = self.RemoteCommand('Get-NetAdapter') + if 'Intel(R) 82599 Virtual Function' not in net_adapters: + raise AwsDriverDoesntSupportFeatureError( + 'Driver not tested with Interrupt Moderation in PKB.') + aws_int_dis_path = ('HKLM\\SYSTEM\\ControlSet001\\Control\\Class\\' + '{4d36e972-e325-11ce-bfc1-08002be10318}\\0011') + command = 'reg add "%s" /v *InterruptModeration /d 0 /f' % aws_int_dis_path + self.RemoteCommand(command) + try: + self.RemoteCommand('Restart-NetAdapter -Name "Ethernet 2"') + except IOError: + # Restarting the network adapter will always fail because + # the winrm connection used to issue the command will be + # broken. 
+ pass + int_dis_value, _ = self.RemoteCommand( + 'reg query "%s" /v *InterruptModeration' % aws_int_dis_path) + # The second line should look like: + # *InterruptModeration REG_SZ 0 + registry_query_lines = int_dis_value.splitlines() + if len(registry_query_lines) < 3: + raise AwsUnexpectedWindowsAdapterOutputError( + 'registry query failed: %s ' % int_dis_value) + registry_query_result = registry_query_lines[2].split() + if len(registry_query_result) < 3: + raise AwsUnexpectedWindowsAdapterOutputError( + 'unexpected registry query response: %s' % int_dis_value) + if registry_query_result[2] != '0': + raise AwsUnexpectedWindowsAdapterOutputError( + 'InterruptModeration failed to disable') + + +class Windows2012CoreAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, windows_virtual_machine.Windows2012CoreMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2012-R2_RTM-English-64Bit-Core-*' + + +class Windows2016CoreAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, windows_virtual_machine.Windows2016CoreMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2016-English-Core-Base-*' + + +class Windows2019CoreAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, windows_virtual_machine.Windows2019CoreMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2019-English-Core-Base-*' + + +class Windows2022CoreAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, windows_virtual_machine.Windows2022CoreMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2022-English-Core-Base-*' + + +class Windows2012DesktopAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2012DesktopMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2012-R2_RTM-English-64Bit-Base-*' + + +class Windows2016DesktopAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2016DesktopMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2016-English-Full-Base-*' + + +class Windows2019DesktopAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2019DesktopMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2019-English-Full-Base-*' + + +class Windows2022DesktopAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2022DesktopMixin): + IMAGE_NAME_FILTER = 'Windows_Server-2022-English-Full-Base-*' + + +class Windows2019DesktopSQLServer2019StandardAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Standard): + IMAGE_NAME_FILTER = 'Windows_Server-2019-English-Full-SQL_2019_Standard-*' + + +class Windows2019DesktopSQLServer2019EnterpriseAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Enterprise): + IMAGE_NAME_FILTER = 'Windows_Server-2019-English-Full-SQL_2019_Enterprise-*' + + +class Windows2022DesktopSQLServer2019StandardAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2022SQLServer2019Standard): + IMAGE_NAME_FILTER = 'Windows_Server-2022-English-Full-SQL_2019_Standard-*' + + +class Windows2022DesktopSQLServer2019EnterpriseAwsVirtualMachine( + BaseWindowsAwsVirtualMachine, + windows_virtual_machine.Windows2022SQLServer2019Enterprise): + IMAGE_NAME_FILTER = 'Windows_Server-2022-English-Full-SQL_2019_Enterprise-*' + + +def GenerateDownloadPreprovisionedDataCommand(install_path, module_name, + filename): + """Returns a string used to download preprovisioned data.""" + return 'aws s3 cp --only-show-errors s3://%s/%s/%s %s' % ( + FLAGS.aws_preprovisioned_data_bucket, module_name, filename, + posixpath.join(install_path, filename)) + + +def 
GenerateStatPreprovisionedDataCommand(module_name, filename): + """Returns a string used to download preprovisioned data.""" + return 'aws s3api head-object --bucket %s --key %s/%s' % ( + FLAGS.aws_preprovisioned_data_bucket, module_name, filename) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_vpc_endpoint.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_vpc_endpoint.py new file mode 100644 index 0000000..895356f --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/aws_vpc_endpoint.py @@ -0,0 +1,169 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An AWS VPC Endpoint. + +See https://docs.aws.amazon.com/vpc/latest/userguide/vpc-endpoints.html + +A VPC Endpoint provides routing between a VPC and an AWS service without using +the internet connection interface. + +For example when an S3 Endpoint is created for region us-west-1 the route table +for that VPC is updated so that requests for the IP addresses associated with +com.amazonaws.us-west-1.s3 now go through this new interface and not out through +the original internet gateway. +""" + +import json + +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker.providers.aws import util + + +def GetAwsVpcEndpointClass(aws_service): + """Returns the AwsVpcEndpoint class for the given service.""" + return resource.GetResourceClass( + AwsVpcEndpoint, CLOUD=providers.AWS, AWS_SERVICE=aws_service) + + +def CreateEndpointService(aws_service, vpc): + """Creates the named VPC endpoint in the given VPC. + + Args: + aws_service: The AWS service to use. + vpc: The VPC to launch the endpoint service in. + + Returns: + The resource.BaseResource of the endpoint service. + """ + service_class = GetAwsVpcEndpointClass(aws_service) + return service_class(vpc) + + +class AwsVpcEndpoint(resource.BaseResource): + """An AWS Endpoint. + + Attributes: + region: The AWS region of the VPC + vpc: The aws_network.AwsVpc object to make the connection in. The VPC does + not initially need an ID but does when Create() is called. + """ + REQUIRED_ATTRS = ['CLOUD', 'AWS_SERVICE'] + RESOURCE_TYPE = 'AwsVpcEndpoint' + CLOUD = providers.AWS + AWS_SERVICE: str # must be set by derived classes + + def __init__(self, vpc): + super(AwsVpcEndpoint, self).__init__() + assert vpc, 'Must have a VPC object (does not require an id).' + self._vpc = vpc + self.region = self._vpc.region + assert self.region, 'VPC region must be set' + self._service_name = 'com.amazonaws.{}.{}'.format(self.region, + self.AWS_SERVICE) + # in the Create() method query to see if an endpoint already defined + self.id = None + + @property + def vpc_id(self): + """Returns the VPC id. 
Can be None.""" + return self._vpc.id + + @property + def endpoint_id(self): + """Returns the endpoint id for the defined VPC.""" + if not self.vpc_id: + # When creating an SDDC there will not be a VPC to have an endpoint + return None + ids = self._RunCommand(['describe-vpc-endpoints'] + util.AwsFilter({ + 'vpc-id': self.vpc_id, + 'service-name': self._service_name + }) + ['--query', 'VpcEndpoints[].VpcEndpointId']) + if not ids: + # There is a VPC but no endpoint + return None + assert len(ids) == 1, 'More than 1 VPC endpoint found: {}'.format(ids) + return ids[0] + + @property + def route_table_id(self): + """Returns the route table id for the VPC. + + Raises: + AssertionError: If no VPC is defined or if there are 0 or more than 1 + routing tables found. + """ + assert self.vpc_id, 'No defined VPC id.' + table_ids = self._RunCommand(['describe-route-tables'] + + util.AwsFilter({'vpc-id': self.vpc_id}) + + ['--query', 'RouteTables[].RouteTableId']) + assert len(table_ids) == 1, 'Only want 1 route table: {}'.format(table_ids) + return table_ids[0] + + def _Create(self): + """See base class. + + Raises: + AssertionError: If no VPC is defined. + """ + assert self.vpc_id, 'No defined VPC id.' + self.id = self.endpoint_id + if self.id: + # Endpoint already created + return + create_response = self._RunCommand([ + 'create-vpc-endpoint', '--vpc-endpoint-type', 'Gateway', '--vpc-id', + self.vpc_id, '--service-name', self._service_name, '--route-table-ids', + self.route_table_id + ]) + self.id = create_response['VpcEndpoint']['VpcEndpointId'] + + def _PostCreate(self): + """See base class.""" + util.AddDefaultTags(self.id, self.region) + + def _Exists(self): + """See base class.""" + return bool(self.endpoint_id) + + def _Delete(self): + """See base class.""" + endpoint_id = self.id or self.endpoint_id + if endpoint_id: + self._RunCommand( + ['delete-vpc-endpoints', '--vpc-endpoint-ids', endpoint_id]) + + def _RunCommand(self, cmds): + """Runs the AWS ec2 command in the defined region. + + Args: + cmds: List of AWS ec2 commands to run, example: ['describe-route-tables'] + + Returns: + Dict of the AWS response. + """ + cmd = util.AWS_PREFIX + ['ec2', '--region=%s' % self.region] + list(cmds) + stdout, _ = util.IssueRetryableCommand(cmd) + return json.loads(stdout) + + +class AwsVpcS3Endpoint(AwsVpcEndpoint): + """An AWS VPC S3 Endpoint. + + Attributes: + region: The AWS region of the VPC + vpc: The aws_network.AwsVpc object to make the connection in. The VPC does + not initially need an ID but does when Create() is called. + """ + AWS_SERVICE = 's3' diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py new file mode 100644 index 0000000..a1b2f63 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/elastic_kubernetes_service.py @@ -0,0 +1,235 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains classes/functions related to EKS (Elastic Kubernetes Service). + +This requires that the eksServiceRole IAM role has already been created and +requires that the aws-iam-authenticator binary has been installed. +See https://docs.aws.amazon.com/eks/latest/userguide/getting-started.html for +instructions. +""" + +import json +import logging +import re +from typing import Any, Dict + +from absl import flags +from perfkitbenchmarker import container_service +from perfkitbenchmarker import data +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import aws +from perfkitbenchmarker.providers.aws import aws_disk +from perfkitbenchmarker.providers.aws import aws_virtual_machine +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + +PROXY_FILE = 'proxy_ip_list/proxy_ip_list.txt' + + +class EksCluster(container_service.KubernetesCluster): + """Class representing an Elastic Kubernetes Service cluster.""" + + CLOUD = providers.AWS + + def __init__(self, spec): + super(EksCluster, self).__init__(spec) + # EKS requires a region and optionally a list of one or zones. + # Interpret the zone as a comma separated list of zones or a region. + self.control_plane_zones = self.zone and self.zone.split(',') + if not self.control_plane_zones: + raise errors.Config.MissingOption( + 'container_cluster.vm_spec.AWS.zone is required.') + elif len(self.control_plane_zones) == 1 and util.IsRegion(self.zone): + self.region = self.zone + self.control_plane_zones = [] + logging.info("Interpreting zone '%s' as a region", self.zone) + else: + self.region = util.GetRegionFromZones(self.control_plane_zones) + # control_plane_zones must be a superset of the node zones + for nodepool in self.nodepools.values(): + if (nodepool.vm_config.zone and + nodepool.vm_config.zone not in self.control_plane_zones): + self.control_plane_zones.append(nodepool.vm_config.zone) + if len(self.control_plane_zones) == 1: + # eksctl essentially requires you pass --zones if you pass --node-zones + # and --zones must have at least 2 zones + # https://github.com/weaveworks/eksctl/issues/4735 + self.control_plane_zones.append(self.region + + ('b' if self.zone.endswith('a') else 'a')) + self.cluster_version = FLAGS.container_cluster_version + # TODO(user) support setting boot disk type if EKS does. + self.boot_disk_type = self.vm_config.DEFAULT_ROOT_DISK_TYPE + self.node_group_name = 'eks' + self.ssh_access = FLAGS.aws_eks_ssh_access + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the cluster. + + Returns: + dict mapping string property key to value. + """ + result = super(EksCluster, self).GetResourceMetadata() + result['boot_disk_type'] = self.boot_disk_type + result['boot_disk_size'] = self.vm_config.boot_disk_size + return result + + def _CreateDependencies(self): + """Set up the ssh key.""" + aws_virtual_machine.AwsKeyFileManager.ImportKeyfile(self.region) + + def _DeleteDependencies(self): + """Delete the ssh key.""" + aws_virtual_machine.AwsKeyFileManager.DeleteKeyfile(self.region) + + def _Create(self): + """Creates the control plane and worker nodes.""" + eksctl_flags = { + 'kubeconfig': FLAGS.kubeconfig, + 'managed': True, + 'name': self.name, + 'nodegroup-name': container_service.DEFAULT_NODEPOOL, + 'version': self.cluster_version, + # NAT mode uses an EIP. + 'vpc-nat-mode': 'Disable', + } + # If multiple zones are passed use them for the control plane. 
+ # Otherwise EKS will auto-select control plane zones in the region. + eksctl_flags['zones'] = ','.join(self.control_plane_zones) + if self.min_nodes != self.max_nodes: + eksctl_flags.update({ + 'nodes-min': self.min_nodes, + 'nodes-max': self.max_nodes, + }) + eksctl_flags.update( + self._GetNodeFlags(container_service.DEFAULT_NODEPOOL, self.num_nodes, + self.vm_config)) + + cmd = [FLAGS.eksctl, 'create', 'cluster'] + sorted( + '--{}={}'.format(k, v) for k, v in eksctl_flags.items() if v) + stdout, _, retcode = vm_util.IssueCommand( + cmd, timeout=1800, raise_on_failure=False) + if retcode: + # TODO(pclay): add other quota errors + if 'The maximum number of VPCs has been reached' in stdout: + raise errors.Benchmarks.QuotaFailure(stdout) + else: + raise errors.Resource.CreationError(stdout) + + for name, node_group in self.nodepools.items(): + self._CreateNodeGroup(name, node_group) + + def _CreateNodeGroup(self, name: str, node_group): + """Creates a node group.""" + eksctl_flags = { + 'cluster': self.name, + 'name': name, + # Support ARM: https://github.com/weaveworks/eksctl/issues/3569 + 'skip-outdated-addons-check': True + } + eksctl_flags.update( + self._GetNodeFlags(name, node_group.num_nodes, node_group.vm_config)) + cmd = [FLAGS.eksctl, 'create', 'nodegroup'] + sorted( + '--{}={}'.format(k, v) for k, v in eksctl_flags.items() if v) + vm_util.IssueCommand(cmd, timeout=600) + + def _GetNodeFlags(self, node_group: str, num_nodes: int, + vm_config) -> Dict[str, Any]: + """Get common flags for creating clusters and node_groups.""" + tags = util.MakeDefaultTags() + return { + 'nodes': + num_nodes, + 'node-labels': + f'pkb_nodepool={node_group}', + 'node-type': + vm_config.machine_type, + 'node-volume-size': + vm_config.boot_disk_size, + # vm_config.zone may be split a comma separated list + 'node-zones': + vm_config.zone, + 'region': + self.region, + 'tags': + ','.join(f'{k}={v}' for k, v in tags.items()), + 'ssh-public-key': + aws_virtual_machine.AwsKeyFileManager.GetKeyNameForRun(), + } + + def _Delete(self): + """Deletes the control plane and worker nodes.""" + super()._Delete() + cmd = [FLAGS.eksctl, 'delete', 'cluster', + '--name', self.name, + '--region', self.region] + vm_util.IssueCommand(cmd, timeout=1800) + + def _IsReady(self): + """Returns True if the workers are ready, else False.""" + get_cmd = [ + FLAGS.kubectl, '--kubeconfig', FLAGS.kubeconfig, + 'get', 'nodes', + ] + stdout, _, _ = vm_util.IssueCommand(get_cmd) + ready_nodes = len(re.findall('Ready', stdout)) + return ready_nodes >= self.min_nodes + + def _PostCreate(self): + """Adds CIDR IP range of ingress SSH security group rules.""" + if self.ssh_access: + group_name = 'eksctl-{}-nodegroup-{}-remoteAccess'.format(self.name, self.node_group_name) + cmd = aws.util.AWS_PREFIX + ['ec2', 'describe-security-groups', '--filters', + 'Name=group-name,Values={}'.format(group_name), + '--query', 'SecurityGroups[*].{ID:GroupId}', + '--region', self.region] + raw_output, stderror, retcode = vm_util.IssueCommand(cmd) + if retcode != 0: + logging.warning('Failed to add CIDR IP range of ingress SSH security group rules! %s', stderror) + return + + json_output = json.loads(raw_output) + if len(json_output) != 1: + logging.warning("Failed to add CIDR IP range of ingress SSH security group rules! " + "Couldn't find {} security group!".format(group_name)) + return + + if not ('ID' in json_output[0]): + logging.warning("Failed to add CIDR IP range of ingress SSH security group rules! 
" + "Missing security group id!") + group_id = json_output[0]['ID'] + + CIDRs = vm_util.GetCIDRList(PROXY_FILE) + if CIDRs is None or len(CIDRs) == 0: + logging.warning('Failed to add CIDR IP range of ingress SSH security group rules! No rules in "{}"!', + data.ResourcePath(PROXY_FILE)) + return + + for cidr in CIDRs: + cmd = aws.util.AWS_PREFIX + ['ec2', 'authorize-security-group-ingress', + '--group-id', group_id, '--protocol', 'tcp', + '--port', '22', '--cidr', cidr, '--region', self.region] + + _, stderror, retcode = vm_util.IssueCommand(cmd) + if retcode != 0: + logging.warning('Failed to add %q CIDR IP range of ingress SSH security group rule! %s', cidr, stderror) + return + + def GetDefaultStorageClass(self) -> str: + """Get the default storage class for the provider.""" + # https://docs.aws.amazon.com/eks/latest/userguide/storage-classes.html + return aws_disk.GP2 diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/elasticache.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/elasticache.py new file mode 100644 index 0000000..af463f8 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/elasticache.py @@ -0,0 +1,140 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import json +import logging + +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import memcached_server +from perfkitbenchmarker.memcache_service import MemcacheService +from perfkitbenchmarker.providers.aws import aws_network +from perfkitbenchmarker.providers.aws import util + + +ELASTICACHE_PORT = 11211 + + +class ElastiCacheMemcacheService(MemcacheService): + """Class for AWS elasticache memcache service.""" + + CLOUD = providers.AWS + + def __init__(self, network, cluster_id, region, node_type, num_servers=1): + self.cluster_id = cluster_id + self.region = region + self.node_type = node_type + self.num_servers = num_servers + self.hosts = [] # [(ip, port)] + + self.vpc_id = network.subnet.vpc_id + self.security_group_id = ( + network.regional_network.vpc.default_security_group_id) + self.subnet_id = network.subnet.id + self.subnet_group_name = '%ssubnet' % cluster_id + + def Create(self): + # Open the port memcached needs + aws_network.AwsFirewall.GetFirewall().AllowPortInSecurityGroup( + self.region, self.security_group_id, ELASTICACHE_PORT) + + # Create a cache subnet group + cmd = ['aws', 'elasticache', 'create-cache-subnet-group', + '--region=%s' % self.region, + '--cache-subnet-group-name=%s' % self.subnet_group_name, + '--cache-subnet-group-description="PKB memcached_ycsb benchmark"', + '--subnet-ids=%s' % self.subnet_id] + vm_util.IssueCommand(cmd) + + # Create the cluster + cmd = ['aws', 'elasticache', 'create-cache-cluster', + '--engine=memcached', + '--cache-subnet-group-name=%s' % self.subnet_group_name, + '--cache-cluster-id=%s' % self.cluster_id, + '--num-cache-nodes=%s' % self.num_servers, + '--region=%s' % self.region, + '--cache-node-type=%s' % self.node_type, + '--tags'] + util.MakeFormattedDefaultTags() + vm_util.IssueCommand(cmd) + + # Wait for the cluster to come up + cluster_info = self._WaitForClusterUp() + + # Parse out the hosts + self.hosts = [(node['Endpoint']['Address'], node['Endpoint']['Port']) + for node in cluster_info['CacheNodes']] + assert len(self.hosts) == self.num_servers + + def Destroy(self): + # Delete the ElastiCache cluster + cmd = ['aws', 'elasticache', 'delete-cache-cluster', + '--cache-cluster-id=%s' % self.cluster_id, + '--region=%s' % self.region] + vm_util.IssueCommand(cmd, raise_on_failure=False) + # Don't have to delete the subnet group. It will be deleted with the subnet. + + def Flush(self): + vm_util.RunThreaded(memcached_server.FlushMemcachedServer, self.hosts) + + def GetHosts(self): + return ['%s:%s' % (ip, port) for ip, port in self.hosts] + + def GetMetadata(self): + return {'num_servers': self.num_servers, + 'elasticache_region': self.region, + 'elasticache_node_type': self.node_type} + + def _GetClusterInfo(self): + cmd = ['aws', 'elasticache', 'describe-cache-clusters'] + cmd += ['--cache-cluster-id=%s' % self.cluster_id] + cmd += ['--region=%s' % self.region] + cmd += ['--show-cache-node-info'] + out, _, _ = vm_util.IssueCommand(cmd) + return json.loads(out)['CacheClusters'][0] + + @vm_util.Retry(poll_interval=15, timeout=300, + retryable_exceptions=(errors.Resource.RetryableCreationError)) + def _WaitForClusterUp(self): + """Block until the ElastiCache memcached cluster is up. + + Will timeout after 5 minutes, and raise an exception. Before the timeout + expires any exceptions are caught and the status check is retried. + + We check the status of the cluster using the AWS CLI. 
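+    For example, the check is roughly:
+      aws elasticache describe-cache-clusters --cache-cluster-id=<cluster-id>
+        --region=<region> --show-cache-node-info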
+ + Returns: + The cluster info json as a dict + + Raises: + errors.Resource.RetryableCreationError when response is not as expected or + if there is an error connecting to the port or otherwise running the + remote check command. + """ + logging.info('Trying to get ElastiCache cluster info for %s', + self.cluster_id) + cluster_status = None + try: + cluster_info = self._GetClusterInfo() + cluster_status = cluster_info['CacheClusterStatus'] + if cluster_status == 'available': + logging.info('ElastiCache memcached cluster is up and running.') + return cluster_info + except errors.VirtualMachine.RemoteCommandError as e: + raise errors.Resource.RetryableCreationError( + 'ElastiCache memcached cluster not up yet: %s.' % str(e)) + else: + raise errors.Resource.RetryableCreationError( + 'ElastiCache memcached cluster not up yet. Status: %s' % + cluster_status) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/flags.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/flags.py new file mode 100644 index 0000000..231ff6d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/flags.py @@ -0,0 +1,120 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing flags applicable across benchmark run on AWS.""" + +from absl import flags + +flags.DEFINE_string( + 'aws_user_name', '', 'This determines the user name that Perfkit will ' + 'attempt to use. Defaults are OS specific.') +flags.DEFINE_integer('aws_provisioned_iops', None, + 'IOPS for Provisioned IOPS (SSD) volumes in AWS.') +flags.DEFINE_integer('aws_provisioned_throughput', None, + 'Provisioned throughput (MB/s) for (SSD) volumes in AWS.') + +flags.DEFINE_string('aws_dax_node_type', 'dax.r4.large', + 'The node type used for creating AWS DAX cluster.') +flags.DEFINE_integer('aws_dax_replication_factor', 3, + 'The replication factor of AWS DAX cluster.') +flags.DEFINE_string('aws_emr_loguri', None, + 'The log-uri parameter to pass to AWS when creating a ' + 'cluster. If not set, a bucket will be created.') +flags.DEFINE_integer('aws_emr_job_wait_time', 18000, + 'The time to wait for an EMR job to finish, in seconds') +flags.DEFINE_boolean('aws_spot_instances', False, + 'Whether to use AWS spot instances for any AWS VMs.') +flags.DEFINE_float('aws_spot_price', None, + 'The spot price to bid for AWS spot instances. Defaults ' + 'to on-demand price when left as None.') +flags.DEFINE_enum('aws_spot_block_duration_minutes', None, + ['60', '120', '180', '240', '300', '360'], 'The required ' + 'duration for the Spot Instances (also known as Spot blocks),' + ' in minutes. This value must be a multiple of 60.') +flags.DEFINE_integer('aws_boot_disk_size', None, + 'The boot disk size in GiB for AWS VMs.') +flags.DEFINE_string('kops', 'kops', + 'The path to the kops binary.') +flags.DEFINE_string('aws_image_name_filter', None, + 'The filter to use when searching for an image for a VM. 
' + 'See usage details in aws_virtual_machine.py around ' + 'IMAGE_NAME_FILTER.') +flags.DEFINE_string('aws_image_name_regex', None, + 'The Python regex to use to further filter images for a ' + 'VM. This applies after the aws_image_name_filter. See ' + 'usage details in aws_virtual_machine.py around ' + 'IMAGE_NAME_REGEX.') +flags.DEFINE_string('aws_preprovisioned_data_bucket', None, + 'AWS bucket where pre-provisioned data has been copied.') +flags.DEFINE_string('cache_node_type', + 'cache.m4.large', + 'The AWS cache node type to use for elasticache clusters.') +flags.DEFINE_string('aws_elasticache_failover_zone', + None, + 'AWS elasticache failover zone') +flags.DEFINE_string('aws_efs_token', None, + 'The creation token used to create the EFS resource. ' + 'If the file system already exists, it will use that ' + 'instead of creating a new one.') +flags.DEFINE_boolean('aws_delete_file_system', True, + 'Whether to delete the EFS file system.') +flags.DEFINE_enum('efs_throughput_mode', 'provisioned', + ['provisioned', 'bursting'], + 'The throughput mode to use for EFS.') +flags.DEFINE_float('efs_provisioned_throughput', 1024.0, + 'The throughput limit of EFS (in MiB/s) when run in ' + 'provisioned mode.') +flags.DEFINE_boolean('provision_athena', False, + 'Whether to provision the Athena database.') +flags.DEFINE_boolean('teardown_athena', True, + 'Whether to teardown the Athena database.') +flags.DEFINE_string( + 'athena_output_location_prefix', 'athena-cli-results', + 'Prefix of the S3 bucket name for Athena Query Output. Suffix will be the ' + 'region and the run URI, and the bucket will be dynamically created and ' + 'deleted during the test.') +flags.DEFINE_string('eksctl', 'eksctl', 'Path to eksctl.') +flags.DEFINE_enum('redshift_client_interface', 'JDBC', ['JDBC'], + 'The Runtime Interface used when interacting with Redshift.') +flags.DEFINE_enum('athena_client_interface', 'JAVA', ['JAVA'], + 'The Runtime Interface used when interacting with Athena.') +flags.DEFINE_string('athena_query_timeout', '600', 'Query timeout in seconds.') +flags.DEFINE_string('athena_workgroup', '', + 'Use athena workgroup to separate applications and choose ' + 'execution configuration like the engine version.') +flags.DEFINE_boolean( + 'athena_metrics_collection', False, + 'Should the cloud watch metrics be collected for Athena query executions.') +flags.DEFINE_boolean( + 'athena_workgroup_delete', True, + 'Should the dedicated athena workgroups be deleted or kept alive for investigations.' +) +flags.DEFINE_boolean( + 'aws_eks_ssh_access', False, + 'Determine whether to have SSH access to the EKS nodes.') +flags.DEFINE_enum('aws_credit_specification', None, + ['CpuCredits=unlimited', 'CpuCredits=standard'], + 'Credit specification for burstable vms.') +flags.DEFINE_boolean('aws_vm_hibernate', False, + 'Whether to hibernate(suspend) an aws vm' + 'instance.') +flags.DEFINE_string( + 'aws_glue_crawler_role', None, + "Role's ARN to be used by the crawler. Must have policies that grant " + 'permission for using AWS Glue and read access to S3.') +flags.DEFINE_integer( + 'aws_glue_crawler_sample_size', None, + 'Sets how many files will be crawled in each leaf directory. If left ' + 'unset, all the files will be crawled. 
May range from 1 to 249.', + 1, 249 +) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/provider_info.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/provider_info.py new file mode 100644 index 0000000..8a15208 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/provider_info.py @@ -0,0 +1,22 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""AWS provider info.""" + +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers + + +class AWSProviderInfo(provider_info.BaseProviderInfo): + + CLOUD = providers.AWS diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/redshift.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/redshift.py new file mode 100644 index 0000000..f8b88d8 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/redshift.py @@ -0,0 +1,536 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS's Redshift EDW service. + +Clusters can be created (based on new configuration or restored from a snapshot) +and deleted. +""" + +import copy +import json +import os +from typing import Dict, List, Text, Tuple + +from absl import flags +from perfkitbenchmarker import benchmark_spec +from perfkitbenchmarker import data +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import aws_cluster_parameter_group +from perfkitbenchmarker.providers.aws import aws_cluster_subnet_group +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + +VALID_EXIST_STATUSES = ['creating', 'available'] +DELETION_STATUSES = ['deleting'] +READY_STATUSES = ['available'] +ELIMINATE_AUTOMATED_SNAPSHOT_RETENTION = '--automated-snapshot-retention-period=0' +DEFAULT_DATABASE_NAME = 'dev' +BOOTSTRAP_DB = 'sample' +REDSHIFT_JDBC_JAR = 'redshift-jdbc-client-1.0.jar' + + +def AddTags(resource_arn, region): + """Adds tags to a Redshift cluster created by PerfKitBenchmarker. + + Args: + resource_arn: The arn of AWS resource to operate on. + region: The AWS region resource was created in. 
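+
+  With illustrative values, the resulting call is roughly:
+    aws --output json redshift create-tags --region=us-east-1
+      --resource-name <cluster-arn> --tags Key=owner,Value=<owner>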
+ """ + cmd_prefix = util.AWS_PREFIX + tag_cmd = cmd_prefix + ['redshift', 'create-tags', '--region=%s' % region, + '--resource-name', resource_arn, '--tags'] + tag_cmd += util.MakeFormattedDefaultTags() + vm_util.IssueCommand(tag_cmd) + + +def GetDefaultRegion(): + """Utility method to supply the default region.""" + cmd_prefix = util.AWS_PREFIX + default_region_cmd = cmd_prefix + ['configure', 'get', 'region'] + stdout, _, _ = vm_util.IssueCommand(default_region_cmd) + return stdout + + +def GetRedshiftClientInterface(database: str, user: str, + password: str) -> edw_service.EdwClientInterface: + """Builds and Returns the requested Redshift client Interface. + + Args: + database: Name of the database to run queries against. + user: Redshift username for authentication. + password: Redshift password for authentication. + + Returns: + A concrete Client Interface object. + + Raises: + RuntimeError: if an unsupported redshift_client_interface is requested + """ + if FLAGS.redshift_client_interface == 'CLI': + return CliClientInterface(database, user, password) + if FLAGS.redshift_client_interface == 'JDBC': + return JdbcClientInterface(database, user, password) + raise RuntimeError('Unknown Redshift Client Interface requested.') + + +class CliClientInterface(edw_service.EdwClientInterface): + """Command Line Client Interface class for Redshift. + + Uses the native Redshift client that ships with pgbench. + https://docs.aws.amazon.com/cli/latest/reference/redshift/index.html + + Attributes: + host: Host endpoint to be used for interacting with the cluster. + database: Name of the database to run queries against. + user: Redshift username for authentication. + password: Redshift password for authentication. + """ + + def __init__(self, database: str, user: str, password: str): + self.database = database + self.user = user + self.password = password + + # set by SetProvisionedAttributes() + self.host = None + + def SetProvisionedAttributes(self, bm_spec: benchmark_spec.BenchmarkSpec): + """Sets any attributes that were unknown during initialization.""" + super(CliClientInterface, self).SetProvisionedAttributes(bm_spec) + self.host = bm_spec.edw_service.endpoint + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the redshift tool dependencies. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + self.client_vm.Install('pip') + self.client_vm.RemoteCommand('sudo pip install absl-py') + self.client_vm.Install('pgbench') + + # Push the framework to execute a sql query and gather performance details + service_specific_dir = os.path.join('edw', Redshift.SERVICE_TYPE) + self.client_vm.PushFile( + data.ResourcePath( + os.path.join(service_specific_dir, 'script_runner.sh'))) + runner_permission_update_cmd = 'chmod 755 {}'.format('script_runner.sh') + self.client_vm.RemoteCommand(runner_permission_update_cmd) + self.client_vm.PushFile( + data.ResourcePath(os.path.join('edw', 'script_driver.py'))) + self.client_vm.PushFile( + data.ResourcePath( + os.path.join(service_specific_dir, + 'provider_specific_script_driver.py'))) + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. 
+ + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. job_id + """ + query_command = ('python script_driver.py --script={} --host={} ' + '--database={} --user={} --password={}').format( + query_name, self.host, self.database, self.user, + self.password) + stdout, _ = self.client_vm.RemoteCommand(query_command) + performance = json.loads(stdout) + details = copy.copy(self.GetMetadata()) + details['job_id'] = performance[query_name]['job_id'] + return float(performance[query_name]['execution_time']), details + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + return {'client': FLAGS.redshift_client_interface} + + +class JdbcClientInterface(edw_service.EdwClientInterface): + """Native JDBC Client Interface class for Redshift. + + https://docs.aws.amazon.com/redshift/latest/mgmt/jdbc20-install.html + + Attributes: + host: Host endpoint to be used for interacting with the cluster. + database: Name of the database to run queries against. + user: Redshift username for authentication. + password: Redshift password for authentication. + """ + + def __init__(self, database: str, user: str, password: str): + self.database = database + # Use the default port. + self.port = '5439' + self.user = user + self.password = password + + # set in SetProvisionedAttributes() + self.host = None + + def SetProvisionedAttributes(self, bm_spec: benchmark_spec.BenchmarkSpec): + """Sets any attributes that were unknown during initialization.""" + super(JdbcClientInterface, self).SetProvisionedAttributes(bm_spec) + endpoint = bm_spec.edw_service.endpoint + self.host = f'jdbc:redshift://{endpoint}:{self.port}/{self.database}' + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the redshift tool dependencies. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + self.client_vm.Install('openjdk') + + # Push the executable jar to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData(package_name, + [REDSHIFT_JDBC_JAR], '') + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute. + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. 
job_id + """ + query_command = ('java -cp {} com.google.cloud.performance.edw.Single ' + '--endpoint {} --query_file {}').format( + REDSHIFT_JDBC_JAR, self.host, query_name) + stdout, _ = self.client_vm.RemoteCommand(query_command) + performance = json.loads(stdout) + details = copy.copy(self.GetMetadata()) + if 'failure_reason' in performance: + details.update({'failure_reason': performance['failure_reason']}) + else: + details.update(performance['details']) + return performance['query_wall_time_in_secs'], details + + def ExecuteSimultaneous(self, submission_interval: int, + queries: List[str]) -> str: + """Executes queries simultaneously on client and return performance details. + + Simultaneous app expects queries as white space separated query file names. + + Args: + submission_interval: Simultaneous query submission interval in + milliseconds. + queries: List of strings (names) of queries to execute. + + Returns: + A serialized dictionary of execution details. + """ + cmd = ('java -cp {} com.google.cloud.performance.edw.Simultaneous ' + '--endpoint {} --submission_interval {} --query_files {}'.format( + REDSHIFT_JDBC_JAR, self.host, submission_interval, + ' '.join(queries))) + stdout, _ = self.client_vm.RemoteCommand(cmd) + return stdout + + def ExecuteThroughput(self, concurrency_streams: List[List[str]]) -> str: + """Executes a throughput test and returns performance details. + + Args: + concurrency_streams: List of streams to execute simultaneously, each of + which is a list of string names of queries. + + Returns: + A serialized dictionary of execution details. + """ + cmd = ('java -cp {} com.google.cloud.performance.edw.Throughput ' + '--endpoint {} --query_streams {}'.format( + REDSHIFT_JDBC_JAR, self.host, + ' '.join([','.join(stream) for stream in concurrency_streams]))) + stdout, _ = self.client_vm.RemoteCommand(cmd) + return stdout + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + return {'client': FLAGS.redshift_client_interface} + + +class Redshift(edw_service.EdwService): + """Object representing a Redshift cluster. + + Attributes: + cluster_id: ID of the cluster. + project: ID of the project. 
+ """ + + CLOUD = providers.AWS + SERVICE_TYPE = 'redshift' + + READY_TIMEOUT = 7200 + + def __init__(self, edw_service_spec): + super(Redshift, self).__init__(edw_service_spec) + # pkb setup attribute + self.project = None + self.cmd_prefix = list(util.AWS_PREFIX) + if FLAGS.zones: + self.zone = FLAGS.zones[0] + self.region = util.GetRegionFromZone(self.zone) + else: + self.region = GetDefaultRegion() + self.cmd_prefix += ['--region', self.region] + + # Redshift specific attribute (see if they can be set) + self.cluster_subnet_group = None + self.cluster_parameter_group = None + self.arn = '' + self.cluster_subnet_group = aws_cluster_subnet_group.RedshiftClusterSubnetGroup( + self.cmd_prefix) + self.cluster_parameter_group = aws_cluster_parameter_group.RedshiftClusterParameterGroup( + self.cmd_prefix) + + if self.db is None: + self.db = DEFAULT_DATABASE_NAME + self.client_interface = GetRedshiftClientInterface(self.db, self.user, + self.password) + + def _CreateDependencies(self): + self.cluster_subnet_group.Create() + self.cluster_parameter_group.Create() + + def _Create(self): + """Create the redshift cluster resource.""" + if self.snapshot: + self.Restore(self.snapshot, self.cluster_identifier) + else: + self.Initialize(self.cluster_identifier, self.node_type, self.node_count, + self.user, self.password, self.cluster_parameter_group, + self.cluster_subnet_group) + + def Initialize(self, cluster_identifier, node_type, node_count, user, + password, cluster_parameter_group, cluster_subnet_group): + """Method to initialize a Redshift cluster from an configuration parameters. + + The cluster is initialized in the EC2-VPC platform, that runs it in a + virtual private cloud (VPC). This allows control access to the cluster by + associating one or more VPC security groups with the cluster. + + To create a cluster in a VPC, first create an Amazon Redshift cluster subnet + group by providing subnet information of the VPC, and then provide the + subnet group when launching the cluster. + + + Args: + cluster_identifier: A unique identifier for the cluster. + node_type: The node type to be provisioned for the cluster. + Valid Values: ds2.xlarge | ds2.8xlarge | ds2.xlarge | ds2.8xlarge | + dc1.large | dc1.8xlarge | dc2.large | dc2.8xlarge + node_count: The number of compute nodes in the cluster. + user: The user name associated with the master user account for the + cluster that is being created. + password: The password associated with the master user account for the + cluster that is being created. + cluster_parameter_group: Cluster Parameter Group associated with the + cluster. + cluster_subnet_group: Cluster Subnet Group associated with the cluster. + + Returns: + None + + + Raises: + MissingOption: If any of the required parameters is missing. 
+ """ + if not (cluster_identifier and node_type and user and password): + raise errors.Config.MissingOption('Need cluster_identifier, user and ' + 'password set for creating a cluster.') + + prefix = [ + 'redshift', 'create-cluster', '--cluster-identifier', cluster_identifier + ] + + if node_count == 1: + worker_count_cmd = ['--cluster-type', 'single-node'] + else: + worker_count_cmd = ['--number-of-nodes', str(node_count)] + + postfix = [ + '--node-type', node_type, '--master-username', user, + '--master-user-password', password, '--cluster-parameter-group-name', + cluster_parameter_group.name, '--cluster-subnet-group-name', + cluster_subnet_group.name, '--publicly-accessible', + ELIMINATE_AUTOMATED_SNAPSHOT_RETENTION + ] + + cmd = self.cmd_prefix + prefix + worker_count_cmd + postfix + stdout, stderr, _ = vm_util.IssueCommand(cmd, raise_on_failure=False) + if not stdout: + raise errors.Resource.CreationError('Cluster creation failure: ' + '{}'.format(stderr)) + + def _ValidateSnapshot(self, snapshot_identifier): + """Validate the presence of a cluster snapshot based on its metadata.""" + cmd = self.cmd_prefix + ['redshift', 'describe-cluster-snapshots', + '--snapshot-identifier', snapshot_identifier] + stdout, _, _ = vm_util.IssueCommand(cmd) + if not stdout: + raise errors.Config.InvalidValue('Cluster snapshot indicated by ' + 'edw_service_cluster_snapshot does not' + ' exist: {}.' + .format(snapshot_identifier)) + result = json.loads(stdout) + return result['Snapshots'][0]['Status'] == 'available' + + def _SnapshotDetails(self, snapshot_identifier): + """Delete a redshift cluster and disallow creation of a snapshot.""" + cmd = self.cmd_prefix + ['redshift', 'describe-cluster-snapshots', + '--snapshot-identifier', snapshot_identifier] + stdout, _, _ = vm_util.IssueCommand(cmd) + result = json.loads(stdout) + node_type = result['Snapshots'][0]['NodeType'] + node_count = result['Snapshots'][0]['NumberOfNodes'] + return node_type, node_count + + def Restore(self, snapshot_identifier, cluster_identifier): + """Method to restore a Redshift cluster from an existing snapshot. + + A snapshot of cluster in VPC can be restored only in VPC. Therefore, subnet + group name where the cluster is to be restored must be provided. + + vpc-security-group-ids are not specified at the time of restoration, and it + is expected that the default VPC security group which gets associated with + the cluster has appropriate ingress and egress rules. 
+ + Ref: http://docs.aws.amazon.com/cli/latest/reference/ + redshift/restore-from-cluster-snapshot.html + + Args: + snapshot_identifier: Identifier of the snapshot to restore + cluster_identifier: Identifier of the restored cluster + Returns: + None + """ + + if not (self.user and self.password and self.db): + raise errors.Config.MissingOption( + 'Need the db, user and password set for restoring a cluster') + + if self._ValidateSnapshot(snapshot_identifier): + node_type, node_count = self._SnapshotDetails(snapshot_identifier) + # For a restored cluster update the cluster shape and size based on the + # snapshot's configuration + self.node_type = node_type + self.node_count = node_count + cmd = self.cmd_prefix + ['redshift', 'restore-from-cluster-snapshot', + '--cluster-identifier', cluster_identifier, + '--snapshot-identifier', snapshot_identifier, + '--cluster-subnet-group-name', + self.cluster_subnet_group.name, + '--cluster-parameter-group-name', + self.cluster_parameter_group.name, + '--publicly-accessible', + '--automated-snapshot-retention-period=1'] + stdout, stderr, _ = vm_util.IssueCommand(cmd) + if not stdout: + raise errors.Resource.CreationError('Cluster creation failure: ' + '{}'.format(stderr)) + + def __DescribeCluster(self): + """Describe a redshift cluster.""" + cmd = self.cmd_prefix + ['redshift', 'describe-clusters', + '--cluster-identifier', self.cluster_identifier] + return vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Exists(self): + """Method to validate the existence of a redshift cluster. + + Provision pipeline: returns True during the provisioning (status in + 'creating', 'available') to prevent retry of creation + + Deletion pipeline: returns True, during the deletion (status in + 'deleting') which causes a retry of deletion, an idempotent operation. + TODO(saksena): handle the deletion step more cleanly, and spin till deletion + + Returns: + Boolean value indicating the existence of a cluster. + """ + stdout, _, _ = self.__DescribeCluster() + if (not stdout or (json.loads(stdout)['Clusters'][0]['ClusterStatus'] not in + VALID_EXIST_STATUSES)): + return False + else: + return True + + def _IsReady(self): + """Method to return if the cluster is ready to handle queries.""" + stdout, _, _ = self.__DescribeCluster() + return json.loads(stdout)['Clusters'][0]['ClusterStatus'] in READY_STATUSES + + def _PostCreate(self): + """Perform general post create operations on the cluster. + + Get the endpoint to be used for interacting with the cluster and apply + tags on the cluster. + """ + stdout, _, _ = self.__DescribeCluster() + self.endpoint = json.loads(stdout)['Clusters'][0]['Endpoint']['Address'] + account = util.GetAccount() + self.arn = 'arn:aws:redshift:{}:{}:cluster:{}'.format(self.region, account, + self. 
+ cluster_identifier) + AddTags(self.arn, self.region) + + def _Delete(self): + """Delete a redshift cluster and disallow creation of a snapshot.""" + cmd = self.cmd_prefix + ['redshift', 'delete-cluster', + '--cluster-identifier', self.cluster_identifier, + '--skip-final-cluster-snapshot'] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _IsDeleting(self): + """Method to check if the cluster is being deleting.""" + stdout, _, _ = self.__DescribeCluster() + if not stdout: + return False + else: + return (json.loads(stdout)['Clusters'][0]['ClusterStatus'] in + DELETION_STATUSES) + + def _DeleteDependencies(self): + """Delete dependencies of a redshift cluster.""" + self.cluster_subnet_group.Delete() + self.cluster_parameter_group.Delete() + + def GetMetadata(self): + """Return a dictionary of the metadata for this cluster.""" + basic_data = super(Redshift, self).GetMetadata() + basic_data['region'] = self.region + if self.snapshot is not None: + basic_data['snapshot'] = self.snapshot + basic_data.update(self.client_interface.GetMetadata()) + return basic_data diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/requirements.txt b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/requirements.txt new file mode 100644 index 0000000..847fdfa --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/requirements.txt @@ -0,0 +1,17 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Requirements for running PerfKit Benchmarker on AWS. +awscli>=1.19.75 +colorama==0.3.7 diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/s3.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/s3.py new file mode 100644 index 0000000..144c678 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/s3.py @@ -0,0 +1,219 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
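As a rough sketch of the two CLI calls behind S3Service.MakeBucket below (the bucket name, region, and tag values are placeholders; a configured AWS CLI is assumed):

```python
import subprocess


def make_tagged_bucket(bucket: str, region: str) -> None:
  """Creates an S3 bucket and tags it, mirroring S3Service.MakeBucket."""
  subprocess.check_call(
      ['aws', 's3', 'mb', 's3://%s' % bucket, '--region=%s' % region])
  # TagSet uses the AWS CLI shorthand that MakeBucket builds from default tags.
  tag_set = '{Key=owner,Value=perfkit},{Key=timeout_utc,Value=1700000000}'
  subprocess.check_call([
      'aws', 's3api', 'put-bucket-tagging',
      '--bucket', bucket,
      '--tagging', 'TagSet=[%s]' % tag_set,
      '--region=%s' % region,
  ])
```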
+ +"""Contains classes/functions related to S3.""" + +import json +import os +import posixpath +from typing import List + +from absl import flags +from absl import logging +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import object_storage_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import util + +FLAGS = flags.FLAGS + +AWS_CREDENTIAL_LOCATION = '.aws' +DEFAULT_AWS_REGION = 'us-east-1' +_READ = 's3:GetObject' +_WRITE = 's3:PutObject' + + +class S3Service(object_storage_service.ObjectStorageService): + """Interface to Amazon S3.""" + + STORAGE_NAME = providers.AWS + + region: str + + def PrepareService(self, location): + self.region = location or DEFAULT_AWS_REGION + + def MakeBucket(self, bucket_name, raise_on_failure=True): + command = [ + 'aws', 's3', 'mb', + 's3://%s' % bucket_name, + '--region=%s' % self.region + ] + _, stderr, ret_code = vm_util.IssueCommand(command, raise_on_failure=False) + if ret_code and raise_on_failure: + raise errors.Benchmarks.BucketCreationError(stderr) + + # Tag the bucket with the persistent timeout flag so that buckets can + # optionally stick around after PKB runs. + default_tags = util.MakeFormattedDefaultTags( + timeout_minutes=max(FLAGS.timeout_minutes, + FLAGS.persistent_timeout_minutes)) + tag_set = ','.join('{%s}' % tag for tag in default_tags) + vm_util.IssueRetryableCommand( + ['aws', 's3api', 'put-bucket-tagging', + '--bucket', bucket_name, + '--tagging', 'TagSet=[%s]' % tag_set, + '--region=%s' % self.region]) + + def Copy(self, src_url, dst_url, recursive=False): + """See base class.""" + cmd = ['aws', 's3', 'cp', '--region', self.region] + if recursive: + cmd.append('--recursive') + # Fix cp to mimic gsutil behavior + dst_url = os.path.join(dst_url, os.path.basename(src_url)) + cmd += [src_url, dst_url] + vm_util.IssueCommand(cmd) + + def CopyToBucket(self, src_path, bucket, object_path): + """See base class.""" + dst_url = self.MakeRemoteCliDownloadUrl(bucket, object_path) + vm_util.IssueCommand(['aws', 's3', 'cp', src_path, dst_url, + '--region', self.region]) + + def MakeRemoteCliDownloadUrl(self, bucket, object_path): + """See base class.""" + path = posixpath.join(bucket, object_path) + return 's3://' + path + + def GenerateCliDownloadFileCommand(self, src_url, local_path): + """See base class.""" + return 'aws s3 cp "%s" "%s" --region=%s' % ( + src_url, local_path, self.region) + + def List(self, bucket): + """See base class.""" + stdout, _, _ = vm_util.IssueCommand( + ['aws', 's3', 'ls', bucket, '--region', self.region]) + return stdout + + def ListTopLevelSubfolders(self, bucket): + """Lists the top level folders (not files) in a bucket. + + Each result that is a folder has "PRE" in front of the name (meaning + prefix), eg. "PRE customer/", so that part is removed from each line. When + there's more than one result, splitting on the newline returns a final blank + row, so blank values are skipped. + + Args: + bucket: Name of the bucket to list the top level subfolders of. + + Returns: + A list of top level subfolder names. Can be empty if there are no folders. 
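+      For example, listing output such as "PRE customer/" and "PRE vendor/"
+      yields ['customer', 'vendor'].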
+ """ + return [ + obj.split('PRE ')[1].strip().replace('/', '') + for obj in self.List(bucket).split('\n') + if obj and obj.endswith('/') + ] + + @vm_util.Retry() + def DeleteBucket(self, bucket): + """See base class.""" + + def _SuppressFailure(stdout, stderr, retcode): + """Suppresses failure when bucket does not exist.""" + del stdout # unused + if retcode and 'NoSuchBucket' in stderr: + return True + return False + + vm_util.IssueCommand( + ['aws', 's3', 'rb', + 's3://%s' % bucket, + '--region', self.region, + '--force'], # --force deletes even if bucket contains objects. + suppress_failure=_SuppressFailure) + + def EmptyBucket(self, bucket): + vm_util.IssueCommand( + ['aws', 's3', 'rm', + 's3://%s' % bucket, + '--region', self.region, + '--recursive']) + + def MakeBucketPubliclyReadable(self, bucket, also_make_writable=False): + """See base class.""" + actions = [_READ] + logging.warning('Making bucket %s publicly readable!', bucket) + if also_make_writable: + actions.append(_WRITE) + logging.warning('Making bucket %s publicly writable!', bucket) + vm_util.IssueCommand([ + 'aws', 's3api', 'put-bucket-policy', '--region', self.region, + '--bucket', bucket, '--policy', + _MakeS3BucketPolicy(bucket, actions) + ]) + + def GetDownloadUrl(self, bucket, object_name, use_https=True): + """See base class.""" + assert self.region + scheme = 'https' if use_https else 'http' + return f'{scheme}://{bucket}.s3.{self.region}.amazonaws.com/{object_name}' + + UPLOAD_HTTP_METHOD = 'PUT' + + def PrepareVM(self, vm): + vm.Install('awscli') + vm.Install('boto3') + + vm.PushFile( + object_storage_service.FindCredentialFile('~/' + + AWS_CREDENTIAL_LOCATION), + AWS_CREDENTIAL_LOCATION) + vm.PushFile(object_storage_service.FindBotoFile(), + object_storage_service.DEFAULT_BOTO_LOCATION_USER) + + def CleanupVM(self, vm): + vm.Uninstall('awscli') + + def CLIUploadDirectory(self, vm, directory, file_names, bucket): + return vm.RemoteCommand( + 'time aws s3 sync %s s3://%s/' % (directory, bucket)) + + def CLIDownloadBucket(self, vm, bucket, objects, dest): + return vm.RemoteCommand( + 'time aws s3 sync s3://%s/ %s' % (bucket, dest)) + + def Metadata(self, vm): + return { + object_storage_service.BOTO_LIB_VERSION: + linux_packages.GetPipPackageVersion(vm, 'boto3') + } + + def APIScriptArgs(self): + return ['--region=' + self.region] + + @classmethod + def APIScriptFiles(cls): + return ['s3.py'] + + +def _MakeS3BucketPolicy(bucket: str, + actions: List[str], + object_prefix='') -> str: + # https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_examples_s3_rw-bucket.html + return json.dumps({ + 'Version': + '2012-10-17', + 'Statement': [{ + 'Principal': '*', + 'Sid': 'PkbAcl', + 'Effect': 'Allow', + 'Action': actions, + 'Resource': [f'arn:aws:s3:::{bucket}/{object_prefix}*'] + }] + }) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/snowflake.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/snowflake.py new file mode 100644 index 0000000..611d9ee --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/snowflake.py @@ -0,0 +1,207 @@ +# Copyright 2020 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for Snowflake EDW service resource hosted on AWS.""" + +import copy +import json +from typing import Dict, List, Text, Tuple +from absl import flags +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import providers + + +FLAGS = flags.FLAGS + + +def GetSnowflakeClientInterface(warehouse: str, database: str, + schema: str) -> edw_service.EdwClientInterface: + """Builds and Returns the requested Snowflake client Interface. + + Args: + warehouse: String name of the Snowflake virtual warehouse to use during the + benchmark + database: String name of the Snowflake database to use during the benchmark + schema: String name of the Snowflake schema to use during the benchmark + + Returns: + A concrete Client Interface object (subclass of EdwClientInterface) + + Raises: + RuntimeError: if an unsupported snowflake_client_interface is requested + """ + if FLAGS.snowflake_client_interface == 'JDBC': + return JdbcClientInterface(warehouse, database, schema) + raise RuntimeError('Unknown Snowflake Client Interface requested.') + + +class JdbcClientInterface(edw_service.EdwClientInterface): + """Jdbc Client Interface class for Snowflake. + + Attributes: + warehouse: String name of the virtual warehouse used during benchmark + database: String name of the database to benchmark + schema: String name of the schema to benchmark + """ + + def __init__(self, warehouse: str, database: str, schema: str): + self.warehouse = warehouse + self.database = database + self.schema = schema + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs a java client application that uses the JDBC driver for connecting + to a database server. + https://docs.snowflake.com/en/user-guide/jdbc.html + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + self.client_vm.Install('openjdk') + + # Push the executable jar to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData( + package_name, ['snowflake-jdbc-client-2.0.jar'], '') + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. 
job_id + """ + query_command = ('java -cp snowflake-jdbc-client-2.0.jar ' + 'com.google.cloud.performance.edw.Single --warehouse {} ' + '--database {} --schema {} --query_file {}').format( + self.warehouse, self.database, self.schema, query_name) + stdout, _ = self.client_vm.RemoteCommand(query_command) + details = copy.copy(self.GetMetadata()) # Copy the base metadata + details.update(json.loads(stdout)['details']) + return json.loads(stdout)['query_wall_time_in_secs'], details + + def ExecuteSimultaneous(self, submission_interval: int, + queries: List[str]) -> str: + """Executes queries simultaneously on client and return performance details. + + Simultaneous app expects queries as white space separated query file names. + + Args: + submission_interval: Simultaneous query submission interval in + milliseconds. + queries: List of strings (names) of queries to execute. + + Returns: + A serialized dictionary of execution details. + """ + query_command = ( + 'java -cp snowflake-jdbc-client-2.0.jar ' + 'com.google.cloud.performance.edw.Simultaneous --warehouse {} ' + '--database {} --schema {} --submission_interval {} --query_files {}' + ).format(self.warehouse, self.database, self.schema, submission_interval, + ' '.join(queries)) + stdout, _ = self.client_vm.RemoteCommand(query_command) + return stdout + + def ExecuteThroughput(self, concurrency_streams: List[List[str]]) -> str: + """Executes a throughput test and returns performance details. + + Args: + concurrency_streams: List of streams to execute simultaneously, each of + which is a list of string names of queries. + + Returns: + A serialized dictionary of execution details. + """ + query_command = ('java -cp snowflake-jdbc-client-2.0.jar ' + 'com.google.cloud.performance.edw.Throughput --warehouse' + ' {} --database {} --schema {} --query_streams {}').format( + self.warehouse, self.database, self.schema, ' '.join([ + ','.join(stream) for stream in concurrency_streams + ])) + stdout, _ = self.client_vm.RemoteCommand(query_command) + return stdout + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + return {'client': FLAGS.snowflake_client_interface} + + +class Snowflake(edw_service.EdwService): + """Object representing a Snowflake Data Warehouse Instance hosted on AWS.""" + CLOUD = providers.AWS + SERVICE_TYPE = 'snowflake_aws' + + def __init__(self, edw_service_spec): + super(Snowflake, self).__init__(edw_service_spec) + self.warehouse = FLAGS.snowflake_warehouse + self.database = FLAGS.snowflake_database + self.schema = FLAGS.snowflake_schema + self.client_interface = GetSnowflakeClientInterface(self.warehouse, + self.database, + self.schema) + + def IsUserManaged(self, edw_service_spec): + # TODO(saksena): Remove the assertion after implementing provisioning of + # virtual warehouses. + return True + + def _Create(self): + """Create a Snowflake cluster.""" + raise NotImplementedError + + def _Exists(self): + """Method to validate the existence of a Snowflake cluster. + + Returns: + Boolean value indicating the existence of a cluster. 
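+      Snowflake is treated as a user managed service here (see IsUserManaged),
+      so existence is assumed and True is always returned.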
+ """ + return True + + def _Delete(self): + """Delete a Snowflake cluster.""" + raise NotImplementedError + + def GetMetadata(self): + """Return a metadata dictionary of the benchmarked Snowflake cluster.""" + basic_data = super(Snowflake, self).GetMetadata() + basic_data['warehouse'] = self.warehouse + basic_data['database'] = self.database + basic_data['schema'] = self.schema + basic_data.update(self.client_interface.GetMetadata()) + return basic_data + + +class Snowflakeexternal(Snowflake): + """Class representing Snowflake External Warehouses.""" + + SERVICE_TYPE = 'snowflakeexternal_aws' + + def GetMetadata(self) -> Dict[str, str]: + """Return a dictionary of the metadata for the Snowflake External service. + + Returns: + A dictionary set to service details. + """ + basic_data = super(Snowflakeexternal, self).GetMetadata() + basic_data['edw_service_type'] = Snowflakeexternal.SERVICE_TYPE + basic_data.update(self.client_interface.GetMetadata()) + return basic_data diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/spectrum.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/spectrum.py new file mode 100644 index 0000000..3d70577 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/spectrum.py @@ -0,0 +1,137 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for AWS's Spectrum EDW service. + +Clusters can be created (based on new configuration or restored from a snapshot) +and deleted. +""" + +import json +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.aws import redshift +from perfkitbenchmarker.providers.aws import util + + +FLAGS = flags.FLAGS + + +READY_STATUSES = ['available'] +SNAPSHOT_READY_STATUSES = ['completed'] + + +def AddTags(resource_arn, region): + """Adds tags to a Redshift cluster created by PerfKitBenchmarker. + + Args: + resource_arn: The arn of AWS resource to operate on. + region: The AWS region resource was created in. + """ + cmd_prefix = util.AWS_PREFIX + tag_cmd = cmd_prefix + ['redshift', 'create-tags', '--region=%s' % region, + '--resource-name', resource_arn, '--tags'] + tag_cmd += util.MakeFormattedDefaultTags() + vm_util.IssueCommand(tag_cmd) + + +class AddingIAMRole(object): + """IAM Role to associate with the cluster. + + IAM Role can be associated with the cluster to access to other services such + as S3. 
+ + Attributes: + cluster_identifier: Identifier of the cluster + iam_role_name: Role name of the IAM + """ + + def __init__(self, cluster_identifier, iam_role_name, cmd_prefix): + self.cmd_prefix = cmd_prefix + self.cluster_identifier = cluster_identifier + self.iam_role_name = iam_role_name + cmd = self.cmd_prefix + ['redshift', + 'modify-cluster-iam-roles', + '--cluster-identifier', + self.cluster_identifier, + '--add-iam-roles', + self.iam_role_name] + vm_util.IssueCommand(cmd) + + +class Spectrum(redshift.Redshift): + """Object representing a Spectrum cluster. + + Attributes: + cluster_id: ID of the cluster. + project: ID of the project. + """ + + SERVICE_TYPE = 'spectrum' + + def __init__(self, edw_service_spec): + super(Spectrum, self).__init__(edw_service_spec) + # Cluster setup attributes + self.iam_role = edw_service_spec.iam_role + + def _IsReady(self): + """Method to return if the cluster is ready to handle queries.""" + return self._IsClusterReady() and self._IsSnapshotRestored() + + def _IsClusterReady(self): + """Method to return if the cluster is ready.""" + stdout, _, _ = self.__DescribeCluster() + return json.loads(stdout)['Clusters'][0]['ClusterStatus'] in READY_STATUSES + + def __DescribeCluster(self): + """Describe a spectrum cluster.""" + cmd = self.cmd_prefix + ['redshift', 'describe-clusters', + '--cluster-identifier', self.cluster_identifier] + return vm_util.IssueCommand(cmd) + + def _IsSnapshotRestored(self): + """Method to return if the cluster snapshot is completed restoring.""" + stdout, _, _, = self.__DescribeCluster() + return (json.loads(stdout)['Clusters'][0]['RestoreStatus']['Status'] in + SNAPSHOT_READY_STATUSES) + + def _PostCreate(self): + """Perform general post create operations on the cluster. + + Get the endpoint to be used for interacting with the cluster and apply + tags on the cluster. + """ + @vm_util.Retry(poll_interval=self.POLL_INTERVAL, fuzz=0, + timeout=self.READY_TIMEOUT, + retryable_exceptions=( + errors.Resource.RetryableCreationError,)) + def WaitUntilReady(): + if not self._IsReady(): + raise errors.Resource.RetryableCreationError('Adding IAM Role') + + stdout, _, _ = self.__DescribeCluster() + self.adding_iam_role = None + if self.iam_role is not None: + self.adding_iam_role = AddingIAMRole(self.cluster_identifier, + self.iam_role, + self.cmd_prefix) + WaitUntilReady() + + stdout, _, _ = self.__DescribeCluster() + self.endpoint = json.loads(stdout)['Clusters'][0]['Endpoint']['Address'] + account = util.GetAccount() + self.arn = 'arn:aws:redshift:{}:{}:cluster:{}'.format(self.region, account, + self. + cluster_identifier) + AddTags(self.arn, self.region) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/aws/util.py b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/util.py new file mode 100644 index 0000000..7be6659 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/aws/util.py @@ -0,0 +1,297 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
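For orientation, a short usage sketch of the tag-formatting helpers defined below, assuming the module is importable as perfkitbenchmarker.providers.aws.util:

```python
from perfkitbenchmarker.providers.aws import util

tags = {'owner': 'perfkit', 'timeout_utc': '1700000000'}

print(util.FormatTags(tags))
# ['Key=owner,Value=perfkit', 'Key=timeout_utc,Value=1700000000']

print(util.FormatTagSpecifications('instance', tags))
# ResourceType=instance,Tags=[{Key=owner,Value=perfkit},{Key=timeout_utc,Value=1700000000}]
```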
+ +"""Utilities for working with Amazon Web Services resources.""" + + +import collections +import json +import re +import string +from typing import Dict, Set +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import vm_util +import six + +AWS_PATH = 'aws' +AWS_PREFIX = [AWS_PATH, '--output', 'json'] +FLAGS = flags.FLAGS +STOCKOUT_MESSAGE = ('Creation failed due to insufficient capacity indicating a ' + 'potential stockout scenario.') + + +def IsRegion(zone_or_region): + """Returns whether "zone_or_region" is a region.""" + if not re.match(r'[a-z]{2}-[a-z]+-[0-9][a-z]?$', zone_or_region): + raise ValueError( + '%s is not a valid AWS zone or region name' % zone_or_region) + return zone_or_region[-1] in string.digits + + +def GetRegionFromZone(zone_or_region: str) -> str: + """Returns the region a zone is in (or "zone_or_region" if it's a region).""" + if IsRegion(zone_or_region): + return zone_or_region + return zone_or_region[:-1] + + +def GetRegionFromZones(zones): + """Returns the region a set of zones are in. + + Args: + zones: A set of zones. + Raises: + Exception: if the zones are in different regions. + """ + region = None + for zone in zones: + current_region = GetRegionFromZone(zone) + if region is None: + region = current_region + else: + if region != current_region: + raise Exception('Not All zones are in the same region %s not same as ' + '%s. zones: %s' % + (region, current_region, ','.join(zones))) + return region + + +def GetZonesInRegion(region: str) -> Set[str]: + """Returns all available zones in a given region.""" + get_zones_cmd = AWS_PREFIX + [ + 'ec2', + 'describe-availability-zones', + '--region={0}'.format(region) + ] + stdout, _, _ = vm_util.IssueCommand(get_zones_cmd) + response = json.loads(stdout) + return { + item['ZoneName'] + for item in response['AvailabilityZones'] + if item['State'] == 'available' + } + + +def GetZonesFromMachineType() -> Set[str]: + """Returns all available zones for a given machine type.""" + zones = set() + for region in GetAllRegions(): + get_zones_cmd = AWS_PREFIX + [ + 'ec2', 'describe-instance-type-offerings', + '--location-type=availability-zone', f'--region={region}' + ] + AwsFilter({'instance-type': FLAGS.machine_type}) + stdout, _, _ = vm_util.IssueCommand(get_zones_cmd) + response = json.loads(stdout) + for item in response['InstanceTypeOfferings']: + zones.add(item['Location']) + return zones + + +def GetAllRegions() -> Set[str]: + """Returns all enabled AWS regions.""" + get_regions_cmd = AWS_PREFIX + [ + 'ec2', + 'describe-regions', + ] + stdout, _, _ = vm_util.IssueCommand(get_regions_cmd) + response = json.loads(stdout) + return { + item['RegionName'] + for item in response['Regions'] + if item['OptInStatus'] in ('opt-in-not-required', 'opted-in') + } + + +def GetGeoFromRegion(region: str) -> str: + """Gets valid geo from the region, i.e. 
region us-west-1 returns us.""" + return region.split('-')[0] + + +def GetRegionsInGeo(geo: str) -> Set[str]: + """Gets valid regions in the geo.""" + return {region for region in GetAllRegions() if region.startswith(geo)} + + +def GetAllZones() -> Set[str]: + """Returns all available AWS zones.""" + results = set() + for region in GetAllRegions(): + results.update(GetZonesInRegion(region)) + return results + + +def GroupZonesIntoRegions(zones): + """Returns a map of regions to zones.""" + regions_to_zones_map = collections.defaultdict(set) + for zone in zones: + region = GetRegionFromZone(zone) + regions_to_zones_map[region].add(zone) + return regions_to_zones_map + + +def FormatTags(tags_dict): + """Format a dict of tags into arguments for 'tag' parameter. + + Args: + tags_dict: Tags to be formatted. + + Returns: + A list of tags formatted as arguments for 'tag' parameter. + """ + return [ + 'Key=%s,Value=%s' % (k, v) for k, v in sorted(six.iteritems(tags_dict)) + ] + + +def FormatTagSpecifications(resource_type, tags_dict): + """Format a dict of tags into arguments for 'tag-specifications' parameter. + + Args: + resource_type: resource type to be tagged. + tags_dict: Tags to be formatted. + + Returns: + A list of tags formatted as arguments for 'tag-specifications' parameter. + """ + tags = ','.join('{Key=%s,Value=%s}' % + (k, v) for k, v in six.iteritems(tags_dict)) + return 'ResourceType=%s,Tags=[%s]' % (resource_type, tags) + + +def AddTags(resource_id, region, **kwargs): + """Adds tags to an AWS resource created by PerfKitBenchmarker. + + Args: + resource_id: An extant AWS resource to operate on. + region: The AWS region 'resource_id' was created in. + **kwargs: dict. Key-value pairs to set on the instance. + """ + if not kwargs: + return + + tag_cmd = AWS_PREFIX + [ + 'ec2', + 'create-tags', + '--region=%s' % region, + '--resources', resource_id, + '--tags'] + FormatTags(kwargs) + IssueRetryableCommand(tag_cmd) + + +def MakeDefaultTags(timeout_minutes=None): + """Default tags for an AWS resource created by PerfKitBenchmarker. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + Dict of default tags, contributed from the benchmark spec. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if not benchmark_spec: + return {} + return benchmark_spec.GetResourceTags(timeout_minutes=timeout_minutes) + + +def MakeFormattedDefaultTags(timeout_minutes=None): + """Get the default tags formatted correctly for --tags parameter.""" + return FormatTags(MakeDefaultTags(timeout_minutes=timeout_minutes)) + + +def AddDefaultTags(resource_id, region): + """Adds tags to an AWS resource created by PerfKitBenchmarker. + + By default, resources are tagged with "owner" and "perfkitbenchmarker-run" + key-value + pairs. + + Args: + resource_id: An extant AWS resource to operate on. + region: The AWS region 'resource_id' was created in. + """ + tags = MakeDefaultTags() + AddTags(resource_id, region, **tags) + + +def _GetCallerId() -> Dict[str, str]: + cmd = AWS_PREFIX + ['sts', 'get-caller-identity'] + stdout, _, _ = vm_util.IssueCommand(cmd) + return json.loads(stdout) + + +def GetAccount() -> str: + """Retrieve details about the current IAM identity. + + http://docs.aws.amazon.com/cli/latest/reference/sts/get-caller-identity.html + + Returns: + A string of the AWS account ID number of the account that owns or contains + the calling entity. 
+ """ + return _GetCallerId()['Account'] + + +def GetCallerArn() -> str: + """Retrieve the ARN of the AWS credentials used.""" + return _GetCallerId()['Arn'] + + +@vm_util.Retry() +def IssueRetryableCommand(cmd, env=None, suppress_failure=None): + """Tries running the provided command until it succeeds or times out. + + On Windows, the AWS CLI doesn't correctly set the return code when it + has an error (at least on version 1.7.28). By retrying the command if + we get output on stderr, we can work around this issue. + + Args: + cmd: A list of strings such as is given to the subprocess.Popen() + constructor. + env: An alternate environment to pass to the Popen command. + suppress_failure: A function to pass to vm_util.IssueCommand() + + Returns: + A tuple of stdout and stderr from running the provided command. + """ + stdout, stderr, retcode = vm_util.IssueCommand( + cmd, env=env, raise_on_failure=False, suppress_failure=suppress_failure) + if retcode: + raise errors.VmUtil.CalledProcessException( + 'Command returned a non-zero exit code.\n') + if stderr: + raise errors.VmUtil.CalledProcessException( + 'The command had output on stderr:\n%s' % stderr) + return stdout, stderr + + +def AwsFilter(filter_keys_and_values): + """Returns a list suitable for an AWS command line filter. + + Example: + AwsFilter({'a': 'b', 'c': 'd'}) returns a three element array: + ['--filters', 'Name=a,Values=b', 'Name=c,Values=d'] + + For an example see + https://docs.aws.amazon.com/cli/latest/reference/ec2/describe-instances.html#options + + Args: + filter_keys_and_values: A dict with the key as the name of the AWS attribute + and the value is the value of that attribute + """ + filters = ['--filters'] + for name, value in sorted(filter_keys_and_values.items()): + filters.append('Name={},Values={}'.format(name, value)) + return filters diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/__init__.py new file mode 100644 index 0000000..bf3b09c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/__init__.py @@ -0,0 +1,16 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Provider for Azure.""" + +AZURE_PATH = 'az' diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_blob_storage.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_blob_storage.py new file mode 100644 index 0000000..81806ac --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_blob_storage.py @@ -0,0 +1,263 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains classes/functions related to Azure Blob Storage.""" + +import datetime +import json +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import object_storage_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network + +FLAGS = flags.FLAGS + +DEFAULT_AZURE_REGION = 'eastus2' + + +class AzureBlobStorageService(object_storage_service.ObjectStorageService): + """Interface to Azure Blob Storage. + + Relevant documentation: + http://azure.microsoft.com/en-us/documentation/articles/xplat-cli/ + """ + + def __init__(self): + self.storage_account = None + self.resource_group = None + + STORAGE_NAME = providers.AZURE + + def PrepareService(self, + region, + existing_storage_account_and_resource_group=None, + try_to_create_storage_account_and_resource_group=False): + """See base class (without additional args). + + TODO(deitz): We should use the same interface across the clouds without + additional arguments. + + Args: + region: where to place our data. + existing_storage_account_and_resource_group: An existing storage account + and resource group for reading objects that may have already been + created. + try_to_create_storage_account_and_resource_group: Whether to try to create + the storage account and resource group in case it does not exist yet. + This supports invoking the object_storage_service_benchmark multiple + times on the same bucket name and creating the resource group the first + time. While this defaults to False, if there is no existing storage + account and resource group passed to this function via + existing_storage_account_and_resource_group, then one will be created. + """ + # abs is "Azure Blob Storage" + prefix = 'pkb%sabs' % FLAGS.run_uri + + # Maybe extract existing storage account and resource group names + existing_storage_account, existing_resource_group = None, None + if existing_storage_account_and_resource_group: + existing_storage_account, existing_resource_group = ( + existing_storage_account_and_resource_group) + assert existing_storage_account is not None + assert existing_resource_group is not None + else: + # We don't have an existing storage account or resource group so we better + # create one. + try_to_create_storage_account_and_resource_group = True + storage_account_name = existing_storage_account or prefix + 'storage' + resource_group_name = existing_resource_group or prefix + '-resource-group' + + # If we have an existing storage account and resource, we typically would + # not try to create it. If try_to_create_storage_account_and_resource_group + # is True, however, then we do try to create it. In this case, we shouldn't + # raise on a failure since it may already exist. 
+ raise_on_create_failure = not ( + existing_storage_account_and_resource_group and + try_to_create_storage_account_and_resource_group) + + # We use a separate resource group so that our buckets can optionally stick + # around after PKB runs. This is useful for things like cold reads tests + self.resource_group = azure_network.AzureResourceGroup( + resource_group_name, + use_existing=not try_to_create_storage_account_and_resource_group, + timeout_minutes=max(FLAGS.timeout_minutes, + FLAGS.persistent_timeout_minutes), + raise_on_create_failure=raise_on_create_failure) + self.resource_group.Create() + + # We use a different Azure storage account than the VM account + # because a) we need to be able to set the storage class + # separately, including using a blob-specific storage account and + # b) this account might be in a different location than any + # VM-related account. + self.storage_account = azure_network.AzureStorageAccount( + FLAGS.azure_storage_type, + region or DEFAULT_AZURE_REGION, + storage_account_name, + kind=FLAGS.azure_blob_account_kind, + resource_group=self.resource_group, + use_existing=not try_to_create_storage_account_and_resource_group, + raise_on_create_failure=raise_on_create_failure) + self.storage_account.Create() + + def CleanupService(self): + if hasattr(self, 'storage_account') and self.storage_account: + self.storage_account.Delete() + if hasattr(self, 'resource_group') and self.resource_group: + self.resource_group.Delete() + + def MakeBucket(self, bucket, raise_on_failure=True): + _, stderr, ret_code = vm_util.IssueCommand( + [azure.AZURE_PATH, 'storage', 'container', 'create', '--name', bucket] + + self.storage_account.connection_args, + raise_on_failure=False) + if ret_code and raise_on_failure: + raise errors.Benchmarks.BucketCreationError(stderr) + + def DeleteBucket(self, bucket): + if (not hasattr(self, 'storage_account') or + not self.storage_account or + not hasattr(self.storage_account, 'connection_args') or + not self.storage_account.connection_args): + logging.warning( + 'storage_account not properly configured. 
Skipping DeleteBucket %s', + bucket) + return + + vm_util.IssueCommand( + [azure.AZURE_PATH, 'storage', 'container', 'delete', '--name', bucket] + + self.storage_account.connection_args, + raise_on_failure=False) + + def Copy(self, src_url, dst_url, recursive=False): + """See base class.""" + raise NotImplementedError() + + def CopyToBucket(self, src_path, bucket, object_path): + vm_util.IssueCommand(['az', 'storage', 'blob', 'upload', + '--account-name', self.storage_account.name, + '--file', src_path, + '--container', bucket, + '--name', object_path] + + self.storage_account.connection_args) + + def _GenerateDownloadToken(self, bucket, object_path): + blob_store_expiry = datetime.datetime.utcnow() + datetime.timedelta( + days=365) + stdout, _, _ = vm_util.IssueCommand([ + 'az', 'storage', 'blob', 'generate-sas', + '--account-name', self.storage_account.name, + '--container-name', bucket, + '--name', object_path, + '--expiry', blob_store_expiry.strftime('%Y-%m-%dT%H:%M:%SZ'), + '--permissions', 'r' + ] + self.storage_account.connection_args) + token = stdout.strip('\n').strip('"') + return token + + def MakeRemoteCliDownloadUrl(self, bucket, object_path): + """See base class.""" + token = self._GenerateDownloadToken(bucket, object_path) + url = 'https://{acc}.blob.core.windows.net/{con}/{src}?{tkn}'.format( + acc=self.storage_account.name, + con=bucket, + src=object_path, + tkn=token) + return url + + def GenerateCliDownloadFileCommand(self, src_url, dst_url): + """See base class.""" + return 'wget -O {dst_url} "{src_url}"'.format(src_url=src_url, + dst_url=dst_url) + + def List(self, bucket): + """See base class.""" + stdout, _, _ = vm_util.IssueCommand([ + 'az', 'storage', 'blob', 'list', '--container-name', bucket, + '--account-name', self.storage_account.name + ]) + return [metadata['name'] for metadata in json.loads(str(stdout))] + + def ListTopLevelSubfolders(self, bucket): + """Lists the top level folders (not files) in a bucket. + + Each listed item is a full file name, eg. "supplier/supplier.csv", so just + the high level folder name is extracted, and repetitions are eliminated for + when there's multiple files in a folder. + + Args: + bucket: Name of the bucket to list the top level subfolders of. + + Returns: + A list of top level subfolder names. Can be empty if there are no folders. + """ + unique_folders = set([ + obj.split('/')[0].strip() + for obj in self.List(bucket) + if obj and obj.contains('/') + ]) + return list(unique_folders) + + def EmptyBucket(self, bucket): + # Emptying buckets on Azure is hard. We pass for now - this will + # increase our use of storage space, but should not affect the + # benchmark results. 
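+ # Leaving objects behind is safe here: the blobs live in the storage account
+ # created by PrepareService, so they are reclaimed when CleanupService deletes
+ # that account and its resource group.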
+ pass + + def PrepareVM(self, vm): + vm.Install('azure_cli') + vm.Install('azure_sdk') + vm.Install('azure_credentials') + + def CLIUploadDirectory(self, vm, directory, file_names, bucket): + return vm.RemoteCommand( + ('time for file in {files}; ' + 'do azure storage blob upload -q {directory}/$file {bucket} ' + '--connection-string {connection_string}; ' + 'done').format( + files=' '.join(file_names), + directory=directory, + bucket=bucket, + connection_string=self.storage_account.connection_string)) + + def CLIDownloadBucket(self, vm, bucket, objects, dest): + return vm.RemoteCommand( + ('time for object in {objects}; ' + 'do azure storage blob download {bucket} $object {dest} ' + '--connection-string {connection_string}; ' + 'done').format( + objects=' '.join(objects), + bucket=bucket, + dest=dest, + connection_string=self.storage_account.connection_string)) + + def Metadata(self, vm): + return { + 'azure_lib_version': linux_packages.GetPipPackageVersion(vm, 'azure') + } + + def APIScriptArgs(self): + return [ + '--azure_account=%s' % self.storage_account.name, + '--azure_key=%s' % self.storage_account.key + ] + + @classmethod + def APIScriptFiles(cls): + return ['azure_service.py'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_container_instances.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_container_instances.py new file mode 100644 index 0000000..79a691f --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_container_instances.py @@ -0,0 +1,171 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains classes/functions related to Azure Container Instances. + +For now, these only support benchmarks that don't make use of the container's +private ip since they can't be connected to vnets. 
+""" + +import json + +from absl import flags +from perfkitbenchmarker import container_service +from perfkitbenchmarker import context +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network +from perfkitbenchmarker.providers.azure import util + +FLAGS = flags.FLAGS + + +class AciContainer(container_service.BaseContainer): + """Class representing an ACI container.""" + + def __init__(self, container_spec, name, resource_group): + super(AciContainer, self).__init__(container_spec) + self.name = name + self.resource_group = resource_group + benchmark_spec = context.GetThreadBenchmarkSpec() + self.registry = benchmark_spec.container_registry + + def _Create(self): + """Creates the container.""" + create_cmd = [ + azure.AZURE_PATH, + 'container', + 'create', + '--name', + self.name, + '--image', + self.image, + '--restart-policy', + 'Never', + '--cpu', + str(int(self.cpus)), + '--memory', + '%0.1f' % (self.memory / 1024.0), + ] + self.resource_group.args + if self.registry and self.registry.CLOUD == providers.AZURE: + create_cmd.extend([ + '--registry-login-server', + self.registry.login_server, + '--registry-username', + self.registry.service_principal.app_id, + '--registry-password', + self.registry.service_principal.password, + ]) + if self.command: + # Note that this is inconsistent with other containers which use lists + # of command/args. This creates some differences mostly when + # the command contains quotes. + create_cmd.extend(['--command-line', ' '.join(self.command)]) + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """Deletes the container.""" + delete_cmd = [ + azure.AZURE_PATH, + 'container', + 'delete', + '--name', + self.name, + '--yes', + ] + self.resource_group.args + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + @property + def ip_address(self): + """Container instances don't have private ips yet.""" + raise NotImplementedError('ACI containers don\'t have private ips.') + + @ip_address.setter + def ip_address(self, value): + """Sets the containers ip_address.""" + self.__ip_address = value + + def _GetContainerInstance(self): + """Gets a representation of the container and returns it.""" + show_cmd = [azure.AZURE_PATH, 'container', 'show', '--name', self.name + ] + self.resource_group.args + stdout, _, _ = vm_util.IssueCommand(show_cmd) + return json.loads(stdout) + + def _IsReady(self): + """Returns true if the container has stopped pending.""" + state = self._GetContainerInstance()['instanceView']['state'] + return state != 'Pending' + + def WaitForExit(self, timeout=None): + """Waits until the container has finished running.""" + + @vm_util.Retry( + timeout=timeout, + retryable_exceptions=(container_service.RetriableContainerException,)) + def _WaitForExit(): + container = self._GetContainerInstance()['containers'][0] + state = container['instanceView']['currentState']['state'] + if state != 'Terminated': + raise container_service.RetriableContainerException( + f'Container in ({state}). 
Not yet in expected state Terminated.') + return container + + return _WaitForExit() + + def GetLogs(self): + """Returns the logs from the container.""" + logs_cmd = [azure.AZURE_PATH, 'container', 'logs', '--name', self.name + ] + self.resource_group.args + stdout, _, _ = vm_util.IssueCommand(logs_cmd) + return stdout + + +class AciCluster(container_service.BaseContainerCluster): + """Class that can deploy ACI containers.""" + + CLOUD = providers.AZURE + CLUSTER_TYPE = 'aci' + + def __init__(self, cluster_spec): + super(AciCluster, self).__init__(cluster_spec) + self.region = util.GetRegionFromZone(self.zone) + self.resource_group = azure_network.GetResourceGroup(self.region) + + def _Create(self): + """ACI has no cluster.""" + pass + + def _Delete(self): + """ACI has no cluster.""" + pass + + def _CreateDependencies(self): + """Creates the resource group.""" + self.resource_group.Create() + + def _DeleteDependencies(self): + """Deletes the resource group.""" + self.resource_group.Delete() + + def DeployContainer(self, base_name, container_spec): + """Deploys Containers according to the ContainerSpec.""" + name = base_name + str(len(self.containers[base_name])) + container = AciContainer(container_spec, name, self.resource_group) + self.containers[base_name].append(container) + container.Create() + + def DeployContainerService(self, name, container_spec): + """Deploys a ContainerSerivice according to the ContainerSpec.""" + raise NotImplementedError() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_disk.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_disk.py new file mode 100644 index 0000000..9ae8945 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_disk.py @@ -0,0 +1,278 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to Azure disks. + +Disks can be created, deleted, attached to VMs, and detached from VMs. +At this time, Azure only supports one disk type, so the disk spec's disk type +is ignored. +See http://msdn.microsoft.com/en-us/library/azure/dn790303.aspx for more +information about azure disks. +""" + + +import itertools +import json +import re +import threading + +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network +from perfkitbenchmarker.providers.azure import flags as azure_flags +from perfkitbenchmarker.providers.azure import util +from six.moves import range + +FLAGS = flags.FLAGS + +MAX_DRIVE_SUFFIX_LENGTH = 2 # Last allowable device is /dev/sdzz. 
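`MAX_DRIVE_SUFFIX_LENGTH` above bounds the `/dev/sd*` suffix scheme that `_GenerateDrivePathSuffixes` produces and that `AzureDisk.GetDevicePath` indexes into later in this file. A condensed, self-contained sketch of that mapping for the remote, non-NVMe case (function names here are illustrative, not the module's own):

```python
import itertools
import string

MAX_DRIVE_SUFFIX_LENGTH = 2  # last allowable device is /dev/sdzz


def drive_suffixes(max_length=MAX_DRIVE_SUFFIX_LENGTH):
    """Yields 'a'..'z', then 'aa'..'zz', matching the kernel's sd naming."""
    for length in range(1, max_length + 1):
        for combo in itertools.product(string.ascii_lowercase, repeat=length):
            yield ''.join(combo)


def remote_device_path(lun, has_temp_drive):
    """Maps an Azure data-disk LUN to the expected /dev/sdX device path."""
    suffixes = list(drive_suffixes())
    start_index = 1          # /dev/sda is always the OS disk
    if has_temp_drive:
        start_index += 1     # /dev/sdb is the temporary disk, when present
    return '/dev/sd' + suffixes[start_index + lun]


assert remote_device_path(lun=0, has_temp_drive=True) == '/dev/sdc'
assert remote_device_path(lun=0, has_temp_drive=False) == '/dev/sdb'
assert remote_device_path(lun=25, has_temp_drive=True) == '/dev/sdab'
```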
+ +PREMIUM_STORAGE = 'Premium_LRS' +STANDARD_DISK = 'Standard_LRS' +ULTRA_STORAGE = 'UltraSSD_LRS' + +DISK_TYPE = {disk.STANDARD: STANDARD_DISK, disk.REMOTE_SSD: PREMIUM_STORAGE} + +HOST_CACHING = 'host_caching' + +AZURE = 'Azure' +disk.RegisterDiskTypeMap(AZURE, DISK_TYPE) + +AZURE_REPLICATION_MAP = { + azure_flags.LRS: disk.ZONE, + azure_flags.ZRS: disk.REGION, + # Deliberately omitting PLRS, because that is set explicty in __init__, + # and (RA)GRS, because those are asynchronously replicated. +} + +LOCAL_SSD_PREFIXES = {'Standard_D', 'Standard_G', 'Standard_L'} + +AZURE_NVME_TYPES = [ + r'(Standard_L[0-9]+s_v2)', +] + +# https://docs.microsoft.com/en-us/azure/virtual-machines/azure-vms-no-temp-disk +# D/Ev4 and D/E(i)sv4 VMs do not have tmp/OS disk; Dv3, Dsv3, and Ddv4 VMs do. +# Same for *v5, including Milan machines. +AZURE_NO_TMP_DISK_TYPES = [ + r'(Standard_D[0-9]+s?_v4)', + r'(Standard_E[0-9]+i?s?_v4)', + r'(Standard_D[0-9]+s?_v5)', + r'(Standard_E[0-9]+i?s?_v5)', + r'(Standard_D[0-9]+as?_v5)', + r'(Standard_E[0-9]+as?_v5)', + + r'(Standard_D[0-9]+ps_v5)', + r'(Standard_D[0-9]+pls_v5)', + r'(Standard_E[0-9]+ps_v5)', +] + + +def _ProductWithIncreasingLength(iterable, max_length): + """Yields increasing length cartesian products of iterable.""" + for length in range(1, max_length + 1): + for p in itertools.product(iterable, repeat=length): + yield p + + +def _GenerateDrivePathSuffixes(machine_type): + """Yields drive path suffix strings. + + Drive path suffixes in the form 'a', 'b', 'c', 'd', ..., 'z', 'aa', 'ab', etc. + Note: the os-disk will be /dev/sda, and the temporary disk will be /dev/sdb: + https://docs.microsoft.com/en-us/azure/virtual-machines/linux/faq#can-i-use-the-temporary-disk-devsdb1-to-store-data + Some newer VMs (e.g. Dsv4 VMs) do not have temporary disks. + + The linux kernel code that determines this naming can be found here: + https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/drivers/scsi/sd.c?h=v2.6.37#n2262 + + Quoting the link from above: + SCSI disk names starts at sda. The 26th device is sdz and the 27th is sdaa. + The last one for two lettered suffix is sdzz which is followed by sdaaa. 
+ """ + character_range = range(ord('a'), ord('z') + 1) + products = _ProductWithIncreasingLength( + character_range, MAX_DRIVE_SUFFIX_LENGTH) + + for p in products: + yield ''.join(chr(c) for c in p) + + +class TooManyAzureDisksError(Exception): + """Exception raised when too many disks are attached.""" + pass + + +def LocalDiskIsSSD(machine_type): + """Check whether the local disk is an SSD drive.""" + + return any((machine_type.startswith(prefix) for prefix in LOCAL_SSD_PREFIXES)) + + +def LocalDriveIsNvme(machine_type): + """Check if the machine type uses NVMe driver.""" + return any( + re.search(machine_series, machine_type) + for machine_series in AZURE_NVME_TYPES) + + +def HasTempDrive(machine_type): + """Check if the machine type has the temp drive (sdb).""" + return not any( + re.search(machine_series, machine_type) + for machine_series in AZURE_NO_TMP_DISK_TYPES) + + +class AzureDisk(disk.BaseDisk): + """Object representing an Azure Disk.""" + + _lock = threading.Lock() + + def __init__(self, + disk_spec, + vm, + lun, + is_image=False): + super(AzureDisk, self).__init__(disk_spec) + self.host_caching = FLAGS.azure_host_caching + self.vm = vm + self.vm_name = vm.name + self.name = self.vm_name + str(lun) + self.resource_group = azure_network.GetResourceGroup() + self.storage_account = vm.storage_account + # lun is Azure's abbreviation for "logical unit number" + self.lun = lun + self.is_image = is_image + self._deleted = False + self.machine_type = vm.machine_type + if self.disk_type == PREMIUM_STORAGE: + self.metadata.update({ + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + HOST_CACHING: self.host_caching, + }) + elif self.disk_type == STANDARD_DISK: + self.metadata.update({ + disk.MEDIA: disk.HDD, + disk.REPLICATION: AZURE_REPLICATION_MAP[FLAGS.azure_storage_type], + HOST_CACHING: self.host_caching, + }) + elif self.disk_type == disk.LOCAL: + media = disk.SSD if LocalDiskIsSSD(self.machine_type) else disk.HDD + + self.metadata.update({ + disk.MEDIA: media, + disk.REPLICATION: disk.NONE, + }) + + def _Create(self): + """Creates the disk.""" + assert not self.is_image + + if self.disk_type == ULTRA_STORAGE and not self.vm.availability_zone: + raise Exception(f'Azure Ultradisk is being created in zone "{self.zone}"' + 'which was not specified to have an availability zone. ' + 'Availability zones are specified with zone-\\d e.g. 
' + 'eastus1-2 for availability zone 2 in zone eastus1') + with self._lock: + _, _, retcode = vm_util.IssueCommand([ + azure.AZURE_PATH, 'vm', 'disk', 'attach', '--new', '--caching', + self.host_caching, '--name', self.name, '--lun', + str(self.lun), '--sku', self.disk_type, '--vm-name', self.vm_name, + '--size-gb', + str(self.disk_size) + ] + self.resource_group.args, raise_on_failure=False, timeout=600) + + if retcode: + raise errors.Resource.RetryableCreationError( + 'Error creating Azure disk.') + + _, _, retcode = vm_util.IssueCommand([ + azure.AZURE_PATH, 'disk', 'update', '--name', self.name, '--set', + util.GetTagsJson(self.resource_group.timeout_minutes) + ] + self.resource_group.args, raise_on_failure=False) + + if retcode: + raise errors.Resource.RetryableCreationError( + 'Error tagging Azure disk.') + + if (self.disk_type == ULTRA_STORAGE and ( + FLAGS.azure_provisioned_iops or FLAGS.azure_provisioned_throughput)): + args = ([azure.AZURE_PATH, 'disk', 'update', '--name', self.name] + + self.resource_group.args) + + if FLAGS.azure_provisioned_iops: + args = args + ['--disk-iops-read-write', + str(FLAGS.azure_provisioned_iops)] + if FLAGS.azure_provisioned_throughput: + args = args + ['--disk-mbps-read-write', + str(FLAGS.azure_provisioned_throughput)] + + _, _, _ = vm_util.IssueCommand(args, raise_on_failure=True) + + def _Delete(self): + """Deletes the disk.""" + assert not self.is_image + self._deleted = True + + def _Exists(self): + """Returns true if the disk exists.""" + assert not self.is_image + if self._deleted: + return False + + stdout, _, _ = vm_util.IssueCommand([ + azure.AZURE_PATH, 'disk', 'show', '--output', 'json', '--name', + self.name + ] + self.resource_group.args, raise_on_failure=False) + try: + json.loads(stdout) + return True + except: + return False + + def Attach(self, vm): + """Attaches the disk to a VM. + + Args: + vm: The AzureVirtualMachine instance to which the disk will be attached. + """ + pass # TODO(user): Implement Attach() + # (not critical because disks are attached to VMs when created) + + def Detach(self): + """Detaches the disk from a VM.""" + # Not needed since the resource group can be deleted + # without detaching disks. + pass + + def GetDevicePath(self): + """Returns the path to the device inside the VM.""" + REMOTE_DRIVE_PATH_SUFFIXES = list(_GenerateDrivePathSuffixes(self.machine_type)) + if self.disk_type == disk.LOCAL: + if LocalDriveIsNvme(self.machine_type): + return '/dev/nvme%sn1' % str(self.lun) + # Temp disk naming isn't always /dev/sdb: + # https://github.com/MicrosoftDocs/azure-docs/issues/54055 + return '/dev/disk/cloud/azure_resource' + else: + try: + start_index = 1 # the os drive is always at index 0; skip the OS drive. + if HasTempDrive(self.machine_type): + start_index += 1 + return '/dev/sd%s' % REMOTE_DRIVE_PATH_SUFFIXES[start_index + self.lun] + except IndexError: + raise TooManyAzureDisksError() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py new file mode 100644 index 0000000..999d812 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_kubernetes_service.py @@ -0,0 +1,272 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains classes/functions related to Azure Kubernetes Service.""" + +import json +from typing import List + +from absl import flags +from perfkitbenchmarker import container_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network +from perfkitbenchmarker.providers.azure import service_principal +from perfkitbenchmarker.providers.azure import util + +FLAGS = flags.FLAGS + + +class AzureContainerRegistry(container_service.BaseContainerRegistry): + """Class for building and storing container images on Azure.""" + + CLOUD = providers.AZURE + + def __init__(self, registry_spec): + super(AzureContainerRegistry, self).__init__(registry_spec) + self.region = util.GetRegionFromZone(self.zone) + self.resource_group = azure_network.GetResourceGroup(self.region) + self.login_server = None + self.sku = 'Basic' + self._deleted = False + self.acr_id = None + self.service_principal = service_principal.ServicePrincipal.GetInstance() + + def _Exists(self): + """Returns True if the registry exists.""" + if self._deleted: + return False + stdout, _, _ = vm_util.IssueCommand([ + azure.AZURE_PATH, 'acr', 'show', '--name', self.name, + ], suppress_warning=True, raise_on_failure=False) + try: + registry = json.loads(stdout) + self.login_server = registry['loginServer'] + self.acr_id = registry['id'] + return True + except ValueError: + return False + + def _Create(self): + """Creates the registry.""" + if self._Exists(): + return + vm_util.IssueCommand([ + azure.AZURE_PATH, 'acr', 'create', + '--name', self.name, + '--sku', self.sku + ] + self.resource_group.args) + + def _Delete(self): + """Deletes the registry.""" + # This will be deleted along with the resource group + self._deleted = True + + def _PostCreate(self): + """Allow the service principle to read from the repository.""" + # If we bootstrapped our own credentials into the AKS cluster it already, + # has read permission, because it created the repo. 
+ if not FLAGS.bootstrap_azure_service_principal: + create_role_assignment_cmd = [ + azure.AZURE_PATH, 'role', 'assignment', 'create', + '--assignee', self.service_principal.app_id, + '--role', 'Reader', + '--scope', self.acr_id, + ] + vm_util.IssueRetryableCommand(create_role_assignment_cmd) + + def _CreateDependencies(self): + """Creates the resource group.""" + self.resource_group.Create() + self.service_principal.Create() + + def _DeleteDependencies(self): + """Deletes the resource group.""" + self.service_principal.Delete() + + def Login(self): + """Logs in to the registry.""" + vm_util.IssueCommand([ + azure.AZURE_PATH, 'acr', 'login', + '--name', self.name, + ]) + + def GetFullRegistryTag(self, image): + """Gets the full tag of the image.""" + full_tag = '{login_server}/{name}'.format( + login_server=self.login_server, name=image) + return full_tag + + +class AksCluster(container_service.KubernetesCluster): + """Class representing an Azure Kubernetes Service cluster.""" + + CLOUD = providers.AZURE + + def __init__(self, spec): + """Initializes the cluster.""" + super(AksCluster, self).__init__(spec) + self.region = util.GetRegionFromZone(self.zone) + self.resource_group = azure_network.GetResourceGroup(self.region) + self.node_resource_group = None + self.name = 'pkbcluster%s' % FLAGS.run_uri + # TODO(pclay): replace with built in service principal once I figure out how + # to make it work with ACR + self.service_principal = service_principal.ServicePrincipal.GetInstance() + self.cluster_version = FLAGS.container_cluster_version + self._deleted = False + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the cluster. + + Returns: + dict mapping string property key to value. + """ + result = super(AksCluster, self).GetResourceMetadata() + result['boot_disk_type'] = self.vm_config.os_disk.disk_type + result['boot_disk_size'] = self.vm_config.os_disk.disk_size + return result + + def _Create(self): + """Creates the AKS cluster.""" + cmd = [ + azure.AZURE_PATH, 'aks', 'create', + '--name', self.name, + '--location', self.region, + '--ssh-key-value', vm_util.GetPublicKeyPath(), + '--service-principal', self.service_principal.app_id, + # TODO(pclay): avoid logging client secret + '--client-secret', + self.service_principal.password, + '--nodepool-name', + container_service.DEFAULT_NODEPOOL, + '--nodepool-labels', + f'pkb_nodepool={container_service.DEFAULT_NODEPOOL}', + ] + self._GetNodeFlags(self.num_nodes, self.vm_config) + + # TODO(pclay): expose quota and capacity errors + # Creating an AKS cluster with a fresh service principal usually fails due + # to a race condition. Active Directory knows the service principal exists, + # but AKS does not. (https://github.com/Azure/azure-cli/issues/9585) + # Use 5 min timeout on service principle retry. cmd will fail fast. + vm_util.Retry(timeout=300)(vm_util.IssueCommand)( + cmd, + # Half hour timeout on creating the cluster. 
+ timeout=1800) + + for name, node_pool in self.nodepools.items(): + self._CreateNodePool(name, node_pool) + + def _CreateNodePool(self, name: str, node_pool): + """Creates a node pool.""" + cmd = [ + azure.AZURE_PATH, 'aks', 'nodepool', 'add', + '--cluster-name', self.name, + '--name', name, + '--labels', f'pkb_nodepool={name}', + ] + self._GetNodeFlags(node_pool.num_nodes, node_pool.vm_config) + vm_util.IssueCommand(cmd, timeout=600) + + def _GetNodeFlags(self, num_nodes: int, vm_config) -> List[str]: + """Common flags for create and nodepools add.""" + args = [ + '--node-vm-size', vm_config.machine_type, + '--node-count', str(num_nodes), + ] + self.resource_group.args + if self.vm_config.zone and self.vm_config.zone != self.region: + zones = ' '.join(zone[-1] for zone in self.vm_config.zone.split(',')) + args += ['--zones', zones] + if self.vm_config.os_disk and self.vm_config.os_disk.disk_size: + args += ['--node-osdisk-size', str(self.vm_config.os_disk.disk_size)] + if self.cluster_version: + args += ['--kubernetes-version', self.cluster_version] + return args + + def _Exists(self): + """Returns True if the cluster exists.""" + if self._deleted: + return False + stdout, _, _ = vm_util.IssueCommand([ + azure.AZURE_PATH, 'aks', 'show', '--name', self.name, + ] + self.resource_group.args, raise_on_failure=False) + try: + cluster = json.loads(stdout) + self.node_resource_group = cluster['nodeResourceGroup'] + return True + except ValueError: + return False + + def _Delete(self): + """Deletes the AKS cluster.""" + # Do not call super._Delete() as it will try to delete containers and the + # cluster may have already been deleted by deleting a corresponding + # AzureContainerRegistry. The registry deletes the shared resource group. + # + # Normally only azure networks manage resource groups because, + # PKB assumes all benchmarks use VMs and VMs always depend on networks. + # However container benchmarks do not provision networks and + # directly manage their own resource groups. However ContainerClusters and + # ContainerRegistries can be used independently so they must both directly + # mangage the undlerlying resource group. This is indempotent, but can cause + # AKS clusters to have been deleted before calling _Delete(). + # + # If it has not yet been deleted it will be deleted along with the resource + # group. + self._deleted = True + + def _PostCreate(self): + """Tags the cluster resource group.""" + super(AksCluster, self)._PostCreate() + set_tags_cmd = [ + azure.AZURE_PATH, 'group', 'update', '-g', self.node_resource_group, + '--set', util.GetTagsJson(self.resource_group.timeout_minutes) + ] + vm_util.IssueCommand(set_tags_cmd) + + def _IsReady(self): + """Returns True if the cluster is ready.""" + vm_util.IssueCommand([ + azure.AZURE_PATH, 'aks', 'get-credentials', + '--admin', + '--name', self.name, + '--file', FLAGS.kubeconfig, + ] + self.resource_group.args, suppress_warning=True) + version_cmd = [FLAGS.kubectl, '--kubeconfig', FLAGS.kubeconfig, 'version'] + _, _, retcode = vm_util.IssueCommand(version_cmd, suppress_warning=True, + raise_on_failure=False) + if retcode: + return False + # POD creation will fail until the default service account is created. 
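+ # Readiness is therefore detected by listing service accounts and checking
+ # that 'default' is present.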
+ get_cmd = [ + FLAGS.kubectl, '--kubeconfig', FLAGS.kubeconfig, + 'get', 'serviceAccounts' + ] + stdout, _, _ = vm_util.IssueCommand(get_cmd) + return 'default' in stdout + + def _CreateDependencies(self): + """Creates the resource group.""" + self.resource_group.Create() + self.service_principal.Create() + + def _DeleteDependencies(self): + """Deletes the resource group.""" + self.service_principal.Delete() + + def GetDefaultStorageClass(self) -> str: + """Get the default storage class for the provider.""" + # https://docs.microsoft.com/en-us/azure/aks/csi-storage-drivers + # Premium_LRS + return 'managed-csi-premium' diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_network.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_network.py new file mode 100644 index 0000000..59b8082 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_network.py @@ -0,0 +1,707 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to Azure VM networking. + +The Firewall class provides a way of opening VM ports. The Network class allows +VMs to communicate via internal ips and isolates PerfKitBenchmarker VMs from +others in +the same project. See http://msdn.microsoft.com/library/azure/jj156007.aspx +for more information about Azure Virtual Networks. +""" + +import json +import logging +import threading + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import network +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_placement_group +from perfkitbenchmarker.providers.azure import util + +FLAGS = flags.FLAGS +CIDR_LIST_THRESHOLD = 14 +# sending more than 14 IP addresses or cidr blocks will cause API call failure +SSH_PORT = 22 + +flags.DEFINE_boolean('azure_infiniband', False, + 'Install Mellanox OpenFabrics drivers') + +DEFAULT_REGION = 'eastus2' + +REGION = 'region' +ZONE = 'zone' + + +def GetResourceGroup(zone=None): + """Get the resource group for the current benchmark.""" + spec = context.GetThreadBenchmarkSpec() + # This is protected by spec.networks_lock, so there's no race + # condition with checking for the attribute and then creating a + # resource group. 
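+ # The first caller creates a group named 'pkb<run_uri>-<spec uid>' and caches
+ # it on the benchmark spec; later callers reuse that cached instance.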
+ try: + return spec.azure_resource_group + except AttributeError: + group = AzureResourceGroup( + 'pkb%s-%s' % (FLAGS.run_uri, spec.uid), zone=zone) + spec.azure_resource_group = group + return group + + +class AzureResourceGroup(resource.BaseResource): + """A Resource Group, the basic unit of Azure provisioning.""" + + def __init__(self, + name, + zone=None, + use_existing=False, + timeout_minutes=None, + raise_on_create_failure=True): + super(AzureResourceGroup, self).__init__() + self.name = name + self.use_existing = use_existing + self.timeout_minutes = timeout_minutes + self.raise_on_create_failure = raise_on_create_failure + # A resource group's region doesn't affect the region of + # actual resources, but we need to choose *some* region for every + # benchmark, even if the user doesn't specify one. + self.region = util.GetRegionFromZone( + FLAGS.zones[0] if FLAGS.zones else zone or DEFAULT_REGION) + # Whenever an Azure CLI command needs a resource group, it's + # always specified the same way. + self.args = ['--resource-group', self.name] + + def _Create(self): + if not self.use_existing: + # A resource group can own resources in multiple zones, but the + # group itself needs to have a region. Therefore, + # FLAGS.zones[0]. + _, _, retcode = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'group', 'create', '--name', self.name, + '--location', self.region, '--tags' + ] + util.GetTags(self.timeout_minutes), + raise_on_failure=False) + + if retcode and self.raise_on_create_failure: + raise errors.Resource.RetryableCreationError( + 'Error creating Azure resource group') + + def _Exists(self): + stdout, _, _ = vm_util.IssueCommand( + [azure.AZURE_PATH, 'group', 'show', '--name', self.name], + suppress_warning=True, + raise_on_failure=False) + try: + json.loads(stdout) + return True + except ValueError: + return False + + def _Delete(self): + # Ignore delete failures (potentially already deleted) and timeouts as + # delete should complete even if we stop waiting for the response. + vm_util.IssueCommand( + [azure.AZURE_PATH, 'group', 'delete', '--yes', '--name', self.name], + timeout=600, + raise_on_failure=False, + raise_on_timeout=False) + + def AddTag(self, key, value): + """Add a single tag to an existing Resource Group. + + Args: + key: tag key + value: tag value + + Raises: + errors.resource.CreationError on failure. + """ + tag_cmd = [ + azure.AZURE_PATH, 'group', 'update', '--name', self.name, '--set', + 'tags.' + util.FormatTag(key, value) + ] + _, _, retcode = vm_util.IssueCommand(tag_cmd, raise_on_failure=False) + if retcode: + raise errors.Resource.CreationError('Error tagging Azure resource group.') + + +class AzureStorageAccount(resource.BaseResource): + """Object representing an Azure Storage Account.""" + + total_storage_accounts = 0 + + def __init__(self, + storage_type, + region, + name, + kind=None, + access_tier=None, + resource_group=None, + use_existing=False, + raise_on_create_failure=True): + super(AzureStorageAccount, self).__init__() + self.storage_type = storage_type + self.name = name + self.resource_group = resource_group or GetResourceGroup() + self.region = region + self.kind = kind or 'Storage' + self.use_existing = use_existing + self.raise_on_create_failure = raise_on_create_failure + + AzureStorageAccount.total_storage_accounts += 1 + + if kind == 'BlobStorage': + self.access_tier = access_tier or 'Hot' + else: + # Access tiers are only valid for blob storage accounts. 
+ assert access_tier is None + self.access_tier = access_tier + + def _Create(self): + """Creates the storage account.""" + if not self.use_existing: + create_cmd = [ + azure.AZURE_PATH, 'storage', 'account', 'create', '--kind', self.kind, + '--sku', self.storage_type, '--name', self.name, + '--min-tls-version', 'TLS1_2', + '--allow-blob-public-access', 'false', '--tags' + ] + util.GetTags( + self.resource_group.timeout_minutes) + self.resource_group.args + if self.region: + create_cmd.extend(['--location', self.region]) + if self.kind == 'BlobStorage': + create_cmd.extend(['--access-tier', self.access_tier]) + if FLAGS.min_tls_version: + create_cmd.extend(['--min-tls-version', FLAGS.min_tls_version]) + vm_util.IssueCommand( + create_cmd, raise_on_failure=self.raise_on_create_failure) + + def _PostCreate(self): + """Get our connection string and our keys.""" + self.connection_string = util.GetAzureStorageConnectionString( + self.name, self.resource_group.args) + self.connection_args = ['--connection-string', self.connection_string] + self.key = util.GetAzureStorageAccountKey(self.name, + self.resource_group.args) + + def _Delete(self): + """Deletes the storage account.""" + delete_cmd = [ + azure.AZURE_PATH, 'storage', 'account', 'delete', '--name', self.name, + '--yes' + ] + self.resource_group.args + vm_util.IssueCommand(delete_cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the storage account exists.""" + stdout, _, _ = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'storage', 'account', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args, + suppress_warning=True, + raise_on_failure=False) + + try: + json.loads(stdout) + return True + except ValueError: + return False + + +class AzureVirtualNetwork(network.BaseNetwork): + """Object representing an Azure Virtual Network. + + The benchmark spec contains one instance of this class per region, which an + AzureNetwork may retrieve or create via AzureVirtualNetwork.GetForRegion. + + Attributes: + name: string. Name of the virtual network. + resource_group: Resource Group instance that network belongs to. + region: string. Azure region of the network. + """ + + # Initializes an address space for a new AzureVirtualNetwork + _regional_network_count = 0 + vnet_lock = threading.Lock() + + CLOUD = providers.AZURE + + def __init__(self, spec, region, name, number_subnets): + super(AzureVirtualNetwork, self).__init__(spec) + self.name = name + self.resource_group = GetResourceGroup() + self.region = region + self.args = ['--vnet-name', self.name] + self.address_index = 0 + self.regional_index = AzureVirtualNetwork._regional_network_count + self.address_spaces = [] + for zone_num in range(number_subnets): + self.address_spaces.append( + network.GetCidrBlock(self.regional_index, zone_num)) + self.is_created = False + + @classmethod + def GetForRegion(cls, spec, region, name, number_subnets=1): + """Retrieves or creates an AzureVirtualNetwork. + + Args: + spec: BaseNetworkSpec. Spec for Azure Network. + region: string. Azure region name. + name: string. Azure Network Name. + number_subnets: int. Optional. Number of subnets that network will + contain. + + Returns: + AzureVirtualNetwork. If an AzureVirtualNetwork for the same region already + exists in the benchmark spec, that instance is returned. Otherwise, a new + AzureVirtualNetwork is created and returned. 
+ """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if benchmark_spec is None: + raise errors.Error('GetNetwork called in a thread without a ' + 'BenchmarkSpec.') + key = cls.CLOUD, REGION, region + # Because this method is only called from the AzureNetwork constructor, + # which is only called from AzureNetwork.GetNetwork, we already hold the + # benchmark_spec.networks_lock. + number_subnets = max(number_subnets, len(FLAGS.zones)) + if key not in benchmark_spec.regional_networks: + benchmark_spec.regional_networks[key] = cls(spec, region, name, + number_subnets) + AzureVirtualNetwork._regional_network_count += 1 + return benchmark_spec.regional_networks[key] + + def GetNextAddressSpace(self): + """Returns the next available address space for next subnet.""" + with self.vnet_lock: + assert self.address_index < len( + self.address_spaces), 'Only allocated {} addresses'.format( + len(self.address_spaces)) + next_address_space = self.address_spaces[self.address_index] + self.address_index += 1 + return next_address_space + + def Create(self): + """Creates the virtual network.""" + with self.vnet_lock: + if self.is_created: + return + + logging.info('Creating %d Azure subnets in %s', len(self.address_spaces), + self.region) + vm_util.IssueRetryableCommand([ + azure.AZURE_PATH, 'network', 'vnet', 'create', '--location', self + .region, '--name', self.name, '--address-prefixes' + ] + self.address_spaces + self.resource_group.args) + + self.is_created = True + + def Delete(self): + """Deletes the virtual network.""" + pass + + @vm_util.Retry() + def Exists(self): + """Returns true if the virtual network exists.""" + stdout, _, _ = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'network', 'vnet', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args, + suppress_warning=True, + raise_on_failure=False) + + return bool(json.loads(stdout)) + + +class AzureSubnet(resource.BaseResource): + """Object representing an Azure Subnet.""" + + def __init__(self, vnet, name): + super(AzureSubnet, self).__init__() + self.resource_group = GetResourceGroup() + self.vnet = vnet + self.name = name + self.args = ['--subnet', self.name] + self.address_space = None + + def _Create(self): + # Avoids getting additional address space when create retries. + if not self.address_space: + self.address_space = self.vnet.GetNextAddressSpace() + + vm_util.IssueCommand([ + azure.AZURE_PATH, 'network', 'vnet', 'subnet', 'create', '--vnet-name', + self.vnet.name, '--address-prefix', self.address_space, '--name', + self.name + ] + self.resource_group.args) + + @vm_util.Retry() + def _Exists(self): + stdout, _, _ = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'network', 'vnet', 'subnet', 'show', + '--vnet-name', self.vnet.name, '--output', 'json', '--name', + self.name + ] + self.resource_group.args, + raise_on_failure=False) + + return bool(json.loads(stdout)) + + def _Delete(self): + pass + + +class AzureNetworkSecurityGroup(resource.BaseResource): + """Object representing an Azure Network Security Group.""" + + def __init__(self, region, subnet, name): + super(AzureNetworkSecurityGroup, self).__init__() + + self.region = region + self.subnet = subnet + self.name = name + self.resource_group = GetResourceGroup() + self.args = ['--nsg', self.name] + + self.rules_lock = threading.Lock() + # Mapping of (start_port, end_port, source) -> rule name, used to + # deduplicate rules. 
We expect duplicate rules because PKB will + # call AllowPort() for each VM on a subnet, but the rules are + # actually applied to the entire subnet. + self.rules = {} + # True if the special 'DenyAll' rule is present. + self.have_deny_all_rule = False + + def _Create(self): + vm_util.IssueCommand([ + azure.AZURE_PATH, 'network', 'nsg', 'create', '--location', + self.region, '--name', self.name + ] + self.resource_group.args) + + @vm_util.Retry() + def _Exists(self): + stdout, _, _ = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'network', 'nsg', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args, + raise_on_failure=False) + + return bool(json.loads(stdout)) + + def _Delete(self): + pass + + def _GetRulePriority(self, rule, rule_name): + # Azure priorities are between 100 and 4096, but we reserve 4095 + # for the special DenyAll rule created by DisallowAllPorts. + rule_priority = 100 + len(self.rules) + if rule_priority >= 4095: + raise ValueError('Too many firewall rules!') + self.rules[rule] = rule_name + return rule_priority + + def AttachToSubnet(self): + vm_util.IssueRetryableCommand([ + azure.AZURE_PATH, 'network', 'vnet', 'subnet', 'update', '--name', + self.subnet.name, '--network-security-group', self.name + ] + self.resource_group.args + self.subnet.vnet.args) + + def _ReturnSubString(self, str_ori, list_bound): + list_ori = str_ori.split(",") + list_sub, list_remain = self._ReturnSubList(list_ori, list_bound) + + # convert them back to string + separator = "," + str_sub = separator.join(list_sub) + str_remain = separator.join(list_remain) + return str_sub, str_remain + + def _ReturnSubList(self, list_orig, list_bound): + list_sublist = list_orig[0:list_bound] + list_remain = list_orig[list_bound:] + return list_sublist, list_remain + + def _CreateAndIssueCommand(self, start_port, end_port, source_range, source_range_str, iteration): + rule = (start_port, end_port, source_range_str) + with self.rules_lock: + if rule in self.rules: + return + port_range = '%s-%s' % (start_port, end_port) + rule_name = 'allow-%s-intel-public-cidrs-%s' % (port_range, iteration) + rule_priority = self._GetRulePriority(rule, rule_name) + + network_cmd = [ + azure.AZURE_PATH, 'network', 'nsg', 'rule', 'create', '--name', + rule_name, '--destination-port-range', port_range, '--access', 'Allow', + '--priority', + str(rule_priority) + ] + ['--source-address-prefixes'] + source_range + network_cmd.extend(self.resource_group.args + self.args) + vm_util.IssueRetryableCommand(network_cmd) + + def AllowPort(self, vm, start_port, end_port=None, source_range=None): + """Open a port or port range. + + Args: + vm: the virtual machine to open the port for. + start_port: either a single port or the start of a range. + end_port: if given, the end of the port range. + source_range: List of source CIDRs to allow for this port. If None, all + sources are allowed. i.e. ['0.0.0.0/0'] + + Raises: + ValueError: when there are too many firewall rules. + """ + source_range = source_range or ['0.0.0.0/0'] + end_port = end_port or start_port + + source_range.sort() + # Replace slashes as they are not allowed in an azure rule name. 
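+ # Azure also limits how many source prefixes a single rule-create call
+ # accepts, so source lists longer than CIDR_LIST_THRESHOLD are split into
+ # multiple rules below, each suffixed with its iteration number.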
+ source_range_str = ','.join(source_range).replace('/', '_') + + list_bound = CIDR_LIST_THRESHOLD + iteration = 1 + while len(source_range) > list_bound: + source_range, source_range_remain = self._ReturnSubList(source_range, list_bound) + source_range_str, source_range_str_remain = self._ReturnSubString(source_range_str, list_bound) + self._CreateAndIssueCommand(start_port, end_port, source_range, source_range_str, iteration) + source_range = source_range_remain + source_range_str = source_range_str_remain + iteration += 1 + + if len(source_range) > 0: + # do some remaining work on source_range and source_range_str + self._CreateAndIssueCommand(start_port, end_port, source_range, source_range_str, iteration) + + def AllowIcmp(self): + source_address = '0.0.0.0/0' + # '*' in Azure represents all ports + rule = ('*', source_address) + rule_name = 'allow-icmp' + with self.rules_lock: + if rule in self.rules: + return + rule_priority = self._GetRulePriority(rule, rule_name) + network_cmd = [ + azure.AZURE_PATH, 'network', 'nsg', 'rule', 'create', '--name', + rule_name, '--access', 'Allow', '--source-address-prefixes', + source_address, '--source-port-ranges', '*', + '--destination-port-ranges', '*', '--priority', + str(rule_priority), '--protocol', 'Icmp' + ] + network_cmd.extend(self.resource_group.args + self.args) + vm_util.IssueRetryableCommand(network_cmd) + + +class AzureFirewall(network.BaseFirewall): + """A fireall on Azure is a Network Security Group. + + NSGs are per-subnet, but this class is per-provider, so we just + proxy methods through to the right NSG instance. + """ + + CLOUD = providers.AZURE + + def AllowPort(self, vm, start_port, end_port=None, source_range=None): + """Opens a port on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the port for. + start_port: The local port to open. + end_port: if given, open the range [start_port, end_port]. + source_range: unsupported at present. + """ + + vm.network.nsg.AllowPort( + vm, start_port, end_port=end_port, source_range=source_range) + + def DisallowAllPorts(self): + """Closes all ports on the firewall.""" + pass + + def AllowIcmp(self, vm): + """Opens the ICMP protocol on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the ICMP protocol for. + """ + + vm.network.nsg.AllowIcmp() + + +class AzureNetwork(network.BaseNetwork): + """Locational network components. + + A container object holding all of the network-related objects that + we need for an Azure zone (aka region). 
+ """ + + CLOUD = providers.AZURE + + def __init__(self, spec): + super(AzureNetwork, self).__init__(spec) + self.resource_group = GetResourceGroup() + self.region = util.GetRegionFromZone(self.zone) + self.availability_zone = util.GetAvailabilityZoneFromZone(self.zone) + + placement_group_spec = azure_placement_group.AzurePlacementGroupSpec( + 'AzurePlacementGroupSpec', + flag_values=FLAGS, + zone=self.zone, + resource_group=self.resource_group.name) + + is_dedicated_host = bool(FLAGS.dedicated_hosts) + in_availability_zone = bool(self.availability_zone) + cluster_placement_group = ( + FLAGS.placement_group_style == placement_group.PLACEMENT_GROUP_CLUSTER) + spread_placement_group = ( + FLAGS.placement_group_style == placement_group.PLACEMENT_GROUP_SPREAD) + + if cluster_placement_group: + self.placement_group = azure_placement_group.AzureProximityGroup( + placement_group_spec) + # With dedicated hosting and/or an availability zone, an availability set + # cannot be created + elif spread_placement_group and not (is_dedicated_host or + in_availability_zone): + self.placement_group = azure_placement_group.AzureAvailSet( + placement_group_spec) + else: + self.placement_group = None + + # Storage account names can't include separator characters :(. + storage_account_prefix = 'pkb%s' % FLAGS.run_uri + + # Storage account names must be 3-24 characters long and use + # numbers and lower-case letters only, which leads us to this + # awful naming scheme. + suffix = 'storage%d' % AzureStorageAccount.total_storage_accounts + self.storage_account = AzureStorageAccount( + FLAGS.azure_storage_type, self.region, + storage_account_prefix[:24 - len(suffix)] + suffix) + + # Length restriction from https://docs.microsoft.com/en-us/azure/azure-resource-manager/management/resource-name-rules#microsoftnetwork pylint: disable=line-too-long + prefix = '%s-%s' % (self.resource_group.name, self.region) + vnet_name = prefix + '-vnet' + if len(vnet_name) > 64: + vnet_name = prefix[:59] + '-vnet' + self.vnet = AzureVirtualNetwork.GetForRegion(spec, self.region, vnet_name) + subnet_name = self.vnet.name + if self.availability_zone: + subnet_name += '-' + self.availability_zone + subnet_name += '-subnet' + self.subnet = AzureSubnet(self.vnet, subnet_name) + self.nsg = AzureNetworkSecurityGroup(self.region, self.subnet, + self.subnet.name + '-nsg') + + @vm_util.Retry() + def Create(self): + """Creates the network.""" + # If the benchmark includes multiple zones, + # self.resource_group.Create() will be called more than once. But + # BaseResource will prevent us from running the underlying Azure + # commands more than once, so that is fine. + self.resource_group.Create() + + if self.placement_group: + self.placement_group.Create() + + self.storage_account.Create() + + self.vnet.Create() + + self.subnet.Create() + + self.nsg.Create() + self.nsg.AttachToSubnet() + + def Delete(self): + """Deletes the network.""" + # If the benchmark includes multiple zones, this will be called + # multiple times, but there will be no bad effects from multiple + # deletes. + self.resource_group.Delete() + + def Peer(self, peering_network): + """Peers the network with the peering_network. + + This method is used for VPC peering. It will connect 2 VPCs together. + + Args: + peering_network: BaseNetwork. The network to peer with. 
+ """ + + # Skip Peering if the networks are the same + if self.vnet is peering_network.vnet: + return + + spec = network.BaseVPCPeeringSpec(self.vnet, + peering_network.vnet) + self.vpc_peering = AzureVpcPeering(spec) + peering_network.vpc_peering = self.vpc_peering + self.vpc_peering.Create() + + @classmethod + def _GetKeyFromNetworkSpec(cls, spec): + """Returns a key used to register Network instances.""" + return (cls.CLOUD, ZONE, spec.zone) + + +class AzureVpcPeering(network.BaseVPCPeering): + """Object containing all information needed to create a VPC Peering Object.""" + + def _Create(self): + """Creates the peering object.""" + self.name = '%s-%s-%s' % (self.network_a.resource_group.name, + self.network_a.region, self.network_b.region) + + # Creates Peering Connection + create_cmd = [ + azure.AZURE_PATH, 'network', 'vnet', 'peering', 'create', + '--name', self.name, + '--vnet-name', self.network_a.name, + '--remote-vnet', self.network_b.name, + '--allow-vnet-access' + ] + self.network_a.resource_group.args + + vm_util.IssueRetryableCommand(create_cmd) + + # Accepts Peering Connection + accept_cmd = [ + azure.AZURE_PATH, 'network', 'vnet', 'peering', 'create', + '--name', self.name, + '--vnet-name', self.network_b.name, + '--remote-vnet', self.network_a.name, + '--allow-vnet-access' + ] + self.network_b.resource_group.args + vm_util.IssueRetryableCommand(accept_cmd) + + logging.info('Created VPC peering between %s and %s', + self.network_a.address_spaces[0], + self.network_b.address_spaces[0]) + + def _Delete(self): + """Deletes the peering connection.""" + # Gets Deleted with resource group deletion + pass diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_placement_group.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_placement_group.py new file mode 100644 index 0000000..1203c4d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_placement_group.py @@ -0,0 +1,148 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to represent an Azure Placement Group object. + +Cloud specific implementations of Placement Group. +""" + +import abc +import json + +from absl import flags +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import util + + +FLAGS = flags.FLAGS + + +class AzurePlacementGroupSpec(placement_group.BasePlacementGroupSpec): + """Object containing the information needed to create an AzurePlacementGroup. + + Attributes: + zone: The Azure zone the Placement Group is in. + """ + + CLOUD = providers.AZURE + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. 
Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(AzurePlacementGroupSpec, + cls)._GetOptionDecoderConstructions() + result.update({ + 'resource_group': (option_decoders.StringDecoder, {'none_ok': False}), + 'placement_group_style': (option_decoders.EnumDecoder, { + 'valid_values': placement_group.PLACEMENT_GROUP_OPTIONS, + 'default': placement_group.PLACEMENT_GROUP_NONE, + }) + }) + return result + + +class AzurePlacementGroup(placement_group.BasePlacementGroup): + """Object representing an Azure Placement Group.""" + + CLOUD = providers.AZURE + + def __init__(self, azure_placement_group_spec): + """Init method for AzurePlacementGroup. + + Args: + azure_placement_group_spec: Object containing the + information needed to create an AzurePlacementGroup. + """ + super(AzurePlacementGroup, self).__init__(azure_placement_group_spec) + self.resource_group = azure_placement_group_spec.resource_group + self.name = '%s-%s' % (self.resource_group, self.zone) + self.region = util.GetRegionFromZone(self.zone) + self.strategy = azure_placement_group_spec.placement_group_style + + @abc.abstractmethod + def AddVmArgs(self): + """List of arguments to add to vm creation.""" + raise NotImplementedError() + + +class AzureAvailSet(AzurePlacementGroup): + """Object representing an Azure Availability Set.""" + + def _Create(self): + """Create the availability set.""" + create_cmd = [ + azure.AZURE_PATH, 'vm', 'availability-set', 'create', + '--resource-group', self.resource_group, '--name', self.name + ] + if self.region: + create_cmd.extend(['--location', self.region]) + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + pass + + @vm_util.Retry() + def _Exists(self): + """Returns True if the availability set exists.""" + show_cmd = [ + azure.AZURE_PATH, 'vm', 'availability-set', 'show', '--output', 'json', + '--resource-group', self.resource_group, '--name', self.name + ] + stdout, _, _ = vm_util.IssueCommand(show_cmd, raise_on_failure=False) + return bool(json.loads(stdout)) + + def AddVmArgs(self): + """Returns Azure command to add VM to availability set.""" + return ['--availability-set', self.name] + + +class AzureProximityGroup(AzurePlacementGroup): + """Object representing an Azure Proximity Placement Group.""" + + def _Create(self): + """Create the Proximity Placement Group.""" + create_cmd = [ + azure.AZURE_PATH, 'ppg', 'create', + '--resource-group', self.resource_group, '--name', self.name + ] + if self.region: + create_cmd.extend(['--location', self.region]) + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + pass + + @vm_util.Retry() + def _Exists(self): + """Returns True if the Proximity Placement Group exists.""" + show_cmd = [ + azure.AZURE_PATH, 'ppg', 'show', '--output', 'json', + '--resource-group', self.resource_group, '--name', self.name + ] + stdout, _, _ = vm_util.IssueCommand(show_cmd, raise_on_failure=False) + return bool(json.loads(stdout)) + + def AddVmArgs(self): + """Returns Azure command to add VM to placement group.""" + return ['--ppg', self.name] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_redis_cache.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_redis_cache.py new file mode 100644 index 0000000..10cfb2e --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_redis_cache.py @@ -0,0 +1,190 @@ +# Copyright 2018 PerfKitBenchmarker 
Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing classes for Azure Redis Cache. +""" + +import json +import time + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network + +FLAGS = flags.FLAGS +# 15min timeout for issuing az redis delete command. +TIMEOUT = 900 +EXISTS_RETRY_TIMES = 3 +EXISTS_RETRY_POLL = 30 + + +class AzureRedisCache(managed_memory_store.BaseManagedMemoryStore): + """Object representing an Azure Redis Cache.""" + + CLOUD = providers.AZURE + MEMORY_STORE = managed_memory_store.REDIS + + # Azure redis could take up to an hour to create + READY_TIMEOUT = 60 * 60 # 60 minutes + + def __init__(self, spec): + super(AzureRedisCache, self).__init__(spec) + self.redis_region = FLAGS.cloud_redis_region + self.resource_group = azure_network.GetResourceGroup(self.redis_region) + self.azure_redis_size = FLAGS.azure_redis_size + self.failover_style = FLAGS.redis_failover_style + if self.failover_style == managed_memory_store.Failover.FAILOVER_SAME_REGION: + self.azure_tier = 'Premium' + else: + self.azure_tier = 'Basic' + self.redis_version = None + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the cache. + + Returns: + dict mapping string property key to value. + """ + result = { + 'cloud_redis_failover_style': + self.failover_style, + 'cloud_redis_region': + self.redis_region, + 'cloud_redis_azure_tier': + self.azure_tier, + 'cloud_redis_azure_redis_size': + self.azure_redis_size, + 'cloud_redis_version': + managed_memory_store.ParseReadableVersion(self.redis_version), + } + return result + + @staticmethod + def CheckPrerequisites(benchmark_config): + """Check benchmark prerequisites on the input flag parameters. + + Args: + benchmark_config: Unused. + + Raises: + errors.Config.InvalidValue: Input flag parameters are invalid. + """ + if FLAGS.managed_memory_store_version: + raise errors.Config.InvalidValue( + 'Custom Redis version not supported on Azure Redis. 
') + if FLAGS.redis_failover_style in [ + managed_memory_store.Failover.FAILOVER_SAME_ZONE]: + raise errors.Config.InvalidValue( + 'Azure redis with failover in the same zone is not supported.') + + def _Create(self): + """Creates the cache.""" + cmd = [ + azure.AZURE_PATH, 'redis', 'create', + '--resource-group', self.resource_group.name, + '--location', self.redis_region, + '--name', self.name, + '--sku', self.azure_tier, + '--vm-size', self.azure_redis_size, + '--enable-non-ssl-port', + ] + vm_util.IssueCommand(cmd, timeout=TIMEOUT) + + def _Delete(self): + """Deletes the cache.""" + cmd = [ + azure.AZURE_PATH, 'redis', 'delete', + '--resource-group', self.resource_group.name, + '--name', self.name, + '--yes', + ] + vm_util.IssueCommand(cmd, timeout=TIMEOUT) + + def DescribeCache(self): + """Calls show on the cache to get information about it. + + Returns: + stdout, stderr and retcode. + """ + stdout, stderr, retcode = vm_util.IssueCommand([ + azure.AZURE_PATH, 'redis', 'show', + '--resource-group', self.resource_group.name, + '--name', self.name, + ], raise_on_failure=False) + return stdout, stderr, retcode + + def _Exists(self): + """Returns True if the cache exists. + + Returns: + True if cache exists and false otherwise. + """ + # Retry to ensure there is no transient error in describe cache + for _ in range(EXISTS_RETRY_TIMES): + _, _, retcode = self.DescribeCache() + + if retcode == 0: + return True + time.sleep(EXISTS_RETRY_POLL) + return retcode == 0 + + def _IsReady(self): + """Returns True if the cache is ready. + + Returns: + True if cache is ready and false otherwise. + """ + stdout, _, retcode = self.DescribeCache() + if (retcode == 0 and + json.loads(stdout).get('provisioningState', None) == 'Succeeded'): + self.redis_version = json.loads(stdout).get('redisVersion', 'unspecified') + return True + return False + + def GetMemoryStorePassword(self): + """See base class.""" + if not self._password: + self._PopulateEndpoint() + return self._password + + @vm_util.Retry(max_retries=5) + def _PopulateEndpoint(self): + """Populates endpoint information for the instance. + + Raises: + errors.Resource.RetryableGetError: + Failed to retrieve information on cache. + """ + stdout, _, retcode = self.DescribeCache() + if retcode != 0: + raise errors.Resource.RetryableGetError( + 'Failed to retrieve information on %s.', self.name) + response = json.loads(stdout) + self._ip = response['hostName'] + self._port = response['port'] + + stdout, _, retcode = vm_util.IssueCommand([ + azure.AZURE_PATH, 'redis', 'list-keys', + '--resource-group', self.resource_group.name, + '--name', self.name, + ], raise_on_failure=False) + if retcode != 0: + raise errors.Resource.RetryableGetError( + 'Failed to retrieve information on %s.', self.name) + response = json.loads(stdout) + self._password = response['primaryKey'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_relational_db.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_relational_db.py new file mode 100644 index 0000000..b60d07b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_relational_db.py @@ -0,0 +1,573 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Relational database provisioning and teardown for Azure RDS.""" + +import datetime +import json +import logging +import time + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import sql_engine_utils +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network +from perfkitbenchmarker.providers.azure import util + +DEFAULT_DATABASE_NAME = 'database' + +FLAGS = flags.FLAGS + +DEFAULT_MYSQL_VERSION = '5.7' +DEFAULT_POSTGRES_VERSION = '9.6' +DEFALUT_SQLSERVER_VERSION = 'DEFAULT' + +# Disk size configurations details at +# https://docs.microsoft.com/en-us/cli/azure/mysql/server?view=azure-cli-latest#az_mysql_server_create +AZURE_MIN_DB_DISK_SIZE_MB = 5120 # Minimum db disk size supported by Azure +AZURE_MAX_DB_DISK_SIZE_MB = 16777216 # Maximum db disk size supported by Azure + +IS_READY_TIMEOUT = 60 * 60 * 1 # 1 hour (might take some time to prepare) + +# Longest time recorded is 20 minutes when +# creating STANDARD_D64_V3 - 12/02/2020 +# The Azure command timeout with the following error message: +# +# Deployment failed. Correlation ID: fcdc3c76-33cc-4eb1-986c-fbc30ce7d820. +# The operation timed out and automatically rolled back. +# Please retry the operation. +CREATE_AZURE_DB_TIMEOUT = 60 * 30 + + +class AzureRelationalDb(relational_db.BaseRelationalDb): + """An object representing an Azure RDS relational database. + + Currently Postgres is supported. This class requires that a + client vm be available as an attribute on the instance before Create() is + called, which is the current behavior of PKB. This is necessary to setup the + networking correctly. The following steps are performed to provision the + database: + 1. create the RDS instance in the requested region. + + Instructions from: + https://docs.microsoft.com/en-us/azure/postgresql/quickstart-create-server-database-azure-cli + + On teardown, all resources are deleted. + + Note that the client VM's region and the region requested for the database + must be the same. + + """ + CLOUD = providers.AZURE + + database_name: str + + def __init__(self, relational_db_spec): + super(AzureRelationalDb, self).__init__(relational_db_spec) + if util.IsZone(self.spec.db_spec.zone): + raise errors.Config.InvalidValue( + 'Availability zones are currently not supported by Azure DBs') + self.region = util.GetRegionFromZone(self.spec.db_spec.zone) + self.resource_group = azure_network.GetResourceGroup(self.region) + + self.unmanaged_db_exists = None if self.is_managed_db else False + + def GetResourceMetadata(self): + """Returns the metadata associated with the resource. + + All keys will be prefaced with relational_db before + being published (done in publisher.py). + + Returns: + metadata: dict of Azure DB metadata. 
+ + """ + metadata = super(AzureRelationalDb, self).GetResourceMetadata() + metadata.update({ + 'zone': self.spec.db_spec.zone, + }) + + if hasattr(self.spec.db_disk_spec, 'iops'): + metadata.update({ + 'disk_iops': self.spec.db_disk_spec.iops, + }) + + return metadata + + @staticmethod + def GetDefaultEngineVersion(engine): + """Returns the default version of a given database engine. + + Args: + engine (string): type of database (my_sql or postgres). + Returns: + (string): Default engine version. + Raises: + RelationalDbEngineNotFoundError: if an unknown engine is + requested. + """ + if engine == sql_engine_utils.POSTGRES: + return DEFAULT_POSTGRES_VERSION + elif engine == sql_engine_utils.MYSQL: + return DEFAULT_MYSQL_VERSION + elif engine == sql_engine_utils.SQLSERVER: + return DEFALUT_SQLSERVER_VERSION + else: + raise relational_db.RelationalDbEngineNotFoundError( + 'Unsupported engine {0}'.format(engine)) + + def GetAzCommandForEngine(self): + engine = self.spec.engine + if engine == sql_engine_utils.POSTGRES: + return 'postgres' + elif engine == sql_engine_utils.MYSQL: + return 'mysql' + elif engine == sql_engine_utils.SQLSERVER: + return 'sql' + raise relational_db.RelationalDbEngineNotFoundError( + 'Unsupported engine {0}'.format(engine)) + + def GetConfigFromMachineType(self, machine_type): + """Returns a tuple of (edition, family, vcore) from Azure machine type. + + Args: + machine_type (string): Azure machine type i.e GP_Gen5_4 + Returns: + (string, string, string): edition, family, vcore + Raises: + UnsupportedError: if the machine type is not supported. + """ + machine_type = machine_type.split('_') + if len(machine_type) != 3: + raise relational_db.UnsupportedError( + 'Unsupported machine type {0},' + ' sample machine type GP_Gen5_2'.format(machine_type)) + edition = machine_type[0] + if edition == 'BC': + edition = 'BusinessCritical' + elif edition == 'GP': + edition = 'GeneralPurpose' + else: + raise relational_db.UnsupportedError( + 'Unsupported edition {}. Only supports BC or GP'.format(machine_type)) + + family = machine_type[1] + vcore = machine_type[2] + return (edition, family, vcore) + + def SetDbConfiguration(self, name, value): + """Set configuration for the database instance. 
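+    Implemented by issuing `az <engine> server configuration set` against the
+    managed server identified by self.instance_id.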
+ + Args: + name: string, the name of the settings to change + value: value, string the value to set + """ + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'configuration', + 'set', + '--name', + name, + '--value', + value, + '--resource-group', + self.resource_group.name, + '--server', + self.instance_id + ] + vm_util.IssueCommand(cmd) + + def RenameDatabase(self, new_name): + """Renames an the database instace.""" + engine = self.spec.engine + if engine == sql_engine_utils.SQLSERVER: + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'db', + 'rename', + '--resource-group', + self.resource_group.name, + '--server', + self.instance_id, + '--name', + self.database_name, + '--new-name', + new_name + ] + vm_util.IssueCommand(cmd) + self.database_name = new_name + else: + raise relational_db.RelationalDbEngineNotFoundError( + 'Unsupported engine {0}'.format(engine)) + + def _ApplyManagedDbFlags(self): + """Applies the MySqlFlags to a managed instance.""" + for flag in FLAGS.db_flags: + name_and_value = flag.split('=') + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), 'server', 'configuration', 'set', + '--name', name_and_value[0], '--resource-group', + self.resource_group.name, '--server', self.instance_id, '--value', + name_and_value[1] + ] + _, stderr, _ = vm_util.IssueCommand(cmd, raise_on_failure=False) + if stderr: + raise Exception('Invalid MySQL flags: {0}. Error {1}'.format( + name_and_value, stderr)) + + self._Reboot() + + def _CreateMySqlOrPostgresInstance(self): + """Creates a managed MySql or Postgres instance.""" + if not self.spec.high_availability: + raise Exception('Azure databases can only be used in high ' + 'availability. Please rerurn with flag ' + '--managed_db_high_availability=True') + + # Valid storage sizes range from minimum of 5120 MB + # and additional increments of 1024 MB up to maximum of 16777216 MB. 
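+    # Worked example (hypothetical spec): a 128 GB db_disk_spec becomes
+    # 128 * 1024 = 131072 MB, which sits inside the allowed
+    # [5120, 16777216] MB window; a 4 GB (4096 MB) or 17 TB request would be
+    # rejected by the checks below.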
+ azure_disk_size_mb = self.spec.db_disk_spec.disk_size * 1024 + if azure_disk_size_mb > AZURE_MAX_DB_DISK_SIZE_MB: + error_msg = ('Azure disk size was specified as in the disk spec as %s,' + 'got rounded to %s which is greater than the ' + 'maximum of 16777216 MB' % ( + self.spec.db_disk_spec.disk_size, azure_disk_size_mb)) + raise errors.Config.InvalidValue(error_msg) + + elif azure_disk_size_mb < AZURE_MIN_DB_DISK_SIZE_MB: + error_msg = ('Azure disk size was specified ' + 'as in the disk spec as %s, got rounded to %s ' + 'which is smaller than the minimum of 5120 MB' % ( + self.spec.db_disk_spec.disk_size, azure_disk_size_mb)) + raise errors.Config.InvalidValue(error_msg) + + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'create', + '--resource-group', + self.resource_group.name, + '--name', + self.instance_id, + '--location', + self.region, + '--admin-user', + self.spec.database_username, + '--admin-password', + self.spec.database_password, + '--storage-size', + str(azure_disk_size_mb), + '--sku-name', + self.spec.db_spec.machine_type, + '--version', + self.spec.engine_version, + ] + + vm_util.IssueCommand(cmd, timeout=CREATE_AZURE_DB_TIMEOUT) + + def _CreateSqlServerInstance(self): + """Creates a managed sql server instance.""" + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'create', + '--resource-group', + self.resource_group.name, + '--name', + self.instance_id, + '--location', + self.region, + '--admin-user', + self.spec.database_username, + '--admin-password', + self.spec.database_password + ] + vm_util.IssueCommand(cmd) + + # Azure support two ways of specifying machine type DTU or with vcores + # if compute units is specified we will use the DTU model + if self.spec.db_spec.compute_units is not None: + # Supported families & capacities for 'Standard' are: + # [(None, 10), (None, 20), (None, 50), (None, 100), (None, 200), + # (None, 400), (None, 800), (None, 1600), (None, 3000)] + + # Supported families & capacities for 'Premium' are: + # [(None, 125), (None, 250), (None, 500), (None, 1000), (None, 1750), + # (None, 4000)]. 
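+      # With the DTU model the database is created with, e.g.,
+      # '--edition Standard --capacity 100'; otherwise the vCore branch below
+      # parses a machine type such as GP_Gen5_2 into edition/family/capacity.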
+ + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'db', + 'create', + '--resource-group', + self.resource_group.name, + '--server', + self.instance_id, + '--name', + DEFAULT_DATABASE_NAME, + '--edition', + self.spec.db_spec.tier, + '--capacity', + str(self.spec.db_spec.compute_units), + '--zone-redundant', + 'true' if self.spec.high_availability else 'false' + ] + else: + # Sample machine_type: GP_Gen5_2 + edition, family, vcore = ( + self.GetConfigFromMachineType(self.spec.db_spec.machine_type)) + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'db', + 'create', + '--resource-group', + self.resource_group.name, + '--server', + self.instance_id, + '--name', + DEFAULT_DATABASE_NAME, + '--edition', + edition, + '--family', + family, + '--capacity', + vcore, + '--zone-redundant', + 'true' if self.spec.high_availability else 'false' + ] + vm_util.IssueCommand(cmd) + self.database_name = DEFAULT_DATABASE_NAME + + def _CreateAzureManagedSqlInstance(self): + """Creates an Azure Sql Instance from a managed service.""" + if self.spec.engine == sql_engine_utils.POSTGRES: + self._CreateMySqlOrPostgresInstance() + elif self.spec.engine == sql_engine_utils.MYSQL: + self._CreateMySqlOrPostgresInstance() + elif self.spec.engine == sql_engine_utils.SQLSERVER: + self._CreateSqlServerInstance() + else: + raise NotImplementedError('Unknown how to create Azure data base ' + 'engine {0}'.format(self.spec.engine)) + + def _CreateAzureUnmanagedSqlInstance(self): + """Creates an Azure Sql Instance hosted inside of a VM.""" + self.endpoint = self.server_vm.ip_address + self._SetupUnmanagedDatabase() + self.firewall = azure_network.AzureFirewall() + self.firewall.AllowPort( + self.server_vm, + self.port, + source_range=['%s/32' % self.client_vm.ip_address]) + + def _Create(self): + """Creates the Azure RDS instance. + + Raises: + NotImplementedError: if unknown how to create self.spec.engine. + Exception: if attempting to create a non high availability database. + + """ + if self.is_managed_db: + self._CreateAzureManagedSqlInstance() + else: + self.unmanaged_db_exists = True + self._CreateAzureUnmanagedSqlInstance() + + def _Delete(self): + """Deletes the underlying resource. + + Implementations of this method should be idempotent since it may + be called multiple times, even if the resource has already been + deleted. + """ + if not self.is_managed_db: + if hasattr(self, 'firewall'): + self.firewall.DisallowAllPorts() + self.unmanaged_db_exists = False + self.PrintUnmanagedDbStats() + return + + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'delete', + '--resource-group', self.resource_group.name, + '--name', self.instance_id, + '--yes' + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Exists(self): + """Returns true if the underlying resource exists. + + Supplying this method is optional. If it is not implemented then the + default is to assume success when _Create and _Delete do not raise + exceptions. + """ + if not self.is_managed_db: + return self.unmanaged_db_exists + + json_server_show = self._AzServerShow() + if json_server_show is None: + return False + return True + + def _IsReady(self, timeout=IS_READY_TIMEOUT): + """Return true if the underlying resource is ready. + + This method will query the instance every 5 seconds until + its instance state is 'available', or until a timeout occurs. 
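+    (For a managed Azure database this delegates to _IsInstanceReady, which
+    polls _AzServerShow until the reported state is 'Ready'.)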
+ + Args: + timeout: timeout in seconds + + Returns: + True if the resource was ready in time, False if the wait timed out + or an Exception occurred. + """ + + return self._IsInstanceReady(timeout) + + def _PostCreate(self): + """Perform general post create operations on the cluster. + + """ + super()._PostCreate() + + if self.is_managed_db: + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'firewall-rule', + 'create', + '--resource-group', self.resource_group.name, + '--server', self.instance_id, + '--name', 'AllowAllIps', + '--start-ip-address', '0.0.0.0', + '--end-ip-address', '255.255.255.255' + ] + vm_util.IssueCommand(cmd) + self._AssignEndpointForWriterInstance() + + if self.spec.engine == 'mysql' or self.spec.engine == 'postgres': + # Azure will add @domainname after the database username + self.spec.database_username = (self.spec.database_username + '@' + + self.endpoint.split('.')[0]) + + self.client_vm_query_tools.InstallPackages() + + def _Reboot(self): + """Reboot the managed db.""" + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'restart', + '--resource-group', self.resource_group.name, + '--name', self.instance_id + ] + vm_util.IssueCommand(cmd) + + if not self._IsInstanceReady(): + raise Exception('Instance could not be set to ready after ' + 'reboot') + + def _IsInstanceReady(self, timeout=IS_READY_TIMEOUT): + """Return true if the instance is ready. + + This method will query the instance every 5 seconds until + its instance state is 'Ready', or until a timeout occurs. + + Args: + timeout: timeout in seconds + + Returns: + True if the resource was ready in time, False if the wait timed out + or an Exception occurred. + """ + if not self.is_managed_db: + return self._IsReadyUnmanaged() + + start_time = datetime.datetime.now() + + while True: + if (datetime.datetime.now() - start_time).seconds >= timeout: + logging.warning('Timeout waiting for sql instance to be ready') + return False + + server_show_json = self._AzServerShow() + if server_show_json is not None: + engine = self.spec.engine + if engine == sql_engine_utils.POSTGRES: + state = server_show_json['userVisibleState'] + elif engine == sql_engine_utils.MYSQL: + state = server_show_json['userVisibleState'] + elif engine == sql_engine_utils.SQLSERVER: + state = server_show_json['state'] + else: + raise relational_db.RelationalDbEngineNotFoundError( + 'The db engine does not contain a valid state') + + if state == 'Ready': + break + time.sleep(5) + + return True + + def _AzServerShow(self): + """Runs the azure command az server show. + + Returns: + json object representing holding the of the show command on success. + None for a non 0 retcode. A non 0 retcode can occur if queried + before the database has finished being created. + """ + cmd = [ + azure.AZURE_PATH, + self.GetAzCommandForEngine(), + 'server', + 'show', + '--resource-group', self.resource_group.name, + '--name', self.instance_id + ] + stdout, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode != 0: + return None + json_output = json.loads(stdout) + return json_output + + def _AssignEndpointForWriterInstance(self): + """Assigns the ports and endpoints from the instance_id to self. 
+ + These will be used to communicate with the data base + """ + server_show_json = self._AzServerShow() + self.endpoint = server_show_json['fullyQualifiedDomainName'] + + def _FailoverHA(self): + raise NotImplementedError() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_service_bus.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_service_bus.py new file mode 100644 index 0000000..84f9b1d --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_service_bus.py @@ -0,0 +1,187 @@ +"""Azure Service Bus interface for resources. + +This class handles resource creation/cleanup for messaging service benchmark +on Azure Service Bus. +https://docs.microsoft.com/en-us/azure/service-bus-messaging/ +""" + +import json +import logging +import os +from typing import Any, Dict + +from absl import flags +from perfkitbenchmarker import messaging_service as msgsvc +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network + +FLAGS = flags.FLAGS +MESSAGING_SERVICE_SCRIPTS_VM_AZURE_DIR = os.path.join( + msgsvc.MESSAGING_SERVICE_SCRIPTS_VM_LIB_DIR, 'azure') +MESSAGING_SERVICE_SCRIPTS_AZURE_PREFIX = 'messaging_service_scripts/azure' +MESSAGING_SERVICE_SCRIPTS_AZURE_FILES = [ + '__init__.py', 'azure_service_bus_client.py' +] +MESSAGING_SERVICE_SCRIPTS_AZURE_BIN = 'messaging_service_scripts/azure_benchmark.py' + + +class AzureServiceBus(msgsvc.BaseMessagingService): + """Azure Service Bus Interface Class.""" + + CLOUD = providers.AZURE + + def __init__(self): + super().__init__() + self.topic_name = 'pkb-topic-{0}'.format(FLAGS.run_uri) + self.subscription_name = 'pkb-subscription-{0}'.format(FLAGS.run_uri) + self.namespace_name = 'pkb-namespace-{0}'.format(FLAGS.run_uri) + self.resource_group = azure_network.GetResourceGroup() + + def _Create(self): + """Handles provision of resources needed for Azure Service Bus benchmark.""" + self._CreateNamespace() + self._CreateTopic() + self._CreateSubscription() + + def _Exists(self): + return (self._NamespaceExists() and self._TopicExists() and + self._SubscriptionExists()) + + def _Delete(self): + self._DeleteSubscription() + self._DeleteTopic() + self._DeleteNamespace() + + def _IsDeleting(self): + """Overrides BaseResource._IsDeleting. + + Used internally while deleting to check if the deletion is still in + progress. + + Returns: + A bool. True if the resource is not yet deleted, else False. + """ + return (self._NamespaceExists() or self._TopicExists() or + self._SubscriptionExists()) + + def Run(self, benchmark_scenario: str, number_of_messages: str, + message_size: str) -> Dict[str, Any]: + connection_str = self._GetPrimaryConnectionString() + command = (f'python3 -m azure_benchmark ' + f'--topic_name={self.topic_name} ' + f'--subscription_name={self.subscription_name} ' + f'--benchmark_scenario={benchmark_scenario} ' + f'--number_of_messages={number_of_messages} ' + f'--message_size={message_size} ' + f'--connection_str="{connection_str}" ') + results = self.client_vm.RemoteCommand(command) + results = json.loads(results[0]) + return results + + def _InstallCloudClients(self): + # Install/uploads Azure specific modules/files. 
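+    # The client VM gets the azure-servicebus pip package plus the benchmark
+    # driver and helper scripts, so that Run() can invoke
+    # `python3 -m azure_benchmark` remotely against the namespace created by
+    # this resource.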
+ self.client_vm.RemoteCommand( + 'sudo pip3 install azure-servicebus', ignore_failure=False) + + self._CopyFiles(MESSAGING_SERVICE_SCRIPTS_AZURE_PREFIX, + MESSAGING_SERVICE_SCRIPTS_AZURE_FILES, + MESSAGING_SERVICE_SCRIPTS_VM_AZURE_DIR) + self.client_vm.PushDataFile(MESSAGING_SERVICE_SCRIPTS_AZURE_BIN) + + @property + def location(self): + return self.client_vm.zone + + def _CreateTopic(self): + """Creates Service Bus topic.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'topic', 'create', '--name', + self.topic_name, '--namespace-name', self.namespace_name + ] + self.resource_group.args + vm_util.IssueCommand(cmd) + + def _TopicExists(self) -> bool: + """Checks whether Service Bus topic already exists.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'topic', 'show', '--name', + self.topic_name, '--namespace-name', self.namespace_name + ] + self.resource_group.args + _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + return retcode == 0 + + def _DeleteTopic(self): + """Handle Service Bus topic deletion.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'topic', 'delete', '--name', + self.topic_name, '--namespace-name', self.namespace_name + ] + self.resource_group.args + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _CreateSubscription(self): + """Creates Service Bus subscription.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'topic', 'subscription', 'create', + '--name', self.subscription_name, '--topic-name', self.topic_name, + '--namespace-name', self.namespace_name + ] + self.resource_group.args + vm_util.IssueCommand(cmd) + + def _SubscriptionExists(self) -> bool: + """Checks whether Service Bus subscription already exists.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'topic', 'subscription', 'show', + '--name', self.subscription_name, '--topic-name', self.topic_name, + '--namespace-name', self.namespace_name + ] + self.resource_group.args + _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + return retcode == 0 + + def _DeleteSubscription(self): + """Handle Service Bus subscription deletion.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'topic', 'subscription', 'delete', + '--name', self.subscription_name, '--topic-name', self.topic_name, + '--namespace-name', self.namespace_name + ] + self.resource_group.args + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _CreateNamespace(self): + """Creates an Azure Service Bus Namespace.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'namespace', 'create', '--name', + self.namespace_name, '--location', self.location + ] + self.resource_group.args + vm_util.IssueCommand(cmd) + + def _NamespaceExists(self) -> bool: + """Checks if our Service Bus Namespace exists.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'namespace', 'show', '--name', + self.namespace_name + ] + self.resource_group.args + _, _, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + return retcode == 0 + + def _DeleteNamespace(self): + """Deletes the Azure Service Bus namespace.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'namespace', 'delete', '--name', + self.namespace_name + ] + self.resource_group.args + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _GetPrimaryConnectionString(self): + """Gets Azure Service Bus Namespace connection string.""" + cmd = [ + azure.AZURE_PATH, 'servicebus', 'namespace', 'authorization-rule', + 'keys', 'list', '--name=RootManageSharedAccessKey', '--namespace-name', + self.namespace_name, '--query=primaryConnectionString', '-o=tsv' + ] + self.resource_group.args + 
output, stderror, retcode = vm_util.IssueCommand( + cmd, raise_on_failure=False) + if retcode: + logging.warning( + 'Failed to get Service Bus Namespace connection string! %s', stderror) + return output.strip() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_smb_service.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_smb_service.py new file mode 100644 index 0000000..f6013e2 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_smb_service.py @@ -0,0 +1,162 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Azure SMB implementation. + +See Azure Files +https://docs.microsoft.com/en-us/azure/storage/files/storage-files-introduction + +This launches an Azure Files instance and creates a mount point. Individual +AzureDisks will then mount the share. + +The AzureSmbService object is a resource.BaseResource that has two resources +underneath it: +1. A resource to connect to the filer. +2. A resource to connect to the mount point on the filer. + +Lifecycle: +1. Azure Files service created and blocks until it is available, as it is needed + to make the mount point. +2. Issues a non-blocking call to create the mount point. Does not block as the + SmbDisk will block on it being available. +3. The SmbDisk then mounts the mount point and uses the disk like normal. +4. On teardown the mount point is first deleted. Blocks on that returning. +5. The Azure Files service is then deleted. Does not block as can take some + time. + +""" + +import json +import logging +from typing import List + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import network +from perfkitbenchmarker import providers +from perfkitbenchmarker import smb_service +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_network +from perfkitbenchmarker.providers.azure import util + +FLAGS = flags.FLAGS + + +class AzureSmbService(smb_service.BaseSmbService): + """An Azure SMB resource. + + Creates the Azure Files file system and mount point for use with SMB clients. 
+ + See + https://docs.microsoft.com/en-us/azure/storage/files/storage-files-introduction + """ + + CLOUD = providers.AZURE + SMB_TIERS = ('Standard_LRS', 'Premium_LRS') + # TODO(spencerkim): Add smb tier and version to metadata + DEFAULT_SMB_VERSION = '3.0' + DEFAULT_TIER = 'Standard_LRS' + + def __init__(self, disk_spec, zone): + super(AzureSmbService, self).__init__(disk_spec, zone) + self.name = 'azure-smb-fs-%s' % FLAGS.run_uri + self.region = util.GetRegionFromZone(self.zone) + self.resource_group = azure_network.GetResourceGroup(self.region) + + # set during _Create() + self.connection_args: List[str] = None + self.storage_account_key: str = None + self.storage_account_name: str = None + + @property + def network(self): + network_spec = network.BaseNetworkSpec(self.zone) + return azure_network.AzureNetwork.GetNetworkFromNetworkSpec(network_spec) + + def GetRemoteAddress(self): + logging.debug('Calling GetRemoteAddress on SMB server %s', self.name) + if self.name is None: + raise errors.Resource.RetryableGetError('Filer not created') + return '//{storage}.file.core.windows.net/{name}'.format( + storage=self.storage_account_name, name=self.name) + + def GetStorageAccountAndKey(self): + logging.debug('Calling GetStorageAccountAndKey on SMB server %s', self.name) + if self.name is None: + raise errors.Resource.RetryableGetError('Filer not created') + return {'user': self.storage_account_name, 'pw': self.storage_account_key} + + def _Create(self): + """Creates an Azure Files share. + + For Standard Files, see + https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-create-file-share#create-file-share-through-command-line-interface-cli + and for Premium Files, see + https://docs.microsoft.com/en-us/azure/storage/files/storage-how-to-create-premium-fileshare#create-a-premium-file-share-using-azure-cli + """ + logging.info('Creating SMB server %s', self.name) + if FLAGS.smb_tier == 'Standard_LRS': + storage_account_number = azure_network.AzureStorageAccount.total_storage_accounts - 1 + self.storage_account_name = 'pkb%s' % FLAGS.run_uri + 'storage' + str( + storage_account_number) + elif FLAGS.smb_tier == 'Premium_LRS': + storage_account_number = ( + azure_network.AzureStorageAccount.total_storage_accounts) + self.storage_account_name = 'pkb%s' % FLAGS.run_uri + 'filestorage' + str( + storage_account_number) + # Premium Files uses a different storage account kind from Standard Files. + # See links in description for more details. 
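+      # The account below is created with kind='FileStorage'; the region
+      # falls back to 'westus2' when FLAGS.zone[0] is unset.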
+ self.storage_account = azure_network.AzureStorageAccount( + storage_type='Premium_LRS', + region=FLAGS.zone[0] or 'westus2', + name=self.storage_account_name, + kind='FileStorage', + resource_group=self.resource_group, + use_existing=False) + self.storage_account.Create() + + self.connection_args = util.GetAzureStorageConnectionArgs( + self.storage_account_name, self.resource_group.args) + self.storage_account_key = util.GetAzureStorageAccountKey( + self.storage_account_name, self.resource_group.args) + + self._AzureSmbCommand('create') + + def _Delete(self): + logging.info('Deleting SMB server %s', self.name) + self._AzureSmbCommand('delete') + + def _Exists(self): + logging.debug('Calling Exists on SMB server %s', self.name) + return self._AzureSmbCommand('exists')['exists'] + + def _IsReady(self): + logging.debug('Calling IsReady on SMB server %s', self.name) + return self._Exists() + + def _Describe(self): + logging.debug('Calling Describe on SMB server %s', self.name) + output = self._AzureSmbCommand('show') + return output + + def _AzureSmbCommand(self, verb): + cmd = [azure.AZURE_PATH, 'storage', 'share', verb, '--output', 'json'] + cmd += ['--name', self.name] + if verb == 'create': + cmd += ['--quota', str(FLAGS.data_disk_size)] + cmd += self.connection_args + stdout, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode: + raise errors.Error('Error running command %s : %s' % (verb, stderr)) + return json.loads(stdout) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_sql_data_warehouse.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_sql_data_warehouse.py new file mode 100644 index 0000000..187edb4 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_sql_data_warehouse.py @@ -0,0 +1,395 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for Azure's SQL data warehouse EDW service. + +Clusters can be paused and unpaused. +""" + +import copy +import json +import os +from typing import Dict, List, Text, Tuple + +from absl import flags +from perfkitbenchmarker import data +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure + + +FLAGS = flags.FLAGS + + +VALID_EXIST_STATUSES = ['Resuming', 'Online'] +READY_STATUSES = ['Online'] +PAUSING_STATUSES = ['Pausing'] +SYNAPSE_JDBC_JAR = 'synapse-jdbc-client-1.0.jar' + + +def GetSqlDataWarehouseClientInterface( + server_name: str, database: str, user: str, password: str, + resource_group: str) -> edw_service.EdwClientInterface: + """Builds and Returns the requested SqlDataWarehouse client Interface. + + Args: + server_name: Name of the SqlDataWarehouse server to use. + database: Name of the database to run queries against. + user: SqlDataWarehouse username for authentication. + password: SqlDataWarehouse password for authentication. 
+ resource_group: Azure resource group used to whitelist the VM's IP address. + + Returns: + A concrete Client Interface object. + + Raises: + RuntimeError: if an unsupported sqldatawarehouse_client_interface is + requested. + """ + if FLAGS.sqldatawarehouse_client_interface == 'CLI': + return CliClientInterface(server_name, database, user, password, + resource_group) + if FLAGS.sqldatawarehouse_client_interface == 'JDBC': + return JdbcClientInterface(server_name, database, user, password, + resource_group) + raise RuntimeError('Unknown SqlDataWarehouse Client Interface requested.') + + +class CliClientInterface(edw_service.EdwClientInterface): + """Command Line Client Interface class for Azure SqlDataWarehouse. + + Uses the native SqlDataWarehouse client that ships with the Azure CLI. + https://docs.microsoft.com/en-us/cli/azure/sql/server?view=azure-cli-latest + + Attributes: + server_name: Name of the SqlDataWarehouse server to use. + database: Name of the database to run queries against. + user: Redshift username for authentication. + password: Redshift password for authentication. + resource_group: Azure resource group used to whitelist the VM's IP address. + """ + + def __init__(self, server_name: str, database: str, user: str, password: str, + resource_group: str): + self.server_name = server_name + self.database = database + self.user = user + self.password = password + self.resource_group = resource_group + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the sql server tool dependencies. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + self.client_vm.Install('pip') + self.client_vm.RemoteCommand('sudo pip install absl-py') + self.client_vm.Install('mssql_tools') + self.whitelist_ip = self.client_vm.ip_address + + cmd = [ + azure.AZURE_PATH, 'sql', 'server', 'firewall-rule', 'create', '--name', + self.whitelist_ip, '--resource-group', self.resource_group, '--server', + self.server_name, '--end-ip-address', self.whitelist_ip, + '--start-ip-address', self.whitelist_ip + ] + vm_util.IssueCommand(cmd) + + # Push the framework to execute a sql query and gather performance details + service_specific_dir = os.path.join('edw', + Azuresqldatawarehouse.SERVICE_TYPE) + self.client_vm.PushFile( + data.ResourcePath( + os.path.join(service_specific_dir, 'script_runner.sh'))) + runner_permission_update_cmd = 'chmod 755 {}'.format('script_runner.sh') + self.client_vm.RemoteCommand(runner_permission_update_cmd) + self.client_vm.PushFile( + data.ResourcePath(os.path.join('edw', 'script_driver.py'))) + self.client_vm.PushFile( + data.ResourcePath( + os.path.join(service_specific_dir, + 'provider_specific_script_driver.py'))) + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. 
job_id + """ + query_command = ( + 'python script_driver.py --script={} --server={} --database={} ' + '--user={} --password={} --query_timeout={}').format( + query_name, self.server_name, self.database, self.user, + self.password, FLAGS.query_timeout) + stdout, _ = self.client_vm.RemoteCommand(query_command) + performance = json.loads(stdout) + details = copy.copy(self.GetMetadata()) + details['job_id'] = performance[query_name]['job_id'] + return float(performance[query_name]['execution_time']), details + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + return {'client': FLAGS.sqldatawarehouse_client_interface} + + +class JdbcClientInterface(edw_service.EdwClientInterface): + """JDBC Client Interface class for Azure SqlDataWarehouse. + + Attributes: + server_name: Name of the SqlDataWarehouse server to use. + database: Name of the database to run queries against. + user: Redshift username for authentication. + password: Redshift password for authentication. + resource_group: Azure resource group used to whitelist the VM's IP address. + """ + + def __init__(self, server_name: str, database: str, user: str, password: str, + resource_group: str): + self.server_name = server_name + self.database = database + self.user = user + self.password = password + self.resource_group = resource_group + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the sql server tool dependencies. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + self.client_vm.Install('openjdk') + self.client_vm.Install('mssql_tools') + self.client_vm.Install('azure_cli') + self.whitelist_ip = self.client_vm.ip_address + + cmd = [ + azure.AZURE_PATH, 'sql', 'server', 'firewall-rule', 'create', '--name', + self.whitelist_ip, '--resource-group', self.resource_group, '--server', + self.server_name, '--end-ip-address', self.whitelist_ip, + '--start-ip-address', self.whitelist_ip + ] + vm_util.IssueCommand(cmd) + + # Push the executable jar to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData(package_name, + [SYNAPSE_JDBC_JAR], '') + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. 
job_id + """ + query_command = (f'java -cp {SYNAPSE_JDBC_JAR} ' + f'com.google.cloud.performance.edw.Single ' + f'--server {self.server_name} --database {self.database} ' + f'--query_timeout {FLAGS.query_timeout} ' + f'--query_file {query_name}') + stdout, _ = self.client_vm.RemoteCommand(query_command) + performance = json.loads(stdout) + details = copy.copy(self.GetMetadata()) + if 'failure_reason' in performance: + details.update({'failure_reason': performance['failure_reason']}) + else: + details.update(performance['details']) + return performance['query_wall_time_in_secs'], details + + def ExecuteSimultaneous(self, submission_interval: int, + queries: List[str]) -> str: + """Executes queries simultaneously on client and return performance details. + + Simultaneous app expects queries as white space separated query file names. + + Args: + submission_interval: Simultaneous query submission interval in + milliseconds. + queries: List of strings (names) of queries to execute. + + Returns: + A serialized dictionary of execution details. + """ + query_list = ' '.join(queries) + cmd = (f'java -cp {SYNAPSE_JDBC_JAR} ' + f'com.google.cloud.performance.edw.Simultaneous ' + f'--server {self.server_name} --database {self.database} ' + f'--submission_interval {submission_interval} --query_timeout ' + f'{FLAGS.query_timeout} --query_files {query_list}') + stdout, _ = self.client_vm.RemoteCommand(cmd) + return stdout + + def ExecuteThroughput(self, concurrency_streams: List[List[str]]) -> str: + """Executes a throughput test and returns performance details. + + Args: + concurrency_streams: List of streams to execute simultaneously, each of + which is a list of string names of queries. + + Returns: + A serialized dictionary of execution details. + """ + query_list = ' '.join([','.join(stream) for stream in concurrency_streams]) + cmd = ( + f'java -cp {SYNAPSE_JDBC_JAR} ' + f'com.google.cloud.performance.edw.Throughput ' + f'--server {self.server_name} --database {self.database} ' + f'--query_timeout {FLAGS.query_timeout} --query_streams {query_list}') + stdout, _ = self.client_vm.RemoteCommand(cmd) + return stdout + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + return {'client': FLAGS.sqldatawarehouse_client_interface} + + +class Azuresqldatawarehouse(edw_service.EdwService): + """Object representing an Azure SQL data warehouse.""" + + CLOUD = providers.AZURE + SERVICE_TYPE = 'azuresqldatawarehouse' + + def __init__(self, edw_service_spec): + super(Azuresqldatawarehouse, self).__init__(edw_service_spec) + self.whitelist_ip = None + self.resource_group = edw_service_spec.resource_group + self.server_name = edw_service_spec.server_name + self.client_interface = GetSqlDataWarehouseClientInterface( + self.server_name, self.db, self.user, self.password, + self.resource_group) + + def WhitelistIPAddress(self, ip_address): + """To whitelist the IP address on the cluster.""" + self.whitelist_ip = ip_address + + cmd = [azure.AZURE_PATH, + 'sql', + 'server', + 'firewall-rule', + 'create', + '--name', + self.whitelist_ip, + '--resource-group', + self.resource_group, + '--server', + self.server_name, + '--end-ip-address', + self.whitelist_ip, + '--start-ip-address', + self.whitelist_ip] + vm_util.IssueCommand(cmd) + + def __DescribeCluster(self): + """Describe cluster.""" + cmd = [azure.AZURE_PATH, + 'sql', + 'dw', + 'show', + '--name', + self.db, + '--resource-group', + self.resource_group, + '--server', + self.server_name] + return 
vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _Exists(self): + """Method to validate the existence of cluster. + + Returns: + Boolean value indicating the existence of a cluster. + """ + stdout, _, _ = self.__DescribeCluster() + if not stdout or (json.loads(stdout)['status'] not in VALID_EXIST_STATUSES): + return False + else: + return True + + def _IsReady(self): + """Method to return if the cluster is ready to handle queries.""" + stdout, _, _ = self.__DescribeCluster() + return json.loads(stdout)['status'] in READY_STATUSES + + def _Create(self): + """Resuming the cluster.""" + cmd = [azure.AZURE_PATH, + 'sql', + 'dw', + 'resume', + '--name', + self.db, + '--resource-group', + self.resource_group, + '--server', + self.server_name] + vm_util.IssueCommand(cmd, timeout=420) + + def _IsDeleting(self): + """Method to check if the cluster is pausing.""" + stdout, _, _ = self.__DescribeCluster() + if not stdout: + return False + else: + return json.loads(stdout)['status'] in PAUSING_STATUSES + + def _Delete(self): + """Pausing cluster.""" + cmd = [azure.AZURE_PATH, + 'sql', + 'dw', + 'pause', + '--name', + self.db, + '--resource-group', + self.resource_group, + '--server', + self.server_name] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def _DeleteDependencies(self): + """Delete dependencies of the cluster.""" + if self.client_interface.whitelist_ip is not None: + cmd = [ + azure.AZURE_PATH, 'sql', 'server', 'firewall-rule', 'delete', + '--name', self.client_interface.whitelist_ip, '--resource-group', + self.resource_group, '--server', self.server_name + ] + vm_util.IssueCommand(cmd, raise_on_failure=False) + + def GetMetadata(self): + """Return a dictionary of the metadata for this cluster.""" + basic_data = super(Azuresqldatawarehouse, self).GetMetadata() + basic_data['resource_group'] = self.resource_group + basic_data['server_name'] = self.server_name + basic_data.update(self.client_interface.GetMetadata()) + return basic_data diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_virtual_machine.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_virtual_machine.py new file mode 100644 index 0000000..6496be2 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/azure_virtual_machine.py @@ -0,0 +1,1165 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Class to represent an Azure Virtual Machine object. + +Zones: +run 'azure vm location list' +Machine Types: +http://msdn.microsoft.com/en-us/library/azure/dn197896.aspx +Images: +run 'azure vm image list' + +All VM specifics are self-contained and the class provides methods to +operate on the VM: boot, shutdown, etc. 
+""" + + +import abc +import collections +import itertools +import json +import logging +import posixpath +import re +import threading +import six + +from absl import flags +from perfkitbenchmarker import custom_virtual_machine_spec +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_virtual_machine +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import windows_virtual_machine +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers import azure +from perfkitbenchmarker.providers.azure import azure_disk +from perfkitbenchmarker.providers.azure import azure_network +from perfkitbenchmarker.providers.azure import util +from six.moves import range +# Added by Intel +import yaml +import ipaddress + +try: + unicode +except NameError: + unicode = str +# End added by Intel + +FLAGS = flags.FLAGS +NUM_LOCAL_VOLUMES = { + 'Standard_L8s_v2': 1, + 'Standard_L16s_v2': 2, + 'Standard_L32s_v2': 4, + 'Standard_L64s_v2': 8, + 'Standard_L80s_v2': 10 +} + +_MACHINE_TYPES_ARM64 = ( + 'Standard_D2ps_v5', 'Standard_D4ps_v5', 'Standard_D8ps_v5', + 'Standard_D16ps_v5', 'Standard_D32ps_v5', 'Standard_D48ps_v5', + 'Standard_D64ps_v5', 'Standard_D2pds_v5', 'Standard_D4pds_v5', + 'Standard_D8pds_v5', 'Standard_D16pds_v5', 'Standard_D32pds_v5', + 'Standard_D48pds_v5', 'Standard_D64pds_v5', + + 'Standard_D2pls_v5', 'Standard_D4pls_v5', 'Standard_D8pls_v5', + 'Standard_D16pls_v5', 'Standard_D32pls_v5', 'Standard_D48pls_v5', + 'Standard_D64pls_v5', + + 'Standard_D2plds_v5', 'Standard_D4plds_v5', 'Standard_D8plds_v5', + 'Standard_D16plds_v5', 'Standard_D32plds_v5', 'Standard_D48plds_v5', + 'Standard_D64plds_v5', + + 'Standard_E2ps_v5', 'Standard_E4ps_v5', 'Standard_E8ps_v5', + 'Standard_E16ps_v5', 'Standard_E20ps_v5', 'Standard_E32ps_v5', + + 'Standard_E2pds_v5', 'Standard_E4pds_v5', 'Standard_E8pds_v5', + 'Standard_E16pds_v5', 'Standard_E20pds_v5', 'Standard_E32pds_v5', +) + +_MACHINE_TYPES_ONLY_SUPPORT_GEN2_IMAGES = ( + 'Standard_ND96asr_v4', 'Standard_ND96asr_A100_v4', + 'Standard_ND96amsr_A100_v4', 'Standard_M208ms_v2', 'Standard_M208s_v2', + 'Standard_M416ms_v2', 'Standard_M416s_v2', 'Standard_ND40rs_v2', + 'Standard_M32ms_v2', 'Standard_M64s_v2', 'Standard_M64ms_v2', + 'Standard_M128s_v2', 'Standard_M128ms_v2', 'Standard_M192is_v2', + 'Standard_M192ims_v2', 'Standard_M32dms_v2', 'Standard_M64ds_v2', + 'Standard_M128ds_v2', 'Standard_M128dms_v2', 'Standard_M192ids_v2', + 'Standard_M192idms_v2', 'Standard_DC2s_v2', 'Standard_DC2s_v3', + 'Standard_DC32ds_v3', 'Standard_DC32s_v3', 'Standard_DC48ds_v3', + 'Standard_DC48s_v3', 'Standard_DC4ds_v3', 'Standard_DC4s_v2', + 'Standard_DC4s_v3', 'Standard_DC8_v2', 'Standard_DC8ds_v3', + 'Standard_DC8s_v3', 'Standard_FX12mds', 'Standard_FX24mds', + 'Standard_FX36mds', 'Standard_FX48mds', 'Standard_FX4mds', + 'Standard_M64dms_v2', 'Standard_DC16ds_v3', 'Standard_DC16s_v3', + 'Standard_DC1ds_v3', 'Standard_DC1s_v3', 'Standard_DC24ds_v3', + 'Standard_DC24s_v3', 'Standard_DC2ds_v3', 'Standard_DC1s_v2', +) + _MACHINE_TYPES_ARM64 + + +# https://docs.microsoft.com/en-us/azure/virtual-machines/windows/scheduled-events +_SCHEDULED_EVENTS_CMD = ('curl -H Metadata:true http://169.254.169.254/metadata' + '/scheduledevents?api-version=2019-01-01') + +_SCHEDULED_EVENTS_CMD_WIN = ('Invoke-RestMethod 
-Headers @{"Metadata"="true"} ' + '-Uri http://169.254.169.254/metadata/' + 'scheduledevents?api-version=2019-01-01 | ' + 'ConvertTo-Json') + + +class AzureVmSpec(virtual_machine.BaseVmSpec): + """Object containing the information needed to create a AzureVirtualMachine. + + Attributes: + tier: None or string. performance tier of the machine. + compute_units: int. number of compute units for the machine. + accelerated_networking: boolean. True if supports accelerated_networking. + boot_disk_size: None or int. The size of the boot disk in GB. + boot_disk_type: string or None. The type of the boot disk. + low_priority: boolean. True if the VM should be low-priority, else False. + """ + + CLOUD = providers.AZURE + + def __init__(self, *args, **kwargs): + super(AzureVmSpec, self).__init__(*args, **kwargs) + if isinstance(self.machine_type, + custom_virtual_machine_spec.AzurePerformanceTierDecoder): + self.tier = self.machine_type.tier + self.compute_units = self.machine_type.compute_units + self.machine_type = None + else: + self.tier = None + self.compute_units = None + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(AzureVmSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['machine_type'].present: + config_values['machine_type'] = yaml.safe_load(flag_values.machine_type) + if flag_values['azure_accelerated_networking'].present: + config_values['accelerated_networking'] = ( + flag_values.azure_accelerated_networking) + if flag_values['azure_low_priority_vms'].present: + config_values['low_priority'] = flag_values.azure_low_priority_vms + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(AzureVmSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'machine_type': + (custom_virtual_machine_spec.AzureMachineTypeDecoder, {}), + 'accelerated_networking': (option_decoders.BooleanDecoder, { + 'default': False + }), + 'boot_disk_size': (option_decoders.IntDecoder, { + 'default': None + }), + 'boot_disk_type': (option_decoders.StringDecoder, { + 'default': None + }), + 'low_priority': (option_decoders.BooleanDecoder, { + 'default': False + }), + }) + return result + + +# Per-VM resources are defined here. +class AzurePublicIPAddress(resource.BaseResource): + """Class to represent an Azure Public IP Address.""" + + def __init__(self, region, availability_zone, name, dns_name=None): + super(AzurePublicIPAddress, self).__init__() + self.region = region + self.availability_zone = availability_zone + self.name = name + self._deleted = False + self.resource_group = azure_network.GetResourceGroup() + self.dns_name = dns_name + + def _Create(self): + cmd = [ + azure.AZURE_PATH, 'network', 'public-ip', 'create', '--location', + self.region, '--name', self.name + ] + self.resource_group.args + + if self.availability_zone: + # Availability Zones require Standard IPs. 
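+      # Sketch of the arguments added below (illustrative only, for zone "1"):
+      #   az network public-ip create ... --zone 1 --sku Standard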
+ # TODO(user): Consider setting this by default + cmd += ['--zone', self.availability_zone, '--sku', 'Standard'] + + if self.dns_name: + cmd += ['--dns-name', self.dns_name] + + _, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + + if retcode and re.search(r'Cannot create more than \d+ public IP addresses', + stderr): + raise errors.Benchmarks.QuotaFailure( + virtual_machine.QUOTA_EXCEEDED_MESSAGE + stderr) + + def _Exists(self): + if self._deleted: + return False + + stdout, _, _ = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'network', 'public-ip', 'show', '--output', + 'json', '--name', self.name + ] + self.resource_group.args, + raise_on_failure=False) + try: + json.loads(stdout) + return True + except ValueError: + return False + + def GetIPAddress(self): + stdout, _ = vm_util.IssueRetryableCommand([ + azure.AZURE_PATH, 'network', 'public-ip', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args) + + response = json.loads(stdout) + return response['ipAddress'] + + def _Delete(self): + self._deleted = True + + +class AzureNIC(resource.BaseResource): + """Class to represent an Azure NIC.""" + + def __init__(self, + subnet, + name, + public_ip, + accelerated_networking, + network_security_group=None, + private_ip=None): + super(AzureNIC, self).__init__() + self.subnet = subnet + self.name = name + self.public_ip = public_ip + self.private_ip = private_ip + self._deleted = False + self.resource_group = azure_network.GetResourceGroup() + self.region = self.subnet.vnet.region + self.args = ['--nics', self.name] + self.accelerated_networking = accelerated_networking + self.network_security_group = network_security_group + + def _Create(self): + cmd = [ + azure.AZURE_PATH, 'network', 'nic', 'create', '--location', + self.region, '--vnet-name', self.subnet.vnet.name, '--subnet', + self.subnet.name, '--public-ip-address', self.public_ip, '--name', + self.name + ] + self.resource_group.args + if self.private_ip: + cmd += ['--private-ip-address', self.private_ip] + if self.accelerated_networking: + cmd += ['--accelerated-networking', 'true'] + if self.network_security_group: + cmd += ['--network-security-group', self.network_security_group.name] + vm_util.IssueCommand(cmd) + + def _Exists(self): + if self._deleted: + return False + # Same deal as AzurePublicIPAddress. 'show' doesn't error out if + # the resource doesn't exist, but no-op 'set' does. + stdout, _, _ = vm_util.IssueCommand( + [ + azure.AZURE_PATH, 'network', 'nic', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args, + raise_on_failure=False) + try: + json.loads(stdout) + return True + except ValueError: + return False + + def GetInternalIP(self): + """Grab some data.""" + + stdout, _ = vm_util.IssueRetryableCommand([ + azure.AZURE_PATH, 'network', 'nic', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args) + + response = json.loads(stdout) + return response['ipConfigurations'][0]['privateIpAddress'] + + def _Delete(self): + self._deleted = True + + +class AzureDedicatedHostGroup(resource.BaseResource): + """Object representing an Azure host group (a collection of dedicated hosts). + + A host group is required for dedicated host creation. + Attributes: + name: The name of the vm - to be part of the host group name. + location: The region the host group will exist in. + resource_group: The group of resources for the host group. 
+ """ + + def __init__(self, name, region, resource_group, availability_zone): + super(AzureDedicatedHostGroup, self).__init__() + self.name = name + 'Group' + self.region = region + self.resource_group = resource_group + self.availability_zone = availability_zone + + def _Create(self): + """See base class.""" + create_cmd = ([ + azure.AZURE_PATH, + 'vm', + 'host', + 'group', + 'create', + '--name', + self.name, + '--location', + self.region, + # number of fault domains (physical racks) to span across + # TODO(buggay): add support for multiple fault domains + # https://docs.microsoft.com/en-us/azure/virtual-machines/windows/dedicated-hosts#high-availability-considerations + '--platform-fault-domain-count', + '1', + ] + self.resource_group.args) + + if self.availability_zone: + create_cmd.extend(['--zone', self.availability_zone]) + + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """See base class.""" + delete_cmd = ([ + azure.AZURE_PATH, + 'vm', + 'host', + 'group', + 'delete', + '--host-group', + self.name, + ] + self.resource_group.args) + vm_util.IssueCommand(delete_cmd) + + def _Exists(self): + """See base class.""" + show_cmd = [ + azure.AZURE_PATH, 'vm', 'host', 'group', 'show', '--output', 'json', + '--name', self.name + ] + self.resource_group.args + stdout, _, _ = vm_util.IssueCommand(show_cmd, raise_on_failure=False) + try: + json.loads(stdout) + return True + except ValueError: + return False + + +def _GetSkuType(machine_type): + """Returns the host SKU type derived from the VM machine type.""" + # TODO(buggay): add support for FSv2 machine types when no longer in preview + # https://docs.microsoft.com/en-us/azure/virtual-machines/windows/dedicated-hosts + sku = '' + if re.match('Standard_D[0-9]*s_v3', machine_type): + sku = 'DSv3-Type1' + elif re.match('Standard_E[0-9]*s_v3', machine_type): + sku = 'ESv3-Type1' + else: + raise ValueError('Dedicated hosting does not support machine type %s.' % + machine_type) + return sku + + +class AzureDedicatedHost(resource.BaseResource): + """Object representing an Azure host. + + Attributes: + host_group: The required host group to which the host will belong. + name: The name of the vm - to be part of the host name. + region: The region the host will exist in. + resource_group: The group of resources for the host. 
+ """ + _lock = threading.Lock() + # globals guarded by _lock + host_group_map = {} + + def __init__(self, name, region, resource_group, sku_type, + availability_zone): + super(AzureDedicatedHost, self).__init__() + self.name = name + '-Host' + self.region = region + self.resource_group = resource_group + self.sku_type = sku_type + self.availability_zone = availability_zone + self.host_group = None + self.fill_fraction = 0.0 + + def _CreateDependencies(self): + """See base class.""" + with self._lock: + if self.region not in self.host_group_map: + new_host_group = AzureDedicatedHostGroup(self.name, self.region, + self.resource_group, + self.availability_zone) + new_host_group.Create() + self.host_group_map[self.region] = new_host_group.name + self.host_group = self.host_group_map[self.region] + + def _Create(self): + """See base class.""" + create_cmd = ([ + azure.AZURE_PATH, + 'vm', + 'host', + 'create', + '--host-group', + self.host_group, + '--name', + self.name, + '--sku', + self.sku_type, + '--location', + self.region, + # the specific fault domain (physical rack) for the host dependent on + # the number (count) of fault domains of the host group + # TODO(buggay): add support for specifying multiple fault domains if + # benchmarks require + '--platform-fault-domain', + '0', + ] + self.resource_group.args) + vm_util.IssueCommand(create_cmd) + + def _Delete(self): + """See base class.""" + delete_cmd = ([ + azure.AZURE_PATH, + 'vm', + 'host', + 'delete', + '--host-group', + self.host_group, + '--name', + self.name, + '--yes', + ] + self.resource_group.args) + vm_util.IssueCommand(delete_cmd) + + def _Exists(self): + """See base class.""" + show_cmd = [ + azure.AZURE_PATH, + 'vm', + 'host', + 'show', + '--output', + 'json', + '--name', + self.name, + '--host-group', + self.host_group, + ] + self.resource_group.args + stdout, _, _ = vm_util.IssueCommand(show_cmd, raise_on_failure=False) + try: + json.loads(stdout) + return True + except ValueError: + return False + + +class AzureVirtualMachine(virtual_machine.BaseVirtualMachine): + """Object representing an Azure Virtual Machine.""" + CLOUD = providers.AZURE + + _lock = threading.Lock() + # TODO(buggay): remove host groups & hosts as globals -> create new spec + # globals guarded by _lock + host_map = collections.defaultdict(list) + + def __init__(self, vm_spec): + """Initialize an Azure virtual machine. + + Args: + vm_spec: virtual_machine.BaseVmSpec object of the vm. + """ + super(AzureVirtualMachine, self).__init__(vm_spec) + + # PKB zone can be either a region or a region with an availability zone. + # Format for Azure availability zone support is "region-availability_zone" + # Example: eastus2-1 is Azure region eastus2 with availability zone 1. 
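+    # Illustrative examples of the helpers used below (defined in util.py in
+    # this patch): GetRegionFromZone('eastus2-1') -> 'eastus2' and
+    # GetAvailabilityZoneFromZone('eastus2-1') -> '1'; for a bare region such
+    # as 'eastus2' the availability zone resolves to None.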
+ + self.region = util.GetRegionFromZone(self.zone) + self.availability_zone = util.GetAvailabilityZoneFromZone(self.zone) + self.use_dedicated_host = vm_spec.use_dedicated_host + self.num_vms_per_host = vm_spec.num_vms_per_host + self.network = azure_network.AzureNetwork.GetNetwork(self) + self.firewall = azure_network.AzureFirewall.GetFirewall() + self.max_local_disks = NUM_LOCAL_VOLUMES.get(self.machine_type) or 1 + self._lun_counter = itertools.count() + self._deleted = False + + self.resource_group = azure_network.GetResourceGroup() + self.public_ip = AzurePublicIPAddress(self.region, self.availability_zone, + self.name + '-public-ip') + self.nic = AzureNIC(self.network.subnet, self.name + '-nic', + self.public_ip.name, vm_spec.accelerated_networking, + self.network.nsg) + self.storage_account = self.network.storage_account + if vm_spec.image: + self.image = vm_spec.image + elif self.machine_type in _MACHINE_TYPES_ONLY_SUPPORT_GEN2_IMAGES: + if hasattr(type(self), 'GEN2_IMAGE_URN'): + if self.machine_type in _MACHINE_TYPES_ARM64: + self.image = type(self).IMAGE_ARM64_URN + else: + self.image = type(self).GEN2_IMAGE_URN + else: + raise errors.Benchmarks.UnsupportedConfigError('No Azure gen2 image.') + else: + self.image = type(self).IMAGE_URN + + self.host = None + if self.use_dedicated_host: + self.host_series_sku = _GetSkuType(self.machine_type) + self.host_list = None + self.low_priority = vm_spec.low_priority + self.low_priority_status_code = None + self.spot_early_termination = False + self.ultra_ssd_enabled = False + + disk_spec = disk.BaseDiskSpec('azure_os_disk') + disk_spec.disk_type = ( + vm_spec.boot_disk_type or self.storage_account.storage_type) + if vm_spec.boot_disk_size: + disk_spec.disk_size = vm_spec.boot_disk_size + self.os_disk = azure_disk.AzureDisk( + disk_spec, + self, + None, + is_image=True) + + @property + @classmethod + @abc.abstractmethod + def IMAGE_URN(cls): + raise NotImplementedError() + + def _CreateDependencies(self): + """Create VM dependencies.""" + self.public_ip.Create() + self.nic.Create() + + if self.use_dedicated_host: + with self._lock: + self.host_list = self.host_map[(self.host_series_sku, self.region)] + if (not self.host_list or (self.num_vms_per_host and + self.host_list[-1].fill_fraction + + 1.0 / self.num_vms_per_host > 1.0)): + new_host = AzureDedicatedHost(self.name, self.region, + self.resource_group, + self.host_series_sku, + self.availability_zone) + self.host_list.append(new_host) + new_host.Create() + self.host = self.host_list[-1] + if self.num_vms_per_host: + self.host.fill_fraction += 1.0 / self.num_vms_per_host + + def _RequiresUltraDisk(self): + return any(disk_spec.disk_type == azure_disk.ULTRA_STORAGE + for disk_spec in self.disk_specs) + + def _Create(self): + """See base class.""" + if self.os_disk.disk_size: + disk_size_args = ['--os-disk-size-gb', str(self.os_disk.disk_size)] + else: + disk_size_args = [] + + tags = {} + tags.update(self.vm_metadata) + tags.update(util.GetResourceTags(self.resource_group.timeout_minutes)) + tag_args = ['--tags'] + util.FormatTags(tags) + + create_cmd = ([ + azure.AZURE_PATH, 'vm', 'create', '--location', self.region, + '--image', self.image, '--size', self.machine_type, '--admin-username', + self.user_name, '--storage-sku', self.os_disk.disk_type, '--name', + self.name + ] + disk_size_args + self.resource_group.args + self.nic.args + tag_args) + + if self._RequiresUltraDisk(): + self.ultra_ssd_enabled = True + create_cmd.extend(['--ultra-ssd-enabled']) + + if self.availability_zone: + 
create_cmd.extend(['--zone', self.availability_zone]) + + # Resources in Availability Set are not allowed to be + # deployed to particular hosts. + if self.use_dedicated_host: + create_cmd.extend( + ['--host-group', self.host.host_group, '--host', self.host.name]) + num_hosts = len(self.host_list) + + if self.network.placement_group: + create_cmd.extend(self.network.placement_group.AddVmArgs()) + + if self.low_priority: + create_cmd.extend(['--priority', 'Spot']) + + if self.password: + create_cmd.extend(['--admin-password', self.password]) + else: + create_cmd.extend(['--ssh-key-value', self.ssh_public_key]) + + # Uses a custom default because create has a very long tail. + azure_vm_create_timeout = 1800 + _, stderr, retcode = vm_util.IssueCommand( + create_cmd, timeout=azure_vm_create_timeout, raise_on_failure=False) + if retcode: + if 'quota' in stderr.lower(): + raise errors.Benchmarks.QuotaFailure( + virtual_machine.QUOTA_EXCEEDED_MESSAGE + stderr) + elif re.search( + r'requested VM size \S+ is not available', stderr) or re.search( + r'not available in location .+ for subscription', stderr): + raise errors.Benchmarks.UnsupportedConfigError(stderr) + elif self.low_priority and 'OverconstrainedAllocationRequest' in stderr: + raise errors.Benchmarks.InsufficientCapacityCloudFailure(stderr) + # TODO(buggay) refactor to share code with gcp_virtual_machine.py + if (self.use_dedicated_host and retcode and + 'AllocationFailed' in stderr): + if self.num_vms_per_host: + raise errors.Resource.CreationError( + 'Failed to create host: %d vms of type %s per host exceeds ' + 'memory capacity limits of the host' % + (self.num_vms_per_host, self.machine_type)) + else: + logging.warning( + 'Creation failed due to insufficient host capacity. A new host will ' + 'be created and instance creation will be retried.') + with self._lock: + if num_hosts == len(self.host_list): + new_host = AzureDedicatedHost(self.name, self.region, + self.resource_group, + self.host_series_sku, + self.availability_zone) + self.host_list.append(new_host) + new_host.Create() + self.host = self.host_list[-1] + raise errors.Resource.RetryableCreationError() + if (not self.use_dedicated_host and retcode and + ('AllocationFailed' in stderr or + 'OverconstrainedZonalAllocationRequest' in stderr)): + raise errors.Benchmarks.InsufficientCapacityCloudFailure(stderr) + if retcode: + if "Virtual Machine Scale Set with '' security type." in stderr: + raise errors.Resource.CreationError( + f'Failed to create VM: {self.machine_type} is likely a confidential' + ' machine, which PKB does not support at this time.\n\n' + f' Full error: {stderr} return code: {retcode}') + if "cannot boot Hypervisor Generation '1'" in stderr: + raise errors.Resource.CreationError( + f'Failed to create VM: {self.machine_type} is unable to support V1 ' + 'Hypervision. Please update _MACHINE_TYPES_ONLY_SUPPORT_GEN2_IMAGES' + ' in azure_virtual_machine.py.\n\n' + f' Full error: {stderr} return code: {retcode}') + else: + raise errors.Resource.CreationError( + 'Failed to create VM: %s return code: %s' % (stderr, retcode)) + + def _Exists(self): + """Returns True if the VM exists.""" + if self._deleted: + return False + show_cmd = [ + azure.AZURE_PATH, 'vm', 'show', '--output', 'json', '--name', self.name + ] + self.resource_group.args + stdout, _, _ = vm_util.IssueCommand(show_cmd, raise_on_failure=False) + try: + json.loads(stdout) + return True + except ValueError: + return False + + def _Delete(self): + # The VM will be deleted when the resource group is. 
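+    # No per-VM `az vm delete` is issued: deleting the shared resource group
+    # (obtained via azure_network.GetResourceGroup() above) removes the VM
+    # along with its NIC, public IP and disks, so only local state is marked.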
+ self._deleted = True + + def _Start(self): + """Starts the VM.""" + start_cmd = ([azure.AZURE_PATH, 'vm', 'start', '--name', self.name] + + self.resource_group.args) + vm_util.IssueCommand(start_cmd) + self.ip_address = self.public_ip.GetIPAddress() + + def _Stop(self): + """Stops the VM.""" + stop_cmd = ([azure.AZURE_PATH, 'vm', 'stop', '--name', self.name] + + self.resource_group.args) + vm_util.IssueCommand(stop_cmd) + # remove resources, similar to GCE stop + deallocate_cmd = ( + [azure.AZURE_PATH, 'vm', 'deallocate', '--name', self.name] + + self.resource_group.args) + vm_util.IssueCommand(deallocate_cmd) + + def _Suspend(self): + """Suspends the VM.""" + raise NotImplementedError() + + def _Resume(self): + """Resumes the VM.""" + raise NotImplementedError() + + @vm_util.Retry() + def _PostCreate(self): + """Get VM data.""" + stdout, _ = vm_util.IssueRetryableCommand([ + azure.AZURE_PATH, 'vm', 'show', '--output', 'json', '--name', self.name + ] + self.resource_group.args) + response = json.loads(stdout) + self.os_disk.name = response['storageProfile']['osDisk']['name'] + self.os_disk.created = True + vm_util.IssueCommand([ + azure.AZURE_PATH, 'disk', 'update', '--name', self.os_disk.name, + '--set', + util.GetTagsJson(self.resource_group.timeout_minutes) + ] + self.resource_group.args) + self.internal_ip = self.nic.GetInternalIP() + self.ip_address = self.public_ip.GetIPAddress() + + def AddMetadata(self, **kwargs): + if not kwargs: + return + tag_list = ['tags.%s=%s' % (k, v) for k, v in six.iteritems(kwargs)] + vm_util.IssueRetryableCommand( + [azure.AZURE_PATH, 'vm', 'update', '--name', self.name] + + self.resource_group.args + ['--set'] + tag_list) + + def CreateScratchDisk(self, disk_spec): + """Create a VM's scratch disk. + + Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. + + Raises: + CreationError: If an SMB disk is listed but the SMB service not created. + """ + disks = [] + + for _ in range(disk_spec.num_striped_disks): + if disk_spec.disk_type == disk.NFS: + data_disk = self._GetNfsService().CreateNfsDisk() + disks.append(data_disk) + continue + elif disk_spec.disk_type == disk.SMB: + data_disk = self._GetSmbService().CreateSmbDisk() + disks.append(data_disk) + continue + elif disk_spec.disk_type == disk.LOCAL: + # Local disk numbers start at 1 (0 is the system disk). + disk_number = self.local_disk_counter + 1 + self.local_disk_counter += 1 + if self.local_disk_counter > self.max_local_disks: + raise errors.Error('Not enough local disks.') + else: + # Remote disk numbers start at max_local disks, Azure does not separate + # local disk and system disk. + disk_number = self.remote_disk_counter + self.max_local_disks + self.remote_disk_counter += 1 + lun = next(self._lun_counter) + data_disk = azure_disk.AzureDisk(disk_spec, self, lun) + data_disk.disk_number = disk_number + disks.append(data_disk) + + self._CreateScratchDiskFromDisks(disk_spec, disks) + + def InstallCli(self): + """Installs the Azure cli and credentials on this Azure vm.""" + self.Install('azure_cli') + self.Install('azure_credentials') + + def DownloadPreprovisionedData(self, install_path, module_name, filename): + """Downloads a data file from Azure blob storage with pre-provisioned data. + + Use --azure_preprovisioned_data_bucket to specify the name of the account. + + Note: Azure blob storage does not allow underscores in the container name, + so this method replaces any underscores in module_name with dashes. 
+ Make sure that the same convention is used when uploading the data + to Azure blob storage. For example: when uploading data for + 'module_name' to Azure, create a container named 'benchmark-name'. + + Args: + install_path: The install path on this VM. + module_name: Name of the module associated with this data file. + filename: The name of the file that was downloaded. + """ + # N.B. Should already be installed by ShouldDownloadPreprovisionedData + self.Install('azure_cli') + self.RemoteCommand( + GenerateDownloadPreprovisionedDataCommand(install_path, module_name, + filename)) + + def ShouldDownloadPreprovisionedData(self, module_name, filename): + """Returns whether or not preprovisioned data is available.""" + # Do not install credentials. Data are fetched using locally generated + # connection strings and do not use credentials on the VM. + self.Install('azure_cli') + return FLAGS.azure_preprovisioned_data_bucket and self.TryRemoteCommand( + GenerateStatPreprovisionedDataCommand(module_name, filename)) + + def GetResourceMetadata(self): + result = super(AzureVirtualMachine, self).GetResourceMetadata() + result['accelerated_networking'] = self.nic.accelerated_networking + result['boot_disk_type'] = self.os_disk.disk_type + result['boot_disk_size'] = self.os_disk.disk_size + if self.network.placement_group: + result['placement_group_strategy'] = self.network.placement_group.strategy + else: + result['placement_group_strategy'] = placement_group.PLACEMENT_GROUP_NONE + result['preemptible'] = self.low_priority + if self.use_dedicated_host: + result['num_vms_per_host'] = self.num_vms_per_host + return result + + @vm_util.Retry(max_retries=5) + def UpdateInterruptibleVmStatus(self): + """Updates the interruptible status if the VM was preempted.""" + if self.spot_early_termination: + return + if self.low_priority and self._Exists(): + stdout, stderr, return_code = self.RemoteCommandWithReturnCode( + _SCHEDULED_EVENTS_CMD) + if return_code: + logging.error('Checking Interrupt Error: %s', stderr) + else: + events = json.loads(stdout).get('Events', []) + self.spot_early_termination = any( + event.get('EventType') == 'Preempt' for event in events) + if self.spot_early_termination: + logging.info('Spotted early termination on %s', self) + + def _UpdateInterruptibleVmStatusThroughMetadataService(self): + stdout, stderr, return_code = self.RemoteCommandWithReturnCode( + _SCHEDULED_EVENTS_CMD) + if return_code: + logging.error('Checking Interrupt Error: %s', stderr) + else: + events = json.loads(stdout).get('Events', []) + self.spot_early_termination = any( + event.get('EventType') == 'Preempt' for event in events) + if self.spot_early_termination: + logging.info('Spotted early termination on %s', self) + + def IsInterruptible(self): + """Returns whether this vm is a interruptible vm (e.g. spot, preemptible). + + Returns: + True if this vm is a interruptible vm. + """ + return self.low_priority + + def WasInterrupted(self): + """Returns whether this low-priority vm was terminated early by Azure. + + Returns: True if this vm was terminated early by Azure. + """ + return self.spot_early_termination + + def GetVmStatusCode(self): + """Returns the early termination code if any. + + Returns: Early termination code. 
+ """ + return self.low_priority_status_code + + +class Debian9BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Debian9Mixin): + # From https://wiki.debian.org/Cloud/MicrosoftAzure + IMAGE_URN = 'credativ:Debian:9:latest' + + +class Debian10BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Debian10Mixin): + # From https://wiki.debian.org/Cloud/MicrosoftAzure + GEN2_IMAGE_URN = 'Debian:debian-10:10-gen2:latest' + IMAGE_URN = 'Debian:debian-10:10:latest' + + +class Debian11BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Debian11Mixin): + # From https://wiki.debian.org/Cloud/MicrosoftAzure + GEN2_IMAGE_URN = 'Debian:debian-11:11-gen2:latest' + IMAGE_URN = 'Debian:debian-11:11:latest' + + +class Ubuntu1604BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Ubuntu1604Mixin): + GEN2_IMAGE_URN = 'Canonical:UbuntuServer:16_04-lts-gen2:latest' + IMAGE_URN = 'Canonical:UbuntuServer:16.04-LTS:latest' + + +class Ubuntu1804BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Ubuntu1804Mixin): + GEN2_IMAGE_URN = 'Canonical:UbuntuServer:18_04-lts-gen2:latest' + IMAGE_URN = 'Canonical:UbuntuServer:18.04-LTS:latest' + + +class Ubuntu2004BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Ubuntu2004Mixin): + IMAGE_ARM64_URN = 'Canonical:0001-com-ubuntu-server-focal:20_04-lts-arm64:latest' + GEN2_IMAGE_URN = 'Canonical:0001-com-ubuntu-server-focal:20_04-lts-gen2:latest' + IMAGE_URN = 'Canonical:0001-com-ubuntu-server-focal:20_04-lts:latest' + + +class Ubuntu2204BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Ubuntu2204Mixin): + IMAGE_ARM64_URN = 'Canonical:0001-com-ubuntu-server-jammy:22_04-lts-arm64:latest' + GEN2_IMAGE_URN = 'Canonical:0001-com-ubuntu-server-jammy:22_04-lts-gen2:latest' + IMAGE_URN = 'Canonical:0001-com-ubuntu-server-jammy:22_04-lts:latest' + + +class Rhel7BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Rhel7Mixin): + GEN2_IMAGE_URN = 'RedHat:RHEL:7lvm-gen2:latest' + IMAGE_URN = 'RedHat:RHEL:7-LVM:latest' + + +class Rhel8BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.Rhel8Mixin): + GEN2_IMAGE_URN = 'RedHat:RHEL:8-lvm-gen2:latest' + IMAGE_URN = 'RedHat:RHEL:8-LVM:latest' + + +class CentOs7BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.CentOs7Mixin): + GEN2_IMAGE_URN = 'OpenLogic:CentOS-LVM:7-lvm-gen2:latest' + IMAGE_URN = 'OpenLogic:CentOS-LVM:7-lvm:latest' + + +class CentOs8BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.CentOs8Mixin): + GEN2_IMAGE_URN = 'OpenLogic:CentOS-LVM:8-lvm-gen2:latest' + IMAGE_URN = 'OpenLogic:CentOS-LVM:8-lvm:latest' + + +class CentOsStream8BasedAzureVirtualMachine(AzureVirtualMachine, + linux_virtual_machine.CentOsStream8Mixin): + # TODO: Change to Azure official centos 8 stream image when it is available + IMAGE_URN = 'cloudwhizsolutions:centos-8-stream-cw:centos-8-stream-cw:1.2019.0712' + + +# TODO(pclay): Add Fedora CoreOS when available: +# https://docs.fedoraproject.org/en-US/fedora-coreos/provisioning-azure/ + + +class BaseWindowsAzureVirtualMachine(AzureVirtualMachine, + windows_virtual_machine.BaseWindowsMixin): + """Class supporting Windows Azure virtual machines.""" + + # This ia a required attribute, but this is a base class. 
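+  # The placeholder URN below only satisfies the required attribute; the
+  # concrete Windows classes further down (e.g.
+  # Windows2019CoreAzureVirtualMachine) override IMAGE_URN with a real
+  # marketplace image URN.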
+ IMAGE_URN = 'non-existent' + + def __init__(self, vm_spec): + super(BaseWindowsAzureVirtualMachine, self).__init__(vm_spec) + # The names of Windows VMs on Azure are limited to 15 characters so let's + # drop the pkb prefix if necessary. + if len(self.name) > 15: + self.name = re.sub('^pkb-', '', self.name) + self.user_name = self.name + self.password = vm_util.GenerateRandomWindowsPassword() + + def _PostCreate(self): + super(BaseWindowsAzureVirtualMachine, self)._PostCreate() + config_dict = {'commandToExecute': windows_virtual_machine.STARTUP_SCRIPT} + config = json.dumps(config_dict) + vm_util.IssueRetryableCommand([ + azure.AZURE_PATH, 'vm', 'extension', 'set', '--vm-name', self.name, + '--name', 'CustomScriptExtension', '--publisher', 'Microsoft.Compute', + '--version', '1.4', + '--protected-settings=%s' % config + ] + self.resource_group.args) + + def _UpdateInterruptibleVmStatusThroughMetadataService(self): + stdout, _ = self.RemoteCommand(_SCHEDULED_EVENTS_CMD_WIN) + events = json.loads(stdout).get('Events', []) + self.spot_early_termination = any( + event.get('EventType') == 'Preempt' for event in events) + if self.spot_early_termination: + logging.info('Spotted early termination on %s', self) + + +# Azure seems to have dropped support for 2012 Server Core. It is neither here: +# https://docs.microsoft.com/en-us/azure/virtual-machines/windows/cli-ps-findimage#table-of-commonly-used-windows-images +# nor in `az vm image list -p MicrosoftWindowsServer -f WindowsServer -s 2012` +# Rather than exclude this just allow 2012 to refer to the 2012 Base image. +class Windows2012CoreAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2012CoreMixin): + GEN2_IMAGE_URN = 'MicrosoftWindowsServer:windowsserver-gen2preview:2012-r2-datacenter-gen2:latest' + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2012-R2-Datacenter:latest' + + +class Windows2016CoreAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2016CoreMixin): + GEN2_IMAGE_URN = 'MicrosoftWindowsServer:windowsserver-gen2preview:2016-datacenter-gen2:latest' + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2016-Datacenter-Server-Core:latest' + + +class Windows2019CoreAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2019CoreMixin): + GEN2_IMAGE_URN = 'MicrosoftWindowsServer:windowsserver-gen2preview:2019-datacenter-gen2:latest' + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2019-Datacenter-Core:latest' + + +class Windows2022CoreAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2022CoreMixin): + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2022-Datacenter-Core:latest' + + +class Windows2012DesktopAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2012DesktopMixin): + GEN2_IMAGE_URN = 'MicrosoftWindowsServer:windowsserver-gen2preview:2012-r2-datacenter-gen2:latest' + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2012-R2-Datacenter:latest' + + +class Windows2016DesktopAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2016DesktopMixin): + GEN2_IMAGE_URN = 'MicrosoftWindowsServer:windowsserver-gen2preview:2016-datacenter-gen2:latest' + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2016-Datacenter:latest' + + +class Windows2019DesktopAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2019DesktopMixin): + GEN2_IMAGE_URN = 
'MicrosoftWindowsServer:windowsserver-gen2preview:2019-datacenter-gen2:latest' + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2019-Datacenter:latest' + + +class Windows2022DesktopAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2022DesktopMixin): + IMAGE_URN = 'MicrosoftWindowsServer:WindowsServer:2022-Datacenter:latest' + + +class Windows2019DesktopSQLServer2019StandardAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Standard): + GEN2_IMAGE_URN = 'MicrosoftSQLServer:sql2019-ws2019:standard-gen2:latest' + IMAGE_URN = 'MicrosoftSQLServer:sql2019-ws2019:standard:latest' + + +class Windows2019DesktopSQLServer2019EnterpriseAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Enterprise): + GEN2_IMAGE_URN = 'MicrosoftSQLServer:sql2019-ws2019:enterprise-gen2:latest' + IMAGE_URN = 'MicrosoftSQLServer:sql2019-ws2019:enterprise:latest' + + +class Windows2022DesktopSQLServer2019StandardAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2022SQLServer2019Standard): + IMAGE_URN = 'MicrosoftSQLServer:sql2019-ws2022:standard:latest' + + +class Windows2022DesktopSQLServer2019EnterpriseAzureVirtualMachine( + BaseWindowsAzureVirtualMachine, + windows_virtual_machine.Windows2022SQLServer2019Enterprise): + IMAGE_URN = 'MicrosoftSQLServer:sql2019-ws2022:enterprise:latest' + + +def GenerateDownloadPreprovisionedDataCommand(install_path, module_name, + filename): + """Returns a string used to download preprovisioned data.""" + module_name_with_underscores_removed = module_name.replace('_', '-') + destpath = posixpath.join(install_path, filename) + if install_path: + # TODO(ferneyhough): Refactor this so that this mkdir command + # is run on all clouds, and is os-agnostic (this is linux specific). 
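+  # Rough shape of the returned command, using hypothetical values
+  # install_path='/opt/pkb', module_name='my_data', filename='input.bin':
+  #   mkdir -p /opt/pkb && az storage blob download --no-progress
+  #     --account-name <bucket> --container-name my-data --name input.bin
+  #     --file /opt/pkb/input.bin --connection-string "<connection string>"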
+ mkdir_command = 'mkdir -p %s' % posixpath.dirname(destpath) + + account_name = FLAGS.azure_preprovisioned_data_bucket + connection_string = util.GetAzureStorageConnectionString(account_name, []) + download_command = ( + 'az storage blob download ' + '--no-progress ' + '--account-name {account_name} ' + '--container-name {container_name} ' + '--name {name} ' + '--file {file} ' + '--connection-string "{connection_string}"'.format( + account_name=account_name, + container_name=module_name_with_underscores_removed, + name=filename, + file=destpath, + connection_string=connection_string)) + if install_path: + return '{0} && {1}'.format(mkdir_command, download_command) + return download_command + + +def GenerateStatPreprovisionedDataCommand(module_name, filename): + """Returns a string used to download preprovisioned data.""" + module_name_with_underscores_removed = module_name.replace('_', '-') + account_name = FLAGS.azure_preprovisioned_data_bucket + connection_string = util.GetAzureStorageConnectionString(account_name, []) + return ('az storage blob show ' + '--account-name {account_name} ' + '--container-name {container_name} ' + '--name {name} ' + '--connection-string "{connection_string}"'.format( + account_name=account_name, + container_name=module_name_with_underscores_removed, + name=filename, + connection_string=connection_string)) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/flags.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/flags.py new file mode 100644 index 0000000..ec6201c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/flags.py @@ -0,0 +1,104 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing flags applicable across benchmark run on Azure.""" + +from absl import flags + + +NONE = 'None' +READ_ONLY = 'ReadOnly' +READ_WRITE = 'ReadWrite' +flags.DEFINE_enum( + 'azure_host_caching', NONE, + [NONE, READ_ONLY, READ_WRITE], + 'The type of host caching to use on Azure data disks.') +# Azure Storage Account types. See +# http://azure.microsoft.com/en-us/pricing/details/storage/ for more information +# about the different types. +LRS = 'Standard_LRS' +ULRS = 'UltraSSD_LRS' +PLRS = 'Premium_LRS' +ZRS = 'Standard_ZRS' +GRS = 'Standard_GRS' +RAGRS = 'Standard_RAGRS' + +STORAGE = 'Storage' +BLOB_STORAGE = 'BlobStorage' +VALID_TIERS = ['Basic', 'Standard', 'Premium'] + +# Azure redis cache tiers. See +# https://docs.microsoft.com/en-us/azure/redis-cache/cache-faq for information. +VALID_CACHE_SIZES = ['C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', + 'P1', 'P2', 'P3', 'P4', 'P5'] + +flags.DEFINE_enum( + 'azure_storage_type', LRS, + [LRS, PLRS, ULRS, ZRS, GRS, RAGRS], + 'The type of storage account to create. See ' + 'http://azure.microsoft.com/en-us/pricing/details/storage/ for more ' + 'information. To use remote ssd scratch disks, you must use Premium_LRS. 
'
+    'If you use Premium_LRS, you must use the DS series of machines, or else '
+    'VM creation will fail.')
+
+flags.DEFINE_enum(
+    'azure_blob_account_kind', BLOB_STORAGE,
+    [STORAGE, BLOB_STORAGE],
+    'The type of storage account to use for blob storage. Choosing Storage '
+    'will let you use ZRS storage. Choosing BlobStorage will give you access '
+    'to Hot and Cold storage tiers.')
+
+flags.DEFINE_integer('azure_provisioned_iops', None,
+                     'IOPS for Provisioned IOPS volumes in Azure.')
+flags.DEFINE_integer('azure_provisioned_throughput', None,
+                     'Provisioned throughput (MB/s) for volumes in Azure.')
+
+flags.DEFINE_string('azure_preprovisioned_data_bucket', None,
+                    'Azure blob storage account where pre-provisioned data '
+                    'has been copied.')
+
+flags.DEFINE_boolean('azure_accelerated_networking', False,
+                     'Enable Azure Accelerated Networking. See '
+                     'https://docs.microsoft.com/en-us/azure/virtual-network/'
+                     'create-vm-accelerated-networking-cli '
+                     'for more information.')
+
+flags.DEFINE_enum('azure_tier', 'Basic', VALID_TIERS,
+                  'Performance tier to use for the machine type. Defaults to '
+                  'Basic.')
+
+flags.DEFINE_integer(
+    'azure_compute_units', None,
+    'Number of compute units to allocate for the machine type')
+
+flags.DEFINE_enum('azure_redis_size',
+                  'C3', VALID_CACHE_SIZES,
+                  'Azure redis cache size to use.')
+
+flags.DEFINE_boolean('azure_low_priority_vms', False,
+                     'Whether to set the priority to low for Azure VMs')
+
+flags.DEFINE_boolean('bootstrap_azure_service_principal', True,
+                     'Whether to use the current service principal credentials '
+                     'when passing a service principal to a service. This has '
+                     'no effect if the logged in user is not a service '
+                     'principal. This is useful, because service principals '
+                     "usually lack the 'User Authentication Admin' role that "
+                     'allows creation of new service principals.')
+flags.DEFINE_enum('sqldatawarehouse_client_interface', 'CLI', ['CLI', 'JDBC'],
+                  'The Runtime Interface used when interacting with Synapse.')
+flags.DEFINE_string('query_timeout', '600', 'Query timeout in seconds.')
+flags.DEFINE_string('min_tls_version', 'TLS1_2',
+                    'The minimum TLS version to be permitted on requests to '
+                    'storage. Azure interprets an unset value as TLS 1.0. '
+                    'Accepted values: TLS1_0, TLS1_1, TLS1_2.')
diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/provider_info.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/provider_info.py
new file mode 100644
index 0000000..12d62ec
--- /dev/null
+++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/provider_info.py
@@ -0,0 +1,24 @@
+# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +"""Provider info for Azure.""" + +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers + + +class AzureProviderInfo(provider_info.BaseProviderInfo): + + UNSUPPORTED_BENCHMARKS = ['mysql_service'] + CLOUD = providers.AZURE diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/service_principal.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/service_principal.py new file mode 100644 index 0000000..b84d1ee --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/service_principal.py @@ -0,0 +1,124 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains classes/functions related to Azure Service Principals.""" + +import json +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import object_storage_service +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import azure_credentials +from perfkitbenchmarker.providers import azure + +FLAGS = flags.FLAGS + + +class ServicePrincipal(resource.BaseResource): + """Class representing an Azure service principal.""" + + _instance = None + + @classmethod + def GetInstance(cls): + """Returns the service principal instance.""" + if cls._instance is None: + if FLAGS.bootstrap_azure_service_principal: + cls._instance = cls.LoadFromFile() + else: + cls._instance = cls() + return cls._instance + + @classmethod + def LoadFromFile(cls): + """Loads a service principal from a file.""" + with open( + object_storage_service.FindCredentialFile( + azure_credentials.PROFILE_FILE), + encoding='utf-8-sig') as profile_fp: + subscriptions = json.load(profile_fp)['subscriptions'] + subscription = [sub for sub in subscriptions if sub['isDefault']][0] + subscription_type = subscription['user']['type'] + if subscription_type != 'servicePrincipal': + # We are using user auth, and will probably have permission to create a + # service principal. + logging.info("Azure credentials are of type '%s'. " + 'Will try to create a new service principal.', + subscription_type) + return cls() + # name and id are backwards + name = subscription['id'] + app_id = subscription['user']['name'] + + # If subscription_type is servicePrincipal then service_principals.json + # will exist. + with open( + object_storage_service.FindCredentialFile( + azure_credentials.SERVICE_PRINCIPAL_FILE) + ) as service_principals_fp: + for sp in json.load(service_principals_fp): + if sp['client_id'] == app_id: + logging.info("Azure credentials are of type 'servicePrincipal'. 
" + 'Will reuse them for benchmarking.') + return cls( + name, app_id, password=sp['client_secret'], user_managed=True) + logging.warning('No access tokens found matching Azure defaultProfile ' + 'Will try to create a new service principal.') + return cls() + + def __init__(self, name=None, app_id=None, password=None, user_managed=False): + super(ServicePrincipal, self).__init__(user_managed) + # Service principals can be referred to by user provided name as long as + # they are prefixed by http:// or by a server generated appId. + # Prefer user provided ID for idempotence when talking to Active Directory. + # When talking to AKS or ACR, app_id is required. + self.name = 'http://' + (name or 'pkb-' + FLAGS.run_uri) + self.app_id = app_id + self.password = password + + def _Create(self): + """Creates the service principal.""" + cmd = [ + azure.AZURE_PATH, 'ad', 'sp', 'create-for-rbac', '--name', self.name, + '--skip-assignment' + ] + stdout, _, _ = vm_util.IssueCommand(cmd) + response = json.loads(stdout) + if response: + self.app_id = response['appId'] + self.password = response['password'] + if not self.app_id or not self.password: + raise errors.Resource.CreationError( + 'Invalid creation response when creating service principal. ' + 'Expected appId and password. Received:\n' + stdout) + return True + return False + + def _Exists(self): + """Returns True if the service principal exists.""" + # Use show rather than list, because list requires admin privileges. + cmd = [azure.AZURE_PATH, 'ad', 'sp', 'show', '--id', self.app_id] + try: + vm_util.IssueCommand(cmd, raise_on_failure=True) + return True + except errors.VmUtil.IssueCommandError: + return False + + def _Delete(self): + """Deletes the service principal.""" + cmd = [azure.AZURE_PATH, 'ad', 'sp', 'delete', '--id', self.app_id] + vm_util.IssueCommand(cmd) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/azure/util.py b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/util.py new file mode 100644 index 0000000..26b29e0 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/azure/util.py @@ -0,0 +1,223 @@ +# Copyright 2019 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Utilities for working with Azure resources.""" + + +import json +import re +from typing import Any, Dict, Set + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers import azure +import six + +AZURE_PATH = azure.AZURE_PATH +AZURE_SUFFIX = ['--output', 'json'] +FLAGS = flags.FLAGS + + +def GetAzureStorageConnectionString(storage_account_name, resource_group_args): + """Get connection string.""" + stdout, _ = vm_util.IssueRetryableCommand( + [AZURE_PATH, 'storage', 'account', 'show-connection-string', + '--name', storage_account_name] + resource_group_args + AZURE_SUFFIX) + response = json.loads(stdout) + return response['connectionString'] + + +def GetAzureStorageConnectionArgs(storage_account_name, resource_group_args): + """Get connection CLI arguments.""" + return ['--connection-string', + GetAzureStorageConnectionString(storage_account_name, + resource_group_args)] + + +def GetAzureStorageAccountKey(storage_account_name, resource_group_args): + """Get storage account key.""" + stdout, _ = vm_util.IssueRetryableCommand( + [AZURE_PATH, 'storage', 'account', 'keys', 'list', + '--account-name', storage_account_name] + + resource_group_args + AZURE_SUFFIX) + + response = json.loads(stdout) + # A new storage account comes with two keys, but we only need one. + assert response[0]['permissions'].lower() == 'full' + return response[0]['value'] + + +def FormatTag(key, value): + """Format an individual tag for use with the --tags param of Azure CLI.""" + return '{0}={1}'.format(key, value) + + +def FormatTags(tags_dict): + """Format a dict of tags into arguments for 'tag' parameter. + + Args: + tags_dict: Tags to be formatted. + + Returns: + A list of tags formatted as arguments for 'tag' parameter. + """ + return [FormatTag(k, v) for k, v in sorted(six.iteritems(tags_dict))] + + +def GetResourceTags(timeout_minutes): + """Gets a dict of tags. + + Args: + timeout_minutes: int, Timeout used for setting the timeout_utc tag. + + Returns: + A dict contains formatted tags. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + return benchmark_spec.GetResourceTags(timeout_minutes) + + +def GetTags(timeout_minutes): + """Gets a list of tags to be used with the --tags param of Azure CLI. + + Args: + timeout_minutes: int, Timeout used for setting the timeout_utc tag. + + Returns: + A string contains formatted tags. + """ + return FormatTags(GetResourceTags(timeout_minutes)) + + +def GetTagsJson(timeout_minutes): + """Gets a JSON string of tags to be used with the --set param of Azure CLI. + + Args: + timeout_minutes: int, Timeout used for setting the timeout_utc tag. + + Returns: + A string contains json formatted tags. + """ + return 'tags={}'.format(json.dumps(GetResourceTags(timeout_minutes))) + + +def _IsRegion(zone_or_region): + """Returns whether "zone_or_region" is a region.""" + return re.match(r'[a-z]+[0-9]?$', zone_or_region, re.IGNORECASE) + + +def _IsRecommendedRegion(json_object: Dict[str, Any]) -> bool: + return json_object['metadata']['regionCategory'] == 'Recommended' + + +def IsZone(zone_or_region): + """Returns whether "zone_or_region" is a zone. + + Args: + zone_or_region: string, Azure zone or region. Format for Azure + availability + zone support is "region-availability_zone". Example: eastus2-1 specifies + Azure region eastus2 with availability zone 1. 
+ """ + + return re.match(r'[a-z]+[0-9]?-[0-9]$', zone_or_region, re.IGNORECASE) + + +def GetRegionFromZone(zone_or_region: str) -> str: + """Returns the region a zone is in (or "zone_or_region" if it's a region).""" + if _IsRegion(zone_or_region): + return zone_or_region + if IsZone(zone_or_region): + return zone_or_region[:-2] + + raise ValueError('%s is not a valid Azure zone or region name' % + zone_or_region) + + +def GetZonesInRegion(region: str) -> Set[str]: + """Returns a set of zones in the region.""" + # As of 2021 all Azure AZs are numbered 1-3 for eligible regions. + return set([f'{region}-{i}' for i in range(1, 4)]) + + +def ShouldKeepZoneFromCLI(zone: str) -> bool: + """Filter out zones that we can't access.""" + if 'EUAP' in zone: + return False + return True + + +def GetZonesFromMachineType() -> Set[str]: + """Returns a set of zones for a machine type.""" + stdout, _ = vm_util.IssueRetryableCommand( + [AZURE_PATH, 'vm', 'list-skus', '--size', FLAGS.machine_type]) + zones = set() + for item in json.loads(stdout): + for location_info in item['locationInfo']: + region = location_info['location'] + for zone in location_info['zones']: + if ShouldKeepZoneFromCLI(f'{region}-{zone}'): + zones.add(f'{region}-{zone}') + return zones + + +def GetAllRegions() -> Set[str]: + """Returns all valid regions.""" + stdout, _ = vm_util.IssueRetryableCommand([ + AZURE_PATH, 'account', 'list-locations', '--output', 'json' + ]) + # Filter out staging regions from the output. + return set([ + item['name'] for item in json.loads(stdout) if _IsRecommendedRegion(item) + ]) + + +def GetAllZones() -> Set[str]: + """Returns all valid availability zones.""" + zones = set() + for region in GetAllRegions(): + zones.update(GetZonesInRegion(region)) + return zones + + +def GetGeoFromRegion(region: str) -> str: + """Gets valid geo from the region, i.e. region westus2 returns US.""" + stdout, _ = vm_util.IssueRetryableCommand([ + AZURE_PATH, 'account', 'list-locations', + '--output', 'json', + '--query', f"[?name == '{region}'].metadata.geographyGroup" + ]) + return stdout.splitlines()[1].strip('" ') + + +def GetRegionsInGeo(geo: str) -> Set[str]: + """Gets valid regions in the geo.""" + stdout, _ = vm_util.IssueRetryableCommand([ + AZURE_PATH, 'account', 'list-locations', + '--output', 'json', + '--query', f"[?metadata.geographyGroup == '{geo}']" + ]) + return set([ + item['name'] for item in json.loads(stdout) if _IsRecommendedRegion(item) + ]) + + +def GetAvailabilityZoneFromZone(zone_or_region): + """Returns the Availability Zone from a zone.""" + if IsZone(zone_or_region): + return zone_or_region[-1] + if _IsRegion(zone_or_region): + return None + raise ValueError('%s is not a valid Azure zone' % zone_or_region) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/__init__.py new file mode 100644 index 0000000..49b94a7 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/__init__.py @@ -0,0 +1,14 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Provider for GCP.""" diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/bigquery.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/bigquery.py new file mode 100644 index 0000000..2d375b0 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/bigquery.py @@ -0,0 +1,618 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's Bigquery EDW service.""" + +import copy +import datetime +import json +import logging +import os +import re +from typing import Dict, List, Text, Tuple + +from absl import flags +from perfkitbenchmarker import data +from perfkitbenchmarker import edw_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.linux_packages import google_cloud_sdk +from perfkitbenchmarker.providers.gcp import util as gcp_util + + +FLAGS = flags.FLAGS + +DEFAULT_TABLE_EXPIRATION = 3600 * 24 * 365 # seconds + + +def GetBigQueryClientInterface( + project_id: str, dataset_id: str) -> edw_service.EdwClientInterface: + """Builds and Returns the requested BigQuery client Interface. + + Args: + project_id: String name of the BigQuery project to benchmark + dataset_id: String name of the BigQuery dataset to benchmark + + Returns: + A concrete Client Interface object (subclass of GenericClientInterface) + + Raises: + RuntimeError: if an unsupported bq_client_interface is requested + """ + if FLAGS.bq_client_interface == 'CLI': + return CliClientInterface(project_id, dataset_id) + if FLAGS.bq_client_interface == 'JAVA': + return JavaClientInterface(project_id, dataset_id) + if FLAGS.bq_client_interface == 'SIMBA_JDBC_1_2_4_1007': + return JdbcClientInterface(project_id, dataset_id) + raise RuntimeError('Unknown BigQuery Client Interface requested.') + + +class GenericClientInterface(edw_service.EdwClientInterface): + """Generic Client Interface class for BigQuery. + + Attributes: + project_id: String name of the BigQuery project to benchmark + dataset_id: String name of the BigQuery dataset to benchmark + """ + + def __init__(self, project_id: str, dataset_id: str): + self.project_id = project_id + self.dataset_id = dataset_id + + def GetMetadata(self) -> Dict[str, str]: + """Gets the Metadata attributes for the Client Interface.""" + return {'client': FLAGS.bq_client_interface} + + +class CliClientInterface(GenericClientInterface): + """Command Line Client Interface class for BigQuery. + + Uses the native Bigquery client that ships with the google_cloud_sdk + https://cloud.google.com/bigquery/docs/bq-command-line-tool. + """ + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the bq tool dependencies and authenticates using a service account. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) 
to extract and use during client vm preparation. + """ + self.client_vm.Install('pip') + self.client_vm.RemoteCommand('sudo pip install absl-py') + self.client_vm.Install('google_cloud_sdk') + + # Push the service account file to the working directory on client vm + key_file_name = FLAGS.gcp_service_account_key_file.split('/')[-1] + if '/' in FLAGS.gcp_service_account_key_file: + self.client_vm.PushFile(FLAGS.gcp_service_account_key_file) + else: + self.client_vm.InstallPreprovisionedPackageData( + package_name, [FLAGS.gcp_service_account_key_file], '') + + # Authenticate using the service account file + vm_gcloud_path = google_cloud_sdk.GCLOUD_PATH + activate_cmd = ('{} auth activate-service-account {} --key-file={}'.format( + vm_gcloud_path, FLAGS.gcp_service_account, key_file_name)) + self.client_vm.RemoteCommand(activate_cmd) + + # Push the framework to execute a sql query and gather performance details + service_specific_dir = os.path.join('edw', Bigquery.SERVICE_TYPE) + self.client_vm.PushFile( + data.ResourcePath( + os.path.join(service_specific_dir, 'script_runner.sh'))) + runner_permission_update_cmd = 'chmod 755 {}'.format('script_runner.sh') + self.client_vm.RemoteCommand(runner_permission_update_cmd) + self.client_vm.PushFile( + data.ResourcePath(os.path.join('edw', 'script_driver.py'))) + self.client_vm.PushFile( + data.ResourcePath( + os.path.join(service_specific_dir, + 'provider_specific_script_driver.py'))) + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. job_id + """ + query_command = ('python script_driver.py --script={} --bq_project_id={} ' + '--bq_dataset_id={}').format(query_name, + self.project_id, + self.dataset_id) + stdout, _ = self.client_vm.RemoteCommand(query_command) + performance = json.loads(stdout) + details = copy.copy(self.GetMetadata()) # Copy the base metadata + details['job_id'] = performance[query_name]['job_id'] + return float(performance[query_name]['execution_time']), details + + +class JdbcClientInterface(GenericClientInterface): + """JDBC Client Interface class for BigQuery. + + https://cloud.google.com/bigquery/providers/simba-drivers + """ + + def SetProvisionedAttributes(self, benchmark_spec): + super(JdbcClientInterface, + self).SetProvisionedAttributes(benchmark_spec) + self.project_id = re.split(r'\.', + benchmark_spec.edw_service.cluster_identifier)[0] + self.dataset_id = re.split(r'\.', + benchmark_spec.edw_service.cluster_identifier)[1] + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs + a) Java Execution Environment, + b) BigQuery Authnetication Credentials, + c) JDBC Application to execute a query and gather execution details, + d) Simba JDBC BigQuery client code dependencencies, and + e) The Simba JDBC interface jar + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. 
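+
+    This interface is only selected when --bq_client_interface is set to
+    SIMBA_JDBC_1_2_4_1007 (see GetBigQueryClientInterface above). The pushed
+    jars are later invoked by ExecuteQuery below, roughly as (illustrative):
+      java -cp bq-jdbc-client-1.0.jar:GoogleBigQueryJDBC42.jar \
+          com.google.cloud.performance.edw.App --project <project> ...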
+ """ + self.client_vm.Install('openjdk') + + # Push the service account file to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData( + package_name, [FLAGS.gcp_service_account_key_file], '') + + # Push the executable jars to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData( + package_name, ['bq-jdbc-client-1.0.jar', 'GoogleBigQueryJDBC42.jar'], + '') + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. job_id + """ + query_command = ( + 'java -cp bq-jdbc-client-1.0.jar:GoogleBigQueryJDBC42.jar ' + 'com.google.cloud.performance.edw.App --project {} --service_account ' + '{} --credentials_file {} --dataset {} --query_file {}'.format( + self.project_id, FLAGS.gcp_service_account, + FLAGS.gcp_service_account_key_file, self.dataset_id, query_name)) + stdout, _ = self.client_vm.RemoteCommand(query_command) + details = copy.copy(self.GetMetadata()) # Copy the base metadata + details.update(json.loads(stdout)['details']) + return json.loads(stdout)['performance'], details + + +class JavaClientInterface(GenericClientInterface): + """Native Java Client Interface class for BigQuery. + + https://cloud.google.com/bigquery/docs/reference/libraries#client-libraries-install-java + """ + + def Prepare(self, package_name: str) -> None: + """Prepares the client vm to execute query. + + Installs the Java Execution Environment and a uber jar with + a) BigQuery Java client libraries, + b) An application to execute a query and gather execution details, and + c) their dependencies. + + Args: + package_name: String name of the package defining the preprovisioned data + (certificates, etc.) to extract and use during client vm preparation. + """ + self.client_vm.Install('openjdk') + + # Push the service account file to the working directory on client vm + if '/' in FLAGS.gcp_service_account_key_file: + self.client_vm.PushFile(FLAGS.gcp_service_account_key_file) + else: + self.client_vm.InstallPreprovisionedPackageData( + package_name, [FLAGS.gcp_service_account_key_file], '') + # Push the executable jar to the working directory on client vm + self.client_vm.InstallPreprovisionedPackageData(package_name, + ['bq-java-client-2.3.jar'], + '') + + def ExecuteQuery(self, query_name: Text) -> Tuple[float, Dict[str, str]]: + """Executes a query and returns performance details. + + Args: + query_name: String name of the query to execute. + + Returns: + A tuple of (execution_time, execution details) + execution_time: A Float variable set to the query's completion time in + secs. -1.0 is used as a sentinel value implying the query failed. For a + successful query the value is expected to be positive. + performance_details: A dictionary of query execution attributes eg. 
job_id + """ + key_file_name = FLAGS.gcp_service_account_key_file + if '/' in FLAGS.gcp_service_account_key_file: + key_file_name = FLAGS.gcp_service_account_key_file.split('/')[-1] + + query_command = ('java -cp bq-java-client-2.3.jar ' + 'com.google.cloud.performance.edw.Single --project {} ' + '--credentials_file {} --dataset {} ' + '--query_file {}').format(self.project_id, key_file_name, + self.dataset_id, query_name) + stdout, _ = self.client_vm.RemoteCommand(query_command) + details = copy.copy(self.GetMetadata()) # Copy the base metadata + details.update(json.loads(stdout)['details']) + return json.loads(stdout)['query_wall_time_in_secs'], details + + def ExecuteSimultaneous(self, submission_interval: int, + queries: List[str]) -> str: + """Executes queries simultaneously on client and return performance details. + + Simultaneous app expects queries as white space separated query file names. + + Args: + submission_interval: Simultaneous query submission interval in + milliseconds. + queries: List of strings (names) of queries to execute. + + Returns: + A serialized dictionary of execution details. + """ + key_file_name = FLAGS.gcp_service_account_key_file + if '/' in FLAGS.gcp_service_account_key_file: + key_file_name = os.path.basename(FLAGS.gcp_service_account_key_file) + cmd = ('java -cp bq-java-client-2.3.jar ' + 'com.google.cloud.performance.edw.Simultaneous --project {} ' + '--credentials_file {} --dataset {} --submission_interval {} ' + '--query_files {}'.format(self.project_id, key_file_name, + self.dataset_id, submission_interval, + ' '.join(queries))) + stdout, _ = self.client_vm.RemoteCommand(cmd) + return stdout + + def ExecuteThroughput(self, concurrency_streams: List[List[str]]) -> str: + """Executes a throughput test and returns performance details. + + Args: + concurrency_streams: List of streams to execute simultaneously, each of + which is a list of string names of queries. + + Returns: + A serialized dictionary of execution details. + """ + key_file_name = FLAGS.gcp_service_account_key_file + if '/' in FLAGS.gcp_service_account_key_file: + key_file_name = os.path.basename(FLAGS.gcp_service_account_key_file) + cmd = ('java -cp bq-java-client-2.3.jar ' + 'com.google.cloud.performance.edw.Throughput --project {} ' + '--credentials_file {} --dataset {} --query_streams {}'.format( + self.project_id, key_file_name, self.dataset_id, + ' '.join([','.join(stream) for stream in concurrency_streams]))) + stdout, _ = self.client_vm.RemoteCommand(cmd) + return stdout + + +class Bigquery(edw_service.EdwService): + """Object representing a Bigquery cluster. + + Attributes: + job_id_prefix: A string prefix for the job id for bigquery job. + """ + + CLOUD = providers.GCP + SERVICE_TYPE = 'bigquery' + + def __init__(self, edw_service_spec): + super(Bigquery, self).__init__(edw_service_spec) + project_id = re.split(r'\.', self.cluster_identifier)[0] + dataset_id = re.split(r'\.', self.cluster_identifier)[1] + self.client_interface = GetBigQueryClientInterface(project_id, dataset_id) + + def _Create(self): + """Create a BigQuery cluster. + + Bigquery clusters creation is out of scope of the benchmarking. + """ + raise NotImplementedError + + def _Exists(self): + """Method to validate the existence of a Bigquery cluster. + + Returns: + Boolean value indicating the existence of a cluster. + """ + return True + + def _Delete(self): + """Delete a BigQuery cluster. + + Bigquery cluster deletion is out of scope of benchmarking. 
+ """ + raise NotImplementedError + + def GetMetadata(self): + """Return a dictionary of the metadata for the BigQuery cluster.""" + basic_data = super(Bigquery, self).GetMetadata() + basic_data.update(self.client_interface.GetMetadata()) + return basic_data + + def FormatProjectAndDatasetForCommand(self, dataset=None): + """Returns the project and dataset in the format needed for bq commands. + + E.g., project:dataset. + + Args: + dataset: The dataset to run commands against. If None, extracts the + dataset from the cluster identifier whose format is "project.dataset"). + """ + return ((self.cluster_identifier.split('.')[0] + ':' + + dataset) if dataset else self.cluster_identifier.replace('.', ':')) + + def GetDatasetLastUpdatedTime(self, dataset=None): + """Get the formatted last modified timestamp of the dataset.""" + cmd = [ + 'bq', 'show', '--format=prettyjson', + self.FormatProjectAndDatasetForCommand(dataset) + ] + dataset_metadata, _, _ = vm_util.IssueCommand(cmd) + metadata_json = json.loads(str(dataset_metadata)) + return datetime.datetime.fromtimestamp( + float(metadata_json['lastModifiedTime']) / + 1000.0).strftime('%Y-%m-%d_%H-%M-%S') + + def GetAllTablesInDataset(self, dataset=None): + """Returns a list of the IDs of all the tables in the dataset.""" + cmd = [ + 'bq', 'ls', '--format=prettyjson', + self.FormatProjectAndDatasetForCommand(dataset) + ] + tables_list, _, _ = vm_util.IssueCommand(cmd) + all_tables = [] + for table in json.loads(str(tables_list)): + if table['type'] == 'TABLE': + all_tables.append(table['tableReference']['tableId']) + return all_tables + + def ExtractDataset(self, + dest_bucket, + dataset=None, + tables=None, + dest_format='CSV'): + """Extract all tables in a dataset to a GCS bucket. + + Args: + dest_bucket: Name of the bucket to extract the data to. Should already + exist. + dataset: Optional name of the dataset. If none, will be extracted from the + cluster_identifier. + tables: Optional list of table names to extract. If none, all tables in + the dataset will be extracted. + dest_format: Format to extract data in. Can be one of: CSV, JSON, or Avro. + """ + if tables is None: + tables = self.GetAllTablesInDataset(dataset) + gcs_uri = 'gs://' + dest_bucket + + # Make sure the bucket is empty. + vm_util.IssueCommand(['gsutil', '-m', 'rm', gcs_uri + '/**'], + raise_on_failure=False) + + project_dataset = self.FormatProjectAndDatasetForCommand(dataset) + for table in tables: + cmd = [ + 'bq', 'extract', + '--destination_format=%s' % dest_format, + '%s.%s' % (project_dataset, table), + '%s/%s/*.csv' % (gcs_uri, table) + ] + _, stderr, retcode = vm_util.IssueCommand(cmd) + # There is a 10T daily limit on extracting from BQ. Large datasets will + # inherently hit this limit and benchmarks shouldn't use those. + gcp_util.CheckGcloudResponseKnownFailures(stderr, retcode) + + def RemoveDataset(self, dataset=None): + """Removes a dataset. + + See https://cloud.google.com/bigquery/docs/managing-tables#deleting_tables + + Args: + dataset: Optional name of the dataset. If none, will be extracted from the + cluster_identifier. + """ + project_dataset = self.FormatProjectAndDatasetForCommand(dataset) + vm_util.IssueCommand(['bq', 'rm', '-r', '-f', '-d', project_dataset], + raise_on_failure=False) + + def CreateDataset(self, dataset=None, description=None): + """Creates a new dataset. + + See https://cloud.google.com/bigquery/docs/tables + + Args: + dataset: Optional name of the dataset. If none, will be extracted from the + cluster_identifier. 
+ description: Optional description of the dataset. Escape double quotes. + """ + project_dataset = self.FormatProjectAndDatasetForCommand(dataset) + cmd = [ + 'bq', 'mk', '--dataset', + '--default_table_expiration=%d' % DEFAULT_TABLE_EXPIRATION + ] + if description: + cmd.extend(['--description', '"%s"' % description]) + cmd.append(project_dataset) + vm_util.IssueCommand(cmd) + + cmd = ['bq', 'update'] + for key, value in gcp_util.GetDefaultTags().items(): + cmd.extend(['--set_label', f'{key}:{value}']) + cmd.append(project_dataset) + vm_util.IssueCommand(cmd) + + def LoadDataset(self, + source_bucket, + tables, + schema_dir, + dataset=None, + append=True, + skip_header_row=True, + field_delimiter=','): + """Load all tables in a dataset to a database from CSV object storage. + + See https://cloud.google.com/bigquery/docs/loading-data-cloud-storage-csv + + Args: + source_bucket: Name of the bucket to load the data from. Should already + exist. Each table must have its own subfolder in the bucket named after + the table, containing one or more csv files that make up the table data. + tables: List of table names to load. + schema_dir: GCS directory containing json schemas of all tables to load. + dataset: Optional name of the dataset. If none, will be extracted from the + cluster_identifier. + append: If True, appends loaded data to the existing set. If False, + replaces the existing data (if any). + skip_header_row: If True, skips the first row of data being loaded. + field_delimiter: The separator for fields in the CSV file. + """ + project_dataset = self.FormatProjectAndDatasetForCommand(dataset) + for table in tables: + schema_path = schema_dir + table + '.json' + local_schema = './%s.json' % table + vm_util.IssueCommand(['gsutil', 'cp', schema_path, local_schema]) + cmd = [ + 'bq', 'load', '--noreplace' if append else '--replace', + '--source_format=CSV', + '--field_delimiter=%s' % field_delimiter, + '--skip_leading_rows=%d' % (1 if skip_header_row else 0), + '%s.%s' % (project_dataset, table), + 'gs://%s/%s/*.csv' % (source_bucket, table), local_schema + ] + _, stderr, retcode = vm_util.IssueCommand(cmd, raise_on_failure=False) + if retcode: + logging.warning('Loading table %s failed. stderr: %s, retcode: %s', + table, stderr, retcode) + + cmd = ['bq', 'update'] + for key, value in gcp_util.GetDefaultTags().items(): + cmd.extend(['--set_label', f'{key}:{value}']) + cmd.append(f'{project_dataset}.{table}') + vm_util.IssueCommand(cmd) + + +class Endor(Bigquery): + """Class representing BigQuery Endor service.""" + + SERVICE_TYPE = 'endor' + + def GetMetadata(self) -> Dict[str, str]: + """Return a dictionary of the metadata for the BigQuery Endor service. + + Returns: + A dictionary set to Endor service details. + """ + basic_data = super(Endor, self).GetMetadata() + basic_data['edw_service_type'] = 'endor' + basic_data.update(self.client_interface.GetMetadata()) + basic_data.update(self.GetDataDetails()) + return basic_data + + def GetDataDetails(self) -> Dict[str, str]: + """Returns a dictionary with underlying data details. + + cluster_identifier = . + Data details are extracted from the dataset_id that follows the format: + ____ + eg. + tpch100_parquet_uncompressed_unpartitoned_s3 + + Returns: + A dictionary set to underlying data's details (format, etc.) 
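+
+    For the example dataset_id above, the parsed result would be:
+      {'format': 'parquet', 'compression': 'uncompressed',
+       'partitioning': 'unpartitoned', 'location': 's3'}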
+ """ + data_details = {} + dataset_id = re.split(r'\.', self.cluster_identifier)[1] + parsed_id = re.split(r'_', dataset_id) + data_details['format'] = parsed_id[1] + data_details['compression'] = parsed_id[2] + data_details['partitioning'] = parsed_id[3] + data_details['location'] = parsed_id[4] + return data_details + + +class Endorazure(Endor): + """Class representing BigQuery Endor Azure service.""" + + SERVICE_TYPE = 'endorazure' + + def GetMetadata(self) -> Dict[str, str]: + """Return a dictionary of the metadata for the BigQuery Endor Azure service. + + Returns: + A dictionary set to Endor Azure service details. + """ + basic_data = super(Endorazure, self).GetMetadata() + basic_data['edw_service_type'] = 'endorazure' + return basic_data + + +class Bqfederated(Bigquery): + """Class representing BigQuery Federated service.""" + + SERVICE_TYPE = 'bqfederated' + + def GetMetadata(self) -> Dict[str, str]: + """Return a dictionary of the metadata for the BigQuery Federated service. + + Returns: + A dictionary set to Federated service details. + """ + basic_data = super(Bqfederated, self).GetMetadata() + basic_data['edw_service_type'] = Bqfederated.SERVICE_TYPE + basic_data.update(self.client_interface.GetMetadata()) + basic_data.update(self.GetDataDetails()) + return basic_data + + def GetDataDetails(self) -> Dict[str, str]: + """Returns a dictionary with underlying data details. + + cluster_identifier = . + Data details are extracted from the dataset_id that follows the format: + ____ + eg. + tpch10000_parquet_compressed_partitoned_gcs + + Returns: + A dictionary set to underlying data's details (format, etc.) + """ + data_details = {} + dataset_id = re.split(r'\.', self.cluster_identifier)[1] + parsed_id = re.split(r'_', dataset_id) + data_details['format'] = parsed_id[1] + data_details['compression'] = parsed_id[2] + data_details['partitioning'] = parsed_id[3] + data_details['location'] = parsed_id[4] + return data_details diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/flags.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/flags.py new file mode 100644 index 0000000..da48ea5 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/flags.py @@ -0,0 +1,172 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing flags applicable across benchmark run on GCP.""" + +from absl import flags + +# Sentinel value for unspecified platform. +GCP_MIN_CPU_PLATFORM_NONE = 'none' + +flags.DEFINE_string('gcloud_path', 'gcloud', 'The path for the gcloud utility.') +flags.DEFINE_list('additional_gcloud_flags', [], + 'Additional flags to pass to gcloud.') +flags.DEFINE_integer( + 'gce_num_local_ssds', 0, + 'The number of ssds that should be added to the VM. 
Note ' + 'that this is currently only supported in certain zones ' + '(see https://cloud.google.com/compute/docs/local-ssd).') +flags.DEFINE_string( + 'gcloud_scopes', None, 'If set, space-separated list of ' + 'scopes to apply to every created machine') +flags.DEFINE_boolean('gce_migrate_on_maintenance', True, 'If true, allow VM ' + 'migration on GCE host maintenance.') +flags.DEFINE_boolean('gce_automatic_restart', False, 'If true, allow VM ' + 'to restart when crashes.') +flags.DEFINE_boolean('gce_preemptible_vms', False, 'If true, use preemptible ' + 'VMs on GCE.') +flags.DEFINE_string( + 'image_family', None, 'The family of the image that the boot disk will be ' + 'initialized with. The --image flag will take priority over this flag. See:' + ' https://cloud.google.com/sdk/gcloud/reference/compute/instances/create') +flags.DEFINE_string( + 'image_project', None, 'The project against which all image references will' + ' be resolved. See: ' + 'https://cloud.google.com/sdk/gcloud/reference/compute/disks/create') +flags.DEFINE_string( + 'gce_network_name', None, 'The name of an already created ' + 'network to use instead of creating a new one.') +flags.DEFINE_string( + 'gce_subnet_name', None, 'The name of an already created ' + 'subnet to use instead of creating a new one.') +flags.DEFINE_string( + 'gce_subnet_region', None, 'Region to create subnet in ' + 'instead of automatically creating one in every region.') +flags.DEFINE_string( + 'gce_subnet_addr', '10.128.0.0/20', 'Address range to the ' + 'subnet, given in CDR notation. Not used unless ' + '--gce_subnet_region is given.') +flags.DEFINE_string( + 'gce_remote_access_firewall_rule', None, 'The name of an ' + 'already created firewall rule which allows remote access ' + 'instead of creating a new one.') +flags.DEFINE_multi_string( + 'gcp_instance_metadata_from_file', [], + 'A colon separated key-value pair that will be added to the ' + '"--metadata-from-file" flag of the gcloud cli (with the colon replaced by ' + 'the equal sign). Multiple key-value pairs may be specified by separating ' + 'each pair by commas. This option can be repeated multiple times. For ' + 'information about GCP instance metadata, see: --metadata-from-file from ' + '`gcloud help compute instances create`.') +flags.DEFINE_multi_string( + 'gcp_instance_metadata', [], + 'A colon separated key-value pair that will be added to the ' + '"--metadata" flag of the gcloud cli (with the colon replaced by the equal ' + 'sign). Multiple key-value pairs may be specified by separating each pair ' + 'by commas. This option can be repeated multiple times. 
For information ' + 'about GCP instance metadata, see: --metadata from ' + '`gcloud help compute instances create`.') +flags.DEFINE_integer('gce_boot_disk_size', None, + 'The boot disk size in GB for GCP VMs.') +flags.DEFINE_enum('gce_boot_disk_type', None, ['pd-standard', 'pd-ssd', 'pd-extreme'], + 'The boot disk type for GCP VMs.') +flags.DEFINE_enum('gce_ssd_interface', 'SCSI', ['SCSI', 'NVME'], + 'The ssd interface for GCE local SSD.') +flags.DEFINE_enum('gce_nic_type', 'VIRTIO_NET', ['VIRTIO_NET', 'GVNIC'], + 'The virtual NIC type of GCE VMs.') +EGRESS_BANDWIDTH_TIER = flags.DEFINE_enum( + 'gce_egress_bandwidth_tier', None, ['TIER_1'], + 'Egress bandwidth tier of the GCE VMs.') + +flags.DEFINE_string('gcp_node_type', None, + 'The node type of all sole tenant hosts that get created.') +flags.DEFINE_enum( + 'gcp_min_cpu_platform', None, [ + GCP_MIN_CPU_PLATFORM_NONE, 'sandybridge', 'ivybridge', 'haswell', + 'broadwell', 'skylake', 'cascadelake', 'icelake', 'rome', 'milan' + ], 'When specified, the VM will have either the specified ' + 'architecture or a newer one. Architecture availability is zone dependent.') +flags.DEFINE_string( + 'gce_accelerator_type_override', None, + 'When specified, override the accelerator_type string passed to the gcloud ' + 'compute instance create command.') +flags.DEFINE_string('gcp_preprovisioned_data_bucket', None, + 'GCS bucket where pre-provisioned data has been copied.') +flags.DEFINE_integer('gcp_redis_gb', 5, 'Size of redis cluster in gb') +flags.DEFINE_string('gcp_service_account', None, 'Service account to use for ' + 'authorization.') +flags.DEFINE_string( + 'gcp_service_account_key_file', None, + 'Local path to file that contains a private authorization ' + 'key, used to activate gcloud.') +flags.DEFINE_list('gce_tags', None, 'List of --tags when creating a VM') +flags.DEFINE_boolean('gke_enable_alpha', False, + 'Whether to enable alpha kubernetes clusters.') +flags.DEFINE_string('gcp_dataproc_subnet', None, + 'Specifies the subnet that the cluster will be part of.') +flags.DEFINE_multi_string('gcp_dataproc_property', [], + 'Specifies configuration properties for installed ' + 'packages, such as Hadoop and Spark. Properties are ' + 'mapped to configuration files by specifying a prefix' + ', such as "core:io.serializations". ' + 'See https://cloud.google.com/dataproc/docs/concepts/' + 'configuring-clusters/cluster-properties ' + 'for details.') +flags.DEFINE_string('gcp_dataproc_image', None, + 'Specifies the custom image URI or the custom image name ' + 'that will be used to create a cluster.') +flags.DEFINE_boolean('gcp_internal_ip', False, + 'Use internal ips for ssh or scp commands. gcloud beta' + 'components must be installed to use this flag.') +flags.DEFINE_enum('gce_network_tier', 'premium', ['premium', 'standard'], + 'Network tier to use for all GCE VMs. Note that standard ' + 'networking is only available in certain regions. See ' + 'https://cloud.google.com/network-tiers/docs/overview') +flags.DEFINE_boolean( + 'gce_shielded_secure_boot', False, + 'Whether the image uses the shielded VM feature') +flags.DEFINE_boolean('gce_firewall_rules_clean_all', False, + 'Determines whether all the gce firewall rules should be ' + 'cleaned up before deleting the network. If firewall ' + 'rules are added manually, PKB will not know about all of ' + 'them. 
However, they must be deleted in order to ' + 'successfully delete the PKB-created network.') +flags.DEFINE_enum('bq_client_interface', 'CLI', + ['CLI', 'JAVA', 'SIMBA_JDBC_1_2_4_1007'], + 'The Runtime Interface used when interacting with BigQuery.') +flags.DEFINE_string('gcp_preemptible_status_bucket', None, + 'The GCS bucket to store the preemptible status when ' + 'running on GCP.') +flags.DEFINE_boolean( + 'gce_confidential_compute', False, + 'Whether the image uses the confidential VM feature') +flags.DEFINE_integer( + 'gcp_provisioned_iops', 100000, + 'Iops to provision for pd-extreme. Defaults to the gcloud ' + 'default of 100000.') +API_OVERRIDE = flags.DEFINE_string( + 'gcp_cloud_redis_api_override', + default='https://redis.googleapis.com/', + help='Cloud redis API endpoint override. Defaults to prod.') + + +def _ValidatePreemptFlags(flags_dict): + if flags_dict['gce_preemptible_vms']: + return bool(flags_dict['gcp_preemptible_status_bucket']) + return True + + +flags.register_multi_flags_validator( + ['gce_preemptible_vms', 'gcp_preemptible_status_bucket'], + _ValidatePreemptFlags, 'When gce_preemptible_vms is specified, ' + 'gcp_preemptible_status_bucket must be specified.') diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_disk.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_disk.py new file mode 100644 index 0000000..47985e9 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_disk.py @@ -0,0 +1,192 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to GCE disks. + +Disks can be created, deleted, attached to VMs, and detached from VMs. +Use 'gcloud compute disk-types list' to determine valid disk types. 
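+
+A rough usage sketch (disk_spec, names, zone and project are illustrative):
+
+  scratch = GceDisk(disk_spec, 'pkb-scratch-0', 'us-central1-a', 'my-project')
+  scratch.Create()                  # gcloud compute disks create ...
+  scratch.Attach(vm)                # gcloud compute instances attach-disk ...
+  device_path = scratch.GetDevicePath()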
+""" + +import json + +from absl import flags +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.gcp import util + +FLAGS = flags.FLAGS + +PD_STANDARD = 'pd-standard' +PD_SSD = 'pd-ssd' +PD_BALANCED = 'pd-balanced' +PD_EXTREME = 'pd-extreme' + +DISK_TYPE = {disk.STANDARD: PD_STANDARD, disk.REMOTE_SSD: PD_SSD} + +REGIONAL_DISK_SCOPE = 'regional' + +DISK_METADATA = { + PD_STANDARD: { + disk.MEDIA: disk.HDD, + disk.REPLICATION: disk.ZONE, + }, + PD_BALANCED: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + PD_SSD: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + PD_EXTREME: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.ZONE, + }, + disk.LOCAL: { + disk.MEDIA: disk.SSD, + disk.REPLICATION: disk.NONE, + }, +} + +SCSI = 'SCSI' +NVME = 'NVME' + +disk.RegisterDiskTypeMap(providers.GCP, DISK_TYPE) + + +class GceDisk(disk.BaseDisk): + """Object representing an GCE Disk.""" + + def __init__(self, + disk_spec, + name, + zone, + project, + image=None, + image_project=None, + replica_zones=None): + super(GceDisk, self).__init__(disk_spec) + self.attached_vm_name = None + self.image = image + self.image_project = image_project + self.name = name + self.zone = zone + self.project = project + self.replica_zones = replica_zones + self.region = util.GetRegionFromZone(self.zone) + self.provisioned_iops = None + if self.disk_type == PD_EXTREME: + self.provisioned_iops = FLAGS.gcp_provisioned_iops + + disk_metadata = DISK_METADATA[disk_spec.disk_type] + if self.replica_zones: + disk_metadata[disk.REPLICATION] = disk.REGION + self.metadata['replica_zones'] = replica_zones + self.metadata.update(DISK_METADATA[disk_spec.disk_type]) + if self.disk_type == disk.LOCAL: + self.metadata['interface'] = FLAGS.gce_ssd_interface + if self.provisioned_iops and self.disk_type == PD_EXTREME: + self.metadata['provisioned_iops'] = self.provisioned_iops + + def _Create(self): + """Creates the disk.""" + cmd = util.GcloudCommand(self, 'compute', 'disks', 'create', self.name) + cmd.flags['size'] = self.disk_size + cmd.flags['type'] = self.disk_type + if self.provisioned_iops and self.disk_type == PD_EXTREME: + cmd.flags['provisioned-iops'] = self.provisioned_iops + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + if self.image: + cmd.flags['image'] = self.image + if self.image_project: + cmd.flags['image-project'] = self.image_project + + if self.replica_zones: + cmd.flags['region'] = self.region + cmd.flags['replica-zones'] = ','.join(self.replica_zones) + del cmd.flags['zone'] + + _, stderr, retcode = cmd.Issue(raise_on_failure=False) + util.CheckGcloudResponseKnownFailures(stderr, retcode) + + def _Delete(self): + """Deletes the disk.""" + cmd = util.GcloudCommand(self, 'compute', 'disks', 'delete', self.name) + if self.replica_zones: + cmd.flags['region'] = self.region + del cmd.flags['zone'] + cmd.Issue(raise_on_failure=False) + + def _Exists(self): + """Returns true if the disk exists.""" + cmd = util.GcloudCommand(self, 'compute', 'disks', 'describe', self.name) + + if self.replica_zones: + cmd.flags['region'] = self.region + del cmd.flags['zone'] + + stdout, _, _ = cmd.Issue(suppress_warning=True, raise_on_failure=False) + try: + json.loads(stdout) + except ValueError: + return False + return True + + @vm_util.Retry() + def Attach(self, vm): + """Attaches the disk to a VM. 
+ + Args: + vm: The GceVirtualMachine instance to which the disk will be attached. + """ + self.attached_vm_name = vm.name + cmd = util.GcloudCommand(self, 'compute', 'instances', 'attach-disk', + self.attached_vm_name) + cmd.flags['device-name'] = self.name + cmd.flags['disk'] = self.name + + if self.replica_zones: + cmd.flags['disk-scope'] = REGIONAL_DISK_SCOPE + + stdout, stderr, retcode = cmd.Issue(raise_on_failure=False) + # Gcloud attach-disk commands may still attach disks despite being rate + # limited. + if retcode: + if (cmd.rate_limited and 'is already being used' in stderr and + FLAGS.retry_on_rate_limited): + return + debug_text = ('Ran: {%s}\nReturnCode:%s\nSTDOUT: %s\nSTDERR: %s' % + (' '.join(cmd.GetCommand()), retcode, stdout, stderr)) + raise errors.VmUtil.CalledProcessException( + 'Command returned a non-zero exit code:\n{}'.format(debug_text)) + + def Detach(self): + """Detaches the disk from a VM.""" + cmd = util.GcloudCommand(self, 'compute', 'instances', 'detach-disk', + self.attached_vm_name) + cmd.flags['device-name'] = self.name + + if self.replica_zones: + cmd.flags['disk-scope'] = REGIONAL_DISK_SCOPE + cmd.IssueRetryable() + self.attached_vm_name = None + + def GetDevicePath(self): + """Returns the path to the device inside the VM.""" + if self.disk_type == disk.LOCAL and FLAGS.gce_ssd_interface == NVME: + return '/dev/%s' % self.name + else: + # by default, gce_ssd_interface == SCSI and returns this name id + return '/dev/disk/by-id/google-%s' % self.name diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_network.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_network.py new file mode 100644 index 0000000..d836386 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_network.py @@ -0,0 +1,991 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing classes related to GCE VM networking. + +The Firewall class provides a way of opening VM ports. The Network class allows +VMs to communicate via internal ips and isolates PerfKitBenchmarker VMs from +others in the +same project. See https://developers.google.com/compute/docs/networking for +more information about GCE VM networking. 
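+
+A rough sketch of the firewall side (the port and rule name are illustrative):
+
+  firewall = GceFirewall()
+  firewall.AllowPort(vm, 22)    # creates rule 'perfkit-firewall-<run_uri>-22-22'
+  firewall.DisallowAllPorts()   # removes the rules created above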
+""" + + +import json +import logging +import threading +from typing import Any, Dict, List, Optional, Set, Tuple, Union + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import network +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import vpn_service +from perfkitbenchmarker.providers.gcp import gce_placement_group +from perfkitbenchmarker.providers.gcp import util +import six + +FLAGS = flags.FLAGS +NETWORK_RANGE = '10.0.0.0/8' +ALLOW_ALL = 'tcp:1-65535,udp:1-65535,icmp' + + +class GceVpnGateway(network.BaseVpnGateway): + """Object representing a GCE VPN Gateway.""" + CLOUD = providers.GCP + + def __init__(self, name: str, network_name: str, region: str, cidr: str, + project: str): + super(GceVpnGateway, self).__init__() + + self.forwarding_rules: Dict[str, GceForwardingRule] = {} + self.forwarding_rules_lock = threading.Lock() + self.tunnels: Dict[str, GceStaticTunnel] = {} + self.routes: Dict[str, GceRoute] = {} + self.ip_resource = None + self.vpn_gateway_resource = GceVpnGatewayResource( + name, network_name, region, cidr, project) + self.vpn_gateway_resource_lock = threading.Lock() + + self.name = name + self.network_name = network_name + self.region = region + self.cidr = cidr + self.project = project + + self.ip_address = None + self.ip_address_lock = threading.Lock() + self.created = False + self.require_target_to_init = False + self.routing = None + self.psk = None + + # Add gateway to benchmark spec at init(). + benchmark_spec = context.GetThreadBenchmarkSpec() + if benchmark_spec is None: + raise errors.Error('GetNetwork called in a thread without a ' + 'BenchmarkSpec.') + key = self.name + with benchmark_spec.vpn_gateways_lock: + if key not in benchmark_spec.vpn_gateways: + benchmark_spec.vpn_gateways[key] = self + + def ConfigureTunnel(self, tunnel_config: vpn_service.TunnelConfig): + """Updates tunnel config with new information. + + Args: + tunnel_config: The tunnel configuration for this VPN. + """ + logging.debug('Configuring Tunnel with params:') + logging.debug(tunnel_config) + + # update tunnel_config if needed + if self.name not in tunnel_config.endpoints: + logging.debug('tunnel_config: This endpoint isnt registered yet... 
%s', + self.name) + tunnel_config.endpoints[self.name] = { + 'is_configured': False, + 'cidr': self.cidr, + 'project': self.project, + 'network_name': self.network_name, + 'region': self.region, + 'require_target_to_init': self.require_target_to_init, + } + + # attach public IP to this gateway if doesnt exist + # and update tunnel_config if needed + # requires project, region, name + with self.ip_address_lock: + if not self.ip_address: + if not self.ip_resource: + self.ip_resource = GceIPAddress(self.project, self.region, self.name) + self.ip_resource.Create() + self.ip_address = self.ip_resource.ip_address + if 'ip_address' not in tunnel_config.endpoints[self.name]: + logging.debug('tunnel_config: Configuring IP for %s', self.name) + tunnel_config.endpoints[self.name]['ip_address'] = self.ip_address + + # configure forwarding + # requires: - + with self.forwarding_rules_lock: + if len(self.forwarding_rules) == 3: + logging.debug('tunnel_config: Forwarding already configured, skipping') + else: + logging.debug('tunnel_config: Setting up forwarding') + self._SetupForwarding(tunnel_config) + + # Abort if we don't have a target info configured yet + if len(tunnel_config.endpoints) < 2: + logging.debug('tunnel_config: Only found %d endpoints... ' + 'waiting for target to configure', + len(tunnel_config.endpoints)) + return + + # Get target endpoint config key + target_endpoint = [k for k in tunnel_config.endpoints.keys() + if k not in self.name][0] + + # configure tunnel resources + # requires: target_ip_address, IKE version (default 1), + if 'ip_address' not in tunnel_config.endpoints[target_endpoint]: + logging.debug('tunnel_config: Target IP needed... ' + 'waiting for target to configure') + return + if not hasattr(tunnel_config, 'psk'): + logging.debug('tunnel_config: PSK not provided... setting to runid') + tunnel_config.psk = 'key' + FLAGS.run_uri + self._SetupTunnel(tunnel_config) + + # configure routing + # requires: next_hop_tunnel_id, target_cidr, + # TODO(dlott) Should be Optional[str], but that requires making endpoints a + # proper class rather than a dictionary of string and bool. See TunnelConfig + dest_cidr: Optional[Any] = tunnel_config.endpoints[target_endpoint].get( + 'cidr') + if not dest_cidr or not dest_cidr.strip(): + logging.debug('tunnel_config: destination CIDR needed... ' + 'waiting for target to configure') + return + self._SetupRouting( + tunnel_config.suffix, + tunnel_config.endpoints[self.name]['tunnel_id'], + tunnel_config.endpoints[target_endpoint]['cidr']) + + tunnel_config.endpoints[self.name]['is_configured'] = True + + def IsTunnelReady(self, tunnel_id: str) -> bool: + """Returns True if the tunnel is up and ready for traffic. + + Args: + tunnel_id: The id of the tunnel to check. + + Returns: + boolean. + + """ + return self.tunnels[tunnel_id].IsReady() + + def _SetupTunnel(self, tunnel_config: vpn_service.TunnelConfig): + """Register a new GCE VPN tunnel for this endpoint. + + Args: + tunnel_config: VPN tunnel configuration. 
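+
+    The tunnel is created as 'tun-<gateway name>-<suffix>' and its name is
+    recorded in tunnel_config.endpoints[self.name]['tunnel_id'].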
+ """ + target_endpoint = [k for k in tunnel_config.endpoints.keys() + if k not in self.name][0] + project = tunnel_config.endpoints[self.name]['project'] + region = tunnel_config.endpoints[self.name]['region'] + vpn_gateway_id = self.name + target_ip = tunnel_config.endpoints[target_endpoint]['ip_address'] + psk = tunnel_config.psk + ike_version = tunnel_config.ike_version + suffix = tunnel_config.suffix + name = 'tun-' + self.name + '-' + suffix + if name not in self.tunnels: + self.tunnels[name] = GceStaticTunnel( + project, region, name, vpn_gateway_id, target_ip, ike_version, psk) + self.tunnels[name].Create() + tunnel_config.endpoints[self.name]['tunnel_id'] = name + + def _SetupForwarding(self, tunnel_config: vpn_service.TunnelConfig): + """Create IPSec forwarding rules. + + Forwards ESP protocol, and UDP 500/4500 for tunnel setup. + + Args: + tunnel_config: The tunnel configuration for this VPN. + """ + if len(self.forwarding_rules) == 3: + return # backout if already set + suffix = tunnel_config.suffix + # GCP doesnt like uppercase names?!? + fr_UDP500_name = ('fr-udp500-%s-%s' % + (suffix, FLAGS.run_uri)) + fr_UDP4500_name = ('fr-udp4500-%s-%s' % + (suffix, FLAGS.run_uri)) + fr_ESP_name = ('fr-esp-%s-%s' % + (suffix, FLAGS.run_uri)) + + if fr_UDP500_name not in self.forwarding_rules: + fr_UDP500 = GceForwardingRule( + fr_UDP500_name, 'UDP', self, 500) + self.forwarding_rules[fr_UDP500_name] = fr_UDP500 + fr_UDP500.Create() + if fr_UDP4500_name not in self.forwarding_rules: + fr_UDP4500 = GceForwardingRule( + fr_UDP4500_name, 'UDP', self, 4500) + self.forwarding_rules[fr_UDP4500_name] = fr_UDP4500 + fr_UDP4500.Create() + if fr_ESP_name not in self.forwarding_rules: + fr_ESP = GceForwardingRule( + fr_ESP_name, 'ESP', self) + self.forwarding_rules[fr_ESP_name] = fr_ESP + fr_ESP.Create() + + def _SetupRouting(self, suffix: str, next_hop_tun: str, dest_cidr: str): + """Create IPSec routing rules between the source and target gateways.""" + + route_name = 'route-' + self.name + '-' + suffix + if route_name not in self.routes: + self.routes[route_name] = GceRoute( + route_name, dest_cidr, self.network_name, next_hop_tun, + self.region, self.project) + self.routes[route_name].Create() + + def Create(self): + """Creates the actual VpnGateway.""" + benchmark_spec = context.GetThreadBenchmarkSpec() + if benchmark_spec is None: + raise errors.Error('GetNetwork called in a thread without a ' + 'BenchmarkSpec.') + if self.created: + return + self.vpn_gateway_resource.Create() + + self.created = True + + def Delete(self): + """Deletes the actual VpnGateway.""" + if self.ip_resource: + self.ip_resource.Delete() + + if self.tunnels: + vm_util.RunThreaded(lambda tun: self.tunnels[tun].Delete(), + list(self.tunnels.keys())) + + if self.forwarding_rules: + vm_util.RunThreaded(lambda fr: self.forwarding_rules[fr].Delete(), + list(self.forwarding_rules.keys())) + + if self.routes: + vm_util.RunThreaded(lambda route: self.routes[route].Delete(), + list(self.routes.keys())) + + if self.vpn_gateway_resource: + self.vpn_gateway_resource.Delete() + + self.created = False + + +class GceVpnGatewayResource(resource.BaseResource): + """Object representing a GCE VPN Gateway Resource.""" + + def __init__(self, name: str, network_name: str, region: str, cidr: str, + project: str): + super(GceVpnGatewayResource, self).__init__() + self.name = name + self.network_name = network_name + self.region = region + self.cidr = cidr + self.project = project + + def _Create(self): + cmd = util.GcloudCommand(self, 'compute', 
'target-vpn-gateways', 'create', + self.name) + cmd.flags['network'] = self.network_name + cmd.flags['region'] = self.region + cmd.Issue() + + def _Exists(self): + cmd = util.GcloudCommand(self, 'compute', 'target-vpn-gateways', 'describe', + self.name) + cmd.flags['region'] = self.region + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + def _Delete(self): + cmd = util.GcloudCommand(self, 'compute', 'target-vpn-gateways', 'delete', + self.name) + cmd.flags['region'] = self.region + cmd.Issue(raise_on_failure=False) + + +class GceIPAddress(resource.BaseResource): + """Object representing a GCE IP address.""" + + def __init__(self, project: str, region: str, name: str): + super(GceIPAddress, self).__init__() + self.project = project + self.region = region + self.name = name + self.ip_address = None + + def _Create(self): + """Allocates a public IP for the VPN gateway.""" + cmd = util.GcloudCommand(self, 'compute', 'addresses', 'create', self.name) + cmd.flags['region'] = self.region + cmd.Issue() + + def _PostCreate(self): + cmd = util.GcloudCommand(self, 'compute', 'addresses', 'describe', + self.name) + cmd.flags['region'] = self.region + cmd.flags['format'] = 'value(address)' + stdout, _, _ = cmd.Issue() + self.ip_address = stdout.rstrip() + + def _Delete(self): + """Deletes a public IP for the VPN gateway.""" + cmd = util.GcloudCommand(self, 'compute', 'addresses', 'delete', self.name) + cmd.flags['region'] = self.region + cmd.Issue(raise_on_failure=False) + + def _Exists(self) -> bool: + """Returns True if the IP address exists.""" + cmd = util.GcloudCommand(self, 'compute', 'addresses', 'describe', + self.name) + cmd.flags['region'] = self.region + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + +class GceStaticTunnel(resource.BaseResource): + """An object representing a GCE Tunnel.""" + + def __init__(self, project: str, region: str, name: str, vpn_gateway_id: str, + target_ip: str, ike_version: str, psk: str): + super(GceStaticTunnel, self).__init__() + self.project = project + self.region = region + self.name = name + self.vpn_gateway_id = vpn_gateway_id + self.target_ip = target_ip + self.ike_version = ike_version + self.psk = psk + + def _Create(self): + """Creates the Tunnel.""" + cmd = util.GcloudCommand(self, 'compute', 'vpn-tunnels', 'create', + self.name) + cmd.flags['peer-address'] = self.target_ip + cmd.flags['target-vpn-gateway'] = self.vpn_gateway_id + cmd.flags['ike-version'] = self.ike_version + cmd.flags['local-traffic-selector'] = '0.0.0.0/0' + cmd.flags['remote-traffic-selector'] = '0.0.0.0/0' + cmd.flags['shared-secret'] = self.psk + cmd.flags['region'] = self.region + cmd.Issue() + + def _Delete(self): + """Delete IPSec tunnel.""" + cmd = util.GcloudCommand(self, 'compute', 'vpn-tunnels', 'delete', + self.name) + cmd.flags['region'] = self.region + cmd.Issue(raise_on_failure=False) + + def _Exists(self) -> bool: + """Returns True if the tunnel exists.""" + cmd = util.GcloudCommand(self, 'compute', 'vpn-tunnels', 'describe', + self.name) + cmd.flags['region'] = self.region + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + def IsReady(self) -> bool: + cmd = util.GcloudCommand(self, 'compute', 'vpn-tunnels', 'describe', + self.name) + cmd.flags['region'] = self.region + response = cmd.Issue(suppress_warning=True) + return 'established' in str(response).lower() + + +class GceRoute(resource.BaseResource): + """An object 
representing a GCE Route.""" + + def __init__(self, route_name: str, dest_cidr: str, network_name: str, + next_hop_tun: str, next_hop_region: str, project: str): + super(GceRoute, self).__init__() + self.name = route_name + self.dest_cidr = dest_cidr + self.next_hop_region = next_hop_region + self.next_hop_tun = next_hop_tun + self.network_name = network_name + self.project = project + + def _Create(self): + """Creates the Route.""" + cmd = util.GcloudCommand(self, 'compute', 'routes', 'create', self.name) + cmd.flags['destination-range'] = self.dest_cidr + cmd.flags['network'] = self.network_name + cmd.flags['next-hop-vpn-tunnel'] = self.next_hop_tun + cmd.flags['next-hop-vpn-tunnel-region'] = self.next_hop_region + cmd.Issue() + + def _Delete(self): + """Delete route.""" + cmd = util.GcloudCommand(self, 'compute', 'routes', 'delete', self.name) + cmd.Issue(raise_on_failure=False) + + def _Exists(self) -> bool: + """Returns True if the Route exists.""" + cmd = util.GcloudCommand(self, 'compute', 'routes', 'describe', + self.name) + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + +class GceForwardingRule(resource.BaseResource): + """An object representing a GCE Forwarding Rule.""" + + def __init__(self, + name: str, + protocol: str, + src_vpn_gateway: GceVpnGateway, + port: Optional[int] = None): + super(GceForwardingRule, self).__init__() + self.name = name + self.protocol = protocol + self.port = port + self.target_name = src_vpn_gateway.name + self.target_ip = src_vpn_gateway.ip_address + self.src_region = src_vpn_gateway.region + self.project = src_vpn_gateway.project + + def __eq__(self, other: 'GceForwardingRule') -> bool: + """Defines equality to make comparison easy.""" + return (self.name == other.name and + self.protocol == other.protocol and + self.port == other.port and + self.target_name == other.target_name and + self.target_ip == other.target_ip and + self.src_region == other.src_region) + + def _Create(self): + """Creates the Forwarding Rule.""" + cmd = util.GcloudCommand(self, 'compute', 'forwarding-rules', 'create', + self.name) + cmd.flags['ip-protocol'] = self.protocol + if self.port: + cmd.flags['ports'] = self.port + cmd.flags['address'] = self.target_ip + cmd.flags['target-vpn-gateway'] = self.target_name + cmd.flags['region'] = self.src_region + cmd.Issue() + + def _Delete(self): + """Deletes the Forwarding Rule.""" + cmd = util.GcloudCommand(self, 'compute', 'forwarding-rules', 'delete', + self.name) + cmd.flags['region'] = self.src_region + cmd.Issue(raise_on_failure=False) + + def _Exists(self) -> bool: + """Returns True if the Forwarding Rule exists.""" + cmd = util.GcloudCommand(self, 'compute', 'forwarding-rules', 'describe', + self.name) + cmd.flags['region'] = self.src_region + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + +class GceFirewallRule(resource.BaseResource): + """An object representing a GCE Firewall Rule.""" + + def __init__(self, + name: str, + project: str, + allow: str, + network_name: str, + source_range: Optional[str] = None): + super(GceFirewallRule, self).__init__() + self.name = name + self.project = project + self.allow = allow + self.network_name = network_name + self.source_range = source_range + + def __eq__(self, other: 'GceFirewallRule') -> bool: + """Defines equality to make comparison easy.""" + return (self.name == other.name and + self.allow == other.allow and + self.project == other.project and + self.network_name == 
other.network_name and + self.source_range == other.source_range) + + def _Create(self): + """Creates the Firewall Rule.""" + cmd = util.GcloudCommand(self, 'compute', 'firewall-rules', 'create', + self.name) + cmd.flags['allow'] = self.allow + cmd.flags['network'] = self.network_name + if self.source_range: + cmd.flags['source-ranges'] = self.source_range + # Gcloud create commands may still create firewalls despite being rate + # limited. + stdout, stderr, retcode = cmd.Issue(raise_on_failure=False) + if retcode: + if cmd.rate_limited and 'already exists' in stderr: + return + debug_text = ('Ran: {%s}\nReturnCode:%s\nSTDOUT: %s\nSTDERR: %s' % + (' '.join(cmd.GetCommand()), retcode, stdout, stderr)) + raise errors.VmUtil.IssueCommandError(debug_text) + + def _Delete(self): + """Deletes the Firewall Rule.""" + cmd = util.GcloudCommand(self, 'compute', 'firewall-rules', 'delete', + self.name) + cmd.Issue(raise_on_failure=False) + + def _Exists(self) -> bool: + """Returns True if the Firewall Rule exists.""" + cmd = util.GcloudCommand(self, 'compute', 'firewall-rules', 'describe', + self.name) + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + +class GceFirewall(network.BaseFirewall): + """An object representing the GCE Firewall.""" + + CLOUD = providers.GCP + + def __init__(self): + """Initialize GCE firewall class.""" + self._lock = threading.Lock() + # TODO(user): make the key always have the same number of elements + self.firewall_rules: Dict[Tuple[Any, ...], GceFirewallRule] = {} + self.firewall_icmp_rules: Dict[Tuple[Any, ...], GceFirewallRule] = {} + + def AllowPort( + self, + vm, # gce_virtual_machine.GceVirtualMachine + start_port: int, + end_port: Optional[int] = None, + source_range: Optional[List[str]] = None): + """Opens a port on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the port for. + start_port: The first local port to open in a range. + end_port: The last local port to open in a range. If None, only start_port + will be opened. + source_range: List of source CIDRs to allow for this port. If none, all + sources are allowed. + """ + if vm.is_static: + return + if source_range: + source_range = ','.join(source_range) + with self._lock: + if end_port is None: + end_port = start_port + if vm.cidr: # Allow multiple networks per zone. + cidr_string = network.BaseNetwork.FormatCidrString(vm.cidr) + firewall_name = ('perfkit-firewall-%s-%s-%d-%d' % + (cidr_string, FLAGS.run_uri, start_port, end_port)) + key = (vm.project, vm.cidr, start_port, end_port, source_range) + else: + firewall_name = ('perfkit-firewall-%s-%d-%d' % + (FLAGS.run_uri, start_port, end_port)) + key = (vm.project, start_port, end_port, source_range) + if key in self.firewall_rules: + return + allow = ','.join('{0}:{1}-{2}'.format(protocol, start_port, end_port) + for protocol in ('tcp', 'udp')) + firewall_rule = GceFirewallRule( + firewall_name, vm.project, allow, + vm.network.network_resource.name, source_range) + self.firewall_rules[key] = firewall_rule + firewall_rule.Create() + + def DisallowAllPorts(self): + """Closes all ports on the firewall.""" + for firewall_rule in six.itervalues(self.firewall_rules): + firewall_rule.Delete() + for firewall_rule in six.itervalues(self.firewall_icmp_rules): + firewall_rule.Delete() + + def AllowIcmp(self, vm): + """Opens the ICMP protocol on the firewall. + + Args: + vm: The BaseVirtualMachine object to open the ICMP protocol for. 
+ """ + if vm.is_static: + return + with self._lock: + if vm.cidr: # Allow multiple networks per zone. + cidr_string = network.BaseNetwork.FormatCidrString(vm.cidr) + firewall_name = ('perfkit-firewall-icmp-%s-%s' % + (cidr_string, FLAGS.run_uri)) + key = (vm.project, vm.cidr) + else: + firewall_name = ('perfkit-firewall-icmp-%s' % + (FLAGS.run_uri)) + key = (vm.project) + + if key in self.firewall_icmp_rules: + return + + allow = 'ICMP' + firewall_rule = GceFirewallRule( + firewall_name, vm.project, allow, + vm.network.network_resource.name) + self.firewall_icmp_rules[key] = firewall_rule + firewall_rule.Create() + + +class GceNetworkSpec(network.BaseNetworkSpec): + """Object representing a GCE Network specification.""" + + def __init__(self, + project: Optional[str] = None, + mtu: Optional[int] = None, + **kwargs): + """Initializes the GceNetworkSpec. + + Args: + project: The project for which the Network should be created. + mtu: The MTU (max transmission unit) to use, if any. + **kwargs: Additional key word arguments passed to BaseNetworkSpec. + """ + super(GceNetworkSpec, self).__init__(**kwargs) + self.project = project + self.mtu = mtu + + +class GceNetworkResource(resource.BaseResource): + """Object representing a GCE Network resource.""" + + def __init__(self, + name: str, + mode: str, + project: str, + mtu: Optional[int] = None): + super(GceNetworkResource, self).__init__() + self.name = name + self.mode = mode + self.project = project + self.mtu = mtu + + def _Create(self): + """Creates the Network resource.""" + cmd = util.GcloudCommand(self, 'compute', 'networks', 'create', self.name) + cmd.flags['subnet-mode'] = self.mode + if self.mtu: + cmd.flags['mtu'] = self.mtu + cmd.Issue() + + def _Delete(self): + """Deletes the Network resource.""" + if FLAGS.gce_firewall_rules_clean_all: + for firewall_rule in self._GetAllFirewallRules(): + firewall_rule.Delete() + + cmd = util.GcloudCommand(self, 'compute', 'networks', 'delete', self.name) + cmd.Issue(raise_on_failure=False) + + def _Exists(self) -> bool: + """Returns True if the Network resource exists.""" + cmd = util.GcloudCommand(self, 'compute', 'networks', 'describe', self.name) + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + def _GetAllFirewallRules(self) -> List[GceFirewallRule]: + """Returns all the firewall rules that use the network.""" + cmd = util.GcloudCommand(self, 'compute', 'firewall-rules', 'list') + cmd.flags['filter'] = 'network=%s' % self.name + + stdout, _, _ = cmd.Issue(suppress_warning=True) + result = json.loads(stdout) + return [GceFirewallRule(entry['name'], self.project, ALLOW_ALL, self.name, + NETWORK_RANGE) for entry in result] + + +class GceSubnetResource(resource.BaseResource): + """Object representing a GCE subnet resource.""" + + def __init__(self, name: str, network_name: str, region: str, addr_range: str, + project: str): + super(GceSubnetResource, self).__init__() + self.name = name + self.network_name = network_name + self.region = region + self.addr_range = addr_range + self.project = project + + def _Create(self): + cmd = util.GcloudCommand(self, 'compute', 'networks', 'subnets', 'create', + self.name) + cmd.flags['network'] = self.network_name + cmd.flags['region'] = self.region + cmd.flags['range'] = self.addr_range + cmd.Issue() + + def _Exists(self) -> bool: + cmd = util.GcloudCommand(self, 'compute', 'networks', 'subnets', 'describe', + self.name) + if self.region: + cmd.flags['region'] = self.region + _, _, retcode = 
cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + def _Delete(self): + cmd = util.GcloudCommand(self, 'compute', 'networks', 'subnets', 'delete', + self.name) + if self.region: + cmd.flags['region'] = self.region + cmd.Issue(raise_on_failure=False) + + +class GceNetwork(network.BaseNetwork): + """Object representing a GCE Network.""" + + CLOUD = providers.GCP + + def __init__(self, network_spec: GceNetworkSpec): + super(GceNetwork, self).__init__(network_spec) + self.project: Optional[str] = network_spec.project + self.vpn_gateway: Dict[str, GceVpnGateway] = {} + + # Figuring out the type of network here. + # Precedence: User Managed > MULTI > SINGLE > DEFAULT + self.net_type = network.NetType.DEFAULT.value + self.cidr = NETWORK_RANGE + if FLAGS.gce_subnet_region: + self.net_type = network.NetType.SINGLE.value + self.cidr = FLAGS.gce_subnet_addr + if network_spec.cidr: + self.net_type = network.NetType.MULTI.value + self.cidr = network_spec.cidr + self.mtu = network_spec.mtu + + name = self._MakeGceNetworkName() + + subnet_region = (FLAGS.gce_subnet_region if not network_spec.cidr else + util.GetRegionFromZone(network_spec.zone)) + mode = 'auto' if subnet_region is None else 'custom' + self.network_resource = GceNetworkResource(name, mode, self.project, + self.mtu) + if subnet_region is None: + self.subnet_resource = None + else: + self.subnet_resource = GceSubnetResource(FLAGS.gce_subnet_name or name, + name, subnet_region, + self.cidr, self.project) + + # Stage FW rules. + self.all_nets = self._GetNetworksFromSpec( + network_spec) # Holds the different networks in this run. + # Holds FW rules for any external subnets. + self.external_nets_rules: Dict[str, GceFirewallRule] = {} + + # Set the default rule to allow all traffic within this network's subnet. + firewall_name = self._MakeGceFWRuleName() + self.default_firewall_rule = GceFirewallRule( + firewall_name, self.project, ALLOW_ALL, name, self.cidr) + + # Set external rules to allow traffic from other subnets in this benchmark. + for ext_net in self.all_nets: + if ext_net == self.cidr: + continue # We've already added our own network to the default rule. + rule_name = self._MakeGceFWRuleName(dst_cidr=ext_net) + self.external_nets_rules[rule_name] = GceFirewallRule(rule_name, + self.project, + ALLOW_ALL, name, + ext_net) + + # Add VpnGateways to the network. + if FLAGS.use_vpn: + for gatewaynum in range(0, FLAGS.vpn_service_gateway_count): + vpn_gateway_name = 'vpngw-%s-%s-%s' % ( + util.GetRegionFromZone(network_spec.zone), gatewaynum, + FLAGS.run_uri) + self.vpn_gateway[vpn_gateway_name] = GceVpnGateway( + vpn_gateway_name, name, util.GetRegionFromZone(network_spec.zone), + network_spec.cidr, self.project) + + # Add GCE Placement Group + no_placement_group = ( + not FLAGS.placement_group_style or + FLAGS.placement_group_style == placement_group.PLACEMENT_GROUP_NONE) + if no_placement_group: + self.placement_group = None + else: + placement_group_spec = gce_placement_group.GcePlacementGroupSpec( + 'GcePlacementGroupSpec', + flag_values=FLAGS, + zone=network_spec.zone, + project=self.project, + num_vms=self._GetNumberVms()) + self.placement_group = gce_placement_group.GcePlacementGroup( + placement_group_spec) + + def _GetNetworksFromSpec(self, network_spec: GceNetworkSpec) -> Set[str]: + """Returns a list of distinct CIDR networks for this benchmark. + + All GCP networks that aren't set explicitly with a vm_group cidr property + are assigned to the default network. 
The default network is either + specified as a single region with the gce_subnet_region and + gce_subnet_addr flags, or assigned to an auto created /20 in each region + if no flags are passed. + + Args: + network_spec: The network spec for the network. + Returns: + A set of CIDR strings used by this benchmark. + """ + nets = set() + gce_default_subnet = (FLAGS.gce_subnet_addr if FLAGS.gce_subnet_region + else NETWORK_RANGE) + + if hasattr(network_spec, 'custom_subnets'): + for (_, v) in network_spec.custom_subnets.items(): + if not v['cidr'] and v['cloud'] != 'GCP': + pass # @TODO handle other providers defaults in net_util + elif not v['cidr']: + nets.add(gce_default_subnet) + else: + nets.add(v['cidr']) + return nets + + def _MakeGceNetworkName(self, + net_type: Optional[str] = None, + cidr: Optional[str] = None, + uri: Optional[str] = None) -> str: + """Build the current network's name string. + + Uses current instance properties if none provided. + Must match regex: r'(?:[a-z](?:[-a-z0-9]{0,61}[a-z0-9])?)' + + Args: + net_type: One of ['default', 'single', 'multi'] + cidr: The CIDR range of this network. + uri: A network suffix (if different than FLAGS.run_uri) + Returns: + String The name of this network. + """ + if FLAGS.gce_network_name: # Return user managed network name if defined. + return FLAGS.gce_network_name + + net_type = net_type or self.net_type + cidr = cidr or self.cidr + uri = uri or FLAGS.run_uri + + name = 'pkb-network-%s' % uri # Assume the default network naming. + + if net_type in (network.NetType.SINGLE.value, + network.NetType.MULTI.value): + name = 'pkb-network-%s-%s-%s' % ( + net_type, self.FormatCidrString(cidr), uri) + + return name + + def _MakeGceFWRuleName(self, + net_type: Optional[str] = None, + src_cidr: Optional[str] = None, + dst_cidr: Optional[str] = None, + port_range_lo: Optional[str] = None, + port_range_hi: Optional[str] = None, + uri: Optional[str] = None) -> str: + """Build a firewall name string. + + Firewall rule names must be unique within a project so we include source + and destination nets to disambiguate. + + Args: + net_type: One of ['default', 'single', 'multi'] + src_cidr: The CIDR range of this network. + dst_cidr: The CIDR range of the remote network. + port_range_lo: The low port to open + port_range_hi: The high port to open in range. + uri: A firewall suffix (if different than FLAGS.run_uri) + Returns: + The name of this firewall rule. 
+ """ + net_type = net_type or self.net_type + uri = uri or FLAGS.run_uri + src_cidr = src_cidr or self.cidr + dst_cidr = dst_cidr or self.cidr + + prefix = None if src_cidr == dst_cidr else 'perfkit-firewall' + src_cidr = 'internal' if src_cidr == dst_cidr else self.FormatCidrString( + src_cidr) + dst_cidr = self.FormatCidrString(dst_cidr) + port_lo = port_range_lo + port_hi = None if port_range_lo == port_range_hi else port_range_hi + + firewall_name = '-'.join(str(i) for i in ( + prefix, net_type, src_cidr, dst_cidr, + port_lo, port_hi, uri) if i) + + return firewall_name + + @staticmethod + def _GetNetworkSpecFromVm(vm) -> GceNetworkSpec: + """Returns a BaseNetworkSpec created from VM attributes.""" + return GceNetworkSpec( + project=vm.project, zone=vm.zone, cidr=vm.cidr, mtu=vm.mtu) + + @classmethod + def _GetKeyFromNetworkSpec( + cls, spec) -> Union[Tuple[str, str], Tuple[str, str, str]]: + """Returns a key used to register Network instances.""" + if spec.cidr: + return (cls.CLOUD, spec.project, spec.cidr) + return (cls.CLOUD, spec.project) + + def _GetNumberVms(self) -> int: + """Counts the number of VMs to be used in this benchmark. + + Cannot do a len(benchmark_spec.vms) as that hasn't been populated yet. Go + through all the group_specs and sum up the vm_counts. + + Returns: + Count of the number of VMs in the benchmark. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + return sum((group_spec.vm_count - len(group_spec.static_vms)) + for group_spec in benchmark_spec.config.vm_groups.values()) + + def Create(self): + """Creates the actual network.""" + if not FLAGS.gce_network_name: + self.network_resource.Create() + if self.subnet_resource: + self.subnet_resource.Create() + if self.default_firewall_rule: + self.default_firewall_rule.Create() + if self.external_nets_rules: + vm_util.RunThreaded( + lambda rule: self.external_nets_rules[rule].Create(), + list(self.external_nets_rules.keys())) + if getattr(self, 'vpn_gateway', False): + vm_util.RunThreaded( + lambda gateway: self.vpn_gateway[gateway].Create(), + list(self.vpn_gateway.keys())) + if self.placement_group: + self.placement_group.Create() + + def Delete(self): + """Deletes the actual network.""" + if self.placement_group: + self.placement_group.Delete() + if not FLAGS.gce_network_name: + if getattr(self, 'vpn_gateway', False): + vm_util.RunThreaded( + lambda gateway: self.vpn_gateway[gateway].Delete(), + list(self.vpn_gateway.keys())) + if self.default_firewall_rule.created: + self.default_firewall_rule.Delete() + if self.external_nets_rules: + vm_util.RunThreaded( + lambda rule: self.external_nets_rules[rule].Delete(), + list(self.external_nets_rules.keys())) + if self.subnet_resource: + self.subnet_resource.Delete() + self.network_resource.Delete() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_nfs_service.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_nfs_service.py new file mode 100644 index 0000000..5c9c4d8 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_nfs_service.py @@ -0,0 +1,103 @@ +"""Resource for GCE NFS service.""" + +import json +import logging + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import nfs_service +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.gcp import gce_network +from perfkitbenchmarker.providers.gcp import util + +FLAGS = flags.FLAGS + + +class GceNfsService(nfs_service.BaseNfsService): + """Resource for GCE 
NFS service.""" + CLOUD = providers.GCP + NFS_TIERS = ( + 'STANDARD', + 'PREMIUM' + ) + DEFAULT_NFS_VERSION = '3.0' + DEFAULT_TIER = 'STANDARD' + user_managed = False + + def __init__(self, disk_spec, zone): + super(GceNfsService, self).__init__(disk_spec, zone) + self.name = 'nfs-%s' % FLAGS.run_uri + self.server_directory = '/vol0' + + @property + def network(self): + spec = gce_network.GceNetworkSpec(project=FLAGS.project) + network = gce_network.GceNetwork.GetNetworkFromNetworkSpec(spec) + return network.network_resource.name + + def GetRemoteAddress(self): + return self._Describe()['networks'][0]['ipAddresses'][0] + + def _Create(self): + logging.info('Creating NFS server %s', self.name) + volume_arg = 'name={0},capacity={1}'.format( + self.server_directory.strip('/'), self.disk_spec.disk_size) + network_arg = 'name={0}'.format(self.network) + args = [ + '--file-share', volume_arg, '--network', network_arg, '--labels', + util.MakeFormattedDefaultTags() + ] + if self.nfs_tier: + args += ['--tier', self.nfs_tier] + try: + self._NfsCommand('create', *args) + except errors.Error as ex: + # if this NFS service already exists reuse it + if self._Exists(): + logging.info('Reusing existing NFS server %s', self.name) + else: + raise errors.Resource.RetryableCreationError( + 'Error creating NFS service %s' % self.name, ex) + + def _Delete(self): + logging.info('Deleting NFS server %s', self.name) + try: + self._NfsCommand('delete', '--async') + except errors.Error as ex: + if self._Exists(): + raise errors.Resource.RetryableDeletionError(ex) + else: + logging.info('NFS server %s already deleted', self.name) + + def _Exists(self): + try: + self._Describe() + return True + except errors.Error: + return False + + def _IsReady(self): + try: + return self._Describe().get('state', None) == 'READY' + except errors.Error: + return False + + def _Describe(self): + return self._NfsCommand('describe') + + def _GetLocation(self): + return self.zone + + def _NfsCommand(self, verb, *args): + cmd = [FLAGS.gcloud_path, 'alpha', '--quiet', '--format', 'json'] + if FLAGS.project: + cmd += ['--project', FLAGS.project] + cmd += ['filestore', 'instances', verb, self.name] + cmd += [str(arg) for arg in args] + cmd += ['--location', self._GetLocation()] + stdout, stderr, retcode = vm_util.IssueCommand( + cmd, raise_on_failure=False, timeout=1800) + if retcode: + raise errors.Error('Error running command %s : %s' % (verb, stderr)) + return json.loads(stdout) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_placement_group.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_placement_group.py new file mode 100644 index 0000000..e87b290 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_placement_group.py @@ -0,0 +1,156 @@ +# Copyright 2020 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to represent an GCP Placement Group object. + +GCP specific implementations of Placement Group. 
+https://cloud.google.com/compute/docs/instances/define-instance-placement +""" + +import json +import logging + +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.gcp import util as gcp_util + +FLAGS = flags.FLAGS + +flags.DEFINE_integer( + 'gce_availability_domain_count', + 1, + 'Number of fault domains to create for SPREAD placement group', + lower_bound=1, + upper_bound=8) + + +class GcePlacementGroupSpec(placement_group.BasePlacementGroupSpec): + """Object containing the information needed to create a GcePlacementGroup. + + Attributes: + project: GCE project, used in creating the resource policy URL. + region: GCE region, used in creating the resource policy URL. + num_vms: Number of VMs to put into the resource group. + """ + + CLOUD = providers.GCP + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(GcePlacementGroupSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'project': (option_decoders.StringDecoder, {'none_ok': False}), + 'num_vms': (option_decoders.IntDecoder, {'none_ok': False}), + 'placement_group_style': (option_decoders.EnumDecoder, { + 'valid_values': set([placement_group.PLACEMENT_GROUP_SUPERCLUSTER] + + list(placement_group.PLACEMENT_GROUP_OPTIONS)), + 'default': placement_group.PLACEMENT_GROUP_NONE, + }) + }) + return result + + +class GcePlacementGroup(placement_group.BasePlacementGroup): + """Object representing a GCE Placement Group.""" + + CLOUD = providers.GCP + + def __init__(self, gce_placement_group_spec): + """Init method for GcePlacementGroup. + + Args: + gce_placement_group_spec: Object containing the + information needed to create a GcePlacementGroup. + """ + super(GcePlacementGroup, self).__init__(gce_placement_group_spec) + self.project = gce_placement_group_spec.project + self.region = gcp_util.GetRegionFromZone(gce_placement_group_spec.zone) + self.zone = None + self.num_vms = gce_placement_group_spec.num_vms + self.name = 'perfkit-{}'.format(context.GetThreadBenchmarkSpec().uuid) + self.style = gce_placement_group_spec.placement_group_style + self.availability_domain_count = FLAGS.gce_availability_domain_count + self.metadata.update({ + 'placement_group_name': self.name, + 'placement_group_style': self.style + }) + + def _Create(self): + """Creates the GCE placement group.""" + + cmd = gcp_util.GcloudCommand(self, 'compute', 'resource-policies', + 'create', 'group-placement', self.name) + + placement_policy = { + 'format': 'json', + 'region': self.region, + } + + if self.style == placement_group.PLACEMENT_GROUP_CLUSTER: + placement_policy['collocation'] = 'COLLOCATED' + placement_policy['vm-count'] = self.num_vms + + elif self.style == placement_group.PLACEMENT_GROUP_SUPERCLUSTER: + placement_policy['collocation'] = 'CLUSTERED' + placement_policy['vm-count'] = self.num_vms + # Only the alpha API supports CLUSTERED.
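+ # Illustrative sketch of the command assembled for this branch (NAME, N and + # REGION are placeholders; the exact argument rendering comes from + # GcloudCommand and the placement_policy flags set below): + # gcloud alpha compute resource-policies create group-placement NAME + # --collocation CLUSTERED --vm-count N --region REGION --format json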
+ cmd = gcp_util.GcloudCommand(self, 'alpha', 'compute', + 'resource-policies', 'create', + 'group-placement', self.name) + + else: + placement_policy[ + 'availability-domain-count'] = self.availability_domain_count + + cmd.flags.update(placement_policy) + + _, stderr, retcode = cmd.Issue(raise_on_failure=False) + + if retcode and "Quota 'RESOURCE_POLICIES' exceeded" in stderr: + raise errors.Benchmarks.QuotaFailure(stderr) + elif retcode: + raise errors.Resource.CreationError( + 'Failed to create placement group: %s return code: %s' % + (stderr, retcode)) + + def _Exists(self): + """See base class.""" + cmd = gcp_util.GcloudCommand(self, 'compute', 'resource-policies', + 'describe', self.name) + cmd.flags.update({'region': self.region, 'format': 'json'}) + stdout, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode: + return False + status = json.loads(stdout)['status'] + logging.info('Status of placement group %s: %s', self.name, status) + return True + + def _Delete(self): + """See base class.""" + logging.info('Deleting placement group %s', self.name) + cmd = gcp_util.GcloudCommand(self, 'compute', 'resource-policies', + 'delete', self.name) + cmd.flags.update({'region': self.region, 'format': 'json'}) + cmd.Issue(raise_on_failure=False) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py new file mode 100644 index 0000000..c78563f --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gce_virtual_machine.py @@ -0,0 +1,1465 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Class to represent a GCE Virtual Machine object. + +Zones: +run 'gcloud compute zones list' +Machine Types: +run 'gcloud compute machine-types list' +Images: +run 'gcloud compute images list' + +All VM specifics are self-contained and the class provides methods to +operate on the VM: boot, shutdown, etc. 
+""" + + +import collections +import copy +import itertools +import json +import logging +import posixpath +import re +import threading +from typing import Dict, List, Optional, Tuple + +from absl import flags +from perfkitbenchmarker import custom_virtual_machine_spec +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import linux_virtual_machine as linux_vm +from perfkitbenchmarker import placement_group +from perfkitbenchmarker import providers +from perfkitbenchmarker import resource +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import windows_virtual_machine +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.gcp import flags as gcp_flags +from perfkitbenchmarker.providers.gcp import gce_disk +from perfkitbenchmarker.providers.gcp import gce_network +from perfkitbenchmarker.providers.gcp import gcs +from perfkitbenchmarker.providers.gcp import gcsfuse_disk +from perfkitbenchmarker.providers.gcp import util +import six +from six.moves import range +import yaml +import ipaddress +import math + +try: + unicode +except NameError: + unicode = str + +FLAGS = flags.FLAGS + +NVME = 'NVME' +SCSI = 'SCSI' +GVNIC = 'GVNIC' +_INSUFFICIENT_HOST_CAPACITY = ('does not have enough resources available ' + 'to fulfill the request.') +_FAILED_TO_START_DUE_TO_PREEMPTION = ( + 'Instance failed to start due to preemption.') +_UNSUPPORTED_RESOURCE = 'Could not fetch resource' +_GCE_VM_CREATE_TIMEOUT = 1200 +_GCE_NVIDIA_GPU_PREFIX = 'nvidia-tesla-' +_SHUTDOWN_SCRIPT = 'su "{user}" -c "echo | gsutil cp - {preempt_marker}"' +_WINDOWS_SHUTDOWN_SCRIPT_PS1 = 'Write-Host | gsutil cp - {preempt_marker}' +_METADATA_PREEMPT_URI = 'http://metadata.google.internal/computeMetadata/v1/instance/preempted' +_METADATA_PREEMPT_CMD = f'curl {_METADATA_PREEMPT_URI} -H "Metadata-Flavor: Google"' +_METADATA_PREEMPT_CMD_WIN = (f'Invoke-RestMethod -Uri {_METADATA_PREEMPT_URI} ' + '-Headers @{"Metadata-Flavor"="Google"}') +_NON_SEV_COMPATIBLE_IMAGE = ( + 'can only be set when using an SEV compatible image') + +_ARM_MACHINE_TYPES = [ + 't2a' +] + +class GceUnexpectedWindowsAdapterOutputError(Exception): + """Raised when querying the status of a windows adapter failed.""" + + +class GceDriverDoesntSupportFeatureError(Exception): + """Raised if there is an attempt to set a feature not supported.""" + + +class GceVmSpec(virtual_machine.BaseVmSpec): + """Object containing the information needed to create a GceVirtualMachine. + + Attributes: + cpus: None or int. Number of vCPUs for custom VMs. + memory: None or string. For custom VMs, a string representation of the size + of memory, expressed in MiB or GiB. Must be an integer number of MiB + (e.g. "1280MiB", "7.5GiB"). + num_local_ssds: int. The number of local SSDs to attach to the instance. + preemptible: boolean. True if the VM should be preemptible, False otherwise. + project: string or None. The project to create the VM in. + image_family: string or None. The image family used to locate the image. + image_project: string or None. The image project used to locate the + specifed image. + boot_disk_size: None or int. The size of the boot disk in GB. + boot_disk_type: string or None. The type of the boot disk. 
+ """ + + CLOUD = providers.GCP + + def __init__(self, *args, **kwargs): + self.num_local_ssds: int = None + self.preemptible: bool = None + self.boot_disk_size: int = None + self.boot_disk_type: str = None + self.project: str = None + self.image_family: str = None + self.image_project: str = None + self.node_type: str = None + self.min_cpu_platform: str = None + self.threads_per_core: int = None + self.gce_tags: List[str] = None + self.min_node_cpus: int = None + super(GceVmSpec, self).__init__(*args, **kwargs) + + if isinstance(self.machine_type, + custom_virtual_machine_spec.CustomMachineTypeSpec): + logging.warning('Specifying a custom machine in the format of ' + '{cpus: [NUMBER_OF_CPUS], memory: [GB_OF_MEMORY]} ' + 'creates a custom machine in the n1 machine family. ' + 'To create custom machines in other machine families, ' + 'use [MACHINE_FAMILY]-custom-[NUMBER_CPUS]-[NUMBER_MiB] ' + 'nomaclature. e.g. n2-custom-2-4096.') + self.cpus = self.machine_type.cpus + self.memory = self.machine_type.memory + self.machine_type = None + else: + self.cpus = None + self.memory = None + + # The A2 machine family, unlike other GCP offerings has a preset number of + # GPUs, so we set them directly from the machine_type + # https://cloud.google.com/blog/products/compute/announcing-google-cloud-a2-vm-family-based-on-nvidia-a100-gpu + if self.machine_type and self.machine_type.startswith('a2-'): + a2_lookup = { + 'a2-highgpu-1g': 1, + 'a2-highgpu-2g': 2, + 'a2-highgpu-4g': 4, + 'a2-highgpu-8g': 8, + 'a2-megagpu-16g': 16 + } + self.gpu_count = a2_lookup[self.machine_type] + self.gpu_type = virtual_machine.GPU_A100 + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. + """ + super(GceVmSpec, cls)._ApplyFlags(config_values, flag_values) + if flag_values['gce_num_local_ssds'].present: + config_values['num_local_ssds'] = flag_values.gce_num_local_ssds + if flag_values['gce_preemptible_vms'].present: + config_values['preemptible'] = flag_values.gce_preemptible_vms + if flag_values['gce_boot_disk_size'].present: + config_values['boot_disk_size'] = flag_values.gce_boot_disk_size + if flag_values['gce_boot_disk_type'].present: + config_values['boot_disk_type'] = flag_values.gce_boot_disk_type + if flag_values['machine_type'].present: + config_values['machine_type'] = yaml.safe_load(flag_values.machine_type) + if flag_values['project'].present: + config_values['project'] = flag_values.project + if flag_values['image_family'].present: + config_values['image_family'] = flag_values.image_family + if flag_values['image_project'].present: + config_values['image_project'] = flag_values.image_project + if flag_values['gcp_node_type'].present: + config_values['node_type'] = flag_values.gcp_node_type + if flag_values['gcp_min_cpu_platform'].present: + if (flag_values.gcp_min_cpu_platform != + gcp_flags.GCP_MIN_CPU_PLATFORM_NONE): + config_values['min_cpu_platform'] = flag_values.gcp_min_cpu_platform + else: + # Specifying gcp_min_cpu_platform explicitly removes any config. 
+ config_values.pop('min_cpu_platform', None) + if flag_values['disable_smt'].present: + config_values['threads_per_core'] = 1 + if flag_values['gce_tags'].present: + config_values['gce_tags'] = flag_values.gce_tags + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword + arguments to construct in order to decode the named option. + """ + result = super(GceVmSpec, cls)._GetOptionDecoderConstructions() + result.update({ + 'machine_type': (custom_virtual_machine_spec.MachineTypeDecoder, { + 'default': None + }), + 'num_local_ssds': (option_decoders.IntDecoder, { + 'default': 0, + 'min': 0 + }), + 'preemptible': (option_decoders.BooleanDecoder, { + 'default': False + }), + 'boot_disk_size': (option_decoders.IntDecoder, { + 'default': None + }), + 'boot_disk_type': (option_decoders.StringDecoder, { + 'default': None + }), + 'project': (option_decoders.StringDecoder, { + 'default': None + }), + 'image_family': (option_decoders.StringDecoder, { + 'default': None + }), + 'image_project': (option_decoders.StringDecoder, { + 'default': None + }), + 'node_type': (option_decoders.StringDecoder, { + 'default': 'n1-node-96-624' + }), + 'min_cpu_platform': (option_decoders.StringDecoder, { + 'default': None + }), + 'threads_per_core': (option_decoders.IntDecoder, { + 'default': None + }), + 'gce_tags': (option_decoders.ListDecoder, { + 'item_decoder': option_decoders.StringDecoder(), + 'default': None + }), + }) + return result + + +class GceSoleTenantNodeTemplate(resource.BaseResource): + """Object representing a GCE sole tenant node template. + + Attributes: + name: string. The name of the node template. + node_type: string. The node type of the node template. + zone: string. The zone of the node template, converted to region. + """ + + def __init__(self, name, node_type, zone, project): + super(GceSoleTenantNodeTemplate, self).__init__() + self.name = name + self.node_type = node_type + self.region = util.GetRegionFromZone(zone) + self.project = project + + def _Create(self): + """Creates the node template.""" + cmd = util.GcloudCommand(self, 'compute', 'sole-tenancy', + 'node-templates', 'create', self.name) + cmd.flags['node-type'] = self.node_type + cmd.flags['region'] = self.region + cmd.Issue() + + def _Exists(self): + """Returns True if the node template exists.""" + cmd = util.GcloudCommand(self, 'compute', 'sole-tenancy', + 'node-templates', 'describe', self.name) + cmd.flags['region'] = self.region + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + def _Delete(self): + """Deletes the node template.""" + cmd = util.GcloudCommand(self, 'compute', 'sole-tenancy', + 'node-templates', 'delete', self.name) + cmd.flags['region'] = self.region + cmd.Issue(raise_on_failure=False) + + +class GceSoleTenantNodeGroup(resource.BaseResource): + """Object representing a GCE sole tenant node group. + + Attributes: + name: string. The name of the node group. + node_template: string. The node template of the node group. + zone: string. The zone of the node group.
+ """ + + _counter_lock = threading.Lock() + _counter = itertools.count() + + def __init__(self, node_type, zone, project): + super(GceSoleTenantNodeGroup, self).__init__() + with self._counter_lock: + self.instance_number = next(self._counter) + self.name = 'pkb-node-group-%s-%s' % (FLAGS.run_uri, self.instance_number) + self.node_type = node_type + self.node_template = None + self.zone = zone + self.project = project + self.fill_fraction = 0.0 + + def _Create(self): + """Creates the host.""" + cmd = util.GcloudCommand(self, 'compute', 'sole-tenancy', + 'node-groups', 'create', self.name) + cmd.flags['node-template'] = self.node_template.name + cmd.flags['target-size'] = 1 + _, stderr, retcode = cmd.Issue(raise_on_failure=False) + util.CheckGcloudResponseKnownFailures(stderr, retcode) + + def _CreateDependencies(self): + super(GceSoleTenantNodeGroup, self)._CreateDependencies() + node_template_name = self.name.replace('group', 'template') + node_template = GceSoleTenantNodeTemplate( + node_template_name, self.node_type, self.zone, self.project) + node_template.Create() + self.node_template = node_template + + def _DeleteDependencies(self): + if self.node_template: + self.node_template.Delete() + + def _Exists(self): + """Returns True if the host exists.""" + cmd = util.GcloudCommand(self, 'compute', 'sole-tenancy', + 'node-groups', 'describe', self.name) + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + return not retcode + + def _Delete(self): + """Deletes the host.""" + cmd = util.GcloudCommand(self, 'compute', 'sole-tenancy', + 'node-groups', 'delete', self.name) + cmd.Issue(raise_on_failure=False) + + +def GenerateAcceleratorSpecString(accelerator_type, accelerator_count): + """Generates a string to be used to attach accelerators to a VM using gcloud. + + This function takes a cloud-agnostic accelerator type (k80, p100, etc.) and + returns a gce-specific accelerator name (nvidia-tesla-k80, etc). + + If FLAGS.gce_accelerator_type_override is specified, the value of said flag + will be used as the name of the accelerator. + + Args: + accelerator_type: cloud-agnostic accelerator type (p100, k80, etc.) + accelerator_count: number of accelerators to attach to the VM + + Returns: + String to be used by gcloud to attach accelerators to a VM. + Must be prepended by the flag '--accelerator'. + """ + gce_accelerator_type = (FLAGS.gce_accelerator_type_override or + _GCE_NVIDIA_GPU_PREFIX + accelerator_type) + return 'type={0},count={1}'.format( + gce_accelerator_type, + accelerator_count) + + +class GceVirtualMachine(virtual_machine.BaseVirtualMachine): + """Object representing a Google Compute Engine Virtual Machine.""" + + CLOUD = providers.GCP + + # Subclasses should override the default image OR + # both the image family and image_project. + DEFAULT_IMAGE = None + DEFAULT_IMAGE_FAMILY = None + DEFAULT_IMAGE_PROJECT = None + + # Subclasses may override these, but are recommended to leave them up to GCE. + BOOT_DISK_SIZE_GB = None + BOOT_DISK_TYPE = None + + NVME_START_INDEX = 1 + + _host_lock = threading.Lock() + deleted_hosts = set() + host_map = collections.defaultdict(list) + + def __init__(self, vm_spec): + """Initialize a GCE virtual machine. + + Args: + vm_spec: virtual_machine.BaseVmSpec object of the vm. + + Raises: + errors.Config.MissingOption: If the spec does not include a "machine_type" + or both "cpus" and "memory". + errors.Config.InvalidValue: If the spec contains both "machine_type" and + at least one of "cpus" or "memory". 
+ """ + super(GceVirtualMachine, self).__init__(vm_spec) + self.boot_metadata = {} + self.cpus = vm_spec.cpus + self.image = self.image or self.DEFAULT_IMAGE + self.max_local_disks = vm_spec.num_local_ssds + self.memory_mib = vm_spec.memory + self.preemptible = vm_spec.preemptible + self.spot_early_termination = False + self.preemptible_status_code = None + self.project = vm_spec.project or util.GetDefaultProject() + self.image_family = vm_spec.image_family or self.DEFAULT_IMAGE_FAMILY + + if self.machine_type.split("-")[0] in _ARM_MACHINE_TYPES: + self.image_family = self.image_family + "-arm64" + + self.image_project = vm_spec.image_project or self.DEFAULT_IMAGE_PROJECT + self.backfill_image = False + self.mtu: Optional[int] = FLAGS.mtu + self.network = self._GetNetwork() + self.firewall = gce_network.GceFirewall.GetFirewall() + self.boot_disk_size = vm_spec.boot_disk_size or self.BOOT_DISK_SIZE_GB + self.boot_disk_type = vm_spec.boot_disk_type or self.BOOT_DISK_TYPE + self.id = None + self.node_type = vm_spec.node_type + self.node_group = None + self.use_dedicated_host = vm_spec.use_dedicated_host + self.num_vms_per_host = vm_spec.num_vms_per_host + self.min_cpu_platform = vm_spec.min_cpu_platform + self.threads_per_core = vm_spec.threads_per_core + self.gce_remote_access_firewall_rule = FLAGS.gce_remote_access_firewall_rule + self.gce_accelerator_type_override = FLAGS.gce_accelerator_type_override + self.gce_tags = vm_spec.gce_tags + self.gce_network_tier = FLAGS.gce_network_tier + self.gce_nic_type = FLAGS.gce_nic_type + self.gce_egress_bandwidth_tier = gcp_flags.EGRESS_BANDWIDTH_TIER.value + self.gce_shielded_secure_boot = FLAGS.gce_shielded_secure_boot + self.gce_confidential_compute = FLAGS.gce_confidential_compute + # Default to GCE default (Live Migration) + self.on_host_maintenance = None + # https://cloud.google.com/compute/docs/instances/live-migration#gpusmaintenance + # https://cloud.google.com/compute/docs/instances/define-instance-placement#restrictions + # TODO(pclay): Update if this assertion ever changes + if (FLAGS['gce_migrate_on_maintenance'].present and + FLAGS.gce_migrate_on_maintenance and + (self.gpu_count or self.network.placement_group)): + raise errors.Config.InvalidValue( + 'Cannot set flag gce_migrate_on_maintenance on instances with GPUs ' + 'or network placement groups, as it is not supported by GCP.') + if (FLAGS['gce_migrate_on_maintenance'].present and + FLAGS.gce_migrate_on_maintenance and self.gce_confidential_compute): + raise errors.Config.InvalidValue( + 'Cannot set flag gce_migrate_on_maintenance on instances with confidential VM enabled, ' + 'as it is not supported by GCP.') + if (not FLAGS.gce_migrate_on_maintenance or + self.gpu_count or self.network.placement_group or + self.gce_confidential_compute): + self.on_host_maintenance = 'TERMINATE' + self.automatic_restart = FLAGS.gce_automatic_restart + if self.preemptible: + self.preempt_marker = f'gs://{FLAGS.gcp_preemptible_status_bucket}/{FLAGS.run_uri}/{self.name}' + # Can not configure a preemptible instance to live migrate + self.on_host_maintenance = 'TERMINATE' + + def _GetNetwork(self): + """Returns the GceNetwork to use.""" + return gce_network.GceNetwork.GetNetwork(self) + + @property + def host_list(self): + """Returns the list of hosts that are compatible with this VM.""" + return self.host_map[(self.project, self.zone)] + + def _GenerateCreateCommand(self, ssh_keys_path): + """Generates a command to create the VM instance. + + Args: + ssh_keys_path: string. 
Path to a file containing the sshKeys metadata. + + Returns: + GcloudCommand. gcloud command to issue in order to create the VM instance. + """ + args = ['compute', 'instances', 'create', self.name] + + cmd = util.GcloudCommand(self, *args) + + # https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration + if self.gce_egress_bandwidth_tier: + network_performance_configs = f'total-egress-bandwidth-tier={self.gce_egress_bandwidth_tier}' + cmd.flags['network-performance-configs'] = network_performance_configs + + # https://cloud.google.com/compute/docs/instances/setting-instance-scheduling-options + if self.on_host_maintenance: + maintenance_flag = 'maintenance-policy' + cmd.flags[maintenance_flag] = self.on_host_maintenance + + # Bundle network-related arguments with --network-interface + # This flag is mutually exclusive with any of these flags: + # --address, --network, --network-tier, --subnet, --private-network-ip. + # gcloud compute instances create ... --network-interface= + ni_args = [] + if self.network.subnet_resource is not None: + ni_args.append(f'subnet={self.network.subnet_resource.name}') + else: + ni_args.append(f'network={self.network.network_resource.name}') + ni_args.append(f'network-tier={self.gce_network_tier.upper()}') + if self.gce_confidential_compute: + self.gce_nic_type = GVNIC + ni_args.append(f'nic-type={self.gce_nic_type.upper()}') + cmd.flags['network-interface'] = ','.join(ni_args) + + if self.image: + cmd.flags['image'] = self.image + elif self.image_family: + cmd.flags['image-family'] = self.image_family + if self.image_project is not None: + cmd.flags['image-project'] = self.image_project + cmd.flags['boot-disk-auto-delete'] = True + if self.boot_disk_size: + cmd.flags['boot-disk-size'] = self.boot_disk_size + if self.boot_disk_type: + cmd.flags['boot-disk-type'] = self.boot_disk_type + if self.machine_type is None: + cmd.flags['custom-cpu'] = self.cpus + cmd.flags['custom-memory'] = '{0}MiB'.format(self.memory_mib) + else: + cmd.flags['machine-type'] = self.machine_type + + if self.min_cpu_platform: + cmd.flags['min-cpu-platform'] = self.min_cpu_platform + + if self.threads_per_core: + cmd.flags['threads-per-core'] = self.threads_per_core + + if self.gpu_count and self.machine_type and 'a2-' not in self.machine_type: + # A2 machine type already has predefined GPU type and count. + cmd.flags['accelerator'] = GenerateAcceleratorSpecString(self.gpu_type, + self.gpu_count) + cmd.flags['tags'] = ','.join(['perfkitbenchmarker'] + (self.gce_tags or [])) + if not self.automatic_restart: + cmd.flags['no-restart-on-failure'] = True + self.metadata['automatic_restart'] = self.automatic_restart + if self.node_group: + cmd.flags['node-group'] = self.node_group.name + if self.gce_shielded_secure_boot: + cmd.flags['shielded-secure-boot'] = True + if self.gce_confidential_compute: + cmd.flags['confidential-compute'] = True + + if self.network.placement_group: + self.metadata.update(self.network.placement_group.GetResourceMetadata()) + cmd.flags['resource-policies'] = self.network.placement_group.name + else: + self.metadata[ + 'placement_group_style'] = placement_group.PLACEMENT_GROUP_NONE + + metadata_from_file = {'sshKeys': ssh_keys_path} + parsed_metadata_from_file = flag_util.ParseKeyValuePairs( + FLAGS.gcp_instance_metadata_from_file) + for key, value in six.iteritems(parsed_metadata_from_file): + if key in metadata_from_file: + logging.warning('Metadata "%s" is set internally. 
Cannot be overridden ' + 'from command line.', key) + continue + metadata_from_file[key] = value + cmd.flags['metadata-from-file'] = ','.join([ + '%s=%s' % (k, v) for k, v in six.iteritems(metadata_from_file) + ]) + + metadata = {} + metadata.update(self.boot_metadata) + metadata.update(util.GetDefaultTags()) + + additional_metadata = {} + additional_metadata.update(self.vm_metadata) + additional_metadata.update( + flag_util.ParseKeyValuePairs(FLAGS.gcp_instance_metadata)) + + for key, value in six.iteritems(additional_metadata): + if key in metadata: + logging.warning('Metadata "%s" is set internally. Cannot be overridden ' + 'from command line.', key) + continue + metadata[key] = value + + if self.preemptible: + cmd.flags['preemptible'] = True + metadata.update([self._PreemptibleMetadataKeyValue()]) + + cmd.flags['metadata'] = util.FormatTags(metadata) + cmd.flags['local-ssd'] = (['interface={0}'.format( + FLAGS.gce_ssd_interface)] * self.max_local_disks) + if FLAGS.gcloud_scopes: + cmd.flags['scopes'] = ','.join(re.split(r'[,; ]', FLAGS.gcloud_scopes)) + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + if self.gce_confidential_compute: + if self.machine_type and 'n2d-' not in self.machine_type: + raise errors.Config.InvalidValue( + 'Cannot set flag gce_confidential_compute on instances with machine family ' + 'other than n2d.') + if self.gce_nic_type.upper() != GVNIC: + raise errors.Config.InvalidValue( + 'Cannot set flag gce_confidential_compute on instances with virtual NIC ' + 'type other than {}.'.format(GVNIC)) + if self.max_local_disks > 0 and FLAGS.gce_ssd_interface.upper() != NVME: + raise errors.Config.InvalidValue( + 'Cannot set flag gce_confidential_compute on instances with ssd interface ' + 'type other than {}.'.format(NVME)) + + # gce_egress_bandwidth_tier flag could only be set when nic type is GVNIC + # and only work with VMs in the N2, N2D, C2, or C2D series + if self.gce_egress_bandwidth_tier: + if self.machine_type and not self._HighBandwidthVMType(self.machine_type): + raise errors.Config.InvalidValue( + 'Cannot set flag gce_egress_bandwidth_tier on instances with machine family ' + 'other than n2, n2d, c2, c2d.') + if self.gce_nic_type.upper() != GVNIC: + raise errors.Config.InvalidValue( + 'Cannot set flag gce_egress_bandwidth_tier on instances with virtual NIC ' + 'type other than {}.'.format(GVNIC)) + return cmd + + def _HighBandwidthVMType(self, machine_type): + if 'n2' in machine_type or 'c2' in machine_type: + return True + return False + + def _AddShutdownScript(self): + cmd = util.GcloudCommand( + self, 'compute', 'instances', 'add-metadata', self.name) + key, value = self._PreemptibleMetadataKeyValue() + cmd.flags['metadata'] = f'{key}={value}' + cmd.Issue() + + def _RemoveShutdownScript(self): + # Removes shutdown script which copies status when it is interrupted + cmd = util.GcloudCommand( + self, 'compute', 'instances', 'remove-metadata', self.name) + key, _ = self._PreemptibleMetadataKeyValue() + cmd.flags['keys'] = key + cmd.Issue(raise_on_failure=False) + + def Reboot(self): + if self.preemptible: + self._RemoveShutdownScript() + super().Reboot() + if self.preemptible: + self._AddShutdownScript() + + def _Start(self): + """Starts the VM.""" + start_cmd = util.GcloudCommand(self, 'compute', 'instances', 'start', + self.name) + # After start, IP address is changed + stdout, _, _ = start_cmd.Issue() + response = json.loads(stdout) + # Response is a list of size one + self._ParseDescribeResponse(response[0]) + + def _Stop(self): + """Stops the 
VM.""" + stop_cmd = util.GcloudCommand(self, 'compute', 'instances', 'stop', + self.name) + stop_cmd.Issue() + + def _PreDelete(self): + super()._PreDelete() + if self.preemptible: + self._RemoveShutdownScript() + + def _Create(self): + """Create a GCE VM instance.""" + num_hosts = len(self.host_list) + with open(self.ssh_public_key) as f: + public_key = f.read().rstrip('\n') + with vm_util.NamedTemporaryFile(mode='w', dir=vm_util.GetTempDir(), + prefix='key-metadata') as tf: + tf.write('%s:%s\n' % (self.user_name, public_key)) + tf.close() + create_cmd = self._GenerateCreateCommand(tf.name) + _, stderr, retcode = create_cmd.Issue(timeout=_GCE_VM_CREATE_TIMEOUT, + raise_on_failure=False) + + if (self.use_dedicated_host and retcode and + _INSUFFICIENT_HOST_CAPACITY in stderr): + if self.num_vms_per_host: + raise errors.Resource.CreationError( + 'Failed to create host: %d vms of type %s per host exceeds ' + 'memory capacity limits of the host' % + (self.num_vms_per_host, self.machine_type)) + else: + logging.warning( + 'Creation failed due to insufficient host capacity. A new host will ' + 'be created and instance creation will be retried.') + with self._host_lock: + if num_hosts == len(self.host_list): + host = GceSoleTenantNodeGroup(self.node_type, + self.zone, self.project) + self.host_list.append(host) + host.Create() + self.node_group = self.host_list[-1] + raise errors.Resource.RetryableCreationError() + if (not self.use_dedicated_host and retcode and + _INSUFFICIENT_HOST_CAPACITY in stderr): + logging.error(util.STOCKOUT_MESSAGE) + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + util.STOCKOUT_MESSAGE) + util.CheckGcloudResponseKnownFailures(stderr, retcode) + if retcode: + if (create_cmd.rate_limited and 'already exists' in stderr and + FLAGS.retry_on_rate_limited): + # Gcloud create commands may still create VMs despite being rate + # limited. + return + if util.RATE_LIMITED_MESSAGE in stderr: + raise errors.Benchmarks.QuotaFailure.RateLimitExceededError(stderr) + if self.preemptible and _FAILED_TO_START_DUE_TO_PREEMPTION in stderr: + self.spot_early_termination = True + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + 'Interrupted before VM started') + if self.gce_confidential_compute and _NON_SEV_COMPATIBLE_IMAGE in stderr: + raise errors.Resource.CreationError('Failed to create VM: Selected image ' + 'is not SEV compatible when using ' + 'gce_confidential_compute flag') + if _UNSUPPORTED_RESOURCE in stderr: + raise errors.Benchmarks.UnsupportedConfigError(stderr) + raise errors.Resource.CreationError( + 'Failed to create VM: %s return code: %s' % (stderr, retcode)) + + def _CreateDependencies(self): + super(GceVirtualMachine, self)._CreateDependencies() + # Create necessary VM access rules *prior* to creating the VM, such that it + # doesn't affect boot time. 
+ self.AllowRemoteAccessPorts() + + if self.use_dedicated_host: + with self._host_lock: + if (not self.host_list or (self.num_vms_per_host and + self.host_list[-1].fill_fraction + + 1.0 / self.num_vms_per_host > 1.0)): + host = GceSoleTenantNodeGroup(self.node_type, + self.zone, self.project) + self.host_list.append(host) + host.Create() + self.node_group = self.host_list[-1] + if self.num_vms_per_host: + self.node_group.fill_fraction += 1.0 / self.num_vms_per_host + + def _DeleteDependencies(self): + if self.node_group: + with self._host_lock: + if self.node_group in self.host_list: + self.host_list.remove(self.node_group) + if self.node_group not in self.deleted_hosts: + self.node_group.Delete() + self.deleted_hosts.add(self.node_group) + + def _ParseDescribeResponse(self, describe_response): + """Sets the ID and IP addresses from a response to the describe command. + + Args: + describe_response: JSON-loaded response to the describe gcloud command. + + Raises: + KeyError, IndexError: If the ID and IP addresses cannot be parsed. + """ + self.id = describe_response['id'] + network_interface = describe_response['networkInterfaces'][0] + self.internal_ip = network_interface['networkIP'] + self.ip_address = network_interface['accessConfigs'][0]['natIP'] + + @property + def HasIpAddress(self): + """Returns True when the IP has been retrieved from a describe response.""" + return not self._NeedsToParseDescribeResponse() + + def _NeedsToParseDescribeResponse(self): + """Returns whether the ID and IP addresses still need to be set.""" + return not self.id or not self.internal_ip or not self.ip_address + + @vm_util.Retry() + def _PostCreate(self): + """Get the instance's data.""" + if self._NeedsToParseDescribeResponse(): + getinstance_cmd = util.GcloudCommand(self, 'compute', 'instances', + 'describe', self.name) + stdout, _, _ = getinstance_cmd.Issue() + response = json.loads(stdout) + self._ParseDescribeResponse(response) + if not all((self.image, self.boot_disk_size, self.boot_disk_type)): + getdisk_cmd = util.GcloudCommand( + self, 'compute', 'disks', 'describe', self.name) + stdout, _, _ = getdisk_cmd.Issue() + response = json.loads(stdout) + if not self.image: + self.image = response['sourceImage'].split('/')[-1] + self.backfill_image = True + if not self.boot_disk_size: + self.boot_disk_size = response['sizeGb'] + if not self.boot_disk_type: + self.boot_disk_type = response['type'].split('/')[-1] + + def _Delete(self): + """Delete a GCE VM instance.""" + delete_cmd = util.GcloudCommand(self, 'compute', 'instances', 'delete', + self.name) + delete_cmd.Issue(raise_on_failure=False) + + def _Suspend(self): + """Suspend a GCE VM instance.""" + util.GcloudCommand(self, 'beta', 'compute', 'instances', 'suspend', + self.name).Issue() + + def _Resume(self): + """Resume a GCE VM instance.""" + resume_cmd = util.GcloudCommand(self, 'beta', 'compute', 'instances', + 'resume', self.name) + + # After resume, IP address is refreshed + stdout, _, _ = resume_cmd.Issue() + response = json.loads(stdout) + # Response is a list of size one + self._ParseDescribeResponse(response[0]) + + def _Exists(self): + """Returns true if the VM exists.""" + getinstance_cmd = util.GcloudCommand(self, 'compute', 'instances', + 'describe', self.name) + stdout, _, _ = getinstance_cmd.Issue(suppress_warning=True, + raise_on_failure=False) + try: + response = json.loads(stdout) + except ValueError: + return False + try: + # The VM may exist before we can fully parse the describe response for the + # IP address or ID of the VM. 
For example, if the VM has a status of + # provisioning, we can't yet parse the IP address. If this is the case, we + # will continue to invoke the describe command in _PostCreate above. + # However, if we do have this information now, it's better to stash it and + # avoid invoking the describe command again. + self._ParseDescribeResponse(response) + except (KeyError, IndexError): + pass + return True + + def CreateScratchDisk(self, disk_spec): + """Create a VM's scratch disk. + + Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. + """ + disks = [] + replica_zones = FLAGS.data_disk_zones + + for i in range(disk_spec.num_striped_disks): + if disk_spec.disk_type == disk.LOCAL: + name = '' + if FLAGS.gce_ssd_interface == SCSI: + name = 'local-ssd-%d' % self.local_disk_counter + disk_number = self.local_disk_counter + 1 + elif FLAGS.gce_ssd_interface == NVME: + # Device can either be /dev/nvme0n1 or /dev/nvme1n1. Find out which. + name, _ = self.RemoteCommand('find /dev/nvme*n%d' % + (self.local_disk_counter + 1)) + name = name.strip().split('/')[-1] + disk_number = self.local_disk_counter + self.NVME_START_INDEX + else: + raise errors.Error('Unknown Local SSD Interface.') + data_disk = gce_disk.GceDisk(disk_spec, name, self.zone, self.project, + replica_zones=replica_zones) + data_disk.disk_number = disk_number + self.local_disk_counter += 1 + if self.local_disk_counter > self.max_local_disks: + raise errors.Error('Not enough local disks.') + elif disk_spec.disk_type == disk.NFS: + data_disk = self._GetNfsService().CreateNfsDisk() + elif disk_spec.disk_type == disk.OBJECT_STORAGE: + data_disk = gcsfuse_disk.GcsFuseDisk(disk_spec) + else: + name = '%s-data-%d-%d' % (self.name, len(self.scratch_disks), i) + data_disk = gce_disk.GceDisk(disk_spec, name, self.zone, self.project, + replica_zones=replica_zones) + # Remote disk numbers start at 1+max_local_disks (0 is the system disk + # and local disks occupy 1-max_local_disks). + data_disk.disk_number = (self.remote_disk_counter + + 1 + self.max_local_disks) + self.remote_disk_counter += 1 + disks.append(data_disk) + + self._CreateScratchDiskFromDisks(disk_spec, disks) + + def AddMetadata(self, **kwargs): + """Adds metadata to disk.""" + # vm metadata added to vm on creation. + cmd = util.GcloudCommand( + self, 'compute', 'disks', 'add-labels', self.name) + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + cmd.Issue() + + def AllowRemoteAccessPorts(self): + """Creates firewall rules for remote access if required.""" + + # If gce_remote_access_firewall_rule is specified, access is already + # granted by that rule. + # If not, GCE firewall rules are created for all instances in a + # network. + if not self.gce_remote_access_firewall_rule: + super(GceVirtualMachine, self).AllowRemoteAccessPorts() + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the VM. + + Returns: + dict mapping string property key to value. + """ + result = super(GceVirtualMachine, self).GetResourceMetadata() + for attr_name in 'cpus', 'memory_mib', 'preemptible', 'project': + attr_value = getattr(self, attr_name) + if attr_value: + result[attr_name] = attr_value + # Only record image_family flag when it is used in vm creation command. + # Note, when using non-debian/ubuntu based custom images, user will need + # to use --os_type flag. In that case, we do not want to + # record image_family in metadata. 
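+    # (backfill_image is set in _PostCreate only when the image name had to be
+    # read back from the boot disk, rather than being supplied explicitly.)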
+ if self.backfill_image and self.image_family: + result['image_family'] = self.image_family + if self.image_project: + result['image_project'] = self.image_project + if self.use_dedicated_host: + result['node_type'] = self.node_type + result['num_vms_per_host'] = self.num_vms_per_host + if self.gpu_count: + result['gpu_type'] = self.gpu_type + result['gpu_count'] = self.gpu_count + if self.gce_accelerator_type_override: + result['accelerator_type_override'] = self.gce_accelerator_type_override + if self.gce_tags: + result['gce_tags'] = ','.join(self.gce_tags) + if self.max_local_disks: + result['gce_local_ssd_count'] = self.max_local_disks + result['gce_local_ssd_interface'] = FLAGS.gce_ssd_interface + result['gce_network_tier'] = self.gce_network_tier + result['gce_nic_type'] = self.gce_nic_type + if self.gce_egress_bandwidth_tier: + result['gce_egress_bandwidth_tier'] = self.gce_egress_bandwidth_tier + result['gce_shielded_secure_boot'] = self.gce_shielded_secure_boot + result['gce_confidential_compute'] = self.gce_confidential_compute + result['boot_disk_type'] = self.boot_disk_type + result['boot_disk_size'] = self.boot_disk_size + if self.threads_per_core: + result['threads_per_core'] = self.threads_per_core + if self.network.mtu: + result['mtu'] = self.network.mtu + return result + + def SimulateMaintenanceEvent(self): + """Simulates a maintenance event on the VM.""" + cmd = util.GcloudCommand(self, 'compute', 'instances', + 'simulate-maintenance-event', self.name, '--async') + _, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode: + raise errors.VirtualMachine.VirtualMachineError( + 'Unable to simulate maintenance event.') + + def DownloadPreprovisionedData(self, install_path, module_name, filename): + """Downloads a data file from a GCS bucket with pre-provisioned data. + + Use --gce_preprovisioned_data_bucket to specify the name of the bucket. + + Args: + install_path: The install path on this VM. + module_name: Name of the module associated with this data file. + filename: The name of the file that was downloaded. + """ + # TODO(deitz): Add retry logic. 
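+    # For reference, the generated command is a plain gsutil copy of the form
+    # (placeholder names; see GenerateDownloadPreprovisionedDataCommand below):
+    #   gsutil -q cp gs://<gcp_preprovisioned_data_bucket>/<module_name>/<filename> \
+    #       <install_path>/<filename>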
+ self.RemoteCommand(GenerateDownloadPreprovisionedDataCommand( + install_path, module_name, filename)) + + def InstallCli(self): + """Installs the gcloud cli on this GCP vm.""" + self.Install('google_cloud_sdk') + + def ShouldDownloadPreprovisionedData(self, module_name, filename): + """Returns whether or not preprovisioned data is available.""" + return FLAGS.gcp_preprovisioned_data_bucket and self.TryRemoteCommand( + GenerateStatPreprovisionedDataCommand(module_name, filename)) + + def _UpdateInterruptibleVmStatusThroughMetadataService(self): + _, _, retcode = vm_util.IssueCommand( + [FLAGS.gsutil_path, 'stat', self.preempt_marker], + raise_on_failure=False, suppress_warning=True) + # The VM is preempted if the command exits without an error + self.spot_early_termination = not bool(retcode) + if self.WasInterrupted(): + return + stdout, _ = self.RemoteCommand(self._MetadataPreemptCmd) + self.spot_early_termination = stdout.strip().lower() == 'true' + + @property + def _MetadataPreemptCmd(self): + return _METADATA_PREEMPT_CMD + + def _PreemptibleMetadataKeyValue(self) -> Tuple[str, str]: + """See base class.""" + return 'shutdown-script', _SHUTDOWN_SCRIPT.format( + preempt_marker=self.preempt_marker, user=self.user_name) + + def _AcquireWritePermissionsLinux(self): + gcs.GoogleCloudStorageService.AcquireWritePermissionsLinux(self) + + def OnStartup(self): + super().OnStartup() + if self.preemptible: + # Prepare VM to use GCS. When an instance is interrupt, the shutdown + # script will copy the status a GCS bucket. + self._AcquireWritePermissionsLinux() + + def _UpdateInterruptibleVmStatusThroughApi(self): + # If the run has failed then do a check that could throw an exception. + vm_without_zone = copy.copy(self) + vm_without_zone.zone = None + gcloud_command = util.GcloudCommand(vm_without_zone, 'compute', + 'operations', 'list') + gcloud_command.flags['filter'] = f'targetLink.scope():{self.name}' + gcloud_command.flags['zones'] = self.zone + stdout, _, _ = gcloud_command.Issue() + self.spot_early_termination = any( + operation['operationType'] == 'compute.instances.preempted' + for operation in json.loads(stdout)) + + def UpdateInterruptibleVmStatus(self, use_api=False): + """Updates the interruptible status if the VM was preempted.""" + if not self.IsInterruptible(): + return + if self.WasInterrupted(): + return + try: + self._UpdateInterruptibleVmStatusThroughMetadataService() + except errors.VirtualMachine.RemoteCommandError as error: + if use_api and 'connection timed out' in str(error).lower(): + self._UpdateInterruptibleVmStatusThroughApi() + + def IsInterruptible(self): + """Returns whether this vm is an interruptible vm (spot vm). + + Returns: True if this vm is an interruptible vm (spot vm). + """ + return self.preemptible + + def WasInterrupted(self): + """Returns whether this spot vm was terminated early by GCP. + + Returns: True if this vm was terminated early by GCP. + """ + return self.spot_early_termination + + def GetVmStatusCode(self): + """Returns the early termination code if any. + + Returns: Early termination code. 
+    """
+    return self.preemptible_status_code
+
+  # Intel - lower priority of security scanner
+  def LowerSecurityScannerPriority(self):
+    run_cmd = "/opt/nessus_agent/sbin/nessuscli"
+    # Guard with -x: nessuscli is an executable file, not a directory.
+    self.RemoteCommand(f"[ -x {run_cmd} ] && sudo {run_cmd} fix --set process_priority=low",
+                       ignore_failure=True,
+                       suppress_warning=True
+                       )
+    self.RemoteCommand(f"[ -x {run_cmd} ] && sudo {run_cmd} --set scan_performance_mode=low",
+                       ignore_failure=True,
+                       suppress_warning=True
+                       )
+  # End Intel contribution
+
+  def GetInterruptableStatusPollSeconds(self):
+    """Get seconds between preemptible status polls.
+
+    Returns:
+      Seconds between polls
+    """
+    return 3600
+
+
+class BaseLinuxGceVirtualMachine(GceVirtualMachine,
+                                 linux_vm.BaseLinuxMixin):
+  """Class supporting Linux GCE virtual machines.
+
+  Currently looks for gVNIC capabilities.
+  TODO(pclay): Make more generic and move to BaseLinuxMixin.
+  """
+
+  # regex to get the network devices from "ip link show"
+  _IP_LINK_RE = re.compile(r'^\d+: (?P<device_name>\S+):.*mtu (?P<mtu>\d+)')
+  # devices to ignore from "ip link show"
+  _IGNORE_NETWORK_DEVICES = ('lo',)
+  # ethtool properties output should match this regex
+  _ETHTOOL_RE = re.compile(r'^(?P<key>.*?):\s*(?P<value>.*)\s*')
+  # the "device" value in ethtool properties for gvnic
+  _GVNIC_DEVICE_NAME = 'gve'
+
+  def __init__(self, vm_spec):
+    super(BaseLinuxGceVirtualMachine, self).__init__(vm_spec)
+    self._gvnic_version = None
+    self._discovered_mtu: Optional[int] = None
+
+  def GetResourceMetadata(self):
+    """See base class."""
+    metadata = super(BaseLinuxGceVirtualMachine,
+                     self).GetResourceMetadata().copy()
+    if self._gvnic_version:
+      metadata['gvnic_version'] = self._gvnic_version
+    if self._discovered_mtu:
+      metadata['mtu'] = self._discovered_mtu
+    return metadata
+
+  def OnStartup(self):
+    """See base class.
Sets the _gvnic_version.""" + super(BaseLinuxGceVirtualMachine, self).OnStartup() + self._gvnic_version = self.GetGvnicVersion() + devices = self._GetNetworkDevices() + all_mtus = set(devices.values()) + if len(all_mtus) == 1: + self._discovered_mtu = list(all_mtus)[0] + else: + logging.warning('To record MTU must only have 1 unique MTU value not: %s', + devices) + + def GetGvnicVersion(self) -> Optional[str]: + """Returns the gvnic network driver version.""" + all_device_properties = {} + for device_name in self._GetNetworkDevices(): + device = self._GetNetworkDeviceProperties(device_name) + all_device_properties[device_name] = device + driver = device.get('driver') + driver_version = device.get('version') + if not driver: + logging.error( + 'Network device %s lacks a driver %s', device_name, device) + elif driver == self._GVNIC_DEVICE_NAME: + logging.info('gvnic properties %s', device) + if driver_version: + return driver_version + raise ValueError(f'No version in {device}') + + def _GetNetworkDeviceProperties(self, device_name: str) -> Dict[str, str]: + """Returns a dict of the network device properties.""" + # ethtool can exist under /usr/sbin or needs to be installed (debian9) + if self.HasPackage('ethtool'): + self.InstallPackages('ethtool') + try: + stdout, _ = self.RemoteCommand( + f'PATH="${{PATH}}":/usr/sbin ethtool -i {device_name}') + except errors.VirtualMachine.RemoteCommandError: + logging.info('ethtool not installed', exc_info=True) + return {} + properties = {} + for line in stdout.splitlines(): + m = self._ETHTOOL_RE.match(line) + if m: + properties[m['key']] = m['value'] + return properties + + def _GetNetworkDevices(self) -> Dict[str, int]: + """Returns network device names and their MTUs.""" + stdout, _ = self.RemoteCommand('PATH="${PATH}":/usr/sbin ip link show up') + devices = {} + for line in stdout.splitlines(): + m = self._IP_LINK_RE.match(line) + if m: + device_name = m['device_name'] + if device_name not in self._IGNORE_NETWORK_DEVICES: + devices[device_name] = int(m['mtu']) + return devices + + +class Debian9BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Debian9Mixin): + DEFAULT_IMAGE_FAMILY = 'debian-9' + DEFAULT_IMAGE_PROJECT = 'debian-cloud' + + def _BeforeSuspend(self): + self.InstallPackages('dbus') + self.RemoteCommand('sudo systemctl restart systemd-logind.service') + + +class Debian10BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Debian10Mixin): + DEFAULT_IMAGE_FAMILY = 'debian-10' + DEFAULT_IMAGE_PROJECT = 'debian-cloud' + + +class Debian11BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Debian11Mixin): + DEFAULT_IMAGE_FAMILY = 'debian-11' + DEFAULT_IMAGE_PROJECT = 'debian-cloud' + + +class Rhel7BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Rhel7Mixin): + DEFAULT_IMAGE_FAMILY = 'rhel-7' + DEFAULT_IMAGE_PROJECT = 'rhel-cloud' + + +class Rhel8BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Rhel8Mixin): + DEFAULT_IMAGE_FAMILY = 'rhel-8' + DEFAULT_IMAGE_PROJECT = 'rhel-cloud' + + +class CentOs7BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.CentOs7Mixin): + DEFAULT_IMAGE_FAMILY = 'centos-7' + DEFAULT_IMAGE_PROJECT = 'centos-cloud' + + +class CentOsStream8BasedGceVirtualMachine(BaseLinuxGceVirtualMachine, + linux_vm.CentOsStream8Mixin): + DEFAULT_IMAGE_FAMILY = 'centos-stream-8' + DEFAULT_IMAGE_PROJECT = 'centos-cloud' + + +class RockyLinux8BasedGceVirtualMachine(BaseLinuxGceVirtualMachine, + linux_vm.RockyLinux8Mixin): + DEFAULT_IMAGE_FAMILY = 'rocky-linux-8' + 
DEFAULT_IMAGE_PROJECT = 'rocky-linux-cloud' + + +class ContainerOptimizedOsBasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.ContainerOptimizedOsMixin): + DEFAULT_IMAGE_FAMILY = 'cos-stable' + DEFAULT_IMAGE_PROJECT = 'cos-cloud' + + +class CoreOsBasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.CoreOsMixin): + DEFAULT_IMAGE_FAMILY = 'fedora-coreos-stable' + DEFAULT_IMAGE_PROJECT = 'fedora-coreos-cloud' + + def __init__(self, vm_spec): + super(CoreOsBasedGceVirtualMachine, self).__init__(vm_spec) + # Fedora CoreOS only creates the core user + self.user_name = 'core' + + +class Ubuntu1804BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Ubuntu1804Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-1804-lts' + DEFAULT_IMAGE_PROJECT = 'ubuntu-os-cloud' + + +class Ubuntu2004BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Ubuntu2004Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-2004-lts' + DEFAULT_IMAGE_PROJECT = 'ubuntu-os-cloud' + + +class Ubuntu2204BasedGceVirtualMachine( + BaseLinuxGceVirtualMachine, linux_vm.Ubuntu2204Mixin): + DEFAULT_IMAGE_FAMILY = 'ubuntu-2204-lts' + DEFAULT_IMAGE_PROJECT = 'ubuntu-os-cloud' + + +class BaseWindowsGceVirtualMachine(GceVirtualMachine, + windows_virtual_machine.BaseWindowsMixin): + """Class supporting Windows GCE virtual machines.""" + + DEFAULT_IMAGE_PROJECT = 'windows-cloud' + + NVME_START_INDEX = 0 + + def __init__(self, vm_spec): + """Initialize a Windows GCE virtual machine. + + Args: + vm_spec: virtual_machine.BaseVmSpec object of the vm. + """ + super(BaseWindowsGceVirtualMachine, self).__init__(vm_spec) + self.boot_metadata[ + 'windows-startup-script-ps1'] = windows_virtual_machine.STARTUP_SCRIPT + + def _GenerateResetPasswordCommand(self): + """Generates a command to reset a VM user's password. + + Returns: + GcloudCommand. gcloud command to issue in order to reset the VM user's + password. + """ + cmd = util.GcloudCommand(self, 'compute', 'reset-windows-password', + self.name) + cmd.flags['user'] = self.user_name + return cmd + + def _PostCreate(self): + super(BaseWindowsGceVirtualMachine, self)._PostCreate() + reset_password_cmd = self._GenerateResetPasswordCommand() + stdout, _ = reset_password_cmd.IssueRetryable() + response = json.loads(stdout) + self.password = response['password'] + + def _PreemptibleMetadataKeyValue(self) -> Tuple[str, str]: + """See base class.""" + return 'windows-shutdown-script-ps1', _WINDOWS_SHUTDOWN_SCRIPT_PS1.format( + preempt_marker=self.preempt_marker) + + @vm_util.Retry( + max_retries=10, + retryable_exceptions=(GceUnexpectedWindowsAdapterOutputError, + errors.VirtualMachine.RemoteCommandError)) + def GetResourceMetadata(self): + """Returns a dict containing metadata about the VM. + + Returns: + dict mapping metadata key to value. + """ + result = super(BaseWindowsGceVirtualMachine, self).GetResourceMetadata() + result['disable_rss'] = self.disable_rss + return result + + def DisableRSS(self): + """Disables RSS on the GCE VM. + + Raises: + GceDriverDoesntSupportFeatureError: If RSS is not supported. + GceUnexpectedWindowsAdapterOutputError: If querying the RSS state + returns unexpected output. 
+ """ + # First ensure that the driver supports interrupt moderation + net_adapters, _ = self.RemoteCommand('Get-NetAdapter') + if 'Red Hat VirtIO Ethernet Adapter' not in net_adapters: + raise GceDriverDoesntSupportFeatureError( + 'Driver not tested with RSS disabled in PKB.') + + command = 'netsh int tcp set global rss=disabled' + self.RemoteCommand(command) + try: + self.RemoteCommand('Restart-NetAdapter -Name "Ethernet"') + except IOError: + # Restarting the network adapter will always fail because + # the winrm connection used to issue the command will be + # broken. + pass + + # Verify the setting went through + stdout, _ = self.RemoteCommand('netsh int tcp show global') + if 'Receive-Side Scaling State : enabled' in stdout: + raise GceUnexpectedWindowsAdapterOutputError('RSS failed to disable.') + + def _AcquireWritePermissionsLinux(self): + gcs.GoogleCloudStorageService.AcquireWritePermissionsWindows(self) + + @property + def _MetadataPreemptCmd(self): + return _METADATA_PREEMPT_CMD_WIN + + +class Windows2012CoreGceVirtualMachine( + BaseWindowsGceVirtualMachine, windows_virtual_machine.Windows2012CoreMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2012-r2-core' + + +class Windows2016CoreGceVirtualMachine( + BaseWindowsGceVirtualMachine, windows_virtual_machine.Windows2016CoreMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2016-core' + + +class Windows2019CoreGceVirtualMachine( + BaseWindowsGceVirtualMachine, windows_virtual_machine.Windows2019CoreMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2019-core' + + +class Windows2012DesktopGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2012DesktopMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2012-r2' + + +class Windows2016DesktopGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2016DesktopMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2016' + + +class Windows2019DesktopGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2019DesktopMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2019' + + +class Windows2022DesktopGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2022DesktopMixin): + DEFAULT_IMAGE_FAMILY = 'windows-2022' + + +class Windows2019DesktopSQLServer2017StandardGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2017Standard): + DEFAULT_IMAGE_FAMILY = 'sql-std-2017-win-2019' + DEFAULT_IMAGE_PROJECT = 'windows-sql-cloud' + + +class Windows2019DesktopSQLServer2017EnterpriseGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2017Enterprise): + DEFAULT_IMAGE_FAMILY = 'sql-ent-2017-win-2019' + DEFAULT_IMAGE_PROJECT = 'windows-sql-cloud' + + +class Windows2019DesktopSQLServer2019StandardGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Standard): + DEFAULT_IMAGE_FAMILY = 'sql-std-2019-win-2019' + DEFAULT_IMAGE_PROJECT = 'windows-sql-cloud' + + +class Windows2019DesktopSQLServer2019EnterpriseGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2019SQLServer2019Enterprise): + DEFAULT_IMAGE_FAMILY = 'sql-ent-2019-win-2019' + DEFAULT_IMAGE_PROJECT = 'windows-sql-cloud' + + +class Windows2022DesktopSQLServer2019StandardGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2022SQLServer2019Standard): + DEFAULT_IMAGE_FAMILY = 'sql-std-2019-win-2022' + DEFAULT_IMAGE_PROJECT = 'windows-sql-cloud' + + +class 
Windows2022DesktopSQLServer2019EnterpriseGceVirtualMachine( + BaseWindowsGceVirtualMachine, + windows_virtual_machine.Windows2022SQLServer2019Enterprise): + DEFAULT_IMAGE_FAMILY = 'sql-ent-2019-win-2022' + DEFAULT_IMAGE_PROJECT = 'windows-sql-cloud' + + +def GenerateDownloadPreprovisionedDataCommand(install_path, module_name, + filename): + """Returns a string used to download preprovisioned data.""" + return 'gsutil -q cp gs://%s/%s/%s %s' % ( + FLAGS.gcp_preprovisioned_data_bucket, module_name, filename, + posixpath.join(install_path, filename)) + + +def GenerateStatPreprovisionedDataCommand(module_name, filename): + """Returns a string used to download preprovisioned data.""" + return 'gsutil stat gs://%s/%s/%s' % ( + FLAGS.gcp_preprovisioned_data_bucket, module_name, filename) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_bigtable.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_bigtable.py new file mode 100644 index 0000000..6731d77 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_bigtable.py @@ -0,0 +1,397 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's bigtable instances. + +Clusters can be created and deleted. +""" + +import json +import logging +from typing import Any, Dict, List, Optional + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import non_relational_db +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.providers.gcp import util +import requests + +FLAGS = flags.FLAGS + + +flags.DEFINE_string('google_bigtable_instance_name', None, + 'Bigtable instance name. If not specified, new instance ' + 'will be created and deleted on the fly. If specified, ' + 'the instance is considered user managed and will not ' + 'created/deleted by PKB.') +flags.DEFINE_integer( + 'bigtable_node_count', None, + 'Number of nodes to create in the bigtable cluster. ' + 'Ignored if --bigtable_autoscaling_min_nodes is set.' 
+ 'TODO: Consider merging the two flags for better user-friendliness.') +_AUTOSCALING_MIN_NODES = flags.DEFINE_integer( + 'bigtable_autoscaling_min_nodes', None, + 'Minimum number of nodes for autoscaling.') +_AUTOSCALING_MAX_NODES = flags.DEFINE_integer( + 'bigtable_autoscaling_max_nodes', None, + 'Maximum number of nodes for autoscaling.') +_AUTOSCALING_CPU_TARGET = flags.DEFINE_integer( + 'bigtable_autoscaling_cpu_target', None, + 'The target CPU utilization percent for autoscaling.') +flags.DEFINE_enum('bigtable_storage_type', None, ['ssd', 'hdd'], + 'Storage class for the cluster') +flags.DEFINE_string('google_bigtable_zone', None, + 'Bigtable zone.') +flags.DEFINE_boolean('bigtable_replication_cluster', None, + 'Whether to create a Bigtable replication cluster.') +flags.DEFINE_string('bigtable_replication_cluster_zone', None, + 'Zone in which to create a Bigtable replication cluster.') +flags.DEFINE_boolean('bigtable_multicluster_routing', None, + 'Whether to use multi-cluster routing.') + +_DEFAULT_NODE_COUNT = 3 +_DEFAULT_STORAGE_TYPE = 'ssd' +_DEFAULT_ZONE = 'us-central1-b' +_DEFAULT_REPLICATION_ZONE = 'us-central1-c' + + +class BigtableSpec(non_relational_db.BaseNonRelationalDbSpec): + """Configurable options of a Bigtable instance. See below for descriptions.""" + + SERVICE_TYPE = non_relational_db.BIGTABLE + + name: str + zone: str + project: str + node_count: int + storage_type: str + replication_cluster: bool + replication_cluster_zone: str + multicluster_routing: bool + autoscaling_min_nodes: int + autoscaling_max_nodes: int + autoscaling_cpu_target: int + + def __init__(self, component_full_name, flag_values, **kwargs): + super().__init__(component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes / constructor args for each configurable option.""" + result = super()._GetOptionDecoderConstructions() + none_ok = {'default': None, 'none_ok': True} + result.update({ + 'name': (option_decoders.StringDecoder, none_ok), + 'zone': (option_decoders.StringDecoder, none_ok), + 'project': (option_decoders.StringDecoder, none_ok), + 'node_count': (option_decoders.IntDecoder, none_ok), + 'storage_type': (option_decoders.StringDecoder, none_ok), + 'replication_cluster': (option_decoders.BooleanDecoder, none_ok), + 'replication_cluster_zone': (option_decoders.StringDecoder, none_ok), + 'multicluster_routing': (option_decoders.BooleanDecoder, none_ok), + 'autoscaling_min_nodes': (option_decoders.IntDecoder, none_ok), + 'autoscaling_max_nodes': (option_decoders.IntDecoder, none_ok), + 'autoscaling_cpu_target': (option_decoders.IntDecoder, none_ok), + }) + return result + + @classmethod + def _ValidateConfig(cls, config_values) -> None: + """Verifies correct usage of the bigtable config options.""" + if (config_values.get('multicluster_routing', False) and + not config_values.get('replication_cluster', False)): + raise errors.Config.InvalidValue( + 'bigtable_replication_cluster must be set if ' + 'bigtable_multicluster_routing is True.') + + @classmethod + def _ApplyFlags(cls, config_values, flag_values) -> None: + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May be + modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. 
+ """ + super()._ApplyFlags(config_values, flag_values) + option_name_from_flag = { + 'google_bigtable_instance_name': 'name', + 'google_bigtable_zone': 'zone', + 'bigtable_storage_type': 'storage_type', + 'bigtable_node_count': 'node_count', + 'bigtable_replication_cluster': 'replication_cluster', + 'bigtable_replication_cluster_zone': 'replication_cluster_zone', + 'bigtable_multicluster_routing': 'multicluster_routing', + 'bigtable_autoscaling_min_nodes': 'autoscaling_min_nodes', + 'bigtable_autoscaling_max_nodes': 'autoscaling_max_nodes', + 'bigtable_autoscaling_cpu_target': 'autoscaling_cpu_target', + } + for flag_name, option_name in option_name_from_flag.items(): + if flag_values[flag_name].present: + config_values[option_name] = flag_values[flag_name].value + + cls._ValidateConfig(config_values) + + def __repr__(self) -> str: + return str(self.__dict__) + + +class GcpBigtableInstance(non_relational_db.BaseNonRelationalDb): + """Object representing a GCP Bigtable Instance. + + See https://cloud.google.com/bigtable/docs/overview. + + For replication settings, see + + For autoscaling/multicluster attributes, see + https://cloud.google.com/bigtable/docs/autoscaling. + + Attributes: + name: Instance and cluster name. + project: Enclosing project for the instance. + zone: zone of the instance's cluster. + node_count: Number of nodes in the instance's cluster. + storage_type: Storage class for the cluster. + replication_cluster: Whether the instance has a replication cluster. + replication_cluster_zone: Zone for the replication cluster. + multicluster_routing: Whether the instance uses multicluster_routing. + autoscaling_min_nodes: Minimum number of nodes for autoscaling. + autoscaling_max_nodes: Maximum number of nodes for autoscaling. + autoscaling_cpu_target: CPU utilization percent for autoscaling. 
+ """ + + SERVICE_TYPE = non_relational_db.BIGTABLE + + def __init__(self, + name: Optional[str], + project: Optional[str], + zone: Optional[str], + node_count: Optional[int], + storage_type: Optional[str], + replication_cluster: Optional[bool], + replication_cluster_zone: Optional[str], + multicluster_routing: Optional[bool], + autoscaling_min_nodes: Optional[int], + autoscaling_max_nodes: Optional[int], + autoscaling_cpu_target: Optional[int]): + super(GcpBigtableInstance, self).__init__() + if name is not None: + self.user_managed = True + self.name: str = name or f'pkb-bigtable-{FLAGS.run_uri}' + self.zone: str = zone or FLAGS.google_bigtable_zone + self.project: str = project or FLAGS.project or util.GetDefaultProject() + self.node_count: int = node_count or _DEFAULT_NODE_COUNT + self.storage_type: str = storage_type or _DEFAULT_STORAGE_TYPE + self.replication_cluster: bool = replication_cluster or False + self.replication_cluster_zone: str = ( + replication_cluster_zone or _DEFAULT_REPLICATION_ZONE) + self.multicluster_routing: bool = multicluster_routing or False + self.autoscaling_min_nodes: Optional[int] = autoscaling_min_nodes or None + self.autoscaling_max_nodes: Optional[int] = autoscaling_max_nodes or None + self.autoscaling_cpu_target: Optional[int] = autoscaling_cpu_target or None + + @classmethod + def FromSpec(cls, spec: BigtableSpec) -> 'GcpBigtableInstance': + return cls( + name=spec.name, + zone=spec.zone, + project=spec.project, + node_count=spec.node_count, + storage_type=spec.storage_type, + replication_cluster=spec.replication_cluster, + replication_cluster_zone=spec.replication_cluster_zone, + multicluster_routing=spec.multicluster_routing, + autoscaling_min_nodes=spec.autoscaling_min_nodes, + autoscaling_max_nodes=spec.autoscaling_max_nodes, + autoscaling_cpu_target=spec.autoscaling_cpu_target) + + def _BuildClusterConfigs(self) -> List[str]: + """Return flag values for --cluster_config when creating an instance. + + Returns: + List of strings for repeated --cluster_config flag values. + """ + flag_values = [] + cluster_config = { + 'id': f'{self.name}-0', + 'zone': self.zone, + 'nodes': self.node_count, + # Depending on flag settings, the config may be incomplete, but we rely + # on gcloud to validate for us. + 'autoscaling-min-nodes': self.autoscaling_min_nodes, + 'autoscaling-max-nodes': self.autoscaling_max_nodes, + 'autoscaling-cpu-target': self.autoscaling_cpu_target, + } + + # Ignore nodes if autoscaling is configured. --bigtable_node_count has a + # default value so we want to maintain backwards compatibility. 
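+    # Illustrative example (hypothetical autoscaling values): the rendered
+    # --cluster-config value ends up looking like
+    #   id=pkb-bigtable-<run_uri>-0,zone=us-central1-b,autoscaling-min-nodes=1,autoscaling-max-nodes=5,autoscaling-cpu-target=60
+    # with the fixed 'nodes' key dropped just below.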
+ if self.autoscaling_min_nodes: + del cluster_config['nodes'] + + keys_to_remove = [] + for k, v in cluster_config.items(): + if v is None: + keys_to_remove.append(k) + for key in keys_to_remove: + del cluster_config[key] + + flag_values.append(','.join( + '{}={}'.format(k, v) for (k, v) in cluster_config.items())) + + if self.replication_cluster: + replication_cluster_config = cluster_config.copy() + replication_cluster_config['id'] = f'{self.name}-1' + replication_cluster_config['zone'] = self.replication_cluster_zone + flag_values.append(','.join( + '{}={}'.format(k, v) + for (k, v) in replication_cluster_config.items())) + + return flag_values + + def _Create(self): + """Creates the instance.""" + cmd = util.GcloudCommand(self, 'bigtable', 'instances', 'create', self.name) + cmd.flags['display-name'] = self.name + cmd.flags['cluster-storage-type'] = self.storage_type + cmd.flags['project'] = self.project + cmd.flags['cluster-config'] = self._BuildClusterConfigs() + # The zone flag makes this command fail. + cmd.flags['zone'] = [] + + logging.info('Creating instance %s.', self.name) + + _, stderr, _ = cmd.Issue() + if 'Insufficient node quota' in stderr: + raise errors.Benchmarks.QuotaFailure( + f'Insufficient node quota in project {self.project} ' + f'and zone {self.zone}') + + self._UpdateLabels(util.GetDefaultTags()) + + if self.multicluster_routing: + cmd = util.GcloudCommand( + self, 'bigtable', 'app-profiles', 'update', 'default') + cmd.flags['instance'] = self.name + cmd.flags['route-any'] = True + cmd.flags['force'] = True + cmd.flags['zone'] = [] + cmd.Issue() + + def _GetLabels(self) -> Dict[str, Any]: + """Gets labels from the current instance.""" + return self._DescribeInstance().get('labels', {}) + + def _UpdateLabels(self, labels: Dict[str, Any]) -> None: + """Updates the labels of the current instance.""" + header = {'Authorization': f'Bearer {util.GetAccessToken()}'} + url = ('https://bigtableadmin.googleapis.com/v2/' + f'projects/{self.project}/instances/{self.name}') + # Keep any existing labels + tags = self._GetLabels() + tags.update(labels) + response = requests.patch( + url, + headers=header, + params={'updateMask': 'labels'}, + json={'labels': tags}) + logging.info('Update labels: status code %s, %s', response.status_code, + response.text) + if response.status_code != 200: + raise errors.Resource.UpdateError( + f'Unable to update Bigtable instance: {response.text}') + + def _UpdateTimeout(self, timeout_minutes: int) -> None: + """See base class.""" + self._UpdateLabels(util.GetDefaultTags(timeout_minutes)) + + def _Delete(self): + """Deletes the instance.""" + cmd = util.GcloudCommand(self, 'bigtable', 'instances', 'delete', self.name) + # The zone flag makes this command fail. + cmd.flags['zone'] = [] + cmd.Issue(raise_on_failure=False) + + def _DescribeInstance(self) -> Dict[str, Any]: + cmd = util.GcloudCommand( + self, 'bigtable', 'instances', 'describe', self.name) + # The zone flag makes this command fail. 
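+    # (util.GcloudCommand adds a --zone flag by default; assigning [] here
+    # drops the flag so the describe call succeeds.)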
+ cmd.flags['zone'] = [] + stdout, stderr, retcode = cmd.Issue( + suppress_warning=True, raise_on_failure=False) + if retcode != 0: + logging.error('Describing instance %s failed: %s', self.name, stderr) + return {} + return json.loads(stdout) + + def _Exists(self): + """Returns true if the instance exists.""" + instance = self._DescribeInstance() + if not instance: + return False + return instance['state'] == 'READY' + + def GetResourceMetadata(self) -> Dict[str, Any]: + metadata = {} + if self.user_managed: + clusters = GetClustersDescription(self.name, self.project) + metadata['bigtable_zone'] = [ + cluster['zone'] for cluster in clusters] + metadata['bigtable_storage_type'] = [ + cluster['defaultStorageType'] for cluster in clusters] + metadata['bigtable_node_count'] = [ + cluster['serveNodes'] for cluster in clusters] + else: + metadata['bigtable_zone'] = self.zone + metadata['bigtable_replication_zone'] = self.replication_cluster_zone + metadata['bigtable_storage_type'] = self.storage_type + metadata['bigtable_node_count'] = self.node_count + metadata['bigtable_multicluster_routing'] = self.multicluster_routing + return metadata + + +def GetClustersDescription(instance_name, project): + """Gets descriptions of all the clusters given the instance and project. + + This is a module function to allow getting description of clusters not created + by pkb. + + Args: + instance_name: Instance to get cluster descriptions for. + project: Project where instance is in. + + Returns: + A list of cluster descriptions dicts. + """ + cmd = util.GcloudCommand(None, 'bigtable', 'clusters', 'list') + cmd.flags['instances'] = instance_name + cmd.flags['project'] = project + stdout, stderr, retcode = cmd.Issue( + suppress_warning=True, raise_on_failure=False) + if retcode: + logging.error('Command "%s" failed:\nSTDOUT:\n%s\nSTDERR:\n%s', + repr(cmd), stdout, stderr) + output = json.loads(stdout) + + result = [] + for cluster_details in output: + current_instance_name = cluster_details['name'].split('/')[3] + if current_instance_name == instance_name: + cluster_details['name'] = cluster_details['name'].split('/')[5] + cluster_details['zone'] = cluster_details['location'].split('/')[3] + result.append(cluster_details) + + return result diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_cloud_redis.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_cloud_redis.py new file mode 100644 index 0000000..92500a4 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_cloud_redis.py @@ -0,0 +1,213 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's cloud redis instances. + +Instances can be created and deleted. 
+""" +import json +import logging +import time + +from absl import flags +from google.cloud import monitoring_v3 +from google.cloud.monitoring_v3.types import TimeInterval +from perfkitbenchmarker import errors +from perfkitbenchmarker import managed_memory_store +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.gcp import flags as gcp_flags +from perfkitbenchmarker.providers.gcp import util + +FLAGS = flags.FLAGS +STANDARD_TIER = 'STANDARD' +BASIC_TIER = 'BASIC' +COMMAND_TIMEOUT = 600 # 10 minutes +# Default redis api endpoint +API_ENDPOINT = 'https://redis.googleapis.com/' + + +class CloudRedis(managed_memory_store.BaseManagedMemoryStore): + """Object representing a GCP cloud redis instance.""" + + CLOUD = providers.GCP + MEMORY_STORE = managed_memory_store.REDIS + + def __init__(self, spec): + super(CloudRedis, self).__init__(spec) + self.project = FLAGS.project + self.size = FLAGS.gcp_redis_gb + self.redis_region = FLAGS.cloud_redis_region + self.redis_version = spec.config.cloud_redis.redis_version + self.failover_style = FLAGS.redis_failover_style + if self.failover_style == managed_memory_store.Failover.FAILOVER_NONE: + self.tier = BASIC_TIER + elif self.failover_style == managed_memory_store.Failover.FAILOVER_SAME_REGION: + self.tier = STANDARD_TIER + cmd = util.GcloudCommand(self, 'config', 'set', + 'api_endpoint_overrides/redis', + gcp_flags.API_OVERRIDE.value) + cmd.Issue() + + @staticmethod + def CheckPrerequisites(benchmark_config): + if FLAGS.redis_failover_style == managed_memory_store.Failover.FAILOVER_SAME_ZONE: + raise errors.Config.InvalidValue( + 'GCP cloud redis does not support same zone failover') + if (FLAGS.managed_memory_store_version and + FLAGS.managed_memory_store_version + not in managed_memory_store.REDIS_VERSIONS): + raise errors.Config.InvalidValue('Invalid Redis version.') + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the instance. + + Returns: + dict mapping string property key to value. 
+ """ + result = { + 'cloud_redis_failover_style': self.failover_style, + 'cloud_redis_size': self.size, + 'cloud_redis_tier': self.tier, + 'cloud_redis_region': self.redis_region, + 'cloud_redis_version': self.ParseReadableVersion(self.redis_version), + } + return result + + @staticmethod + def ParseReadableVersion(version): + """Parses Redis major and minor version number.""" + if version.count('_') < 2: + logging.info( + 'Could not parse version string correctly, ' + 'full Redis version returned: %s', version) + return version + return '.'.join(version.split('_')[1:]) + + def _Create(self): + """Creates the instance.""" + cmd = util.GcloudCommand(self, 'redis', 'instances', 'create', self.name) + cmd.flags['region'] = self.redis_region + cmd.flags['zone'] = FLAGS.zone[0] + cmd.flags['network'] = FLAGS.gce_network_name + cmd.flags['tier'] = self.tier + cmd.flags['size'] = self.size + cmd.flags['redis-version'] = self.redis_version + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + cmd.Issue(timeout=COMMAND_TIMEOUT) + + def _IsReady(self): + """Returns whether cluster is ready.""" + instance_details, _, _ = self.DescribeInstance() + return json.loads(instance_details).get('state') == 'READY' + + def _Delete(self): + """Deletes the instance.""" + cmd = util.GcloudCommand(self, 'redis', 'instances', 'delete', self.name) + cmd.flags['region'] = self.redis_region + cmd.Issue(timeout=COMMAND_TIMEOUT, raise_on_failure=False) + reset_cmd = util.GcloudCommand(self, 'config', 'set', + 'api_endpoint_overrides/redis', + 'https://redis.googleapis.com/') + reset_cmd.Issue(timeout=COMMAND_TIMEOUT, raise_on_failure=False) + + def _Exists(self): + """Returns true if the instance exists.""" + _, _, retcode = self.DescribeInstance() + return retcode == 0 + + def DescribeInstance(self): + """Calls describe instance using the gcloud tool. + + Returns: + stdout, stderr, and retcode. + """ + cmd = util.GcloudCommand(self, 'redis', 'instances', 'describe', self.name) + cmd.flags['region'] = self.redis_region + stdout, stderr, retcode = cmd.Issue( + suppress_warning=True, raise_on_failure=False) + if retcode != 0: + logging.info('Could not find redis instance %s', self.name) + return stdout, stderr, retcode + + @vm_util.Retry(max_retries=5) + def _PopulateEndpoint(self): + """Populates endpoint information about the instance. 
+ + Raises: + errors.Resource.RetryableGetError: + Failed to retrieve information on instance + """ + stdout, _, retcode = self.DescribeInstance() + if retcode != 0: + raise errors.Resource.RetryableGetError( + 'Failed to retrieve information on {}'.format(self.name)) + self._ip = json.loads(stdout)['host'] + self._port = json.loads(stdout)['port'] + + def MeasureCpuUtilization(self, interval_length): + """Measure the average CPU utilization on GCP instance in percentage.""" + now = time.time() + seconds = int(now) + interval = TimeInterval() + interval.end_time.seconds = seconds + interval.start_time.seconds = seconds - interval_length + client = monitoring_v3.MetricServiceClient() + + api_filter = ( + 'metric.type = "redis.googleapis.com/stats/cpu_utilization" ' + 'AND resource.labels.instance_id = "projects/' + ) + self.project + '/locations/' + self.redis_region + '/instances/' + self.name + '"' + + time_series = client.list_time_series( + name='projects/' + self.project, + filter_=api_filter, + interval=interval, + view=monitoring_v3.enums.ListTimeSeriesRequest.TimeSeriesView.FULL) + + return self._ParseMonitoringTimeSeries(time_series) + + def _ParseMonitoringTimeSeries(self, time_series): + """Parses time series data and returns average CPU across intervals in %. + + For example, an interval of 3 minutes would be represented as [x, y, z], + where x, y, and z are cpu seconds. + average CPU usage per minute in cpu seconds = (x + y + z) / 3 + average cpu usage in percentage = [(x + y + z) / 3] / 60 + + Args: + time_series: time series of cpu seconds returned by monitoring. + + Returns: + Percentage CPU use. + """ + intervals = [] + # For each of the four types of load, sum the CPU across all intervals + for i, time_interval in enumerate(time_series): + for j, interval in enumerate(time_interval.points): + if i == 0: + intervals.append(interval.value.double_value) + else: + intervals[j] += interval.value.double_value + + if intervals: + # Average over all minute intervals captured + averaged = sum(intervals) / len(intervals) + # averaged is in the unit of cpu seconds per minute. + # So divide by 60sec in 1 min to get a percentage usage over the minute. + return averaged / 60 + return None + + def GetInstanceSize(self): + """Return the size of the GCP instance in gigabytes.""" + return self.size diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dataproc.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dataproc.py new file mode 100644 index 0000000..74b6b4b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dataproc.py @@ -0,0 +1,261 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's spark service. + +Spark clusters can be created and deleted. 
+""" + +import datetime +import json +import logging +import os +import re + +from absl import flags +from perfkitbenchmarker import providers +from perfkitbenchmarker import spark_service +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.gcp import util + + +FLAGS = flags.FLAGS + + +class GcpDataproc(spark_service.BaseSparkService): + """Object representing a GCP Dataproc cluster. + + Attributes: + cluster_id: ID of the cluster. + project: ID of the project. + """ + + CLOUD = providers.GCP + SERVICE_NAME = 'dataproc' + + def __init__(self, spark_service_spec): + super(GcpDataproc, self).__init__(spark_service_spec) + self.project = self.spec.master_group.vm_spec.project + self.region = self.zone.rsplit('-', 1)[0] + + @staticmethod + def _ParseTime(state_time): + """Parses time from json output. + + Args: + state_time: string. the state start time. + + Returns: + datetime. + """ + try: + return datetime.datetime.strptime(state_time, '%Y-%m-%dT%H:%M:%S.%fZ') + except ValueError: + return datetime.datetime.strptime(state_time, '%Y-%m-%dT%H:%M:%SZ') + + @staticmethod + def _GetStats(stdout): + results = json.loads(stdout) + stats = {} + done_time = GcpDataproc._ParseTime(results['status']['stateStartTime']) + pending_time = None + start_time = None + for state in results['statusHistory']: + if state['state'] == 'PENDING': + pending_time = GcpDataproc._ParseTime(state['stateStartTime']) + elif state['state'] == 'RUNNING': + start_time = GcpDataproc._ParseTime(state['stateStartTime']) + + if done_time and start_time: + stats[spark_service.RUNTIME] = (done_time - start_time).total_seconds() + if start_time and pending_time: + stats[spark_service.WAITING] = ( + (start_time - pending_time).total_seconds()) + return stats + + def DataprocGcloudCommand(self, *args): + all_args = ('dataproc',) + args + cmd = util.GcloudCommand(self, *all_args) + cmd.flags['region'] = self.region + return cmd + + def _Create(self): + """Creates the cluster.""" + + if self.cluster_id is None: + self.cluster_id = 'pkb-' + FLAGS.run_uri + cmd = self.DataprocGcloudCommand('clusters', 'create', self.cluster_id) + if self.project is not None: + cmd.flags['project'] = self.project + cmd.flags['num-workers'] = self.spec.worker_group.vm_count + + for group_type, group_spec in [ + ('worker', self.spec.worker_group), + ('master', self.spec.master_group)]: + flag_name = group_type + '-machine-type' + cmd.flags[flag_name] = group_spec.vm_spec.machine_type + + if group_spec.vm_spec.num_local_ssds: + ssd_flag = 'num-{0}-local-ssds'.format(group_type) + cmd.flags[ssd_flag] = group_spec.vm_spec.num_local_ssds + + if group_spec.vm_spec.boot_disk_size: + disk_flag = group_type + '-boot-disk-size' + cmd.flags[disk_flag] = group_spec.vm_spec.boot_disk_size + + if group_spec.vm_spec.boot_disk_type: + disk_flag = group_type + '-boot-disk-type' + cmd.flags[disk_flag] = group_spec.vm_spec.boot_disk_type + + if FLAGS.gcp_dataproc_subnet: + cmd.flags['subnet'] = FLAGS.gcp_dataproc_subnet + cmd.additional_flags.append('--no-address') + + if FLAGS.gcp_dataproc_property: + cmd.flags['properties'] = ','.join(FLAGS.gcp_dataproc_property) + + if FLAGS.gcp_dataproc_image: + cmd.flags['image'] = FLAGS.gcp_dataproc_image + + cmd.flags['metadata'] = util.MakeFormattedDefaultTags() + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + cmd.Issue() + + def _Delete(self): + """Deletes the cluster.""" + cmd = self.DataprocGcloudCommand('clusters', 'delete', self.cluster_id) + # If we don't put this here, zone is automatically 
added, which
+    # breaks the dataproc clusters delete
+    cmd.flags['zone'] = []
+    cmd.Issue(raise_on_failure=False)
+
+  def _Exists(self):
+    """Check to see whether the cluster exists."""
+    cmd = self.DataprocGcloudCommand('clusters', 'describe', self.cluster_id)
+    # If we don't put this here, zone is automatically added to
+    # the command, which breaks dataproc clusters describe
+    cmd.flags['zone'] = []
+    _, _, retcode = cmd.Issue(raise_on_failure=False)
+    return retcode == 0
+
+  def SubmitJob(self, jarfile, classname, job_script=None,
+                job_poll_interval=None,
+                job_arguments=None, job_stdout_file=None,
+                job_type=spark_service.SPARK_JOB_TYPE):
+    cmd = self.DataprocGcloudCommand('jobs', 'submit', job_type)
+    cmd.flags['cluster'] = self.cluster_id
+    cmd.flags['labels'] = util.MakeFormattedDefaultTags()
+    # If we don't put this here, zone is automatically added to the command,
+    # which breaks dataproc jobs submit
+    cmd.flags['zone'] = []
+
+    cmd.additional_flags = []
+    if classname and jarfile:
+      cmd.flags['jars'] = jarfile
+      cmd.flags['class'] = classname
+    elif jarfile:
+      cmd.flags['jar'] = jarfile
+    elif job_script:
+      cmd.additional_flags += [job_script]
+
+    # Dataproc gives as stdout an object describing job execution.
+    # Its stderr contains a mix of the stderr of the job and the
+    # stdout of the job. We can set the driver log level to FATAL
+    # to suppress those messages, and then try to separate the job's
+    # standard output from the log messages.
+    cmd.flags['driver-log-levels'] = 'root={}'.format(
+        FLAGS.spark_service_log_level)
+    if job_arguments:
+      cmd.additional_flags += ['--'] + job_arguments
+    stdout, stderr, retcode = cmd.Issue(timeout=None, raise_on_failure=False)
+    if retcode != 0:
+      return {spark_service.SUCCESS: False}
+
+    stats = self._GetStats(stdout)
+    stats[spark_service.SUCCESS] = True
+
+    if job_stdout_file:
+      with open(job_stdout_file, 'w') as f:
+        lines = stderr.splitlines(True)
+        if (not re.match(r'Job \[.*\] submitted.', lines[0]) or
+            not re.match(r'Waiting for job output...', lines[1])):
+          raise Exception('Dataproc output in unexpected format.')
+        i = 2
+        if job_type == spark_service.SPARK_JOB_TYPE:
+          if not re.match(r'\r', lines[i]):
+            raise Exception('Dataproc output in unexpected format.')
+          i += 1
+          # Eat these status lines. They end in \r, so they overwrite
+          # themselves at the console or when you cat a file. But they
+          # are part of this string.
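+          # A typical eaten progress line looks like (illustrative):
+          #   [Stage 7:=============>        (14 + 4) / 24]\r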
+ while re.match(r'\[Stage \d+:', lines[i]): + i += 1 + if not re.match(r' *\r$', lines[i]): + raise Exception('Dataproc output in unexpected format.') + + while i < len(lines) and not re.match(r'Job \[.*\]', lines[i]): + f.write(lines[i]) + i += 1 + if i != len(lines) - 1: + raise Exception('Dataproc output in unexpected format.') + return stats + + def ExecuteOnMaster(self, script_path, script_args): + master_name = self.cluster_id + '-m' + script_name = os.path.basename(script_path) + if FLAGS.gcp_internal_ip: + scp_cmd = ['gcloud', 'beta', 'compute', 'scp', '--internal-ip'] + else: + scp_cmd = ['gcloud', 'compute', 'scp'] + scp_cmd += ['--zone', self.GetZone(), '--quiet', script_path, + 'pkb@' + master_name + ':/tmp/' + script_name] + vm_util.IssueCommand(scp_cmd, force_info_log=True) + ssh_cmd = ['gcloud', 'compute', 'ssh'] + if FLAGS.gcp_internal_ip: + ssh_cmd += ['--internal-ip'] + ssh_cmd += ['--zone=' + self.GetZone(), '--quiet', + 'pkb@' + master_name, '--', + 'chmod +x /tmp/' + script_name + '; sudo /tmp/' + script_name + + ' ' + ' '.join(script_args)] + vm_util.IssueCommand(ssh_cmd, force_info_log=True) + + def CopyFromMaster(self, remote_path, local_path): + master_name = self.cluster_id + '-m' + if FLAGS.gcp_internal_ip: + scp_cmd = ['gcloud', 'beta', 'compute', 'scp', '--internal-ip'] + else: + scp_cmd = ['gcloud', 'compute', 'scp'] + scp_cmd += ['--zone=' + self.GetZone(), '--quiet', + 'pkb@' + master_name + ':' + + remote_path, local_path] + vm_util.IssueCommand(scp_cmd, force_info_log=True) + + def SetClusterProperty(self): + pass + + def GetMetadata(self): + basic_data = super(GcpDataproc, self).GetMetadata() + if self.spec.worker_group.vm_spec.num_local_ssds: + basic_data.update( + {'ssd_count': str(self.spec.worker_group.vm_spec.num_local_ssds)}) + return basic_data + + def GetZone(self): + cmd = self.DataprocGcloudCommand('clusters', 'describe', self.cluster_id) + cmd.flags['zone'] = [] + cmd.flags['format'] = ['value(config.gceClusterConfig.zoneUri)'] + r = cmd.Issue() + logging.info(r) + zone = r[0].strip().split('/')[-1] + logging.info(zone) + return zone diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dpb_dataflow.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dpb_dataflow.py new file mode 100644 index 0000000..98f9ec9 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dpb_dataflow.py @@ -0,0 +1,147 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's dataflow service. 
+ +No Clusters can be created or destroyed, since it is a managed solution +See details at: https://cloud.google.com/dataflow/ +""" + +import os + +from absl import flags +from perfkitbenchmarker import beam_benchmark_helper +from perfkitbenchmarker import dpb_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker import vm_util + +flags.DEFINE_string('dpb_dataflow_staging_location', None, + 'Google Cloud Storage bucket for Dataflow to stage the ' + 'binary and any temporary files. You must create this ' + 'bucket ahead of time, before running your pipeline.') +flags.DEFINE_string('dpb_dataflow_runner', 'DataflowRunner', + 'Flag to specify the pipeline runner at runtime.') +flags.DEFINE_string('dpb_dataflow_sdk', None, + 'SDK used to build the Dataflow executable.') + + +FLAGS = flags.FLAGS + +GCP_TIME_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ' + +DATAFLOW_WC_INPUT = 'gs://dataflow-samples/shakespeare/kinglear.txt' + + +class GcpDpbDataflow(dpb_service.BaseDpbService): + """Object representing GCP Dataflow Service.""" + + CLOUD = providers.GCP + SERVICE_TYPE = 'dataflow' + + def __init__(self, dpb_service_spec): + super(GcpDpbDataflow, self).__init__(dpb_service_spec) + self.project = None + + @staticmethod + def _GetStats(stdout): + """Get Stats. + + TODO(saksena): Hook up the metrics API of dataflow to retrieve performance + metrics when available + """ + pass + + @staticmethod + def CheckPrerequisites(benchmark_config): + del benchmark_config # Unused + if not FLAGS.dpb_job_jarfile or not os.path.exists(FLAGS.dpb_job_jarfile): + raise errors.Config.InvalidValue('Job jar missing.') + if not FLAGS.dpb_dataflow_sdk: + raise errors.Config.InvalidValue('Dataflow SDK version missing.') + + def Create(self): + """See base class.""" + pass + + def Delete(self): + """See base class.""" + pass + + # TODO(saksena): Make this actually follow the contract or better yet delete + # this class. + def SubmitJob( + self, + jarfile='', + classname=None, + job_poll_interval=None, + job_arguments=None, + job_stdout_file=None, + job_type=None): + """See base class.""" + + if job_type == self.BEAM_JOB_TYPE: + full_cmd, base_dir = beam_benchmark_helper.BuildBeamCommand( + self.spec, classname, job_arguments) + _, _, retcode = vm_util.IssueCommand( + full_cmd, + cwd=base_dir, + timeout=FLAGS.beam_it_timeout, + raise_on_failure=False) + assert retcode == 0, 'Integration Test Failed.' 
+ return + + worker_machine_type = self.spec.worker_group.vm_spec.machine_type + num_workers = self.spec.worker_count + max_num_workers = self.spec.worker_count + if (self.spec.worker_group.disk_spec and + self.spec.worker_group.disk_spec.disk_size): + disk_size_gb = self.spec.worker_group.disk_spec.disk_size + elif self.spec.worker_group.vm_spec.boot_disk_size: + disk_size_gb = self.spec.worker_group.vm_spec.boot_disk_size + else: + disk_size_gb = None + + cmd = [] + + # Needed to verify java executable is on the path + dataflow_executable = 'java' + if not vm_util.ExecutableOnPath(dataflow_executable): + raise errors.Setup.MissingExecutableError( + 'Could not find required executable "%s"' % dataflow_executable) + cmd.append(dataflow_executable) + + cmd.append('-cp') + cmd.append(jarfile) + + cmd.append(classname) + cmd += job_arguments + + cmd.append('--workerMachineType={}'.format(worker_machine_type)) + cmd.append('--numWorkers={}'.format(num_workers)) + cmd.append('--maxNumWorkers={}'.format(max_num_workers)) + + if disk_size_gb: + cmd.append('--diskSizeGb={}'.format(disk_size_gb)) + cmd.append('--defaultWorkerLogLevel={}'.format(FLAGS.dpb_log_level)) + _, _, _ = vm_util.IssueCommand(cmd) + + def SetClusterProperty(self): + pass + + def GetMetadata(self): + """Return a dictionary of the metadata for this cluster.""" + basic_data = super(GcpDpbDataflow, self).GetMetadata() + basic_data['dpb_dataflow_runner'] = FLAGS.dpb_dataflow_runner + basic_data['dpb_dataflow_sdk'] = FLAGS.dpb_dataflow_sdk + return basic_data diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py new file mode 100644 index 0000000..30d5d12 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_dpb_dataproc.py @@ -0,0 +1,557 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's dataproc service. + +Clusters can be created, have jobs submitted to them and deleted. 
See details +at https://cloud.google.com/dataproc/ +""" + +import datetime +import json +import logging +from typing import Any, Dict, Optional + +from absl import flags +from perfkitbenchmarker import dpb_service +from perfkitbenchmarker import errors +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import providers +from perfkitbenchmarker.linux_packages import aws_credentials +from perfkitbenchmarker.providers.gcp import gcs +from perfkitbenchmarker.providers.gcp import util + +FLAGS = flags.FLAGS +flags.DEFINE_string('dpb_dataproc_image_version', None, + 'The image version to use for the cluster.') + +disk_to_hdfs_map = { + 'pd-standard': 'HDD', + 'pd-balanced': 'SSD (Balanced)', + 'pd-ssd': 'SSD', +} + + +class GcpDpbBaseDataproc(dpb_service.BaseDpbService): + """Base class for all Dataproc-based services (cluster or serverless).""" + + def __init__(self, dpb_service_spec): + super().__init__(dpb_service_spec) + self.dpb_service_type = self.SERVICE_TYPE + self.project = FLAGS.project + if FLAGS.dpb_dataproc_image_version: + self.dpb_version = FLAGS.dpb_dataproc_image_version + if not self.dpb_service_zone: + raise errors.Setup.InvalidSetupError( + 'dpb_service_zone must be provided, for provisioning.') + self.region = self.dpb_service_zone.rsplit('-', 1)[0] + self.storage_service = gcs.GoogleCloudStorageService() + self.storage_service.PrepareService(location=self.region) + self.persistent_fs_prefix = 'gs://' + self._cluster_create_time = None + + @staticmethod + def _ParseTime(state_time: str) -> datetime.datetime: + """Parses time from json output. + + Args: + state_time: string. the state start time. + + Returns: + Parsed datetime. + """ + try: + return datetime.datetime.strptime(state_time, '%Y-%m-%dT%H:%M:%S.%fZ') + except ValueError: + return datetime.datetime.strptime(state_time, '%Y-%m-%dT%H:%M:%SZ') + + @staticmethod + def CheckPrerequisites(benchmark_config): + del benchmark_config # Unused + + def DataprocGcloudCommand(self, *args): + all_args = ('dataproc',) + tuple(args) + cmd = util.GcloudCommand(self, *all_args) + cmd.flags['region'] = self.region + return cmd + + def MigrateCrossCloud(self, + source_location, + destination_location, + dest_cloud='AWS'): + """Method to copy data cross cloud using a distributed job on the cluster. + + Currently the only supported destination cloud is AWS. + TODO(user): Add support for other destination clouds. + + Args: + source_location: The source GCS path to migrate. + destination_location: The destination path. + dest_cloud: The cloud to copy data to. + + Returns: + A dictionary with key 'success' and boolean value set to the status of + data migration command. + """ + if dest_cloud == 'AWS': + dest_prefix = 's3a://' + else: + raise ValueError('Unsupported destination cloud.') + s3_access_key, s3_secret_key = aws_credentials.GetCredentials() + return self.DistributedCopy( + 'gs://' + source_location, + dest_prefix + destination_location, + properties={ + 'fs.s3a.access.key': s3_access_key, + 'fs.s3a.secret.key': s3_secret_key, + }) + + +class GcpDpbDataproc(GcpDpbBaseDataproc): + """Object representing a managed GCP Dataproc cluster. + + Attributes: + project: ID of the project. 
+ """ + + CLOUD = providers.GCP + SERVICE_TYPE = 'dataproc' + + def __init__(self, dpb_service_spec): + super().__init__(dpb_service_spec) + if self.user_managed and not FLAGS.dpb_service_bucket: + self.bucket = self._GetCluster()['config']['tempBucket'] + + def GetClusterCreateTime(self) -> Optional[float]: + """Returns the cluster creation time. + + On this implementation, the time returned is based on the timestamps + reported by the Dataproc API (which is stored in the _cluster_create_time + attribute). + + Returns: + A float representing the creation time in seconds or None. + """ + return self._cluster_create_time + + def _Create(self): + """Creates the cluster.""" + cmd = self.DataprocGcloudCommand('clusters', 'create', self.cluster_id) + if self.project is not None: + cmd.flags['project'] = self.project + + if self.spec.worker_count: + # The number of worker machines in the cluster + cmd.flags['num-workers'] = self.spec.worker_count + else: + cmd.flags['single-node'] = True + + # Initialize applications on the dataproc cluster + if self.spec.applications: + logging.info('Include the requested applications') + cmd.flags['optional-components'] = ','.join(self.spec.applications) + + # Enable component gateway for debuggability. Does not impact performance. + cmd.flags['enable-component-gateway'] = True + + # TODO(pclay): stop ignoring spec.master_group? + for role in ['worker', 'master']: + # Set machine type + if self.spec.worker_group.vm_spec.machine_type: + self._AddToCmd(cmd, '{0}-machine-type'.format(role), + self.spec.worker_group.vm_spec.machine_type) + # Set boot_disk_size + if self.spec.worker_group.disk_spec.disk_size: + size_in_gb = '{}GB'.format( + str(self.spec.worker_group.disk_spec.disk_size)) + self._AddToCmd(cmd, '{0}-boot-disk-size'.format(role), size_in_gb) + # Set boot_disk_type + if self.spec.worker_group.disk_spec.disk_type: + self._AddToCmd(cmd, '{0}-boot-disk-type'.format(role), + self.spec.worker_group.disk_spec.disk_type) + self.dpb_hdfs_type = disk_to_hdfs_map[ + self.spec.worker_group.disk_spec.disk_type] + + # Set ssd count + if self.spec.worker_group.vm_spec.num_local_ssds: + self._AddToCmd(cmd, 'num-{0}-local-ssds'.format(role), + self.spec.worker_group.vm_spec.num_local_ssds) + # This will actually be used for storage + self.dpb_hdfs_type = 'Local SSD' + # Set zone + cmd.flags['zone'] = self.dpb_service_zone + if self.dpb_version: + cmd.flags['image-version'] = self.dpb_version + + if FLAGS.gcp_dataproc_image: + cmd.flags['image'] = FLAGS.gcp_dataproc_image + + if FLAGS.dpb_cluster_properties: + cmd.flags['properties'] = ','.join(FLAGS.dpb_cluster_properties) + + # Ideally DpbServiceSpec would have a network spec, which we would create to + # Resolve the name, but because EMR provisions its own VPC and we are + # generally happy using pre-existing networks for Dataproc. Just use the + # underlying flag instead. 
+ if FLAGS.gce_network_name: + cmd.flags['network'] = FLAGS.gce_network_name + + metadata = util.GetDefaultTags() + metadata.update(flag_util.ParseKeyValuePairs(FLAGS.gcp_instance_metadata)) + cmd.flags['metadata'] = util.FormatTags(metadata) + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + timeout = 900 # 15 min + stdout, stderr, retcode = cmd.Issue(timeout=timeout, raise_on_failure=False) + self._cluster_create_time = self._ParseClusterCreateTime(stdout) + if retcode: + util.CheckGcloudResponseKnownFailures(stderr, retcode) + raise errors.Resource.CreationError(stderr) + + @classmethod + def _ParseClusterCreateTime(cls, stdout: str) -> Optional[float]: + """Parses the cluster create time from a raw API response.""" + try: + creation_data = json.loads(stdout) + except json.JSONDecodeError: + creation_data = {} + can_parse = creation_data.get('status', {}).get('state') == 'RUNNING' + status_history = creation_data.get('statusHistory', []) + can_parse = can_parse and len( + status_history) == 1 and status_history[0]['state'] == 'CREATING' + if not can_parse: + logging.warning('Unable to parse cluster creation duration.') + return None + creation_start = cls._ParseTime(status_history[0]['stateStartTime']) + creation_end = cls._ParseTime(creation_data['status']['stateStartTime']) + return (creation_end - creation_start).total_seconds() + + def _Delete(self): + """Deletes the cluster.""" + cmd = self.DataprocGcloudCommand('clusters', 'delete', self.cluster_id) + cmd.Issue(raise_on_failure=False) + + def _GetCluster(self) -> Optional[Dict[str, Any]]: + """Get the cluster resource in a dict.""" + cmd = self.DataprocGcloudCommand('clusters', 'describe', self.cluster_id) + stdout, _, retcode = cmd.Issue(raise_on_failure=False) + if not retcode: + return json.loads(stdout) + + def _Exists(self): + """Check to see whether the cluster exists.""" + return self._GetCluster() is not None + + def SubmitJob(self, + jarfile=None, + classname=None, + pyspark_file=None, + query_file=None, + job_poll_interval=None, + job_stdout_file=None, + job_arguments=None, + job_files=None, + job_jars=None, + job_type=None, + properties=None): + """See base class.""" + assert job_type + args = ['jobs', 'submit', job_type] + + if job_type == self.PYSPARK_JOB_TYPE: + args.append(pyspark_file) + + cmd = self.DataprocGcloudCommand(*args) + + cmd.flags['cluster'] = self.cluster_id + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + + job_jars = job_jars or [] + if classname: + if jarfile: + # Dataproc does not support both a main class and a main jar so just + # make the main jar an additional jar instead. + job_jars.append(jarfile) + cmd.flags['class'] = classname + elif jarfile: + cmd.flags['jar'] = jarfile + + if query_file: + cmd.flags['file'] = query_file + + if job_files: + cmd.flags['files'] = ','.join(job_files) + if job_jars: + cmd.flags['jars'] = ','.join(job_jars) + + # Dataproc gives as stdout an object describing job execution. + # Its stderr contains a mix of the stderr of the job, and the + # stdout of the job. We set the driver log level to FATAL + # to suppress those messages, and we can then separate, hopefully + # the job standard out from the log messages. 
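+    # The root driver log level applied here is taken from the
+    # --dpb_log_level flag.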
+ cmd.flags['driver-log-levels'] = 'root={}'.format(FLAGS.dpb_log_level) + + all_properties = self.GetJobProperties() + all_properties.update(properties or {}) + if all_properties: + # For commas: https://cloud.google.com/sdk/gcloud/reference/topic/escaping + cmd.flags['properties'] = '^@^' + '@'.join( + '{}={}'.format(k, v) for k, v in all_properties.items()) + + if job_arguments: + cmd.additional_flags = ['--'] + job_arguments + + stdout, stderr, retcode = cmd.Issue(timeout=None, raise_on_failure=False) + if retcode != 0: + raise dpb_service.JobSubmissionError(stderr) + + results = json.loads(stdout) + # Otherwise retcode would not have been 0 + assert results['status']['state'] == 'DONE' + done_time = GcpDpbDataproc._ParseTime(results['status']['stateStartTime']) + pending_time = None + start_time = None + for state in results['statusHistory']: + if state['state'] == 'PENDING': + pending_time = GcpDpbDataproc._ParseTime(state['stateStartTime']) + elif state['state'] == 'RUNNING': + start_time = GcpDpbDataproc._ParseTime(state['stateStartTime']) + + assert pending_time and start_time and done_time + + return dpb_service.JobResult( + run_time=(done_time - start_time).total_seconds(), + pending_time=(start_time - pending_time).total_seconds()) + + def _AddToCmd(self, cmd, cmd_property, cmd_value): + flag_name = cmd_property + cmd.flags[flag_name] = cmd_value + + +class GcpDpbDpgke(GcpDpbDataproc): + """Dataproc on GKE cluster. + + Extends from GcpDpbDataproc and not GcpDpbBaseDataproc as this represents a + cluster with managed infrastructure. + """ + + CLOUD = providers.GCP + SERVICE_TYPE = 'dataproc_gke' + + def __init__(self, dpb_service_spec): + super(GcpDpbDpgke, self).__init__(dpb_service_spec) + required_spec_attrs = [ + 'gke_cluster_name', 'gke_cluster_nodepools', + 'gke_cluster_location' + ] + missing_attrs = [ + attr for attr in required_spec_attrs + if not getattr(self.spec, attr, None) + ] + if missing_attrs: + raise errors.Setup.InvalidSetupError( + f'{missing_attrs} must be provided for provisioning DPGKE.') + + def _Create(self): + """Creates the dpgke virtual cluster.""" + cmd = self.DataprocGcloudCommand('clusters', 'gke', 'create', + self.cluster_id) + cmd.use_alpha_gcloud = True + cmd.flags['setup-workload-identity'] = True + cmd.flags['gke-cluster'] = self.spec.gke_cluster_name + cmd.flags['namespace'] = self.cluster_id + # replace ':' field delimiter with '=' since create cluster command + # only accept '=' as field delimiter but pkb doesn't allow overriding + # spec parameters containing '=' + cmd.flags['pools'] = self.spec.gke_cluster_nodepools.replace(':', '=') + cmd.flags['gke-cluster-location'] = self.spec.gke_cluster_location + if FLAGS.dpb_service_bucket: + cmd.flags['staging-bucket'] = FLAGS.dpb_service_bucket + if self.project is not None: + cmd.flags['project'] = self.project + cmd.flags['image-version'] = self.spec.version + if FLAGS.dpb_cluster_properties: + cmd.flags['properties'] = ','.join(FLAGS.dpb_cluster_properties) + timeout = 900 # 15 min + logging.info('Issuing command to create dpgke cluster. 
Flags %s, Args %s', + cmd.flags, cmd.args) + stdout, stderr, retcode = cmd.Issue(timeout=timeout, raise_on_failure=False) + self._cluster_create_time = self._ParseClusterCreateTime(stdout) + if retcode: + util.CheckGcloudResponseKnownFailures(stderr, retcode) + raise errors.Resource.CreationError(stderr) + + +class GcpDpbDataprocServerless(GcpDpbBaseDataproc): + """Resource that allows spawning serverless Dataproc Jobs.""" + + CLOUD = providers.GCP + SERVICE_TYPE = 'dataproc_serverless' + + def SubmitJob(self, + jarfile=None, + classname=None, + pyspark_file=None, + query_file=None, + job_poll_interval=None, + job_stdout_file=None, + job_arguments=None, + job_files=None, + job_jars=None, + job_type=None, + properties=None): + """See base class.""" + assert job_type + args = ['batches', 'submit', job_type] + additional_args = [] + + if job_type == self.PYSPARK_JOB_TYPE: + args.append(pyspark_file) + + cmd = self.DataprocGcloudCommand(*args) + + cmd.flags['batch'] = self.cluster_id + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + + job_jars = job_jars or [] + if classname: + if jarfile: + # Dataproc does not support both a main class and a main jar so just + # make the main jar an additional jar instead. + job_jars.append(jarfile) + cmd.flags['class'] = classname + elif jarfile: + cmd.flags['jar'] = jarfile + + if query_file: + additional_args += query_file + + if job_files: + cmd.flags['files'] = ','.join(job_files) + if job_jars: + cmd.flags['jars'] = ','.join(job_jars) + + if FLAGS.gce_network_name: + cmd.flags['network'] = FLAGS.gce_network_name + + if self.dpb_version: + cmd.flags['version'] = self.dpb_version + if FLAGS.gcp_dataproc_image: + cmd.flags['container-image'] = FLAGS.gcp_dataproc_image + + all_properties = self.GetJobProperties() + all_properties.update(properties or {}) + if all_properties: + # For commas: https://cloud.google.com/sdk/gcloud/reference/topic/escaping + cmd.flags['properties'] = '^@^' + '@'.join( + '{}={}'.format(k, v) for k, v in all_properties.items()) + + if job_arguments: + additional_args += ['--'] + job_arguments + cmd.additional_flags = additional_args + + _, stderr, retcode = cmd.Issue(timeout=None, raise_on_failure=False) + if retcode != 0: + raise dpb_service.JobSubmissionError(stderr) + + fetch_batch_cmd = self.DataprocGcloudCommand( + 'batches', 'describe', self.cluster_id) + stdout, stderr, retcode = fetch_batch_cmd.Issue( + timeout=None, raise_on_failure=False) + if retcode != 0: + raise dpb_service.JobSubmissionError(stderr) + + results = json.loads(stdout) + # Otherwise retcode would not have been 0 + assert results['state'] == 'SUCCEEDED' + done_time = self._ParseTime(results['stateTime']) + pending_time = None + start_time = None + for state in results['stateHistory']: + if state['state'] == 'PENDING': + pending_time = self._ParseTime(state['stateStartTime']) + elif state['state'] == 'RUNNING': + start_time = self._ParseTime(state['stateStartTime']) + + assert pending_time and start_time and done_time + + return dpb_service.JobResult( + run_time=(done_time - start_time).total_seconds(), + pending_time=(start_time - pending_time).total_seconds()) + + def _Create(self): + # Since there's no managed infrastructure, this is a no-op. + pass + + def _Delete(self): + # Since there's no managed infrastructure, this is a no-op. 
+ pass + + def GetClusterCreateTime(self) -> Optional[float]: + return None + + def GetJobProperties(self) -> Dict[str, str]: + result = {} + if self.spec.dataproc_serverless_core_count: + result['spark.executor.cores'] = self.spec.dataproc_serverless_core_count + result['spark.driver.cores'] = self.spec.dataproc_serverless_core_count + if self.spec.dataproc_serverless_initial_executors: + result['spark.executor.instances'] = ( + self.spec.dataproc_serverless_initial_executors) + if self.spec.dataproc_serverless_min_executors: + result['spark.dynamicAllocation.minExecutors'] = ( + self.spec.dataproc_serverless_min_executors) + if self.spec.dataproc_serverless_max_executors: + result['spark.dynamicAllocation.maxExecutors'] = ( + self.spec.dataproc_serverless_max_executors) + if self.spec.worker_group.disk_spec.disk_size: + result['spark.dataproc.driver.disk_size'] = ( + f'{self.spec.worker_group.disk_spec.disk_size}g' + ) + result['spark.dataproc.executor.disk_size'] = ( + f'{self.spec.worker_group.disk_spec.disk_size}g' + ) + result.update(super().GetJobProperties()) + return result + + def GetMetadata(self): + basic_data = super().GetMetadata() + + if self.spec.dataproc_serverless_core_count: + cluster_shape = ( + f'dataproc-serverless-{self.spec.dataproc_serverless_core_count}') + else: + cluster_shape = 'dataproc-serverless-default' + + initial_executors = self.spec.dataproc_serverless_initial_executors + min_executors = self.spec.dataproc_serverless_min_executors + max_executors = self.spec.dataproc_serverless_max_executors + + cluster_size = None + if initial_executors == min_executors == max_executors: + cluster_size = initial_executors + + return { + 'dpb_service': basic_data['dpb_service'], + 'dpb_version': basic_data['dpb_version'], + 'dpb_service_version': basic_data['dpb_service_version'], + 'dpb_batch_id': basic_data['dpb_cluster_id'], + 'dpb_cluster_shape': cluster_shape, + 'dpb_cluster_size': cluster_size, + 'dpb_cluster_min_executors': min_executors, + 'dpb_cluster_max_executors': max_executors, + 'dpb_cluster_initial_executors': initial_executors, + 'dpb_cores_per_node': self.spec.dataproc_serverless_core_count, + 'dpb_hdfs_type': 'default-disk', + 'dpb_service_zone': basic_data['dpb_service_zone'], + 'dpb_job_properties': basic_data['dpb_job_properties'], + } diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_pubsub.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_pubsub.py new file mode 100644 index 0000000..598425a --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_pubsub.py @@ -0,0 +1,162 @@ +"""GCP PubSub interface for resources. + +This class handles resource creation/cleanup for messaging service benchmark +on GCP Cloud PubSub. 
https://cloud.google.com/pubsub/docs
+"""
+
+import json
+import logging
+import os
+from typing import Any, Dict
+
+from absl import flags
+from perfkitbenchmarker import errors
+from perfkitbenchmarker import messaging_service as msgsvc
+from perfkitbenchmarker import providers
+from perfkitbenchmarker.providers.gcp import util
+
+FLAGS = flags.FLAGS
+MESSAGING_SERVICE_SCRIPTS_VM_GCP_DIR = os.path.join(
+    msgsvc.MESSAGING_SERVICE_SCRIPTS_VM_LIB_DIR, 'gcp')
+MESSAGING_SERVICE_SCRIPTS_GCP_PREFIX = 'messaging_service_scripts/gcp'
+MESSAGING_SERVICE_SCRIPTS_GCP_FILES = ['__init__.py', 'gcp_pubsub_client.py']
+MESSAGING_SERVICE_SCRIPTS_GCP_BIN = 'messaging_service_scripts/gcp_benchmark.py'
+
+
+class GCPCloudPubSub(msgsvc.BaseMessagingService):
+  """GCP Cloud PubSub Interface Class for prepare phase.
+
+  This class has methods that allow us to run the provision/prepare and cleanup
+  phase for GCP from the benchmark VM. The provision/prepare phase involves
+  things like: installing specific packages on the client VM, uploading files
+  to the client VM, and resource creation on the cloud provider (PubSub needs a
+  topic and subscription).
+  """
+
+  CLOUD = providers.GCP
+
+  def __init__(self):
+    super().__init__()
+    self.project = FLAGS.project or util.GetDefaultProject()
+    self.pubsub_topic = 'pkb-topic-{0}'.format(FLAGS.run_uri)
+    self.pubsub_subscription = 'pkb-subscription-{0}'.format(FLAGS.run_uri)
+
+  def _Create(self):
+    """Handles provision of resources needed for GCP Pub/Sub benchmark."""
+    self._CreateTopic()
+    self._CreateSubscription()
+
+  def _Exists(self):
+    return self._TopicExists() and self._SubscriptionExists()
+
+  def _Delete(self):
+    self._DeleteSubscription()
+    self._DeleteTopic()
+
+  def _IsDeleting(self):
+    """Overrides BaseResource._IsDeleting.
+
+    Used internally while deleting to check if the deletion is still in
+    progress.
+
+    Returns:
+      A bool. True if the resource is not yet deleted, else False.
+    """
+    return self._SubscriptionExists() or self._TopicExists()
+
+  def _InstallCloudClients(self):
+    # Installs/uploads GCP-specific modules/files.
+    self.client_vm.RemoteCommand(
+        'sudo pip3 install --upgrade --ignore-installed google-cloud-pubsub',
+        ignore_failure=False)
+
+    self._CopyFiles(
+        MESSAGING_SERVICE_SCRIPTS_GCP_PREFIX,
+        MESSAGING_SERVICE_SCRIPTS_GCP_FILES,
+        MESSAGING_SERVICE_SCRIPTS_VM_GCP_DIR)
+    self.client_vm.PushDataFile(MESSAGING_SERVICE_SCRIPTS_GCP_BIN)
+
+  def Run(self, benchmark_scenario: str, number_of_messages: str,
+          message_size: str) -> Dict[str, Any]:
+    """Runs a benchmark on GCP PubSub from the client VM.
+
+    Runs a benchmark based on the configuration specified through the arguments:
+    benchmark_scenario, number_of_messages, and message_size. This contains
+    the GCP specific command that we need to run on the client VM to run the
+    benchmark.
+
+    Args:
+      benchmark_scenario: Specifies which benchmark scenario to run.
+      number_of_messages: Number of messages to use on the benchmark.
+      message_size: Size of the messages that will be used on the benchmark. It
+        specifies the number of characters in those messages.
+
+    Returns:
+      Dictionary produced by the benchmark with metric_name (mean_latency,
+      p50_latency...) as key and the results from the benchmark as the value:
+
+        data = {
+          'mean_latency': 0.3423443...
+          ...
+ } + """ + command = (f'python3 -m gcp_benchmark ' + f'--pubsub_project={self.project} ' + f'--pubsub_topic={self.pubsub_topic} ' + f'--pubsub_subscription={self.pubsub_subscription} ' + f'--benchmark_scenario={benchmark_scenario} ' + f'--number_of_messages={number_of_messages} ' + f'--message_size={message_size} ') + stdout, _ = self.client_vm.RemoteCommand(command) + metrics = json.loads(stdout) + return metrics + + def _CreateTopic(self): + """Handles topic creation on GCP Pub/Sub.""" + cmd = util.GcloudCommand(self, 'pubsub', 'topics', 'create', + self.pubsub_topic) + _, stderr, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.error('Creation of GCP PubSub topic failed.') + raise errors.Resource.CreationError( + 'Failed to create PubSub Topic: %s return code: %s' % + (retcode, stderr)) + + def _TopicExists(self) -> bool: + """Check if subscription exists on GCP Pub/Sub.""" + cmd = util.GcloudCommand(self, 'pubsub', 'topics', 'describe', + self.pubsub_topic) + _, _, retcode = cmd.Issue(raise_on_failure=False) + return retcode == 0 + + def _DeleteTopic(self): + """Handles topic deletion on GCP Pub/Sub.""" + cmd = util.GcloudCommand(self, 'pubsub', 'topics', 'delete', + self.pubsub_topic) + cmd.Issue(raise_on_failure=False) + + def _CreateSubscription(self): + """Handles Subscription creation on GCP Pub/Sub.""" + cmd = util.GcloudCommand(self, 'pubsub', 'subscriptions', 'create', + self.pubsub_subscription) + cmd.flags['topic'] = self.pubsub_topic + cmd.flags['topic-project'] = self.project + _, stderr, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.error('Creation of GCP PubSub subscription failed.') + raise errors.Resource.CreationError( + 'Failed to create PubSub Subscription: %s return code: %s' % + (retcode, stderr)) + + def _SubscriptionExists(self) -> bool: + """Check if subscription exists on GCP Pub/Sub..""" + cmd = util.GcloudCommand(self, 'pubsub', 'subscriptions', 'describe', + self.pubsub_subscription) + _, _, retcode = cmd.Issue(raise_on_failure=False) + return retcode == 0 + + def _DeleteSubscription(self): + """Handles subscription deletion on GCP Pub/Sub.""" + cmd = util.GcloudCommand(self, 'pubsub', 'subscriptions', 'delete', + self.pubsub_subscription) + cmd.Issue(raise_on_failure=False) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_relational_db.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_relational_db.py new file mode 100644 index 0000000..4485e68 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_relational_db.py @@ -0,0 +1,497 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Managed relational database provisioning for GCP. + +As of June 2017 to make this benchmark run for GCP you must install the +gcloud beta component. This is necessary because creating a Cloud SQL instance +with a non-default storage size is in beta right now. 
This can be removed when +this feature is part of the default components. +See https://cloud.google.com/sdk/gcloud/reference/beta/sql/instances/create +for more information. +""" + + +import datetime +import json +import logging +import time + +from absl import flags +from perfkitbenchmarker import data +from perfkitbenchmarker import providers +from perfkitbenchmarker import relational_db +from perfkitbenchmarker import sql_engine_utils +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.gcp import gce_network +from perfkitbenchmarker.providers.gcp import util +from six.moves import range + +FLAGS = flags.FLAGS + +GCP_DATABASE_VERSION_MAPPING = { + sql_engine_utils.MYSQL: { + '5.5': 'MYSQL_5_5', + '5.6': 'MYSQL_5_6', + '5.7': 'MYSQL_5_7', + '8.0': 'MYSQL_8_0' + }, + sql_engine_utils.POSTGRES: { + '9.6': 'POSTGRES_9_6', + '10': 'POSTGRES_10', + '11': 'POSTGRES_11', + '12': 'POSTGRES_12', + '13': 'POSTGRES_13' + }, + sql_engine_utils.SQLSERVER: { + '2017_Standard': 'SQLSERVER_2017_Standard', + '2017_Enterprise': 'SQLSERVER_2017_ENTERPRISE', + '2017_Express': 'SQLSERVER_2017_EXPRESS', + '2017_Web': 'SQLSERVER_2017_WEB' + } +} + + +DEFAULT_MYSQL_VERSION = '5.7' +DEFAULT_POSTGRES_VERSION = '9.6' +DEFAULT_SQL_SERVER_VERSION = '2017_Standard' + +DEFAULT_ENGINE_VERSIONS = { + sql_engine_utils.MYSQL: DEFAULT_MYSQL_VERSION, + sql_engine_utils.POSTGRES: DEFAULT_POSTGRES_VERSION, + sql_engine_utils.SQLSERVER: DEFAULT_SQL_SERVER_VERSION, +} + +# TODO(chunla): Move to engine specific module +DEFAULT_USERNAME = { + sql_engine_utils.MYSQL: 'root', + sql_engine_utils.POSTGRES: 'postgres', + sql_engine_utils.SQLSERVER: 'sqlserver', +} + +# PostgreSQL restrictions on memory. +# Source: https://cloud.google.com/sql/docs/postgres/instance-settings. +CUSTOM_MACHINE_CPU_MEM_RATIO_LOWER_BOUND = 0.9 +CUSTOM_MACHINE_CPU_MEM_RATIO_UPPER_BOUND = 6.5 +MIN_CUSTOM_MACHINE_MEM_MB = 3840 + +IS_READY_TIMEOUT = 600 # 10 minutes +DELETE_INSTANCE_TIMEOUT = 600 # 10 minutes +CREATION_TIMEOUT = 1200 # 20 minutes + + +class UnsupportedDatabaseEngineError(Exception): + pass + + +class GCPRelationalDb(relational_db.BaseRelationalDb): + """A GCP CloudSQL database resource. + + This class contains logic required to provision and teardown the database. + Currently, the database will be open to the world (0.0.0.0/0) which is not + ideal; however, a password is still required to connect. Currently only + MySQL 5.7 and Postgres 9.6 are supported. 
+ """ + CLOUD = providers.GCP + + def __init__(self, relational_db_spec): + super(GCPRelationalDb, self).__init__(relational_db_spec) + self.project = FLAGS.project or util.GetDefaultProject() + + self.unmanaged_db_exists = None if self.is_managed_db else False + + def _GetAuthorizedNetworks(self, vms): + """Get CIDR connections for list of VM specs that need to access the db.""" + for vm in vms: + if not vm.HasIpAddress: + raise Exception('Client vm needs to be initialized before database can ' + 'discover authorized network.') + # create the CIDR of the client VM that is configured to access + # the database + return ','.join('{0}/32'.format(vm.ip_address) for vm in vms) + + def _CreateGcloudSqlInstance(self): + storage_size = self.spec.db_disk_spec.disk_size + instance_zone = self.spec.db_spec.zone + + authorized_network = self._GetAuthorizedNetworks([self.client_vm]) + + database_version_string = self._GetEngineVersionString( + self.spec.engine, self.spec.engine_version) + + cmd_string = [ + self, + 'beta', + 'sql', + 'instances', + 'create', + self.instance_id, + '--quiet', + '--format=json', + '--activation-policy=ALWAYS', + '--assign-ip', + '--authorized-networks=%s' % authorized_network, + '--zone=%s' % instance_zone, + '--database-version=%s' % database_version_string, + '--storage-size=%d' % storage_size, + '--labels=%s' % util.MakeFormattedDefaultTags(), + ] + if self.spec.engine == sql_engine_utils.MYSQL: + cmd_string.append('--enable-bin-log') + + if self.spec.engine == sql_engine_utils.SQLSERVER: + # `--root-password` is required when creating SQL Server instances. + cmd_string.append('--root-password={0}'.format( + self.spec.database_password)) + + if (self.spec.db_spec.cpus and self.spec.db_spec.memory): + self._ValidateSpec() + memory = self.spec.db_spec.memory + cpus = self.spec.db_spec.cpus + self._ValidateMachineType(memory, cpus) + cmd_string.append('--cpu={}'.format(cpus)) + cmd_string.append('--memory={}MiB'.format(memory)) + elif hasattr(self.spec.db_spec, 'machine_type'): + machine_type_flag = '--tier=%s' % self.spec.db_spec.machine_type + cmd_string.append(machine_type_flag) + else: + raise Exception('Unspecified machine type') + + if self.spec.high_availability: + cmd_string.append(self._GetHighAvailabilityFlag()) + + if self.spec.backup_enabled: + cmd_string.append('--backup') + cmd_string.append('--backup-start-time={}'.format( + self.spec.backup_start_time)) + else: + cmd_string.append('--no-backup') + cmd = util.GcloudCommand(*cmd_string) + cmd.flags['project'] = self.project + + _, stderr, retcode = cmd.Issue(timeout=CREATION_TIMEOUT) + + util.CheckGcloudResponseKnownFailures(stderr, retcode) + + def _Create(self): + """Creates the Cloud SQL instance and authorizes traffic from anywhere. + + Raises: + UnsupportedDatabaseEngineError: + if the database is unmanaged and the engine isn't MYSQL. + Exception: if an invalid MySQL flag was used. + """ + if self.is_managed_db: + self._CreateGcloudSqlInstance() + else: + self._SetupUnmanagedDatabase() + + if FLAGS.ip_addresses == vm_util.IpAddressSubset.INTERNAL: + self.endpoint = self.server_vm.internal_ip + else: + self.endpoint = self.server_vm.ip_address + self.firewall = gce_network.GceFirewall() + self.firewall.AllowPort( + self.server_vm, self.port, source_range=[self.client_vm.ip_address]) + self.unmanaged_db_exists = True + + def _GetHighAvailabilityFlag(self): + """Returns a flag that enables high-availability. + + Returns: + Flag (as string) to be appended to the gcloud sql create command. 
+    """
+    return '--availability-type=REGIONAL'
+
+  def _ValidateSpec(self):
+    """Validates PostgreSQL spec for CPU and memory.
+
+    Raises:
+      data.ResourceNotFound: On missing memory or cpus in postgres benchmark
+        config.
+    """
+    if not hasattr(self.spec.db_spec, 'cpus') or not self.spec.db_spec.cpus:
+      raise data.ResourceNotFound(
+          'Must specify cpu count in benchmark config. See https://'
+          'cloud.google.com/sql/docs/postgres/instance-settings for more '
+          'details about size restrictions.')
+    if not hasattr(self.spec.db_spec, 'memory') or not self.spec.db_spec.memory:
+      raise data.ResourceNotFound(
+          'Must specify a memory amount in benchmark config. See https://'
+          'cloud.google.com/sql/docs/postgres/instance-settings for more '
+          'details about size restrictions.')
+
+  def _ValidateMachineType(self, memory, cpus):
+    """Validates the custom machine type configuration.
+
+    Memory and CPU must be within the parameters described here:
+    https://cloud.google.com/sql/docs/postgres/instance-settings
+
+    Args:
+      memory: (int) in MiB
+      cpus: (int)
+
+    Raises:
+      ValueError on invalid configuration.
+    """
+    if cpus not in [1] + list(range(2, 97, 2)):
+      raise ValueError(
+          'CPUs (%i) must be 1 or an even number between 2 and 96, '
+          'inclusive.' % cpus)
+
+    if memory % 256 != 0:
+      raise ValueError(
+          'Total memory (%dMiB) for a custom machine must be a multiple '
+          'of 256MiB.' % memory)
+    ratio = memory / 1024.0 / cpus
+    if (ratio < CUSTOM_MACHINE_CPU_MEM_RATIO_LOWER_BOUND or
+        ratio > CUSTOM_MACHINE_CPU_MEM_RATIO_UPPER_BOUND):
+      raise ValueError(
+          'The memory (%.2fGiB) per vCPU (%d) of a custom machine '
+          'type must be between %.2f GiB and %.2f GiB per vCPU, '
+          'inclusive.' %
+          (memory / 1024.0, cpus, CUSTOM_MACHINE_CPU_MEM_RATIO_LOWER_BOUND,
+           CUSTOM_MACHINE_CPU_MEM_RATIO_UPPER_BOUND))
+    if memory < MIN_CUSTOM_MACHINE_MEM_MB:
+      raise ValueError('The total memory (%dMiB) for a custom machine type '
+                       'must be at least %dMiB.' %
+                       (memory,
+                        MIN_CUSTOM_MACHINE_MEM_MB))
+
+  def _Delete(self):
+    """Deletes the underlying resource.
+
+    Implementations of this method should be idempotent since it may
+    be called multiple times, even if the resource has already been
+    deleted.
+    """
+    if not self.is_managed_db:
+      if hasattr(self, 'firewall'):
+        self.firewall.DisallowAllPorts()
+      self.unmanaged_db_exists = False
+      self.PrintUnmanagedDbStats()
+      return
+    if hasattr(self, 'replica_instance_id'):
+      cmd = util.GcloudCommand(self, 'sql', 'instances', 'delete',
+                               self.replica_instance_id, '--quiet')
+      cmd.Issue(raise_on_failure=False, timeout=DELETE_INSTANCE_TIMEOUT)
+
+    cmd = util.GcloudCommand(self, 'sql', 'instances', 'delete',
+                             self.instance_id, '--quiet', '--async')
+    cmd.Issue(raise_on_failure=False, timeout=DELETE_INSTANCE_TIMEOUT)
+
+  def _Exists(self):
+    """Returns true if the underlying resource exists.
+
+    Supplying this method is optional. If it is not implemented then the
+    default is to assume success when _Create and _Delete do not raise
+    exceptions.
+ """ + if not self.is_managed_db: + return self.unmanaged_db_exists + cmd = util.GcloudCommand(self, 'sql', 'instances', 'describe', + self.instance_id) + stdout, _, _ = cmd.Issue(raise_on_failure=False) + try: + json_output = json.loads(stdout) + return json_output['kind'] == 'sql#instance' + except: + return False + + def _IsDBInstanceReady(self, instance_id, timeout=IS_READY_TIMEOUT): + cmd = util.GcloudCommand(self, 'sql', 'instances', 'describe', + instance_id) + start_time = datetime.datetime.now() + + while True: + if (datetime.datetime.now() - start_time).seconds > timeout: + logging.exception('Timeout waiting for sql instance to be ready') + return False + stdout, _, _ = cmd.Issue(suppress_warning=True, raise_on_failure=False) + + try: + json_output = json.loads(stdout) + state = json_output['state'] + logging.info('Instance %s state: %s', instance_id, state) + if state == 'RUNNABLE': + break + except: + logging.exception('Error attempting to read stdout. Creation failure.') + return False + time.sleep(5) + + return True + + def _IsReady(self, timeout=IS_READY_TIMEOUT): + """Return true if the underlying resource is ready. + + Supplying this method is optional. Use it when a resource can exist + without being ready. If the subclass does not implement + it then it just returns true. + + Args: + timeout: how long to wait when checking if the DB is ready. + + Returns: + True if the resource was ready in time, False if the wait timed out. + """ + if not self.is_managed_db: + return self._IsReadyUnmanaged() + + if not self._IsDBInstanceReady(self.instance_id, timeout): + return False + if self.spec.high_availability and hasattr(self, 'replica_instance_id'): + if not self._IsDBInstanceReady(self.replica_instance_id, timeout): + return False + + cmd = util.GcloudCommand( + self, 'sql', 'instances', 'describe', self.instance_id) + stdout, _, _ = cmd.Issue() + json_output = json.loads(stdout) + self.endpoint = self._ParseEndpoint(json_output) + return True + + def _ParseEndpoint(self, describe_instance_json): + """Returns the IP of the resource given the metadata as JSON. + + Args: + describe_instance_json: JSON output. + Returns: + public IP address (string) + """ + if describe_instance_json is None: + return '' + try: + selflink = describe_instance_json['ipAddresses'][0]['ipAddress'] + except: + selflink = '' + logging.exception('Error attempting to read stdout. Creation failure.') + return selflink + + @vm_util.Retry(max_retries=4, poll_interval=2) + def SetManagedDatabasePassword(self): + # The hostname '%' means unrestricted access from any host. + cmd = util.GcloudCommand( + self, 'sql', 'users', 'create', self.spec.database_username, + '--host=%', '--instance={0}'.format(self.instance_id), + '--password={0}'.format(self.spec.database_password)) + _, _, _ = cmd.Issue() + + # By default the empty password is a security violation. + # Change the password to a non-default value. + default_user = DEFAULT_USERNAME[self.spec.engine] + + cmd = util.GcloudCommand( + self, 'sql', 'users', 'set-password', default_user, + '--host=%', '--instance={0}'.format(self.instance_id), + '--password={0}'.format(self.spec.database_password)) + _, _, _ = cmd.Issue() + + def _PostCreate(self): + """Creates the PKB user and sets the password. 
+    """
+    super()._PostCreate()
+
+    if self.is_managed_db:
+      self.SetManagedDatabasePassword()
+
+    self.client_vm_query_tools.InstallPackages()
+
+  def _ApplyManagedDbFlags(self):
+    cmd_string = [
+        self, 'sql', 'instances', 'patch', self.instance_id,
+        '--database-flags=%s' % ','.join(FLAGS.db_flags)
+    ]
+    cmd = util.GcloudCommand(*cmd_string)
+    _, stderr, _ = cmd.Issue()
+    if stderr:
+      # sql instance patch outputs information to stderr
+      # Reference to GCP documentation
+      # https://cloud.google.com/sdk/gcloud/reference/sql/instances/patch
+      # Example output
+      # Updated [https://sqladmin.googleapis.com/].
+      if 'Updated' in stderr:
+        return
+      raise Exception('Invalid flags: %s' % stderr)
+
+    self._Reboot()
+
+  def _Reboot(self):
+    cmd_string = [
+        self, 'sql', 'instances', 'restart', self.instance_id
+    ]
+    cmd = util.GcloudCommand(*cmd_string)
+    cmd.Issue()
+
+    if not self._IsReady():
+      raise Exception('Instance could not be set to ready after '
+                      'reboot')
+
+  @staticmethod
+  def GetDefaultEngineVersion(engine):
+    """Returns the default version of a given database engine.
+
+    Args:
+      engine (string): type of database (my_sql or postgres).
+
+    Returns:
+      (string): Default version for the given database engine.
+    """
+    if engine not in DEFAULT_ENGINE_VERSIONS:
+      raise NotImplementedError('Default engine not specified for '
+                                'engine {0}'.format(engine))
+    return DEFAULT_ENGINE_VERSIONS[engine]
+
+  @staticmethod
+  def _GetEngineVersionString(engine, version):
+    """Returns CloudSQL-specific version string for a given database engine.
+
+    Args:
+      engine: database engine
+      version: engine version
+
+    Returns:
+      (string): CloudSQL-specific name for requested engine and version.
+
+    Raises:
+      NotImplementedError on invalid engine / version combination.
+    """
+    if engine not in GCP_DATABASE_VERSION_MAPPING:
+      valid_databases = ', '.join(GCP_DATABASE_VERSION_MAPPING.keys())
+      raise NotImplementedError(
+          'Database {0} is not supported, supported '
+          'databases include {1}'.format(engine, valid_databases))
+
+    version_mapping = GCP_DATABASE_VERSION_MAPPING[engine]
+    if version not in version_mapping:
+      valid_versions = ', '.join(version_mapping.keys())
+      raise NotImplementedError(
+          'Version {0} is not supported, supported '
+          'versions include {1}'.format(version, valid_versions))
+
+    return version_mapping[version]
+
+  def _FailoverHA(self):
+    """Fail over from master to replica."""
+    cmd_string = [
+        self,
+        'sql',
+        'instances',
+        'failover',
+        self.instance_id,
+    ]
+    cmd = util.GcloudCommand(*cmd_string)
+    cmd.flags['project'] = self.project
+    # This command doesn't support the 'format' specifier.
+    del cmd.flags['format']
+    cmd.IssueRetryable()
diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_spanner.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_spanner.py
new file mode 100644
index 0000000..9f08237
--- /dev/null
+++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_spanner.py
@@ -0,0 +1,354 @@
+# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's spanner instances. + +Instances can be created and deleted. +""" + +import dataclasses +import json +import logging +from typing import Any, Dict, Optional + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import resource +from perfkitbenchmarker.configs import freeze_restore_spec +from perfkitbenchmarker.configs import option_decoders +from perfkitbenchmarker.configs import spec +from perfkitbenchmarker.providers.gcp import util +import requests + + +FLAGS = flags.FLAGS +flags.DEFINE_string('cloud_spanner_config', + None, + 'The config for the Cloud Spanner instance. Use default ' + 'config if unset.') +flags.DEFINE_integer('cloud_spanner_nodes', None, + 'The number of nodes for the Cloud Spanner instance.') +flags.DEFINE_string('cloud_spanner_project', + None, + 'The project for the Cloud Spanner instance. Use default ' + 'project if unset.') + +# Valid GCP Spanner types: +DEFAULT_SPANNER_TYPE = 'default' + +_DEFAULT_REGION = 'us-central1' +_DEFAULT_DESCRIPTION = 'Spanner instance created by PKB.' +_DEFAULT_DDL = """ + CREATE TABLE pkb_table ( + id STRING(MAX), + field0 STRING(MAX) + ) PRIMARY KEY(id) + """ +_DEFAULT_NODES = 1 +_FROZEN_NODE_COUNT = 1 + +# Common decoder configuration option. +_NONE_OK = {'default': None, 'none_ok': True} + + +@dataclasses.dataclass +class SpannerSpec(freeze_restore_spec.FreezeRestoreSpec): + """Configurable options of a Spanner instance.""" + + # Needed for registering the spec class. + SPEC_TYPE = 'SpannerSpec' + SPEC_ATTRS = ['SERVICE_TYPE'] + SERVICE_TYPE = DEFAULT_SPANNER_TYPE + + service_type: str + name: str + description: str + database: str + ddl: str + config: str + nodes: int + project: str + + def __init__(self, + component_full_name: str, + flag_values: Optional[Dict[str, flags.FlagValues]] = None, + **kwargs): + super().__init__(component_full_name, flag_values=flag_values, **kwargs) + + @classmethod + def _GetOptionDecoderConstructions(cls): + """Gets decoder classes and constructor args for each configurable option. + + Returns: + dict. Maps option name string to a (ConfigOptionDecoder class, dict) pair. + The pair specifies a decoder class and its __init__() keyword arguments + to construct in order to decode the named option. + """ + result = super()._GetOptionDecoderConstructions() + result.update({ + 'service_type': ( + option_decoders.EnumDecoder, + { + 'valid_values': [ + DEFAULT_SPANNER_TYPE, + ], + 'default': DEFAULT_SPANNER_TYPE + }), + 'name': (option_decoders.StringDecoder, _NONE_OK), + 'database': (option_decoders.StringDecoder, _NONE_OK), + 'description': (option_decoders.StringDecoder, _NONE_OK), + 'ddl': (option_decoders.StringDecoder, _NONE_OK), + 'config': (option_decoders.StringDecoder, _NONE_OK), + 'nodes': (option_decoders.IntDecoder, _NONE_OK), + 'project': (option_decoders.StringDecoder, _NONE_OK), + }) + return result + + @classmethod + def _ApplyFlags(cls, config_values, flag_values): + """Modifies config options based on runtime flag values. + + Can be overridden by derived classes to add support for specific flags. + + Args: + config_values: dict mapping config option names to provided values. May + be modified by this function. + flag_values: flags.FlagValues. Runtime flags that may override the + provided config values. 
+ """ + super()._ApplyFlags(config_values, flag_values) + if flag_values['cloud_spanner_config'].present: + config_values['config'] = flag_values.cloud_spanner_config + if flag_values['cloud_spanner_nodes'].present: + config_values['nodes'] = flag_values.cloud_spanner_nodes + if flag_values['cloud_spanner_project'].present: + config_values['project'] = flag_values.cloud_spanner_project + + +def GetSpannerSpecClass(service_type) -> Optional[spec.BaseSpecMetaClass]: + """Return the SpannerSpec class corresponding to 'service_type'.""" + return spec.GetSpecClass(SpannerSpec, SERVICE_TYPE=service_type) + + +class GcpSpannerInstance(resource.BaseResource): + """Object representing a GCP Spanner Instance. + + The project and Cloud Spanner config must already exist. Instance and database + will be created and torn down before and after the test. + + The following parameters are overridden by the corresponding FLAGs. + project: FLAGS.cloud_spanner_project + config: FLAGS.cloud_spanner_config + nodes: FLAGS.cloud_spanner_nodes + + Attributes: + name: Name of the instance to create. + description: Description of the instance. + database: Name of the database to create + ddl: The schema of the database. + """ + # Required for registering the class. + RESOURCE_TYPE = 'GcpSpannerInstance' + REQUIRED_ATTRS = ['SERVICE_TYPE'] + SERVICE_TYPE = DEFAULT_SPANNER_TYPE + + def __init__(self, + name: Optional[str] = None, + description: Optional[str] = None, + database: Optional[str] = None, + ddl: Optional[str] = None, + config: Optional[str] = None, + nodes: Optional[int] = None, + project: Optional[str] = None, + **kwargs): + super(GcpSpannerInstance, self).__init__(**kwargs) + self.name = name or f'pkb-instance-{FLAGS.run_uri}' + self.database = database or f'pkb-database-{FLAGS.run_uri}' + self._description = description or _DEFAULT_DESCRIPTION + self._ddl = ddl or _DEFAULT_DDL + self._config = config or self._GetDefaultConfig() + self._nodes = nodes or _DEFAULT_NODES + self._end_point = None + + # Cloud Spanner may not explicitly set the following common flags. 
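+    # The project falls back from the spec value to --project and finally to
+    # the gcloud SDK's configured default project.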
+ self.project = ( + project or FLAGS.project or util.GetDefaultProject()) + self.zone = None + + def _GetDefaultConfig(self) -> str: + """Gets the config that corresponds the region used for the test.""" + try: + region = util.GetRegionFromZone( + FLAGS.zones[0] if FLAGS.zones else FLAGS.zone[0]) + except IndexError: + region = _DEFAULT_REGION + return f'regional-{region}' + + @classmethod + def FromSpec(cls, spanner_spec: SpannerSpec) -> 'GcpSpannerInstance': + """Initialize Spanner from the provided spec.""" + return cls( + name=spanner_spec.name, + description=spanner_spec.description, + database=spanner_spec.database, + ddl=spanner_spec.ddl, + config=spanner_spec.config, + nodes=spanner_spec.nodes, + project=spanner_spec.project, + enable_freeze_restore=spanner_spec.enable_freeze_restore, + create_on_restore_error=spanner_spec.create_on_restore_error, + delete_on_freeze_error=spanner_spec.delete_on_freeze_error) + + def _Create(self) -> None: + """Creates the instance, the database, and update the schema.""" + cmd = util.GcloudCommand(self, 'spanner', 'instances', 'create', self.name) + cmd.flags['description'] = self._description + cmd.flags['nodes'] = self._nodes + cmd.flags['config'] = self._config + _, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.error('Create GCP Spanner instance failed.') + return + + self._UpdateLabels(util.GetDefaultTags()) + + cmd = util.GcloudCommand(self, 'spanner', 'databases', 'create', + self.database) + cmd.flags['instance'] = self.name + _, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.error('Create GCP Spanner database failed.') + return + + cmd = util.GcloudCommand(self, 'spanner', 'databases', 'ddl', 'update', + self.database) + cmd.flags['instance'] = self.name + cmd.flags['ddl'] = self._ddl + _, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.error('Update GCP Spanner database schema failed.') + else: + logging.info('Created GCP Spanner instance and database.') + + def _Delete(self) -> None: + """Deletes the instance.""" + cmd = util.GcloudCommand(self, 'spanner', 'instances', 'delete', + self.name) + _, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.error('Delete GCP Spanner instance failed.') + else: + logging.info('Deleted GCP Spanner instance.') + + def _Exists(self, instance_only: bool = False) -> bool: + """Returns true if the instance and the database exists.""" + cmd = util.GcloudCommand(self, 'spanner', 'instances', 'describe', + self.name) + + # Do not log error or warning when checking existence. + _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + if retcode != 0: + logging.info('Could not find GCP Spanner instance %s.', self.name) + return False + + if instance_only: + return True + + cmd = util.GcloudCommand(self, 'spanner', 'databases', 'describe', + self.database) + cmd.flags['instance'] = self.name + + # Do not log error or warning when checking existence. 
+ _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False) + if retcode != 0: + logging.info('Could not find GCP Spanner database %s.', self.database) + return False + + return True + + def GetEndPoint(self) -> Optional[str]: + """Returns the end point for Cloud Spanner.""" + if self._end_point: + return self._end_point + + cmd = util.GcloudCommand(self, 'config', 'get-value', + 'api_endpoint_overrides/spanner') + stdout, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.warning('Fail to retrieve cloud spanner end point.') + return None + self._end_point = json.loads(stdout) + return self._end_point + + def _SetNodes(self, nodes: int) -> None: + """Sets the number of nodes on the Spanner instance.""" + cmd = util.GcloudCommand(self, 'spanner', 'instances', 'update', self.name) + cmd.flags['nodes'] = nodes + cmd.Issue(raise_on_failure=True) + + def _Restore(self) -> None: + """See base class. + + Increases the number of nodes on the instance to the specified number. See + https://cloud.google.com/spanner/pricing for Spanner pricing info. + """ + self._SetNodes(self._nodes) + + def _Freeze(self) -> None: + """See base class. + + Lowers the number of nodes on the instance to one. Note there are + restrictions to being able to lower the number of nodes on an instance. See + https://cloud.google.com/spanner/docs/create-manage-instances. + """ + self._SetNodes(_FROZEN_NODE_COUNT) + + def _GetLabels(self) -> Dict[str, Any]: + """Gets labels from the current instance.""" + cmd = util.GcloudCommand(self, 'spanner', 'instances', 'describe', + self.name) + stdout, _, _ = cmd.Issue(raise_on_failure=True) + return json.loads(stdout).get('labels', {}) + + def _UpdateLabels(self, labels: Dict[str, Any]) -> None: + """Updates the labels of the current instance.""" + header = {'Authorization': f'Bearer {util.GetAccessToken()}'} + url = ('https://spanner.googleapis.com/v1/projects/' + f'{self.project}/instances/{self.name}') + # Keep any existing labels + tags = self._GetLabels() + tags.update(labels) + args = { + 'instance': { + 'labels': tags + }, + 'fieldMask': 'labels', + } + response = requests.patch(url, headers=header, json=args) + logging.info('Update labels: status code %s, %s', + response.status_code, response.text) + if response.status_code != 200: + raise errors.Resource.UpdateError( + f'Unable to update Spanner instance: {response.text}') + + def _UpdateTimeout(self, timeout_minutes: int) -> None: + """See base class.""" + labels = util.GetDefaultTags(timeout_minutes) + self._UpdateLabels(labels) + + +def GetSpannerClass( + service_type: str) -> Optional[resource.AutoRegisterResourceMeta]: + """Return the Spanner class associated with service_type.""" + return resource.GetResourceClass( + GcpSpannerInstance, SERVICE_TYPE=service_type) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_tpu.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_tpu.py new file mode 100644 index 0000000..21ff192 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcp_tpu.py @@ -0,0 +1,152 @@ +# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Module containing class for GCP's cloud TPU. + +cloud TPU can be created and deleted. +""" + +import json +import logging +from absl import flags +from perfkitbenchmarker import cloud_tpu +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker.providers.gcp import util + +FLAGS = flags.FLAGS +TPU_TIMEOUT = 1200 +_INSUFFICIENT_CAPACITY = 'There is no more capacity in the zone' + + +class GcpTpu(cloud_tpu.BaseTpu): + """class representing a GCP cloud TPU. + + Attributes: + name: Name of the cloud TPU to create. + project: the GCP project. + version: the TPU version. + zone: the GCP zone. + tpu_ip: the TPU IP. + """ + + CLOUD = providers.GCP + SERVICE_NAME = 'tpu' + TPU_IP = '10.240.{}.2' + DEFAULT_TPU_VERSION = '1.6' + + def __init__(self, tpu_spec): + super(GcpTpu, self).__init__(tpu_spec) + self.spec = tpu_spec + self.project = FLAGS.project or util.GetDefaultProject() + + def _Create(self): + """Create Cloud TPU.""" + cmd = util.GcloudCommand(self, 'compute', 'tpus', 'create', + self.spec.tpu_name) + cmd.flags['range'] = self.spec.tpu_cidr_range + if self.spec.tpu_accelerator_type: + cmd.flags['accelerator-type'] = self.spec.tpu_accelerator_type + if self.spec.tpu_description: + cmd.flags['description'] = self.spec.tpu_description + if self.spec.tpu_network: + cmd.flags['network'] = self.spec.tpu_network + if self.spec.tpu_tf_version: + cmd.flags['version'] = self.spec.tpu_tf_version + if self.spec.tpu_zone: + cmd.flags['zone'] = self.spec.tpu_zone + if self.spec.tpu_preemptible: + cmd.flags['preemptible'] = self.spec.tpu_preemptible + cmd.flags['project'] = self.project + _, stderr, retcode = cmd.Issue(raise_on_failure=False) + + if _INSUFFICIENT_CAPACITY in stderr: + logging.error(util.STOCKOUT_MESSAGE) + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + util.STOCKOUT_MESSAGE) + + if retcode != 0: + logging.error('Create GCP cloud TPU failed.') + + def _Delete(self): + """Deletes the cloud TPU.""" + cmd = util.GcloudCommand(self, 'compute', 'tpus', 'delete', + self.spec.tpu_name) + if self.spec.tpu_zone: + cmd.flags['zone'] = self.spec.tpu_zone + cmd.flags['project'] = self.project + _, _, retcode = cmd.Issue(timeout=TPU_TIMEOUT, raise_on_failure=False) + if retcode != 0: + logging.error('Delete GCP cloud TPU failed.') + else: + logging.info('Deleted GCP cloud TPU.') + + def _GetTpuDescription(self): + """Gets the cloud TPU description.""" + cmd = util.GcloudCommand(self, 'compute', 'tpus', 'describe', + self.spec.tpu_name) + if self.spec.tpu_zone: + cmd.flags['zone'] = self.spec.tpu_zone + cmd.flags['project'] = self.project + stdout, _, retcode = cmd.Issue(raise_on_failure=False) + if retcode != 0: + logging.info('Could not found GCP cloud TPU %s.', + self.spec.tpu_name) + return stdout and json.loads(stdout), retcode + + def _Exists(self): + """Returns true if the cloud TPU exists.""" + _, retcode = self._GetTpuDescription() + return retcode == 0 + + def GetName(self): + """Gets the name of the cloud TPU.""" + return self.spec.tpu_name + + def GetMasterGrpcAddress(self): + """Gets the grpc address of the 0th 
NetworkEndpoint.""" + master_network_endpoint = self._GetTpuDescription()[0]['networkEndpoints'][ + 0] + + return 'grpc://{ip_address}:{port}'.format( + ip_address=master_network_endpoint['ipAddress'], + port=master_network_endpoint['port']) + + def GetNumShards(self): + """Gets the number of TPU shards.""" + num_tpus = len(self._GetTpuDescription()[0]['networkEndpoints']) + return num_tpus * FLAGS.tpu_cores_per_donut + + def GetZone(self): + """Gets the TPU zone.""" + return self.spec.tpu_zone + + def GetAcceleratorType(self): + """Gets the TPU accelerator type.""" + return self.spec.tpu_accelerator_type + + def GetResourceMetadata(self): + """Returns the metadata associated with the resource. + + All keys will be prefaced with tpu before + being published (done in publisher.py). + + Returns: + metadata: dict of GCP cloud TPU metadata. + """ + metadata = super(GcpTpu, self).GetResourceMetadata() + metadata.update({ + 'project': self.project, + 'cloud': self.CLOUD + }) + return metadata diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcs.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcs.py new file mode 100644 index 0000000..06aec50 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcs.py @@ -0,0 +1,371 @@ +# Copyright 2016 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Contains classes/functions related to Google Cloud Storage.""" + +import logging +import ntpath +import os +import posixpath +import re +from typing import List as TList + +from absl import flags +from perfkitbenchmarker import errors +from perfkitbenchmarker import linux_packages +from perfkitbenchmarker import object_storage_service +from perfkitbenchmarker import os_types +from perfkitbenchmarker import providers +from perfkitbenchmarker import temp_dir +from perfkitbenchmarker import vm_util +from perfkitbenchmarker.providers.gcp import util + +_DEFAULT_GCP_SERVICE_KEY_FILE = 'gcp_credentials.json' +DEFAULT_GCP_REGION = 'us-central1' +GCLOUD_CONFIG_PATH = '.config/gcloud' +GCS_CLIENT_PYTHON = 'python' +GCS_CLIENT_BOTO = 'boto' +READER = 'objectViewer' +WRITER = 'objectCreator' + +flags.DEFINE_string('google_cloud_sdk_version', None, + 'Use a particular version of the Google Cloud SDK, e.g.: ' + '103.0.0') +flags.DEFINE_enum('gcs_client', GCS_CLIENT_BOTO, + [GCS_CLIENT_PYTHON, GCS_CLIENT_BOTO], + 'The GCS client library to use (default boto).') + +FLAGS = flags.FLAGS + + +class GoogleCloudStorageService(object_storage_service.ObjectStorageService): + """Interface to Google Cloud Storage.""" + + STORAGE_NAME = providers.GCP + + location: str + + def PrepareService(self, location): + self.location = location or DEFAULT_GCP_REGION + + def MakeBucket(self, bucket, raise_on_failure=True): + command = ['gsutil', 'mb'] + if self.location: + command.extend(['-l', self.location]) + if self.location and '-' in self.location: + # regional buckets + command.extend(['-c', 'regional']) + elif FLAGS.object_storage_storage_class is not None: + command.extend(['-c', FLAGS.object_storage_storage_class]) + if FLAGS.project: + command.extend(['-p', FLAGS.project]) + command.extend(['gs://%s' % bucket]) + + _, stderr, ret_code = vm_util.IssueCommand(command, raise_on_failure=False) + if ret_code and raise_on_failure: + raise errors.Benchmarks.BucketCreationError(stderr) + + command = ['gsutil', 'label', 'ch'] + for key, value in util.GetDefaultTags().items(): + command.extend(['-l', f'{key}:{value}']) + command.extend([f'gs://{bucket}']) + _, stderr, ret_code = vm_util.IssueCommand(command, raise_on_failure=False) + if ret_code and raise_on_failure: + raise errors.Benchmarks.BucketCreationError(stderr) + + def Copy(self, src_url, dst_url, recursive=False): + """See base class.""" + cmd = ['gsutil', 'cp'] + if recursive: + cmd += ['-r'] + cmd += [src_url, dst_url] + vm_util.IssueCommand(cmd) + + def CopyToBucket(self, src_path, bucket, object_path): + """See base class.""" + dst_url = self.MakeRemoteCliDownloadUrl(bucket, object_path) + vm_util.IssueCommand(['gsutil', 'cp', src_path, dst_url]) + + def MakeRemoteCliDownloadUrl(self, bucket, object_path): + """See base class.""" + path = posixpath.join(bucket, object_path) + return 'gs://' + path + + def GenerateCliDownloadFileCommand(self, src_url, local_path): + """See base class.""" + return 'gsutil cp "%s" "%s"' % (src_url, local_path) + + def List(self, bucket): + """See base class.""" + # Full URI is required by gsutil. + if not bucket.startswith('gs://'): + bucket = 'gs://' + bucket + stdout, _, _ = vm_util.IssueCommand(['gsutil', 'ls', bucket]) + return stdout + + def ListTopLevelSubfolders(self, bucket): + """Lists the top level folders (not files) in a bucket. + + Each folder is returned as its full uri, eg. "gs://pkbtpch1/customer/", so + just the folder name is extracted. 
When there's more than one, splitting + on the newline returns a final blank row, so blank values are skipped. + + Args: + bucket: Name of the bucket to list the top level subfolders of. + + Returns: + A list of top level subfolder names. Can be empty if there are no folders. + """ + return [ + obj.split('/')[-2].strip() + for obj in self.List(bucket).split('\n') + if obj and obj.endswith('/') + ] + + @vm_util.Retry() + def DeleteBucket(self, bucket): + # We want to retry rm and rb together because it's possible that + # we issue rm followed by rb, but then rb fails because the + # metadata store isn't consistent and the server that handles the + # rb thinks there are still objects in the bucket. It's also + # possible for rm to fail because the metadata store is + # inconsistent and rm doesn't find all objects, so can't delete + # them all. + self.EmptyBucket(bucket) + + def _bucket_not_found(stdout, stderr, retcode): + del stdout # unused + + return retcode and 'BucketNotFoundException' in stderr + + vm_util.IssueCommand(['gsutil', 'rb', 'gs://%s' % bucket], + suppress_failure=_bucket_not_found) + + def EmptyBucket(self, bucket): + # Ignore failures here and retry in DeleteBucket. See more comments there. + vm_util.IssueCommand( + ['gsutil', '-m', 'rm', '-r', + 'gs://%s/*' % bucket], raise_on_failure=False) + + def AclBucket(self, entity: str, roles: TList[str], bucket: str): + """Updates access control lists. + + Args: + entity: the user or group to grant permission. + roles: the IAM roles to be granted. + bucket: the name of the bucket to change + """ + vm_util.IssueCommand([ + 'gsutil', 'iam', 'ch', f"{entity}:{','.join(roles)}", f'gs://{bucket}' + ]) + + def MakeBucketPubliclyReadable(self, bucket, also_make_writable=False): + """See base class.""" + roles = [READER] + logging.warning('Making bucket %s publicly readable!', bucket) + if also_make_writable: + roles.append(WRITER) + logging.warning('Making bucket %s publicly writable!', bucket) + self.AclBucket('allUsers', roles, bucket) + + # Use JSON API over XML for URLs + def GetDownloadUrl(self, bucket, object_name, use_https=True): + """See base class.""" + # https://cloud.google.com/storage/docs/downloading-objects + scheme = 'https' if use_https else 'http' + return (f'{scheme}://storage.googleapis.com/storage/v1/' + f'b/{bucket}/o/{object_name}?alt=media') + + def GetUploadUrl(self, bucket, object_name, use_https=True): + """See base class.""" + # https://cloud.google.com/storage/docs/uploading-objects + # Note I don't believe GCS supports upload via HTTP. + scheme = 'https' if use_https else 'http' + return (f'{scheme}://storage.googleapis.com/upload/storage/v1/' + f'b/{bucket}/o?uploadType=media&name={object_name}') + + UPLOAD_HTTP_METHOD = 'POST' + + @classmethod + def AcquireWritePermissionsWindows(cls, vm): + """Prepare boto file on a remote Windows instance. + + If the boto file specifies a service key file, copy that service key file to + the VM and modify the .boto file on the VM to point to the copied file. + + Args: + vm: gce virtual machine object. 
+ """ + boto_src = object_storage_service.FindBotoFile() + boto_des = object_storage_service.DEFAULT_BOTO_LOCATION_USER + stdout, _ = vm.RemoteCommand(f'Test-Path {boto_des}') + if 'True' in stdout: + return + with open(boto_src) as f: + boto_contents = f.read() + match = re.search(r'gs_service_key_file\s*=\s*(.*)', boto_contents) + if match: + service_key_src = match.group(1) + service_key_des = ntpath.join(vm.home_dir, + posixpath.basename(service_key_src)) + boto_src = cls._PrepareGcsServiceKey(vm, boto_src, service_key_src, + service_key_des) + vm.PushFile(boto_src, boto_des) + + @classmethod + def AcquireWritePermissionsLinux(cls, vm): + """Prepare boto file on a remote Linux instance. + + If the boto file specifies a service key file, copy that service key file to + the VM and modify the .boto file on the VM to point to the copied file. + + Args: + vm: gce virtual machine object. + """ + vm_pwd, _ = vm.RemoteCommand('pwd') + home_dir = vm_pwd.strip() + boto_src = object_storage_service.FindBotoFile() + boto_des = object_storage_service.DEFAULT_BOTO_LOCATION_USER + if vm.TryRemoteCommand(f'test -f {boto_des}'): + return + with open(boto_src) as f: + boto_contents = f.read() + match = re.search(r'gs_service_key_file\s*=\s*(.*)', boto_contents) + if match: + service_key_src = match.group(1) + service_key_des = posixpath.join(home_dir, + posixpath.basename(service_key_src)) + boto_src = cls._PrepareGcsServiceKey(vm, boto_src, service_key_src, + service_key_des) + vm.PushFile(boto_src, boto_des) + + @classmethod + def _PrepareGcsServiceKey(cls, vm, boto_src, service_key_src, + service_key_des): + """Copy GS service key file to remote VM and update key path in boto file. + + Args: + vm: gce virtual machine object. + boto_src: string, the boto file path in local machine. + service_key_src: string, the gs service key file in local machine. + service_key_des: string, the gs service key file in remote VM. + + Returns: + The updated boto file path. + """ + vm.PushFile(service_key_src, service_key_des) + key = 'gs_service_key_file' + with open(boto_src, 'r') as src_file: + boto_path = os.path.join(temp_dir.GetRunDirPath(), + posixpath.basename(boto_src)) + with open(boto_path, 'w') as des_file: + for line in src_file: + if line.startswith(f'{key} = '): + des_file.write(f'{key} = {service_key_des}\n') + else: + des_file.write(line) + return boto_path + + def PrepareVM(self, vm): + vm.Install('wget') + # Unfortunately there isn't one URL scheme that works for both + # versioned archives and "always get the latest version". + if FLAGS.google_cloud_sdk_version is not None: + sdk_file = ('google-cloud-sdk-%s-linux-x86_64.tar.gz' % + FLAGS.google_cloud_sdk_version) + sdk_url = 'https://storage.googleapis.com/cloud-sdk-release/' + sdk_file + else: + sdk_file = 'google-cloud-sdk.tar.gz' + sdk_url = 'https://dl.google.com/dl/cloudsdk/release/' + sdk_file + vm.RemoteCommand('wget ' + sdk_url) + vm.RemoteCommand('tar xvf ' + sdk_file) + # Versioned and unversioned archives both unzip to a folder called + # 'google-cloud-sdk'. 
+ vm.RemoteCommand('bash ./google-cloud-sdk/install.sh ' + '--disable-installation-options ' + '--usage-report=false ' + '--rc-path=.bash_profile ' + '--path-update=true ' + '--bash-completion=true') + vm.Install('google_cloud_storage') + + vm.RemoteCommand('mkdir -p .config') + + if FLAGS.gcs_client == GCS_CLIENT_BOTO: + if vm.BASE_OS_TYPE == os_types.WINDOWS: + self.AcquireWritePermissionsWindows(vm) + else: + self.AcquireWritePermissionsLinux(vm) + vm.Install('gcs_boto_plugin') + + vm.gsutil_path, _ = vm.RemoteCommand('which gsutil', login_shell=True) + vm.gsutil_path = vm.gsutil_path.split()[0] + + # Detect if we need to install crcmod for gcp. + # See "gsutil help crc" for details. + raw_result, _ = vm.RemoteCommand('%s version -l' % vm.gsutil_path) + logging.info('gsutil version -l raw result is %s', raw_result) + search_string = 'compiled crcmod: True' + result_string = re.findall(search_string, raw_result) + if not result_string: + logging.info('compiled crcmod is not available, installing now...') + try: + # Try uninstall first just in case there is a pure python version of + # crcmod on the system already, this is required by gsutil doc: + # https://cloud.google.com/storage/docs/ + # gsutil/addlhelp/CRC32CandInstallingcrcmod + vm.Uninstall('crcmod') + except errors.VirtualMachine.RemoteCommandError: + logging.info('pip uninstall crcmod failed, could be normal if crcmod ' + 'is not available at all.') + vm.Install('crcmod') + vm.installed_crcmod = True + else: + logging.info('compiled crcmod is available, not installing again.') + vm.installed_crcmod = False + + def CleanupVM(self, vm): + vm.RemoveFile('google-cloud-sdk') + vm.RemoveFile(GCLOUD_CONFIG_PATH) + if FLAGS.gcs_client == GCS_CLIENT_BOTO: + vm.RemoveFile(object_storage_service.DEFAULT_BOTO_LOCATION_USER) + vm.Uninstall('gcs_boto_plugin') + + def CLIUploadDirectory(self, vm, directory, files, bucket): + return vm.RemoteCommand( + 'time %s -m cp %s/* gs://%s/' % ( + vm.gsutil_path, directory, bucket)) + + def CLIDownloadBucket(self, vm, bucket, objects, dest): + return vm.RemoteCommand( + 'time %s -m cp gs://%s/* %s' % (vm.gsutil_path, bucket, dest)) + + def Metadata(self, vm): + metadata = { + 'pkb_installed_crcmod': vm.installed_crcmod, + 'gcs_client': str(FLAGS.gcs_client) + } + if FLAGS.gcs_client == GCS_CLIENT_BOTO: + metadata.update({ + object_storage_service.BOTO_LIB_VERSION: + linux_packages.GetPipPackageVersion(vm, 'boto') + }) + return metadata + + def APIScriptArgs(self): + return ['--gcs_client=' + str(FLAGS.gcs_client)] + + @classmethod + def APIScriptFiles(cls): + return ['gcs.py', 'gcs_boto.py'] diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcsfuse_disk.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcsfuse_disk.py new file mode 100644 index 0000000..24e87e6 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/gcsfuse_disk.py @@ -0,0 +1,34 @@ +"""GCS FUSE based disk implementation.""" + +from absl import flags +from perfkitbenchmarker import disk + +FLAGS = flags.FLAGS + +DEFAULT_MOUNT_OPTIONS = [ + 'allow_other', + 'dir_mode=777', + 'file_mode=777', + 'implicit_dirs', +] + + +class GcsFuseDisk(disk.MountableDisk): + """GCS FUSE based disk implementation. + + Mount the bucket specified by flag gcsfuse_bucket at the mount_point. If not + specified, all the buckets are mounted as subdirectories. 
+ """ + + def Attach(self, vm): + vm.Install('gcsfuse') + + def Mount(self, vm): + vm.RemoteCommand( + f'sudo mkdir -p {self.mount_point} && ' + f'sudo chmod a+w {self.mount_point}') + + opts = ','.join(DEFAULT_MOUNT_OPTIONS + FLAGS.mount_options) + bucket = FLAGS.gcsfuse_bucket + target = self.mount_point + vm.RemoteCommand(f'sudo mount -t gcsfuse -o {opts} {bucket} {target}') diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py new file mode 100644 index 0000000..f867c5a --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/google_kubernetes_engine.py @@ -0,0 +1,301 @@ +# Copyright 2018 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Contains classes/functions related to GKE (Google Kubernetes Engine).""" + +import json +import logging +import math +import os +import re + +from absl import flags +from perfkitbenchmarker import container_service +from perfkitbenchmarker import data +from perfkitbenchmarker import errors +from perfkitbenchmarker import kubernetes_helper +from perfkitbenchmarker import providers +from perfkitbenchmarker.providers.gcp import gce_virtual_machine +from perfkitbenchmarker.providers.gcp import util +import six + +FLAGS = flags.FLAGS + +NVIDIA_DRIVER_SETUP_DAEMON_SET_SCRIPT = 'https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml' +NVIDIA_UNRESTRICTED_PERMISSIONS_DAEMON_SET = 'nvidia_unrestricted_permissions_daemonset.yml' +DEFAULT_RELEASE_CHANNEL = 'regular' +SERVICE_ACCOUNT_PATTERN = r'.*((? int: + # Defaults are used for pod and services CIDR ranges: + # https://cloud.google.com/kubernetes-engine/docs/concepts/alias-ips#cluster_sizing_secondary_range_svcs) + # Each node requires a /24 CIDR range for pods + # The cluster requires a /20 CIDR range for services + # So 2^(32 - nodes) - 2^(32 - 20) >= 2^(32 - 24) * CIDR + # OR CIDR <= 32 - log2(2^8 * nodes + 2^12) + cidr_size = int(32 - math.log2((nodes << 8) + (1 << 12))) + # /19 is narrowest CIDR range GKE supports + return min(cidr_size, 19) + + +class GoogleContainerRegistry(container_service.BaseContainerRegistry): + """Class for building and storing container images on GCP.""" + + CLOUD = providers.GCP + + def __init__(self, registry_spec): + super(GoogleContainerRegistry, self).__init__(registry_spec) + self.project = self.project or util.GetDefaultProject() + + def GetFullRegistryTag(self, image): + """Gets the full tag of the image.""" + region = util.GetMultiRegionFromRegion(util.GetRegionFromZone(self.zone)) + hostname = '{region}.gcr.io'.format(region=region) + full_tag = '{hostname}/{project}/{name}'.format( + hostname=hostname, project=self.project, name=image) + return full_tag + + def Login(self): + """Configure docker to be able to push to remote repo.""" + # TODO(pclay): Don't edit user's docker config. It is idempotent. 
+ cmd = util.GcloudCommand(self, 'auth', 'configure-docker') + del cmd.flags['zone'] + cmd.Issue() + + def RemoteBuild(self, image): + """Build the image remotely.""" + full_tag = self.GetFullRegistryTag(image.name) + build_cmd = util.GcloudCommand(self, 'builds', 'submit', '--tag', full_tag, + image.directory) + del build_cmd.flags['zone'] + build_cmd.Issue() + + +class GkeCluster(container_service.KubernetesCluster): + """Class representing a Google Kubernetes Engine cluster.""" + + CLOUD = providers.GCP + + def __init__(self, spec): + super(GkeCluster, self).__init__(spec) + self.project = spec.vm_spec.project + self.cluster_version = ( + FLAGS.container_cluster_version or DEFAULT_RELEASE_CHANNEL) + self.use_application_default_credentials = True + self.zones = self.zone and self.zone.split(',') + if not self.zones: + raise errors.Config.MissingOption( + 'container_cluster.vm_spec.GCP.zone is required.') + elif len(self.zones) == 1 and util.IsRegion(self.zone): + self.region = self.zone + self.zones = [] + logging.info("Interpreting zone '%s' as a region", self.zone) + else: + self.region = util.GetRegionFromZone(self.zones[0]) + + def GetResourceMetadata(self): + """Returns a dict containing metadata about the cluster. + + Returns: + dict mapping string property key to value. + """ + result = super(GkeCluster, self).GetResourceMetadata() + result['project'] = self.project + if self.cluster_version in RELEASE_CHANNELS: + result['gke_release_channel'] = self.cluster_version + + result['boot_disk_type'] = self.vm_config.boot_disk_type + result['boot_disk_size'] = self.vm_config.boot_disk_size + if self.vm_config.max_local_disks: + result['gce_local_ssd_count'] = self.vm_config.max_local_disks + # TODO(pclay): support NVME when it leaves alpha + # Also consider moving FLAGS.gce_ssd_interface into the vm_spec. + result['gce_local_ssd_interface'] = gce_virtual_machine.SCSI + return result + + def _GcloudCommand(self, *args, **kwargs): + """Fix zone and region.""" + cmd = util.GcloudCommand(self, *args, **kwargs) + if len(self.zones) != 1: + del cmd.flags['zone'] + cmd.flags['region'] = self.region + return cmd + + def _Create(self): + """Creates the cluster.""" + cmd = self._GcloudCommand('container', 'clusters', 'create', self.name) + + self._AddNodeParamsToCmd(self.vm_config, self.num_nodes, + container_service.DEFAULT_NODEPOOL, cmd) + + if self.cluster_version in RELEASE_CHANNELS: + cmd.flags['release-channel'] = self.cluster_version + else: + cmd.flags['cluster-version'] = self.cluster_version + if FLAGS.gke_enable_alpha: + cmd.args.append('--enable-kubernetes-alpha') + cmd.args.append('--no-enable-autorepair') + cmd.args.append('--no-enable-autoupgrade') + + user = util.GetDefaultUser() + if FLAGS.gcp_service_account: + cmd.flags['service-account'] = FLAGS.gcp_service_account + # Matches service accounts that either definitely belongs to this project or + # are a GCP managed service account like the GCE default service account, + # which we can't tell to which project they belong. 
+ elif re.match(SERVICE_ACCOUNT_PATTERN, user): + logging.info('Re-using configured service-account for GKE Cluster: %s', + user) + cmd.flags['service-account'] = user + self.use_application_default_credentials = False + else: + logging.info('Using default GCE service account for GKE cluster') + cmd.flags['scopes'] = 'cloud-platform' + + if self.min_nodes != self.num_nodes or self.max_nodes != self.num_nodes: + cmd.args.append('--enable-autoscaling') + cmd.flags['max-nodes'] = self.max_nodes + cmd.flags['min-nodes'] = self.min_nodes + + cmd.flags['cluster-ipv4-cidr'] = f'/{_CalculateCidrSize(self.max_nodes)}' + + if self.vm_config.network: + cmd.flags['network'] = self.vm_config.network.network_resource.name + + cmd.flags['metadata'] = util.MakeFormattedDefaultTags() + cmd.flags['labels'] = util.MakeFormattedDefaultTags() + self._IssueResourceCreationCommand(cmd) + + self._CreateNodePools() + + def _CreateNodePools(self): + """Creates additional nodepools for the cluster, if applicable.""" + for name, nodepool in six.iteritems(self.nodepools): + cmd = self._GcloudCommand('container', 'node-pools', 'create', name, + '--cluster', self.name) + self._AddNodeParamsToCmd(nodepool.vm_config, nodepool.vm_count, name, cmd) + self._IssueResourceCreationCommand(cmd) + + def _IssueResourceCreationCommand(self, cmd): + """Issues a command to gcloud to create resources.""" + + # This command needs a long timeout due to the many minutes it + # can take to provision a large GPU-accelerated GKE cluster. + _, stderr, retcode = cmd.Issue(timeout=1200, raise_on_failure=False) + if retcode: + # Log specific type of failure, if known. + if 'ZONE_RESOURCE_POOL_EXHAUSTED' in stderr: + logging.exception('Container resources exhausted: %s', stderr) + raise errors.Benchmarks.InsufficientCapacityCloudFailure( + 'Container resources exhausted in zone %s: %s' % + (self.zone, stderr)) + util.CheckGcloudResponseKnownFailures(stderr, retcode) + raise errors.Resource.CreationError(stderr) + + def _AddNodeParamsToCmd(self, vm_config, num_nodes, name, cmd): + """Modifies cmd to include node specific command arguments.""" + + if vm_config.gpu_count: + cmd.flags['accelerator'] = ( + gce_virtual_machine.GenerateAcceleratorSpecString( + vm_config.gpu_type, + vm_config.gpu_count)) + if vm_config.min_cpu_platform: + cmd.flags['min-cpu-platform'] = vm_config.min_cpu_platform + + if vm_config.threads_per_core: + # TODO(user): Remove when threads-per-core is available in GA + cmd.use_alpha_gcloud = True + cmd.flags['threads-per-core'] = vm_config.threads_per_core + + if vm_config.boot_disk_size: + cmd.flags['disk-size'] = vm_config.boot_disk_size + if vm_config.boot_disk_type: + cmd.flags['disk-type'] = vm_config.boot_disk_type + if vm_config.max_local_disks: + # TODO(pclay): Switch to local-ssd-volumes which support NVME when it + # leaves alpha. 
See
+      # https://cloud.google.com/sdk/gcloud/reference/alpha/container/clusters/create
+      cmd.flags['local-ssd-count'] = vm_config.max_local_disks
+
+    cmd.flags['num-nodes'] = num_nodes
+    # vm_config.zone may be a comma-separated list of zones.
+    if vm_config.zone:
+      cmd.flags['node-locations'] = vm_config.zone
+
+    if vm_config.machine_type is None:
+      cmd.flags['machine-type'] = 'custom-{0}-{1}'.format(
+          vm_config.cpus,
+          vm_config.memory_mib)
+    else:
+      cmd.flags['machine-type'] = vm_config.machine_type
+
+    cmd.flags['node-labels'] = f'pkb_nodepool={name}'
+
+  def _PostCreate(self):
+    """Acquire cluster authentication."""
+    super(GkeCluster, self)._PostCreate()
+    cmd = self._GcloudCommand('container', 'clusters', 'get-credentials',
+                              self.name)
+    env = os.environ.copy()
+    env['KUBECONFIG'] = FLAGS.kubeconfig
+    cmd.IssueRetryable(env=env)
+
+    if self.vm_config.gpu_count:
+      kubernetes_helper.CreateFromFile(NVIDIA_DRIVER_SETUP_DAEMON_SET_SCRIPT)
+      kubernetes_helper.CreateFromFile(
+          data.ResourcePath(NVIDIA_UNRESTRICTED_PERMISSIONS_DAEMON_SET))
+
+    # GKE does not wait for kube-dns by default
+    logging.info('Waiting for kube-dns')
+    self.WaitForResource(
+        'deployment/kube-dns',
+        condition_name='Available',
+        namespace='kube-system')
+
+  def _GetInstanceGroups(self):
+    cmd = self._GcloudCommand('container', 'node-pools', 'list')
+    cmd.flags['cluster'] = self.name
+    stdout, _, _ = cmd.Issue()
+    json_output = json.loads(stdout)
+    instance_groups = []
+    for node_pool in json_output:
+      for group_url in node_pool['instanceGroupUrls']:
+        instance_groups.append(group_url.split('/')[-1])  # last url part
+    return instance_groups
+
+  def _IsDeleting(self):
+    cmd = self._GcloudCommand('container', 'clusters', 'describe', self.name)
+    stdout, _, _ = cmd.Issue(raise_on_failure=False)
+    return True if stdout else False
+
+  def _Delete(self):
+    """Deletes the cluster."""
+    super()._Delete()
+    cmd = self._GcloudCommand('container', 'clusters', 'delete', self.name)
+    cmd.args.append('--async')
+    cmd.Issue(raise_on_failure=False)
+
+  def _Exists(self):
+    """Returns True if the cluster exists."""
+    cmd = self._GcloudCommand('container', 'clusters', 'describe', self.name)
+    _, _, retcode = cmd.Issue(suppress_warning=True, raise_on_failure=False)
+    return retcode == 0
+
+  def GetDefaultStorageClass(self) -> str:
+    """Get the default storage class for the provider."""
+    # https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/gce-pd-csi-driver
+    # PD-SSD
+    return 'premium-rwo'
diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/memcache.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/memcache.py
new file mode 100644
index 0000000..171e3b5
--- /dev/null
+++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/memcache.py
@@ -0,0 +1,39 @@
+# Copyright 2017 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
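For reference, `_GetInstanceGroups` above just peels the last path segment off each `instanceGroupUrls` entry returned by `gcloud container node-pools list`. A sketch with a made-up, trimmed-down response:

```python
import json

# Hypothetical JSON output of `gcloud container node-pools list --format json`.
stdout = json.dumps([
    {'name': 'default', 'instanceGroupUrls': [
        'https://www.googleapis.com/compute/v1/projects/p/zones/us-central1-a/'
        'instanceGroupManagers/gke-pkb-default-pool-12345-grp',
    ]},
])

instance_groups = []
for node_pool in json.loads(stdout):
  for group_url in node_pool['instanceGroupUrls']:
    instance_groups.append(group_url.split('/')[-1])  # last URL part

assert instance_groups == ['gke-pkb-default-pool-12345-grp']
```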
+ +from perfkitbenchmarker import providers +from perfkitbenchmarker.memcache_service import MemcacheService + + +class MemcacheService(MemcacheService): + + CLOUD = providers.GCP + + def __init__(self): + pass + + def Create(self): + raise NotImplementedError + + def Destroy(self): + raise NotImplementedError + + def Flush(self): + raise NotImplementedError + + def GetHosts(self): + raise NotImplementedError + + def GetMetadata(self): + raise NotImplementedError diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/provider_info.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/provider_info.py new file mode 100644 index 0000000..9551c00 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/provider_info.py @@ -0,0 +1,24 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Provider info for Google Cloud Platform.""" + +from perfkitbenchmarker import provider_info +from perfkitbenchmarker import providers + + +class GCPProviderInfo(provider_info.BaseProviderInfo): + + UNSUPPORTED_BENCHMARKS = [] + CLOUD = providers.GCP diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/util.py b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/util.py new file mode 100644 index 0000000..37cea08 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/gcp/util.py @@ -0,0 +1,511 @@ +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utilities for working with Google Cloud Platform resources.""" + +import collections +import functools +import json +import logging +import re +from typing import Set +from absl import flags +from perfkitbenchmarker import context +from perfkitbenchmarker import errors +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import vm_util +import six + +FLAGS = flags.FLAGS + +RATE_LIMITED_MESSAGE = 'Rate Limit Exceeded' +# regex to check API limits when tagging resources +# matches a string like: +# ERROR: (gcloud.compute.disks.add-labels) PERMISSION_DENIED: Quota exceeded +# for quota group 'ReadGroup' and limit 'Read requests per 100 seconds' of +# service 'compute.googleapis.com' for consumer 'project_number:012345678901'. +TAGGING_RATE_LIMITED_REGEX = re.compile( + "Quota exceeded for quota group '.*?' and limit " + "'.*? 
per.*?seconds' of service 'compute.googleapis.com'")
+RATE_LIMITED_MAX_RETRIES = 10
+# 200s is chosen because 1) quota is measured in 100s intervals and 2) fuzzing
+# causes a random number between 100 and this to be chosen.
+RATE_LIMITED_MAX_POLLING_INTERVAL = 200
+# This must be set. Otherwise, calling Issue() will fail in util_test.py.
+RATE_LIMITED_FUZZ = 0.5
+RATE_LIMITED_TIMEOUT = 1200
+STOCKOUT_MESSAGE = ('Creation failed due to insufficient capacity indicating a '
+                    'potential stockout scenario.')
+
+
+@functools.lru_cache()
+def GetDefaultProject():
+  """Get the default project."""
+  cmd = [FLAGS.gcloud_path, 'config', 'list', '--format=json']
+  stdout, _, _ = vm_util.IssueCommand(cmd)
+  result = json.loads(stdout)
+  return result['core']['project']
+
+
+@functools.lru_cache()
+def GetDefaultUser():
+  """Get the default user."""
+  cmd = [FLAGS.gcloud_path, 'config', 'list', '--format=json']
+  stdout, _, _ = vm_util.IssueCommand(cmd)
+  result = json.loads(stdout)
+  return result['core']['account']
+
+
+def GetRegionFromZone(zone):
+  """Returns the region name from a fully-qualified zone name.
+
+  Each fully-qualified GCP zone name is formatted as <region>-<zone> where, for
+  example, each region looks like us-central1, europe-west1, or asia-east1.
+  Therefore, we pull the first two parts of the fully qualified zone name
+  delimited by a dash and assume the rest is the name of the zone. See
+  https://cloud.google.com/compute/docs/regions-zones for more information.
+
+  Args:
+    zone: The fully-qualified name of a GCP zone.
+  """
+  parts = zone.split('-')
+  return '-'.join(parts[:2])
+
+
+def IsRegion(location: str) -> bool:
+  """Determine if a zone or region is a region."""
+  return bool(re.fullmatch(r'[a-z]+-[a-z]+[0-9]', location))
+
+
+def GetAllZones() -> Set[str]:
+  """Gets a list of valid zones."""
+  cmd = GcloudCommand(None, 'compute', 'zones', 'list')
+  cmd.flags = {
+      'format': 'value(name)',
+  }
+  stdout, _, _ = cmd.Issue()
+  return set(stdout.splitlines())
+
+
+def GetAllRegions() -> Set[str]:
+  """Gets a list of valid regions."""
+  cmd = GcloudCommand(None, 'compute', 'regions', 'list')
+  cmd.flags = {
+      'format': 'value(name)',
+  }
+  stdout, _, _ = cmd.Issue()
+  return set(stdout.splitlines())
+
+
+def GetZonesInRegion(region) -> Set[str]:
+  """Gets a list of zones for the given region."""
+  cmd = GcloudCommand(None, 'compute', 'zones', 'list')
+  cmd.flags = {
+      'filter': f"name~'{region}'",
+      'format': 'value(name)',
+  }
+  stdout, _, _ = cmd.Issue()
+  return set(stdout.splitlines())
+
+
+def GetZonesFromMachineType() -> Set[str]:
+  """Gets a list of zones for the given machine type."""
+  cmd = GcloudCommand(None, 'compute', 'machine-types', 'list')
+  cmd.flags = {
+      'filter': f"name~'{FLAGS.machine_type}'",
+      'format': 'value(zone)'
+  }
+  stdout, _, _ = cmd.Issue()
+  return set(stdout.splitlines())
+
+
+def GetGeoFromRegion(region: str) -> str:
+  """Gets valid geo from the region, i.e.
region us-central1 returns us.""" + return region.split('-')[0] + + +def GetRegionsInGeo(geo: str) -> Set[str]: + """Gets valid regions in the geo.""" + return {region for region in GetAllRegions() if region.startswith(geo)} + + +def GetMultiRegionFromRegion(region): + """Gets the closest multi-region location to the region.""" + if (region.startswith('us') or + region.startswith('northamerica') or + region.startswith('southamerica')): + return 'us' + elif region.startswith('europe'): + return 'eu' + elif region.startswith('asia') or region.startswith('australia'): + return 'asia' + else: + raise Exception('Unknown region "%s".' % region) + + +def IssueCommandFunction(cmd, **kwargs): + """Use vm_util to issue the given command. + + Args: + cmd: the gcloud command to run + **kwargs: additional arguments for the gcloud command + + Returns: + stdout, stderr, retcode tuple from running the command + """ + return vm_util.IssueCommand(cmd.GetCommand(), **kwargs) + + +def IssueRetryableCommandFunction(cmd, **kwargs): + """Use vm_util to issue the given retryable command. + + Args: + cmd: the gcloud command to run + **kwargs: additional arguments for the gcloud command + + Returns: + stdout, stderr, tuple from running the command + """ + return vm_util.IssueRetryableCommand(cmd.GetCommand(), **kwargs) + + +# The function that is used to issue a command, when given a GcloudCommand +# object and additional arguments. Can be overridden. +_issue_command_function = IssueCommandFunction + +# The function that is used to issue a retryable command, when given a +# GcloudCommand object and additional arguments. Can be overridden. +_issue_retryable_command_function = IssueRetryableCommandFunction + + +def SetIssueCommandFunction(func): + """Set the issue command function to be the given function. + + Args: + func: the function to run when issuing a GcloudCommand. + """ + global _issue_command_function + _issue_command_function = func + + +def SetIssueRetryableCommandFunction(func): + """Set the issue retryable command function to be the given function. + + Args: + func: the function to run when issuing a GcloudCommand. + """ + global _issue_retryable_command_function + _issue_retryable_command_function = func + + +class GcloudCommand(object): + """A gcloud command. + + Attributes: + args: list of strings. Non-flag args to pass to gcloud, typically + specifying an operation to perform (e.g. ['compute', 'images', 'list'] + to list available images). + flags: OrderedDict mapping flag name string to flag value. Flags to pass to + gcloud (e.g. {'project': 'my-project-id'}). If a provided value is + True, the flag is passed to gcloud without a value. If a provided value + is a list, the flag is passed to gcloud multiple times, once with each + value in the list. + additional_flags: list of strings. Additional flags to append unmodified to + the end of the gcloud command (e.g. ['--metadata', 'color=red']). + rate_limited: boolean. True if rate limited, False otherwise. + use_alpha_gcloud: boolean. Defaults to False. + """ + + def __init__(self, resource, *args): + """Initializes a GcloudCommand with the provided args and common flags. + + Args: + resource: A GCE resource of type BaseResource. + *args: sequence of strings. Non-flag args to pass to gcloud, typically + specifying an operation to perform (e.g. ['compute', 'images', 'list'] + to list available images). 
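The zone, region, and geo helpers above follow simple naming conventions; a quick doctest-style sketch (the location names are examples only):

```python
import re


def get_region_from_zone(zone: str) -> str:
  # The first two dash-separated parts form the region, e.g. us-central1-a.
  return '-'.join(zone.split('-')[:2])


def is_region(location: str) -> bool:
  return bool(re.fullmatch(r'[a-z]+-[a-z]+[0-9]', location))


assert get_region_from_zone('us-central1-a') == 'us-central1'
assert is_region('us-central1') and not is_region('us-central1-a')
# GetGeoFromRegion('us-central1') yields 'us'; GetMultiRegionFromRegion maps
# 'europe-west1' to 'eu' and 'asia-east1' to 'asia'.
```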
+ """ + self.args = list(args) + self.flags = collections.OrderedDict() + self.additional_flags = [] + self._AddCommonFlags(resource) + self.rate_limited = False + self.use_alpha_gcloud = False + + def GetCommand(self): + """Generates the gcloud command. + + Returns: + list of strings. When joined by spaces, forms the gcloud shell command. + + Raises: + ValueError: if passed a None value + """ + cmd = [FLAGS.gcloud_path] + cmd.extend(self.args) + for flag_name, values in sorted(self.flags.items()): + flag_name_str = '--{0}'.format(flag_name) + if values is True: + cmd.append(flag_name_str) + elif values is None: + raise ValueError(f'Flag {flag_name} is None. Please filter out.') + else: + values_iterable = values if isinstance(values, list) else [values] + for value in values_iterable: + cmd.append(flag_name_str) + cmd.append(str(value)) + cmd.extend(self.additional_flags) + if self.use_alpha_gcloud and len(cmd) > 1 and cmd[1] != 'alpha': + cmd.insert(1, 'alpha') + return cmd + + def __repr__(self): + return '{0}({1})'.format(type(self).__name__, ' '.join(self.GetCommand())) + + @staticmethod + def _IsIssueRateLimitMessage(text) -> bool: + if RATE_LIMITED_MESSAGE in text: + return True + match = TAGGING_RATE_LIMITED_REGEX.search(text) + if match: + return True + return False + + @vm_util.Retry( + poll_interval=RATE_LIMITED_MAX_POLLING_INTERVAL, + max_retries=RATE_LIMITED_MAX_RETRIES, + fuzz=RATE_LIMITED_FUZZ, + timeout=RATE_LIMITED_TIMEOUT, + retryable_exceptions=( + errors.Benchmarks.QuotaFailure.RateLimitExceededError,)) + def Issue(self, **kwargs): + """Tries to run the gcloud command once, retrying if Rate Limited. + + Args: + **kwargs: Keyword arguments to forward to vm_util.IssueCommand when + issuing the gcloud command. + + Returns: + A tuple of stdout, stderr, and retcode from running the gcloud command. + Raises: + RateLimitExceededError: if command fails with Rate Limit Exceeded. + QuotaFailure: if command fails without Rate Limit Exceeded and + retry_on_rate_limited is set to false + IssueCommandError: if command fails without Rate Limit Exceeded. + + """ + try: + stdout, stderr, retcode = _issue_command_function(self, **kwargs) + except errors.VmUtil.IssueCommandError as error: + error_message = str(error) + if GcloudCommand._IsIssueRateLimitMessage(error_message): + self._RaiseRateLimitedException(error_message) + else: + raise error + if retcode and GcloudCommand._IsIssueRateLimitMessage(stderr): + self._RaiseRateLimitedException(stderr) + + return stdout, stderr, retcode + + def _RaiseRateLimitedException(self, error): + """Raise rate limited exception based on the retry_on_rate_limited flag. + + Args: + error: Error message to raise + + Raises: + RateLimitExceededError: if command fails with Rate Limit Exceeded and + retry_on_rate_limited is set to true + QuotaFailure: if command fails without Rate Limit Exceeded and + retry_on_rate_limited is set to false + """ + self.rate_limited = True + if FLAGS.retry_on_rate_limited: + raise errors.Benchmarks.QuotaFailure.RateLimitExceededError(error) + raise errors.Benchmarks.QuotaFailure(error) + + def IssueRetryable(self, **kwargs): + """Tries running the gcloud command until it succeeds or times out. + + Args: + **kwargs: Keyword arguments to forward to vm_util.IssueRetryableCommand + when issuing the gcloud command. + + Returns: + (stdout, stderr) pair of strings from running the gcloud command. 
+ """ + return _issue_retryable_command_function(self, **kwargs) + + def _AddCommonFlags(self, resource): + """Adds common flags to the command. + + Adds common gcloud flags derived from the PKB flags and provided resource. + + Args: + resource: A GCE resource of type BaseResource. + """ + self.flags['format'] = 'json' + self.flags['quiet'] = True + if resource: + if resource.project is not None: + self.flags['project'] = resource.project + if hasattr(resource, 'zone') and resource.zone: + self.flags['zone'] = resource.zone + self.additional_flags.extend(FLAGS.additional_gcloud_flags or ()) + + +_QUOTA_EXCEEDED_REGEX = re.compile( + r"(Quota '.*' exceeded|Insufficient \w+ quota)") + +_NOT_ENOUGH_RESOURCES_STDERR = ('does not have enough resources available to ' + 'fulfill the request.') +_NOT_ENOUGH_RESOURCES_MESSAGE = 'Creation failed due to not enough resources: ' + + +def CheckGcloudResponseKnownFailures(stderr, retcode): + """Checks gcloud responses for quota exceeded errors. + + Args: + stderr: The stderr from a gcloud command. + retcode: The return code from a gcloud command. + """ + if retcode: + if _QUOTA_EXCEEDED_REGEX.search(stderr): + message = virtual_machine.QUOTA_EXCEEDED_MESSAGE + stderr + logging.error(message) + raise errors.Benchmarks.QuotaFailure(message) + if _NOT_ENOUGH_RESOURCES_STDERR in stderr: + message = _NOT_ENOUGH_RESOURCES_MESSAGE + stderr + logging.error(message) + raise errors.Benchmarks.InsufficientCapacityCloudFailure(message) + + +def AuthenticateServiceAccount(vm, vm_gcloud_path='gcloud', benchmark=None): + """Authorize gcloud to access Google Cloud Platform with a service account. + + If you want gcloud (and other tools in the Cloud SDK) to use service account + credentials to make requests, use this method to authenticate. + Account name is provided by FLAGS.gcp_service_account + Credentials are fetched from a file whose local path is provided by + FLAGS.gcp_service_account_key_file, which contains private authorization key. + In the absence of a locally supplied credential file, the file is retrieved + from pre-provisioned data bucket. + + Args: + vm: vm on which the gcloud library needs to be authenticated. + vm_gcloud_path: Optional path to the gcloud binary on the vm. + benchmark: The module for retrieving the associated service account file. + """ + if not FLAGS.gcp_service_account: + raise errors.Setup.InvalidFlagConfigurationError( + 'Authentication requires the service account name to be ' + 'specified via --gcp_service_account.') + if not FLAGS.gcp_service_account_key_file: + raise errors.Setup.InvalidFlagConfigurationError( + 'Authentication requires the service account credential json to be ' + 'specified via --gcp_service_account_key_file.') + if '/' in FLAGS.gcp_service_account_key_file: + vm.PushFile(FLAGS.gcp_service_account_key_file, vm_util.VM_TMP_DIR) + key_file_name = FLAGS.gcp_service_account_key_file.split('/')[-1] + else: + vm.InstallPreprovisionedBenchmarkData(benchmark, + [FLAGS.gcp_service_account_key_file], + vm_util.VM_TMP_DIR) + key_file_name = FLAGS.gcp_service_account_key_file + activate_cmd = ('{} auth activate-service-account {} --key-file={}/{}' + .format(vm_gcloud_path, FLAGS.gcp_service_account, + vm_util.VM_TMP_DIR, key_file_name)) + vm.RemoteCommand(activate_cmd) + + +def InstallGcloudComponents(vm, vm_gcloud_path='gcloud', component='alpha'): + """Install gcloud components on the target vm. + + Args: + vm: vm on which the gcloud's alpha components need to be installed. 
+ vm_gcloud_path: Optional path to the gcloud binary on the vm. + component: Gcloud component to install. + """ + install_cmd = '{} components install {} --quiet'.format(vm_gcloud_path, + component) + vm.RemoteCommand(install_cmd) + + +def FormatTags(tags_dict): + """Format a dict of tags into arguments. + + Args: + tags_dict: Tags to be formatted. + + Returns: + A string contains formatted tags + """ + return ','.join( + '{0}={1}'.format(k, v) for k, v in sorted(six.iteritems(tags_dict))) + + +def SplitTags(tags): + """Formats a string of joined tags into a dictionary. + + Args: + tags: A string containing tags formatted as key1=value1,key2=value2,... + + Returns: + An OrderedDict mapping tag keys to values in the order the tags were given. + """ + return collections.OrderedDict( + tag_pair.split('=') for tag_pair in tags.split(',')) + + +def GetDefaultTags(timeout_minutes=None): + """Get the default tags in a dictionary. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + A dict of tags, contributed from the benchmark spec. + """ + benchmark_spec = context.GetThreadBenchmarkSpec() + if not benchmark_spec: + return {} + return benchmark_spec.GetResourceTags(timeout_minutes) + + +def MakeFormattedDefaultTags(timeout_minutes=None): + """Get the default tags formatted. + + Args: + timeout_minutes: Timeout used for setting the timeout_utc tag. + + Returns: + A string contains tags, contributed from the benchmark spec. + """ + return FormatTags(GetDefaultTags(timeout_minutes)) + + +def GetAccessToken(application_default: bool = True) -> str: + """Gets the access token for the default project. + + Args: + application_default: whether to use application-default in gcloud args. + + Returns: + Text string of the access token. + """ + cmd = [FLAGS.gcloud_path, 'auth'] + if application_default: + cmd.append('application-default') + cmd.append('print-access-token') + stdout, _, _ = vm_util.IssueCommand(cmd) + return stdout.strip() diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/__init__.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/__init__.py new file mode 100644 index 0000000..d90275b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/flags.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/flags.py new file mode 100644 index 0000000..0ca6a58 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/flags.py @@ -0,0 +1,34 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from absl import flags + +flags.DEFINE_string('tencent_user_name', 'ubuntu', + 'This determines the user name that Perfkit will ' + 'attempt to use. This must be changed in order to ' + 'use any image other than ubuntu.') +flags.DEFINE_string('tencent_boot_disk_type', None, + '"CLOUD_BASIC" - HDD cloud disk, ' + '"CLOUD_PREMIUM" - premium cloud disk, ' + '"CLOUD_SSD" - cloud SSD disk, ' + '"LOCAL_BASIC" - local disk, ' + '"LOCAL_SSD" - local SSD disk') +flags.DEFINE_string('tencent_boot_disk_size', None, + 'Boot disk size in GB.') +flags.DEFINE_string('tencent_internet_bandwidth', None, + 'Internet bandwidth in Mbps.') +flags.DEFINE_string('tencent_image_id', None, + 'Image ID.') +flags.DEFINE_integer('tencent_project_id', 0, + 'Project ID.') diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/provider_info.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/provider_info.py new file mode 100644 index 0000000..3ffb78c --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/provider_info.py @@ -0,0 +1,24 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" Provider info for Tencent +""" + +from perfkitbenchmarker import providers +from perfkitbenchmarker import provider_info + + +class TencentProviderInfo(provider_info.BaseProviderInfo): + + CLOUD = providers.TENCENT diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/requirements.txt b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/requirements.txt new file mode 100644 index 0000000..61bd443 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/requirements.txt @@ -0,0 +1,18 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Requirements for running PerfKit Benchmarker on Tencent. 
+-r../../../requirements.txt +tccli>=3.0.68.1 + diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_disk.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_disk.py new file mode 100644 index 0000000..21cde97 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_disk.py @@ -0,0 +1,184 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Module containing classes related to Tencent disks. +""" + +import json +import threading +import logging + +from perfkitbenchmarker import disk +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import providers +from perfkitbenchmarker.providers.tencent import util + +TENCENT_CLOUD_EXCEPTION_STDOUT = '[TencentCloudSDKException]' +DISK_CHARGE_TYPE = 'POSTPAID_BY_HOUR' +LOCAL_DISK_TYPES = ['LOCAL_BASIC', 'LOCAL_SSD'] + + +class TencentWaitOnDiskRetryableError(Exception): + """Error for retrying _Exists when an Tencent Disk has an ID but is not yet created.""" + + +class TencentCloudDiskUnknownCLIRetryableError(Exception): + """Error for retrying commands when STDOUT returns an unexpected CLI error (and still exit 0)""" + + +class TencentDisk(disk.BaseDisk): + """Object representing a Tencent Disk.""" + _lock = threading.Lock() + vm_devices = {} + + def __init__(self, disk_spec, vm): + super(TencentDisk, self).__init__(disk_spec) + + self.id = None + self.vm = vm + self.zone = vm.zone + self.project_id = vm.project_id + self.region = util.GetRegionFromZone(self.zone) + self.device_letter = None + self.attached_vm_id = None + self.device_path = None + self.disk_recently_created = False + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudDiskUnknownCLIRetryableError,)) + def _Create(self): + """Creates the disk.""" + placement = { + "Zone": self.zone + } + create_cmd = util.TENCENT_PREFIX + [ + 'cbs', + 'CreateDisks', + '--region', self.region, + '--DiskType', self.disk_type, + '--DiskChargeType', DISK_CHARGE_TYPE, + '--Placement', json.dumps(placement), + '--DiskSize', str(self.disk_size)] + util.TENCENT_SUFFIX + + stdout, _, _ = vm_util.IssueCommand(create_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudDiskUnknownCLIRetryableError + self.id = response['DiskIdSet'][0] + self.disk_recently_created = True + util.AddDefaultTags(self.id, self.region) + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=10, + retryable_exceptions=(TencentCloudDiskUnknownCLIRetryableError)) + def _Delete(self): + """Deletes the disk.""" + delete_cmd = util.TENCENT_PREFIX + [ + 'cbs', + 'TerminateDisks', + '--region', self.region, + '--DiskIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + logging.info('Deleting Tencent volume %s. 
This may fail if the disk is not ' + 'yet detached, but will be retried.', self.id) + stdout, _, _ = vm_util.IssueCommand(delete_cmd) + try: + json.loads(stdout) + except ValueError as e: + if TENCENT_CLOUD_EXCEPTION_STDOUT not in stdout: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudDiskUnknownCLIRetryableError + self.disk_recently_created = False + + @vm_util.Retry(poll_interval=3, log_errors=False, max_retries=5, + retryable_exceptions=(TencentWaitOnDiskRetryableError, TencentCloudDiskUnknownCLIRetryableError)) + def _Exists(self): + """Returns true if the disk exists.""" + describe_cmd = util.TENCENT_PREFIX + [ + 'cbs', + 'DescribeDisks', + '--region', self.region, + '--DiskIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudDiskUnknownCLIRetryableError + disks = response['DiskSet'] + assert len(disks) < 2, 'Too many volumes.' + if not disks: + if self.disk_recently_created: + raise TencentWaitOnDiskRetryableError + else: + return False + return len(disks) > 0 + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=10, + retryable_exceptions=(TencentCloudDiskUnknownCLIRetryableError)) + def Attach(self, vm): + """Attaches the disk to a VM. + + Args: + vm: The Tencent instance to which the disk will be attached. + """ + self.attached_vm_id = vm.id + attach_cmd = util.TENCENT_PREFIX + [ + 'cbs', + 'AttachDisks', + '--region', self.region, + '--InstanceId', vm.id, + '--DiskIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + logging.info('Attaching Tencent disk %s. 
This may fail if the disk is not '
+                 'ready, but will be retried.', self.id)
+    stdout, _ = util.IssueRetryableCommand(attach_cmd)
+    try:
+      json.loads(stdout)
+    except ValueError as e:
+      logging.warn("Encountered unexpected return from command '{}', retrying.".format(e))
+      raise TencentCloudDiskUnknownCLIRetryableError
+
+  @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=10,
+                 retryable_exceptions=(TencentCloudDiskUnknownCLIRetryableError))
+  def Detach(self):
+    """Detaches the disk from a VM."""
+    detach_cmd = util.TENCENT_PREFIX + [
+        'cbs',
+        'DetachDisks',
+        '--region', self.region,
+        '--DiskIds', json.dumps([self.id])] + util.TENCENT_SUFFIX
+    stdout, _ = util.IssueRetryableCommand(detach_cmd)
+    try:
+      json.loads(stdout)
+    except ValueError as e:
+      logging.warn("Encountered unexpected return from command '{}', retrying.".format(e))
+      raise TencentCloudDiskUnknownCLIRetryableError
+    util.IssueRetryableCommand(detach_cmd)
+
+  def GetDevicePath(self):
+    """Returns the path to the device inside the VM."""
+    if not self.device_path:
+      self._GetPathFromRemoteHost()
+    return self.device_path
+
+  def SetDiskId(self, id):
+    """Sets Disk ID for the local disk case since Create() will not be called"""
+    self.id = id
+
+  @vm_util.Retry(log_errors=False, poll_interval=5, max_retries=10)
+  def _GetPathFromRemoteHost(self):
+    """Gets the device path for the disk from the remote host, retrying until it is available."""
+    readlink_cmd = 'readlink -e /dev/disk/by-id/virtio-%s' % self.id
+    resp, _ = self.vm.RemoteHostCommand(readlink_cmd, suppress_warning=True)
+    self.device_path = resp[:-1]
diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_network.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_network.py
new file mode 100644
index 0000000..4aea85f
--- /dev/null
+++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_network.py
@@ -0,0 +1,244 @@
+# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Module containing classes related to Tencent VM networking.
+"""
+import json
+import logging
+import threading
+
+from absl import flags
+from perfkitbenchmarker import network
+from perfkitbenchmarker import providers
+from perfkitbenchmarker import resource
+from perfkitbenchmarker import vm_util
+from perfkitbenchmarker.providers.tencent import util
+
+FLAGS = flags.FLAGS
+TENCENT_CLOUD_EXCEPTION_STDOUT = '[TencentCloudSDKException]'
+
+
+class TencentCloudVPCError(Exception):
+  """Error for VPC-related failures."""
+
+
+class TencentCloudNetworkUnknownCLIRetryableError(Exception):
+  """Error for retrying commands when STDOUT returns an unexpected CLI error (and still exit 0)"""
+
+
+class TencentNetwork(network.BaseNetwork):
+  """Object representing a Tencent Network.
+
+  Attributes:
+    region: The Tencent region the Network is in.
+    zone: The Zone within the region for this network.
+    vpc: The Tencent VPC for this network.
+    subnet: the Tencent subnet for this zone.
+ """ + + CLOUD = providers.TENCENT + + def __repr__(self): + return '%s(%r)' % (self.__class__, self.__dict__) + + def __init__(self, spec): + """Initializes TencentNetwork instances. + + Args: + spec: A BaseNetworkSpec object. + """ + super(TencentNetwork, self).__init__(spec) + self.region = util.GetRegionFromZone(spec.zone) + self.zone = spec.zone + self.vpc = TencentVpc(self.region) + self.subnet = None + self._create_lock = threading.Lock() + + def Create(self): + """Creates the network.""" + self.route_table = None + self.created = False + self.vpc.Create() + if self.subnet is None: + cidr = self.vpc.NextSubnetCidrBlock() + self.subnet = TencentSubnet(self.zone, self.vpc.id, + cidr_block=cidr) + self.subnet.Create() + + def Delete(self): + """Deletes the network.""" + self.vpc.Delete() + + +class TencentVpc(resource.BaseResource): + """An object representing an Tencent VPC.""" + + def __init__(self, region): + super(TencentVpc, self).__init__() + self.region = region + self.id = None + self.name = 'perfkit-vpc-{0}'.format(FLAGS.run_uri) + + # _subnet_index tracks the next unused 10.0.x.0/24 block. + self._subnet_index = 0 + # Lock protecting _subnet_index + self._subnet_index_lock = threading.Lock() + self.default_security_group_id = None + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudNetworkUnknownCLIRetryableError,)) + def _Create(self): + """Creates the VPC.""" + create_cmd = util.TENCENT_PREFIX + [ + 'vpc', 'CreateVpc', + '--region', self.region, + '--CidrBlock', '10.0.0.0/16', + '--VpcName', self.name + ] + util.TENCENT_SUFFIX + stdout, _, _ = vm_util.IssueCommand(create_cmd) + try: + response = json.loads(stdout) + except ValueError: + logging.error(stdout) + raise TencentCloudVPCError + self.id = response['Vpc']['VpcId'] + + def _PostCreate(self): + """Looks up the VPC default security group.""" + util.AddDefaultTags(self.id, self.region) + return + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudNetworkUnknownCLIRetryableError,)) + def _Exists(self): + """Returns true if the VPC exists.""" + describe_cmd = util.TENCENT_PREFIX + [ + 'vpc', 'DescribeVpcs', + '--region', self.region, + '--VpcIds', json.dumps([self.id]) + ] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudNetworkUnknownCLIRetryableError + assert response['TotalCount'] < 2, 'Too many VPCs.' + return response['TotalCount'] > 0 + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudNetworkUnknownCLIRetryableError,)) + def _Delete(self): + """Deletes the VPC.""" + delete_cmd = util.TENCENT_PREFIX + [ + 'vpc', 'DeleteVpc', + '--region', self.region, + '--VpcId', self.id + ] + util.TENCENT_SUFFIX + stdout, _, _ = vm_util.IssueCommand(delete_cmd) + try: + json.loads(stdout) + except ValueError as e: + if TENCENT_CLOUD_EXCEPTION_STDOUT not in stdout: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudNetworkUnknownCLIRetryableError + + def NextSubnetCidrBlock(self): + """Returns the next available /24 CIDR block in this VPC. + + Each VPC has a 10.0.0.0/16 CIDR block. + Each subnet is assigned a /24 within this allocation. + Calls to this method return the next unused /24. 
+ + Returns: + A string representing the next available /24 block, in CIDR notation. + Raises: + ValueError: when no additional subnets can be created. + """ + with self._subnet_index_lock: + if self._subnet_index >= (1 << 8) - 1: + raise ValueError('Exceeded subnet limit ({0}).'.format( + self._subnet_index)) + cidr = '10.0.{0}.0/24'.format(self._subnet_index) + self._subnet_index += 1 + return cidr + + +class TencentSubnet(resource.BaseResource): + def __init__(self, zone, vpc_id, cidr_block='10.0.0.0/24'): + super(TencentSubnet, self).__init__() + self.zone = zone + self.region = util.GetRegionFromZone(zone) + self.vpc_id = vpc_id + self.id = None + self.cidr_block = cidr_block + self.name = self._GetSubNetName() + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudNetworkUnknownCLIRetryableError,)) + def _Create(self): + """Creates the subnet.""" + create_cmd = util.TENCENT_PREFIX + [ + 'vpc', 'CreateSubnet', + '--region', self.region, + '--VpcId', self.vpc_id, + '--SubnetName', self.name, + '--CidrBlock', self.cidr_block, + '--Zone', self.zone + ] + util.TENCENT_SUFFIX + stdout, _, _ = vm_util.IssueCommand(create_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudNetworkUnknownCLIRetryableError + self.id = response['Subnet']['SubnetId'] + util.AddDefaultTags(self.id, self.region) + + def _Delete(self): + """Deletes the subnet.""" + delete_cmd = util.TENCENT_PREFIX + [ + 'vpc', 'DeleteSubnet', + '--region', self.region, + '--SubnetId', self.id + ] + util.TENCENT_SUFFIX + stdout, _, _ = vm_util.IssueCommand(delete_cmd) + try: + json.loads(stdout) + except ValueError as e: + if TENCENT_CLOUD_EXCEPTION_STDOUT not in stdout: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudNetworkUnknownCLIRetryableError + + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudNetworkUnknownCLIRetryableError,)) + def _Exists(self): + """Returns true if the subnet exists.""" + describe_cmd = util.TENCENT_PREFIX + [ + 'vpc', 'DescribeSubnets', + '--region', self.region, + '--SubnetIds', json.dumps([self.id]) + ] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudNetworkUnknownCLIRetryableError + assert response['TotalCount'] < 2, 'Too many Subnets.' + return response['TotalCount'] > 0 + + @classmethod + def _GetSubNetName(cls): + return 'perfkit_subnet_{0}'.format(FLAGS.run_uri) diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_virtual_machine.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_virtual_machine.py new file mode 100644 index 0000000..528fafd --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/tencent_virtual_machine.py @@ -0,0 +1,422 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Class to represent a Tencent Virtual Machine object. + +All VM specifics are self-contained and the class provides methods to +operate on the VM: boot, shutdown, etc. +""" + +import json +import threading +import logging +import base64 + +from absl import flags +from perfkitbenchmarker import virtual_machine +from perfkitbenchmarker import linux_virtual_machine +from perfkitbenchmarker import vm_util +from perfkitbenchmarker import disk +from perfkitbenchmarker import errors +from perfkitbenchmarker import providers +from perfkitbenchmarker.providers.tencent import tencent_disk +from perfkitbenchmarker.providers.tencent import tencent_network +from perfkitbenchmarker.providers.tencent import util +FLAGS = flags.FLAGS + +# Tencent CLI returns 0 even when an exception is encountered. +# In some cases, stdout must be scanned for this message. +TENCENT_CLOUD_EXCEPTION_STDOUT = '[TencentCloudSDKException]' +TENCENT_CLOUD_SOLD_OUT_MSG = 'ResourcesSoldOut' +TENCENT_CLOUD_INSUFFICIENT_BALANCE = 'InsufficientBalance' + +INSTANCE_TRANSITIONAL_STATUSES = frozenset(['TERMINATING', 'PENDING']) +DEFAULT_SYSTEM_DISK_SIZE = 50 +DEFAULT_SYSTEM_DISK_TYPE = 'CLOUD_PREMIUM' +DEFAULT_INTERNET_BANDWIDTH = 100 +DEFAULT_USER_NAME = 'perfkit' + + +class TencentTransitionalVmRetryableError(Exception): + """Error for retrying _Exists when an Tencent VM is in a transitional state.""" + + +class TencentCloudSDKExceptionRetryableError(Exception): + """Error for retrying commands when STDOUT returns TencentCLoudSDKException (and still exit 0)""" + + +class TencentCloudUnknownCLIRetryableError(Exception): + """Error for retrying commands when STDOUT returns an unexpected CLI error (and still exit 0)""" + + +class TencentCloudResourceSoldOut(Exception): + """Error for resouce sold out""" + + +class TencentCloudInsufficientBalance(Exception): + """Error for insufficient account balance""" + + +class TencentVirtualMachine(virtual_machine.BaseVirtualMachine): + """Object representing an Tencent Virtual Machine.""" + + IMAGE_NAME_FILTER = None + CLOUD = providers.TENCENT + + def __init__(self, vm_spec): + """Initialize a Tencent virtual machine. + + Args: + vm_spec: virtual_machine.BaseVirtualMachineSpec object of the VM. 
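The module comment above notes that tccli can exit 0 while printing '[TencentCloudSDKException]' to stdout, which is why every call site parses stdout as JSON and scans it for markers such as ResourcesSoldOut before deciding whether to retry. The snippet below is a self-contained sketch of that pattern using subprocess directly; classify_tccli_output is a hypothetical helper, and the commented-out invocation assumes a configured tccli installation.

import json
import subprocess

TENCENT_CLOUD_EXCEPTION_STDOUT = '[TencentCloudSDKException]'


def classify_tccli_output(cmd):
  # Illustrative sketch: the exit code alone is not trustworthy, so parse
  # stdout as JSON and fall back to scanning it for the SDK exception marker.
  proc = subprocess.run(cmd, capture_output=True, text=True)
  try:
    return json.loads(proc.stdout)  # well-formed response
  except ValueError:
    if TENCENT_CLOUD_EXCEPTION_STDOUT in proc.stdout:
      raise RuntimeError('Tencent SDK exception: %s' % proc.stdout.strip())
    raise RuntimeError('Unexpected tccli output: %s' % proc.stdout.strip())


# Example (hypothetical invocation):
# response = classify_tccli_output(
#     ['tccli', 'cvm', 'DescribeInstances', '--region', 'ap-guangzhou',
#      '--output', 'json'])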
+ """ + super(TencentVirtualMachine, self).__init__(vm_spec) + self.region = util.GetRegionFromZone(self.zone) + self.user_name = FLAGS.tencent_user_name + self.network = tencent_network.TencentNetwork.GetNetwork(self) + self.host = None + self.id = None + self.project_id = FLAGS.tencent_project_id + self.key_id = None + self.boot_disk_size = FLAGS.tencent_boot_disk_size or DEFAULT_SYSTEM_DISK_SIZE + self.boot_disk_type = FLAGS.tencent_boot_disk_type or DEFAULT_SYSTEM_DISK_TYPE + self.internet_bandwidth = FLAGS.tencent_internet_bandwidth or DEFAULT_INTERNET_BANDWIDTH + self.image_id = FLAGS.tencent_image_id or None + + @vm_util.Retry() + def _PostCreate(self): + """Get the instance's data.""" + describe_cmd = util.TENCENT_PREFIX + [ + 'cvm', 'DescribeInstances', + '--region', self.region, + '--InstanceIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + logging.info('Getting instance %s public IP. This will fail until ' + 'a public IP is available, but will be retried.', self.id) + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + instance = response['InstanceSet'][0] + if not instance['PublicIpAddresses']: + raise TencentTransitionalVmRetryableError + self.ip_address = instance['PublicIpAddresses'][0] + self.internal_ip = instance['PrivateIpAddresses'][0] + + def _CreateDependencies(self): + """Create VM dependencies.""" + self.image_id = self.image_id or self.GetDefaultImage(self.machine_type, + self.region) + self.key_id = TencentKeyFileManager.ImportKeyfile(self.region) + + + def _DeleteDependencies(self): + """Delete VM dependencies.""" + if self.key_id: + TencentKeyFileManager.DeleteKeyfile(self.region, self.key_id) + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudUnknownCLIRetryableError,)) + def _Create(self): + """Create a VM instance.""" + placement = { + 'Zone': self.zone, + 'ProjectId': self.project_id + + } + login_settings = { + 'KeyIds': [self.key_id] + } + vpc = { + 'VpcId': self.network.vpc.id, + 'SubnetId': self.network.subnet.id + } + system_disk = { + 'DiskType': self.boot_disk_type, + 'DiskSize': self.boot_disk_size + } + internet_accessible = { + 'PublicIpAssigned': True, + 'InternetMaxBandwidthOut': self.internet_bandwidth + } + + create_cmd = util.TENCENT_PREFIX + [ + 'cvm', 'RunInstances', + '--region', self.region, + '--Placement', json.dumps(placement), + '--InstanceType', self.machine_type, + '--ImageId', self.image_id, + '--VirtualPrivateCloud', json.dumps(vpc), + '--InternetAccessible', json.dumps(internet_accessible), + '--SystemDisk', json.dumps(system_disk), + '--LoginSettings', json.dumps(login_settings), + '--InstanceName', self.name + ] + util.TENCENT_SUFFIX + + # Create user and add SSH key if image doesn't have a default non-root user + if self.CREATE_NON_ROOT_USER: + public_key = TencentKeyFileManager.GetPublicKey() + user_data = util.ADD_USER_TEMPLATE.format(self.user_name, public_key) + logging.debug('encoding startup script: %s' % user_data) + create_cmd.extend(['--UserData', base64.b64encode(user_data.encode("utf-8"))]) + + # Tccli will exit 0 and provide a non-json formatted error msg to stdout in case of failure + stdout, _, _ = vm_util.IssueCommand(create_cmd) + try: + response = json.loads(stdout) + except ValueError: + if TENCENT_CLOUD_EXCEPTION_STDOUT in stdout: + if 
TENCENT_CLOUD_SOLD_OUT_MSG in stdout: + raise TencentCloudResourceSoldOut + elif TENCENT_CLOUD_INSUFFICIENT_BALANCE in stdout: + raise TencentCloudInsufficientBalance + else: + raise TencentCloudSDKExceptionRetryableError + else: + raise TencentCloudUnknownCLIRetryableError + + self.id = response['InstanceIdSet'][0] + util.AddDefaultTags(self.id, self.region) + + @vm_util.Retry(poll_interval=5, log_errors=False, max_retries=5, + retryable_exceptions=(TencentCloudUnknownCLIRetryableError,)) + def _Delete(self): + """Delete a VM instance.""" + delete_cmd = util.TENCENT_PREFIX + [ + 'cvm', + 'TerminateInstances', + '--region', self.region, + '--InstanceIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + stdout, stderr, _ = vm_util.IssueCommand(delete_cmd) + try: + json.loads(stdout) + except ValueError as e: + if TENCENT_CLOUD_EXCEPTION_STDOUT not in stderr: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + + @vm_util.Retry(poll_interval=5, log_errors=False, + retryable_exceptions=(TencentTransitionalVmRetryableError, TencentCloudUnknownCLIRetryableError)) + def _Exists(self): + """Returns whether the VM exists.""" + describe_cmd = util.TENCENT_PREFIX + [ + 'cvm', 'DescribeInstances', + '--region', self.region, + '--InstanceIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + instance_set = response['InstanceSet'] + if not instance_set: + return False + # TODO There is potential issue here with tccli, possibly during high traffic periods where the instance query will return empty when an instnace is transitioning from PENDING -> RUNNING + if instance_set[0]['InstanceState'] in INSTANCE_TRANSITIONAL_STATUSES: + raise TencentTransitionalVmRetryableError + assert len(instance_set) < 2, 'Too many instances.' + return len(instance_set) > 0 + + @classmethod + @vm_util.Retry(poll_interval=5, log_errors=False, + retryable_exceptions=(TencentTransitionalVmRetryableError, TencentCloudUnknownCLIRetryableError)) + def GetDefaultImage(cls, machine_type, region): + """Returns Image ID of first match with IMAGE_NAME_MATCH. + Results from DescribeImages are evaluated in the order they are returned from the command (assumed arbitrary). + """ + if cls.IMAGE_NAME_MATCH is None: + return None + describe_cmd = util.TENCENT_PREFIX + [ + 'cvm', 'DescribeImages', + '--region', region, + '--Limit', '100', + '--InstanceType', machine_type + ] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + for i in response['ImageSet']: + if i['ImageName'] in cls.IMAGE_NAME_MATCH and i['ImageSource'] == 'OFFICIAL': + logging.debug('Found image %s, %s' % (i['ImageName'], i['ImageId'])) + return i['ImageId'] + return None + + def CreateScratchDisk(self, disk_spec): + """Create a VM's scratch disk. + + Args: + disk_spec: virtual_machine.BaseDiskSpec object of the disk. 
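GetDefaultImage above filters DescribeImages results with the expression `i['ImageName'] in cls.IMAGE_NAME_MATCH`, so a string-valued IMAGE_NAME_MATCH acts as a substring/equality test while the list form (used by the Ubuntu 20.04 class later in this file) acts as exact membership. The sketch below only illustrates that difference; pick_image_id and the sample image records are fabricated for the example.

def pick_image_id(image_set, name_match):
  # Illustrative sketch mirroring the filter in GetDefaultImage: first
  # OFFICIAL image whose name satisfies `ImageName in name_match`.
  for image in image_set:
    if image['ImageName'] in name_match and image['ImageSource'] == 'OFFICIAL':
      return image['ImageId']
  return None


images = [
    {'ImageName': 'Ubuntu Server 20.04 LTS 64', 'ImageSource': 'OFFICIAL',
     'ImageId': 'img-111'},
    {'ImageName': 'Ubuntu 20.04(arm64)', 'ImageSource': 'OFFICIAL',
     'ImageId': 'img-222'},
]

print(pick_image_id(images, 'Ubuntu Server 20.04 LTS 64'))  # img-111
print(pick_image_id(images, ['Ubuntu 20.04(arm64)']))       # img-222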
+ """ + disk_ids = [] + if disk_spec.disk_type in tencent_disk.LOCAL_DISK_TYPES: + disk_spec.disk_type = disk.LOCAL + logging.debug("Querying instance for local disk ids") + disk_ids = self._GetDataDiskIds() + if len(disk_ids) != disk_spec.num_striped_disks: + raise errors.Error('Expected %s local disks but found %s local disks' % + (disk_spec.num_striped_disks, len(disk_ids))) + disks = [] + for i in range(disk_spec.num_striped_disks): + data_disk = tencent_disk.TencentDisk(disk_spec, self) + if disk_spec.disk_type == disk.LOCAL: + data_disk.SetDiskId(disk_ids[i]) + disks.append(data_disk) + self._CreateScratchDiskFromDisks(disk_spec, disks) + + @vm_util.Retry(poll_interval=2, log_errors=False, + retryable_exceptions=(TencentCloudUnknownCLIRetryableError,)) + def _GetDataDiskIds(self): + """Returns Ids of attached data disks""" + disk_ids = [] + describe_cmd = util.TENCENT_PREFIX + [ + 'cvm', 'DescribeInstances', + '--region', self.region, + '--InstanceIds', json.dumps([self.id])] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(describe_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + instance = response['InstanceSet'][0] + for data_disk in instance['DataDisks']: + disk_ids.append(data_disk['DiskId']) + return disk_ids + + +class TencentKeyFileManager(object): + """Object for managing Tencent Keyfiles.""" + _lock = threading.Lock() + imported_keyfile_set = set() + deleted_keyfile_set = set() + run_uri_key_ids = {} + + @classmethod + @vm_util.Retry(poll_interval=2, log_errors=False, + retryable_exceptions=(TencentCloudSDKExceptionRetryableError, TencentCloudUnknownCLIRetryableError)) + def ImportKeyfile(cls, region): + """Imports the public keyfile to Tencent.""" + with cls._lock: + if FLAGS.run_uri in cls.run_uri_key_ids: + return cls.run_uri_key_ids[FLAGS.run_uri] + public_key = cls.GetPublicKey() + import_cmd = util.TENCENT_PREFIX + [ + 'cvm', + 'ImportKeyPair', + '--ProjectId', '0', + '--region', region, + '--KeyName', cls.GetKeyNameForRun(), + '--PublicKey', public_key] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(import_cmd) + try: + response = json.loads(stdout) + except ValueError as e: + if TENCENT_CLOUD_EXCEPTION_STDOUT in stdout: + raise TencentCloudSDKExceptionRetryableError + else: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + + key_id = response['KeyId'] + cls.run_uri_key_ids[FLAGS.run_uri] = key_id + return key_id + + + @classmethod + @vm_util.Retry(poll_interval=2, log_errors=False, + retryable_exceptions=(TencentCloudSDKExceptionRetryableError, TencentCloudUnknownCLIRetryableError)) + def DeleteKeyfile(cls, region, key_id): + """Deletes the imported KeyPair for a run_uri.""" + with cls._lock: + if FLAGS.run_uri not in cls.run_uri_key_ids: + return + delete_cmd = util.TENCENT_PREFIX + [ + 'cvm', + 'DeleteKeyPairs', + '--region', region, + '--KeyIds', json.dumps([key_id])] + util.TENCENT_SUFFIX + stdout, _ = util.IssueRetryableCommand(delete_cmd) + try: + json.loads(stdout) + except ValueError as e: + if TENCENT_CLOUD_EXCEPTION_STDOUT in stdout: + raise TencentCloudSDKExceptionRetryableError + else: + logging.warn("Encountered unexpected return from command '{}', retrying.".format(e)) + raise TencentCloudUnknownCLIRetryableError + del cls.run_uri_key_ids[FLAGS.run_uri] + + @classmethod + def 
GetKeyNameForRun(cls): + return 'perfkit_key_{0}'.format(FLAGS.run_uri) + + @classmethod + def GetPublicKey(cls): + cat_cmd = ['cat', + vm_util.GetPublicKeyPath()] + keyfile, _ = vm_util.IssueRetryableCommand(cat_cmd) + return keyfile + + +class DebianBasedTencentVirtualMachine(TencentVirtualMachine, + linux_virtual_machine.BaseDebianMixin): + CREATE_NON_ROOT_USER = False + IMAGE_NAME_MATCH = 'Ubuntu Server 16.04.1 LTS 64' + + +class Ubuntu1604BasedTencentVirtualMachine(TencentVirtualMachine, + linux_virtual_machine.Ubuntu1604Mixin): + CREATE_NON_ROOT_USER = False + IMAGE_NAME_MATCH = 'Ubuntu Server 16.04.1 LTS 64' + + +class Ubuntu1804BasedTencentVirtualMachine(TencentVirtualMachine, + linux_virtual_machine.Ubuntu1804Mixin): + CREATE_NON_ROOT_USER = False + IMAGE_NAME_MATCH = 'Ubuntu Server 18.04.1 LTS 64' + + +class Ubuntu2004BasedTencentVirtualMachine(TencentVirtualMachine, + linux_virtual_machine.Ubuntu2004Mixin): + CREATE_NON_ROOT_USER = False + IMAGE_NAME_MATCH = ['Ubuntu Server 20.04 LTS 64', 'Ubuntu 20.04(arm64)'] + + def UpdateEnvironmentPath(self): + # Tencent's image for Ubuntu 20.04 seems to have root-owned files in user home + self.RemoteCommand('sudo chown -R {0}:{0} /home/{0}'.format(self.user_name)) + + +# TODO to be verified +class Ubuntu2204BasedTencentVirtualMachine(TencentVirtualMachine, + linux_virtual_machine.Ubuntu2204Mixin): + CREATE_NON_ROOT_USER = False + IMAGE_NAME_MATCH = 'Ubuntu Server 22.04 LTS 64' + + +class CentOs7BasedTencentVirtualMachine(TencentVirtualMachine, + linux_virtual_machine.CentOs7Mixin): + CREATE_NON_ROOT_USER = True + IMAGE_NAME_MATCH = 'CentOS 7.9 64' + + def __init__(self, vm_spec): + super(CentOs7BasedTencentVirtualMachine, self).__init__(vm_spec) + user_name_set = FLAGS['tencent_user_name'].present + self.user_name = FLAGS.tencent_user_name if user_name_set else DEFAULT_USER_NAME + self.python_package_config = 'python' + self.python_dev_package_config = 'python2-devel' + self.python_pip_package_config = 'python2-pip' diff --git a/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/util.py b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/util.py new file mode 100644 index 0000000..f6af819 --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/providers/tencent/util.py @@ -0,0 +1,125 @@ +# Copyright 2015 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for working with Tencent Cloud resources.""" + + +import re +import shlex +import six + +from perfkitbenchmarker import errors +from absl import flags +from perfkitbenchmarker import vm_util + +TENCENT_PATH = 'tccli' +TENCENT_PREFIX = [TENCENT_PATH] +# Tencent positional args such as 'vpc' and 'CreateVpc' must come before flags, so format flag must come later. 
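TencentKeyFileManager above imports the public key at most once per run_uri by caching the resulting KeyId in a class-level dict guarded by a lock, and drops the cache entry again in DeleteKeyfile. The fragment below restates that memoize-under-lock pattern in isolation; the function names and the fake key ids are illustrative only.

import threading

_lock = threading.Lock()
_key_ids = {}  # run_uri -> imported key id


def import_key_once(run_uri, do_import):
  # Illustrative sketch: do_import() runs only for the first caller of a given
  # run_uri; later callers (including other threads) get the cached id.
  with _lock:
    if run_uri not in _key_ids:
      _key_ids[run_uri] = do_import()
    return _key_ids[run_uri]


def delete_key(run_uri, do_delete):
  with _lock:
    key_id = _key_ids.pop(run_uri, None)
    if key_id is not None:
      do_delete(key_id)


print(import_key_once('run123', lambda: 'skey-abc'))  # imports: skey-abc
print(import_key_once('run123', lambda: 'skey-xyz'))  # cached:  skey-abc
delete_key('run123', lambda key_id: print('deleting', key_id))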
+TENCENT_SUFFIX = ['--output', 'json'] +INSTANCE = 'instance' +FLAGS = flags.FLAGS + +ADD_USER_TEMPLATE = '''#!/bin/bash +echo "{0} ALL = NOPASSWD: ALL" >> /etc/sudoers +useradd {0} --home /home/{0} --shell /bin/bash -m +mkdir /home/{0}/.ssh +echo "{1}" >> /home/{0}/.ssh/authorized_keys +chown -R {0}:{0} /home/{0}/.ssh +chmod 700 /home/{0}/.ssh +chmod 600 /home/{0}/.ssh/authorized_keys +''' + + +def TokenizeCommand(cmd): + # This probably doesnt work for most Tencent commands that have JSON args - it strips quotes it seems + cmd_line = ' '.join(cmd) + cmd_args = shlex.split(cmd_line) + return cmd_args + + +def IsRegion(region_or_zone): + if not re.match(r'[a-z]{2}-[a-z]+$', region_or_zone): + return False + else: + return True + + +def GetRegionFromZone(zone): + """Returns the region a zone is in """ + m = re.match(r'([a-z]{2}-[a-z]+)-[0-9]?$', zone) + if not m: + raise ValueError( + '%s is not a valid Tencent zone' % zone) + return m.group(1) + + +@vm_util.Retry() +def IssueRetryableCommand(cmd, env=None): + """Tries running the provided command until it succeeds or times out. + + Args: + cmd: A list of strings such as is given to the subprocess.Popen() + constructor. + env: An alternate environment to pass to the Popen command. + + Returns: + A tuple of stdout and stderr from running the provided command. + """ + stdout, stderr, retcode = vm_util.IssueCommand(cmd, env=env) + if retcode: + raise errors.VmUtil.CalledProcessException( + 'Command returned a non-zero exit code.\n') + if stderr: + raise errors.VmUtil.CalledProcessException( + 'The command had output on stderr:\n%s' % stderr) + return stdout, stderr + + +def AddTags(resource_id, region, **kwargs): + """Adds tags to a Tencent cloud resource created by PerfKitBenchmarker. + + Args: + resource_id: An extant Tencent cloud resource to operate on. + region: The Tencent cloud region 'resource_id' was created in. + **kwargs: dict. Key-value pairs to set on the instance. + """ + if not kwargs: + return + + tag_cmd = TENCENT_PREFIX + [ + 'tag', 'AddResourceTag', + '--Resource', f'qcs::cvm:{region}::{INSTANCE}/{resource_id}' + ] + for _, (key, value) in enumerate(six.iteritems(kwargs)): + tmp_cmd = tag_cmd.copy() + tmp_cmd.extend([ + '--TagKey', str(key), + '--TagValue', str(value) + ]) + vm_util.IssueRetryableCommand(tmp_cmd) + + +def AddDefaultTags(resource_id, region): + """Adds tags to a Tencent cloud resource created by PerfKitBenchmarker. + + By default, resources are tagged with "owner" and "perfkitbenchmarker-run" + key-value + pairs. + + Args: + resource_id: An extant Tencent cloud resource to operate on. + region: The Tencent cloud region 'resource_id' was created in. + """ + tags = {'owner': FLAGS.owner, 'perfkitbenchmarker-run': FLAGS.run_uri} + AddTags(resource_id, region, **tags) diff --git a/script/cumulus/pkb/perfkitbenchmarker/publisher.py b/script/cumulus/pkb/perfkitbenchmarker/publisher.py new file mode 100755 index 0000000..75b775b --- /dev/null +++ b/script/cumulus/pkb/perfkitbenchmarker/publisher.py @@ -0,0 +1,1099 @@ +#!/usr/bin/env python + +# Copyright 2014 PerfKitBenchmarker Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
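GetRegionFromZone above derives the region by stripping the trailing availability-zone index with a regular expression. The short check below just exercises that same pattern; the zone strings are examples, and the single optional digit is a property of the regex as written.

import re

ZONE_RE = re.compile(r'([a-z]{2}-[a-z]+)-[0-9]?$')  # same pattern as above

for zone in ('ap-guangzhou-3', 'na-siliconvalley-1', 'ap-guangzhou'):
  m = ZONE_RE.match(zone)
  print(zone, '->', m.group(1) if m else 'not a valid Tencent zone')
# ap-guangzhou-3 -> ap-guangzhou
# na-siliconvalley-1 -> na-siliconvalley
# ap-guangzhou -> not a valid Tencent zone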
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Classes to collect and publish performance samples to various sinks.""" + + +import abc +import collections +import copy +import csv +import datetime +import fcntl +import itertools +import json +import logging +import math +import operator +import posixpath +import pprint +import requests +import sys +import time +from typing import List +import uuid + +from absl import flags +from perfkitbenchmarker import events +from perfkitbenchmarker import flag_util +from perfkitbenchmarker import log_util +from perfkitbenchmarker import sample as pkb_sample +from perfkitbenchmarker import stages +from perfkitbenchmarker import version +from perfkitbenchmarker import vm_util +import six +from six.moves import urllib +import six.moves.http_client as httplib + +FLAGS = flags.FLAGS + +flags.DEFINE_string( + 'product_name', + 'PerfKitBenchmarker', + 'The product name to use when publishing results.') + +flags.DEFINE_boolean( + 'official', + False, + 'A boolean indicating whether results are official or not. The ' + 'default is False. Official test results are treated and queried ' + 'differently from non-official test results.') + +flags.DEFINE_boolean( + 'hostname_metadata', + False, + 'A boolean indicating whether to publish VM hostnames as part of sample ' + 'metadata.') + +flags.DEFINE_string( + 'json_path', + None, + 'A path to write newline-delimited JSON results ' + 'Default: write to a run-specific temporary directory') +flags.DEFINE_enum( + 'json_write_mode', + 'w', + ['w', 'a'], + 'Open mode for file specified by --json_path. Default: overwrite file') +flags.DEFINE_boolean( + 'collapse_labels', + False, + 'Collapse entries in labels in JSON output.') +flags.DEFINE_string( + 'csv_path', + None, + 'A path to write CSV-format results') + +flags.DEFINE_string( + 'bigquery_table', + None, + 'The BigQuery table to publish results to. This should be of the form ' + '"[project_id:]dataset_name.table_name".') +flags.DEFINE_string( + 'bq_path', 'bq', 'Path to the "bq" executable.') +flags.DEFINE_string( + 'bq_project', None, 'Project to use for authenticating with BigQuery.') +flags.DEFINE_string( + 'service_account', None, 'Service account to use to authenticate with BQ.') +flags.DEFINE_string( + 'service_account_private_key', None, + 'Service private key for authenticating with BQ.') +flags.DEFINE_string( + 'application_default_credential_file', None, + 'Application default credentials file for authenticating with BQ.') + +flags.DEFINE_string( + 'gsutil_path', 'gsutil', 'path to the "gsutil" executable') +flags.DEFINE_string( + 'cloud_storage_bucket', + None, + 'GCS bucket to upload records to. Bucket must exist. ' + 'This flag differs from --hourly_partitioned_cloud_storage_bucket ' + 'by putting records directly in the bucket.') +PARTITIONED_GCS_URL = flags.DEFINE_string( + 'hourly_partitioned_cloud_storage_bucket', None, + 'GCS bucket to upload records to. Bucket must exist. This flag differs ' + 'from --cloud_storage_bucket by putting records in subfolders based on ' + 'time of publish. i.e. 
gs://bucket/YYYY/mm/dd/HH/data.') +flags.DEFINE_string( + 'es_uri', None, + 'The Elasticsearch address and port. e.g. http://localhost:9200') + +flags.DEFINE_string( + 'es_index', 'perfkit', 'Elasticsearch index name to store documents') + +flags.DEFINE_string('es_type', 'result', 'Elasticsearch document type') + +flags.DEFINE_multi_string( + 'metadata', + [], + 'A colon separated key-value pair that will be added to the labels field ' + 'of all samples as metadata. Multiple key-value pairs may be specified ' + 'by separating each pair by commas.') + +flags.DEFINE_string( + 'influx_uri', None, + 'The Influx DB address and port. Expects the format hostname:port' + 'If port is not passed in it assumes port 80. e.g. localhost:8086') + +flags.DEFINE_string( + 'influx_db_name', 'perfkit', + 'Name of Influx DB database that you wish to publish to or create') + +flags.DEFINE_boolean( + 'publish_config', + True, + 'A boolean indicating whether to publish user config including ' + 'command line and user configuration file contents.') + +flags.DEFINE_boolean( + 'record_log_publisher', True, + 'Whether to use the log publisher or not.') + +DEFAULT_JSON_OUTPUT_NAME = 'perfkitbenchmarker_results.json' +DEFAULT_CREDENTIALS_JSON = 'credentials.json' +GCS_OBJECT_NAME_LENGTH = 20 +# A list of SamplePublishers that can be extended to add support for publishing +# types beyond those in this module. The classes should not require any +# arguments to their __init__ methods. The SampleCollector will unconditionally +# call PublishSamples using Publishers added via this method. +EXTERNAL_PUBLISHERS = [] + + +def PublishRunStageSamples(benchmark_spec, samples): + """Publishes benchmark run-stage samples immediately. + + Typically, a benchmark publishes samples by returning them from the Run + function so that they can be pubished at set points (publish periods or at the + end of a run). This function can be called to publish the samples immediately. + + Note that metadata for the run number will not be added to such samples. + TODO(deitz): Can we still add the run number? This will require passing a run + number or callback to the benchmark Run functions (or some other mechanism). + + Args: + benchmark_spec: The BenchmarkSpec created for the benchmark. + samples: A list of samples to publish. + """ + events.samples_created.send( + stages.RUN, benchmark_spec=benchmark_spec, samples=samples) + collector = SampleCollector() + collector.AddSamples(samples, benchmark_spec.name, benchmark_spec) + collector.PublishSamples() + + +def GetLabelsFromDict(metadata): + """Converts a metadata dictionary to a string of labels sorted by key. + + Args: + metadata: a dictionary of string key value pairs. + + Returns: + A string of labels, sorted by key, in the format that Perfkit uses. 
+ """ + labels = [] + for k, v in sorted(six.iteritems(metadata)): + labels.append('|%s:%s|' % (k, v)) + return ','.join(labels) + + +def FormatTimestampForElasticsearch(epoch_us): + """Convert the floating epoch timestamp in micro seconds epoch_us to + yyyy-MM-dd HH:mm:ss.SSSSSS in string + """ + ts = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(epoch_us)) + num_dec = ("%.6f" % (epoch_us - math.floor(epoch_us))).split('.')[1] + new_ts = '%s.%s' % (ts, num_dec) + return new_ts + + +def DeDotKeys(res): + """Recursively replace dot with underscore in all keys in a dictionary.""" + for key, value in res.copy().items(): + if isinstance(value, dict): + DeDotKeys(value) + new_key = key.replace('.', '_') + if new_key != key: + res[new_key] = res.pop(key) + return res + + +class MetadataProvider(six.with_metaclass(abc.ABCMeta, object)): + """A provider of sample metadata.""" + + @abc.abstractmethod + def AddMetadata(self, metadata, benchmark_spec): + """Add metadata to a dictionary. + + Existing values will be overwritten. + + Args: + metadata: dict. Dictionary of metadata to update. + benchmark_spec: BenchmarkSpec. The benchmark specification. + + Returns: + Updated 'metadata'. + """ + raise NotImplementedError() + + +class DefaultMetadataProvider(MetadataProvider): + """Adds default metadata to samples.""" + + def AddMetadata(self, metadata, benchmark_spec): + metadata = metadata.copy() + metadata['perfkitbenchmarker_version'] = version.VERSION + if FLAGS.simulate_maintenance: + metadata['simulate_maintenance'] = True + if FLAGS.hostname_metadata: + metadata['hostnames'] = ','.join([vm.hostname + for vm in benchmark_spec.vms]) + if benchmark_spec.container_cluster: + cluster = benchmark_spec.container_cluster + for k, v in six.iteritems(cluster.GetResourceMetadata()): + metadata['container_cluster_' + k] = v + + if benchmark_spec.relational_db: + db = benchmark_spec.relational_db + for k, v in six.iteritems(db.GetResourceMetadata()): + # TODO(jerlawson): Rename to relational_db. + metadata['managed_relational_db_' + k] = v + + for name, tpu in six.iteritems(benchmark_spec.tpu_groups): + for k, v in six.iteritems(tpu.GetResourceMetadata()): + metadata['tpu_' + k] = v + + for name, vms in six.iteritems(benchmark_spec.vm_groups): + if len(vms) == 0: + continue + # Get a representative VM so that we can publish the cloud, zone, + # machine type, and image. + vm = vms[-1] + name_prefix = '' if name == 'default' else name + '_' + for k, v in six.iteritems(vm.GetResourceMetadata()): + metadata[name_prefix + k] = v + metadata[name_prefix + 'vm_count'] = len(vms) + for k, v in six.iteritems(vm.GetOSResourceMetadata()): + metadata[name_prefix + k] = v + + if vm.scratch_disks: + data_disk = vm.scratch_disks[0] + metadata[name_prefix + 'data_disk_count'] = len(vm.scratch_disks) + for key, value in six.iteritems(data_disk.GetResourceMetadata()): + metadata[name_prefix + 'data_disk_0_%s' % (key,)] = value + + if FLAGS.set_files: + metadata['set_files'] = ','.join(FLAGS.set_files) + if FLAGS.sysctl: + metadata['sysctl'] = ','.join(FLAGS.sysctl) + + # Flatten all user metadata into a single list (since each string in the + # FLAGS.metadata can actually be several key-value pairs) and then iterate + # over it. 
+ parsed_metadata = flag_util.ParseKeyValuePairs(FLAGS.metadata) + metadata.update(parsed_metadata) + return metadata + + +DEFAULT_METADATA_PROVIDERS = [DefaultMetadataProvider()] + + +class UserConfigurationMetadataProvider(MetadataProvider): + + def AddMetadata(self, metadata, benchmark_spec): + metadata = metadata.copy() + metadata['user_cmdline'] = " ".join(sys.argv[:]) + return metadata + + +class SamplePublisher(six.with_metaclass(abc.ABCMeta, object)): + """An object that can publish performance samples.""" + + @abc.abstractmethod + def PublishSamples(self, samples: List[pkb_sample.SampleDict]): + """Publishes 'samples'. + + PublishSamples will be called exactly once. Calling + SamplePublisher.PublishSamples multiple times may result in data being + overwritten. + + Args: + samples: list of dicts to publish. + """ + raise NotImplementedError() + + +class CSVPublisher(SamplePublisher): + """Publisher which writes results in CSV format to a specified path. + + The default field names are written first, followed by all unique metadata + keys found in the data. + """ + + _DEFAULT_FIELDS = ('timestamp', 'test', 'metric', 'value', 'unit', + 'product_name', 'official', 'owner', 'run_uri', + 'sample_uri') + + def __init__(self, path): + super().__init__() + self._path = path + + def PublishSamples(self, samples): + samples = list(samples) + # Union of all metadata keys. + meta_keys = sorted( + set(key for sample in samples for key in sample['metadata'])) + + logging.info('Writing CSV results to %s', self._path) + with open(self._path, 'w') as fp: + writer = csv.DictWriter(fp, list(self._DEFAULT_FIELDS) + meta_keys) + writer.writeheader() + + for sample in samples: + d = {} + d.update(sample) + d.update(d.pop('metadata')) + writer.writerow(d) + + +class PrettyPrintStreamPublisher(SamplePublisher): + """Writes samples to an output stream, defaulting to stdout. + + Samples are pretty-printed and summarized. Example output (truncated): + + -------------------------PerfKitBenchmarker Results Summary-------------- + COREMARK: + num_cpus="4" + Coremark Score 44145.237832 + End to End Runtime 289.477677 seconds + NETPERF: + client_machine_type="n1-standard-4" client_zone="us-central1-a" .... + TCP_RR_Transaction_Rate 1354.04 transactions_per_second (ip_type="ext ... + TCP_RR_Transaction_Rate 3972.70 transactions_per_second (ip_type="int ... + TCP_CRR_Transaction_Rate 449.69 transactions_per_second (ip_type="ext ... + TCP_CRR_Transaction_Rate 1271.68 transactions_per_second (ip_type="int ... + TCP_STREAM_Throughput 1171.04 Mbits/sec (ip_type="ext ... + TCP_STREAM_Throughput 6253.24 Mbits/sec (ip_type="int ... + UDP_RR_Transaction_Rate 1380.37 transactions_per_second (ip_type="ext ... + UDP_RR_Transaction_Rate 4336.37 transactions_per_second (ip_type="int ... + End to End Runtime 444.33 seconds + + ------------------------- + For all tests: cloud="GCP" image="ubuntu-14-04" machine_type="n1-standa ... + + Attributes: + stream: File-like object. Output stream to print samples. + """ + + def __init__(self, stream=None): + super().__init__() + self.stream = stream or sys.stdout + + def __repr__(self): + return '<{0} stream={1}>'.format(type(self).__name__, self.stream) + + def _FindConstantMetadataKeys(self, samples): + """Finds metadata keys which are constant across a collection of samples. + + Args: + samples: List of dicts, as passed to SamplePublisher.PublishSamples. + + Returns: + The set of metadata keys for which all samples in 'samples' have the same + value. 
+ """ + unique_values = {} + + for sample in samples: + for k, v in six.iteritems(sample['metadata']): + if len(unique_values.setdefault(k, set())) < 2 and v.__hash__: + unique_values[k].add(v) + + # Find keys which are not present in all samples + for sample in samples: + for k in frozenset(unique_values) - frozenset(sample['metadata']): + unique_values[k].add(None) + + return frozenset(k for k, v in six.iteritems(unique_values) + if len(v) == 1 and None not in v) + + def _FormatMetadata(self, metadata): + """Format 'metadata' as space-delimited key="value" pairs.""" + return ' '.join('{0}="{1}"'.format(k, v) + for k, v in sorted(six.iteritems(metadata))) + + def PublishSamples(self, samples): + # result will store the formatted text, then be emitted to self.stream and + # logged. + result = six.StringIO() + dashes = '-' * 25 + result.write('\n' + dashes + + 'PerfKitBenchmarker Results Summary' + + dashes + '\n') + + if not samples: + logging.debug('Pretty-printing results to %s:\n%s', self.stream, + result.getvalue()) + self.stream.write(result.getvalue()) + return + + key = operator.itemgetter('test') + samples = sorted(samples, key=key) + globally_constant_keys = self._FindConstantMetadataKeys(samples) + + for benchmark, test_samples in itertools.groupby(samples, key): + test_samples = list(test_samples) + # Drop end-to-end runtime: it always has no metadata. + non_endtoend_samples = [i for i in test_samples + if i['metric'] != 'End to End Runtime'] + locally_constant_keys = ( + self._FindConstantMetadataKeys(non_endtoend_samples) - + globally_constant_keys) + all_constant_meta = globally_constant_keys.union(locally_constant_keys) + + benchmark_meta = {k: v + for k, v in six.iteritems(test_samples[0]['metadata']) + if k in locally_constant_keys} + result.write('{0}:\n'.format(benchmark.upper())) + + if benchmark_meta: + result.write(' {0}\n'.format( + self._FormatMetadata(benchmark_meta))) + + for sample in test_samples: + meta = {k: v for k, v in six.iteritems(sample['metadata']) + if k not in all_constant_meta} + if not isinstance(sample['value'], six.string_types): + result.write(' {0:<30s} {1:>15f} {2:<30s}'.format( + sample['metric'], sample['value'], sample['unit'])) + else: + result.write(' {0:<30s} {1:<30s} {2:<30s}'.format( + sample['metric'], sample['value'], sample['unit'])) + + if meta: + result.write(' ({0})'.format(self._FormatMetadata(meta))) + result.write('\n') + + global_meta = {k: v for k, v in six.iteritems(samples[0]['metadata']) + if k in globally_constant_keys} + result.write('\n' + dashes + '\n') + result.write('For all tests: {0}\n'.format( + self._FormatMetadata(global_meta))) + + value = result.getvalue() + logging.debug('Pretty-printing results to %s:\n%s', self.stream, value) + self.stream.write(value) + + +class LogPublisher(SamplePublisher): + """Writes samples to a Python Logger. + + Attributes: + level: Logging level. Defaults to logging.INFO. + logger: Logger to publish to. Defaults to the root logger. 
+ """ + + def __init__(self, level=logging.INFO, logger=None): + super().__init__() + self.level = level + self.logger = logger or logging.getLogger() + self._pprinter = pprint.PrettyPrinter() + + def __repr__(self): + return '<{0} logger={1} level={2}>'.format(type(self).__name__, self.logger, + self.level) + + def PublishSamples(self, samples): + header = '\n' + '-' * 25 + 'PerfKitBenchmarker Complete Results' + '-' * 25 + self.logger.log(self.level, header) + for sample in samples: + self.logger.log(self.level, self._pprinter.pformat(sample)) + + +# TODO: Extract a function to write delimited JSON to a stream. +class NewlineDelimitedJSONPublisher(SamplePublisher): + """Publishes samples to a file as newline delimited JSON. + + The resulting output file is compatible with 'bq load' using + format NEWLINE_DELIMITED_JSON. + + If 'collapse_labels' is True, metadata is converted to a flat string with key + 'labels' via GetLabelsFromDict. + + Attributes: + file_path: string. Destination path to write samples. + mode: Open mode for 'file_path'. Set to 'a' to append. + collapse_labels: boolean. If true, collapse sample metadata. + """ + + def __init__(self, file_path, mode='wt', collapse_labels=True): + super().__init__() + self.file_path = file_path + self.mode = mode + self.collapse_labels = collapse_labels + + def __repr__(self): + return '<{0} file_path="{1}" mode="{2}">'.format( + type(self).__name__, self.file_path, self.mode) + + def PublishSamples(self, samples): + logging.info('Publishing %d samples to %s', len(samples), + self.file_path) + with open(self.file_path, self.mode) as fp: + fcntl.flock(fp, fcntl.LOCK_EX) + for sample in samples: + sample = sample.copy() + if self.collapse_labels: + sample['labels'] = GetLabelsFromDict(sample.pop('metadata', {})) + fp.write(json.dumps(sample) + '\n') + + +class BigQueryPublisher(SamplePublisher): + """Publishes samples to BigQuery. + + Attributes: + bigquery_table: string. The bigquery table to publish to, of the form + '[project_name:]dataset_name.table_name' + project_id: string. Project to use for authenticating with BigQuery. + bq_path: string. Path to the 'bq' executable'. + service_account: string. Use this service account email address for + authorization. For example, 1234567890@developer.gserviceaccount.com + service_account_private_key: Filename that contains the service account + private key. Must be specified if service_account is specified. + application_default_credential_file: Filename that holds Google applciation + default credentials. Cannot be set alongside service_account. 
+ """ + + def __init__(self, + bigquery_table, + project_id=None, + bq_path='bq', + service_account=None, + service_account_private_key_file=None, + application_default_credential_file=None): + super().__init__() + self.bigquery_table = bigquery_table + self.project_id = project_id + self.bq_path = bq_path + self.service_account = service_account + self.service_account_private_key_file = service_account_private_key_file + self._credentials_file = vm_util.PrependTempDir(DEFAULT_CREDENTIALS_JSON) + self.application_default_credential_file = ( + application_default_credential_file) + + if ((self.service_account is None) != + (self.service_account_private_key_file is None)): + raise ValueError('service_account and service_account_private_key ' + 'must be specified together.') + if (application_default_credential_file is not None and + self.service_account is not None): + raise ValueError('application_default_credential_file cannot be used ' + 'alongside service_account.') + + def __repr__(self): + return '<{0} table="{1}">'.format(type(self).__name__, self.bigquery_table) + + def PublishSamples(self, samples): + if not samples: + logging.warning('No samples: not publishing to BigQuery') + return + + with vm_util.NamedTemporaryFile(prefix='perfkit-bq-pub', + dir=vm_util.GetTempDir(), + suffix='.json') as tf: + json_publisher = NewlineDelimitedJSONPublisher(tf.name, + collapse_labels=True) + json_publisher.PublishSamples(samples) + tf.close() + logging.info('Publishing %d samples to %s', len(samples), + self.bigquery_table) + load_cmd = [self.bq_path] + if self.project_id: + load_cmd.append('--project_id=' + self.project_id) + if self.service_account: + assert self.service_account_private_key_file is not None + load_cmd.extend(['--service_account=' + self.service_account, + '--service_account_credential_file=' + + self._credentials_file, + '--service_account_private_key_file=' + + self.service_account_private_key_file]) + elif self.application_default_credential_file is not None: + load_cmd.append('--application_default_credential_file=' + + self.application_default_credential_file) + load_cmd.extend(['load', + '--autodetect', + '--source_format=NEWLINE_DELIMITED_JSON', + self.bigquery_table, + tf.name]) + vm_util.IssueRetryableCommand(load_cmd) + + +class CloudStoragePublisher(SamplePublisher): + """Publishes samples to a Google Cloud Storage bucket using gsutil. + + Samples are formatted using a NewlineDelimitedJSONPublisher, and written to a + the destination file within the specified bucket named: + +