diff --git a/.github/workflows/run_mobile_comparative_benchmark.yml b/.github/workflows/run_mobile_comparative_benchmark.yml
index 876ef3f5..0d24987a 100644
--- a/.github/workflows/run_mobile_comparative_benchmark.yml
+++ b/.github/workflows/run_mobile_comparative_benchmark.yml
@@ -57,6 +57,7 @@ jobs:
           fi
           echo "benchmark-gcs-dir=${BENCHMARK_GCS_DIR}" >> "${GITHUB_OUTPUT}"
 
+
   build_iree:
     needs: setup
     runs-on:
@@ -68,6 +69,7 @@ jobs:
     env:
       IREE_SOURCE_DIR: iree-src
       IREE_INSTALL_DIR: iree-install
+      IREE_ANDROID_TOOLS_DIR: tools
       ANDROID_PLATFORM_VERSION: 31
       X86_BUILD_DIR: iree-build
       ANDROID_BUILD_DIR: iree-build-android
@@ -76,6 +78,9 @@ jobs:
       iree-install-dir: ${{ env.IREE_INSTALL_DIR }}
       iree-install-dir-archive: ${{ steps.archive.outputs.iree-install-dir-archive }}
       iree-install-dir-gcs-artifact: ${{ steps.upload.outputs.iree-install-dir-gcs-artifact }}
+      iree-android-tools-dir: ${{ env.IREE_ANDROID_TOOLS_DIR }}
+      iree-android-tools-dir-archive: ${{ steps.archive.outputs.iree-android-tools-dir-archive }}
+      iree-android-tools-dir-gcs-artifact: ${{ steps.upload.outputs.iree-android-tools-dir-gcs-artifact }}
     steps:
       - name: "Checking out PR repository"
         uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791  # v2.5.0
@@ -97,18 +102,28 @@ jobs:
         id: archive
         env:
           IREE_INSTALL_DIR_ARCHIVE: ${{ env.IREE_INSTALL_DIR }}.tgz
+          IREE_ANDROID_TOOLS_DIR_ARCHIVE: ${{ env.IREE_ANDROID_TOOLS_DIR }}.tgz
         run: |
           tar -zcvf "${IREE_INSTALL_DIR_ARCHIVE}" -C "${X86_BUILD_DIR}" "${IREE_INSTALL_DIR}"
           echo "iree-install-dir-archive=${IREE_INSTALL_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
+
+          tar -zcvf "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}" -C "${ANDROID_BUILD_DIR}" "${IREE_ANDROID_TOOLS_DIR}"
+          echo "iree-android-tools-dir-archive=${IREE_ANDROID_TOOLS_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
       - name: "Uploading archives"
         id: upload
         env:
           IREE_INSTALL_DIR_ARCHIVE: ${{ steps.archive.outputs.iree-install-dir-archive }}
           IREE_INSTALL_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.iree-install-dir-archive }}
+          IREE_ANDROID_TOOLS_DIR_ARCHIVE: ${{ steps.archive.outputs.iree-android-tools-dir-archive }}
+          IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.iree-android-tools-dir-archive }}
         run: |
           gcloud storage cp "${IREE_INSTALL_DIR_ARCHIVE}" "${IREE_INSTALL_DIR_GCS_ARTIFACT}"
           echo "iree-install-dir-gcs-artifact=${IREE_INSTALL_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
 
+          gcloud storage cp "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}" "${IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT}"
+          echo "iree-android-tools-dir-gcs-artifact=${IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
+
+
   # We compile and benchmark on the same machine because it takes too much time to compress and upload compiled artifacts.
   compile_and_benchmark_on_c2-standard-60:
     needs: [setup, build_iree]
@@ -154,7 +169,7 @@ jobs:
             --env OOBI_IREE_BENCHMARK_MODULE_PATH="${IREE_INSTALL_DIR}/bin/iree-benchmark-module" \
             --env OOBI_IREE_RUN_MODULE_PATH="${IREE_INSTALL_DIR}/bin/iree-run-module" \
             "gcr.io/iree-oss/openxla-benchmark/base@sha256:1bf3e319465ec8fb465baae3f6ba9a5b09cb84a5349a675c671a552fc77f2251" \
-            ./experimental/iree/benchmark_iree.sh \
+            ./experimental/iree/benchmark_iree_x86.sh \
               "${TARGET_DEVICE}" \
               "${BENCHMARK_ARTIFACTS_DIR}" \
               "${IREE_RESULTS_FILE}"
@@ -163,3 +178,110 @@ jobs:
           RESULTS_GCS_DIR: ${{ env.BENCHMARK_GCS_DIR }}/${{ env.TARGET_DEVICE }}-results
         run: |
           gcloud storage cp "${IREE_RESULTS_FILE}" "${RESULTS_GCS_DIR}/"
+
+
+  compile_models_for_android:
+    needs: [setup, build_iree]
+    runs-on:
+      - self-hosted  # must come first
+      - runner-group=${{ needs.setup.outputs.runner-group }}
+      - environment=prod
+      - cpu
+      - os-family=Linux
+    env:
+      BENCHMARK_ARTIFACTS_DIR: android-benchmark-artifacts-dir
+      TARGET_DEVICE: pixel-6-pro
+      IREE_INSTALL_DIR: ${{ needs.build_iree.outputs.iree-install-dir }}
+      IREE_INSTALL_DIR_ARCHIVE: ${{ needs.build_iree.outputs.iree-install-dir-archive }}
+      IREE_INSTALL_DIR_GCS_ARTIFACT: ${{ needs.build_iree.outputs.iree-install-dir-gcs-artifact }}
+      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
+    outputs:
+      benchmark-artifacts-dir: ${{ env.BENCHMARK_ARTIFACTS_DIR }}
+      benchmark-artifacts-dir-archive: ${{ steps.archive.outputs.benchmark-artifacts-dir-archive }}
+      benchmark-artifacts-dir-gcs-artifact: ${{ steps.upload.outputs.benchmark-artifacts-dir-gcs-artifact }}
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791  # v2.5.0
+      - name: "Downloading and unpacking IREE tools"
+        run: |
+          gcloud storage cp "${IREE_INSTALL_DIR_GCS_ARTIFACT}" "${IREE_INSTALL_DIR_ARCHIVE}"
+          tar -xvf "${IREE_INSTALL_DIR_ARCHIVE}"
+      - name: "Compiling workloads"
+        id: compile
+        env:
+          IREE_COMPILE_PATH: ${{ env.IREE_INSTALL_DIR }}/bin/iree-compile
+          OOBI_TEMP_DIR: temp
+        run: |
+          mkdir "${BENCHMARK_ARTIFACTS_DIR}"
+          mkdir "${OOBI_TEMP_DIR}"
+          # ${PWD} stays inside one quoted word so the bind-mount path is never split.
+          docker run --mount="type=bind,src=${PWD},target=/work" --workdir="/work" \
+            --env IREE_COMPILE_PATH="${IREE_COMPILE_PATH}" \
+            --env OOBI_TEMP_DIR="${OOBI_TEMP_DIR}" \
+            "gcr.io/iree-oss/openxla-benchmark/android@sha256:34f140fcf806f64f5d0492dfc5af774ea440406264cd68c0405e23a69cbe6d93" \
+              ./experimental/iree/compile_workloads.sh \
+                  "${TARGET_DEVICE}" \
+                  "${BENCHMARK_ARTIFACTS_DIR}"
+      - name: "Creating archives"
+        id: archive
+        env:
+          BENCHMARK_ARTIFACTS_DIR_ARCHIVE: ${{ env.BENCHMARK_ARTIFACTS_DIR }}.tgz
+        run: |
+          tar -cvf "${BENCHMARK_ARTIFACTS_DIR_ARCHIVE}" "${BENCHMARK_ARTIFACTS_DIR}"
+          echo "benchmark-artifacts-dir-archive=${BENCHMARK_ARTIFACTS_DIR_ARCHIVE}" >> "${GITHUB_OUTPUT}"
+      - name: "Uploading archives"
+        id: upload
+        env:
+          BENCHMARK_ARTIFACTS_DIR_ARCHIVE: ${{ steps.archive.outputs.benchmark-artifacts-dir-archive }}
+          BENCHMARK_ARTIFACTS_DIR_GCS_ARTIFACT: ${{ env.BENCHMARK_GCS_DIR }}/${{ steps.archive.outputs.benchmark-artifacts-dir-archive }}
+        run: |
+          gcloud storage cp "${BENCHMARK_ARTIFACTS_DIR_ARCHIVE}" "${BENCHMARK_ARTIFACTS_DIR_GCS_ARTIFACT}"
+          echo "benchmark-artifacts-dir-gcs-artifact=${BENCHMARK_ARTIFACTS_DIR_GCS_ARTIFACT}" >> "${GITHUB_OUTPUT}"
+
+
+  benchmark_on_pixel-6-pro:
+    needs: [setup, build_iree, compile_models_for_android]
+    runs-on:
+      - self-hosted  # must come first
+      - runner-group=${{ needs.setup.outputs.runner-group }}
+      - environment=prod
+      - machine-type=pixel-6-pro
+    env:
+      BENCHMARK_GCS_DIR: ${{ needs.setup.outputs.benchmark-gcs-dir }}
+      RESULTS_DIR: results-dir
+      TARGET_DEVICE: pixel-6-pro
+      IREE_ANDROID_TOOLS_DIR: ${{ needs.build_iree.outputs.iree-android-tools-dir }}
+      IREE_ANDROID_TOOLS_DIR_ARCHIVE: ${{ needs.build_iree.outputs.iree-android-tools-dir-archive }}
+      IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT: ${{ needs.build_iree.outputs.iree-android-tools-dir-gcs-artifact }}
+      IREE_ARTIFACTS_DIR: ${{ needs.compile_models_for_android.outputs.benchmark-artifacts-dir }}
+      IREE_ARTIFACTS_DIR_ARCHIVE: ${{ needs.compile_models_for_android.outputs.benchmark-artifacts-dir-archive }}
+      IREE_ARTIFACTS_DIR_GCS_ARTIFACT: ${{ needs.compile_models_for_android.outputs.benchmark-artifacts-dir-gcs-artifact }}
+    steps:
+      - name: "Checking out PR repository"
+        uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791  # v2.5.0
+      - name: "Setup"
+        id: setup
+        run: |
+          echo "results-gcs-dir=${BENCHMARK_GCS_DIR}/${TARGET_DEVICE}-results" >> "${GITHUB_OUTPUT}"
+          mkdir "${RESULTS_DIR}"
+      - name: "Downloading and unpacking tools"
+        run: |
+          gcloud storage cp "${IREE_ANDROID_TOOLS_DIR_GCS_ARTIFACT}" "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}"
+          tar -xvf "${IREE_ANDROID_TOOLS_DIR_ARCHIVE}"
+      - name: "Benchmarking IREE on Android"
+        env:
+          IREE_RESULTS_JSON: iree-${{ env.TARGET_DEVICE }}.json
+          RESULTS_GCS_DIR: ${{ steps.setup.outputs.results-gcs-dir }}
+        run: |
+          RESULTS_PATH="${RESULTS_DIR}/${IREE_RESULTS_JSON}"
+          # We use the `https` path instead of `gs` since artifact streaming only works with `https`.
+          IREE_ARTIFACTS_DIR_HTTPS_ARTIFACT=$(echo "${IREE_ARTIFACTS_DIR_GCS_ARTIFACT}" | sed -E 's/gs:\/\/(.*)/https:\/\/storage.googleapis.com\/\1/')
+
+          OOBI_IREE_RUN_MODULE_PATH="${IREE_ANDROID_TOOLS_DIR}/iree-run-module" \
+          OOBI_IREE_BENCHMARK_MODULE_PATH="${IREE_ANDROID_TOOLS_DIR}/iree-benchmark-module" \
+            ./experimental/iree/benchmark_iree_android.sh \
+              "${TARGET_DEVICE}" \
+              "${IREE_ARTIFACTS_DIR_HTTPS_ARTIFACT}" \
+              "${RESULTS_PATH}"
+
+          gcloud storage cp "${RESULTS_PATH}" "${RESULTS_GCS_DIR}/"
diff --git a/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
index d2db164f..a291b67b 100644
--- a/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
+++ b/common_benchmark_suite/openxla/benchmark/devices/mobile_devices.py
@@ -19,4 +19,17 @@
     },
 )
 
-ALL_DEVICES = [MOBILE_PIXEL_6_PRO]
+MOBILE_PIXEL_8_PRO = def_types.DeviceSpec(
+    name="pixel-8-pro",
+    host_type="mobile",
+    host_model="pixel-8-pro",
+    host_environment="android",
+    accelerator_type="cpu",
+    accelerator_model="armv9-a",
+    accelerator_architecture="armv9-a",
+    accelerator_attributes={
+        "num_of_cores": 9,
+    },
+)
+
+ALL_DEVICES = [MOBILE_PIXEL_6_PRO, MOBILE_PIXEL_8_PRO]
diff --git a/comparative_benchmark/scripts/adb_fetch_and_push.py b/comparative_benchmark/scripts/adb_fetch_and_push.py
new file mode 100755
index 00000000..2ad30afb
--- /dev/null
+++ b/comparative_benchmark/scripts/adb_fetch_and_push.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+#
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import argparse
+import requests
+import socket
+import struct
+import time
+
+ADB_SERVER_ADDR = ("localhost", 5037)
+
+
+def adb_download_and_push_file(source_url: str,
+                               destination: str,
+                               verbose: bool = False):
+  """Fetch a file from a URL and stream it to the device through adb.
+  This avoids a temporary file on the host and reduces the overhead when the
+  file is large.
+  Args:
+    source_url: URL to fetch the file.
+    destination: the full destination path on the device.
+    verbose: output verbose message.
+  Returns:
+    File path on the device.
+  Raises:
+    RuntimeError: if the download fails or adb does not reply "OKAY".
+  """
+
+  if verbose:
+    print(f"Streaming file {source_url} to {destination}.")
+
+  # Implement the ADB sync protocol to stream file chunks to the device, since
+  # the adb client tool doesn't support it. Alternatively we can use a
+  # third-party library such as https://github.com/JeffLIrion/adb_shell, but
+  # the protocol we need is simple and fairly stable. This part can be replaced
+  # with other solutions if needed. The protocol's binary integers are
+  # little-endian, hence the explicit "<I" instead of the host byte order.
+  # To understand the details of the protocol, see
+  # https://cs.android.com/android/_/android/platform/packages/modules/adb/+/93c8e3c26e4de3a2b767a2394200bc0721bb1e24:OVERVIEW.TXT
+
+  def wait_ack_ok(sock: socket.socket):
+    # Read exactly 4 status bytes (recv may return fewer) and compare as bytes.
+    buf = bytearray()
+    while len(buf) < 4:
+      data = sock.recv(4 - len(buf))
+      if not data:
+        break
+      buf += data
+    if bytes(buf) != b"OKAY":
+      raise RuntimeError(f"ADB communication error: {buf}")
+
+  # The context manager closes the HTTP response on every exit path.
+  with requests.get(source_url, stream=True, timeout=60) as req:
+    if not req.ok:
+      raise RuntimeError(
+          f"Failed to fetch {source_url}: {req.status_code} - {req.text}")
+    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as sock:
+      sock.connect(ADB_SERVER_ADDR)
+      # Connect to any device (the first 4 hexadecimal digits are the length of
+      # the following text command).
+      sock.sendall(b"0012host:transport-any")
+      wait_ack_ok(sock)
+      # Switch to sync mode.
+      sock.sendall(b"0005sync:")
+      wait_ack_ok(sock)
+      # Send the destination file path and file permissions 0755 (rwxr-xr-x).
+      file_attr = f"{destination},{0o755}".encode("utf-8")
+      sock.sendall(b"SEND" + struct.pack("<I", len(file_attr)) + file_attr)
+      # Stream the file chunks. 64k bytes is the max chunk size for adb.
+      for data in req.iter_content(chunk_size=64 * 1024):
+        sock.sendall(b"DATA" + struct.pack("<I", len(data)) + data)
+      # End the file stream and set the creation time.
+      sock.sendall(b"DONE" + struct.pack("<I", int(time.time())))
+      wait_ack_ok(sock)
+
+  return destination
+
+
+def _parse_arguments() -> argparse.Namespace:
+  """Parses the command-line flags of this script."""
+  parser = argparse.ArgumentParser(
+      description="Fetches a file from a URL and streams it to a device.")
+  parser.add_argument("-s",
+                      "--source_url",
+                      required=True,
+                      help="The url of file to download.")
+  parser.add_argument("-d",
+                      "--destination",
+                      required=True,
+                      help="The path on the device to stream the file to.")
+  parser.add_argument("--verbose",
+                      action="store_true",
+                      help="Show verbose messages.")
+  return parser.parse_args()
+
+
+if __name__ == "__main__":
+  adb_download_and_push_file(**vars(_parse_arguments()))
diff --git a/experimental/iree/benchmark_iree_android.sh b/experimental/iree/benchmark_iree_android.sh
new file mode 100755
index 00000000..0c9e1e16
--- /dev/null
+++ b/experimental/iree/benchmark_iree_android.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Copyright 2023 The OpenXLA Authors
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#
+# Environment variables:
+# PYTHON: Python interpreter, default: /usr/bin/python3
+# OOBI_TARGET_DEVICE: target benchmark device, can also be specified as the
+#   first argument.
+# OOBI_VENV_DIR: name of the virtual environment.
+# OOBI_IREE_BENCHMARK_MODULE_PATH: the path to `iree-benchmark-module`.
+# OOBI_IREE_RUN_MODULE_PATH: the path to `iree-run-module`.
+# OOBI_IREE_COMPILED_ARTIFACTS_PATH: the path to the IREE vmfb files to benchmark.
+# OOBI_ANDROID_BENCHMARK_DIR: the on-device directory where benchmark artifacts are copied to.
+#
+# Example usage:
+# ./benchmark_iree_android.sh <target-device> <path-to-compiled-artifacts> <results-path>
+
+set -xeuo pipefail
+
+VENV_DIR="${OOBI_VENV_DIR:-iree-benchmarks.venv}"
+PYTHON="${PYTHON:-/usr/bin/python3}"
+ROOT_DIR="${OOBI_ANDROID_BENCHMARK_DIR:-/data/local/tmp/oobi_benchmarks}"
+IREE_BENCHMARK_MODULE_PATH="${OOBI_IREE_BENCHMARK_MODULE_PATH:-/tmp/iree-build-android/tools/iree-benchmark-module}"
+IREE_RUN_MODULE_PATH="${OOBI_IREE_RUN_MODULE_PATH:-/tmp/iree-build-android/tools/iree-run-module}"
+TARGET_DEVICE="${1:-"${OOBI_TARGET_DEVICE}"}"
+COMPILED_ARTIFACTS_PATH="${2:-"${OOBI_IREE_COMPILED_ARTIFACTS_PATH}"}"
+OUTPUT_PATH="${3:-"${OOBI_OUTPUT}"}"
+
+TD="$(cd "$(dirname "$0")" && pwd)"
+
+# Setup benchmarking environment: start from an empty directory on the device.
+adb shell "rm -rf ${ROOT_DIR}"
+adb shell "mkdir -p ${ROOT_DIR}"
+
+adb push "${TD}/../../comparative_benchmark/scripts/set_android_scaling_governor.sh" "${ROOT_DIR}"
+adb shell "chmod +x ${ROOT_DIR}/set_android_scaling_governor.sh"
+adb shell "su root sh ${ROOT_DIR}/set_android_scaling_governor.sh performance"
+#adb shell "su root sendhint -m DISPLAY_INACTIVE -e 0"
+adb shell "su root setprop persist.vendor.disable.thermal.control 1"
+
+adb push "${TD}/benchmark_lib.py" "${ROOT_DIR}"
+adb shell "chmod +x ${ROOT_DIR}/benchmark_lib.py"
+
+adb push "${IREE_RUN_MODULE_PATH}" "${ROOT_DIR}"
+IREE_RUN_MODULE_PATH="${ROOT_DIR}/iree-run-module"
+
+adb push "${IREE_BENCHMARK_MODULE_PATH}" "${ROOT_DIR}"
+IREE_BENCHMARK_MODULE_PATH="${ROOT_DIR}/iree-benchmark-module"
+
+VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
+
+DEVICE_ARTIFACT_DIR="${ROOT_DIR}/artifacts"
+adb shell mkdir "${DEVICE_ARTIFACT_DIR}"
+
+if [[ "${COMPILED_ARTIFACTS_PATH}" = https* ]]; then
+  archive_name=$(basename "${COMPILED_ARTIFACTS_PATH}")
+
+  "${TD}/../../comparative_benchmark/scripts/adb_fetch_and_push.py" \
+    --source_url="${COMPILED_ARTIFACTS_PATH}" \
+    --destination="${ROOT_DIR}/${archive_name}" \
+    --verbose
+
+  adb shell "tar -xf ${ROOT_DIR}/${archive_name} --strip-components=1 -C ${DEVICE_ARTIFACT_DIR}"
+else
+  adb push "${COMPILED_ARTIFACTS_PATH}/." "${DEVICE_ARTIFACT_DIR}/"
+fi
+
+"${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"
+
+# A num_threads to cpu_ids map. We use the biggest cores for each configuration.
+THREAD_CONFIG="{1: '0', 4: '0,1,2,3'}"
+
+"${TD}/run_benchmarks.py" \
+  --target_device="${TARGET_DEVICE}" \
+  --output="${OUTPUT_PATH}" \
+  --artifact_dir="${DEVICE_ARTIFACT_DIR}" \
+  --iree_run_module_path="${IREE_RUN_MODULE_PATH}" \
+  --iree_benchmark_module_path="${IREE_BENCHMARK_MODULE_PATH}" \
+  --thread_config="${THREAD_CONFIG}" \
+  --verbose
+
+# Cleanup.
+adb shell rm -rf "${ROOT_DIR}"
diff --git a/experimental/iree/benchmark_iree.sh b/experimental/iree/benchmark_iree_x86.sh
similarity index 88%
rename from experimental/iree/benchmark_iree.sh
rename to experimental/iree/benchmark_iree_x86.sh
index 9818368e..5ff2a448 100755
--- a/experimental/iree/benchmark_iree.sh
+++ b/experimental/iree/benchmark_iree_x86.sh
@@ -34,7 +34,7 @@ VENV_DIR="${VENV_DIR}" PYTHON="${PYTHON}" source "${TD}/setup_venv.sh"
 
 "${TD}/../../comparative_benchmark/scripts/create_results_json.sh" "${OUTPUT_PATH}"
 
-THREADS="1,8,15,30"
+THREAD_CONFIG="{1: '0', 8: '0,1,2,3,4,5,6,7', 15: '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14', 30: '0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29'}"
 
 "${TD}/run_benchmarks.py" \
   --target_device="${TARGET_DEVICE}" \
@@ -42,5 +42,5 @@ THREADS="1,8,15,30"
   --artifact_dir="${COMPILED_ARTIFACTS_PATH}" \
   --iree_run_module_path="${IREE_RUN_MODULE_PATH}" \
   --iree_benchmark_module_path="${IREE_BENCHMARK_MODULE_PATH}" \
-  --threads="${THREADS}" \
+  --thread_config="${THREAD_CONFIG}" \
   --verbose
diff --git a/experimental/iree/benchmark_lib.py b/experimental/iree/benchmark_lib.py
index 06b02a95..09dd9d04 100644
--- a/experimental/iree/benchmark_lib.py
+++ b/experimental/iree/benchmark_lib.py
@@ -7,7 +7,6 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import argparse
-import json
 import pathlib
 import re
 import subprocess
@@ -94,25 +93,22 @@ def run_benchmark_command(benchmark_command: str,
 
 def _parse_arguments() -> argparse.Namespace:
   parser = argparse.ArgumentParser(description="Runs benchmarks.")
-  parser.add_argument("--command",
-                      type=str,
-                      required=True,
-                      help="The command to run.")
-  parser.add_argument("-o",
-                      "--output",
+  # We need to store the command in a text file because argparse does not treat
+  # a quoted string containing dashes as a plain value; it interprets the
+  # dashes as arguments.
+  parser.add_argument("--command_path",
                       type=pathlib.Path,
                       required=True,
-                      help="JSON filepath to save results to.")
+                      help="The command to run stored in a text file.")
   parser.add_argument("--verbose",
                       action="store_true",
                       help="Show verbose messages.")
   return parser.parse_args()
 
 
-def main(command: str, output: pathlib.Path):
-  results = run_benchmark_command(command)
-  with open(output, "w") as outfile:
-    json.dump(results, outfile)
+def main(command_path: pathlib.Path, verbose: bool = False):
+  results = run_benchmark_command(command_path.read_text(), verbose)
+  print(f"results_dict: {results}")
 
 
 if __name__ == "__main__":
diff --git a/experimental/iree/compile_workloads.sh b/experimental/iree/compile_workloads.sh
index ab59046d..2d5b1c04 100755
--- a/experimental/iree/compile_workloads.sh
+++ b/experimental/iree/compile_workloads.sh
@@ -44,10 +44,10 @@ declare -a BENCHMARK_NAMES=(
   "models/BERT_BASE_FP16_JAX_I32_SEQLEN.+/.+"
   "models/BERT_BASE_BF16_JAX_I32_SEQLEN.+/.+"
   "models/T5_4CG_SMALL_FP32_JAX_1X128XI32_GEN.+/.+"
-  "models/SD_PIPELINE_FP32_JAX_64XI32_BATCH.+/.+"
-  "models/SD_PIPELINE_FP16_JAX_64XI32_BATCH.+/.+"
-  "models/SD_PIPELINE_BF16_JAX_64XI32_BATCH.+/.+"
   "models/VIT_CLASSIFICATION_JAX_3X224X224XF32/.+"
+  "models/SD_PIPELINE_FP32_JAX_64XI32_BATCH1/.+"
+  "models/SD_PIPELINE_FP16_JAX_64XI32_BATCH1/.+"
+  "models/SD_PIPELINE_BF16_JAX_64XI32_BATCH1/.+"
 )
 
 for benchmark_name in "${BENCHMARK_NAMES[@]}"; do
diff --git a/experimental/iree/run_benchmarks.py b/experimental/iree/run_benchmarks.py
index e4626dd1..27bc2632 100755
--- a/experimental/iree/run_benchmarks.py
+++ b/experimental/iree/run_benchmarks.py
@@ -7,6 +7,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import argparse
+import ast
 import dataclasses
 import json
 import os
@@ -17,6 +18,8 @@
 
 import benchmark_lib
 
+from typing import Any, Dict, List
+
 # Add common_benchmark_suite dir to the search path.
 sys.path.insert(
     0, str(pathlib.Path(__file__).parents[2] / "common_benchmark_suite"))
@@ -32,48 +35,155 @@
 ALL_DEVICE_NAMES = [device.name for device in devices.ALL_DEVICES]
 
 
-def check_accuracy(artifact_dir: pathlib.Path,
-                   iree_run_module_path: pathlib.Path,
-                   atol: float,
-                   num_threads: str,
-                   verbose: bool = False) -> bool:
-  module_path = artifact_dir / f"module.vmfb"
-  output_npy = artifact_dir / "outputs_npy" / "output_0.npy"
-  command = [
-      str(iree_run_module_path),
+def get_directory_names(target_device: def_types.DeviceSpec,
+                        directory: pathlib.Path) -> List[str]:
+  """Lists the entries of `directory` on the device (via adb) or on the host."""
+  if target_device in devices.mobile_devices.ALL_DEVICES:
+    output = subprocess.run(
+        ["adb", "shell", "ls", str(directory)], check=True, capture_output=True)
+    # splitlines() + strip() also copes with "\r\n" line endings, in case adb
+    # returns them; empty elements are removed.
+    lines = output.stdout.decode().splitlines()
+    return [line.strip() for line in lines if line.strip()]
+  return os.listdir(directory)
+
+
+def get_common_command_parameters(target_device: def_types.DeviceSpec,
+                                  artifacts_dir: pathlib.Path,
+                                  task_topology_cpu_ids: str) -> List[str]:
+  """Returns the flags shared by iree-run-module and iree-benchmark-module."""
+  module_path = artifacts_dir / "module.vmfb"
+  parameters = [
       f"--module={module_path}",
-      f"--task_topology_group_count={num_threads}",
+      f"--task_topology_cpu_ids={task_topology_cpu_ids}",
       "--device=local-task",
       "--function=main",
-      f"--expected_output=@{output_npy}",
-      f"--expected_f32_threshold={atol}",
-      f"--expected_f16_threshold={atol}",
-      f"--expected_f64_threshold={atol}",
   ]
 
-  inputs_dir = artifact_dir / "inputs_npy"
-  num_inputs = len(list(inputs_dir.glob("*.npy")))
-  for i in range(num_inputs):
-    command.append(f"--input=@{inputs_dir}/input_{i}.npy")
+  inputs_dir = artifacts_dir / "inputs_npy"
+  # Listing order is not guaranteed, so pass inputs by index (input_<i>.npy).
+  names = get_directory_names(target_device, inputs_dir)
+  n = sum(name.endswith(".npy") for name in names)
+  return parameters + [f"--input=@{inputs_dir}/input_{i}.npy" for i in range(n)]
+
+
+def generate_accuracy_check_command(target_device: def_types.DeviceSpec,
+                                    artifacts_dir: pathlib.Path,
+                                    iree_run_module_path: pathlib.Path,
+                                    atol: float,
+                                    task_topology_cpu_ids: str) -> str:
+  output_npy = artifacts_dir / "outputs_npy" / "output_0.npy"
+  command = [str(iree_run_module_path)] + get_common_command_parameters(
+      target_device, artifacts_dir, task_topology_cpu_ids) + [
+          f"--expected_output=@{output_npy}",
+          f"--expected_f32_threshold={atol}",
+          f"--expected_f16_threshold={atol}",
+          f"--expected_f64_threshold={atol}",
+      ]
+  return " ".join(command)
+
+
+def benchmark_on_x86(target_device: def_types.DeviceSpec,
+                     benchmark: def_types.BenchmarkCase,
+                     artifacts_dir: pathlib.Path,
+                     iree_run_module_path: pathlib.Path,
+                     iree_benchmark_module_path: pathlib.Path,
+                     task_topology_cpu_ids: str,
+                     verbose: bool) -> Dict[str, Any]:
+  # Check accuracy.
+  atol = benchmark.verify_parameters["absolute_tolerance"]
+  command = generate_accuracy_check_command(target_device, artifacts_dir,
+                                            iree_run_module_path, atol,
+                                            task_topology_cpu_ids)
+
+  try:
+    output = subprocess.run(command,
+                            shell=True,
+                            check=True,
+                            capture_output=True)
+    if verbose:
+      print(output.stdout.decode())
+    is_accurate = True
+  except subprocess.CalledProcessError as e:
+    print(f"Error running command: {e}")
+    is_accurate = False
+
+  # Run benchmark.
+  command = [str(iree_benchmark_module_path)] + get_common_command_parameters(
+      target_device, artifacts_dir,
+      task_topology_cpu_ids) + ["--print_statistics"]
+  metrics = benchmark_lib.run_benchmark_command(" ".join(command), verbose)
+  metrics["accuracy"] = is_accurate
+  return metrics
 
-  command_str = " ".join(command)
-  print(f"Running command: {command_str}")
+
+def benchmark_on_android(target_device: def_types.DeviceSpec,
+                         benchmark: def_types.BenchmarkCase,
+                         artifacts_dir: pathlib.Path,
+                         iree_run_module_device_path: pathlib.Path,
+                         iree_benchmark_module_device_path: pathlib.Path,
+                         task_topology_cpu_ids: str,
+                         verbose: bool) -> Dict[str, Any]:
+  # Check accuracy.
+  atol = benchmark.verify_parameters["absolute_tolerance"]
+  command = generate_accuracy_check_command(target_device, artifacts_dir,
+                                            iree_run_module_device_path, atol,
+                                            task_topology_cpu_ids)
+  command = f"adb shell su root {command}"
 
   try:
-    output = subprocess.run(command, check=True, capture_output=True)
+    output = subprocess.run(command,
+                            shell=True,
+                            check=True,
+                            capture_output=True)
     if verbose:
       print(output.stdout.decode())
-    return True
+    is_accurate = True
   except subprocess.CalledProcessError as e:
     print(f"Error running command: {e}")
+    is_accurate = False
+
+  # Run benchmark.
+  root_dir = iree_benchmark_module_device_path.parent
+  benchmark_command = [str(iree_benchmark_module_device_path)
+                      ] + get_common_command_parameters(
+                          target_device, artifacts_dir,
+                          task_topology_cpu_ids) + ["--print_statistics"]
+  benchmark_command = " ".join(benchmark_command)
 
-  return False
+  command_path = root_dir / "command.txt"
+  subprocess.run(f"adb shell \"echo '{benchmark_command}' > {command_path}\"",
+                 shell=True,
+                 check=True,
+                 capture_output=True)
+
+  benchmark_lib_path = root_dir / "benchmark_lib.py"
+  command = f"adb shell su root /data/data/com.termux/files/usr/bin/python {benchmark_lib_path} --command_path=\"{command_path}\""
+  if verbose:
+    command += " --verbose"
+
+  output = subprocess.run(command, shell=True, check=True, capture_output=True)
+  output = output.stdout.decode()
+  if verbose:
+    print(output)
+
+  match = re.search(r"results_dict: (\{.*\})", output)
+  if match:
+    dictionary_string = match.group(1)
+    metrics = ast.literal_eval(dictionary_string)
+  else:
+    metrics = {"error": f"Could not parse results"}
+
+  metrics["accuracy"] = is_accurate
+  return metrics
 
 
 def benchmark_one(benchmark: def_types.BenchmarkCase,
                   target_device: def_types.DeviceSpec,
-                  artifact_dir: pathlib.Path,
+                  artifacts_dir: pathlib.Path,
+                  iree_run_module_path: pathlib.Path,
                   iree_benchmark_module_path: pathlib.Path, num_threads: str,
+                  task_topology_cpu_ids: str,
                   verbose: bool) -> utils.BenchmarkResult:
   model = benchmark.model
   benchmark_definition = {
@@ -85,23 +195,16 @@ def benchmark_one(benchmark: def_types.BenchmarkCase,
       "tags": model.model_impl.tags + model.tags,
   }
 
-  inputs_dir = artifact_dir / "inputs_npy"
-  num_inputs = len(list(inputs_dir.glob("*.npy")))
-
-  module_path = artifact_dir / "module.vmfb"
-  command = [
-      str(iree_benchmark_module_path),
-      f"--module={module_path}",
-      f"--task_topology_group_count={num_threads}",
-      "--device=local-task",
-      "--function=main",
-      "--print_statistics",
-  ]
-
-  for i in range(num_inputs):
-    command.append(f"--input=@{inputs_dir}/input_{i}.npy")
+  if target_device in devices.mobile_devices.ALL_DEVICES:
+    metrics = benchmark_on_android(target_device, benchmark, artifacts_dir,
+                                   iree_run_module_path,
+                                   iree_benchmark_module_path,
+                                   task_topology_cpu_ids, verbose)
+  else:
+    metrics = benchmark_on_x86(target_device, benchmark, artifacts_dir,
+                               iree_run_module_path, iree_benchmark_module_path,
+                               task_topology_cpu_ids, verbose)
 
-  metrics = benchmark_lib.run_benchmark_command(" ".join(command), verbose)
   return utils.BenchmarkResult(
       definition=benchmark_definition,
       metrics={
@@ -121,7 +224,8 @@ def _parse_arguments() -> argparse.Namespace:
       "--artifact_dir",
       type=pathlib.Path,
       required=True,
-      help="The directory containing all required benchmark artifacts.")
+      help=
+      "The directory containing all required benchmark artifacts on the host.")
   parser.add_argument("-device",
                       "--target_device",
                       dest="target_device_name",
@@ -137,9 +241,12 @@ def _parse_arguments() -> argparse.Namespace:
                       type=pathlib.Path,
                       required=True,
                       help="Path to the iree-benchmark-module binary.")
-  parser.add_argument("--threads",
-                      type=str,
-                      help="A comma-separated list of threads.")
+  parser.add_argument(
+      "--thread_config",
+      type=str,
+      help=
+      "A string dictionary of num_threads to cpu_ids. If cpu_ids is empty, does not pin threads to a specific CPU. Example: {1: '0', 4: '1,2,3,4', 5: '0,1,2,3,4'}"
+  )
   parser.add_argument("--verbose",
                       action="store_true",
                       help="Show verbose messages.")
@@ -148,7 +255,8 @@ def _parse_arguments() -> argparse.Namespace:
 
 def main(output: pathlib.Path, artifact_dir: pathlib.Path,
          target_device_name: str, iree_run_module_path: pathlib.Path,
-         iree_benchmark_module_path: pathlib.Path, threads: str, verbose: bool):
+         iree_benchmark_module_path: pathlib.Path, thread_config: str,
+         verbose: bool):
 
   try:
     target_device = next(device for device in devices.ALL_DEVICES
@@ -161,30 +269,22 @@ def main(output: pathlib.Path, artifact_dir: pathlib.Path,
   all_benchmarks = jax_benchmark_definitions.ALL_BENCHMARKS + tflite_benchmark_definitions.ALL_BENCHMARKS
 
   benchmarks = {}
-  contents = os.listdir(artifact_dir)
+  contents = get_directory_names(target_device, artifact_dir)
   for item in contents:
-    if os.path.isdir(artifact_dir / item):
-      name_pattern = re.compile(f".*{item}.*")
-      for benchmark in all_benchmarks:
-        if name_pattern.match(benchmark.name):
-          benchmarks[item] = benchmark
+    name_pattern = re.compile(f".*{item}.*")
+    for benchmark in all_benchmarks:
+      if name_pattern.match(benchmark.name):
+        benchmarks[item] = benchmark
 
-  threads = threads.split(",")
+  thread_config = ast.literal_eval(thread_config)
   for directory, benchmark in benchmarks.items():
-    benchmark_artifacts = artifact_dir / directory
-
-    for num_thread in threads:
-      atol = benchmark.verify_parameters["absolute_tolerance"]
-      is_accurate = check_accuracy(benchmark_artifacts, iree_run_module_path,
-                                   atol, num_thread, verbose)
-
-      result = benchmark_one(benchmark, target_device, benchmark_artifacts,
-                             iree_benchmark_module_path, num_thread, verbose)
-      result.metrics["compiler_level"]["accuracy"] = is_accurate
-
+    model_artifact_dir = artifact_dir / directory
+    for num_thread, cpu_ids in thread_config.items():
+      result = benchmark_one(benchmark, target_device, model_artifact_dir,
+                             iree_run_module_path, iree_benchmark_module_path,
+                             num_thread, cpu_ids, verbose)
       if verbose:
         print(json.dumps(dataclasses.asdict(result), indent=2))
-
       utils.append_benchmark_result(output, result)