Add example notebook for AWS Batch job stats #102

Merged (1 commit, Jun 24, 2023)
203 changes: 203 additions & 0 deletions examples/aio_aws_batch_job_stats.ipynb
@@ -0,0 +1,203 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from pathlib import Path\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"from IPython.core.display import display\n",
"\n",
"# display(HTML(\"<style>div.output pre { white-space: pre;}</style>\"))\n",
"\n",
"# Default value of display.max_rows is 10 i.e. at max 10 rows will be printed.\n",
"# Set it None to display all rows in the dataframe\n",
"pd.set_option('display.max_rows', None)\n",
"\n",
"# Width of the display in characters. If set to None and pandas will correctly auto-detect the width.\n",
"pd.set_option('display.width', None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"home = Path(os.getenv(\"HOME\"))\n",
"LOGS_PATH = home / \"batch_jobs\"\n",
"LOGS_PATH.mkdir(parents=True, exist_ok=True)\n",
"\n",
"parsed_file_pattern = \"jobs_*.csv\"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# jobs = pd.read_csv(jobs_csv)\n",
"import dask.dataframe as dd\n",
"ddf = dd.read_csv(LOGS_PATH / \"**\" / parsed_file_pattern)\n",
"jobs = ddf.compute()\n",
"\n",
"jobs['created'] = pd.to_datetime(jobs.created)\n",
"jobs['started'] = pd.to_datetime(jobs.started)\n",
"jobs['stopped'] = pd.to_datetime(jobs.stopped)\n",
"jobs['elapsed'] = jobs.stopped - jobs.created\n",
"jobs['runtime'] = jobs.stopped - jobs.started\n",
"jobs['spinup'] = jobs.started - jobs.created\n",
"jobs.info()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Successful Jobs\n",
"\n",
"These are only for SUCCEEDED jobs"
]
},
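{
"cell_type": "markdown",
"metadata": {},
"source": [
"Before relying on the SUCCEEDED subset, it helps to check the full\n",
"status breakdown, which shows how many jobs failed or were otherwise\n",
"not successful. This is a minimal sanity check; the status values\n",
"come from the AWS Batch job states."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Count jobs per AWS Batch status (e.g. SUCCEEDED, FAILED)\n",
"display(jobs.status.value_counts())"
]
},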
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"success_jobs = jobs.query('status == \"SUCCEEDED\"')\n",
"np.unique(success_jobs.status)\n",
"\n",
"job = success_jobs.iloc[-1].copy()\n",
"job.created, job.started, job.stopped, job.spinup, job.runtime, job.elapsed"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Job Spinup\n",
"AWS does not charge for the spinup time on AWS-Batch.\n",
"This is the delay between job-submission (`created_at`) and the\n",
"start of the job (`started_at`). This can differ for cold-start\n",
"vs warm-start jobs because the docker image needs to be transferred\n",
"into a new EC2-node for a batch compute environment.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(success_jobs.runtime.sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(success_jobs.spinup.describe())"
]
},
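{
"cell_type": "markdown",
"metadata": {},
"source": [
"A rough way to separate likely cold starts from warm starts is to\n",
"split on a spinup threshold. The 2-minute cutoff below is an\n",
"illustrative assumption, not an AWS-defined boundary; tune it to the\n",
"observed spinup distribution."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical cutoff: spinup above 2 minutes is treated as a cold start\n",
"cold_cutoff = pd.Timedelta(minutes=2)\n",
"cold_jobs = success_jobs[success_jobs.spinup > cold_cutoff]\n",
"warm_jobs = success_jobs[success_jobs.spinup <= cold_cutoff]\n",
"display(f\"cold starts: {len(cold_jobs)}, warm starts: {len(warm_jobs)}\")\n",
"display(cold_jobs.spinup.describe())\n",
"display(warm_jobs.spinup.describe())"
]
},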
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Job Runtime\n",
"These are job runtime, after spinup."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(success_jobs.runtime.sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(success_jobs.runtime.describe())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Elapsed Time\n",
"These are total job time, from job submission to completion.\n",
"\n",
"These values are not very useful, since they do not account for the parallel execution of all the batch jobs. The AWS-Batch compute environment will specify a limit to the available cores for concurrent jobs. The job-definition could require N-cores per job."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(success_jobs.elapsed.sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Assume there are 500 parallel batch jobs, then elapsed time is\n",
"display(success_jobs.elapsed.sum() / 500)"
]
},
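{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rather than assuming a fixed level of parallelism, the peak concurrency\n",
"can be estimated from the job intervals themselves. The sketch below\n",
"sweeps over the start/stop events and tracks how many jobs overlap at\n",
"any moment; it assumes `started` and `stopped` are populated for every\n",
"row in `success_jobs`."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sweep-line over (timestamp, +1/-1) events to find the peak overlap\n",
"events = pd.concat([\n",
"    pd.DataFrame({'ts': success_jobs.started, 'delta': 1}),\n",
"    pd.DataFrame({'ts': success_jobs.stopped, 'delta': -1}),\n",
"]).sort_values('ts')\n",
"display(f\"peak concurrent jobs: {events.delta.cumsum().max()}\")\n",
"\n",
"# Wall-clock span of the whole batch, from first start to last stop\n",
"display(success_jobs.stopped.max() - success_jobs.started.min())"
]
},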
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"display(success_jobs.elapsed.describe())"
]
}
],
"metadata": {
"file_extension": ".py",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.15"
},
"mimetype": "text/x-python",
"name": "python",
"npconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": 3
},
"nbformat": 4,
"nbformat_minor": 2
}