Merge pull request #61 from GoogleCloudPlatform/alokpattani-patch-1

Update requirements, EDA and formalization notebooks
GoogleCloudPlatform · Feb 8, 2024 · 890601e
2 parents 51b0441 + d626148
commit 890601e
Show file tree

Hide file tree

Showing 3 changed files with 66 additions and 22 deletions.
diff --git a/01_exploratory_data_analysis.ipynb b/01_exploratory_data_analysis.ipynb
@@ -451,8 +451,15 @@
     "        \"NUM_TX\": \"# of Transactions\",\n",
     "    },\n",
     "    title=\"Number of Transactions by Amount\",\n",
+    "    color_discrete_sequence=['blue']\n",
     ")\n",
     "\n",
+    "transaction_amount_interactive_histogram.update_traces(\n",
+    "    marker_color='black',\n",
+    "    marker_line_color='white',\n",
+    "    marker_line_width=0.1\n",
+    "    )\n",
+    "\n",
     "transaction_amount_interactive_histogram.show()"
    ]
   },
@@ -561,7 +568,7 @@
     "id": "43e521da16bf"
    },
    "source": [
-    "The plot above shows that the vast majority of customers have less than 3000 transactions with an average value of 100 or less, and most of those appear to have very low fraud rates. But there's a cluster of customers at the far right with a very large number of transactions (in the 3000-6000 range), some of whom who have very high average transaction values (from 100 to more than 300 dollars). And the customers in this group with very high transaction counts also have an extremely high rate of fraud: all upwards of 60%, with some at nearly 100% fraud.\n",
+    "The plot above shows that the vast majority of customers have about 6000 or fewer transactions with an average value of 100 or less, and most of those appear to have very low fraud rates. But there are customers at the far right with a very large number of transactions (in the 8000-11000 range), some of whom have very high average transaction values (from 100 to more than 300 dollars). And the customers in this group with very high transaction counts also have an extremely high rate of fraud: most upwards of 60%, with some higher than 90% fraud.\n",
     "\n",
     "These trends found in the data are definitely worth keeping in mind as you think about how to construct a model to predict fraudulent transactions.\n",
     "\n",
@@ -586,9 +593,9 @@
   },
   "environment": {
    "kernel": "python3",
-   "name": "common-cpu.m115",
+   "name": "common-cpu.m116",
    "type": "gcloud",
-   "uri": "gcr.io/deeplearning-platform-release/base-cpu:m115"
+   "uri": "gcr.io/deeplearning-platform-release/base-cpu:m116"
   },
   "kernelspec": {
    "display_name": "Python 3 (Local)",

diff --git a/requirements.txt b/requirements.txt
@@ -3,8 +3,8 @@ google-cloud-bigquery<=3.13.0
 google-cloud-bigquery-storage<=2.22.0
 
 google-cloud-aiplatform<=1.36.1
+google-cloud-pipeline-components<=0.3.0
 plotly<=5.18.0
-itables<=1.6.2
 xgboost<=2.0.1
 kfp<=2.3.0
 apache_beam[gcp]==2.53.0
diff --git a/vertex_ai/06_formalization.ipynb b/vertex_ai/06_formalization.ipynb
@@ -4,7 +4,8 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "id": "ur8xi4C7S06n"
+    "id": "ur8xi4C7S06n",
+    "tags": []
    },
    "outputs": [],
    "source": [
@@ -115,7 +116,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "GCP_PROJECTS = !gcloud config get-value project\n",
@@ -155,7 +158,19 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {
-    "id": "pRUOFELefqf1"
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "! pip install --upgrade 'google-cloud-pipeline-components==0.3.0'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "id": "pRUOFELefqf1",
+    "tags": []
    },
    "outputs": [],
    "source": [
@@ -193,7 +208,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "print(\"kfp version:\", kfp.__version__)"
@@ -209,7 +226,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# Components variables\n",
@@ -265,7 +284,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# Vertex AI SDK\n",
@@ -275,7 +296,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "!gsutil ubla set on gs://{BUCKET_NAME}"
@@ -292,7 +315,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "!mkdir -p -m 777 $PIPELINE_DIR $COMPONENTS_DIR"
@@ -356,7 +381,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "@component(\n",
@@ -453,7 +480,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "@component(output_component_file=EVALUATE)\n",
@@ -533,7 +562,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "@dsl.pipeline(\n",
@@ -624,7 +655,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# compile the pipeline\n",
@@ -642,7 +675,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# instantiate pipeline representation\n",
@@ -657,7 +692,9 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "# submit the pipeline run (may take ~20 minutes for the first run)\n",
@@ -682,12 +719,12 @@
   },
   "environment": {
    "kernel": "python3",
-   "name": "common-cpu.m111",
+   "name": "tf2-cpu.2-11.m116",
    "type": "gcloud",
-   "uri": "gcr.io/deeplearning-platform-release/base-cpu:m111"
+   "uri": "gcr.io/deeplearning-platform-release/tf2-cpu.2-11:m116"
   },
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (Local)",
    "language": "python",
    "name": "python3"
   },
@@ -701,7 +738,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.7.12"
+   "version": "3.10.13"
   },
   "toc-autonumbering": false,
   "toc-showmarkdowntxt": true