diff --git a/01_exploratory_data_analysis.ipynb b/01_exploratory_data_analysis.ipynb index f5a7964..ebd7db3 100644 --- a/01_exploratory_data_analysis.ipynb +++ b/01_exploratory_data_analysis.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "cellView": "form", "id": "ur8xi4C7S06n", @@ -130,7 +130,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "tags": [] }, @@ -172,28 +172,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "BUCKET_NAME = \"gcp-data-science-demo-fraudfinder\"\n", - "PROJECT = \"gcp-data-science-demo\"\n", - "REGION = \"us-central1\"\n", - "ID = \"22jof\"\n", - "FEATURESTORE_ID = \"fraudfinder_22jof\"\n", - "MODEL_NAME = \"ff_model\"\n", - "ENDPOINT_NAME = \"ff_model_endpoint\"\n", - "TRAINING_DS_SIZE = \"1000\"\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "PROJECT_ID = GCP_PROJECTS[0]\n", "BUCKET_NAME = f\"{PROJECT_ID}-fraudfinder\"\n", @@ -213,7 +196,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "id": "0c4746a0c78c", "tags": [] @@ -247,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": { "id": "079accfc15e2", "tags": [] @@ -305,79 +288,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": { "id": "f0405eb973fb", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Finished job_id: b19dfee7-693c-4a0b-9ad1-f14dbed3846b\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
NUM_TXMIN_TX_DATEMAX_TX_DATENUM_CUSTOMERSNUM_TERMINALSMIN_TX_AMOUNTAVG_TX_AMOUNTMAX_TX_AMOUNT
01480173322022-01-01 00:00:00+00:002025-12-31 23:59:59+00:004999350000E-955.0058087271110.460000000
\n", - "
" - ], - "text/plain": [ - " NUM_TX MIN_TX_DATE MAX_TX_DATE \\\n", - "0 148017332 2022-01-01 00:00:00+00:00 2025-12-31 23:59:59+00:00 \n", - "\n", - " NUM_CUSTOMERS NUM_TERMINALS MIN_TX_AMOUNT AVG_TX_AMOUNT MAX_TX_AMOUNT \n", - "0 49993 5000 0E-9 55.005808727 1110.460000000 " - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "run_bq_query(\n", " \"\"\"\n", @@ -423,76 +339,12 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": { "id": "daf0467aaeb5", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Finished job_id: 3ec35098-3dbb-4553-967a-fdea8baadb3c\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
TX_FRAUDNUM_TXOVR_TOTAL_TXPCT_TOTAL_TX
001450620421480173320.980034
1129552901480173320.019966
\n", - "
" - ], - "text/plain": [ - " TX_FRAUD NUM_TX OVR_TOTAL_TX PCT_TOTAL_TX\n", - "0 0 145062042 148017332 0.980034\n", - "1 1 2955290 148017332 0.019966" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "run_bq_query(\n", " \"\"\"\n", @@ -543,127 +395,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": { "id": "c41e0815b2b3", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Finished job_id: be2f3aff-3b4f-48e8-96d1-39dd24ad4cd2\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ROUNDED_TX_AMOUNTNUM_TX
00E-925292
11.00000000098178
22.000000000208129
33.000000000381410
44.000000000616521
.........
9271008.0000000001
9281033.0000000001
9291051.0000000001
9301102.0000000001
9311110.0000000001
\n", - "

932 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " ROUNDED_TX_AMOUNT NUM_TX\n", - "0 0E-9 25292\n", - "1 1.000000000 98178\n", - "2 2.000000000 208129\n", - "3 3.000000000 381410\n", - "4 4.000000000 616521\n", - ".. ... ...\n", - "927 1008.000000000 1\n", - "928 1033.000000000 1\n", - "929 1051.000000000 1\n", - "930 1102.000000000 1\n", - "931 1110.000000000 1\n", - "\n", - "[932 rows x 2 columns]" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "transaction_amount_dist = run_bq_query(\n", " \"\"\"\n", @@ -697,29 +434,12 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "id": "54039166d146", "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "# Create histogram of transaction amounts\n", "transaction_amount_interactive_histogram = px.bar(\n", @@ -767,100 +487,12 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": { "id": "e3ae4a0f60f6", "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Finished job_id: 214b23b5-1550-4505-9430-671c29ca7173\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
CUSTOMER_IDNUM_TXAVG_TX_AMOUNTPCT_TX_FRAUD
07228491864464655542552.6592497700.009401
15406457556416978458433.1987761780.000218
20683522698674547331499.2784550390.000302
31020433424310577429083.5343310020.000233
48314565180058922584120.7410905670.000171
\n", - "
" - ], - "text/plain": [ - " CUSTOMER_ID NUM_TX AVG_TX_AMOUNT PCT_TX_FRAUD\n", - "0 7228491864464655 5425 52.659249770 0.009401\n", - "1 5406457556416978 4584 33.198776178 0.000218\n", - "2 0683522698674547 3314 99.278455039 0.000302\n", - "3 1020433424310577 4290 83.534331002 0.000233\n", - "4 8314565180058922 5841 20.741090567 0.000171" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "customer_aggregates = run_bq_query(\n", " \"\"\"\n", @@ -902,29 +534,12 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": { "id": "6460910cc6ec", "tags": [] }, - "outputs": [ - { - "data": { - "text/html": [ - "\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "customer_aggregates_scatter_plot = px.scatter(\n", " customer_aggregates,\n",