From 93147a8e19e5373fbdc5b416c9d715f4567c0b43 Mon Sep 17 00:00:00 2001 From: 23fredibaron Date: Fri, 13 Jan 2017 07:50:39 -0500 Subject: [PATCH] Tutorial Example-Dependent Cost-Sensitive Churn Modeling --- doc/tutorials/tutorial_edcs_churn.ipynb | 2914 +++++++++++++++++++++++ 1 file changed, 2914 insertions(+) create mode 100644 doc/tutorials/tutorial_edcs_churn.ipynb diff --git a/doc/tutorials/tutorial_edcs_churn.ipynb b/doc/tutorials/tutorial_edcs_churn.ipynb new file mode 100644 index 0000000..57c20ae --- /dev/null +++ b/doc/tutorials/tutorial_edcs_churn.ipynb @@ -0,0 +1,2914 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# LECTURA DE LA BASE" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from pandas import DataFrame \n", + "from sklearn.tree import DecisionTreeClassifier" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import zipfile\n", + "with zipfile.ZipFile('cost_sensitive_classification_churn.csv.zip', 'r') as z:\n", + " f = z.open('cost_sensitive_classification_churn.csv')\n", + " data = pd.io.parsers.read_table(f, sep=',')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Target, variable que reporta el abandono o no de los clientes." + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idx1x2x3x4x5x6x7x8x9...x42x43x44x45x46C_FPC_FNC_TPC_TNtarget
00011101100...1152274.0000001028.571429121.82857100
11011101100...3152453.4285711028.57142982.74285700
22111101100...1831466.2857141285.714286102.92857100
33111001100...1843292.0000001285.714286151.78571400
44011101100...1752453.4285711028.57142982.74285700
\n", + "

5 rows × 52 columns

\n", + "
" + ], + "text/plain": [ + " id x1 x2 x3 x4 x5 x6 x7 x8 x9 ... x42 x43 x44 x45 x46 \\\n", + "0 0 0 1 1 1 0 1 1 0 0 ... 1 1 5 2 2 \n", + "1 1 0 1 1 1 0 1 1 0 0 ... 3 1 5 2 4 \n", + "2 2 1 1 1 1 0 1 1 0 0 ... 1 8 3 1 4 \n", + "3 3 1 1 1 0 0 1 1 0 0 ... 1 8 4 3 2 \n", + "4 4 0 1 1 1 0 1 1 0 0 ... 1 7 5 2 4 \n", + "\n", + " C_FP C_FN C_TP C_TN target \n", + "0 74.000000 1028.571429 121.828571 0 0 \n", + "1 53.428571 1028.571429 82.742857 0 0 \n", + "2 66.285714 1285.714286 102.928571 0 0 \n", + "3 92.000000 1285.714286 151.785714 0 0 \n", + "4 53.428571 1028.571429 82.742857 0 0 \n", + "\n", + "[5 rows x 52 columns]" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(9379, 52)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Propoción de la varible target" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 8930\n", + "1 449\n", + "Name: target, dtype: int64" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.target.value_counts(normalize=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0.952127\n", + "1 0.047873\n", + "Name: target, dtype: float64" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "data.target.value_counts(normalize=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": true + }, + 
"outputs": [], + "source": [ + "X =data[['x'+str(i) for i in range(1, 47)]]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
x1x2x3x4x5x6x7x8x9x10...x37x38x39x40x41x42x43x44x45x46
00111011001...-0.208086-0.066014-0.075314-0.435398-0.0415111522
10111011001...-0.208086-0.066014-0.075314-0.435398-0.0415131524
21111011001...-0.208086-0.066014-0.0753140.836751-0.0415118314
31110011001...-0.208086-0.066014-0.0753140.836751-0.0415118432
40111011001...-0.208086-0.066014-0.075314-0.435398-0.0415117524
\n", + "

5 rows × 46 columns

\n", + "
" + ], + "text/plain": [ + " x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 ... x37 x38 x39 \\\n", + "0 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 -0.075314 \n", + "1 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 -0.075314 \n", + "2 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 -0.075314 \n", + "3 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 -0.075314 \n", + "4 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 -0.075314 \n", + "\n", + " x40 x41 x42 x43 x44 x45 x46 \n", + "0 -0.435398 -0.04151 1 1 5 2 2 \n", + "1 -0.435398 -0.04151 3 1 5 2 4 \n", + "2 0.836751 -0.04151 1 8 3 1 4 \n", + "3 0.836751 -0.04151 1 8 4 3 2 \n", + "4 -0.435398 -0.04151 1 7 5 2 4 \n", + "\n", + "[5 rows x 46 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "y = data.target" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + "Name: target, dtype: float64" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 8930\n", + "1 449\n", + "Name: target, dtype: int64" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y.value_counts(normalize=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matriz de costos" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "cost_mat = data[['C_FP','C_FN','C_TP','C_TN']].values" + ] + }, + { + "cell_type": "code", + 
"execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 74. , 1028.571429, 121.828571, 0. ],\n", + " [ 53.428571, 1028.571429, 82.742857, 0. ],\n", + " [ 66.285714, 1285.714286, 102.928571, 0. ],\n", + " ..., \n", + " [ 98. , 1371.428571, 161.771429, 0. ],\n", + " [ 53.428571, 1028.571429, 82.742857, 0. ],\n", + " [ 53.428571, 1028.571429, 82.742857, 0. ]])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost_mat" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\n", + "# Training and test" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.cross_validation import train_test_split" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "temp = train_test_split(X, y, cost_mat, test_size=0.33, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "X_train, X_test, y_train, y_test, cost_mat_train, cost_mat_test = temp" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "[ x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 ... x37 x38 \\\n", + " 5667 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7263 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1563 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4950 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8030 0 1 1 1 1 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7040 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1979 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1883 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 438 0 1 1 1 0 0 1 1 0 1 ... 
3.071972 -0.066014 \n", + " 9049 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3794 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8849 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4113 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2986 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6828 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8640 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2280 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8756 0 1 1 1 0 0 1 1 0 1 ... 6.352029 -0.066014 \n", + " 7536 0 1 1 0 1 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4712 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 373 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4539 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8644 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2585 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1332 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7838 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 9047 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3774 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1784 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 9374 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " ... .. .. .. .. .. .. .. .. .. ... ... ... ... \n", + " 2734 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 189 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 9167 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2747 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2047 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7849 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2558 1 1 1 0 1 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 9274 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8666 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6396 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3385 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4555 0 1 1 1 0 1 1 0 0 1 ... 
-0.208086 -0.066014 \n", + " 1184 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6420 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5051 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5311 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2433 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6949 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 769 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1685 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8322 0 1 1 1 0 0 1 0 0 1 ... 1.431943 -0.066014 \n", + " 5578 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4426 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 466 1 1 1 0 0 1 0 0 0 1 ... -0.208086 -0.066014 \n", + " 6265 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5734 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5191 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5390 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 860 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7270 0 1 1 1 1 1 1 0 0 1 ... 
-0.208086 -0.066014 \n", + " \n", + " x39 x40 x41 x42 x43 x44 x45 x46 \n", + " 5667 -0.075314 -0.435398 -0.04151 2 8 2 1 4 \n", + " 7263 -0.075314 3.381048 -0.04151 1 8 1 2 3 \n", + " 1563 -0.075314 -0.435398 -0.04151 1 1 1 2 3 \n", + " 4950 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 8030 -0.075314 -0.435398 -0.04151 3 1 3 2 2 \n", + " 7040 -0.075314 -0.435398 -0.04151 1 8 3 1 2 \n", + " 1979 -0.075314 -0.435398 -0.04151 2 8 5 2 4 \n", + " 1883 -0.075314 -0.435398 -0.04151 2 7 3 1 2 \n", + " 438 -0.075314 -0.435398 -0.04151 2 8 5 2 4 \n", + " 9049 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 3794 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 8849 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 4113 -0.075314 3.381048 -0.04151 1 8 3 2 2 \n", + " 2986 -0.075314 0.836751 -0.04151 2 8 3 2 4 \n", + " 6828 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 8640 -0.075314 0.836751 -0.04151 1 2 4 2 1 \n", + " 2280 -0.075314 -0.435398 -0.04151 2 7 5 2 2 \n", + " 8756 -0.075314 -0.435398 -0.04151 1 7 1 2 4 \n", + " 7536 -0.075314 -0.435398 -0.04151 2 7 5 2 2 \n", + " 4712 -0.075314 -0.435398 -0.04151 1 1 5 2 2 \n", + " 373 -0.075314 -0.435398 -0.04151 1 8 4 2 2 \n", + " 4539 -0.075314 3.381048 -0.04151 2 8 1 3 2 \n", + " 8644 -0.075314 -0.435398 -0.04151 3 1 3 1 2 \n", + " 2585 -0.075314 -0.435398 -0.04151 1 8 5 1 4 \n", + " 1332 -0.075314 -0.435398 -0.04151 2 8 3 1 4 \n", + " 7838 -0.075314 -0.435398 -0.04151 1 7 3 2 4 \n", + " 9047 -0.075314 -0.435398 -0.04151 2 7 5 1 4 \n", + " 3774 -0.075314 0.836751 -0.04151 2 8 1 3 2 \n", + " 1784 -0.075314 -0.435398 -0.04151 1 8 3 2 2 \n", + " 9374 -0.075314 -0.435398 -0.04151 2 7 3 1 4 \n", + " ... ... ... ... ... ... ... ... ... 
\n", + " 2734 -0.075314 0.836751 -0.04151 3 7 3 1 2 \n", + " 189 -0.075314 -0.435398 -0.04151 1 7 4 2 4 \n", + " 9167 -0.075314 -0.435398 -0.04151 1 8 5 3 4 \n", + " 2747 -0.075314 -0.435398 -0.04151 1 8 4 2 4 \n", + " 2047 -0.075314 -0.435398 -0.04151 2 8 3 2 2 \n", + " 7849 -0.075314 -0.435398 -0.04151 2 7 5 2 4 \n", + " 2558 -0.075314 0.836751 -0.04151 1 8 4 2 2 \n", + " 9274 -0.075314 -0.435398 -0.04151 3 7 5 2 2 \n", + " 8666 -0.075314 -0.435398 -0.04151 2 3 4 2 2 \n", + " 6396 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 3385 -0.075314 0.836751 -0.04151 1 8 5 2 2 \n", + " 4555 -0.075314 -0.435398 -0.04151 3 8 5 2 4 \n", + " 1184 -0.075314 -0.435398 -0.04151 1 12 4 2 2 \n", + " 6420 -0.075314 -0.435398 -0.04151 3 7 5 3 4 \n", + " 5051 -0.075314 -0.435398 -0.04151 1 8 4 3 2 \n", + " 5311 -0.075314 0.836751 -0.04151 1 7 3 1 2 \n", + " 2433 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 6949 -0.075314 0.836751 -0.04151 1 8 3 1 4 \n", + " 769 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 1685 -0.075314 -0.435398 -0.04151 1 1 5 2 3 \n", + " 8322 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 5578 -0.075314 -0.435398 -0.04151 1 1 5 2 2 \n", + " 4426 -0.075314 -0.435398 -0.04151 1 8 1 2 3 \n", + " 466 -0.075314 0.836751 -0.04151 1 2 4 2 3 \n", + " 6265 -0.075314 0.836751 -0.04151 1 8 3 1 4 \n", + " 5734 -0.075314 -0.435398 -0.04151 2 7 5 2 4 \n", + " 5191 -0.075314 -0.435398 -0.04151 1 7 5 2 2 \n", + " 5390 -0.075314 -0.435398 -0.04151 2 8 5 2 3 \n", + " 860 -0.075314 -0.435398 -0.04151 1 8 3 2 4 \n", + " 7270 -0.075314 -0.435398 -0.04151 1 8 3 2 4 \n", + " \n", + " [6283 rows x 46 columns],\n", + " x1 x2 x3 x4 x5 x6 x7 x8 x9 x10 ... x37 x38 \\\n", + " 2260 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4428 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5304 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2246 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6069 0 1 1 1 0 1 1 0 0 1 ... 
-0.208086 -0.066014 \n", + " 8435 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 360 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2802 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7690 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7143 0 1 1 1 0 0 1 0 0 1 ... 1.431943 -0.066014 \n", + " 169 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 9188 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4344 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3208 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 483 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3261 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7766 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1183 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3307 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7931 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3473 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1603 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2287 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3838 0 0 1 1 0 0 1 0 0 1 ... 1.431943 -0.066014 \n", + " 1084 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5418 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8159 1 0 1 0 0 0 1 0 0 1 ... 4.712000 -0.066014 \n", + " 8410 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 416 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3653 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " ... .. .. .. .. .. .. .. .. .. ... ... ... ... \n", + " 1669 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2112 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8035 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6066 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7015 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5404 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 84 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6086 0 1 1 1 0 1 1 0 0 1 ... 
-0.208086 -0.066014 \n", + " 7229 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2283 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2887 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8131 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 5281 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 418 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7888 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 7367 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 2709 0 1 1 1 0 0 1 1 0 1 ... 7.992058 -0.066014 \n", + " 2357 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 6578 1 1 1 0 0 1 0 0 0 1 ... -0.208086 -0.066014 \n", + " 1366 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 293 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 9325 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8869 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1506 1 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8468 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8405 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 1634 0 1 1 1 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 4011 1 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 8290 0 1 1 0 0 1 1 0 0 1 ... -0.208086 -0.066014 \n", + " 3315 1 1 1 0 1 1 1 0 0 1 ... 
-0.208086 -0.066014 \n", + " \n", + " x39 x40 x41 x42 x43 x44 x45 x46 \n", + " 2260 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 4428 -0.075314 0.836751 -0.04151 1 8 3 2 4 \n", + " 5304 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 2246 -0.075314 -0.435398 -0.04151 1 8 1 2 4 \n", + " 6069 -0.075314 -0.435398 -0.04151 2 8 4 2 2 \n", + " 8435 -0.075314 0.836751 -0.04151 1 8 4 1 4 \n", + " 360 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 2802 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 7690 -0.075314 -0.435398 -0.04151 1 7 5 1 3 \n", + " 7143 -0.075314 -0.435398 -0.04151 2 8 5 2 4 \n", + " 169 -0.075314 -0.435398 -0.04151 3 7 5 2 4 \n", + " 9188 -0.075314 -0.435398 -0.04151 1 8 5 1 4 \n", + " 4344 -0.075314 -0.435398 -0.04151 2 8 5 1 4 \n", + " 3208 -0.075314 0.836751 -0.04151 2 7 3 2 4 \n", + " 483 -0.075314 -0.435398 -0.04151 2 7 2 1 4 \n", + " 3261 -0.075314 -0.435398 -0.04151 3 7 5 2 4 \n", + " 7766 -0.075314 -0.435398 -0.04151 2 8 3 2 4 \n", + " 1183 -0.075314 -0.435398 -0.04151 1 7 2 1 4 \n", + " 3307 -0.075314 -0.435398 -0.04151 1 1 5 2 2 \n", + " 7931 -0.075314 -0.435398 -0.04151 2 7 5 2 4 \n", + " 3473 -0.075314 0.836751 -0.04151 1 8 4 2 2 \n", + " 1603 -0.075314 -0.435398 -0.04151 1 7 3 1 4 \n", + " 2287 -0.075314 -0.435398 -0.04151 3 1 5 3 4 \n", + " 3838 -0.075314 -0.435398 -0.04151 1 8 2 2 4 \n", + " 1084 -0.075314 -0.435398 -0.04151 2 7 5 2 4 \n", + " 5418 -0.075314 -0.435398 -0.04151 2 8 3 2 4 \n", + " 8159 -0.075314 3.381048 -0.04151 1 8 5 2 4 \n", + " 8410 -0.075314 -0.435398 -0.04151 1 8 3 2 4 \n", + " 416 -0.075314 -0.435398 -0.04151 2 6 2 2 4 \n", + " 3653 -0.075314 -0.435398 -0.04151 1 8 4 2 4 \n", + " ... ... ... ... ... ... ... ... ... 
\n", + " 1669 -0.075314 -0.435398 -0.04151 1 1 5 2 2 \n", + " 2112 -0.075314 0.836751 -0.04151 1 2 4 3 3 \n", + " 8035 -0.075314 -0.435398 -0.04151 1 1 5 2 2 \n", + " 6066 -0.075314 -0.435398 -0.04151 3 8 4 2 4 \n", + " 7015 -0.075314 -0.435398 -0.04151 2 8 1 2 4 \n", + " 5404 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 84 -0.075314 -0.435398 -0.04151 2 8 5 2 4 \n", + " 6086 -0.075314 -0.435398 -0.04151 2 1 1 2 3 \n", + " 7229 -0.075314 -0.435398 -0.04151 2 1 5 2 3 \n", + " 2283 -0.075314 0.836751 -0.04151 1 8 5 2 2 \n", + " 2887 -0.075314 -0.435398 -0.04151 2 8 3 2 4 \n", + " 8131 -0.075314 -0.435398 -0.04151 3 8 1 3 4 \n", + " 5281 -0.075314 -0.435398 -0.04151 2 7 5 2 4 \n", + " 418 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 7888 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 7367 -0.075314 -0.435398 -0.04151 2 1 5 2 2 \n", + " 2709 -0.075314 -0.435398 -0.04151 3 8 3 2 4 \n", + " 2357 -0.075314 -0.435398 -0.04151 3 7 3 2 2 \n", + " 6578 -0.075314 -0.435398 -0.04151 1 8 3 2 4 \n", + " 1366 -0.075314 -0.435398 -0.04151 1 8 5 2 4 \n", + " 293 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 9325 -0.075314 0.836751 -0.04151 1 8 5 2 4 \n", + " 8869 -0.075314 -0.435398 -0.04151 1 7 3 2 2 \n", + " 1506 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " 8468 -0.075314 3.381048 -0.04151 1 8 3 2 2 \n", + " 8405 -0.075314 -0.435398 -0.04151 3 7 3 2 2 \n", + " 1634 -0.075314 -0.435398 -0.04151 1 1 3 2 4 \n", + " 4011 -0.075314 3.381048 -0.04151 3 8 5 1 4 \n", + " 8290 -0.075314 -0.435398 -0.04151 1 8 1 2 2 \n", + " 3315 -0.075314 -0.435398 -0.04151 1 7 5 2 4 \n", + " \n", + " [3096 rows x 46 columns],\n", + " 5667 0\n", + " 7263 0\n", + " 1563 0\n", + " 4950 0\n", + " 8030 0\n", + " 7040 0\n", + " 1979 0\n", + " 1883 0\n", + " 438 0\n", + " 9049 0\n", + " 3794 0\n", + " 8849 0\n", + " 4113 0\n", + " 2986 0\n", + " 6828 0\n", + " 8640 0\n", + " 2280 0\n", + " 8756 0\n", + " 7536 0\n", + " 4712 0\n", + " 373 0\n", + " 4539 0\n", + " 8644 0\n", + " 2585 0\n", + " 1332 
0\n", + " 7838 0\n", + " 9047 0\n", + " 3774 0\n", + " 1784 0\n", + " 9374 0\n", + " ..\n", + " 2734 0\n", + " 189 0\n", + " 9167 0\n", + " 2747 0\n", + " 2047 1\n", + " 7849 0\n", + " 2558 0\n", + " 9274 0\n", + " 8666 0\n", + " 6396 0\n", + " 3385 0\n", + " 4555 0\n", + " 1184 0\n", + " 6420 0\n", + " 5051 0\n", + " 5311 0\n", + " 2433 0\n", + " 6949 0\n", + " 769 0\n", + " 1685 0\n", + " 8322 0\n", + " 5578 0\n", + " 4426 0\n", + " 466 0\n", + " 6265 0\n", + " 5734 0\n", + " 5191 0\n", + " 5390 0\n", + " 860 0\n", + " 7270 0\n", + " Name: target, dtype: float64,\n", + " 2260 0\n", + " 4428 0\n", + " 5304 0\n", + " 2246 0\n", + " 6069 0\n", + " 8435 0\n", + " 360 0\n", + " 2802 0\n", + " 7690 0\n", + " 7143 0\n", + " 169 1\n", + " 9188 0\n", + " 4344 0\n", + " 3208 0\n", + " 483 0\n", + " 3261 0\n", + " 7766 0\n", + " 1183 0\n", + " 3307 0\n", + " 7931 0\n", + " 3473 0\n", + " 1603 0\n", + " 2287 0\n", + " 3838 0\n", + " 1084 0\n", + " 5418 0\n", + " 8159 0\n", + " 8410 0\n", + " 416 0\n", + " 3653 0\n", + " ..\n", + " 1669 1\n", + " 2112 0\n", + " 8035 0\n", + " 6066 0\n", + " 7015 0\n", + " 5404 0\n", + " 84 0\n", + " 6086 0\n", + " 7229 0\n", + " 2283 0\n", + " 2887 0\n", + " 8131 0\n", + " 5281 0\n", + " 418 0\n", + " 7888 0\n", + " 7367 0\n", + " 2709 0\n", + " 2357 0\n", + " 6578 0\n", + " 1366 0\n", + " 293 0\n", + " 9325 0\n", + " 8869 0\n", + " 1506 1\n", + " 8468 0\n", + " 8405 0\n", + " 1634 0\n", + " 4011 0\n", + " 8290 0\n", + " 3315 0\n", + " Name: target, dtype: float64,\n", + " array([[ 53.428571, 1028.571429, 82.742857, 0. ],\n", + " [ 171.714286, 1885.714286, 308.994286, 0. ],\n", + " [ 148.571429, 1628.571429, 267.131429, 0. ],\n", + " ..., \n", + " [ 110. , 1200. , 197.36 , 0. ],\n", + " [ 87.714286, 1714.285714, 136.571429, 0. ],\n", + " [ 62. , 1200. , 96.2 , 0. ]]),\n", + " array([[ 62. , 1200. , 96.2 , 0. ],\n", + " [ 87.714286, 1714.285714, 136.571429, 0. ],\n", + " [ 53.428571, 1028.571429, 82.742857, 0. 
],\n", + " ..., \n", + " [ 66.285714, 1285.714286, 102.928571, 0. ],\n", + " [ 74. , 1028.571429, 121.828571, 0. ],\n", + " [ 66.285714, 1285.714286, 102.928571, 0. ]])]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "temp" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2939\n", + "1 157\n", + "Name: target, dtype: int64" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_test.value_counts(normalize=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5991\n", + "1 292\n", + "Name: target, dtype: int64" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_train.value_counts(normalize=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# Bagging y Minimum risk\n" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. 
\"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. 
\"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. 
\"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. \"\n", + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. 
\"\n" + ] + } + ], + "source": [ + "from sklearn.ensemble import BaggingClassifier\n", + "cost_loss_tot=0\n", + "\n", + "res =np.zeros(100)\n", + "res1=np.zeros(100)\n", + "res2=np.zeros(100)\n", + "\n", + "for w in range (1,101):\n", + " \n", + " \n", + "\n", + " dt = DecisionTreeClassifier(max_depth=4,random_state=42)\n", + " bagreg = BaggingClassifier(dt, n_estimators=w, \n", + " bootstrap=True, oob_score=True, random_state=1)\n", + "\n", + " bagreg.fit(X_train, y_train)\n", + " \n", + " y_pred = bagreg.predict_proba(X_test)\n", + "\n", + " # 'C_FP','C_FN','C_TP','C_TN'\n", + "\n", + " y_pred_metacost = np.zeros(X_test.shape[0])\n", + "\n", + "\n", + " for i in range(X_test.shape[0]):\n", + " c_j0 = y_pred[i, 0] * cost_mat_test[i, 3] + y_pred[i, 1]* cost_mat_test[i, 1]\n", + " c_j1 = y_pred[i, 0] * cost_mat_test[i, 0] + y_pred[i, 1]* cost_mat_test[i, 2]\n", + " \n", + " if c_j0 > c_j1:\n", + " y_pred_metacost[i] = 1\n", + "\n", + " y_pred_bagging = bagreg.predict(X_test)\n", + "\n", + " y_pred_bagging\n", + "\n", + " from costcla.metrics import savings_score, cost_loss \n", + "\n", + " cost_loss1=cost_loss(y_test, y_pred_bagging, cost_mat_test)\n", + "\n", + " cost_loss2=cost_loss(y_test, y_pred_metacost, cost_mat_test)\n", + " \n", + " cost_loss_tot=cost_loss1-cost_loss2\n", + "\n", + " res[w-1] =cost_loss_tot\n", + " res1[w-1]=cost_loss1\n", + " res2[w-1]=cost_loss2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RESULTADOS METACOST" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "150477.54279500002" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost_loss2=cost_loss(y_test, y_pred_metacost, cost_mat_test)\n", + "cost_loss2" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + 
"text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Valor Metacost
0140836.642812
1141260.928524
2146430.499941
3147687.094226
4142435.642808
\n", + "
" + ], + "text/plain": [ + " Valor Metacost\n", + "0 140836.642812\n", + "1 141260.928524\n", + "2 146430.499941\n", + "3 147687.094226\n", + "4 142435.642808" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2=DataFrame(res2,columns=['Valor Metacost'])\n", + "res2.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Metacost 139281.471377\n", + "dtype: float64" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Metacost 8\n", + "dtype: int64" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res2.idxmin()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "minimun=139281.471377" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RESULTADOS BAGGING" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "190800.00001999998" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + " cost_loss1=cost_loss(y_test, y_pred_bagging, cost_mat_test)\n", + " cost_loss1" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Valor Bagging
0190829.428591
1190800.000020
2191067.428592
3190800.000020
4190800.000020
\n", + "
" + ], + "text/plain": [ + " Valor Bagging\n", + "0 190829.428591\n", + "1 190800.000020\n", + "2 191067.428592\n", + "3 190800.000020\n", + "4 190800.000020" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res1=DataFrame(res1,columns=['Valor Bagging'])\n", + "res1.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Bagging 190800.00002\n", + "dtype: float64" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res1.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Bagging 1\n", + "dtype: int64" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res1.idxmin()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AHORRO METACOST-BAGGING" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Valor Ahorro
049992.785779
149539.071496
244636.928651
343112.905794
448364.357212
\n", + "
" + ], + "text/plain": [ + " Valor Ahorro\n", + "0 49992.785779\n", + "1 49539.071496\n", + "2 44636.928651\n", + "3 43112.905794\n", + "4 48364.357212" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res=DataFrame(res,columns=['Valor Ahorro'])\n", + "res.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Ahorro 51518.528643\n", + "dtype: float64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res.max()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Ahorro 76\n", + "dtype: int64" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res.idxmin()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# REGRESIÓN LOGÍSTICA" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=10000000.0, class_weight=None, dual=False,\n", + " fit_intercept=True, intercept_scaling=1, max_iter=100,\n", + " multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,\n", + " solver='liblinear', tol=0.0001, verbose=0, warm_start=False)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "logreg = LogisticRegression(C=1e7)\n", + "logreg.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "\n", + "y_pred_log=logreg.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + 
"collapsed": false + }, + "outputs": [], + "source": [ + "minimunlog=cost_loss(y_test, y_pred_log, cost_mat_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "190800.00001999998" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "minimunlog" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AHORRO METACOST- REGRESIÓN LOGÍSTICA" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "51518.528642999969" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "minimunlog-minimun" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "collapsed": true + }, + "source": [ + "# RANDOM FOREST" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.ensemble import RandomForestClassifier\n", + "res3=np.zeros(100)\n", + "ran=10\n", + "\n", + "for r in range (1,101):\n", + " \n", + " ran=ran+10\n", + " rfclas = RandomForestClassifier(n_estimators=1000,random_state=ran, n_jobs=-1)\n", + " rfclas.fit(X_train,y_train)\n", + " y_pred_ran = rfclas.predict(X_test)\n", + "\n", + " cost_loss1=cost_loss(y_test, y_pred_ran, cost_mat_test)\n", + " res3[r-1] =cost_loss1\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "191768.17144599999" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost_loss(y_test, y_pred_ran, cost_mat_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + 
"text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Valor Ahorro
0191689.885732
1191554.742875
2191640.742875
3191694.171446
4191640.742875
\n", + "
" + ], + "text/plain": [ + " Valor Ahorro\n", + "0 191689.885732\n", + "1 191554.742875\n", + "2 191640.742875\n", + "3 191694.171446\n", + "4 191640.742875" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res3=DataFrame(res3,columns=['Valor Ahorro'])\n", + "res3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Ahorro 191468.742875\n", + "dtype: float64" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res3.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Ahorro 41\n", + "dtype: int64" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res3.idxmin()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AHORRO METACOST-RADOM FOREST" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "52272.52862299999" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "191554-minimun" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NAYVE BAYES " + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "gnb = GaussianNB()\n", + "gnb.fit(X_train,y_train)\n", + "y_pred_navy=gnb.predict(X_test)\n", + "pred = gnb.predict_proba(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "247910.46270899998" + ] + }, + 
"execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost_loss(y_test, y_pred_navy, cost_mat_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AHORRO METACOST-NAYVE BAYES" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "52273.27149799999" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "191554.742875-139281.471377" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# KNEIGHBORS CLASSIFIER" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "res4=np.zeros(40)\n", + "ran=20\n", + "\n", + "for r in range (1,41):\n", + " ran=ran+150\n", + " knn = KNeighborsClassifier(n_neighbors=ran)\n", + " knn.fit(X_train, y_train)\n", + " y_pred_knn=knn.predict(X_test)\n", + " pred = knn.predict_proba(X_test)\n", + " cost_loss1=cost_loss(y_test, y_pred_knn, cost_mat_test)\n", + " res4[r-1] =cost_loss1\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Valor Ahorro
0190800.00002
1190800.00002
2190800.00002
3190800.00002
4190800.00002
\n", + "
" + ], + "text/plain": [ + " Valor Ahorro\n", + "0 190800.00002\n", + "1 190800.00002\n", + "2 190800.00002\n", + "3 190800.00002\n", + "4 190800.00002" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res4=DataFrame(res4,columns=['Valor Ahorro'])\n", + "res4.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Ahorro 190800.00002\n", + "dtype: float64" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res4.min()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Valor Ahorro 0\n", + "dtype: int64" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "res4.idxmin()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# AHORRO METACOST-KNEIGHBORS CLASSIFIER" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "190800.00002-139281.471377" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# EVALUACION DE MODELOS " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# METACOST - BAGGING" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\freddialexsander\\Anaconda3\\lib\\site-packages\\sklearn\\ensemble\\bagging.py:537: UserWarning: Some inputs do not have OOB scores. This probably means too few estimators were used to compute any reliable oob estimates.\n", + " warn(\"Some inputs do not have OOB scores. 
\"\n" + ] + } + ], + "source": [ + " from sklearn.ensemble import BaggingClassifier\n", + "\n", + "dt = DecisionTreeClassifier(max_depth=4,random_state=42)\n", + "bagreg = BaggingClassifier(dt, n_estimators=9, \n", + " bootstrap=True, oob_score=True, random_state=1)\n", + "\n", + "bagreg.fit(X_train, y_train)\n", + " \n", + "y_pred = bagreg.predict_proba(X_test)\n", + "\n", + " # 'C_FP','C_FN','C_TP','C_TN'\n", + "\n", + "y_pred_metacost = np.zeros(X_test.shape[0])\n", + "\n", + "\n", + "for i in range (1,3096):\n", + " c_j0 = y_pred[i, 0] * cost_mat_test[i, 3] + y_pred[i, 1]* cost_mat_test[i, 1]\n", + " c_j1 = y_pred[i, 0] * cost_mat_test[i, 0] + y_pred[i, 1]* cost_mat_test[i, 2]\n", + " \n", + " if c_j0 > c_j1:\n", + " y_pred_metacost[i] = 1\n", + "\n", + "y_pred_bagging = bagreg.predict(X_test)\n", + "\n", + "y_pred_bagging\n", + "\n", + "from costcla.metrics import savings_score, cost_loss \n", + "\n", + "cost_loss1=cost_loss(y_test, y_pred_bagging, cost_mat_test)\n", + "cost_loss2=cost_loss(y_test, y_pred_metacost, cost_mat_test)\n", + " \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "190800.00001999998" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost_loss1" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import f1_score\n", + "from sklearn.metrics import log_loss" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "a=accuracy_score(y_test, y_pred_metacost)" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.96965116279069696" + ] + 
}, + "execution_count": 48, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "a" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.19441770933589989" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1_score(y_test, y_pred_metacost)" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "9.3377197442290782" + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_loss(y_test, y_pred_metacost)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.94928940568475451" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy_score(y_test, y_pred_bagging)" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1_score(y_test, y_pred_bagging)" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1.7514818779072931" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } 
+ ], + "source": [ + "log_loss(y_test, y_pred_bagging)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# REGRESIÓN LOGISTICA" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "LogisticRegression(C=10000000.0, class_weight=None, dual=False,\n", + " fit_intercept=True, intercept_scaling=1, max_iter=100,\n", + " multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,\n", + " solver='liblinear', tol=0.0001, verbose=0, warm_start=False)" + ] + }, + "execution_count": 61, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "logreg = LogisticRegression(C=1e7)\n", + "logreg.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "y_pred_log=logreg.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.94928940568475451" + ] + }, + "execution_count": 63, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy_score(y_test, y_pred_log)" + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1_score(y_test, y_pred_log)" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 65, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1.7514818779072931" + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_loss(y_test, y_pred_log)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RANDOM FOREST" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "rfclas = RandomForestClassifier(n_estimators=1000,random_state=120, n_jobs=-1)\n", + "rfclas.fit(X_train,y_train)\n", + "y_pred_ran = rfclas.predict(X_test)\n", + "\n", + "cost_loss1=cost_loss(y_test, y_pred_ran, cost_mat_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "191554.742875" + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "cost_loss1" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.94186046511627908" + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy_score(y_test, y_pred_ran)" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.01098901098901099" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1_score(y_test, y_pred_ran)" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "2.0080745934826387" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_loss(y_test, y_pred_ran)" + ] + 
}, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# NAVY BAYES" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "gnb = GaussianNB()\n", + "gnb.fit(X_train,y_train)\n", + "y_pred_navy=gnb.predict(X_test)\n", + "pred = gnb.predict_proba(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.11531007751937984" + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy_score(y_test, y_pred_navy)" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.099309437684972024" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1_score(y_test, y_pred_navy)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "30.556813257570116" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_loss(y_test, y_pred_navy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# KNEIGHBORS CLASSIFIER" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "knn = KNeighborsClassifier(n_neighbors=160)\n", + "knn.fit(X_train, y_train)\n", + "y_pred_knn=knn.predict(X_test)\n", + "pred = knn.predict_proba(X_test)\n", + "cost_loss1=cost_loss(y_test, y_pred_knn, cost_mat_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "0.94928940568475451" + ] 
+ }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "accuracy_score(y_test, y_pred_knn)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\fbaron\\AppData\\Local\\Continuum\\Anaconda3\\lib\\site-packages\\sklearn\\metrics\\classification.py:1074: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n", + " 'precision', 'predicted', average, warn_for)\n" + ] + }, + { + "data": { + "text/plain": [ + "0.0" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f1_score(y_test, y_pred_knn)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": { + "collapsed": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "1.7514818779072931" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "log_loss(y_test, y_pred_knn)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GRAFÍCAS" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "Modelos = (\"MetaCost\",\"Bagging\",\"R.Log\", \"R.Forest\", \"N.Bayes\",\"knn\")\n", + "posicion_y = np.arange(len(Modelos))\n", + "unidades = (139281.471377, 190800.00002,190800.00001999998,191468.742875,247910.46270899998, 190800.00002)\n", + "plt.barh(posicion_y, unidades, align = \"center\")\n", + "plt.yticks(posicion_y, Modelos)\n", + "plt.xlabel(\"DOLARES\")\n", + "plt.title(\"COSTO MÍNIMO\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "Modelos = (\"MetaCost\",\"Bagging\",\"R.Log\", \"R.Forest\", \"N.Bayes\",\"knn\")\n", + "posicion_y = 
np.arange(len(Modelos))\n", + "unidades = (0.96965116279069696, 0.94928940568475451,0.94928940568475451,0.94186046511627908,0.11531007751937984,0.94928940568475451)\n", + "plt.barh(posicion_y, unidades, align = \"center\")\n", + "plt.yticks(posicion_y, Modelos)\n", + "plt.xlabel('ACCURACY')\n", + "plt.title(\"EVALUACIÓN DE MODELOS\")\n", + "plt.show()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.5.1" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}