From bb302a4d12e81f1ef2b86d376d6479259e778433 Mon Sep 17 00:00:00 2001 From: vdiazpliego Date: Tue, 1 Jun 2021 17:58:39 +0200 Subject: [PATCH] pr --- ...Supervised Classification-checkpoint.ipynb | 340 ++++++++++++++++++ Supervised Classification.ipynb | 246 ++++++++++++- 2 files changed, 582 insertions(+), 4 deletions(-) create mode 100644 .ipynb_checkpoints/Supervised Classification-checkpoint.ipynb diff --git a/.ipynb_checkpoints/Supervised Classification-checkpoint.ipynb b/.ipynb_checkpoints/Supervised Classification-checkpoint.ipynb new file mode 100644 index 0000000..d32c0f8 --- /dev/null +++ b/.ipynb_checkpoints/Supervised Classification-checkpoint.ipynb @@ -0,0 +1,340 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Supervised classification\n", + "\n", + "The data.csv file contains letters (uppercase and lowercase) and numbers, each stored as a 28x28-pixel image flattened into one row.\n", + "\n", + "* First, you need to know which labels are which: visualize some of the data to work out which numeric labels represent letters and which represent numbers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Comenzando...\n", + "\n", + "Modelo SoftMax (MLR) : Numeros\n" + ] + } + ], + "source": [ + "# Import libraries\n", + "import zipfile\n", + "zf=zipfile.ZipFile('data_all.zip')\n", + "import time # para tiempo\n", + "inicio=time.time() # momento inicial\n", + "print ('Comenzando...\\n')\n", + "print ('Modelo SoftMax (MLR) : Numeros') \n", + "import pandas as pd # dataframe\n", + "import numpy as np # numerical python, algebra lineal\n", + "\n", + "import matplotlib.pyplot as plt # plots, graficos\n", + "import seaborn as sns # plots\n", + "%matplotlib inline\n", + "\n", + "from sklearn.metrics import confusion_matrix # metricas, matriz de confusion\n", + "from scipy.optimize import minimize # minimizar, opt\n", + "\n", + "from sklearn.datasets import make_circles, load_boston # datasets\n", + "from sklearn.model_selection import train_test_split as tts\n", + "\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "import warnings # avisos\n", + "warnings.filterwarnings('ignore', category=RuntimeWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
e123456789...775776777778779780781782783784
036.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
23.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
333.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
430.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 785 columns

\n", + "
" + ], + "text/plain": [ + " e 1 2 3 4 5 6 7 8 9 ... 775 776 777 778 \\\n", + "0 36.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "2 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "3 33.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "4 30.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "\n", + " 779 780 781 782 783 784 \n", + "0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[5 rows x 785 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df=pd.read_csv(zf.open('data_all.csv'), nrows=1e5)\n", + "\n", + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Now, try to train a classifier model to predict the uppercases. Use every single model you know for classification." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Try to do the same thing with lowercases." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "* Try to do the same thing with numbers." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Your code here" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/Supervised Classification.ipynb b/Supervised Classification.ipynb index cdfc860..d32c0f8 100755 --- a/Supervised Classification.ipynb +++ b/Supervised Classification.ipynb @@ -13,11 +13,249 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Comenzando...\n", + "\n", + "Modelo SoftMax (MLR) : Numeros\n" + ] + } + ], + "source": [ + "# Import libraries\n", + "import zipfile\n", + "zf=zipfile.ZipFile('data_all.zip')\n", + "import time # para tiempo\n", + "inicio=time.time() # momento inicial\n", + "print ('Comenzando...\\n')\n", + "print ('Modelo SoftMax (MLR) : Numeros') \n", + "import pandas as pd # dataframe\n", + "import numpy as np # numerical python, algebra lineal\n", + "\n", + "import matplotlib.pyplot as plt # plots, graficos\n", + "import seaborn as sns # plots\n", + "%matplotlib inline\n", + "\n", + "from sklearn.metrics import confusion_matrix # metricas, matriz de confusion\n", + "from scipy.optimize import minimize # minimizar, opt\n", + "\n", + "from sklearn.datasets import make_circles, load_boston # datasets\n", + "from sklearn.model_selection import train_test_split as tts\n", + "\n", + "from sklearn.model_selection import GridSearchCV\n", + "from sklearn.model_selection import RandomizedSearchCV\n", + "\n", + "import warnings # avisos\n", + 
"warnings.filterwarnings('ignore', category=RuntimeWarning)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
e123456789...775776777778779780781782783784
036.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
10.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
23.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
333.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
430.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 785 columns

\n", + "
" + ], + "text/plain": [ + " e 1 2 3 4 5 6 7 8 9 ... 775 776 777 778 \\\n", + "0 36.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "2 3.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "3 33.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "4 30.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 \n", + "\n", + " 779 780 781 782 783 784 \n", + "0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[5 rows x 785 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "# Import libraries" + "df=pd.read_csv(zf.open('data_all.csv'), nrows=1e5)\n", + "\n", + "df.head()" ] }, { @@ -94,7 +332,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.5" } }, "nbformat": 4,