# Exercise 1: do Dragon-type Pokemon have more HP on average than Grass-type?
# NOTE(review): `pd` and `st` are expected to come from the notebook's import
# cell (pandas and, presumably, scipy.stats) -- confirm.

POKEMON_URL = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv"

# Significance level used for the accept/reject decision. The previous code
# compared the p-value against 0.5, which rejects the null hypothesis for
# almost any positive difference.
ALPHA = 0.05


def hp_by_type(data, type_name):
    """Return the ``HP`` values of the rows whose ``Type 1`` equals *type_name*."""
    return data.loc[data["Type 1"] == type_name, "HP"]


def dragon_vs_grass_hp(data, alpha=ALPHA):
    """Test whether mean Dragon HP is greater than mean Grass HP.

    Runs a one-sided, two-sample t-test (``alternative="greater"``) on the
    ``HP`` column of the ``Dragon`` and ``Grass`` rows of *data*.

    Args:
        data: DataFrame with at least the ``Type 1`` and ``HP`` columns.
        alpha: Significance level for the decision.

    Returns:
        Tuple ``(t_stat, p_value, reject)`` where ``reject`` is ``True`` when
        ``p_value < alpha``.
    """
    t_dragon = hp_by_type(data, "Dragon")
    t_cesped = hp_by_type(data, "Grass")
    # Same call as before: scipy's default (pooled-variance) test, one-sided.
    t_stat, p_value = st.ttest_ind(t_dragon, t_cesped, alternative="greater")
    return t_stat, p_value, bool(p_value < alpha)


# Guarded so the helpers above can be imported and tested without downloading
# the CSV. In a notebook cell or a script ``__name__`` is "__main__", so the
# analysis still runs and ``pokemon`` stays available to later cells.
if __name__ == "__main__":
    pokemon = pd.read_csv(POKEMON_URL)

    t_stat, p_value, reject = dragon_vs_grass_hp(pokemon)
    print(f"El valor del test es {t_stat} y el valor de p es {p_value}")

    if reject:
        print("Se rechaza la hipotesis al tener el tipo dragon un Hp mayor al tipo planta.")
    else:
        print("No se rechaza la hipotesis, pero por falta de datos.")
# Exercise 2: do Legendary Pokemon differ from non-Legendary ones on each stat?
# NOTE(review): `st` is expected to be scipy.stats from the notebook's import
# cell, and `pokemon` the DataFrame loaded in an earlier cell -- confirm.

# Stats compared between the two groups.
ATRIBUTOS = ("HP", "Attack", "Defense", "Sp. Atk", "Sp. Def", "Speed")


def compare_legendary_stats(data, atributos=ATRIBUTOS, alpha=0.05):
    """Run one Welch t-test per stat between Legendary and non-Legendary rows.

    The significance level is Bonferroni-adjusted (``alpha / len(atributos)``)
    because several tests are run on the same data.

    Args:
        data: DataFrame with a ``Legendary`` column and one column per stat.
        atributos: Names of the stat columns to compare.
        alpha: Family-wise significance level before the adjustment.

    Returns:
        Dict mapping each stat name to ``(t_stat, p_value, reject)``, in the
        order given by *atributos*. Empty when *atributos* is empty.
    """
    atributos = list(atributos)
    if not atributos:
        # Nothing to test; also avoids dividing by zero below.
        return {}

    t_legendario = data[data["Legendary"].eq(True)]
    n_t_legendario = data[data["Legendary"].eq(False)]
    ajustes = alpha / len(atributos)

    resultados = {}
    for atributo in atributos:
        t_stat, p_value = st.ttest_ind(
            t_legendario[atributo],
            n_t_legendario[atributo],
            equal_var=False,
        )
        resultados[atributo] = (t_stat, p_value, bool(p_value < ajustes))
    return resultados


# Guarded so the helper can be imported and tested without the notebook state.
# In a notebook cell or a script ``__name__`` is "__main__".
if __name__ == "__main__":
    pokemon.info()

    # Report every stat. Previously the print and the decision sat outside the
    # loop, so only the last stat was shown, labelled with the whole list.
    for atributo, (t_stat, p_value, reject) in compare_legendary_stats(pokemon).items():
        print(f"Comparacion para {atributo}, el valor de t_student es {t_stat}, para p es {p_value}")
        if reject:
            print(f"Se rechaza la hipotesis debido que hay una diferencia significativa entre {atributo} de los legendarios y los no legendarios.")
        else:
            print("No se rechaza la hipotesis al no haber una diferencia signficativa entre categorías.")
# Exercise 3: are houses close to a school or a hospital priced differently
# from the rest?
# NOTE(review): `pd`, `np` and `st` are expected to come from the notebook's
# import cell (pandas, numpy and, presumably, scipy.stats) -- confirm.

HOUSING_URL = "https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv"

# Reference coordinates as (x, y) pairs.
coor_escuela = (-118, 34)
coor_hosp = (-122, 37)


def dist_eu(x1, y1, x2, y2):
    """Return the Euclidean distance between ``(x1, y1)`` and ``(x2, y2)``.

    Works element-wise when the arguments are arrays or Series.
    """
    return np.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)


def add_proximity(data, x_col="longitude", y_col="latitude", umbral=0.5):
    """Return a copy of *data* with distance and proximity columns added.

    Adds ``d_hospital`` and ``d_escuela`` (distance to ``coor_hosp`` and
    ``coor_escuela``) and a boolean ``cerca`` column that is ``True`` when
    either distance is below *umbral*.

    Args:
        data: DataFrame holding the coordinate columns.
        x_col: Name of the x-coordinate column.
        y_col: Name of the y-coordinate column.
        umbral: Distance threshold, in the same units as the coordinates.

    NOTE(review): the previous code read columns ``X``/``Y``; the defaults
    here assume the CSV uses ``longitude``/``latitude`` -- confirm with
    ``distancias.columns`` and pass other names if needed.
    """
    resultado = data.copy()
    # Vectorised over the whole column instead of a row-wise ``apply``.
    resultado["d_hospital"] = dist_eu(resultado[x_col], resultado[y_col], coor_hosp[0], coor_hosp[1])
    # Previously this second distance overwrote ``d_hospital`` and
    # ``d_escuela`` was never created.
    resultado["d_escuela"] = dist_eu(resultado[x_col], resultado[y_col], coor_escuela[0], coor_escuela[1])
    resultado["cerca"] = (resultado["d_hospital"] < umbral) | (resultado["d_escuela"] < umbral)
    return resultado


def compare_prices(data, price_col="median_house_value", alpha=0.05):
    """Welch t-test of prices for rows flagged ``cerca`` versus the rest.

    Args:
        data: DataFrame with a boolean ``cerca`` column and a price column.
        price_col: Name of the price column.
        alpha: Significance level for the decision.

    Returns:
        Tuple ``(t_stat, p_value, reject)`` where ``reject`` is ``True`` when
        ``p_value < alpha`` (a NaN p-value therefore never rejects).

    NOTE(review): the previous code read a ``Price`` column; the default here
    assumes the CSV names it ``median_house_value`` -- confirm.
    """
    # "Far" is simply the complement of "near"; there is no ``Lejos`` column.
    cerca = data.loc[data["cerca"], price_col]
    lejos = data.loc[~data["cerca"], price_col]
    t_stat, p_value = st.ttest_ind(cerca, lejos, equal_var=False)
    return t_stat, p_value, bool(p_value < alpha)


# Guarded so the helpers above can be imported and tested without downloading
# the CSV. In a notebook cell or a script ``__name__`` is "__main__".
if __name__ == "__main__":
    distancias = pd.read_csv(HOUSING_URL)
    distancias = add_proximity(distancias)

    t_stat, p_value, reject = compare_prices(distancias)
    print(f"Valor de t {t_stat}, valor de p {p_value}")

    # Decision at the 0.05 level (was 0.5).
    if reject:
        print("Se rechaza la hipotesis nula.")
    else:
        print("No se rechaza la hipotesis.")