Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Laboratorio de testeo de hipótesis #225

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 109 additions & 23 deletions lab-hypothesis-testing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -51,7 +51,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -278,14 +278,14 @@
"[800 rows x 11 columns]"
]
},
"execution_count": 3,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n",
"df"
"pokemon = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/pokemon.csv\")\n",
"pokemon"
]
},
{
Expand All @@ -297,11 +297,34 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 18,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"El valor del test es 3.590444254130357 y el valor de p es 0.0002567969150153481\n",
"Se rechaza la hipotesis al tener el tipo dragon un Hp mayor al tipo planta.\n"
]
}
],
"source": [
"#code here"
"# Filtramos:\n",
"t_dragon = pokemon[pokemon['Type 1'] == 'Dragon']['HP']\n",
"t_cesped = pokemon[pokemon['Type 1'] == 'Grass']['HP']\n",
"\n",
"# Hacemos T Student:\n",
"t_stat, p_value = st.ttest_ind(t_dragon, t_cesped, alternative='greater')\n",
"\n",
"# Obtenemos los resultados: \n",
"print(f\"El valor del test es {t_stat} y el valor de p es {p_value}\")\n",
"\n",
"# Evaluamos el valor p: \n",
"if p_value < 0.5:\n",
" print(\"Se rechaza la hipotesis al tener el tipo dragon un Hp mayor al tipo planta.\")\n",
"else:\n",
" print(\"No se rechaza la hipotesis, pero por falta de datos.\")"
]
},
{
Expand All @@ -313,11 +336,52 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"#code here"
"pokemon.info()"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Comparacion para ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed'], el valor de t_student es 11.47504444631443, para p es 1.0490163118824507e-18\n",
"Se rechaza la hipotesis debido que hay una diferencia significativa entre ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed'] de los legendarios y los no legendarios.\n"
]
}
],
"source": [
"# Filtrado: \n",
"t_legendario = pokemon[pokemon['Legendary'] == True]\n",
"n_t_lengendario = pokemon[pokemon['Legendary'] == False]\n",
"\n",
"# Atributos a comparar: \n",
"atributos = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']\n",
"\n",
"# Haceos t_student para cada atributo:\n",
"ajustes = 0.05 / len(atributos)\n",
"for atributo in atributos: \n",
" stadist_legendario = t_legendario[atributo]\n",
" stadist_no_legendario = n_t_lengendario[atributo]\n",
"\n",
"# Hacemos t-student:\n",
" t_stat, p_value = st.ttest_ind(stadist_legendario, stadist_no_legendario, equal_var=False) \n",
"\n",
"# Revisamos valores:\n",
"print(f\"Comparacion para {atributos}, el valor de t_student es {t_stat}, para p es {p_value}\")\n",
"\n",
"# Veamos si aceptamos o rechazmos la hipotesis: \n",
"if p_value < ajustes:\n",
" print(f\"Se rechaza la hipotesis debido que hay una diferencia significativa entre {atributos} de los legendarios y los no legendarios.\")\n",
"else: \n",
" print(\"No se rechaza la hipotesis al no haber una diferencia signficativa entre categorías.\")"
]
},
{
Expand All @@ -337,7 +401,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 34,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -453,14 +517,14 @@
"4 624.0 262.0 1.9250 65500.0 "
]
},
"execution_count": 5,
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
"df.head()"
"distancias = pd.read_csv(\"https://raw.githubusercontent.com/data-bootcamp-v4/data/main/california_housing.csv\")\n",
"distancias.head()"
]
},
{
Expand All @@ -486,14 +550,36 @@
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"# Se declaran las distancias:\n",
"coor_escuela = (-118, 34)\n",
"coor_hosp = (-122, 37)\n",
"\n",
"# Se crea la función para calcular la distancia euclidea:\n",
"def dist_eu (x1, y1, x2, y2):\n",
" return np.sqrt((x2 -x1)**2 + (y2 - y1)**2)\n",
"\n",
"# Calculamos las distancias de cada casa con el hospital y el colegio: \n",
"distancias['d_hospital'] = distancias.apply(lambda row: dist_eu(row['X'], row['Y'], coor_hosp[0], coor_hosp[1]), axis=1)\n",
"distancias['d_hospital'] = distancias.apply(lambda row: dist_eu(row['X'], row['Y'], coor_escuela[0], coor_escuela[1]), axis=1)\n",
"\n",
"# Clasificamos las casas como \"lejos\" o \"cerca\":\n",
"distancias['cerca'] = ((distancias['d_hospital'] < 0.5) | (distancias['d_escuela'] < 0.5))\n",
"\n",
"# Dividimos el dataset en dos: \n",
"cerca = distancias[distancias['Cerca'] == True]['Price']\n",
"lejos = distancias[distancias['Lejos'] == True]['Price']\n",
"\n",
"# ##\n",
"t_stat, p_value = st.ttest_ind(cerca, lejos, equal_var=False)\n",
"print(f\"Valor de t {t_stat}, valor de p {p_value}\")\n",
"\n",
"if p_value < 0.5:\n",
" print(\"Se rechaza la hipotesis nula.\")\n",
"else: \n",
" print(\"No se rechaza la hipotesis.\")\n",
"\n"
]
}
],
"metadata": {
Expand All @@ -512,7 +598,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.12.5"
}
},
"nbformat": 4,
Expand Down