diff --git a/lab-sql-9.sql b/lab-sql-9.sql
new file mode 100644
index 0000000..d182adb
--- /dev/null
+++ b/lab-sql-9.sql
@@ -0,0 +1,78 @@
+-- Lab: find the customers who were active in the consecutive months of May and June.
+-- The statements below build one table per month, then count rentals per customer.
+
+-- Preview the rental table with each rental timestamp reduced to its calendar date.
+SELECT
+    r.*,
+    CAST(r.rental_date AS DATE) AS rental_day
+FROM sakila.rental AS r;
+
+-- rentals_may: every rental made in May 2005, plus the month name.
+-- The rows are selected with a half-open date range in WHERE rather than HAVING on
+-- the computed month name, so filtering happens before projection and the month is
+-- tied to one year. NOTE(review): the sample rows seen so far are all from 2005;
+-- widen the range if the data holds May rentals from other years that should count.
+-- The table is schema-qualified and dropped first so the script can be re-run.
+DROP TABLE IF EXISTS sakila.rentals_may;
+CREATE TABLE sakila.rentals_may AS
+SELECT
+    r.rental_id,
+    r.rental_date,
+    r.inventory_id,
+    r.customer_id,
+    r.return_date,
+    r.staff_id,
+    r.last_update,
+    DATE_FORMAT(r.rental_date, '%M') AS month
+FROM sakila.rental AS r
+WHERE r.rental_date >= '2005-05-01'
+  AND r.rental_date < '2005-06-01';
+
+-- Inspect the May table.
+SELECT *
+FROM sakila.rentals_may;
+
+-- rentals_june: same columns as rentals_may, for rentals made in June 2005.
+DROP TABLE IF EXISTS sakila.rentals_june;
+CREATE TABLE sakila.rentals_june AS
+SELECT
+    r.rental_id,
+    r.rental_date,
+    r.inventory_id,
+    r.customer_id,
+    r.return_date,
+    r.staff_id,
+    r.last_update,
+    DATE_FORMAT(r.rental_date, '%M') AS month
+FROM sakila.rental AS r
+WHERE r.rental_date >= '2005-06-01'
+  AND r.rental_date < '2005-07-01';
+
+-- Inspect the June table.
+SELECT *
+FROM sakila.rentals_june;
+
+-- Inspect the customer table before joining to it.
+SELECT *
+FROM sakila.customer;
+
+-- Number of May rentals per customer. The LEFT JOIN keeps customers without a May
+-- rental, and COUNT(r.rental_id) skips the resulting NULLs, so they are reported as 0.
+SELECT
+    c.customer_id,
+    COUNT(r.rental_id) AS rental_cust
+FROM sakila.customer AS c
+LEFT JOIN sakila.rentals_may AS r
+    ON c.customer_id = r.customer_id
+GROUP BY c.customer_id
+ORDER BY c.customer_id;
+
+-- Number of June rentals per customer (same query against rentals_june).
+SELECT
+    c.customer_id,
+    COUNT(r.rental_id) AS rental_cust
+FROM sakila.customer AS c
+LEFT JOIN sakila.rentals_june AS r
+    ON c.customer_id = r.customer_id
+GROUP BY c.customer_id
+ORDER BY c.customer_id;
diff --git a/lab-sql-9_Python.ipynb b/lab-sql-9_Python.ipynb
new file mode 100644
index 0000000..e3391a5
--- /dev/null
+++ b/lab-sql-9_Python.ipynb
@@ -0,0 +1,1265 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "' !pip install mysql.connector\\n!pip install mysql.connector.python\\n!pip install sqlalchemy\\n!pip install pymysql '"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "''' !pip install mysql.connector\n",
+    "!pip install mysql.connector.python\n",
+    "!pip install sqlalchemy\n",
+    "!pip install pymysql '''"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pymysql\n",
+    "from sqlalchemy import create_engine\n",
+    "import pandas as pd\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import getpass\n",
+    "password = getpass.getpass('MySQL password: ')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updatemonth
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53May
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53May
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53May
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53May
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53May
...........................
115111532005-05-31 21:36:4427255062005-06-10 01:26:4422006-02-15 21:30:53May
115211542005-05-31 21:42:092732592005-06-08 16:40:0912006-02-15 21:30:53May
115311552005-05-31 22:17:1120482512005-06-04 20:27:1122006-02-15 21:30:53May
115411562005-05-31 22:37:344601062005-06-01 23:02:3422006-02-15 21:30:53May
115511572005-05-31 22:47:451449612005-06-02 18:01:4512006-02-15 21:30:53May
\n", + "

1156 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "1151 1153 2005-05-31 21:36:44 2725 506 \n", + "1152 1154 2005-05-31 21:42:09 2732 59 \n", + "1153 1155 2005-05-31 22:17:11 2048 251 \n", + "1154 1156 2005-05-31 22:37:34 460 106 \n", + "1155 1157 2005-05-31 22:47:45 1449 61 \n", + "\n", + " return_date staff_id last_update month \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 May \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 May \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 May \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 May \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 May \n", + "... ... ... ... ... \n", + "1151 2005-06-10 01:26:44 2 2006-02-15 21:30:53 May \n", + "1152 2005-06-08 16:40:09 1 2006-02-15 21:30:53 May \n", + "1153 2005-06-04 20:27:11 2 2006-02-15 21:30:53 May \n", + "1154 2005-06-01 23:02:34 2 2006-02-15 21:30:53 May \n", + "1155 2005-06-02 18:01:45 1 2006-02-15 21:30:53 May \n", + "\n", + "[1156 rows x 8 columns]" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "\n", + "engine = create_engine(connection_string)\n", + "\n", + "result = pd.read_sql_query('SELECT* FROM sakila.rentals_may', engine)\n", + "\n", + "result\n" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updatemonth
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:53May
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:53May
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:53May
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:53May
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:53May
...........................
115111532005-05-31 21:36:4427255062005-06-10 01:26:4422006-02-15 21:30:53May
115211542005-05-31 21:42:092732592005-06-08 16:40:0912006-02-15 21:30:53May
115311552005-05-31 22:17:1120482512005-06-04 20:27:1122006-02-15 21:30:53May
115411562005-05-31 22:37:344601062005-06-01 23:02:3422006-02-15 21:30:53May
115511572005-05-31 22:47:451449612005-06-02 18:01:4512006-02-15 21:30:53May
\n", + "

1156 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "1151 1153 2005-05-31 21:36:44 2725 506 \n", + "1152 1154 2005-05-31 21:42:09 2732 59 \n", + "1153 1155 2005-05-31 22:17:11 2048 251 \n", + "1154 1156 2005-05-31 22:37:34 460 106 \n", + "1155 1157 2005-05-31 22:47:45 1449 61 \n", + "\n", + " return_date staff_id last_update month \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 May \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 May \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 May \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 May \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 May \n", + "... ... ... ... ... \n", + "1151 2005-06-10 01:26:44 2 2006-02-15 21:30:53 May \n", + "1152 2005-06-08 16:40:09 1 2006-02-15 21:30:53 May \n", + "1153 2005-06-04 20:27:11 2 2006-02-15 21:30:53 May \n", + "1154 2005-06-01 23:02:34 2 2006-02-15 21:30:53 May \n", + "1155 2005-06-02 18:01:45 1 2006-02-15 21:30:53 May \n", + "\n", + "[1156 rows x 8 columns]" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the number of rentals for each customer for May\n", + "\n", + "df_rental_may = result \n", + "\n", + "df_rental_may \n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updatemonth
011582005-06-14 22:53:3316324162005-06-18 21:37:3322006-02-15 21:30:53June
111592005-06-14 22:55:1343955162005-06-17 02:11:1312006-02-15 21:30:53June
211602005-06-14 23:00:3427952392005-06-18 01:58:3422006-02-15 21:30:53June
311612005-06-14 23:07:0816902852005-06-21 17:12:0812006-02-15 21:30:53June
411622005-06-14 23:09:389873102005-06-23 22:00:3812006-02-15 21:30:53June
...........................
230634652005-06-21 22:10:0114885102005-06-30 21:35:0112006-02-15 21:30:53June
230734662005-06-21 22:13:333712262005-06-25 21:01:3322006-02-15 21:30:53June
230834672005-06-21 22:19:257295432005-06-27 00:03:2522006-02-15 21:30:53June
230934682005-06-21 22:43:4528991002005-06-30 01:49:4512006-02-15 21:30:53June
231034692005-06-21 22:48:5940871812005-06-28 19:32:5912006-02-15 21:30:53June
\n", + "

2311 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1158 2005-06-14 22:53:33 1632 416 \n", + "1 1159 2005-06-14 22:55:13 4395 516 \n", + "2 1160 2005-06-14 23:00:34 2795 239 \n", + "3 1161 2005-06-14 23:07:08 1690 285 \n", + "4 1162 2005-06-14 23:09:38 987 310 \n", + "... ... ... ... ... \n", + "2306 3465 2005-06-21 22:10:01 1488 510 \n", + "2307 3466 2005-06-21 22:13:33 371 226 \n", + "2308 3467 2005-06-21 22:19:25 729 543 \n", + "2309 3468 2005-06-21 22:43:45 2899 100 \n", + "2310 3469 2005-06-21 22:48:59 4087 181 \n", + "\n", + " return_date staff_id last_update month \n", + "0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 June \n", + "1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 June \n", + "2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 June \n", + "3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 June \n", + "4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 June \n", + "... ... ... ... ... \n", + "2306 2005-06-30 21:35:01 1 2006-02-15 21:30:53 June \n", + "2307 2005-06-25 21:01:33 2 2006-02-15 21:30:53 June \n", + "2308 2005-06-27 00:03:25 2 2006-02-15 21:30:53 June \n", + "2309 2005-06-30 01:49:45 1 2006-02-15 21:30:53 June \n", + "2310 2005-06-28 19:32:59 1 2006-02-15 21:30:53 June \n", + "\n", + "[2311 rows x 8 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# now lets create a data frame for the info for June \n", + "\n", + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "\n", + "engine = create_engine(connection_string)\n", + "\n", + "df_rental_june = pd.read_sql_query('SELECT* FROM sakila.rentals_june', engine)\n", + "\n", + "df_rental_june " + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer_id\n", + "1 2\n", + "2 1\n", + "3 2\n", + "5 3\n", + "6 3\n", + " ..\n", + "594 4\n", + "595 1\n", + "596 6\n", + "597 2\n", + "599 1\n", + "Name: 
rental_id, Length: 520, dtype: int64" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the number of rentals for each customer for May using Python\n", + "\n", + "\n", + "df_rental_may.groupby('customer_id')['rental_id'].count()\n", + "\n", + "# Hint: You can store the results from the two queries in two separate dataframes.\n", + "\n", + "df_may_count = df_rental_may.groupby('customer_id')['rental_id'].count()\n", + "\n", + "df_may_count" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "customer_id\n", + "1 7\n", + "2 1\n", + "3 4\n", + "4 6\n", + "5 5\n", + " ..\n", + "595 2\n", + "596 2\n", + "597 3\n", + "598 1\n", + "599 4\n", + "Name: rental_id, Length: 590, dtype: int64" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Check the number of rentals for each customer for Juen\n", + "# same techniq\n", + "\n", + "df_rental_june.groupby('customer_id')['rental_id'].count()\n", + "\n", + "# Hint: You can store the results from the two queries in two separate dataframes.\n", + "\n", + "df_june_count = df_rental_june.groupby('customer_id')['rental_id'].count()\n", + "\n", + "df_june_count\n" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_id_mayrental_id_june
customer_id
127.0
211.0
324.0
535.0
634.0
.........
59446.0
59512.0
59662.0
59723.0
59914.0
\n", + "

520 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " rental_id_may rental_id_june\n", + "customer_id \n", + "1 2 7.0\n", + "2 1 1.0\n", + "3 2 4.0\n", + "5 3 5.0\n", + "6 3 4.0\n", + "... ... ...\n", + "594 4 6.0\n", + "595 1 2.0\n", + "596 6 2.0\n", + "597 2 3.0\n", + "599 1 4.0\n", + "\n", + "[520 rows x 2 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Write a function that checks if customer borrowed more or less films in the month of June as compared to May.\n", + "\n", + "# Hint: For this part, you can create a join between the two dataframes created before, using the merge function available for pandas dataframes. Here is a link to the documentation for the merge function.\n", + "df_may_count = pd.DataFrame(df_may_count)\n", + "\n", + "\n", + "merged_df = df_may_count.merge(df_june_count, on='customer_id', suffixes=('_may', '_june'), how='left')\n", + "\n", + "merged_df" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_id_mayrental_id_junerental_total
customer_id
127.0-5.0
211.00.0
324.0-2.0
535.0-2.0
634.0-1.0
............
59446.0-2.0
59512.0-1.0
59662.04.0
59723.0-1.0
59914.0-3.0
\n", + "

520 rows × 3 columns

\n", + "
"
+      ],
+      "text/plain": [
+       " rental_id_may rental_id_june rental_total\n",
+       "customer_id \n",
+       "1 2 7.0 -5.0\n",
+       "2 1 1.0 0.0\n",
+       "3 2 4.0 -2.0\n",
+       "5 3 5.0 -2.0\n",
+       "6 3 4.0 -1.0\n",
+       "... ... ... ...\n",
+       "594 4 6.0 -2.0\n",
+       "595 1 2.0 -1.0\n",
+       "596 6 2.0 4.0\n",
+       "597 2 3.0 -1.0\n",
+       "599 1 4.0 -3.0\n",
+       "\n",
+       "[520 rows x 3 columns]"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "\n",
+    "merged_df['rental_total'] = merged_df['rental_id_may'] - merged_df['rental_id_june']\n",
+    "\n",
+    "merged_df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Count of June is the bigger: 358 rental\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Label each customer with the month in which they rented more films.\n",
+    "# Stored outputs predate the 'Equal' label for ties; re-run the notebook to refresh them.\n",
+    "\n",
+    "# May count minus June count: negative means more rentals in June.\n",
+    "merged_df['rental_total'] = merged_df['rental_id_may'] - merged_df['rental_id_june']\n",
+    "\n",
+    "\n",
+    "def compare_rental_counts(row):\n",
+    "    \"\"\"Return 'June', 'May' or 'Equal' for one row of merged_df.\n",
+    "\n",
+    "    Reads row['rental_total'] (May minus June). A NaN difference falls\n",
+    "    through to 'May'; presumably that is a customer without June rentals\n",
+    "    after the left merge -- confirm against the merge cell.\n",
+    "    \"\"\"\n",
+    "    difference = row['rental_total']\n",
+    "    if difference < 0:\n",
+    "        return 'June'\n",
+    "    if difference == 0:\n",
+    "        return 'Equal'\n",
+    "    return 'May'\n",
+    "\n",
+    "\n",
+    "# Build the column from return values: side effects inside apply() are fragile\n",
+    "# (older pandas releases may call the function twice on the first row).\n",
+    "merged_df['comparison_result'] = merged_df.apply(compare_rental_counts, axis=1)\n",
+    "\n",
+    "# Kept as module-level names for anything that still reads them.\n",
+    "Borrow_month = merged_df['comparison_result'].tolist()\n",
+    "count_june = int((merged_df['comparison_result'] == 'June').sum())\n",
+    "count_may = int((merged_df['comparison_result'] == 'May').sum())\n",
+    "\n",
+    "if count_june > count_may:\n",
+    "    print(f\"Count of June is the bigger: {count_june} rental\")\n",
+    "elif count_june < count_may:\n",
+    "    print(f\"Count of May is the bigger with: {count_may} rental\")\n",
+    "else:\n",
+    "    print(f\"May and June are tied: {count_june} rental each\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   
"execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_id_mayrental_id_junerental_totalcomparison_result
customer_id
127.0-5.0June
211.00.0May
324.0-2.0June
535.0-2.0June
634.0-1.0June
...............
59446.0-2.0June
59512.0-1.0June
59662.04.0May
59723.0-1.0June
59914.0-3.0June
\n", + "

520 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " rental_id_may rental_id_june rental_total comparison_result\n", + "customer_id \n", + "1 2 7.0 -5.0 June\n", + "2 1 1.0 0.0 May\n", + "3 2 4.0 -2.0 June\n", + "5 3 5.0 -2.0 June\n", + "6 3 4.0 -1.0 June\n", + "... ... ... ... ...\n", + "594 4 6.0 -2.0 June\n", + "595 1 2.0 -1.0 June\n", + "596 6 2.0 4.0 May\n", + "597 2 3.0 -1.0 June\n", + "599 1 4.0 -3.0 June\n", + "\n", + "[520 rows x 4 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Write a function that checks if customer borrowed more or less films in the month of June as compared to May.\n", + "\n", + "# You can find a new column for the data Frame with the month were the user Borrowed the most movie \n", + "\n", + "merged_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}