diff --git a/.ipynb_checkpoints/[sql-9] Laura-checkpoint.ipynb b/.ipynb_checkpoints/[sql-9] Laura-checkpoint.ipynb new file mode 100644 index 0000000..527f520 --- /dev/null +++ b/.ipynb_checkpoints/[sql-9] Laura-checkpoint.ipynb @@ -0,0 +1,1223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e03960fd", + "metadata": {}, + "outputs": [], + "source": [ + "# 7. Create a Python connection with SQL databas" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1ef05283", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\pandas\\core\\computation\\expressions.py:21: UserWarning: Pandas requires version '2.8.0' or newer of 'numexpr' (version '2.7.3' currently installed).\n", + " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", + "C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\pandas\\core\\arrays\\masked.py:62: UserWarning: Pandas requires version '1.3.4' or newer of 'bottleneck' (version '1.3.2' currently installed).\n", + " from pandas.core import (\n" + ] + } + ], + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f873d767", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cfaeaa9c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updaterental_dayrental_month
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:5324May
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:5324May
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:5324May
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:5324May
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:5324May
..............................
115111532005-05-31 21:36:4427255062005-06-10 01:26:4422006-02-15 21:30:5331May
115211542005-05-31 21:42:092732592005-06-08 16:40:0912006-02-15 21:30:5331May
115311552005-05-31 22:17:1120482512005-06-04 20:27:1122006-02-15 21:30:5331May
115411562005-05-31 22:37:344601062005-06-01 23:02:3422006-02-15 21:30:5331May
115511572005-05-31 22:47:451449612005-06-02 18:01:4512006-02-15 21:30:5331May
\n", + "

1156 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "1151 1153 2005-05-31 21:36:44 2725 506 \n", + "1152 1154 2005-05-31 21:42:09 2732 59 \n", + "1153 1155 2005-05-31 22:17:11 2048 251 \n", + "1154 1156 2005-05-31 22:37:34 460 106 \n", + "1155 1157 2005-05-31 22:47:45 1449 61 \n", + "\n", + " return_date staff_id last_update rental_day rental_month \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 24 May \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 24 May \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 24 May \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 24 May \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 24 May \n", + "... ... ... ... ... ... \n", + "1151 2005-06-10 01:26:44 2 2006-02-15 21:30:53 31 May \n", + "1152 2005-06-08 16:40:09 1 2006-02-15 21:30:53 31 May \n", + "1153 2005-06-04 20:27:11 2 2006-02-15 21:30:53 31 May \n", + "1154 2005-06-01 23:02:34 2 2006-02-15 21:30:53 31 May \n", + "1155 2005-06-02 18:01:45 1 2006-02-15 21:30:53 31 May \n", + "\n", + "[1156 rows x 9 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "df_may = pd.read_sql_query('select * from sakila.rentals_may', engine)\n", + "df_may" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7cdb13c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updaterental_dayrental_month
011582005-06-14 22:53:3316324162005-06-18 21:37:3322006-02-15 21:30:5314June
111592005-06-14 22:55:1343955162005-06-17 02:11:1312006-02-15 21:30:5314June
211602005-06-14 23:00:3427952392005-06-18 01:58:3422006-02-15 21:30:5314June
311612005-06-14 23:07:0816902852005-06-21 17:12:0812006-02-15 21:30:5314June
411622005-06-14 23:09:389873102005-06-23 22:00:3812006-02-15 21:30:5314June
..............................
230634652005-06-21 22:10:0114885102005-06-30 21:35:0112006-02-15 21:30:5321June
230734662005-06-21 22:13:333712262005-06-25 21:01:3322006-02-15 21:30:5321June
230834672005-06-21 22:19:257295432005-06-27 00:03:2522006-02-15 21:30:5321June
230934682005-06-21 22:43:4528991002005-06-30 01:49:4512006-02-15 21:30:5321June
231034692005-06-21 22:48:5940871812005-06-28 19:32:5912006-02-15 21:30:5321June
\n", + "

2311 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1158 2005-06-14 22:53:33 1632 416 \n", + "1 1159 2005-06-14 22:55:13 4395 516 \n", + "2 1160 2005-06-14 23:00:34 2795 239 \n", + "3 1161 2005-06-14 23:07:08 1690 285 \n", + "4 1162 2005-06-14 23:09:38 987 310 \n", + "... ... ... ... ... \n", + "2306 3465 2005-06-21 22:10:01 1488 510 \n", + "2307 3466 2005-06-21 22:13:33 371 226 \n", + "2308 3467 2005-06-21 22:19:25 729 543 \n", + "2309 3468 2005-06-21 22:43:45 2899 100 \n", + "2310 3469 2005-06-21 22:48:59 4087 181 \n", + "\n", + " return_date staff_id last_update rental_day rental_month \n", + "0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 14 June \n", + "1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 14 June \n", + "2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 14 June \n", + "3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 14 June \n", + "4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 14 June \n", + "... ... ... ... ... ... \n", + "2306 2005-06-30 21:35:01 1 2006-02-15 21:30:53 21 June \n", + "2307 2005-06-25 21:01:33 2 2006-02-15 21:30:53 21 June \n", + "2308 2005-06-27 00:03:25 2 2006-02-15 21:30:53 21 June \n", + "2309 2005-06-30 01:49:45 1 2006-02-15 21:30:53 21 June \n", + "2310 2005-06-28 19:32:59 1 2006-02-15 21:30:53 21 June \n", + "\n", + "[2311 rows x 9 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_june = pd.read_sql_query('select * from sakila.rentals_june', engine)\n", + "df_june" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e1fe8257", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_may
01MARYSMITH2
12PATRICIAJOHNSON1
23LINDAWILLIAMS2
35ELIZABETHBROWN3
46JENNIFERDAVIS3
...............
515594EDUARDOHIATT4
516595TERRENCEGUNDERSON1
517596ENRIQUEFORSYTHE6
518597FREDDIEDUGGAN2
519599AUSTINCINTRON1
\n", + "

520 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_may\n", + "0 1 MARY SMITH 2\n", + "1 2 PATRICIA JOHNSON 1\n", + "2 3 LINDA WILLIAMS 2\n", + "3 5 ELIZABETH BROWN 3\n", + "4 6 JENNIFER DAVIS 3\n", + ".. ... ... ... ...\n", + "515 594 EDUARDO HIATT 4\n", + "516 595 TERRENCE GUNDERSON 1\n", + "517 596 ENRIQUE FORSYTHE 6\n", + "518 597 FREDDIE DUGGAN 2\n", + "519 599 AUSTIN CINTRON 1\n", + "\n", + "[520 rows x 4 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 7.1 Check the number of rentals for each customer for May\n", + "\n", + "data_cus_may = pd.read_sql_query('select rent.customer_id, customer.first_name, customer.last_name,count(rent.rental_id) as rentals_may \\\n", + " from sakila.rentals_may as rent \\\n", + " join sakila.customer as customer \\\n", + " on rent.customer_id = customer.customer_id \\\n", + " group by customer_id \\\n", + " order by customer_id', engine)\n", + "data_cus_may" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "1f9bf686", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_june
01MARYSMITH7
12PATRICIAJOHNSON1
23LINDAWILLIAMS4
34BARBARAJONES6
45ELIZABETHBROWN5
...............
585595TERRENCEGUNDERSON2
586596ENRIQUEFORSYTHE2
587597FREDDIEDUGGAN3
588598WADEDELVALLE1
589599AUSTINCINTRON4
\n", + "

590 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_june\n", + "0 1 MARY SMITH 7\n", + "1 2 PATRICIA JOHNSON 1\n", + "2 3 LINDA WILLIAMS 4\n", + "3 4 BARBARA JONES 6\n", + "4 5 ELIZABETH BROWN 5\n", + ".. ... ... ... ...\n", + "585 595 TERRENCE GUNDERSON 2\n", + "586 596 ENRIQUE FORSYTHE 2\n", + "587 597 FREDDIE DUGGAN 3\n", + "588 598 WADE DELVALLE 1\n", + "589 599 AUSTIN CINTRON 4\n", + "\n", + "[590 rows x 4 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 7.2 Check the number of rentals for each customer for June\n", + "\n", + "data_cus_june = pd.read_sql_query('select rent.customer_id, customer.first_name, customer.last_name,count(rent.rental_id) as rentals_june \\\n", + " from sakila.rentals_june as rent \\\n", + " join sakila.customer as customer \\\n", + " on rent.customer_id = customer.customer_id \\\n", + " group by customer_id \\\n", + " order by customer_id', engine)\n", + "data_cus_june" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8091316a", + "metadata": {}, + "outputs": [], + "source": [ + "# 8. Write a function that checks if customer borrowed more or less films in the month of June as compared to May." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "4fb4b90d", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_mayrentals_june
01MARYSMITH2.07.0
12PATRICIAJOHNSON1.01.0
23LINDAWILLIAMS2.04.0
35ELIZABETHBROWN3.05.0
46JENNIFERDAVIS3.04.0
..................
593583MARSHALLTHORN0.06.0
594585PERRYSWAFFORD0.04.0
595591KENTARSENAULT0.03.0
596592TERRANCEROUSH0.05.0
597598WADEDELVALLE0.01.0
\n", + "

598 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_may rentals_june\n", + "0 1 MARY SMITH 2.0 7.0\n", + "1 2 PATRICIA JOHNSON 1.0 1.0\n", + "2 3 LINDA WILLIAMS 2.0 4.0\n", + "3 5 ELIZABETH BROWN 3.0 5.0\n", + "4 6 JENNIFER DAVIS 3.0 4.0\n", + ".. ... ... ... ... ...\n", + "593 583 MARSHALL THORN 0.0 6.0\n", + "594 585 PERRY SWAFFORD 0.0 4.0\n", + "595 591 KENT ARSENAULT 0.0 3.0\n", + "596 592 TERRANCE ROUSH 0.0 5.0\n", + "597 598 WADE DELVALLE 0.0 1.0\n", + "\n", + "[598 rows x 5 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Creating the data frame\n", + "data_may_june = pd.merge(data_cus_may, data_cus_june, on = ['customer_id', 'first_name', 'last_name'], how = 'outer')\n", + "\n", + "data_may_june= data_may_june.fillna(0)\n", + "data_may_june" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "e6a7d9e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_mayrentals_junerental_comparisontotal_rentals
01MARYSMITH2.07.0More rentals in June9.0
12PATRICIAJOHNSON1.01.0Equal rentals in May and June2.0
23LINDAWILLIAMS2.04.0More rentals in June6.0
35ELIZABETHBROWN3.05.0More rentals in June8.0
46JENNIFERDAVIS3.04.0More rentals in June7.0
........................
593583MARSHALLTHORN0.06.0More rentals in June6.0
594585PERRYSWAFFORD0.04.0More rentals in June4.0
595591KENTARSENAULT0.03.0More rentals in June3.0
596592TERRANCEROUSH0.05.0More rentals in June5.0
597598WADEDELVALLE0.01.0More rentals in June1.0
\n", + "

598 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_may rentals_june \\\n", + "0 1 MARY SMITH 2.0 7.0 \n", + "1 2 PATRICIA JOHNSON 1.0 1.0 \n", + "2 3 LINDA WILLIAMS 2.0 4.0 \n", + "3 5 ELIZABETH BROWN 3.0 5.0 \n", + "4 6 JENNIFER DAVIS 3.0 4.0 \n", + ".. ... ... ... ... ... \n", + "593 583 MARSHALL THORN 0.0 6.0 \n", + "594 585 PERRY SWAFFORD 0.0 4.0 \n", + "595 591 KENT ARSENAULT 0.0 3.0 \n", + "596 592 TERRANCE ROUSH 0.0 5.0 \n", + "597 598 WADE DELVALLE 0.0 1.0 \n", + "\n", + " rental_comparison total_rentals \n", + "0 More rentals in June 9.0 \n", + "1 Equal rentals in May and June 2.0 \n", + "2 More rentals in June 6.0 \n", + "3 More rentals in June 8.0 \n", + "4 More rentals in June 7.0 \n", + ".. ... ... \n", + "593 More rentals in June 6.0 \n", + "594 More rentals in June 4.0 \n", + "595 More rentals in June 3.0 \n", + "596 More rentals in June 5.0 \n", + "597 More rentals in June 1.0 \n", + "\n", + "[598 rows x 7 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Function for comparing each customer rentals between May and June\n", + "\n", + "def compare_rentals(row):\n", + " may_rentals = row['rentals_may']\n", + " june_rentals = row['rentals_june']\n", + "\n", + " if may_rentals > june_rentals:\n", + " return 'Less rentals in June'\n", + " elif may_rentals < june_rentals:\n", + " return 'More rentals in June'\n", + " else:\n", + " return 'Equal rentals in May and June'\n", + "\n", + "\n", + "data_may_june['rental_comparison'] = data_may_june.apply(compare_rentals, axis=1)\n", + "data_may_june['total_rentals'] = (data_may_june['rentals_may'] + data_may_june['rentals_june'])\n", + "data_may_june\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "237c98f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There where more rentals in June - in total: 2311.0 compared to 1156.0 rentals\n" + ] + } + 
], + "source": [ + "# Function for comparing total rentals between May and June\n", + "\n", + "total_may = data_may_june['rentals_may'].sum()\n", + "total_june = data_may_june['rentals_june'].sum()\n", + "\n", + "if total_may > total_june:\n", + " print('There where more rentals in May - in total: ', total_may, 'compared to ', total_june, 'rentals')\n", + "elif total_may < total_june:\n", + " print('There where more rentals in June - in total: ', total_june, 'compared to ', total_may, 'rentals')\n", + "else:\n", + " print('There where the same aount of rentals in May and June - in total: ', total_june)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[sql-9] Laura.ipynb b/[sql-9] Laura.ipynb new file mode 100644 index 0000000..527f520 --- /dev/null +++ b/[sql-9] Laura.ipynb @@ -0,0 +1,1223 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "e03960fd", + "metadata": {}, + "outputs": [], + "source": [ + "# 7. 
Create a Python connection with SQL databas" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1ef05283", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\pandas\\core\\computation\\expressions.py:21: UserWarning: Pandas requires version '2.8.0' or newer of 'numexpr' (version '2.7.3' currently installed).\n", + " from pandas.core.computation.check import NUMEXPR_INSTALLED\n", + "C:\\Users\\ljant\\anaconda3\\lib\\site-packages\\pandas\\core\\arrays\\masked.py:62: UserWarning: Pandas requires version '1.3.4' or newer of 'bottleneck' (version '1.3.2' currently installed).\n", + " from pandas.core import (\n" + ] + } + ], + "source": [ + "import pymysql\n", + "from sqlalchemy import create_engine\n", + "import pandas as pd\n", + "\n", + "import warnings\n", + "warnings.filterwarnings('ignore')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "f873d767", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "········\n" + ] + } + ], + "source": [ + "import getpass\n", + "password = getpass.getpass()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "cfaeaa9c", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updaterental_dayrental_month
012005-05-24 22:53:303671302005-05-26 22:04:3012006-02-15 21:30:5324May
122005-05-24 22:54:3315254592005-05-28 19:40:3312006-02-15 21:30:5324May
232005-05-24 23:03:3917114082005-06-01 22:12:3912006-02-15 21:30:5324May
342005-05-24 23:04:4124523332005-06-03 01:43:4122006-02-15 21:30:5324May
452005-05-24 23:05:2120792222005-06-02 04:33:2112006-02-15 21:30:5324May
..............................
115111532005-05-31 21:36:4427255062005-06-10 01:26:4422006-02-15 21:30:5331May
115211542005-05-31 21:42:092732592005-06-08 16:40:0912006-02-15 21:30:5331May
115311552005-05-31 22:17:1120482512005-06-04 20:27:1122006-02-15 21:30:5331May
115411562005-05-31 22:37:344601062005-06-01 23:02:3422006-02-15 21:30:5331May
115511572005-05-31 22:47:451449612005-06-02 18:01:4512006-02-15 21:30:5331May
\n", + "

1156 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1 2005-05-24 22:53:30 367 130 \n", + "1 2 2005-05-24 22:54:33 1525 459 \n", + "2 3 2005-05-24 23:03:39 1711 408 \n", + "3 4 2005-05-24 23:04:41 2452 333 \n", + "4 5 2005-05-24 23:05:21 2079 222 \n", + "... ... ... ... ... \n", + "1151 1153 2005-05-31 21:36:44 2725 506 \n", + "1152 1154 2005-05-31 21:42:09 2732 59 \n", + "1153 1155 2005-05-31 22:17:11 2048 251 \n", + "1154 1156 2005-05-31 22:37:34 460 106 \n", + "1155 1157 2005-05-31 22:47:45 1449 61 \n", + "\n", + " return_date staff_id last_update rental_day rental_month \n", + "0 2005-05-26 22:04:30 1 2006-02-15 21:30:53 24 May \n", + "1 2005-05-28 19:40:33 1 2006-02-15 21:30:53 24 May \n", + "2 2005-06-01 22:12:39 1 2006-02-15 21:30:53 24 May \n", + "3 2005-06-03 01:43:41 2 2006-02-15 21:30:53 24 May \n", + "4 2005-06-02 04:33:21 1 2006-02-15 21:30:53 24 May \n", + "... ... ... ... ... ... \n", + "1151 2005-06-10 01:26:44 2 2006-02-15 21:30:53 31 May \n", + "1152 2005-06-08 16:40:09 1 2006-02-15 21:30:53 31 May \n", + "1153 2005-06-04 20:27:11 2 2006-02-15 21:30:53 31 May \n", + "1154 2005-06-01 23:02:34 2 2006-02-15 21:30:53 31 May \n", + "1155 2005-06-02 18:01:45 1 2006-02-15 21:30:53 31 May \n", + "\n", + "[1156 rows x 9 columns]" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "connection_string = 'mysql+pymysql://root:' + password + '@localhost/bank'\n", + "engine = create_engine(connection_string)\n", + "df_may = pd.read_sql_query('select * from sakila.rentals_may', engine)\n", + "df_may" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7cdb13c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rental_idrental_dateinventory_idcustomer_idreturn_datestaff_idlast_updaterental_dayrental_month
011582005-06-14 22:53:3316324162005-06-18 21:37:3322006-02-15 21:30:5314June
111592005-06-14 22:55:1343955162005-06-17 02:11:1312006-02-15 21:30:5314June
211602005-06-14 23:00:3427952392005-06-18 01:58:3422006-02-15 21:30:5314June
311612005-06-14 23:07:0816902852005-06-21 17:12:0812006-02-15 21:30:5314June
411622005-06-14 23:09:389873102005-06-23 22:00:3812006-02-15 21:30:5314June
..............................
230634652005-06-21 22:10:0114885102005-06-30 21:35:0112006-02-15 21:30:5321June
230734662005-06-21 22:13:333712262005-06-25 21:01:3322006-02-15 21:30:5321June
230834672005-06-21 22:19:257295432005-06-27 00:03:2522006-02-15 21:30:5321June
230934682005-06-21 22:43:4528991002005-06-30 01:49:4512006-02-15 21:30:5321June
231034692005-06-21 22:48:5940871812005-06-28 19:32:5912006-02-15 21:30:5321June
\n", + "

2311 rows × 9 columns

\n", + "
" + ], + "text/plain": [ + " rental_id rental_date inventory_id customer_id \\\n", + "0 1158 2005-06-14 22:53:33 1632 416 \n", + "1 1159 2005-06-14 22:55:13 4395 516 \n", + "2 1160 2005-06-14 23:00:34 2795 239 \n", + "3 1161 2005-06-14 23:07:08 1690 285 \n", + "4 1162 2005-06-14 23:09:38 987 310 \n", + "... ... ... ... ... \n", + "2306 3465 2005-06-21 22:10:01 1488 510 \n", + "2307 3466 2005-06-21 22:13:33 371 226 \n", + "2308 3467 2005-06-21 22:19:25 729 543 \n", + "2309 3468 2005-06-21 22:43:45 2899 100 \n", + "2310 3469 2005-06-21 22:48:59 4087 181 \n", + "\n", + " return_date staff_id last_update rental_day rental_month \n", + "0 2005-06-18 21:37:33 2 2006-02-15 21:30:53 14 June \n", + "1 2005-06-17 02:11:13 1 2006-02-15 21:30:53 14 June \n", + "2 2005-06-18 01:58:34 2 2006-02-15 21:30:53 14 June \n", + "3 2005-06-21 17:12:08 1 2006-02-15 21:30:53 14 June \n", + "4 2005-06-23 22:00:38 1 2006-02-15 21:30:53 14 June \n", + "... ... ... ... ... ... \n", + "2306 2005-06-30 21:35:01 1 2006-02-15 21:30:53 21 June \n", + "2307 2005-06-25 21:01:33 2 2006-02-15 21:30:53 21 June \n", + "2308 2005-06-27 00:03:25 2 2006-02-15 21:30:53 21 June \n", + "2309 2005-06-30 01:49:45 1 2006-02-15 21:30:53 21 June \n", + "2310 2005-06-28 19:32:59 1 2006-02-15 21:30:53 21 June \n", + "\n", + "[2311 rows x 9 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_june = pd.read_sql_query('select * from sakila.rentals_june', engine)\n", + "df_june" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e1fe8257", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_may
01MARYSMITH2
12PATRICIAJOHNSON1
23LINDAWILLIAMS2
35ELIZABETHBROWN3
46JENNIFERDAVIS3
...............
515594EDUARDOHIATT4
516595TERRENCEGUNDERSON1
517596ENRIQUEFORSYTHE6
518597FREDDIEDUGGAN2
519599AUSTINCINTRON1
\n", + "

520 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_may\n", + "0 1 MARY SMITH 2\n", + "1 2 PATRICIA JOHNSON 1\n", + "2 3 LINDA WILLIAMS 2\n", + "3 5 ELIZABETH BROWN 3\n", + "4 6 JENNIFER DAVIS 3\n", + ".. ... ... ... ...\n", + "515 594 EDUARDO HIATT 4\n", + "516 595 TERRENCE GUNDERSON 1\n", + "517 596 ENRIQUE FORSYTHE 6\n", + "518 597 FREDDIE DUGGAN 2\n", + "519 599 AUSTIN CINTRON 1\n", + "\n", + "[520 rows x 4 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 7.1 Check the number of rentals for each customer for May\n", + "\n", + "data_cus_may = pd.read_sql_query('select rent.customer_id, customer.first_name, customer.last_name,count(rent.rental_id) as rentals_may \\\n", + " from sakila.rentals_may as rent \\\n", + " join sakila.customer as customer \\\n", + " on rent.customer_id = customer.customer_id \\\n", + " group by customer_id \\\n", + " order by customer_id', engine)\n", + "data_cus_may" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "1f9bf686", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_june
01MARYSMITH7
12PATRICIAJOHNSON1
23LINDAWILLIAMS4
34BARBARAJONES6
45ELIZABETHBROWN5
...............
585595TERRENCEGUNDERSON2
586596ENRIQUEFORSYTHE2
587597FREDDIEDUGGAN3
588598WADEDELVALLE1
589599AUSTINCINTRON4
\n", + "

590 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_june\n", + "0 1 MARY SMITH 7\n", + "1 2 PATRICIA JOHNSON 1\n", + "2 3 LINDA WILLIAMS 4\n", + "3 4 BARBARA JONES 6\n", + "4 5 ELIZABETH BROWN 5\n", + ".. ... ... ... ...\n", + "585 595 TERRENCE GUNDERSON 2\n", + "586 596 ENRIQUE FORSYTHE 2\n", + "587 597 FREDDIE DUGGAN 3\n", + "588 598 WADE DELVALLE 1\n", + "589 599 AUSTIN CINTRON 4\n", + "\n", + "[590 rows x 4 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# 7.2 Check the number of rentals for each customer for June\n", + "\n", + "data_cus_june = pd.read_sql_query('select rent.customer_id, customer.first_name, customer.last_name,count(rent.rental_id) as rentals_june \\\n", + " from sakila.rentals_june as rent \\\n", + " join sakila.customer as customer \\\n", + " on rent.customer_id = customer.customer_id \\\n", + " group by customer_id \\\n", + " order by customer_id', engine)\n", + "data_cus_june" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "8091316a", + "metadata": {}, + "outputs": [], + "source": [ + "# 8. Write a function that checks if customer borrowed more or less films in the month of June as compared to May." + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "4fb4b90d", + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_mayrentals_june
01MARYSMITH2.07.0
12PATRICIAJOHNSON1.01.0
23LINDAWILLIAMS2.04.0
35ELIZABETHBROWN3.05.0
46JENNIFERDAVIS3.04.0
..................
593583MARSHALLTHORN0.06.0
594585PERRYSWAFFORD0.04.0
595591KENTARSENAULT0.03.0
596592TERRANCEROUSH0.05.0
597598WADEDELVALLE0.01.0
\n", + "

598 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_may rentals_june\n", + "0 1 MARY SMITH 2.0 7.0\n", + "1 2 PATRICIA JOHNSON 1.0 1.0\n", + "2 3 LINDA WILLIAMS 2.0 4.0\n", + "3 5 ELIZABETH BROWN 3.0 5.0\n", + "4 6 JENNIFER DAVIS 3.0 4.0\n", + ".. ... ... ... ... ...\n", + "593 583 MARSHALL THORN 0.0 6.0\n", + "594 585 PERRY SWAFFORD 0.0 4.0\n", + "595 591 KENT ARSENAULT 0.0 3.0\n", + "596 592 TERRANCE ROUSH 0.0 5.0\n", + "597 598 WADE DELVALLE 0.0 1.0\n", + "\n", + "[598 rows x 5 columns]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Creating the data frame\n", + "data_may_june = pd.merge(data_cus_may, data_cus_june, on = ['customer_id', 'first_name', 'last_name'], how = 'outer')\n", + "\n", + "data_may_june= data_may_june.fillna(0)\n", + "data_may_june" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "e6a7d9e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
customer_idfirst_namelast_namerentals_mayrentals_junerental_comparisontotal_rentals
01MARYSMITH2.07.0More rentals in June9.0
12PATRICIAJOHNSON1.01.0Equal rentals in May and June2.0
23LINDAWILLIAMS2.04.0More rentals in June6.0
35ELIZABETHBROWN3.05.0More rentals in June8.0
46JENNIFERDAVIS3.04.0More rentals in June7.0
........................
593583MARSHALLTHORN0.06.0More rentals in June6.0
594585PERRYSWAFFORD0.04.0More rentals in June4.0
595591KENTARSENAULT0.03.0More rentals in June3.0
596592TERRANCEROUSH0.05.0More rentals in June5.0
597598WADEDELVALLE0.01.0More rentals in June1.0
\n", + "

598 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " customer_id first_name last_name rentals_may rentals_june \\\n", + "0 1 MARY SMITH 2.0 7.0 \n", + "1 2 PATRICIA JOHNSON 1.0 1.0 \n", + "2 3 LINDA WILLIAMS 2.0 4.0 \n", + "3 5 ELIZABETH BROWN 3.0 5.0 \n", + "4 6 JENNIFER DAVIS 3.0 4.0 \n", + ".. ... ... ... ... ... \n", + "593 583 MARSHALL THORN 0.0 6.0 \n", + "594 585 PERRY SWAFFORD 0.0 4.0 \n", + "595 591 KENT ARSENAULT 0.0 3.0 \n", + "596 592 TERRANCE ROUSH 0.0 5.0 \n", + "597 598 WADE DELVALLE 0.0 1.0 \n", + "\n", + " rental_comparison total_rentals \n", + "0 More rentals in June 9.0 \n", + "1 Equal rentals in May and June 2.0 \n", + "2 More rentals in June 6.0 \n", + "3 More rentals in June 8.0 \n", + "4 More rentals in June 7.0 \n", + ".. ... ... \n", + "593 More rentals in June 6.0 \n", + "594 More rentals in June 4.0 \n", + "595 More rentals in June 3.0 \n", + "596 More rentals in June 5.0 \n", + "597 More rentals in June 1.0 \n", + "\n", + "[598 rows x 7 columns]" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Function for comparing each customer rentals between May and June\n", + "\n", + "def compare_rentals(row):\n", + " may_rentals = row['rentals_may']\n", + " june_rentals = row['rentals_june']\n", + "\n", + " if may_rentals > june_rentals:\n", + " return 'Less rentals in June'\n", + " elif may_rentals < june_rentals:\n", + " return 'More rentals in June'\n", + " else:\n", + " return 'Equal rentals in May and June'\n", + "\n", + "\n", + "data_may_june['rental_comparison'] = data_may_june.apply(compare_rentals, axis=1)\n", + "data_may_june['total_rentals'] = (data_may_june['rentals_may'] + data_may_june['rentals_june'])\n", + "data_may_june\n" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "237c98f8", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "There where more rentals in June - in total: 2311.0 compared to 1156.0 rentals\n" + ] + } + 
], + "source": [ + "# Function for comparing total rentals between May and June\n", + "\n", + "total_may = data_may_june['rentals_may'].sum()\n", + "total_june = data_may_june['rentals_june'].sum()\n", + "\n", + "if total_may > total_june:\n", + " print('There where more rentals in May - in total: ', total_may, 'compared to ', total_june, 'rentals')\n", + "elif total_may < total_june:\n", + " print('There where more rentals in June - in total: ', total_june, 'compared to ', total_may, 'rentals')\n", + "else:\n", + " print('There where the same aount of rentals in May and June - in total: ', total_june)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/[sql-9] Laura.sql b/[sql-9] Laura.sql new file mode 100644 index 0000000..8e81ba2 --- /dev/null +++ b/[sql-9] Laura.sql @@ -0,0 +1,57 @@ +-- 1. Create a table rentals_may to store the data from rental table with information for the month of May. + +create table sakila.rentals_may +select *, + date_format(rental_date, '%d') as rental_day, + date_format(rental_date, '%M') as rental_month +from sakila.rental +where (date_format(rental_date, '%M') = 'May') +; + +-- 2. Insert values in the table rentals_may using the table rental, filtering values only for the month of May. +select * +from sakila.rentals_may +; + +-- 3. Create a table rentals_june to store the data from rental table with information for the month of June. 
-- Build sakila.rentals_june from the June rows of sakila.rental, adding two
-- derived columns: the day of month and the month name of rental_date.
--
-- The month is matched numerically rather than by name: date_format(..., '%M')
-- renders the month name in the session's lc_time_names language, so comparing
-- it to the literal 'June' matches no rows on a non-English session.
-- Like the original filter, this keeps June of every year found in rental.
create table sakila.rentals_june
select
    rental_id,
    rental_date,
    inventory_id,
    customer_id,
    return_date,
    staff_id,
    last_update,
    date_format(rental_date, '%d') as rental_day,
    date_format(rental_date, '%M') as rental_month
from sakila.rental
where month(rental_date) = 6
;

-- 4. Insert values in the table rentals_june using the table rental, filtering values only for the month of June.
-- The create table ... select statement above already loads the June rows,
-- so this step only inspects the populated table.
select
    rental_id,
    rental_date,
    inventory_id,
    customer_id,
    return_date,
    staff_id,
    last_update,
    rental_day,
    rental_month
from sakila.rentals_june
;

-- 5. Check the number of rentals for each customer for May.
-- One row per customer with at least one rental in rentals_may.
-- Columns in group by / order by are qualified because both joined tables
-- expose customer_id; every non-aggregated select column is grouped so the
-- query does not depend on the server's sql_mode.
select
    rent.customer_id,
    customer.first_name,
    customer.last_name,
    count(rent.rental_id) as rentals_may
from sakila.rentals_may as rent
inner join sakila.customer as customer
    on rent.customer_id = customer.customer_id
group by
    rent.customer_id,
    customer.first_name,
    customer.last_name
order by rent.customer_id
;

-- 6. Check the number of rentals for each customer for June.
-- Same shape as the May query, reading from rentals_june instead.
select
    rent.customer_id,
    customer.first_name,
    customer.last_name,
    count(rent.rental_id) as rentals_june
from sakila.rentals_june as rent
inner join sakila.customer as customer
    on rent.customer_id = customer.customer_id
group by
    rent.customer_id,
    customer.first_name,
    customer.last_name
order by rent.customer_id
;

-- 7. Create a Python connection with SQL database and retrieve the results of the last two queries (also mentioned below) as dataframes:

-- 7.1 Check the number of rentals for each customer for May

-- 7.2 Check the number of rentals for each customer for June
-- Hint: You can store the results from the two queries in two separate dataframes.

-- 8. Write a function that checks if customer borrowed more or less films in the month of June as compared to May.
-- Hint: For this part, you can create a join between the two dataframes created before, using the merge function available for pandas
-- dataframes. Here is a link to the documentation for the merge function.