diff --git a/your-code/Pandas Lab.ipynb b/your-code/Pandas Lab.ipynb new file mode 100644 index 0000000..2e68d94 --- /dev/null +++ b/your-code/Pandas Lab.ipynb @@ -0,0 +1,812 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Introduction to Pandas\n", + "\n", + "Complete the following set of exercises to solidify your knowledge of Pandas fundamentals." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 1. Import Numpy and Pandas and alias them to `np` and `pd` respectively." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# your code here\n", + "import numpy as np\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 2. Create a Pandas Series containing the elements of the list below." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "lst = [5.7, 75.2, 74.4, 84.0, 66.5, 66.3, 55.8, 75.7, 29.1, 43.7]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0 5.7\n", + "1 75.2\n", + "2 74.4\n", + "3 84.0\n", + "4 66.5\n", + "5 66.3\n", + "6 55.8\n", + "7 75.7\n", + "8 29.1\n", + "9 43.7\n", + "dtype: float64\n" + ] + } + ], + "source": [ + "# your code here\n", + "x = pd.Series(lst)\n", + "print(x)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 3. 
Use indexing to return the third value in the Series above.\n", + "\n", + "*Hint: Remember that indexing begins at 0.*" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74.4" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "x[2]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", + " [61.3, 40.8, 30.8, 37.8, 87.6],\n", + " [20.6, 73.2, 44.2, 14.6, 91.8],\n", + " [57.4, 0.1, 96.1, 4.2, 69.5],\n", + " [83.6, 20.5, 85.4, 22.8, 35.9],\n", + " [49.0, 69.0, 0.1, 31.8, 89.1],\n", + " [23.3, 40.7, 95.0, 83.8, 26.9],\n", + " [27.6, 26.4, 53.8, 88.8, 68.5],\n", + " [96.6, 96.4, 53.4, 72.4, 50.1],\n", + " [73.7, 39.0, 43.2, 81.6, 34.7]]" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "# your code here\n", + "exam = pd.DataFrame(b)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 5. Rename the data frame columns based on the names in the list below." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_2Score_3Score_4Score_5
053.195.067.535.078.4
161.340.830.837.887.6
220.673.244.214.691.8
357.40.196.14.269.5
483.620.585.422.835.9
549.069.00.131.889.1
623.340.795.083.826.9
727.626.453.888.868.5
896.696.453.472.450.1
973.739.043.281.634.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_2 Score_3 Score_4 Score_5\n", + "0 53.1 95.0 67.5 35.0 78.4\n", + "1 61.3 40.8 30.8 37.8 87.6\n", + "2 20.6 73.2 44.2 14.6 91.8\n", + "3 57.4 0.1 96.1 4.2 69.5\n", + "4 83.6 20.5 85.4 22.8 35.9\n", + "5 49.0 69.0 0.1 31.8 89.1\n", + "6 23.3 40.7 95.0 83.8 26.9\n", + "7 27.6 26.4 53.8 88.8 68.5\n", + "8 96.6 96.4 53.4 72.4 50.1\n", + "9 73.7 39.0 43.2 81.6 34.7" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "exam.columns = colnames\n", + "exam" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 6. Create a subset of this data frame that contains only the Score_1, Score_3, and Score_5 columns." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Score_1Score_3Score_5
053.167.578.4
161.330.887.6
220.644.291.8
357.496.169.5
483.685.435.9
549.00.189.1
623.395.026.9
727.653.868.5
896.653.450.1
973.743.234.7
\n", + "
" + ], + "text/plain": [ + " Score_1 Score_3 Score_5\n", + "0 53.1 67.5 78.4\n", + "1 61.3 30.8 87.6\n", + "2 20.6 44.2 91.8\n", + "3 57.4 96.1 69.5\n", + "4 83.6 85.4 35.9\n", + "5 49.0 0.1 89.1\n", + "6 23.3 95.0 26.9\n", + "7 27.6 53.8 68.5\n", + "8 96.6 53.4 50.1\n", + "9 73.7 43.2 34.7" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "exam[['Score_1', 'Score_3', 'Score_5']]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 7. From the original data frame, calculate the average Score_3 value." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "56.95000000000001" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "exam[\"Score_3\"].mean()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 8. From the original data frame, calculate the maximum Score_4 value." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "88.8" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "exam[\"Score_4\"].max()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 9. From the original data frame, calculate the median Score_2 value." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "40.75" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "exam[\"Score_2\"].median()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 10. Create a Pandas DataFrame from the dictionary of product orders below."
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [], + "source": [ + "orders = {'Description': ['LUNCH BAG APPLE DESIGN',\n", + " 'SET OF 60 VINTAGE LEAF CAKE CASES ',\n", + " 'RIBBON REEL STRIPES DESIGN ',\n", + " 'WORLD WAR 2 GLIDERS ASSTD DESIGNS',\n", + " 'PLAYING CARDS JUBILEE UNION JACK',\n", + " 'POPCORN HOLDER',\n", + " 'BOX OF VINTAGE ALPHABET BLOCKS',\n", + " 'PARTY BUNTING',\n", + " 'JAZZ HEARTS ADDRESS BOOK',\n", + " 'SET OF 4 SANTA PLACE SETTINGS'],\n", + " 'Quantity': [1, 24, 1, 2880, 2, 7, 1, 4, 10, 48],\n", + " 'UnitPrice': [1.65, 0.55, 1.65, 0.18, 1.25, 0.85, 11.95, 4.95, 0.19, 1.25],\n", + " 'Revenue': [1.65, 13.2, 1.65, 518.4, 2.5, 5.95, 11.95, 19.8, 1.9, 60.0]}" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DescriptionQuantityUnitPriceRevenue
0LUNCH BAG APPLE DESIGN11.651.65
1SET OF 60 VINTAGE LEAF CAKE CASES240.5513.20
2RIBBON REEL STRIPES DESIGN11.651.65
3WORLD WAR 2 GLIDERS ASSTD DESIGNS28800.18518.40
4PLAYING CARDS JUBILEE UNION JACK21.252.50
5POPCORN HOLDER70.855.95
6BOX OF VINTAGE ALPHABET BLOCKS111.9511.95
7PARTY BUNTING44.9519.80
8JAZZ HEARTS ADDRESS BOOK100.191.90
9SET OF 4 SANTA PLACE SETTINGS481.2560.00
\n", + "
" + ], + "text/plain": [ + " Description Quantity UnitPrice Revenue\n", + "0 LUNCH BAG APPLE DESIGN 1 1.65 1.65\n", + "1 SET OF 60 VINTAGE LEAF CAKE CASES 24 0.55 13.20\n", + "2 RIBBON REEL STRIPES DESIGN 1 1.65 1.65\n", + "3 WORLD WAR 2 GLIDERS ASSTD DESIGNS 2880 0.18 518.40\n", + "4 PLAYING CARDS JUBILEE UNION JACK 2 1.25 2.50\n", + "5 POPCORN HOLDER 7 0.85 5.95\n", + "6 BOX OF VINTAGE ALPHABET BLOCKS 1 11.95 11.95\n", + "7 PARTY BUNTING 4 4.95 19.80\n", + "8 JAZZ HEARTS ADDRESS BOOK 10 0.19 1.90\n", + "9 SET OF 4 SANTA PLACE SETTINGS 48 1.25 60.00" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# your code here\n", + "data_frame = pd.DataFrame(orders)\n", + "data_frame" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 11. Calculate the total quantity ordered and revenue generated from these orders." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2978\n", + "637.0\n" + ] + } + ], + "source": [ + "# your code here\n", + "q = data_frame[\"Quantity\"].sum()\n", + "r = data_frame[\"Revenue\"].sum()\n", + "print(q)\n", + "print(r)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### 12. Obtain the prices of the most expensive and least expensive items ordered and print the difference." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 50, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11.95\n", + "0.18\n" + ] + } + ], + "source": [ + "# your code here\n", + "least_expensive = data_frame[\"UnitPrice\"].min()\n", + "most_expensive = data_frame[\"UnitPrice\"].max()\n", + "\n", + "print(most_expensive)\n", + "print(least_expensive)" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "11.77\n" + ] + } + ], + "source": [ + "print(most_expensive - least_expensive)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": false, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": false, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + }, + "varInspector": { + "cols": { + "lenName": 16, + "lenType": 16, + "lenVar": 40 + }, + "kernels_config": { + "python": { + "delete_cmd_postfix": "", + "delete_cmd_prefix": "del ", + "library": "var_list.py", + "varRefreshCmd": "print(var_dic_list())" + }, + "r": { + "delete_cmd_postfix": ") ", + "delete_cmd_prefix": "rm(", + "library": "var_list.r", + "varRefreshCmd": "cat(var_dic_list()) " + } + }, + "types_to_exclude": [ + "module", + "function", + "builtin_function_or_method", + "instance", + "_Feature" + ], + "window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/your-code/main.ipynb b/your-code/main.ipynb deleted file 
mode 100755 index f50ae3d..0000000 --- a/your-code/main.ipynb +++ /dev/null @@ -1,285 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Introduction to Pandas\n", - "\n", - "Complete the following set of exercises to solidify your knowledge of Pandas fundamentals." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 1. Import Numpy and Pandas and alias them to `np` and `pd` respectively." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 2. Create a Pandas Series containing the elements of the list below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lst = [5.7, 75.2, 74.4, 84.0, 66.5, 66.3, 55.8, 75.7, 29.1, 43.7]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 3. Use indexing to return the third value in the Series above.\n", - "\n", - "*Hint: Remember that indexing begins at 0.*" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 4. Create a Pandas DataFrame from the list of lists below. Each sublist should be represented as a row." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "b = [[53.1, 95.0, 67.5, 35.0, 78.4],\n", - " [61.3, 40.8, 30.8, 37.8, 87.6],\n", - " [20.6, 73.2, 44.2, 14.6, 91.8],\n", - " [57.4, 0.1, 96.1, 4.2, 69.5],\n", - " [83.6, 20.5, 85.4, 22.8, 35.9],\n", - " [49.0, 69.0, 0.1, 31.8, 89.1],\n", - " [23.3, 40.7, 95.0, 83.8, 26.9],\n", - " [27.6, 26.4, 53.8, 88.8, 68.5],\n", - " [96.6, 96.4, 53.4, 72.4, 50.1],\n", - " [73.7, 39.0, 43.2, 81.6, 34.7]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 5. Rename the data frame columns based on the names in the list below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "colnames = ['Score_1', 'Score_2', 'Score_3', 'Score_4', 'Score_5']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 6. Create a subset of this data frame that contains only the Score 1, 3, and 5 columns." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 7. From the original data frame, calculate the average Score_3 value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 8. From the original data frame, calculate the maximum Score_4 value." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 9. From the original data frame, calculate the median Score 2 value." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 10. Create a Pandas DataFrame from the dictionary of product orders below." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "orders = {'Description': ['LUNCH BAG APPLE DESIGN',\n", - " 'SET OF 60 VINTAGE LEAF CAKE CASES ',\n", - " 'RIBBON REEL STRIPES DESIGN ',\n", - " 'WORLD WAR 2 GLIDERS ASSTD DESIGNS',\n", - " 'PLAYING CARDS JUBILEE UNION JACK',\n", - " 'POPCORN HOLDER',\n", - " 'BOX OF VINTAGE ALPHABET BLOCKS',\n", - " 'PARTY BUNTING',\n", - " 'JAZZ HEARTS ADDRESS BOOK',\n", - " 'SET OF 4 SANTA PLACE SETTINGS'],\n", - " 'Quantity': [1, 24, 1, 2880, 2, 7, 1, 4, 10, 48],\n", - " 'UnitPrice': [1.65, 0.55, 1.65, 0.18, 1.25, 0.85, 11.95, 4.95, 0.19, 1.25],\n", - " 'Revenue': [1.65, 13.2, 1.65, 518.4, 2.5, 5.95, 11.95, 19.8, 1.9, 60.0]}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 11. Calculate the total quantity ordered and revenue generated from these orders." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "#### 12. Obtain the prices of the most expensive and least expensive items ordered and print the difference." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# your code here" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.2" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -}