From 755451e79b87d10185f1b7eb7235a2c4b9944be8 Mon Sep 17 00:00:00 2001 From: Abbas Harris Date: Wed, 2 Dec 2020 10:34:47 -0500 Subject: [PATCH] pull request --- .../abbash_notebook-checkpoint.ipynb | 146 ++ .../practice_notebook-checkpoint.ipynb | 175 ++ .../shariq_notebook-checkpoint.ipynb | 1922 +++++++++++++++++ notebooks/abbash_notebook.ipynb | 111 +- notebooks/shariq_notebook.ipynb | 2 +- 5 files changed, 2348 insertions(+), 8 deletions(-) create mode 100644 notebooks/.ipynb_checkpoints/abbash_notebook-checkpoint.ipynb create mode 100644 notebooks/.ipynb_checkpoints/practice_notebook-checkpoint.ipynb create mode 100644 notebooks/.ipynb_checkpoints/shariq_notebook-checkpoint.ipynb diff --git a/notebooks/.ipynb_checkpoints/abbash_notebook-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/abbash_notebook-checkpoint.ipynb new file mode 100644 index 0000000..0a4be93 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/abbash_notebook-checkpoint.ipynb @@ -0,0 +1,146 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import scipy.fftpack\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Numpy Tutorials: Fast Fourier Transform: The First Three Natural Frequencies" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What is Fast Fourier Transform (FFT)?" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " A Fast Fourier Transform is a fundamental concept in the world of engineering. It is specifically used in the field of vibrations and measuring frequencies of various devices. FFT is primarily used to compute discrete functions, such as trigonometric functions, time it takes to complete a cycle, etc. Whereas, the FFT utilizes signals of any device/function and converts them from time domains into frequency domains. 
N_freq = 100  # sampling rate: number of samples per second (Hz)
time = 10  # duration of the signal, in seconds
wave_freq = N_freq * time  # total number of samples over the whole duration


def sine_function(frequency, N_freq, time):
    """Sample a unit-amplitude sine wave on an evenly spaced time grid.

    The number of samples is derived from the arguments (``N_freq * time``)
    rather than from the module-level ``wave_freq`` global, so the function
    works on a fresh kernel and honours whatever values the caller passes.

    Parameters
    ----------
    frequency : float
        Accepted for interface compatibility but not used.
        NOTE(review): the wave below is always sin(2*pi*x), i.e. one cycle
        per unit of ``x``; confirm whether ``frequency`` was meant to scale it.
    N_freq : int
        Samples per unit of time.
    time : float
        End of the time axis; the grid runs from 0 to ``time`` inclusive.

    Returns
    -------
    x : numpy.ndarray
        ``N_freq * time`` evenly spaced time points in ``[0, time]``.
    y : numpy.ndarray
        ``sin(2 * pi * x)`` evaluated at every point of ``x``.
    """
    # np.linspace needs an integer count; round guards against float products.
    n_samples = int(round(N_freq * time))
    x = np.linspace(0, time, n_samples)
    y = np.sin(2 * np.pi * x)
    return x, y
# Plot the sampled sine wave returned by sine_function (time on x, sine value on y).
x, y = sine_function(100, N_freq, time)  # x and y are the two arrays the function returns
plt.title('Sine Wave Graph')
plt.plot(x, y)
plt.xlabel('Time (seconds)')
# The y-axis holds sin(2*pi*x), i.e. the signal amplitude, not a frequency.
plt.ylabel('Amplitude')
# Pass a real boolean; the string 'True' only worked because it is truthy.
plt.grid(True)
plt.show()
+ ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "plt.rcParams.update({'font.size': 18})\n", + "plt.rcParams['lines.linewidth'] = 3\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Let's manually create an array from 0-8, then square each value. \n", + "\n", + "_Can you do it with a NumPy command?_" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "x=np.array([0, 1, 2, 3, 4, 5, 6, 7, 8])\n", + "y=x**2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## We can plot the result to see the parabola using `matplotlib.pyplot`, which we shortened above to `plt`." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Text(0, 0.5, 'y')" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(x, y)\n", + "plt.xlabel('x')\n", + "plt.ylabel('y')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Now, let's build a 2D array and print the result." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 2 3]\n", + " [4 5 6]\n", + " [7 8 9]]\n" + ] + } + ], + "source": [ + "A = np.array([[1, 2, 3], \n", + " [4, 5, 6],\n", + " [7, 8, 9]])\n", + "print(A)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Then, take the transpose and print it again." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[1 4 7]\n", + " [2 5 8]\n", + " [3 6 9]]\n" + ] + } + ], + "source": [ + "print(A.T)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## What's next?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/.ipynb_checkpoints/shariq_notebook-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/shariq_notebook-checkpoint.ipynb new file mode 100644 index 0000000..a8220e8 --- /dev/null +++ b/notebooks/.ipynb_checkpoints/shariq_notebook-checkpoint.ipynb @@ -0,0 +1,1922 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "zVqckeyP8wnd" + }, + "source": [ + "# Collaborative Filtering Recommender Systems for Movies" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "XvLAtMbR-IGT" + }, + "source": [ + "Download data from https://grouplens.org/datasets/movielens/100k/ " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "ghLtL-5r5n9P" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# get users\n", + "user_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']\n", + "users = pd.read_csv('ml-100k/u.user', sep='|', names=user_cols, encoding='latin-1')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(943, 5)" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.shape" + ] + }, + { + 
"cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "id": "kMOuE-bgaxDL", + "outputId": "a4ee4c5b-a416-4f56-a294-e3afaffbf436" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idagesexoccupationzip_code
0124Mtechnician85711
1253Fother94043
2323Mwriter32067
3424Mtechnician43537
4533Fother15213
\n", + "
" + ], + "text/plain": [ + " user_id age sex occupation zip_code\n", + "0 1 24 M technician 85711\n", + "1 2 53 F other 94043\n", + "2 3 23 M writer 32067\n", + "3 4 24 M technician 43537\n", + "4 5 33 F other 15213" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "users.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "# get items\n", + "item_cols = ['movie_id', 'movie_title' ,'release_date','video_release_date', 'IMDb_URL', 'unknown', 'Action', 'Adventure', 'Animation', 'Children\\'s', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror', 'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']\n", + "items = pd.read_csv('ml-100k/u.item', sep='|', names=item_cols, encoding='latin-1')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1682, 24)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "items.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 360 + }, + "colab_type": "code", + "id": "XwyB1-QxY5Rt", + "outputId": "3dfc68a9-f7f3-4650-a380-7982f7ff63e0" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
movie_idmovie_titlerelease_datevideo_release_dateIMDb_URLunknownActionAdventureAnimationChildren's...FantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
34Get Shorty (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Get%20Shorty%...01000...0000000000
45Copycat (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Copycat%20(1995)00000...0000000100
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " movie_id movie_title release_date video_release_date \\\n", + "0 1 Toy Story (1995) 01-Jan-1995 NaN \n", + "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n", + "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n", + "3 4 Get Shorty (1995) 01-Jan-1995 NaN \n", + "4 5 Copycat (1995) 01-Jan-1995 NaN \n", + "\n", + " IMDb_URL unknown Action \\\n", + "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 \n", + "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 \n", + "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 \n", + "3 http://us.imdb.com/M/title-exact?Get%20Shorty%... 0 1 \n", + "4 http://us.imdb.com/M/title-exact?Copycat%20(1995) 0 0 \n", + "\n", + " Adventure Animation Children's ... Fantasy Film-Noir Horror Musical \\\n", + "0 0 1 1 ... 0 0 0 0 \n", + "1 1 0 0 ... 0 0 0 0 \n", + "2 0 0 0 ... 0 0 0 0 \n", + "3 0 0 0 ... 0 0 0 0 \n", + "4 0 0 0 ... 0 0 0 0 \n", + "\n", + " Mystery Romance Sci-Fi Thriller War Western \n", + "0 0 0 0 0 0 0 \n", + "1 0 0 0 1 0 0 \n", + "2 0 0 0 1 0 0 \n", + "3 0 0 0 0 0 0 \n", + "4 0 0 0 1 0 0 \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "items.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "AG9BVXm2dY7X" + }, + "source": [ + "ua.base, ua.test, ub.base, ub.test -- The data sets ua.base, ua.test, ub.base, and ub.test\n", + "split the u data into a training set and a test set with\n", + "exactly 10 ratings per user in the test set. The sets\n", + "ua.test and ub.test are disjoint. 
These data sets can\n", + " be generated from u.data" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "urp6j4yW6OJl", + "outputId": "0907767b-a04b-4e7b-f315-e9fe0cde73a2" + }, + "outputs": [], + "source": [ + "# get base ratings\n", + "r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']\n", + "ratings_base = pd.read_csv('ml-100k/ua.base', sep='\\t', names=r_cols, encoding='latin-1')" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(90570, 4)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ratings_base.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 204 + }, + "colab_type": "code", + "id": "ScNSSMlc6aZP", + "outputId": "41f69eff-568e-4014-e2a3-3349fd545496" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_idmovie_idratingunix_timestamp
0115874965758
1123876893171
2134878542960
3143876893119
4153889751712
\n", + "
" + ], + "text/plain": [ + " user_id movie_id rating unix_timestamp\n", + "0 1 1 5 874965758\n", + "1 1 2 3 876893171\n", + "2 1 3 4 878542960\n", + "3 1 4 3 876893119\n", + "4 1 5 3 889751712" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ratings_base.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "ovjdNulP-tow" + }, + "source": [ + "### Create a pivot table from the ratings_base dataset" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "colab_type": "code", + "id": "9axc9p326fC2", + "outputId": "912bf5a5-256f-4392-8f5a-872083fe7d02" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id12345678910...934935936937938939940941942943
movie_id
15400040004...2340400500
23000000000...4000000005
34000000000...0040000000
43000005004...5000002000
53000000000...0000000000
\n", + "

5 rows × 943 columns

\n", + "
" + ], + "text/plain": [ + "user_id 1 2 3 4 5 6 7 8 9 10 ... 934 935 \\\n", + "movie_id ... \n", + "1 5 4 0 0 0 4 0 0 0 4 ... 2 3 \n", + "2 3 0 0 0 0 0 0 0 0 0 ... 4 0 \n", + "3 4 0 0 0 0 0 0 0 0 0 ... 0 0 \n", + "4 3 0 0 0 0 0 5 0 0 4 ... 5 0 \n", + "5 3 0 0 0 0 0 0 0 0 0 ... 0 0 \n", + "\n", + "user_id 936 937 938 939 940 941 942 943 \n", + "movie_id \n", + "1 4 0 4 0 0 5 0 0 \n", + "2 0 0 0 0 0 0 0 5 \n", + "3 4 0 0 0 0 0 0 0 \n", + "4 0 0 0 0 2 0 0 0 \n", + "5 0 0 0 0 0 0 0 0 \n", + "\n", + "[5 rows x 943 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rtable = pd.pivot_table(ratings_base,index='movie_id', columns='user_id', values='rating',fill_value = 0)\n", + "rtable.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 136 + }, + "colab_type": "code", + "id": "pZcWfHsPF8rY", + "outputId": "5d1e01e4-16f5-42d8-ae5c-d664956d8895" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[5, 4, 0, ..., 5, 0, 0],\n", + " [3, 0, 0, ..., 0, 0, 5],\n", + " [4, 0, 0, ..., 0, 0, 0],\n", + " ...,\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0],\n", + " [0, 0, 0, ..., 0, 0, 0]])" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Y\n", + "rtable.values" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "colab_type": "code", + "id": "pubVgFhXhCZu", + "outputId": "e8067e91-e44e-4a42-d279-7ecaf5c9b082" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id12345678910...934935936937938939940941942943
movie_id
11.01.00.00.00.01.00.00.00.01.0...1.01.01.00.01.00.00.01.00.00.0
21.00.00.00.00.00.00.00.00.00.0...1.00.00.00.00.00.00.00.00.01.0
31.00.00.00.00.00.00.00.00.00.0...0.00.01.00.00.00.00.00.00.00.0
41.00.00.00.00.00.01.00.00.01.0...1.00.00.00.00.00.01.00.00.00.0
51.00.00.00.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

5 rows × 943 columns

\n", + "
" + ], + "text/plain": [ + "user_id 1 2 3 4 5 6 7 8 9 10 ... 934 935 \\\n", + "movie_id ... \n", + "1 1.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0 ... 1.0 1.0 \n", + "2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 \n", + "3 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "4 1.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 ... 1.0 0.0 \n", + "5 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 \n", + "\n", + "user_id 936 937 938 939 940 941 942 943 \n", + "movie_id \n", + "1 1.0 0.0 1.0 0.0 0.0 1.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", + "3 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", + "5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", + "\n", + "[5 rows x 943 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "R = (rtable != 0)\n", + "R = R.astype(float)\n", + "R.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "iqMJX_th6Tim" + }, + "source": [ + "$$\\sum_{i:r(i, j)=1}\\left((\\theta^{(j)})^T x^{(i)} - y^{(i, j)}\\right) x_k^{(i)} + \\lambda \\theta_k^{(j)}$$\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": { + "colab": {}, + "colab_type": "code", + "id": "CXBaE-DzE4X7" + }, + "outputs": [], + "source": [ + "# user j rating (theta)\n", + "# item/movie i (X)\n", + "# Xk - for loop?" 
def cost(params, Y, R, num_users, num_movies, num_features, lambd):
    """Regularized squared-error cost for collaborative filtering.

    Parameters
    ----------
    params : numpy.ndarray
        1-D vector holding the movie features followed by the user
        parameters. The first ``num_movies * num_features`` entries are
        reshaped (row-major) to ``X``; the remainder to ``theta``.
    Y : numpy.ndarray
        Ratings, combined elementwise with ``X @ theta.T`` which has shape
        ``(num_movies, num_users)``.
    R : numpy.ndarray
        Mask multiplied elementwise with the error so that only entries
        where ``R`` is non-zero contribute.
    num_users, num_movies, num_features : int
        Dimensions used to unpack ``params``.
    lambd : float
        L2 regularization strength applied to both ``X`` and ``theta``.

    Returns
    -------
    float
        ``0.5 * sum(((X @ theta.T - Y) * R) ** 2)`` plus
        ``lambd / 2 * (sum(theta ** 2) + sum(X ** 2))``.
    """
    # Unwrap params into X (movies x features) and theta (users x features).
    X = params[: num_movies * num_features].reshape(num_movies, num_features)
    theta = params[num_movies * num_features:].reshape(num_users, num_features)

    # Prediction error, masked so that unrated entries do not count.
    pred = np.dot(X, theta.T)
    loss = (pred - Y)
    J = np.sum(np.square(loss * R)) / 2

    # L2 penalties, named after the matrix each one actually penalizes.
    reg_theta = lambd / 2 * np.sum(np.square(theta))
    reg_X = lambd / 2 * np.sum(np.square(X))
    return J + reg_theta + reg_X


def Gradient(params, Y, R, num_users, num_movies, num_features, lambd):
    """Gradient of :func:`cost` with respect to ``params``.

    Takes the same arguments as :func:`cost`.

    Returns
    -------
    numpy.ndarray
        1-D vector with the same length and layout as ``params``: the
        gradient w.r.t. ``X`` flattened row-major, followed by the gradient
        w.r.t. ``theta`` flattened row-major.
    """
    # Unwrap params into X and theta (same layout as in cost()).
    X = params[: num_movies * num_features].reshape(num_movies, num_features)
    theta = params[num_movies * num_features:].reshape(num_users, num_features)

    # Masked prediction error, shared by both partial gradients.
    pred = np.dot(X, theta.T)
    masked_loss = (pred - Y) * R

    X_grad = np.dot(masked_loss, theta) + lambd * X
    # Regularize with the local `theta` unpacked from params. The previous
    # code referenced a global `Theta`, which is undefined on a fresh kernel
    # and otherwise silently uses whatever matrix the notebook last stored.
    theta_grad = np.dot(masked_loss.T, X) + lambd * theta

    # Flatten row-major so that entry k of the gradient corresponds to entry k
    # of params under the row-major reshape used above. Column-major flattening
    # only agrees with it when num_features == 1.
    return np.concatenate((X_grad.ravel(), theta_grad.ravel()))


def Optimize(params, Y, r, n_users, n_items, n_features, lambd, step, maxrun):
    """Minimize :func:`cost` with fixed-step batch gradient descent.

    Parameters
    ----------
    params : numpy.ndarray
        Initial parameter vector (see :func:`cost` for the layout). It is
        not modified; each update creates a new array.
    Y, r : numpy.ndarray
        Ratings and mask, forwarded to :func:`cost` and :func:`Gradient`.
    n_users, n_items, n_features : int
        Dimensions forwarded to :func:`cost` and :func:`Gradient`.
    lambd : float
        Regularization strength.
    step : float
        Learning rate multiplying the gradient in every update.
    maxrun : int
        Number of gradient-descent iterations to run.

    Returns
    -------
    params : numpy.ndarray
        Parameter vector after ``maxrun`` updates.
    costs : list of float
        Cost evaluated before each update, one entry per iteration.
    """
    costs = []
    for _ in range(maxrun):
        # Record the cost at the current point so the history can be plotted.
        costs.append(cost(params, Y, r, n_users, n_items, n_features, lambd))
        grad = Gradient(params, Y, r, n_users, n_items, n_features, lambd)
        params = params - step * grad

    return params, costs
n movies (items), and 1 user (feature) \n", + "Theta = np.random.normal(loc = 0.0, scale = 1.0, size = (n_users,n_features))\n", + "\n", + "init_params = np.concatenate((X.reshape(n_items * n_features, 1, order = \"F\"),\n", + " Theta.reshape(n_users * n_features, 1, order = \"F\")))\n", + "init_params = np.squeeze(init_params)\n", + "\n", + "# Optimization\n", + "lamba = 0.1\n", + "maxrun = 10000\n", + "step = 0.00001\n", + "params, J = Optimize(init_params, rtable.values, R.values, n_users, n_items, \\\n", + " n_features, lamba, step, maxrun)\n", + "J[-1]" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 269 + }, + "colab_type": "code", + "id": "Y2UQhu5z5be5", + "outputId": "e86ae0ba-863d-4cdf-ef7b-f3753049644d" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# now plot the cost\n", + "import matplotlib.pyplot as plt\n", + "plt.plot(J,\"b.\",markersize=1,label=\"Cost\") # note: this is 0-based\n", + "plt.show()\n", + " \n", + "# Extract X and Theta from params vector\n", + "X = params[0:(n_items * n_features)]\n", + "Theta = params[(n_items * n_features):len(params)]\n", + "X = X.reshape(n_items, n_features, order = \"F\")\n", + "Theta = Theta.reshape(n_users, n_features, order = \"F\")\n", + "pred = np.dot(X, Theta.T)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 51 + }, + "colab_type": "code", + "id": "o4mn6z6h7Z5w", + "outputId": "da0ef956-6f71-4069-83a9-4ec8c40d7716" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(1680, 1)\n", + "(943, 1)\n" + ] + } + ], + "source": [ + "param_X = init_params[:n_items].reshape(n_items, n_features)\n", + "print(param_X.shape)\n", + "\n", + "param_theta = init_params[n_items:].reshape(n_users, n_features)\n", + "print(param_theta.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 34 + }, + "colab_type": "code", + "id": "omeFv6DlB1OP", + "outputId": "aa4b0197-9c01-41fa-a59d-aa4c1863999c" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "(1680,)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "init_params[Theta.shape[0]:].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 265 + }, + "colab_type": "code", + "id": "TspeeoeKeP9g", + "outputId": "69302ea4-f49d-4faa-c73f-8cd3eac01b3d" + }, + "outputs": [ + { + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" + } + ], + "source": [ + "n_users = np.size(rtable, 1)\n", + "n_items = np.size(rtable, 0)\n", + "n_features = 1 # try a small number\n", + "\n", + "# Initialization\n", + "X = np.random.normal(loc = 0.0, scale = 1.0, size = (n_items, n_features))\n", + "Theta = np.random.normal(loc = 0.0, scale = 1.0, size = (n_users,n_features))\n", + "\n", + "init_params = np.concatenate((X.reshape(n_items * n_features, 1, order = \"F\"),\n", + " Theta.reshape(n_users * n_features, 1, order = \"F\")))\n", + "init_params = np.squeeze(init_params)\n", + "\n", + "# Optimization\n", + "lamba = 0.1\n", + "maxrun = 10000\n", + "step = 0.00001\n", + "params, costs = Optimize(init_params, rtable.values, R.values, n_users, n_items, \\\n", + " n_features, lamba, step, maxrun)\n", + "# now plot the cost\n", + "import matplotlib.pyplot as plt\n", + "plt.plot(costs,\"b.\",markersize=1,label=\"Cost\") # note: this is 0-based\n", + "plt.show()\n", + " \n", + "# Extract X and Theta from params vector\n", + "X = params[0:(n_items * n_features)]\n", + "Theta = params[(n_items * n_features):len(params)]\n", + "X = X.reshape(n_items, n_features, order = \"F\")\n", + "Theta = Theta.reshape(n_users, n_features, order = \"F\")\n", + "pred = np.dot(X, Theta.T)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "colab_type": "code", + "id": "H2LA-WUBQ7h8", + "outputId": "d9d95c2b-3820-483d-edbd-5d0598055947" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id12345678910...934935936937938939940941942943
movie_id
13.9814973.9714023.3750804.8024573.4408053.6530024.3131274.1945504.1854074.204742...3.8528114.4494584.2433443.4480893.9653825.1527493.5451094.0742174.3559603.809908
23.3611143.3525912.8491874.0541542.9046703.0838033.6410703.5409693.5332513.549573...3.2524793.7561583.5821612.9108203.3475104.3498652.9927223.4393863.6772293.216261
33.1798983.1718352.6955723.8355732.7480642.9175393.4447603.3500573.3427543.358197...3.0771203.5536443.3890272.7538823.1670284.1153412.8313683.2539513.4789703.042855
43.6460873.6368413.0907564.3978863.1509433.3452643.9497783.8411913.8328183.850524...3.5282414.0746253.8858753.1576143.6313294.7186693.2464603.7309953.9890043.488952
53.3685843.3600422.8555194.0631642.9111253.0906573.6491623.5488393.5411033.557462...3.2597073.7645063.5901222.9172893.3549494.3595322.9993733.4470303.6854013.223409
\n", + "

5 rows × 943 columns

\n", + "
" + ], + "text/plain": [ + "user_id 1 2 3 4 5 6 \\\n", + "movie_id \n", + "1 3.981497 3.971402 3.375080 4.802457 3.440805 3.653002 \n", + "2 3.361114 3.352591 2.849187 4.054154 2.904670 3.083803 \n", + "3 3.179898 3.171835 2.695572 3.835573 2.748064 2.917539 \n", + "4 3.646087 3.636841 3.090756 4.397886 3.150943 3.345264 \n", + "5 3.368584 3.360042 2.855519 4.063164 2.911125 3.090657 \n", + "\n", + "user_id 7 8 9 10 ... 934 935 \\\n", + "movie_id ... \n", + "1 4.313127 4.194550 4.185407 4.204742 ... 3.852811 4.449458 \n", + "2 3.641070 3.540969 3.533251 3.549573 ... 3.252479 3.756158 \n", + "3 3.444760 3.350057 3.342754 3.358197 ... 3.077120 3.553644 \n", + "4 3.949778 3.841191 3.832818 3.850524 ... 3.528241 4.074625 \n", + "5 3.649162 3.548839 3.541103 3.557462 ... 3.259707 3.764506 \n", + "\n", + "user_id 936 937 938 939 940 941 \\\n", + "movie_id \n", + "1 4.243344 3.448089 3.965382 5.152749 3.545109 4.074217 \n", + "2 3.582161 2.910820 3.347510 4.349865 2.992722 3.439386 \n", + "3 3.389027 2.753882 3.167028 4.115341 2.831368 3.253951 \n", + "4 3.885875 3.157614 3.631329 4.718669 3.246460 3.730995 \n", + "5 3.590122 2.917289 3.354949 4.359532 2.999373 3.447030 \n", + "\n", + "user_id 942 943 \n", + "movie_id \n", + "1 4.355960 3.809908 \n", + "2 3.677229 3.216261 \n", + "3 3.478970 3.042855 \n", + "4 3.989004 3.488952 \n", + "5 3.685401 3.223409 \n", + "\n", + "[5 rows x 943 columns]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# wrap the predicted ratings in a DataFrame that reuses rtable's row and column labels\n", + "pred_df = pd.DataFrame(data = pred, index = rtable.index, columns = rtable.columns)\n", + "pred_df.head(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 284 + }, + "colab_type": "code", + "id": "RMdtfvpCjnXz", + "outputId": "7c0c2f7b-d596-4f7b-e418-28d0aaf13f35" + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
user_id12345678910...934935936937938939940941942943
movie_id
14435344444...4443454444
23334334444...3443343343
33334333333...3433343333
44434334444...4443453443
53334334444...3443343343
\n", + "

5 rows × 943 columns

\n", + "
" + ], + "text/plain": [ + "user_id 1 2 3 4 5 6 7 8 9 10 ... 934 935 \\\n", + "movie_id ... \n", + "1 4 4 3 5 3 4 4 4 4 4 ... 4 4 \n", + "2 3 3 3 4 3 3 4 4 4 4 ... 3 4 \n", + "3 3 3 3 4 3 3 3 3 3 3 ... 3 4 \n", + "4 4 4 3 4 3 3 4 4 4 4 ... 4 4 \n", + "5 3 3 3 4 3 3 4 4 4 4 ... 3 4 \n", + "\n", + "user_id 936 937 938 939 940 941 942 943 \n", + "movie_id \n", + "1 4 3 4 5 4 4 4 4 \n", + "2 4 3 3 4 3 3 4 3 \n", + "3 3 3 3 4 3 3 3 3 \n", + "4 4 3 4 5 3 4 4 3 \n", + "5 4 3 3 4 3 3 4 3 \n", + "\n", + "[5 rows x 943 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# convert the predicted ratings to whole numbers 1-5: clip to the rating scale, then round; done explicitly so pandas' global float display format is left untouched\n", + "pred_rounded = pred_df.clip(1, 5).round().astype(int)\n", + "pred_rounded.head()" + ] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "group_4_assignment_7.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/abbash_notebook.ipynb b/notebooks/abbash_notebook.ipynb index b56b6ad..0a4be93 100644 --- a/notebooks/abbash_notebook.ipynb +++ b/notebooks/abbash_notebook.ipynb @@ -2,21 +2,118 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import scipy.fftpack\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Numpy Tutorials: Fast Fourier Transform: The First Three Natural Frequencies" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### What is Fast Fourier Transform (FFT)?"
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + " A Fast Fourier Transform (FFT) is a fundamental tool in the world of engineering. It is widely used in the field of vibrations to measure the frequencies of various devices. The FFT operates on discrete (sampled) functions, such as trigonometric functions, and can be used to find quantities such as the time it takes to complete a cycle. It takes the signal of any device or function and converts it from the time domain into the frequency domain. As a result, we are able to associate frequencies with the vibrations of a device over the measured time. In turn, when the vibrations are unforced, these frequencies are considered to be \"natural frequencies\". \n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Example 1: Associating Natural Frequencies to a Sine Function" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "For the first example of setting up a Fast Fourier Transform, we will take a look at a sine wave and generate its natural frequencies at each peak.\n", + " We will define a sine wave in terms of its frequency, a number of samples per second, and a time frame; in this case a 1 Hz sine wave sampled at 100 Hz over a 10 second period:" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [], + "source": [ + "N_freq = 100 #sampling rate: number of samples per second, in terms of Hertz\n", + "time = 10 #duration of the sine function, in terms of seconds\n", + "wave_freq = N_freq * time #total number of samples over the given time frame\n", + "\n", + "#build a sine wave of `frequency` Hz sampled N_freq times per second for `time` seconds; returns (sample times, amplitudes)\n", + "def sine_function(frequency, N_freq, time):\n", + "    x = np.linspace(0, time, N_freq * time, endpoint=False) #sample times spaced exactly 1/N_freq apart; sized from the arguments, not the global wave_freq\n", + "    y = np.sin(2 * np.pi * frequency * x) #amplitude of a `frequency` Hz sine wave at each sample time\n", + "    return x, y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Having defined the sine wave 
function, which generates a wave of a given frequency over a certain period of time, we can proceed to plot this data as a sine wave graph:" + ] + }, + { + "cell_type": "code", + "execution_count": 48, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "hello\n" - ] + "data": { + "image/png": "\n", + "text/plain": [ + "
" + ] + }, + "metadata": { + "needs_background": "light" + }, + "output_type": "display_data" } ], "source": [ - "print('hello')" + "x, y = sine_function(1, N_freq, time) #a 1 Hz wave (well below the 100 Hz sampling rate); x holds the sample times and y the amplitudes\n", + "plt.title('Sine Wave Graph')\n", + "plt.plot(x, y)\n", + "plt.xlabel('Time (seconds)')\n", + "plt.ylabel('Amplitude')\n", + "plt.grid(True)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now that we have created a sine wave, we can write code that transforms this signal to obtain its corresponding natural frequencies:" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": null, diff --git a/notebooks/shariq_notebook.ipynb b/notebooks/shariq_notebook.ipynb index a8220e8..bdf9ddc 100644 --- a/notebooks/shariq_notebook.ipynb +++ b/notebooks/shariq_notebook.ipynb @@ -1914,7 +1914,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.4" + "version": "3.8.3" } }, "nbformat": 4,