-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
This recommendation system recommends movies based on the movie type and genre.
- Loading branch information
0 parents
commit 8bf5514
Showing
8 changed files
with
881 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,173 @@ | ||
import streamlit as st | ||
from PIL import Image | ||
import json | ||
from Classifier import KNearestNeighbours | ||
from bs4 import BeautifulSoup | ||
import requests, io | ||
import PIL.Image | ||
from urllib.request import urlopen | ||
|
||
with open('./Data/movie_data.json', 'r+', encoding='utf-8') as f: | ||
data = json.load(f) | ||
with open('./Data/movie_titles.json', 'r+', encoding='utf-8') as f: | ||
movie_titles = json.load(f) | ||
hdr = {'User-Agent': 'Mozilla/5.0'} | ||
|
||
|
||
def movie_poster_fetcher(imdb_link): | ||
## Display Movie Poster | ||
url_data = requests.get(imdb_link, headers=hdr).text | ||
s_data = BeautifulSoup(url_data, 'html.parser') | ||
imdb_dp = s_data.find("meta", property="og:image1") | ||
movie_poster_link = imdb_dp.attrs['content'] | ||
u = urlopen(movie_poster_link) | ||
raw_data = u.read() | ||
image = PIL.Image.open(io.BytesIO(raw_data)) | ||
image = image.resize((158, 301), ) | ||
st.image(image, use_column_width=False) | ||
|
||
|
||
def get_movie_info(imdb_link): | ||
url_data = requests.get(imdb_link, headers=hdr).text | ||
s_data = BeautifulSoup(url_data, 'html.parser') | ||
imdb_content = s_data.find("meta", property="og:description") | ||
movie_descr = imdb_content.attrs['content'] | ||
movie_descr = str(movie_descr).split('.') | ||
movie_director = movie_descr[0] | ||
movie_cast = str(movie_descr[1]).replace('With', 'Cast: ').strip() | ||
movie_story = 'Story: ' + str(movie_descr[2]).strip() + '.' | ||
rating = s_data.find("span", class_="sc-bde20123-1 iZlgcd").text | ||
movie_rating = 'Total Rating count: ' + str(rating) | ||
return movie_director, movie_cast, movie_story, movie_rating | ||
|
||
|
||
def KNN_Movie_Recommender(test_point, k): | ||
# Create dummy target variable for the KNN Classifier | ||
target = [0 for item in movie_titles] | ||
# Instantiate object for the Classifier | ||
model = KNearestNeighbours(data, target, test_point, k=k) | ||
# Run the algorithm | ||
model.fit() | ||
# Print list of 10 recommendations < Change value of k for a different number > | ||
table = [] | ||
for i in model.indices: | ||
# Returns back movie title and imdb link | ||
table.append([movie_titles[i][0], movie_titles[i][2], data[i][-1]]) | ||
print(table) | ||
return table | ||
|
||
|
||
st.set_page_config( | ||
page_title="Movie Recommender System", | ||
) | ||
|
||
|
||
def run(): | ||
img1 = Image.open('./meta/logo.jpg') | ||
img1 = img1.resize((250, 250), ) | ||
st.image(img1, use_column_width=False) | ||
st.title("Movie Recommender System") | ||
st.markdown('''<h4 style='text-align: left; color: #d73b5c;'>* Data is based "IMDB 5000 Movie Dataset"</h4>''', | ||
unsafe_allow_html=True) | ||
genres = ['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Family', | ||
'Fantasy', 'Film-Noir', 'Game-Show', 'History', 'Horror', 'Music', 'Musical', 'Mystery', 'News', | ||
'Reality-TV', 'Romance', 'Sci-Fi', 'Short', 'Sport', 'Thriller', 'War', 'Western'] | ||
movies = [title[0] for title in movie_titles] | ||
category = ['--Select--', 'Movie based', 'Genre based'] | ||
cat_op = st.selectbox('Select Recommendation Type', category) | ||
if cat_op == category[0]: | ||
st.warning('Please select Recommendation Type!!') | ||
elif cat_op == category[1]: | ||
select_movie = st.selectbox('Select movie: (Recommendation will be based on this selection)', | ||
['--Select--'] + movies) | ||
dec = st.radio("Want to Fetch Movie Poster?", ('Yes', 'No')) | ||
st.markdown( | ||
'''<h4 style='text-align: left; color: #d73b5c;'>* Fetching a Movie Posters will take a time."</h4>''', | ||
unsafe_allow_html=True) | ||
if dec == 'No': | ||
if select_movie == '--Select--': | ||
st.warning('Please select Movie!!') | ||
else: | ||
no_of_reco = st.slider('Number of movies you want Recommended:', min_value=5, max_value=20, step=1) | ||
genres = data[movies.index(select_movie)] | ||
test_points = genres | ||
table = KNN_Movie_Recommender(test_points, no_of_reco + 1) | ||
table.pop(0) | ||
c = 0 | ||
st.success('Some of the movies from our Recommendation, have a look below') | ||
for movie, link, ratings in table: | ||
c += 1 | ||
director, cast, story, total_rat = get_movie_info(link) | ||
st.markdown(f"({c})[ {movie}]({link})") | ||
st.markdown(director) | ||
st.markdown(cast) | ||
st.markdown(story) | ||
st.markdown(total_rat) | ||
st.markdown('IMDB Rating: ' + str(ratings) + '⭐') | ||
else: | ||
if select_movie == '--Select--': | ||
st.warning('Please select Movie!!') | ||
else: | ||
no_of_reco = st.slider('Number of movies you want Recommended:', min_value=5, max_value=20, step=1) | ||
genres = data[movies.index(select_movie)] | ||
test_points = genres | ||
table = KNN_Movie_Recommender(test_points, no_of_reco + 1) | ||
table.pop(0) | ||
c = 0 | ||
st.success('Some of the movies from our Recommendation, have a look below') | ||
for movie, link, ratings in table: | ||
c += 1 | ||
st.markdown(f"({c})[ {movie}]({link})") | ||
movie_poster_fetcher(link) | ||
director, cast, story, total_rat = get_movie_info(link) | ||
st.markdown(director) | ||
st.markdown(cast) | ||
st.markdown(story) | ||
st.markdown(total_rat) | ||
st.markdown('IMDB Rating: ' + str(ratings) + '⭐') | ||
elif cat_op == category[2]: | ||
sel_gen = st.multiselect('Select Genres:', genres) | ||
dec = st.radio("Want to Fetch Movie Poster?", ('Yes', 'No')) | ||
st.markdown( | ||
'''<h4 style='text-align: left; color: #d73b5c;'>* Fetching a Movie Posters will take a time."</h4>''', | ||
unsafe_allow_html=True) | ||
if dec == 'No': | ||
if sel_gen: | ||
imdb_score = st.slider('Choose IMDb score:', 1, 10, 8) | ||
no_of_reco = st.number_input('Number of movies:', min_value=5, max_value=20, step=1) | ||
test_point = [1 if genre in sel_gen else 0 for genre in genres] | ||
test_point.append(imdb_score) | ||
table = KNN_Movie_Recommender(test_point, no_of_reco) | ||
c = 0 | ||
st.success('Some of the movies from our Recommendation, have a look below') | ||
for movie, link, ratings in table: | ||
c += 1 | ||
st.markdown(f"({c})[ {movie}]({link})") | ||
director, cast, story, total_rat = get_movie_info(link) | ||
st.markdown(director) | ||
st.markdown(cast) | ||
st.markdown(story) | ||
st.markdown(total_rat) | ||
st.markdown('IMDB Rating: ' + str(ratings) + '⭐') | ||
else: | ||
if sel_gen: | ||
imdb_score = st.slider('Choose IMDb score:', 1, 10, 8) | ||
no_of_reco = st.number_input('Number of movies:', min_value=5, max_value=20, step=1) | ||
test_point = [1 if genre in sel_gen else 0 for genre in genres] | ||
test_point.append(imdb_score) | ||
table = KNN_Movie_Recommender(test_point, no_of_reco) | ||
c = 0 | ||
st.success('Some of the movies from our Recommendation, have a look below') | ||
for movie, link, ratings in table: | ||
c += 1 | ||
st.markdown(f"({c})[ {movie}]({link})") | ||
movie_poster_fetcher(link) | ||
director, cast, story, total_rat = get_movie_info(link) | ||
st.markdown(director) | ||
st.markdown(cast) | ||
st.markdown(story) | ||
st.markdown(total_rat) | ||
st.markdown('IMDB Rating: ' + str(ratings) + '⭐') | ||
|
||
|
||
run() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
import numpy as np | ||
from operator import itemgetter | ||
|
||
|
||
class KNearestNeighbours: | ||
def __init__(self, data, target, test_point, k): | ||
self.data = data | ||
self.target = target | ||
self.test_point = test_point | ||
self.k = k | ||
self.distances = list() | ||
self.categories = list() | ||
self.indices = list() | ||
self.counts = list() | ||
self.category_assigned = None | ||
|
||
@staticmethod | ||
def dist(p1, p2): | ||
"""Method returns the euclidean distance between two points""" | ||
return np.linalg.norm(np.array(p1) - np.array(p2)) | ||
|
||
def fit(self): | ||
"""Method that performs the KNN classification""" | ||
# Create a list of (distance, index) tuples from the test point to each point in the data | ||
self.distances.extend([(self.dist(self.test_point, point), i) for point, i in zip(self.data, [i for i in range(len(self.data))])]) | ||
# Sort the distances in ascending order | ||
sorted_li = sorted(self.distances, key=itemgetter(0)) | ||
# Fetch the indices of the k nearest point from the data | ||
self.indices.extend([index for (val, index) in sorted_li[:self.k]]) | ||
# Fetch the categories from the train data target | ||
for i in self.indices: | ||
self.categories.append(self.target[i]) | ||
# Fetch the count for each category from the K nearest neighbours | ||
self.counts.extend([(i, self.categories.count(i)) for i in set(self.categories)]) | ||
# Find the highest repeated category among the K nearest neighbours | ||
self.category_assigned = sorted(self.counts, key=itemgetter(1), reverse=True)[0][0] |
Oops, something went wrong.