-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
158 lines (134 loc) · 6.13 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import ast
import html
import os

import pandas as pd
import requests
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# TMDB credentials and image host.
# SECURITY(review): an API key is committed to the repository below. It is
# kept only as a fallback so existing deployments keep working; prefer setting
# the TMDB_API_KEY environment variable, then rotate and remove this literal.
TMDB_API_KEY = os.environ.get("TMDB_API_KEY") or "da7812d5a36a96ec885b30dd3fcffe79"
# Prefix that is joined with the `poster_path` returned by the search API.
BASE_POSTER_URL = "https://image.tmdb.org/t/p/w500"
def _extract_names(raw, *, limit=None, job=None):
    """Return the 'name' of each entry in a stringified list of dicts.

    `raw` is parsed with ast.literal_eval; anything that is not a string
    yields an empty list. When `job` is given only entries whose 'job' equals
    it are kept; when `limit` is given only the first `limit` entries are used.
    """
    if not isinstance(raw, str):
        return []
    entries = ast.literal_eval(raw)
    if job is not None:
        entries = [entry for entry in entries if entry['job'] == job]
    if limit is not None:
        entries = entries[:limit]
    return [entry['name'] for entry in entries]


@st.cache_resource
def _load_movie_data():
    """Load both CSVs and build the overview similarity matrix once.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached; callers must treat the returned objects as read-only
    because the same instances are shared between reruns.

    Returns:
        (movies, cosine_sim): the merged, cleaned DataFrame and the square
        cosine-similarity matrix computed from the TF-IDF of 'overview'.
    """
    # Load the datasets
    movies_metadata = pd.read_csv('tmdb_5000_movies.csv')
    credits = pd.read_csv('tmdb_5000_credits.csv')

    # Compare ids as strings so both sides of the merge share one dtype.
    # NOTE(review): after astype(str) a missing id is usually the text 'nan',
    # in which case the fillna has no effect — confirm whether it is needed.
    movies_metadata['id'] = movies_metadata['id'].astype(str).fillna('')
    credits['movie_id'] = credits['movie_id'].astype(str).fillna('')

    # Merge the datasets on the movie id
    merged = pd.merge(movies_metadata, credits, left_on='id', right_on='movie_id')

    # Select important columns; copy so the assignments below write to an
    # independent frame instead of a slice of `merged`.
    movies = merged[["movie_id", 'original_title', 'overview', 'genres', 'cast', 'crew', 'vote_average']].copy()

    # Handle missing values (TF-IDF cannot vectorize NaN)
    movies['overview'] = movies['overview'].fillna('')

    # Convert 'genres', 'cast', and 'crew' from stringified lists to actual lists
    movies['genres'] = movies['genres'].apply(_extract_names)
    movies['cast'] = movies['cast'].apply(lambda raw: _extract_names(raw, limit=3))  # top 3 cast members
    movies['crew'] = movies['crew'].apply(lambda raw: _extract_names(raw, job='Director'))

    # Create the TF-IDF matrix for 'overview' and the pairwise similarities
    tfidf = TfidfVectorizer(stop_words='english')
    tfidf_matrix = tfidf.fit_transform(movies['overview'])
    cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)
    return movies, cosine_sim


movies, cosine_sim = _load_movie_data()
# Debug aid kept from the original script: dumps the title column to stdout.
print(movies['original_title'])
# Function to get movie recommendations
def get_recommendations(title, cosine_sim=cosine_sim):
    """Return the movies whose overviews are most similar to `title`.

    Args:
        title: Movie title typed by the user; matched case-insensitively
            (surrounding whitespace ignored) against 'original_title'.
        cosine_sim: Square similarity matrix indexed by row position of the
            module-level `movies` frame.

    Returns:
        A list of dict records with 'original_title', 'overview' and
        'vote_average', plus 'genres' when the module-level `show_genres`
        flag is truthy. At most `num_recommendations` (module-level) records
        are returned, best match first.

    Raises:
        IndexError: if no movie in the dataset has the given title.

    Note:
        `num_recommendations` and `show_genres` are read from module scope;
        they are set by the sidebar widgets on the Recommendations page.
    """
    wanted = title.strip().lower()
    # Indexing the empty match set raises IndexError, which the caller reports
    # as "movie not found".
    # Assumes `movies` still has the default 0..n-1 index from the merge, so
    # the label also works as a row position into `cosine_sim` and `.iloc`.
    idx = movies.index[movies['original_title'].str.lower() == wanted][0]

    # Rank every *other* movie by similarity. The query row is excluded by
    # index rather than by dropping the first sorted entry: with ties (e.g. an
    # identical or empty overview) the query is not guaranteed to sort first.
    ranked = sorted(
        ((pos, score) for pos, score in enumerate(cosine_sim[idx]) if pos != idx),
        key=lambda pair: pair[1],
        reverse=True,
    )
    movie_indices = [pos for pos, _ in ranked[:num_recommendations]]

    columns = ['original_title', 'overview', 'vote_average']
    if show_genres:
        columns.insert(1, 'genres')
    # Return the titles and additional info (e.g., genre, vote average)
    return movies[columns].iloc[movie_indices].to_dict('records')
def fetch_poster_from_tmdb(movie_title):
    """
    Fetch the movie poster URL from TMDB API using the movie title.

    The poster of the first search result is used. A placeholder image URL is
    returned when the request fails, times out, returns a non-200 status,
    yields no results, or the first result has no poster.
    """
    placeholder = "https://via.placeholder.com/500x750?text=No+Image"  # Placeholder for missing images
    search_url = "https://api.themoviedb.org/3/search/movie"
    params = {
        "api_key": TMDB_API_KEY,
        "query": movie_title,
    }
    try:
        # Without a timeout a stalled connection would hang the page render.
        response = requests.get(search_url, params=params, timeout=5)
        if response.status_code != 200:
            return placeholder
        data = response.json()
    except (requests.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: a missing poster
        # must not take the whole page down.
        return placeholder

    results = data.get('results') or []
    if results:
        poster_path = results[0].get('poster_path', None)
        if poster_path:
            return f"{BASE_POSTER_URL}{poster_path}"
    return placeholder
# Streamlit app: shared stylesheet for the page title and the result cards.
# The literal is kept flush-left so the emitted markup is unchanged.
_PAGE_STYLES = """
<style>
.title {
text-align: center;
font-size: 36px;
font-weight: bold;
font-family: monospace;
color: #63bef2;
background-color: white;
}
.movie-container {
border: 5px solid #ddd;
border-radius: 8px;
border-color: #63bef2;
padding: 15px;
margin-bottom: 10px;
background-color: #f9f9f9;
}
</style>
"""

# Sidebar navigation decides which page the rest of the script renders.
page = st.sidebar.radio("Navigation", ["Home", "Recommendations"])
st.markdown(_PAGE_STYLES, unsafe_allow_html=True)
if page == "Home":
    # Home Page: Display popular movies as a fixed-width poster grid
    st.title("Welcome to the Movie Recommendation System")
    st.subheader("Popular Movies")
    popular_movies = [
        "Inception", "The Dark Knight", "Avatar", "Interstellar", "Titanic",
        "Inside Out", "Iron Man", "Frozen", "The Avengers", "The Matrix",
        "Pulp Fiction", "Forrest Gump", "The Lion King", "Toy Story", "Shrek"
    ]
    num_columns = 5
    # Chunk the titles into rows of `num_columns` posters each.
    rows = [popular_movies[i:i + num_columns] for i in range(0, len(popular_movies), num_columns)]
    for row in rows:
        cols = st.columns(num_columns)
        for col, title in zip(cols, row):
            poster_url = fetch_poster_from_tmdb(title)
            col.image(poster_url, caption=title, use_container_width=True)
elif page == "Recommendations":
    # Sidebar settings; get_recommendations reads these two module-level names.
    with st.sidebar:
        st.header("Settings")
        num_recommendations = st.slider("Number of Recommendations", 5, 20, 10)  # Default is 10
        show_genres = st.checkbox("Show Genres", value=True)
    # Recommendations Page
    st.title("Movie Recommendations")
    st.subheader("Enter a Movie Title")
    # Input from the user
    movie = st.text_input("Movie Title")
    if movie:
        # Only the lookup is guarded: an IndexError here means the title is
        # unknown, whereas one raised while rendering would be a real bug and
        # must not be reported as "not found".
        try:
            recommendations = get_recommendations(movie)
        except IndexError:
            st.error("Movie not found in dataset.")
        else:
            for rec in recommendations:
                # The card is emitted with unsafe_allow_html, so every piece
                # of dataset/API text is escaped before interpolation.
                poster_url = html.escape(fetch_poster_from_tmdb(rec['original_title']), quote=True)
                title_html = html.escape(str(rec['original_title']))
                overview_html = html.escape(str(rec['overview']))
                # 'genres' is only present in the record when show_genres is set.
                genres_html = ("<b>Genres:</b> " + html.escape(", ".join(rec['genres']))) if show_genres else ""
                # The markup stays flush-left: indenting it would change the
                # string handed to st.markdown.
                st.markdown(
                    f"""
<div class="movie-container">
<img src="{poster_url}" style="width:200px; float:left; margin-right:20px; border-radius:8px;">
<h3 style="margin-bottom: 5px;"><strong>🎬 {title_html}</strong></h3>
<p>{genres_html}</p>
<p><b>Overview:</b> {overview_html}</p>
<p><b>Rating:</b> ⭐ {rec['vote_average']}</p>
<div style="clear:both;"></div>
</div>
""",
                    unsafe_allow_html=True
                )
    else:
        st.info("Enter a movie title to get recommendations.")