# tanya17's picture
# Rename movie_recommendation_system.py to app.py
# 48f1fc7 verified
# raw
# history blame
# 2.62 kB
# -*- coding: utf-8 -*-
"""movie recommendation system.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1iy9ZFUvOu-sz4vP4--9-dr-T-8LMZNOP
"""
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# ---------------------------------------------------------------------------
# Load the movie metadata and build the movie-to-movie similarity matrix.
# ---------------------------------------------------------------------------
# NOTE(review): '/content/movies.csv' is a Colab-style absolute path — confirm
# the file exists at this location wherever this script is actually run.
movies_df = pd.read_csv('/content/movies.csv')

# Text columns that are merged into a single document per movie.
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
print(selected_features)

# Missing values become empty strings so the string concatenation below
# never produces NaN for a movie that lacks one of the fields.
movies_df[selected_features] = movies_df[selected_features].fillna('')

# Join the columns with single spaces. The document is derived from
# selected_features itself so the column list lives in exactly one place.
combined_features = movies_df[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_df[feature]
print(combined_features)

# Turn each movie's combined text into a TF-IDF vector (one row per movie).
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)

# Pairwise cosine similarity between the rows of feature_vectors; row/column
# i corresponds to the i-th row of movies_df.
similarity = cosine_similarity(feature_vectors)
print(similarity)
print(similarity.shape)
# ---------------------------------------------------------------------------
# Verbose walkthrough: ask for a movie, then print every intermediate value
# on the way to the list of suggestions.
# ---------------------------------------------------------------------------
movie_name = input(' Enter name of the movie: ')

# Titles in row order, so a position in this list is also the matching
# row/column position in the similarity matrix.
list_of_all_titles = movies_df['title'].tolist()
print(list_of_all_titles)

# Fuzzy-match the user's text against the known titles (best match first).
# The result is an empty list when nothing is similar enough.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

if not find_close_match:
    # Previously this case crashed with IndexError on find_close_match[0].
    print('No close match found for', repr(movie_name))
else:
    close_match = find_close_match[0]
    print(close_match)

    # Use the row position rather than the CSV's own 'index' column: the
    # similarity matrix is positional, so this stays correct even if that
    # column is absent or does not equal the row number.
    index_of_the_movie = list_of_all_titles.index(close_match)
    print(index_of_the_movie)

    # (row position, similarity to the chosen movie) for every movie.
    similarity_score = list(enumerate(similarity[index_of_the_movie]))
    print(similarity_score)

    # Most similar first.
    sorted_similar_movies = sorted(
        similarity_score, key=lambda pair: pair[1], reverse=True
    )
    print(sorted_similar_movies)

    print('suggesting movies for you : \n')
    # Only the first 29 entries are shown (the original counter ran while
    # i < 30), so slice up front instead of scanning the DataFrame once per
    # movie in the whole catalogue.
    for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
        print(i, '.', list_of_all_titles[index])
# ---------------------------------------------------------------------------
# Movie recommendation system: ask for a favourite movie and list the most
# similar titles.
# ---------------------------------------------------------------------------
movie_name = input(' which is your favourite movie : ')

# Titles in row order, so a position in this list is also the matching
# row/column position in the similarity matrix.
list_of_all_titles = movies_df['title'].tolist()

# Fuzzy-match the user's text against the known titles (best match first).
# The result is an empty list when nothing is similar enough.
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

if not find_close_match:
    # Previously this case crashed with IndexError on find_close_match[0].
    print('No close match found for', repr(movie_name))
else:
    close_match = find_close_match[0]

    # Use the row position rather than the CSV's own 'index' column: the
    # similarity matrix is positional, so this stays correct even if that
    # column is absent or does not equal the row number.
    index_of_the_movie = list_of_all_titles.index(close_match)

    # (row position, similarity to the chosen movie), most similar first.
    similarity_score = list(enumerate(similarity[index_of_the_movie]))
    sorted_similar_movies = sorted(
        similarity_score, key=lambda pair: pair[1], reverse=True
    )

    print('suggesting movies for you : \n')
    # Only the first 29 entries are shown (the original counter ran while
    # i < 30), so slice up front instead of scanning the DataFrame once per
    # movie in the whole catalogue.
    for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
        print(i, '.', list_of_all_titles[index])