# -*- coding: utf-8 -*-
"""movie recommendation system.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1iy9ZFUvOu-sz4vP4--9-dr-T-8LMZNOP

Content-based movie recommender. The script joins several text columns of
the movies CSV into one string per movie, vectorizes those strings with
TF-IDF, computes the pairwise cosine similarity, and prints the movies most
similar to the title the user types in.
"""

import difflib

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

MOVIES_CSV_PATH = '/content/movies.csv'

# The suggestion list stops after 29 titles (ranks 1..29).
MAX_SUGGESTIONS = 29

NO_MATCH_MESSAGE = 'No close match found for {!r}; please try another title.'


def find_movie_position(movies, title):
    """Return the row position of the first movie whose title equals *title*.

    Row positions are used (rather than a CSV column or the index labels)
    because row ``i`` of the similarity matrix corresponds to row ``i`` of
    the frame the feature vectors were built from.

    Raises:
        IndexError: if no row has exactly that title.
    """
    positions = np.flatnonzero((movies['title'] == title).to_numpy())
    return int(positions[0])


def rank_by_similarity(similarity_row):
    """Return ``(row_position, score)`` pairs, highest score first.

    ``sorted`` is stable, so movies with equal scores keep their row order.
    """
    return sorted(enumerate(similarity_row), key=lambda pair: pair[1], reverse=True)


def print_suggestions(movies, ranked_movies, limit=MAX_SUGGESTIONS):
    """Print the titles of the first *limit* entries of *ranked_movies*.

    Args:
        movies: DataFrame with a ``title`` column.
        ranked_movies: sequence of ``(row_position, score)`` pairs, best first.
        limit: maximum number of titles to print.
    """
    print('suggesting movies for you : \n')
    # Slice before looping so only the printed rows are ever looked up.
    for rank, (position, _score) in enumerate(ranked_movies[:limit], start=1):
        print(rank, '.', movies['title'].iloc[position])


def suggest_movies(movies, similarity_matrix, movie_name, limit=MAX_SUGGESTIONS):
    """Print up to *limit* movies similar to the closest match of *movie_name*.

    Prints a notice and returns without suggestions when no title is close
    enough to *movie_name*.
    """
    matches = difflib.get_close_matches(movie_name, movies['title'].tolist())
    if not matches:
        # get_close_matches returns an empty list when nothing is similar enough.
        print(NO_MATCH_MESSAGE.format(movie_name))
        return
    position = find_movie_position(movies, matches[0])
    print_suggestions(movies, rank_by_similarity(similarity_matrix[position]), limit)


# --- Load the data and build the similarity matrix --------------------------

movies_df = pd.read_csv(MOVIES_CSV_PATH)

selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']
print(selected_features)

# Missing values become empty strings so the columns can be concatenated.
for feature in selected_features:
    movies_df[feature] = movies_df[feature].fillna('')

# One space-separated string per movie, in the order of selected_features.
combined_features = movies_df[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_df[feature]
print(combined_features)

vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)

similarity = cosine_similarity(feature_vectors)
print(similarity)
print(similarity.shape)

# --- Step-by-step walkthrough, printing every intermediate result -----------

movie_name = input(' Enter name of the movie: ')

list_of_all_titles = movies_df['title'].tolist()
print(list_of_all_titles)

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

if find_close_match:
    close_match = find_close_match[0]
    print(close_match)

    index_of_the_movie = find_movie_position(movies_df, close_match)
    print(index_of_the_movie)

    similarity_score = list(enumerate(similarity[index_of_the_movie]))
    print(similarity_score)

    sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)
    print(sorted_similar_movies)

    print_suggestions(movies_df, sorted_similar_movies)
else:
    print(NO_MATCH_MESSAGE.format(movie_name))

# --- Movie recommendation system (compact version) --------------------------

movie_name = input(' which is your favourite movie : ')
suggest_movies(movies_df, similarity, movie_name)