Spaces:
Sleeping
Sleeping
app.py
Browse files- movie_recommendation_system.py +105 -0
movie_recommendation_system.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
|
2 |
+
"""movie recommendation system.ipynb
|
3 |
+
|
4 |
+
Automatically generated by Colab.
|
5 |
+
|
6 |
+
Original file is located at
|
7 |
+
https://colab.research.google.com/drive/1iy9ZFUvOu-sz4vP4--9-dr-T-8LMZNOP
|
8 |
+
"""
|
9 |
+
|
10 |
+
import numpy as np
|
11 |
+
import pandas as pd
|
12 |
+
import difflib
|
13 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
14 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
15 |
+
|
16 |
+
# Load the movie metadata table.
# NOTE(review): '/content/movies.csv' looks like a Colab upload path — confirm
# the file is present at this location wherever the script is deployed.
movies_df = pd.read_csv('/content/movies.csv')

# Text columns that are merged into one "document" per movie.
selected_features = ['genres', 'keywords', 'tagline', 'cast', 'director']

print(selected_features)

# Replace missing values with empty strings so that the concatenation below
# never yields NaN for a movie that lacks one of the fields.
for feature in selected_features:
    movies_df[feature] = movies_df[feature].fillna('')

# Join the selected columns with single spaces, in list order. Building the
# text from `selected_features` (instead of naming every column again) keeps
# the feature list as the single source of truth.
combined_features = movies_df[selected_features[0]]
for feature in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_df[feature]

print(combined_features)

# Turn every movie's combined text into a TF-IDF vector.
vectorizer = TfidfVectorizer()

feature_vectors = vectorizer.fit_transform(combined_features)

print(feature_vectors)

# Pairwise cosine similarity between all movie vectors; row/column i
# corresponds to the i-th row of `movies_df`.
similarity = cosine_similarity(feature_vectors)

print(similarity)

print(similarity.shape)
|
44 |
+
|
45 |
+
# --- First recommendation pass (verbose: prints every intermediate value) ---

movie_name = input(' Enter name of the movie: ')

list_of_all_titles = movies_df['title'].tolist()
print(list_of_all_titles)

# Fuzzy-match the user's text against the known titles; the result is a list
# of candidate titles ordered from best to worst match (possibly empty).
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
print(find_close_match)

# Stop with a readable message instead of an IndexError when nothing matches.
if not find_close_match:
    raise SystemExit(f'No movie title close to {movie_name!r} was found.')

close_match = find_close_match[0]
print(close_match)

# Row position of the matched title. `similarity` is indexed by row position,
# so the position is taken from the frame itself rather than from an 'index'
# column that the CSV may or may not provide.
index_of_the_movie = np.flatnonzero(movies_df['title'].to_numpy() == close_match)[0]
print(index_of_the_movie)

# Pair every row position with its similarity to the chosen movie.
similarity_score = list(enumerate(similarity[index_of_the_movie]))
print(similarity_score)

# Most similar first; `sorted` is stable, so ties keep their row order.
sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)
print(sorted_similar_movies)

print('suggesting movies for you : \n')

# Only the first 29 entries are ever shown (the original guard was `i < 30`
# with `i` starting at 1), so slice once instead of walking the whole table.
for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
    title_from_index = movies_df['title'].iloc[index]
    print(i, '.', title_from_index)
|
77 |
+
|
78 |
+
"""movie recommendation system
|
79 |
+
|
80 |
+
"""
|
81 |
+
|
82 |
+
# --- Second recommendation pass (quiet: prints only the suggestions) ---

movie_name = input(' which is your favourite movie : ')

list_of_all_titles = movies_df['title'].tolist()

# Fuzzy-match the user's text against the known titles; the result is a list
# of candidate titles ordered from best to worst match (possibly empty).
find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

# Stop with a readable message instead of an IndexError when nothing matches.
if not find_close_match:
    raise SystemExit(f'No movie title close to {movie_name!r} was found.')

close_match = find_close_match[0]

# Row position of the matched title. `similarity` is indexed by row position,
# so the position is taken from the frame itself rather than from an 'index'
# column that the CSV may or may not provide.
index_of_the_movie = np.flatnonzero(movies_df['title'].to_numpy() == close_match)[0]

# Pair every row position with its similarity to the chosen movie.
similarity_score = list(enumerate(similarity[index_of_the_movie]))

# Most similar first; `sorted` is stable, so ties keep their row order.
sorted_similar_movies = sorted(similarity_score, key=lambda x: x[1], reverse=True)

print('suggesting movies for you : \n')

# Only the first 29 entries are ever shown (the original guard was `i < 30`
# with `i` starting at 1), so slice once instead of walking the whole table.
for i, (index, _score) in enumerate(sorted_similar_movies[:29], start=1):
    title_from_index = movies_df['title'].iloc[index]
    print(i, '.', title_from_index)
|