tanya17 committed on
Commit
322ecf7
·
verified ·
1 Parent(s): 8550192
Files changed (1) hide show
  1. movie_recommendation_system.py +105 -0
movie_recommendation_system.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ """movie recommendation system.ipynb
3
+
4
+ Automatically generated by Colab.
5
+
6
+ Original file is located at
7
+ https://colab.research.google.com/drive/1iy9ZFUvOu-sz4vP4--9-dr-T-8LMZNOP
8
+ """
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import difflib
13
+ from sklearn.feature_extraction.text import TfidfVectorizer
14
+ from sklearn.metrics.pairwise import cosine_similarity
15
+
# Load the movie metadata table (the path is presumably the Colab session
# directory, given the export header -- adjust when running elsewhere).
movies_df = pd.read_csv('/content/movies.csv')

# Notebook leftovers: as bare expressions these display nothing in a script.
movies_df.head()
movies_df.shape

# Text columns that get merged into a single document per movie.
selected_features = ['genres', 'keywords', 'tagline', 'cast','director']
print(selected_features)

# Replace missing entries with empty strings so the string concatenation
# below never propagates NaN into the combined text.
for column_name in selected_features:
    movies_df[column_name] = movies_df[column_name].fillna('')

# Join the selected columns left to right, separated by single spaces.
combined_features = movies_df[selected_features[0]]
for column_name in selected_features[1:]:
    combined_features = combined_features + ' ' + movies_df[column_name]
print(combined_features)

# Turn each movie's combined text into a TF-IDF feature vector.
vectorizer = TfidfVectorizer()
feature_vectors = vectorizer.fit_transform(combined_features)
print(feature_vectors)

# Pairwise cosine similarity between all rows: similarity[i][j] scores
# the movie in row i against the movie in row j.
similarity = cosine_similarity(feature_vectors)
print(similarity, similarity.shape, sep='\n')
44
+
45
+ movie_name = input(' Enter name of the movie: ')
46
+
47
+ list_of_all_titles = movies_df['title'].tolist()
48
+ new_var = print(list_of_all_titles)
49
+
50
+ find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
51
+ print(find_close_match)
52
+
53
+ close_match = find_close_match[0]
54
+ print(close_match)
55
+
56
+ index_of_the_movie = movies_df[movies_df.title == close_match]['index'].values[0]
57
+ print(index_of_the_movie)
58
+
59
+ similarity_score = list(enumerate(similarity[index_of_the_movie]))
60
+ print(similarity_score)
61
+
62
+ len(similarity_score)
63
+
64
+ sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
65
+ print(sorted_similar_movies)
66
+
67
+ print('suggesting movies for you : \n')
68
+
69
+ i = 1
70
+
71
+ for movie in sorted_similar_movies:
72
+ index = movie[0]
73
+ title_from_index = movies_df[movies_df.index==index]['title'].values[0]
74
+ if (i<30):
75
+ print(i, '.',title_from_index)
76
+ i+=1
77
+
78
+ """movie recommendation system
79
+
80
+ """
81
+
82
+ movie_name = input(' which is your favourite movie : ')
83
+
84
+ list_of_all_titles = movies_df['title'].tolist()
85
+
86
+ find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)
87
+
88
+ close_match = find_close_match[0]
89
+
90
+ index_of_the_movie = movies_df[movies_df.title == close_match]['index'].values[0]
91
+
92
+ similarity_score = list(enumerate(similarity[index_of_the_movie]))
93
+
94
+ sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True)
95
+
96
+ print('suggesting movies for you : \n')
97
+
98
+ i = 1
99
+
100
+ for movie in sorted_similar_movies:
101
+ index = movie[0]
102
+ title_from_index = movies_df[movies_df.index==index]['title'].values[0]
103
+ if (i<30):
104
+ print(i, '.',title_from_index)
105
+ i+=1