Tesneem committed on
Commit
3331cdd
1 Parent(s): 61804bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +112 -78
app.py CHANGED
@@ -1,22 +1,76 @@
1
- import gradio as gr
2
-
3
- # def greet(name):
4
- # return "Hello " + name + "!!"
5
- from sentence_transformers import SentenceTransformer
6
  import numpy as np
 
7
  from sklearn.metrics.pairwise import cosine_similarity
8
- from datasets import load_dataset
9
- # Load pre-trained SentenceTransformer model
10
- embedding_model = SentenceTransformer("thenlper/gte-large")
11
 
12
- # # Example dataset with genres (replace with your actual data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # dataset = load_dataset("hugginglearners/netflix-shows")
14
- # dataset = dataset.filter(lambda x: x['description'] is not None and x['listed_in'] is not None and x['title'] is not None)
15
- # data = dataset['train'] # Accessing the 'train' split of the dataset
16
 
17
- # # Convert the dataset to a list of dictionaries for easier indexing
18
- # data_list = list[data]
19
- # print(data_list)
20
  # # Combine description and genre for embedding
21
  # def combine_description_title_and_genre(description, listed_in, title):
22
  # return f"{description} Genre: {listed_in} Title: {title}"
@@ -29,80 +83,60 @@ embedding_model = SentenceTransformer("thenlper/gte-large")
29
  # def vector_search(query):
30
  # query_embedding = get_embedding(query)
31
 
32
- # # Generate embeddings for the combined description and genre
33
- # embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list[0]])
34
-
35
- # # Calculate cosine similarity between the query and all embeddings
36
- # similarities = cosine_similarity([query_embedding], embeddings)
37
- # Load dataset (using the correct dataset identifier for your case)
38
- dataset = load_dataset("hugginglearners/netflix-shows")
39
 
40
- # Combine description and genre for embedding
41
- def combine_description_title_and_genre(description, listed_in, title):
42
- return f"{description} Genre: {listed_in} Title: {title}"
43
 
44
- # Generate embedding for the query
45
- def get_embedding(text):
46
- return embedding_model.encode(text)
47
 
48
- # Vector search function
49
- def vector_search(query):
50
- query_embedding = get_embedding(query)
51
-
52
- # Function to generate embeddings for each item in the dataset
53
- def generate_embeddings(example):
54
- return {
55
- 'embedding': get_embedding(combine_description_title_and_genre(example["description"], example["listed_in"], example["title"]))
56
- }
57
-
58
- # Generate embeddings for the dataset using map
59
- embeddings_dataset = dataset["train"].map(generate_embeddings)
60
-
61
- # Extract embeddings
62
- embeddings = np.array([embedding['embedding'] for embedding in embeddings_dataset])
63
-
64
- # Calculate cosine similarity between the query and all embeddings
65
- similarities = cosine_similarity([query_embedding], embeddings)
66
- # # Adjust similarity scores based on ratings
67
- # ratings = np.array([item["rating"] for item in data_list])
68
- # adjusted_similarities = similarities * ratings.reshape(-1, 1)
69
-
70
- # Get top N most similar items (e.g., top 3)
71
- top_n = 3
72
- top_indices = similarities[0].argsort()[-top_n:][::-1] # Get indices of the top N results
73
- top_items = [dataset["train"][i] for i in top_indices]
74
 
75
- # Format the output for display
76
- search_result = ""
77
- for item in top_items:
78
- search_result += f"Title: {item['title']}, Description: {item['description']}, Genre: {item['listed_in']}\n"
79
 
80
- return search_result
81
 
82
- # Gradio Interface
83
- def movie_search(query):
84
- return vector_search(query)
85
- with gr.Blocks() as demo:
86
- gr.Markdown("# Netflix Recommendation System")
87
- gr.Markdown("Enter a query to receive Netflix show recommendations based on title, description, and genre.")
88
- query = gr.Textbox(label="Enter your query")
89
- output = gr.Textbox(label="Recommendations")
90
- submit_button = gr.Button("Submit")
91
 
92
- submit_button.click(fn=movie_search, inputs=query, outputs=output)
93
 
94
- demo.launch()
95
 
96
 
97
- # iface = gr.Interface(fn=movie_search,
98
- # inputs=gr.inputs.Textbox(label="Enter your query"),
99
- # outputs="text",
100
- # live=True,
101
- # title="Netflix Recommendation System",
102
- # description="Enter a query to get Netflix recommendations based on description and genre.")
103
 
104
- # iface.launch()
105
 
106
 
107
- # demo = gr.Interface(fn=greet, inputs="text", outputs="text")
108
- # demo.launch()
 
 
 
 
 
 
1
  import numpy as np
2
+ import pandas as pd
3
  from sklearn.metrics.pairwise import cosine_similarity
 
 
 
4
 
5
+ # Load embeddings and metadata
6
+ embeddings = np.load("path/to/netflix_embeddings.npy")
7
+ metadata = pd.read_csv("path/to/netflix_metadata.csv")
8
+
9
+ # Vector search function
10
+ def vector_search(query, model):
11
+ query_embedding = model.encode(query)
12
+ similarities = cosine_similarity([query_embedding], embeddings)[0]
13
+ top_n = 3
14
+ top_indices = similarities.argsort()[-top_n:][::-1]
15
+ results = metadata.iloc[top_indices]
16
+
17
+ # Format results for display
18
+ result_text = "\n".join(f"Title: {row['title']}, Description: {row['description']}, Genre: {row['listed_in']}" for _, row in results.iterrows())
19
+ return result_text
20
+
21
+ # Gradio Interface
22
+ import gradio as gr
23
+ from sentence_transformers import SentenceTransformer
24
+
25
+ model = SentenceTransformer("thenlper/gte-large")
26
+ with gr.Blocks() as demo:
27
+ query = gr.Textbox(label="Enter your query")
28
+ output = gr.Textbox(label="Recommendations")
29
+ submit_button = gr.Button("Submit")
30
+
31
+ submit_button.click(fn=lambda q: vector_search(q, model), inputs=query, outputs=output)
32
+
33
+ demo.launch()
34
+
35
+ # import gradio as gr
36
+
37
+ # # def greet(name):
38
+ # # return "Hello " + name + "!!"
39
+ # from sentence_transformers import SentenceTransformer
40
+ # import numpy as np
41
+ # from sklearn.metrics.pairwise import cosine_similarity
42
+ # from datasets import load_dataset
43
+ # # Load pre-trained SentenceTransformer model
44
+ # embedding_model = SentenceTransformer("thenlper/gte-large")
45
+
46
+ # # # Example dataset with genres (replace with your actual data)
47
+ # # dataset = load_dataset("hugginglearners/netflix-shows")
48
+ # # dataset = dataset.filter(lambda x: x['description'] is not None and x['listed_in'] is not None and x['title'] is not None)
49
+ # # data = dataset['train'] # Accessing the 'train' split of the dataset
50
+
51
+ # # # Convert the dataset to a list of dictionaries for easier indexing
52
+ # # data_list = list[data]
53
+ # # print(data_list)
54
+ # # # Combine description and genre for embedding
55
+ # # def combine_description_title_and_genre(description, listed_in, title):
56
+ # # return f"{description} Genre: {listed_in} Title: {title}"
57
+
58
+ # # # Generate embedding for the query
59
+ # # def get_embedding(text):
60
+ # # return embedding_model.encode(text)
61
+
62
+ # # # Vector search function
63
+ # # def vector_search(query):
64
+ # # query_embedding = get_embedding(query)
65
+
66
+ # # # Generate embeddings for the combined description and genre
67
+ # # embeddings = np.array([get_embedding(combine_description_title_and_genre(item["description"], item["listed_in"],item["title"])) for item in data_list[0]])
68
+
69
+ # # # Calculate cosine similarity between the query and all embeddings
70
+ # # similarities = cosine_similarity([query_embedding], embeddings)
71
+ # # Load dataset (using the correct dataset identifier for your case)
72
  # dataset = load_dataset("hugginglearners/netflix-shows")
 
 
73
 
 
 
 
74
  # # Combine description and genre for embedding
75
  # def combine_description_title_and_genre(description, listed_in, title):
76
  # return f"{description} Genre: {listed_in} Title: {title}"
 
83
  # def vector_search(query):
84
  # query_embedding = get_embedding(query)
85
 
86
+ # # Function to generate embeddings for each item in the dataset
87
+ # def generate_embeddings(example):
88
+ # return {
89
+ # 'embedding': get_embedding(combine_description_title_and_genre(example["description"], example["listed_in"], example["title"]))
90
+ # }
 
 
91
 
92
+ # # Generate embeddings for the dataset using map
93
+ # embeddings_dataset = dataset["train"].map(generate_embeddings)
 
94
 
95
+ # # Extract embeddings
96
+ # embeddings = np.array([embedding['embedding'] for embedding in embeddings_dataset])
 
97
 
98
+ # # Calculate cosine similarity between the query and all embeddings
99
+ # similarities = cosine_similarity([query_embedding], embeddings)
100
+ # # # Adjust similarity scores based on ratings
101
+ # # ratings = np.array([item["rating"] for item in data_list])
102
+ # # adjusted_similarities = similarities * ratings.reshape(-1, 1)
103
+
104
+ # # Get top N most similar items (e.g., top 3)
105
+ # top_n = 3
106
+ # top_indices = similarities[0].argsort()[-top_n:][::-1] # Get indices of the top N results
107
+ # top_items = [dataset["train"][i] for i in top_indices]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
+ # # Format the output for display
110
+ # search_result = ""
111
+ # for item in top_items:
112
+ # search_result += f"Title: {item['title']}, Description: {item['description']}, Genre: {item['listed_in']}\n"
113
 
114
+ # return search_result
115
 
116
+ # # Gradio Interface
117
+ # def movie_search(query):
118
+ # return vector_search(query)
119
+ # with gr.Blocks() as demo:
120
+ # gr.Markdown("# Netflix Recommendation System")
121
+ # gr.Markdown("Enter a query to receive Netflix show recommendations based on title, description, and genre.")
122
+ # query = gr.Textbox(label="Enter your query")
123
+ # output = gr.Textbox(label="Recommendations")
124
+ # submit_button = gr.Button("Submit")
125
 
126
+ # submit_button.click(fn=movie_search, inputs=query, outputs=output)
127
 
128
+ # demo.launch()
129
 
130
 
131
+ # # iface = gr.Interface(fn=movie_search,
132
+ # # inputs=gr.inputs.Textbox(label="Enter your query"),
133
+ # # outputs="text",
134
+ # # live=True,
135
+ # # title="Netflix Recommendation System",
136
+ # # description="Enter a query to get Netflix recommendations based on description and genre.")
137
 
138
+ # # iface.launch()
139
 
140
 
141
+ # # demo = gr.Interface(fn=greet, inputs="text", outputs="text")
142
+ # # demo.launch()