|
import gradio as gr |
|
import pandas as pd |
|
import tiktoken |
|
|
|
import time |
|
from sentence_transformers import SentenceTransformer |
|
|
|
|
|
|
|
|
|
|
|
import os |
|
import torch |
|
|
|
from openai.embeddings_utils import get_embedding, cosine_similarity |
|
import os |
|
|
|
|
|
|
|
# Inputs for the search backend, loaded once when the module is imported.
_DATA_FILE = 'entire_data.pkl'
_EMBEDDING_MODEL = 'all-mpnet-base-v2'

# Table that search() scores and reads; it accesses the columns `embedding`,
# `name`, `description`, `rating` and `text`.
# NOTE: unpickling can execute arbitrary code -- only load a pickle file that
# comes from a trusted source.
df = pd.read_pickle(_DATA_FILE)

# Sentence-embedding model used by search() to encode the incoming query.
embedder = SentenceTransformer(_EMBEDDING_MODEL)
|
|
|
def search(query, n=15, max_reviews=3):
    """Return the entries whose reviews best match ``query``.

    The query is encoded with the module-level ``embedder``, every row of the
    module-level ``df`` is scored by cosine similarity against that encoding,
    and the ``n`` best-scoring rows are grouped by their ``name`` column.

    Args:
        query: Free-text search string.
        n: Number of top-scoring rows kept before grouping by name.
        max_reviews: Maximum number of review texts returned per name.

    Returns:
        A list of dicts, best match first, one per distinct name among the
        top rows. Each dict has the keys ``"name"``, ``"description"``
        (taken from the best-scoring row of that name), ``"relevance score"``
        (highest similarity within the group), ``"rating"`` (mean rating over
        the group's rows) and ``"relevant_reviews"`` (texts of the group's
        best-scoring rows, at most ``max_reviews`` of them).
    """
    # Flatten instead of reshaping to a hard-coded (768, 1) column: the score
    # then comes back as a plain scalar (so the column sorts numerically) and
    # no longer depends on the embedding width of the model.
    # NOTE(review): assumes each stored embedding is a flat vector with the
    # same length as the query embedding -- confirm against the pickled data.
    query_vector = embedder.encode(query).reshape(-1)
    scores = df["embedding"].apply(
        lambda emb: float(cosine_similarity(emb, query_vector))
    )

    # Attach the scores to a copy. Writing a "similarity" column into the
    # shared module-level frame on every call lets overlapping requests
    # overwrite each other's scores.
    top_rows = (
        df.assign(similarity=scores)
        .sort_values("similarity", ascending=False)
        .head(n)
    )

    hits = []
    seen_names = set()
    for name in top_rows["name"]:
        if name in seen_names:
            continue
        seen_names.add(name)

        # Rows are already sorted by similarity, so the first row of the
        # group is the best match for this name.
        group = top_rows[top_rows["name"] == name]
        best_row = group.iloc[0]

        hits.append(
            {
                "name": name,
                "description": best_row["description"],
                # Aggregate over the whole group, not over a single row.
                "relevance score": float(group["similarity"].max()),
                "rating": float(group["rating"].mean()),
                # Cap by row count (the old check compared the column count).
                "relevant_reviews": group["text"].head(max_reviews).tolist(),
            }
        )
    return hits
|
|
|
def greet(query):
    """Run ``search`` on the submitted text and return its result unchanged."""
    return search(query)
|
|
|
# Web UI: one text input whose value is passed to greet(); whatever greet()
# returns is shown through the "json" output component.
demo = gr.Interface(
    fn=greet,
    inputs="text",
    outputs="json",
)

# Start serving the interface.
demo.launch()