Spaces:
Runtime error
Runtime error
File size: 3,073 Bytes
1410c49 15ecd97 1410c49 11ee1d3 1410c49 11ee1d3 15ecd97 1410c49 d4a5514 1410c49 c5e63e3 dd37b3a fc825b8 dd37b3a 2f33609 1410c49 dd37b3a 2f33609 dd37b3a 2f33609 1410c49 8ca0641 29f96f7 c5e63e3 1410c49 a6ae805 29f96f7 fc825b8 29f96f7 cf44989 1410c49 9b22dfc 1410c49 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import gradio as gr
import pandas as pd
import pickle
from sentence_transformers import SentenceTransformer, util
import re
# Sentence-embedding model used to encode user queries.
# NOTE(review): presumably the same model that produced the cached review
# embeddings loaded below (the cache file name suggests so) - confirm.
mdl_name = 'sentence-transformers/all-distilroberta-v1'
model = SentenceTransformer(mdl_name)

# Cache file holding the precomputed embeddings and the review table.
embedding_cache_path = "scotch_embd_distilroberta.pkl"

# NOTE(review): pickle.load can execute arbitrary code while loading; only
# ship a cache file that comes from a trusted source.
with open(embedding_cache_path, "rb") as fIn:
    cache_data = pickle.load(fIn)

embedding_table = cache_data["embeddings"]
reviews = cache_data["data"]


def _price_to_int_text(raw_price):
    """Return the first run of digits in a price string.

    Thousands separators, a ".00" suffix and the dollar sign are stripped
    first. Raises IndexError when the text contains no digits at all.
    """
    cleaned = raw_price.replace(",", "").replace(".00", "").replace("$", "")
    # Raw string: "\d" in a plain literal is an invalid escape sequence.
    return re.findall(r"\d+", cleaned)[0]


# Normalise the textual price column to integers so it can be range-filtered.
reviews['price'] = reviews.price.apply(_price_to_int_text).astype('int')
# Inclusive (min, max) price bounds for each option offered by the UI.
# Bounds are inclusive on both ends, so a price of exactly 70 or 150 matches
# two neighbouring ranges.
_PRICE_BOUNDS = {
    "$0-$70": (0, 70),
    "$70-$150": (70, 150),
    # Open-ended bucket: no artificial upper cap on expensive bottles.
    "$150+": (150, float("inf")),
}

# Columns returned to the caller, in display order.
_RESULT_COLUMNS = ['name', 'price', 'description_sent']


def user_query_recommend(query, price_rng):
    """Recommend up to six scotches matching a flavour query and price range.

    Args:
        query: Free-text flavour description; it is embedded with the
            module-level sentence-transformer ``model``.
        price_rng: One of "$0-$70", "$70-$150" or "$150+".

    Returns:
        A pandas DataFrame with the columns ``name``, ``price`` and
        ``description_sent`` (the up-to-two best-scoring ``description_sent``
        rows per name, joined with a space), ordered from the highest to the
        lowest similarity and limited to six rows. The frame is empty when
        nothing falls inside the price range.

    Raises:
        ValueError: If ``price_rng`` is not one of the known options.
    """
    try:
        min_p, max_p = _PRICE_BOUNDS[price_rng]
    except KeyError:
        raise ValueError(f"Unknown price range: {price_rng!r}") from None

    # Embed user query
    embedding = model.encode(query)
    # Calculate similarity with all reviews
    sim_scores = util.cos_sim(embedding, embedding_table)

    recommendations = reviews.copy()
    # Transposed so the scores form one column aligned with the review rows.
    recommendations['sim'] = sim_scores.T

    # Keep the two best-scoring rows of every name. Sorting first and taking
    # the per-group head avoids joining back on the float `sim` column, which
    # would duplicate rows whenever two scores tie.
    best_rows = (
        recommendations
        .sort_values('sim', ascending=False)
        .groupby('name')
        .head(2)
    )

    in_range = best_rows.loc[
        (best_rows.price >= min_p) & (best_rows.price <= max_p)
    ]
    if in_range.empty:
        return pd.DataFrame(columns=_RESULT_COLUMNS)

    # Rows are still ordered best-first, so the joined text lists the
    # strongest match first within each group.
    ranked = (
        in_range
        .groupby(['name', 'category', 'price', 'description'], as_index=False)
        .agg(description_sent=('description_sent', " ".join),
             sim=('sim', 'max'))
        # groupby orders its output by the group keys, so the similarity
        # ranking has to be restored before the top rows are taken.
        .sort_values('sim', ascending=False)
    )
    return ranked[_RESULT_COLUMNS].reset_index(drop=True).head(6)
def _build_interface():
    """Assemble the Gradio interface around ``user_query_recommend``.

    NOTE(review): ``gr.inputs`` / ``gr.outputs`` look like the legacy Gradio
    component namespaces - confirm they exist in the pinned Gradio version.
    """
    # Free-text flavour description entered by the user.
    flavour_box = gr.inputs.Textbox(lines=5, label="enter flavour profile")

    # Price bucket; the selected label is passed to the function as-is.
    price_radio = gr.inputs.Radio(
        choices=["$0-$70", "$70-$150", "$150+"],
        default="$0-$70",
        type="value",
        label='Price range',
    )

    # Table that renders the DataFrame returned by the function.
    results_table = gr.outputs.Dataframe(
        max_rows=3,
        overflow_row_behaviour="paginate",
        type="pandas",
        label="Scotch recommendations",
    )

    # Sample (query, price range) pairs offered in the UI.
    sample_queries = [
        ["very sweet with lemons and oranges and marmalades", "$0-$70"],
        ["smoky peaty and wood fire", "$70-$150"],
        ["salty and spicy with exotic fruits", "$150+"],
        ["fragrant nose with chocolate, custard, toffee, pudding and caramel", "$70-$150"],
    ]

    return gr.Interface(
        user_query_recommend,
        inputs=[flavour_box, price_radio],
        outputs=results_table,
        title="Scotch Recommendation",
        description="Looking for scotch recommendations and have some flavours in mind? \nGet recommendations at a preferred price range using semantic search :) ",
        examples=sample_queries,
        theme="grass",
    )


interface = _build_interface()
# Start serving the app with request queuing enabled.
interface.launch(
    enable_queue=True,
)