Spaces:
Runtime error
Runtime error
import os | |
from dataclasses import dataclass | |
from operator import add, sub | |
import gradio as gr | |
import numpy as np | |
from datasets import load_dataset | |
from sklearn.metrics.pairwise import cosine_similarity | |
from pyparsing import Word, alphas, Char, ParseException | |
# Dispatch table: operator symbol -> binary function (from the stdlib
# ``operator`` module) used to combine two operands.
operations = {"+": add, "-": sub}

# Grammar for "word (op word)*", e.g. "king - man + woman".
term = Word(alphas)
# Build the operator character set from the dispatch table so the grammar can
# never accept a symbol that has no implementation. The previous literal
# "+ -" also listed a space, which has no entry in ``operations``.
operator = Char("".join(operations))
expression = term + (operator + term)[...]
def parse_expression(input):
    """Parse a word-arithmetic expression into a flat token sequence.

    The module-level ``expression`` grammar is applied to ``input``; the
    result alternates words and operator symbols
    (``word, op, word, op, word, ...``).

    Args:
        input: The expression text, e.g. ``"king - man + woman"``.

    Returns:
        The pyparsing parse results for the whole string.

    Raises:
        gr.Error: If ``input`` does not match the grammar in its entirety.
    """
    try:
        # parseAll=True makes the parser fail on trailing text it cannot
        # consume (e.g. "king man" or "king - man +") instead of silently
        # evaluating only the leading part that happened to match.
        return expression.parseString(input, parseAll=True)
    except ParseException as e:
        raise gr.Error(f"Parsing error: {e.msg} at position [{e.loc}].") from e
def evaluate_expression(input):
    """Reduce a parsed token sequence to a single vector.

    ``input`` alternates words and operator symbols. Evaluation is strictly
    left to right: the first word's vector is the starting value, and every
    following (operator, word) pair is applied to the running total through
    the ``operations`` table.

    Args:
        input: Sliceable token sequence: word, operator, word, ...

    Returns:
        The combined vector.
    """
    total = word_to_vectors(input[0])
    # Odd positions hold operators; the even positions after the first hold
    # the operands they apply to.
    for symbol, token in zip(input[1::2], input[2::2]):
        combine = operations[symbol]
        total = combine(total, word_to_vectors(token))
    return total
# Load the embeddings once, at import time, and keep them as a DataFrame
# for per-word lookups.
dataset = load_dataset(path="karmiq/glove", split="train")
df = dataset.to_pandas()

# Row-aligned arrays used for the similarity search: all_words[i] is the
# word whose embedding is all_vectors[i].
all_vectors = np.array(df["embeddings"].to_list())
all_words = df["word"].to_numpy()
def word_to_vectors(word):
    """Look up the embedding stored for ``word``.

    Args:
        word: The word to look up; compared for exact equality against the
            ``word`` column of ``df``.

    Returns:
        The ``embeddings`` value of the first matching row.

    Raises:
        gr.Error: If no row matches ``word``.
    """
    matches = df[df["word"] == word]["embeddings"].to_numpy()
    if len(matches) >= 1:
        return matches[0]
    raise gr.Error("Word not found in the dictionary.")
def expression_to_vectors(input):
    """Parse ``input`` and evaluate it to a single vector.

    Convenience wrapper that feeds the output of ``parse_expression`` into
    ``evaluate_expression``; errors raised by either propagate unchanged.
    """
    tokens = parse_expression(input)
    return evaluate_expression(tokens)
def get_results(expression):
    """Return the words whose embeddings are closest to an expression.

    The expression is lower-cased, parsed, and evaluated to a vector; every
    row of ``all_vectors`` is then scored against it by cosine similarity.

    Args:
        expression: Text such as ``"king - man + woman"``.

    Returns:
        A dict mapping up to 10 words to their similarity scores, inserted
        from most to least similar. Words that appear in the expression
        itself are left out.

    Raises:
        gr.Error: If the expression is empty or blank, cannot be parsed, or
            contains a word that has no embedding.
    """
    # `not expression` also covers None, which len() would reject; the strip
    # check gives blank input the same friendly message instead of a parser
    # error.
    if not expression or not expression.strip():
        raise gr.Error("Please provide an expression.")

    expression = expression.lower()
    tokens = parse_expression(expression)
    vectors = evaluate_expression(tokens)

    similarity_scores = cosine_similarity([vectors], all_vectors)[0]
    top_indices = np.argsort(similarity_scores)[::-1]

    # Words sit at the even token positions. Taking them from the parse
    # result (rather than whitespace-splitting the text) keeps the exclusion
    # working for input written without spaces, e.g. "king-man+woman".
    input_words = set(tokens[::2])

    # Walk the ranking only as far as needed to collect ten results instead
    # of materialising an entry for every word in the vocabulary.
    results = {}
    for i in top_indices:
        word = all_words[i]
        if word in input_words:
            continue
        results[word] = similarity_scores[i]
        if len(results) == 10:
            break
    return results
# Sample expressions offered in the UI; the first one doubles as the
# default query.
examples = [
    "king - man + woman",
    "mother - woman + man",
    "berlin - germany + france",
    "saxophone - jazz + classical",
]

# Computed once at start-up so the results panel is already populated when
# the page first renders.
initial_output = get_results(examples[0])
# Custom styling passed to gr.Blocks: one rule per literal, joined into the
# same newline-delimited stylesheet string.
css = (
    "\n"
    "button.gallery-item { color: var(--body-text-color) !important; }\n"
    ".output-class { color: var(--color-red-700) !important; }\n"
    ".confidence-set .label .text { font-weight: var(--weight-medium); }\n"
    ".confidence-set:hover .label { color: var(--color-red-700) !important; }\n"
)
# UI definition: one row with two columns. The left column holds the
# expression box, the Run button, a short description and the example
# picker; the right column shows the ranked results.
# NOTE(review): the nesting below was reconstructed from a listing whose
# indentation had been lost -- confirm it matches the intended layout.
with gr.Blocks(
    theme=gr.themes.Monochrome(radius_size=gr.themes.sizes.radius_sm),
    css=css,
) as app:
    with gr.Row():
        with gr.Column():
            input = gr.Textbox(label="Expression", value=examples[0])

            with gr.Row():
                btn = gr.Button(value="Run", variant="primary")

            with gr.Row():
                gr.Markdown(
                    value=(
                        "Demonstration of computing cosine similarity of embeddings "
                        "from the [GloVe](https://nlp.stanford.edu/projects/glove/) dataset."
                    )
                )

            with gr.Row():
                gr.Examples(examples=examples, inputs=input)

        with gr.Column():
            output = gr.Label(value=initial_output, label="Closest words")

    # Clicking Run re-evaluates the expression and refreshes the label.
    btn.click(fn=get_results, inputs=input, outputs=output)

app.launch()