search-demo / app.py
Shreemit's picture
Added Images
2f3a582
raw
history blame
2.59 kB
import streamlit as st
import numpy as np
import numpy.linalg as la
import pickle
#import streamlit_analytics
# Compute Cosine Similarity
def cosine_similarity(x, y):
    """Return the cosine similarity between two vectors.

    Args:
        x: First vector (anything accepted by ``np.array``).
        y: Second vector, same length as ``x``.

    Returns:
        ``dot(x, y) / (norm(x) * norm(y))``, or ``0.0`` when either vector
        has zero norm and the ratio is therefore undefined.
    """
    x_arr = np.array(x)
    y_arr = np.array(y)
    denominator = la.norm(x_arr) * la.norm(y_arr)
    # A zero-norm vector would make this 0/0, which NumPy evaluates to NaN
    # (with a RuntimeWarning); report "no similarity" instead.
    if denominator == 0:
        return 0.0
    return np.dot(x_arr, y_arr) / denominator
# Function to Load Glove Embeddings
def load_glove_embeddings(file):
    """Load word vectors from a whitespace-separated text file.

    Each non-blank line is split on whitespace; the first token is the word
    and the remaining tokens are parsed as its vector components.

    Args:
        file: Path to the embeddings text file (read as UTF-8).

    Returns:
        Dict mapping each word to a 1-D ``float32`` NumPy array. If a word
        appears on several lines, the last occurrence wins.

    Raises:
        ValueError: If a vector component cannot be parsed as a float.
    """
    print("Loading Glove Model")
    glove_model = {}
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            # Blank lines (e.g. a trailing newline at end of file) have no
            # tokens and would otherwise raise IndexError on the word lookup.
            if not values:
                continue
            word, *components = values
            glove_model[word] = np.asarray(components, dtype='float32')
    print("Loaded {} words".format(len(glove_model)))
    return glove_model
# Get Averaged Glove Embedding of a sentence
def averaged_glove_embeddings(sentence, embeddings_dict):
    """Return the mean embedding of the in-vocabulary words of a sentence.

    The sentence is split on whitespace and each token is looked up exactly
    as written (no case folding or punctuation stripping). Tokens missing
    from ``embeddings_dict`` are ignored.

    Args:
        sentence: Input text.
        embeddings_dict: Mapping from word to its embedding vector; all
            vectors are expected to share one length.

    Returns:
        A 1-D float NumPy array holding the average of the matched vectors,
        or an all-zero vector when no token is found. Its length is that of
        the vectors in ``embeddings_dict`` (50 if the mapping is empty).
    """
    # Infer the dimensionality from the table instead of assuming 50, so
    # other embedding sizes work; 50 remains the fallback for an empty table.
    sample_vector = next(iter(embeddings_dict.values()), None)
    dimension = 50 if sample_vector is None else len(sample_vector)

    glove_embedding = np.zeros(dimension)
    count_words = 0
    # split() with no argument also handles tabs, newlines and runs of spaces.
    for word in sentence.split():
        if word in embeddings_dict:
            glove_embedding += embeddings_dict[word]
            count_words += 1
    # max(..., 1) avoids dividing by zero when nothing matched.
    return glove_embedding / max(count_words, 1)
# Gold standard words to search from
gold_words = ["flower", "mountain", "tree", "car", "building"]

# Text Search
#with streamlit_analytics.track():
st.title("Search Based Retrieval Demo")
st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
text_search = st.text_input("", value="")

# Load glove embeddings
# NOTE(review): Streamlit re-executes the script on every interaction, so the
# embeddings file is re-read each time; consider wrapping the loader in
# Streamlit's caching decorator if the installed version provides one.
glove_embeddings = load_glove_embeddings('glove.6B.50d.txt')

if text_search:
    input_embedding = averaged_glove_embeddings(text_search, glove_embeddings)

    if not np.any(input_embedding):
        # No query word was found in the vocabulary: the averaged embedding
        # is all zeros, every similarity is undefined, and any "closest
        # word" shown would be arbitrary.
        st.write("None of the words in your input are in the GloVe vocabulary. Please try a different input.")
    else:
        # Similarity of the query to each gold word, keyed by the word's
        # position in gold_words.
        cosine_sim = {
            index: cosine_similarity(input_embedding, glove_embeddings[gold_word])
            for index, gold_word in enumerate(gold_words)
        }
        print(cosine_sim)

        # Only the best match is displayed, so take the maximum directly
        # (ties resolve to the earliest gold word).
        best_index = max(cosine_sim, key=cosine_sim.get)

        st.write("(My search uses glove embeddings)")
        st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
        word = gold_words[best_index]
        st.subheader(word)
        # Each gold word has an image named after it under images/.
        st.image('images/{}.png'.format(word), width=200)
        st.write("")