"""Streamlit demo: retrieve the closest "gold" word for a free-text query.

The query is embedded by averaging the GloVe vectors of its words and is
compared, by cosine similarity, against a small fixed list of gold words.
The best match is shown together with its picture from ``images/``.

Run with ``streamlit run <this file>``.
"""

import pickle

import numpy as np
import numpy.linalg as la

# import streamlit_analytics

# Embedding file read by the demo, resolved relative to the working directory.
GLOVE_FILE = 'glove.6B.50d.txt'

# Vector size used only when the embedding dictionary is empty and the
# dimension therefore cannot be inferred from its contents.
DEFAULT_EMBEDDING_DIM = 50

# Gold standard words to search from
gold_words = ["flower", "mountain", "tree", "car", "building"]


def cosine_similarity(x, y):
    """Return the cosine similarity of two equal-length vectors.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers with the same length as ``x``.

    Returns:
        ``dot(x, y) / (|x| * |y|)``, or ``0.0`` when either vector has zero
        norm (the similarity is undefined there; returning 0.0 avoids the
        NaN that a division by zero would otherwise produce).
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)
    denominator = la.norm(x_arr) * la.norm(y_arr)
    if denominator == 0:
        return 0.0
    return np.dot(x_arr, y_arr) / denominator


def load_glove_embeddings(file):
    """Load GloVe vectors from a whitespace-separated text file.

    Each non-blank line is ``<word> <float> <float> ...``.

    Args:
        file: Path of the embedding file; it is read as UTF-8.

    Returns:
        Dict mapping each word to a ``float32`` NumPy vector.
    """
    print("Loading Glove Model")
    glove_model = {}
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # A blank line (e.g. a trailing newline) carries no entry.
                continue
            glove_model[values[0]] = np.asarray(values[1:], dtype='float32')
    print("Loaded {} words".format(len(glove_model)))
    return glove_model


def averaged_glove_embeddings(sentence, embeddings_dict):
    """Return the mean embedding of the known words in ``sentence``.

    Words are split on whitespace. A word missing from ``embeddings_dict``
    is retried in lower case, so capitalised input still matches an uncased
    vocabulary; words unknown either way are ignored.

    Args:
        sentence: Input text.
        embeddings_dict: Dict mapping words to equal-length vectors.

    Returns:
        A ``float64`` vector. When no word is known it is all zeros, sized
        like the vectors in ``embeddings_dict`` (``DEFAULT_EMBEDDING_DIM``
        if the dictionary is empty).
    """
    vectors = []
    for word in sentence.split():
        vector = embeddings_dict.get(word)
        if vector is None:
            vector = embeddings_dict.get(word.lower())
        if vector is not None:
            vectors.append(vector)

    if not vectors:
        # Infer the dimension from any stored vector instead of assuming 50.
        sample = next(iter(embeddings_dict.values()), None)
        dim = DEFAULT_EMBEDDING_DIM if sample is None else len(sample)
        return np.zeros(dim)

    return np.mean(np.asarray(vectors, dtype=np.float64), axis=0)


def closest_gold_word(input_embedding, embeddings_dict, candidates=gold_words):
    """Find the candidate word most similar to ``input_embedding``.

    Args:
        input_embedding: Query vector.
        embeddings_dict: Dict mapping words to vectors; it must contain
            every candidate (a missing candidate raises ``KeyError``).
        candidates: Sequence of words to rank.

    Returns:
        ``(word, scores)`` where ``scores`` maps each candidate's index to
        its cosine similarity and ``word`` is the highest-scoring candidate
        (the earliest one on ties).
    """
    scores = {
        index: cosine_similarity(input_embedding, embeddings_dict[word])
        for index, word in enumerate(candidates)
    }
    best_index = max(scores, key=scores.get)
    return candidates[best_index], scores


def main():
    """Render the search page and, if a query was entered, its best match."""
    # Imported here so the helpers above can be imported (and tested)
    # without Streamlit installed.
    import streamlit as st

    # Text Search
    # with streamlit_analytics.track():
    st.title("Search Based Retrieval Demo")
    st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
    text_search = st.text_input("", value="")

    # Load glove embeddings. Streamlit re-executes this script on every
    # interaction, so cache the parsed file when the installed version
    # offers st.cache_resource; otherwise load it directly.
    cache_resource = getattr(st, "cache_resource", None)
    if cache_resource is not None:
        loader = cache_resource(load_glove_embeddings)
    else:
        loader = load_glove_embeddings
    glove_embeddings = loader(GLOVE_FILE)

    if not text_search:
        return

    input_embedding = averaged_glove_embeddings(text_search, glove_embeddings)
    if not np.any(input_embedding):
        # An all-zero embedding means no input word was in the vocabulary;
        # every similarity would be meaningless, so say so instead of
        # presenting an arbitrary winner.
        st.write("None of the words in your input are in my vocabulary; please try another word.")
        return

    word, cosine_sim = closest_gold_word(input_embedding, glove_embeddings)
    print(cosine_sim)

    st.write("(My search uses glove embeddings)")
    st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
    st.subheader(word)
    # Every gold word has a picture named after it.
    st.image('images/{}.png'.format(word), width=200)
    st.write("")


# `streamlit run` executes the script as __main__, so the page still renders;
# a plain import no longer triggers the UI or the embedding load.
if __name__ == "__main__":
    main()