File size: 2,589 Bytes
76b0ac8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8911197
 
 
 
 
 
 
 
 
 
 
76b0ac8
 
 
 
 
 
 
 
 
 
 
 
 
8911197
76b0ac8
 
 
 
 
 
 
 
 
 
8911197
 
76b0ac8
 
 
 
 
 
 
 
 
 
 
 
2f3a582
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76b0ac8
2f3a582
76b0ac8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import streamlit as st
import numpy as np
import numpy.linalg as la
import pickle 
#import streamlit_analytics


# Compute Cosine Similarity
def cosine_similarity(x, y):
    """Return the cosine similarity between two vectors.

    Args:
        x: Array-like of numbers.
        y: Array-like of numbers with the same length as ``x``.

    Returns:
        ``dot(x, y) / (norm(x) * norm(y))``, or ``0.0`` when either vector
        has zero norm (the ratio is undefined there and would otherwise
        evaluate to NaN).
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)
    denominator = la.norm(x_arr) * la.norm(y_arr)
    if denominator == 0:
        # A zero vector has no direction; report "no similarity" instead of
        # letting 0/0 produce NaN, which makes ranking by similarity meaningless.
        return 0.0
    return np.dot(x_arr, y_arr) / denominator


# Function to Load Glove Embeddings

def load_glove_embeddings(file):
    """Load GloVe word vectors from a text file into a dictionary.

    Each non-blank line is expected to hold a word followed by its vector
    components, all separated by whitespace. Blank lines are skipped.

    Args:
        file: Path of the embeddings text file; it is read as UTF-8.

    Returns:
        Dict mapping each word to a ``float32`` NumPy array of its components.
    """
    print("Loading Glove Model")
    glove_model = {}
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            values = line.split()
            if not values:
                # Blank line (e.g. a stray newline at the end of the file):
                # there is no word to index, so skip it instead of crashing.
                continue
            word, *components = values
            glove_model[word] = np.asarray(components, dtype='float32')
    print("Loaded {} words".format(len(glove_model)))
    return glove_model

# Get Averaged Glove Embedding of a sentence
def averaged_glove_embeddings(sentence, embeddings_dict):
    """Return the mean embedding of the known words in a sentence.

    Args:
        sentence: Text to embed; it is split on whitespace into words.
        embeddings_dict: Dict mapping a word to its embedding vector.

    Returns:
        A float64 NumPy array holding the element-wise mean of the vectors of
        the words found in ``embeddings_dict``. Words that are not in the
        mapping are ignored; if none is found the result is all zeros. The
        array has the same length as the vectors in ``embeddings_dict``
        (50 when the mapping is empty).
    """
    # Infer the vector length from the data rather than assuming 50-d
    # embeddings; 50 stays as the fallback for an empty mapping.
    sample = next(iter(embeddings_dict.values()), None)
    dimension = 50 if sample is None else len(sample)

    glove_embedding = np.zeros(dimension)
    count_words = 0
    # split() with no argument treats tabs, newlines and repeated spaces as
    # separators, so such input still yields clean word tokens.
    for word in sentence.split():
        if word in embeddings_dict:
            glove_embedding += embeddings_dict[word]
            count_words += 1

    # max(..., 1) avoids dividing by zero when no word was recognised.
    return glove_embedding / max(count_words, 1)



# Gold standard words to search from
gold_words = ["flower", "mountain", "tree", "car", "building"]

# Text Search
#with streamlit_analytics.track():
st.title("Search Based Retrieval Demo")
st.subheader("Pass in an input word or even a sentence (e.g. jasmine or mount adams)")
text_search = st.text_input("", value="")

# Load glove embeddings.
# Streamlit re-runs this script from the top on every interaction, so reuse
# the parsed embeddings across reruns when the installed Streamlit provides
# st.cache_resource; otherwise fall back to loading the file directly.
_cache_resource = getattr(st, "cache_resource", None)
_load_embeddings = (
    _cache_resource(load_glove_embeddings) if _cache_resource else load_glove_embeddings
)
glove_embeddings = _load_embeddings('glove.6B.50d.txt')

if text_search:
    input_embedding = averaged_glove_embeddings(text_search, glove_embeddings)

    st.write("(My search uses glove embeddings)")
    if np.any(input_embedding):
        # Similarity of the input to each gold word, keyed by the word's index.
        cosine_sim = {
            index: cosine_similarity(input_embedding, glove_embeddings[gold_word])
            for index, gold_word in enumerate(gold_words)
        }
        print(cosine_sim)

        # max() returns the first of equally scored entries, matching a stable
        # descending sort followed by taking the first element.
        best_index = max(cosine_sim, key=cosine_sim.get)
        word = gold_words[best_index]

        st.write("Closest word I have between flower, mountain, tree, car and building for your input is: ")
        st.subheader(word)
        # Every gold word has an image named after it under images/.
        st.image(f'images/{word}.png', width=200)
    else:
        # An all-zero embedding means no input word was found in the
        # vocabulary; similarities would be undefined, so do not guess.
        st.warning("None of the words in your input were found in the GloVe vocabulary, so no closest word could be determined.")

    st.write("")