Mark7549 committed on
Commit
bdf0a5e
·
1 Parent(s): 169869e

Added cosine similarity front-end

Browse files
Files changed (2) hide show
  1. app.py +24 -6
  2. word2vec.py +30 -7
app.py CHANGED
@@ -15,7 +15,7 @@ if active_tab == "Nearest neighbours":
15
  col1, col2 = st.columns(2)
16
  with st.container():
17
  with col1:
18
- word = st.text_input("Enter a word", placeholder="ἀνήρ")
19
 
20
  with col2:
21
  time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
@@ -52,14 +52,32 @@ if active_tab == "Nearest neighbours":
52
  df = pd.DataFrame(nearest_neighbours, columns=["Word", "Time slice", "Similarity"])
53
  st.table(df)
54
 
55
-
56
-
57
-
58
-
59
  # Cosine similarity tab
60
  elif active_tab == "Cosine similarity":
 
 
61
  with st.container():
62
- st.write("Cosine similarity tab")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  # 3D graph tab
65
  elif active_tab == "3D graph":
 
15
  col1, col2 = st.columns(2)
16
  with st.container():
17
  with col1:
18
+ word = st.text_input("Enter a word", placeholder="πατήρ")
19
 
20
  with col2:
21
  time_slice = st.selectbox("Time slice", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
 
52
  df = pd.DataFrame(nearest_neighbours, columns=["Word", "Time slice", "Similarity"])
53
  st.table(df)
54
 
55
+
 
 
 
56
  # Cosine similarity tab
57
  elif active_tab == "Cosine similarity":
58
+ col1, col2 = st.columns(2)
59
+ col3, col4 = st.columns(2)
60
  with st.container():
61
+ with col1:
62
+ word_1 = st.text_input("Enter a word", placeholder="πατήρ")
63
+
64
+ with col2:
65
+ time_slice_1 = st.selectbox("Time slice word 1", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
66
+
67
+ with st.container():
68
+ with col3:
69
+ word_2 = st.text_input("Enter a word", placeholder="μήτηρ")
70
+
71
+ with col4:
72
+ time_slice_2 = st.selectbox("Time slice word 2", ["Archaic", "Classical", "Hellenistic", "Early Roman", "Late Roman"])
73
+
74
+ # Create button for calculating cosine similarity
75
+ cosine_similarity_button = st.button("Calculate cosine similarity")
76
+
77
+ # If the button is clicked, execute calculation
78
+ if cosine_similarity_button:
79
+ cosine_simularity_score = get_cosine_similarity(word_1, time_slice_1, word_2, time_slice_2)
80
+ st.write(cosine_simularity_score)
81
 
82
  # 3D graph tab
83
  elif active_tab == "3D graph":
word2vec.py CHANGED
@@ -104,19 +104,27 @@ def cosine_similarity(vector_a, vector_b):
104
  return "{:.2f}".format(similarity)
105
 
106
 
107
- def get_cosine_similarity(word1, word2, time_slice):
108
  '''
109
  Return the cosine similarity of two words
110
  '''
111
  # TO DO: MOET NETTER
112
 
113
  # Return if path does not exist
114
- if not os.path.exists(f'models/{time_slice}.model'):
115
- return
116
 
117
- model = load_word2vec_model(f'models/{time_slice}.model')
118
- dict = model_dictionary(model)
119
- return cosine_similarity(dict[word1], dict[word2])
 
 
 
 
 
 
 
 
 
 
120
 
121
 
122
  def get_cosine_similarity_one_word(word, time_slice1, time_slice2):
@@ -163,6 +171,21 @@ def convert_model_to_time_name(model_name):
163
  return 'Late Roman'
164
 
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
167
  '''
168
  Return the nearest neighbours of a word
@@ -241,7 +264,7 @@ def main():
241
  late_roman = ('late_roman', load_word2vec_model('models/late_roman_cbow.model'))
242
 
243
  models = [archaic, classical, early_roman, hellen, late_roman]
244
- nearest_neighbours = get_nearest_neighbours('πατήρ', archaic[1], models, n=5)
245
  print(nearest_neighbours)
246
  # vector = get_word_vector(model, 'ἀνήρ')
247
  # print(vector)
 
104
  return "{:.2f}".format(similarity)
105
 
106
 
107
def get_cosine_similarity(word1, time_slice_1, word2, time_slice_2):
    '''
    Return the cosine similarity of two words, each looked up in the model
    of its own time slice.

    word1, word2: the words to compare.
    time_slice_1, time_slice_2: time slice display names (e.g. 'Archaic'),
        translated to model file names via convert_time_name_to_model.

    Returns whatever cosine_similarity returns for the two vectors (a string
    formatted to two decimals), or None when either model file is missing,
    a time slice name is not recognised, or a word is not in its model.

    NOTE(review): the two vectors may come from different models; this is
    presumably only meaningful if the models share an aligned vector
    space - confirm.
    '''
    # TODO: this should be tidied up further

    model_name_1 = convert_time_name_to_model(time_slice_1)
    model_name_2 = convert_time_name_to_model(time_slice_2)

    # An unrecognised time slice name converts to None; there is no model for it
    if model_name_1 is None or model_name_2 is None:
        return None

    path_1 = f'models/{model_name_1}.model'
    path_2 = f'models/{model_name_2}.model'

    # Both models are loaded below, so both files have to exist
    if not (os.path.exists(path_1) and os.path.exists(path_2)):
        return None

    dict_1 = model_dictionary(load_word2vec_model(path_1))
    dict_2 = model_dictionary(load_word2vec_model(path_2))

    # The words come straight from user input and may not be in the vocabulary
    try:
        vector_1 = dict_1[word1]
        vector_2 = dict_2[word2]
    except KeyError:
        return None

    return cosine_similarity(vector_1, vector_2)
128
 
129
 
130
  def get_cosine_similarity_one_word(word, time_slice1, time_slice2):
 
171
  return 'Late Roman'
172
 
173
 
174
def convert_time_name_to_model(time_name):
    '''
    Translate a time slice display name into the base name of its model file.

    Returns None when time_name is not one of the known time slices.
    '''
    model_names = {
        'Archaic': 'archaic_cbow',
        'Classical': 'classical_cbow',
        'Early Roman': 'early_roman_cbow',
        'Hellenistic': 'hellen_cbow',
        'Late Roman': 'late_roman_cbow',
    }
    return model_names.get(time_name)
188
+
189
  def get_nearest_neighbours(word, time_slice_model, n=10, models=load_all_models()):
190
  '''
191
  Return the nearest neighbours of a word
 
264
  late_roman = ('late_roman', load_word2vec_model('models/late_roman_cbow.model'))
265
 
266
  models = [archaic, classical, early_roman, hellen, late_roman]
267
+ nearest_neighbours = get_nearest_neighbours('πατήρ', 'archaic_cbow', n=5)
268
  print(nearest_neighbours)
269
  # vector = get_word_vector(model, 'ἀνήρ')
270
  # print(vector)