kargaranamir committed on
Commit
ea89f80
1 Parent(s): 90ee191

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -33
app.py CHANGED
@@ -141,36 +141,17 @@ def convert_df(df):
141
 
142
 
143
  @st.cache_resource
144
- def load_GlotLID_v1(model_name, file_name):
145
  model_path = hf_hub_download(repo_id=model_name, filename=file_name)
146
  model = fasttext.load_model(model_path)
147
  return model
148
 
149
- @st.cache_resource
150
- def load_GlotLID_v2(model_name, file_name):
151
- model_path = hf_hub_download(repo_id=model_name, filename=file_name)
152
- model = fasttext.load_model(model_path)
153
- return model
154
-
155
-
156
- @st.cache_resource
157
- def load_OpenLID():
158
- model_path = hf_hub_download(repo_id='laurievb/OpenLID', filename='model.bin')
159
- model = fasttext.load_model(model_path)
160
- return model
161
-
162
-
163
- @st.cache_resource
164
- def load_NLLB():
165
- model_path = hf_hub_download(repo_id='facebook/fasttext-language-identification', filename='model.bin')
166
- model = fasttext.load_model(model_path)
167
- return model
168
-
169
 
170
- model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
171
- model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
172
- model_3 = load_OpenLID()
173
- model_4 = load_NLLB()
 
174
 
175
 
176
  # @st.cache_resource
@@ -196,7 +177,7 @@ def plot(label, prob):
196
  ax.set_xlabel("Confidence", color=BLACK_COLOR)
197
  st.pyplot(fig)
198
 
199
- def compute(sentences, version = 'v2'):
200
  """Computes the language probabilities and labels for the given sentences.
201
 
202
  Args:
@@ -208,9 +189,11 @@ def compute(sentences, version = 'v2'):
208
  progress_text = "Computing Language..."
209
 
210
  if version == 'nllb-218':
211
- model_choice = model_4
212
  elif version == 'openlid-201':
213
- model_choice = model_3
 
 
214
  elif version == 'v2':
215
  model_choice = model_2
216
  else:
@@ -232,7 +215,7 @@ def compute(sentences, version = 'v2'):
232
  output_label_language = output_label.split('_')[0]
233
 
234
  # script control
235
- if version in ['v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
236
  main_script, all_scripts = get_script(sent)
237
  output_label_script = output_label.split('_')[1]
238
 
@@ -273,8 +256,8 @@ with tab1:
273
 
274
  version = st.radio(
275
  "Choose model",
276
- ["nllb-218", "openlid-201", "v1", "v2"],
277
- captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages)"],
278
  index = 3,
279
  key = 'version_tab1',
280
  horizontal = True
@@ -312,8 +295,8 @@ with tab2:
312
 
313
  version = st.radio(
314
  "Choose model",
315
- ["nllb-218", "openlid-201", "v1", "v2"],
316
- captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages)"],
317
  index = 3,
318
  key = 'version_tab2',
319
  horizontal = True
 
141
 
142
 
143
  @st.cache_resource
144
+ def load_model(model_name, file_name):
145
  model_path = hf_hub_download(repo_id=model_name, filename=file_name)
146
  model = fasttext.load_model(model_path)
147
  return model
148
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
+ model_1 = load_model(constants.MODEL_NAME, "model_v1.bin")
151
+ model_2 = load_model(constants.MODEL_NAME, "model_v2.bin")
152
+ model_3 = load_model(constants.MODEL_NAME, "model_v3.bin")
153
+ openlid = load_model('laurievb/OpenLID', "model.bin")
154
+ nllb = load_model('facebook/fasttext-language-identification', "model.bin")
155
 
156
 
157
  # @st.cache_resource
 
177
  ax.set_xlabel("Confidence", color=BLACK_COLOR)
178
  st.pyplot(fig)
179
 
180
+ def compute(sentences, version = 'v3'):
181
  """Computes the language probabilities and labels for the given sentences.
182
 
183
  Args:
 
189
  progress_text = "Computing Language..."
190
 
191
  if version == 'nllb-218':
192
+ model_choice = nllb
193
  elif version == 'openlid-201':
194
+ model_choice = openlid
195
+ elif version == 'v3':
196
+ model_choice = model_3
197
  elif version == 'v2':
198
  model_choice = model_2
199
  else:
 
215
  output_label_language = output_label.split('_')[0]
216
 
217
  # script control
218
+ if version in ['v3', 'v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
219
  main_script, all_scripts = get_script(sent)
220
  output_label_script = output_label.split('_')[1]
221
 
 
256
 
257
  version = st.radio(
258
  "Choose model",
259
+ ["nllb-218", "openlid-201", "v1", "v2", "v3"],
260
+ captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2", "GlotLID version 3 (More languages, better quality data)"],
261
  index = 3,
262
  key = 'version_tab1',
263
  horizontal = True
 
295
 
296
  version = st.radio(
297
  "Choose model",
298
+ ["nllb-218", "openlid-201", "v1", "v2", "v3"],
299
+ captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages)", "GlotLID version 3 (More languages, better quality data)"],
300
  index = 3,
301
  key = 'version_tab2',
302
  horizontal = True