dsfsi-language-identification-spaces

Runtime error

App Files Community

kargaranamir committed on Mar 27, 2024

Commit

ea89f80

verified ·

1 Parent(s): 90ee191

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -33

app.py CHANGED Viewed

@@ -141,36 +141,17 @@ def convert_df(df):
 @st.cache_resource
-def load_GlotLID_v1(model_name, file_name):
     model_path = hf_hub_download(repo_id=model_name, filename=file_name)
     model = fasttext.load_model(model_path)
     return model
-@st.cache_resource
-def load_GlotLID_v2(model_name, file_name):
-    model_path = hf_hub_download(repo_id=model_name, filename=file_name)
-    model = fasttext.load_model(model_path)
-    return model
-@st.cache_resource
-def load_OpenLID():
-    model_path = hf_hub_download(repo_id='laurievb/OpenLID', filename='model.bin')
-    model = fasttext.load_model(model_path)
-    return model
-@st.cache_resource
-def load_NLLB():
-    model_path = hf_hub_download(repo_id='facebook/fasttext-language-identification', filename='model.bin')
-    model = fasttext.load_model(model_path)
-    return model
-model_1 = load_GlotLID_v1(constants.MODEL_NAME, "model_v1.bin")
-model_2 = load_GlotLID_v2(constants.MODEL_NAME, "model_v2.bin")
-model_3 = load_OpenLID()
-model_4 = load_NLLB()
 # @st.cache_resource
@@ -196,7 +177,7 @@ def plot(label, prob):
     ax.set_xlabel("Confidence", color=BLACK_COLOR)
     st.pyplot(fig)
-def compute(sentences, version = 'v2'):
     """Computes the language probablities and labels for the given sentences.
     Args:
@@ -208,9 +189,11 @@ def compute(sentences, version = 'v2'):
     progress_text = "Computing Language..."
     if version == 'nllb-218':
-        model_choice = model_4
     elif version == 'openlid-201':
-        model_choice = model_3
     elif version == 'v2':
         model_choice = model_2
     else:
@@ -232,7 +215,7 @@ def compute(sentences, version = 'v2'):
         output_label_language = output_label.split('_')[0]
         # script control
-        if version in ['v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
             main_script, all_scripts = get_script(sent)
             output_label_script = output_label.split('_')[1]
@@ -273,8 +256,8 @@ with tab1:
     version = st.radio(
         "Choose model",
-        ["nllb-218", "openlid-201", "v1", "v2"],
-        captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages)"],
         index = 3,
         key = 'version_tab1',
         horizontal = True
@@ -312,8 +295,8 @@ with tab2:
     version = st.radio(
         "Choose model",
-        ["nllb-218", "openlid-201", "v1", "v2"],
-        captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages)"],
         index = 3,
         key = 'version_tab2',
         horizontal = True

 @st.cache_resource
+def load_model(model_name, file_name):
     model_path = hf_hub_download(repo_id=model_name, filename=file_name)
     model = fasttext.load_model(model_path)
     return model
+model_1 = load_model(constants.MODEL_NAME, "model_v1.bin")
+model_2 = load_model(constants.MODEL_NAME, "model_v2.bin")
+model_3 = load_model(constants.MODEL_NAME, "model_v3.bin")
+openlid = load_model('laurievb/OpenLID', "model.bin")
+nllb = load_model('facebook/fasttext-language-identification', "model.bin")
 # @st.cache_resource
     ax.set_xlabel("Confidence", color=BLACK_COLOR)
     st.pyplot(fig)
+def compute(sentences, version = 'v3'):
     """Computes the language probablities and labels for the given sentences.
     Args:
     progress_text = "Computing Language..."
     if version == 'nllb-218':
+        model_choice = nllb
     elif version == 'openlid-201':
+        model_choice = openlid
+     elif version == 'v3':
+        model_choice = model_3
     elif version == 'v2':
         model_choice = model_2
     else:
         output_label_language = output_label.split('_')[0]
         # script control
+        if version in ['v3', 'v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
             main_script, all_scripts = get_script(sent)
             output_label_script = output_label.split('_')[1]
     version = st.radio(
         "Choose model",
+        ["nllb-218", "openlid-201", "v1", "v2", "v3"],
+        captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2", "GlotLID version 3 (More languages, better quality data)"],
         index = 3,
         key = 'version_tab1',
         horizontal = True
     version = st.radio(
         "Choose model",
+        ["nllb-218", "openlid-201", "v1", "v2", "v3"],
+        captions=["NLLB", "OpenLID", "GlotLID version 1", "GlotLID version 2 (more data and languages), GlotLID version 3 (More languages, better quality data)"],
         index = 3,
         key = 'version_tab2',
         horizontal = True