dsfsi-language-identification-spaces

Runtime error

App Files Files Community

ThapeloAndrewSindane committed on Sep 8, 2024

Commit

9e87bd5

•

1 Parent(s): 730820d

Adding All

Browse files

Adding all models prediction

Files changed (1) hide show

app.py +103 -33

app.py CHANGED Viewed

@@ -192,6 +192,41 @@ def plot(label, prob):
     ax.set_xlabel("Confidence", color=BLACK_COLOR)
     st.pyplot(fig)
 def compute(sentences, version = 'v3'):
     """Computes the language probablities and labels for the given sentences.
@@ -213,8 +248,10 @@ def compute(sentences, version = 'v3'):
         model_choice = model_afroxlmr_base
     elif version=='afrolm':
         model_choice = model_afrolm
-    else:
         model_choice = za_lid
     my_bar = st.progress(0, text=progress_text)
@@ -224,33 +261,63 @@ def compute(sentences, version = 'v3'):
     sentences = [preprocess_text(sent) for sent in sentences]
     for index, sent in enumerate(sentences):
-        output = model_choice.predict(sent)
-        output_label = output[index]['label']
-        output_prob =  output[index]['score']
-        output_label_language = output[index]['label']
-        # output_label  = output[0][0].split('__')[-1].replace('_Hans', '_Hani').replace('_Hant', '_Hani')
-        # output_prob = max(min(output[1][0], 1), 0)
-        # output_label_language = output_label.split('_')[0]
-        # # script control
-        # if version in ['v3', 'v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
-        #     main_script, all_scripts = get_script(sent)
-        #     output_label_script = output_label.split('_')[1]
-        #     if output_label_script not in all_scripts:
-        #         output_label_script = main_script
-        #         output_label = f"und_{output_label_script}"
-        #         output_prob = 0
-        labels = labels + [output_label]
-        probs = probs + [output_prob]
-        my_bar.progress(
-            min((index) / len(sentences), 1),
-            text=progress_text,
-        )
     my_bar.empty()
     return probs, labels
@@ -276,8 +343,8 @@ with tab1:
     version = st.radio(
         "Choose model",
-        ["xlmrlarge", "serengeti", "afriberta", "afroxlmrbase", "afrolm", "BERT"],
-        captions=["za-XLMR-Large", "za-Serengeti", "za-AfriBERTa", "za-Afro-XLMR-BASE", "za-AfroLM", "za-BERT"],
         index = 4,
         key = 'version_tab1',
         horizontal = True
@@ -308,15 +375,18 @@ with tab1:
             f.write(f"{sent}, {label}: {prob}\n")
         # plot
-        plot(label, prob)
 with tab2:
     version = st.radio(
         "Choose model",
-        ["xlmrlarge", "serengeti", "afriberta", "afroxlmrbase", "afrolm", "BERT"],
-        captions=["za-XLMR-Large", "za-Serengeti", "za-AfriBERTa", "za-Afro-XLMR-BASE", "za-AfroLM", "za-BERT"],
         index = 4,
         key = 'version_tab2',
         horizontal = True

     ax.set_xlabel("Confidence", color=BLACK_COLOR)
     st.pyplot(fig)
+# @st.cache_resource
+def plot_multiples(models, labels, probs):
+    """Draw one horizontal confidence bar per model and show it in Streamlit.
+
+    Args:
+        models: Display names, one per bar; used as the y-axis tick labels.
+        labels: Predicted label for each bar, written next to the bar.
+        probs: Confidence for each bar; the x-axis is fixed to [0, 1].
+
+    The three sequences are expected to line up index by index: bar ``i``
+    is drawn at y-position ``i`` with width ``probs[i]`` and annotated with
+    ``labels[i]``.
+    """
+    ORANGE_COLOR = "#FF8000"
+    BLACK_COLOR = "#31333F"
+    # One inch of height per model; keep at least one inch so an empty
+    # model list does not request a zero-height figure.
+    fig, ax = plt.subplots(figsize=(8, max(len(models), 1)))
+    # Transparent backgrounds so the chart blends into the page.
+    fig.patch.set_facecolor("none")
+    ax.set_facecolor("none")
+    ax.spines["left"].set_color(BLACK_COLOR)
+    ax.spines["bottom"].set_color(BLACK_COLOR)
+    ax.tick_params(axis="x", colors=BLACK_COLOR)
+    ax.spines[["right", "top"]].set_visible(False)
+    # Plot bars for each model, label, and probability
+    y_positions = range(len(models))  # Y positions for each model
+    ax.barh(y=y_positions, width=probs, color=ORANGE_COLOR)
+    # Add labels next to each bar, slightly to the right of the bar's end
+    for i, (prob, label) in enumerate(zip(probs, labels)):
+        ax.text(prob + 0.01, i, f"{label} ({prob:.2f})", va='center', color=BLACK_COLOR)
+    # Set y-ticks and labels
+    ax.set_yticks(y_positions)
+    ax.set_yticklabels(models, color=BLACK_COLOR)
+    ax.set_xlim(0, 1)
+    ax.set_xlabel("Confidence", color=BLACK_COLOR)
+    ax.set_title("Model Predictions", color=BLACK_COLOR)
+    st.pyplot(fig)
+    # pyplot keeps every figure it creates alive until it is closed, so
+    # release this one after handing it to Streamlit to avoid accumulating
+    # a figure on every rerun.
+    plt.close(fig)
 def compute(sentences, version = 'v3'):
     """Computes the language probablities and labels for the given sentences.
         model_choice = model_afroxlmr_base
     elif version=='afrolm':
         model_choice = model_afrolm
+    elif version == 'BERT':
         model_choice = za_lid
+    else:
+         model_choice = [model_xlmr_large,model_serengeti, model_afriberta, model_afroxlmr_base, model_afrolm, za_lid]
     my_bar = st.progress(0, text=progress_text)
     sentences = [preprocess_text(sent) for sent in sentences]
     for index, sent in enumerate(sentences):
+        if type(model_choice) == list:
+                 all_models_pred = []
+                 for model in model_choise:
+                            output = model.predict(sent)
+                            output_label = output[index]['label']
+                            output_prob =  output[index]['score']
+                            output_label_language = output[index]['label']
+                            # output_label  = output[0][0].split('__')[-1].replace('_Hans', '_Hani').replace('_Hant', '_Hani')
+                            # output_prob = max(min(output[1][0], 1), 0)
+                            # output_label_language = output_label.split('_')[0]
+                            # # script control
+                            # if version in ['v3', 'v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
+                            #     main_script, all_scripts = get_script(sent)
+                            #     output_label_script = output_label.split('_')[1]
+                            #     if output_label_script not in all_scripts:
+                            #         output_label_script = main_script
+                            #         output_label = f"und_{output_label_script}"
+                            #         output_prob = 0
+                            labels = labels + [output_label]
+                            probs = probs + [output_prob]
+                            my_bar.progress(
+                                min((index) / len(sentences), 1),
+                                text=progress_text,
+                            )
+        else:
+                output = model_choice.predict(sent)
+                output_label = output[index]['label']
+                output_prob =  output[index]['score']
+                output_label_language = output[index]['label']
+                # output_label  = output[0][0].split('__')[-1].replace('_Hans', '_Hani').replace('_Hant', '_Hani')
+                # output_prob = max(min(output[1][0], 1), 0)
+                # output_label_language = output_label.split('_')[0]
+                # # script control
+                # if version in ['v3', 'v2', 'openlid-201', 'nllb-218'] and output_label_language!= 'zxx':
+                #     main_script, all_scripts = get_script(sent)
+                #     output_label_script = output_label.split('_')[1]
+                #     if output_label_script not in all_scripts:
+                #         output_label_script = main_script
+                #         output_label = f"und_{output_label_script}"
+                #         output_prob = 0
+                labels = labels + [output_label]
+                probs = probs + [output_prob]
+                my_bar.progress(
+                    min((index) / len(sentences), 1),
+                    text=progress_text,
+                )
     my_bar.empty()
     return probs, labels
     version = st.radio(
         "Choose model",
+        ["xlmrlarge", "serengeti", "afriberta", "afroxlmrbase", "afrolm", "BERT", "All-Models"],
+        captions=["za-XLMR-Large", "za-Serengeti", "za-AfriBERTa", "za-Afro-XLMR-BASE", "za-AfroLM", "za-BERT", 'All-Models'],
         index = 4,
         key = 'version_tab1',
         horizontal = True
             f.write(f"{sent}, {label}: {prob}\n")
         # plot
+        if version == "All-Models":
+               plot_multiples(["xlmrlarge", "serengeti", "afriberta", "afroxlmrbase", "afrolm", "BERT"], labels, probs)
+        else:
+               plot(label, prob)
 with tab2:
     version = st.radio(
         "Choose model",
+        ["xlmrlarge", "serengeti", "afriberta", "afroxlmrbase", "afrolm", "BERT", "All-Models"],
+        captions=["za-XLMR-Large", "za-Serengeti", "za-AfriBERTa", "za-Afro-XLMR-BASE", "za-AfroLM", "za-BERT", "All-Models"],
         index = 4,
         key = 'version_tab2',
         horizontal = True