Spaces:

sartifyllc
/

Swahili-Text-Embeddings-Leaderboard

Running

App Files Community

Mollel committed on Jul 13, 2024

Commit

07c4ca7

verified ·

1 Parent(s): 93dfa2b

Update app.py

Browse files

Files changed (1) hide show

app.py +3 -94

app.py CHANGED Viewed

@@ -49,35 +49,6 @@ def extract_table_from_markdown(markdown_text, table_start):
 #     return df
-# def markdown_table_to_df(table_content):
-#     """Convert markdown table to pandas DataFrame."""
-#     # Split the table content into lines
-#     lines = table_content.split('\n')
-#     # Extract headers
-#     headers = [h.strip() for h in lines[0].split('|') if h.strip()]
-#     # Extract data
-#     data = []
-#     for line in lines[2:]:  # Skip the header separator line
-#         row = [cell.strip() for cell in line.split('|') if cell.strip()]
-#         if row:  # Include any non-empty row
-#             # Pad the row with empty strings if it's shorter than the headers
-#             padded_row = row + [''] * (len(headers) - len(row))
-#             data.append(padded_row[:len(headers)])  # Trim if longer than headers
-#     # Create DataFrame
-#     df = pd.DataFrame(data, columns=headers)
-#     # Convert numeric columns to float and handle Dimension column
-#     for col in df.columns:
-#         if col == "Dimension":
-#             df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
-#         elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
-#             df[col] = pd.to_numeric(df[col], errors='coerce')
-#     return df
 def markdown_table_to_df(table_content):
     """Convert markdown table to pandas DataFrame."""
     # Split the table content into lines
@@ -101,12 +72,14 @@ def markdown_table_to_df(table_content):
     # Convert numeric columns to float and handle Dimension column
     for col in df.columns:
         if col == "Dimension":
-            df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else None)
         elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
             df[col] = pd.to_numeric(df[col], errors='coerce')
     return df
 def setup_page():
     """Set up the Streamlit page."""
     st.set_page_config(page_title="Swahili Text Embeddings Leaderboard", page_icon="⚡", layout="wide")
@@ -134,70 +107,6 @@ def display_leaderboard(df):
     csv = df_display.to_csv(index=False)
     st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
-# def display_leaderboard(df):
-#     """Display the leaderboard with compact columns."""
-#     st.header("📊 Leaderboard")
-#     # Determine which non-benchmark columns are present
-#     present_non_benchmark_cols = [col for col in POSSIBLE_NON_BENCHMARK_COLS if col in df.columns]
-#     # Add filters
-#     columns_to_filter = [col for col in df.columns if col not in present_non_benchmark_cols]
-#     selected_columns = st.multiselect("Select benchmarks to display:", columns_to_filter, default=columns_to_filter)
-#     # Filter dataframe
-#     df_display = df[present_non_benchmark_cols + selected_columns]
-#     # Define column widths
-#     col_widths = {
-#         "Model Name": 200,
-#         "Publisher": 100,
-#         "Open?": 60,
-#         "Basemodel": 150,
-#         "Matryoshka": 80,
-#         "Dimension": 80,
-#         "Average": 80
-#     }
-#     # Set default width for benchmark columns
-#     default_width = 80
-#     # Custom CSS to make the table more compact
-#     st.markdown("""
-#     <style>
-#     .streamlit-expanderHeader {
-#         font-size: 1em;
-#     }
-#     .stDataFrame {
-#         font-size: 0.8em;
-#     }
-#     .stDataFrame td, .stDataFrame th {
-#         white-space: nowrap;
-#         overflow: hidden;
-#         text-overflow: ellipsis;
-#         max-width: 200px;
-#     }
-#     </style>
-#     """, unsafe_allow_html=True)
-#     # Format the dataframe
-#     df_styled = df_display.style.format({col: "{:.2f}" for col in df_display.columns if df_display[col].dtype in ['float64', 'int64']})
-#     # Set column widths
-#     df_styled.set_properties(**{
-#         'width': lambda x: f"{col_widths.get(x.name, default_width)}px",
-#         'max-width': lambda x: f"{col_widths.get(x.name, default_width)}px",
-#     })
-#     # Display the dataframe
-#     st.dataframe(df_styled, height=400)
-#     # Download buttons
-#     csv = df_display.to_csv(index=False)
-#     st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
 def display_evaluation():
     """Display the evaluation section."""
     st.header("🧪 Evaluation")

 #     return df
 def markdown_table_to_df(table_content):
     """Convert markdown table to pandas DataFrame."""
     # Split the table content into lines
     # Convert numeric columns to float and handle Dimension column
     for col in df.columns:
         if col == "Dimension":
+            df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
         elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
             df[col] = pd.to_numeric(df[col], errors='coerce')
     return df
 def setup_page():
     """Set up the Streamlit page."""
     st.set_page_config(page_title="Swahili Text Embeddings Leaderboard", page_icon="⚡", layout="wide")
     csv = df_display.to_csv(index=False)
     st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
 def display_evaluation():
     """Display the evaluation section."""
     st.header("🧪 Evaluation")