Spaces:
Runtime error
Runtime error
File size: 3,364 Bytes
bda7c4e 6b2b26c 00b7e99 bda7c4e 00b7e99 bda7c4e 00b7e99 6b2b26c bda7c4e 00b7e99 6b2b26c bda7c4e 6b2b26c 1c32a9e bda7c4e 1c32a9e bda7c4e 1c32a9e bda7c4e 00b7e99 bda7c4e 00b7e99 6b2b26c bda7c4e 6b2b26c bda7c4e 6b2b26c 00b7e99 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
import streamlit as st
import pandas as pd
import io
import re
# Module-level constants
GITHUB_URL = "https://github.com/Sartify/STEL"

# Columns that display_leaderboard() always shows and never offers in the
# benchmark multiselect; any other column in the table is treated as a benchmark.
POSSIBLE_NON_BENCHMARK_COLS = [
    "Model Name",
    "Publisher",
    "Open?",
    "Basemodel",
    "Matryoshka",
    "Dimension",
    "Average",
]
def extract_table_from_markdown(markdown_text, table_start):
    """Return the first markdown table whose header line begins with *table_start*.

    Scans *markdown_text* line by line. Collection starts at the first line
    that starts with *table_start* and ends at the first blank
    (whitespace-only) line after it, or at the end of the text. The collected
    lines are returned joined with newlines; an empty string is returned when
    no line starts with *table_start*.
    """
    collected = []
    inside_table = False
    for row in markdown_text.split('\n'):
        if not inside_table:
            if not row.startswith(table_start):
                continue  # still looking for the header line
            inside_table = True
        if not row.strip():
            break  # a blank line terminates the table
        collected.append(row)
    return '\n'.join(collected)
def _split_markdown_row(line):
    """Split one markdown table line into stripped cell strings, keeping empty cells."""
    text = line.strip()
    # Remove only the outer border pipes. Interior empty cells must survive so
    # that every value stays under its own header.
    if text.startswith('|'):
        text = text[1:]
    if text.endswith('|'):
        text = text[:-1]
    return [cell.strip() for cell in text.split('|')]


def markdown_table_to_df(table_content):
    """Convert a markdown table to a pandas DataFrame.

    The first line is the header row, the second line (the ``---`` separator)
    is skipped, and every following non-empty line is a data row.

    Empty cells are kept in place and stored as missing values instead of
    being dropped, so values never shift under the wrong header. Rows are
    padded or truncated to the header width. Columns whose values all parse
    as numbers are converted to ``float`` (missing cells become NaN); all
    other columns are left as strings.

    Args:
        table_content: The table text, one row per line.

    Returns:
        A DataFrame with one column per header cell. An empty DataFrame is
        returned when *table_content* is blank.
    """
    if not table_content.strip():
        return pd.DataFrame()

    lines = table_content.split('\n')
    headers = _split_markdown_row(lines[0])
    width = len(headers)

    data = []
    for line in lines[2:]:  # lines[1] is the header separator
        cells = _split_markdown_row(line)
        if not any(cells):
            continue  # blank line or a row without any content
        # Normalise the row to exactly one cell per header.
        cells = (cells + [''] * width)[:width]
        data.append([cell if cell else None for cell in cells])

    df = pd.DataFrame(data, columns=headers)

    # Try every column rather than gating on ``dtype == object``: the dtype
    # pandas infers for text columns differs between pandas versions.
    for col in df.columns:
        try:
            df[col] = pd.to_numeric(df[col]).astype(float)
        except (ValueError, TypeError):
            pass  # not a numeric column; keep the original values
    return df
def setup_page():
    """Configure the Streamlit page, then render the title and the logo image."""
    page_options = {
        "page_title": "Swahili Text Embeddings Leaderboard",
        "page_icon": "⚡",
        "layout": "wide",
    }
    st.set_page_config(**page_options)

    st.title("⚡ Swahili Text Embeddings Leaderboard (STEL)")

    # NOTE(review): this URL still contains the template path `username/repo`;
    # confirm the real location of STEL.jpg.
    logo_url = "https://raw.githubusercontent.com/username/repo/main/STEL.jpg"
    st.image(logo_url, width=300)
def display_leaderboard(df):
    """Render the leaderboard table with a benchmark filter and a CSV download.

    Columns listed in ``POSSIBLE_NON_BENCHMARK_COLS`` that exist in *df* are
    always shown. Every other column is treated as a benchmark and offered in
    a multiselect, with all of them selected by default. The visible table is
    also offered as a CSV download.

    Args:
        df: The leaderboard DataFrame.
    """
    st.header("📊 Leaderboard")

    # Split the columns into always-visible ones and filterable benchmarks.
    present_non_benchmark_cols = [col for col in POSSIBLE_NON_BENCHMARK_COLS if col in df.columns]
    columns_to_filter = [col for col in df.columns if col not in present_non_benchmark_cols]
    selected_columns = st.multiselect(
        "Select benchmarks to display:", columns_to_filter, default=columns_to_filter
    )

    df_display = df[present_non_benchmark_cols + selected_columns]

    # A float format spec raises ValueError on string cells, and benchmark
    # columns that could not be parsed as numbers are kept as strings. Apply
    # the 4-decimal format to numeric columns only.
    numeric_columns = [
        col for col in selected_columns
        if pd.api.types.is_numeric_dtype(df_display[col])
    ]
    styled = df_display.style
    if numeric_columns:
        styled = styled.format("{:.4f}", subset=numeric_columns)
    st.dataframe(styled)

    # Download exactly what is shown (unformatted values, no index column).
    csv = df_display.to_csv(index=False)
    st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
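# TODO: the remaining section renderers (display_evaluation, display_contribution, display_sponsorship) were elided here ("rest of the code remains the same"); main() calls them, so they must be defined in this file.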
def main():
    """Render the complete leaderboard page.

    Sets up the page, reads ``README.md`` from the current working directory,
    extracts the markdown table whose header line starts with ``| Model Name``
    and renders it as the leaderboard. It then renders the evaluation,
    contribution and sponsorship sections, followed by a closing note.

    Raises:
        OSError: If ``README.md`` cannot be opened.
    """
    setup_page()

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open("README.md", "r", encoding="utf-8") as f:
        readme_content = f.read()

    # Extract and process the leaderboard table.
    leaderboard_table = extract_table_from_markdown(readme_content, "| Model Name")
    if leaderboard_table:
        df_leaderboard = markdown_table_to_df(leaderboard_table)
        display_leaderboard(df_leaderboard)
    else:
        # Say so explicitly instead of rendering an empty leaderboard.
        st.warning("No leaderboard table was found in README.md.")

    # NOTE(review): these three renderers are not defined in the visible part
    # of this file; confirm they exist before deploying.
    display_evaluation()
    display_contribution()
    display_sponsorship()

    st.markdown("---")
    st.markdown("Thank you for being part of this effort to advance Swahili language technologies!")
# Run the app when this file is executed directly rather than imported.
if __name__ == "__main__":
    main()