Mollel committed on
Commit
07c4ca7
1 Parent(s): 93dfa2b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -94
app.py CHANGED
@@ -49,35 +49,6 @@ def extract_table_from_markdown(markdown_text, table_start):
49
 
50
  # return df
51
 
52
- # def markdown_table_to_df(table_content):
53
- # """Convert markdown table to pandas DataFrame."""
54
- # # Split the table content into lines
55
- # lines = table_content.split('\n')
56
-
57
- # # Extract headers
58
- # headers = [h.strip() for h in lines[0].split('|') if h.strip()]
59
-
60
- # # Extract data
61
- # data = []
62
- # for line in lines[2:]: # Skip the header separator line
63
- # row = [cell.strip() for cell in line.split('|') if cell.strip()]
64
- # if row: # Include any non-empty row
65
- # # Pad the row with empty strings if it's shorter than the headers
66
- # padded_row = row + [''] * (len(headers) - len(row))
67
- # data.append(padded_row[:len(headers)]) # Trim if longer than headers
68
-
69
- # # Create DataFrame
70
- # df = pd.DataFrame(data, columns=headers)
71
-
72
- # # Convert numeric columns to float and handle Dimension column
73
- # for col in df.columns:
74
- # if col == "Dimension":
75
- # df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
76
- # elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
77
- # df[col] = pd.to_numeric(df[col], errors='coerce')
78
-
79
- # return df
80
-
81
  def markdown_table_to_df(table_content):
82
  """Convert markdown table to pandas DataFrame."""
83
  # Split the table content into lines
@@ -101,12 +72,14 @@ def markdown_table_to_df(table_content):
101
  # Convert numeric columns to float and handle Dimension column
102
  for col in df.columns:
103
  if col == "Dimension":
104
- df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else None)
105
  elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
106
  df[col] = pd.to_numeric(df[col], errors='coerce')
107
 
108
  return df
109
 
 
 
110
  def setup_page():
111
  """Set up the Streamlit page."""
112
  st.set_page_config(page_title="Swahili Text Embeddings Leaderboard", page_icon="⚡", layout="wide")
@@ -134,70 +107,6 @@ def display_leaderboard(df):
134
  csv = df_display.to_csv(index=False)
135
  st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
136
 
137
- # def display_leaderboard(df):
138
- # """Display the leaderboard with compact columns."""
139
- # st.header("📊 Leaderboard")
140
-
141
- # # Determine which non-benchmark columns are present
142
- # present_non_benchmark_cols = [col for col in POSSIBLE_NON_BENCHMARK_COLS if col in df.columns]
143
-
144
- # # Add filters
145
- # columns_to_filter = [col for col in df.columns if col not in present_non_benchmark_cols]
146
- # selected_columns = st.multiselect("Select benchmarks to display:", columns_to_filter, default=columns_to_filter)
147
-
148
- # # Filter dataframe
149
- # df_display = df[present_non_benchmark_cols + selected_columns]
150
-
151
- # # Define column widths
152
- # col_widths = {
153
- # "Model Name": 200,
154
- # "Publisher": 100,
155
- # "Open?": 60,
156
- # "Basemodel": 150,
157
- # "Matryoshka": 80,
158
- # "Dimension": 80,
159
- # "Average": 80
160
- # }
161
-
162
- # # Set default width for benchmark columns
163
- # default_width = 80
164
-
165
- # # Custom CSS to make the table more compact
166
- # st.markdown("""
167
- # <style>
168
- # .streamlit-expanderHeader {
169
- # font-size: 1em;
170
- # }
171
- # .stDataFrame {
172
- # font-size: 0.8em;
173
- # }
174
- # .stDataFrame td, .stDataFrame th {
175
- # white-space: nowrap;
176
- # overflow: hidden;
177
- # text-overflow: ellipsis;
178
- # max-width: 200px;
179
- # }
180
- # </style>
181
- # """, unsafe_allow_html=True)
182
-
183
- # # Format the dataframe
184
- # df_styled = df_display.style.format({col: "{:.2f}" for col in df_display.columns if df_display[col].dtype in ['float64', 'int64']})
185
-
186
- # # Set column widths
187
- # df_styled.set_properties(**{
188
- # 'width': lambda x: f"{col_widths.get(x.name, default_width)}px",
189
- # 'max-width': lambda x: f"{col_widths.get(x.name, default_width)}px",
190
- # })
191
-
192
- # # Display the dataframe
193
- # st.dataframe(df_styled, height=400)
194
-
195
- # # Download buttons
196
- # csv = df_display.to_csv(index=False)
197
- # st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
198
-
199
-
200
-
201
  def display_evaluation():
202
  """Display the evaluation section."""
203
  st.header("🧪 Evaluation")
 
49
 
50
  # return df
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def markdown_table_to_df(table_content):
53
  """Convert markdown table to pandas DataFrame."""
54
  # Split the table content into lines
 
72
  # Convert numeric columns to float and handle Dimension column
73
  for col in df.columns:
74
  if col == "Dimension":
75
+ df[col] = df[col].apply(lambda x: int(x) if x.isdigit() else "")
76
  elif col not in ["Model Name", "Publisher", "Open?", "Basemodel", "Matryoshka"]:
77
  df[col] = pd.to_numeric(df[col], errors='coerce')
78
 
79
  return df
80
 
81
+
82
+
83
  def setup_page():
84
  """Set up the Streamlit page."""
85
  st.set_page_config(page_title="Swahili Text Embeddings Leaderboard", page_icon="⚡", layout="wide")
 
107
  csv = df_display.to_csv(index=False)
108
  st.download_button(label="Download as CSV", data=csv, file_name="leaderboard.csv", mime="text/csv")
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  def display_evaluation():
111
  """Display the evaluation section."""
112
  st.header("🧪 Evaluation")