MohamedRashad committed on
Commit eb50697 · 1 Parent(s): d392fbe

Add utility functions for model submission and retrieval in Gradio interface

Files changed (2):
  1. app.py +196 -530
  2. utils.py +251 -0
app.py CHANGED
@@ -4,6 +4,8 @@ import numpy as np
 import pandas as pd
 import gradio as gr
 from huggingface_hub import HfApi, hf_hub_download
+from pathlib import Path
+from utils import submit_gradio_module, load_retrieval_results


 OWNER = "Navid-AI"
@@ -19,19 +21,19 @@ HEADER = """<div style="text-align: center; margin-bottom: 20px;">
 ABOUT_SECTION = """
 ## About

-The AraGen Leaderboard is designed to evaluate and compare the performance of Chat Arabic Large Language Models (LLMs) on a set of generative tasks. By leveraging the new **3C3H** evaluation measure which evaluate the model's output across six dimensions —Correctness, Completeness, Conciseness, Helpfulness, Honesty, and Harmlessness— the leaderboard provides a comprehensive and holistic evaluation of a model's performance in generating human-like and ethically responsible content.
+The Arabic RAG Leaderboard is designed to evaluate and compare the performance of Retrieval-Augmented Generation (RAG) models on a set of retrieval and generative tasks. By leveraging a comprehensive evaluation framework, the leaderboard provides a detailed assessment of a model's ability to retrieve relevant information and generate accurate, coherent, and contextually appropriate responses.

-### Why Focus on Chat Models?
+### Why Focus on RAG Models?

-AraGen Leaderboard —And 3C3H in general— is specifically designed to assess **chat models**, which interact in conversational settings, intended for end user interaction and require a blend of factual accuracy and user-centric dialogue capabilities. While it is technically possible to submit foundational models, we kindly ask users to refrain from doing so. For evaluations of foundational models using likelihood accuracy based benchmarks, please refer to the [Open Arabic LLM Leaderboard (OALL)](https://huggingface.co/spaces/OALL/Open-Arabic-LLM-Leaderboard).
+The Arabic RAG Leaderboard is specifically designed to assess **RAG models**, which combine retrieval mechanisms with generative capabilities to enhance the quality and relevance of generated content. These models are particularly useful in scenarios where access to up-to-date and contextually relevant information is crucial. While foundational models can be evaluated, the primary focus is on RAG models that excel in both retrieval and generation tasks.

 ### How to Submit Your Model?

-Navigate to the submission section below to submit your open chat model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submmited metadata (precision, revision, #params) is accurate.
+Navigate to the submission section below to submit your RAG model from the HuggingFace Hub for evaluation. Ensure that your model is public and the submitted metadata (precision, revision, #params) is accurate.

 ### Contact

-For any inquiries or assistance, feel free to reach out through the community tab at [Inception AraGen Community](https://huggingface.co/spaces/inceptionai/AraGen-Leaderboard/discussions) or via [email](mailto:ali.filali@inceptionai.ai).
+For any inquiries or assistance, feel free to reach out through the community tab at [Navid-AI Community](https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard/discussions) or via [email](mailto:support@navid-ai.com).
 """
@@ -40,286 +42,18 @@ Copy the following snippet to cite these results

 CITATION_BUTTON_TEXT = """
 @misc{AraGen,
-    author = {El Filali, Ali and Sengupta, Neha and Abouelseoud, Arwa and Nakov, Preslav and Fourrier, Clémentine},
-    title = {Rethinking LLM Evaluation with 3C3H: AraGen Benchmark and Leaderboard},
-    year = {2024},
-    publisher = {Inception},
-    howpublished = "url{https://huggingface.co/spaces/inceptionai/AraGen-Leaderboard}"
+    author = {Mohaned A. Rashad, Hamza Shahid},
+    title = {The Arabic RAG Leaderboard},
+    year = {2025},
+    publisher = {Navid-AI},
+    howpublished = "url{https://huggingface.co/spaces/Navid-AI/The-Arabic-Rag-Leaderboard}"
 }
 """

-
-def load_results():
-    # Get the current directory of the script and construct the path to results.json
-    current_dir = os.path.dirname(os.path.abspath(__file__))
-    results_file = os.path.join(current_dir, "assets", "results", "results.json")
-
-    # Load the JSON data from the specified file
-    with open(results_file, 'r') as f:
-        data = json.load(f)
-
-    # Filter out any entries that only contain '_last_sync_timestamp'
-    filtered_data = []
-    for entry in data:
-        # If '_last_sync_timestamp' is the only key, skip it
-        if len(entry.keys()) == 1 and "_last_sync_timestamp" in entry:
-            continue
-        filtered_data.append(entry)
-
-    data = filtered_data
-
-    # Lists to collect data
-    data_3c3h = []
-    data_tasks = []
-
-    for model_data in data:
-        # Extract model meta data
-        meta = model_data.get('Meta', {})
-        model_name = meta.get('Model Name', 'UNK')
-        revision = meta.get('Revision', 'UNK')
-        precision = meta.get('Precision', 'UNK')
-        params = meta.get('Params', 'UNK')
-        license = meta.get('License', 'UNK')
-
-        # Convert "Model Size" to numeric, treating "UNK" as infinity
-        try:
-            model_size_numeric = float(params)
-        except (ValueError, TypeError):
-            model_size_numeric = np.inf
-
-        # 3C3H Scores
-        scores_data = model_data.get('claude-3.5-sonnet Scores', {})
-        scores_3c3h = scores_data.get('3C3H Scores', {})
-        scores_tasks = scores_data.get('Tasks Scores', {})
-
-        # Multiply scores by 100 to get percentages (keep them as numeric values)
-        formatted_scores_3c3h = {k: v*100 for k, v in scores_3c3h.items()}
-        formatted_scores_tasks = {k: v*100 for k, v in scores_tasks.items()}
-
-        # For 3C3H Scores DataFrame
-        data_entry_3c3h = {
-            'Model Name': model_name,
-            'Revision': revision,
-            'License': license,
-            'Precision': precision,
-            'Model Size': model_size_numeric,  # Numeric value for sorting
-            '3C3H Score': formatted_scores_3c3h.get("3C3H Score", np.nan),
-            'Correctness': formatted_scores_3c3h.get("Correctness", np.nan),
-            'Completeness': formatted_scores_3c3h.get("Completeness", np.nan),
-            'Conciseness': formatted_scores_3c3h.get("Conciseness", np.nan),
-            'Helpfulness': formatted_scores_3c3h.get("Helpfulness", np.nan),
-            'Honesty': formatted_scores_3c3h.get("Honesty", np.nan),
-            'Harmlessness': formatted_scores_3c3h.get("Harmlessness", np.nan),
-        }
-        data_3c3h.append(data_entry_3c3h)
-
-        # For Tasks Scores DataFrame
-        data_entry_tasks = {
-            'Model Name': model_name,
-            'Revision': revision,
-            'License': license,
-            'Precision': precision,
-            'Model Size': model_size_numeric,  # Numeric value for sorting
-            **formatted_scores_tasks
-        }
-        data_tasks.append(data_entry_tasks)
-
-    df_3c3h = pd.DataFrame(data_3c3h)
-    df_tasks = pd.DataFrame(data_tasks)
-
-    # Round the numeric score columns to 4 decimal places
-    score_columns_3c3h = ['3C3H Score', 'Correctness', 'Completeness', 'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness']
-    df_3c3h[score_columns_3c3h] = df_3c3h[score_columns_3c3h].round(4)
-
-    # Replace np.inf with a large number in 'Model Size Filter' for filtering
-    max_model_size_value = 1000  # Define a maximum value
-    df_3c3h['Model Size Filter'] = df_3c3h['Model Size'].replace(np.inf, max_model_size_value)
-
-    # Sort df_3c3h by '3C3H Score' descending if column exists
-    if '3C3H Score' in df_3c3h.columns:
-        df_3c3h = df_3c3h.sort_values(by='3C3H Score', ascending=False)
-        df_3c3h.insert(0, 'Rank', range(1, len(df_3c3h) + 1))  # Add Rank column starting from 1
-    else:
-        df_3c3h.insert(0, 'Rank', range(1, len(df_3c3h) + 1))
-
-    # Extract task columns
-    task_columns = [col for col in df_tasks.columns if col not in ['Model Name', 'Revision', 'License', 'Precision', 'Model Size', 'Model Size Filter']]
-
-    # Round the task score columns to 4 decimal places
-    if task_columns:
-        df_tasks[task_columns] = df_tasks[task_columns].round(4)
-
-    # Replace np.inf with a large number in 'Model Size Filter' for filtering
-    df_tasks['Model Size Filter'] = df_tasks['Model Size'].replace(np.inf, max_model_size_value)
-
-    # Sort df_tasks by the first task column if it exists
-    if task_columns:
-        first_task = task_columns[0]
-        df_tasks = df_tasks.sort_values(by=first_task, ascending=False)
-        df_tasks.insert(0, 'Rank', range(1, len(df_tasks) + 1))  # Add Rank column starting from 1
-    else:
-        df_tasks = df_tasks.sort_values(by='Model Name', ascending=True)
-        df_tasks.insert(0, 'Rank', range(1, len(df_tasks) + 1))
-
-    return df_3c3h, df_tasks, task_columns
-
-def load_requests(status_folder):
-    api = HfApi()
-    requests_data = []
-    folder_path_in_repo = status_folder  # 'pending', 'finished', or 'failed'
-
-    hf_api_token = os.environ.get('HF_API_TOKEN', None)
-
-    try:
-        # List files in the dataset repository
-        files_info = api.list_repo_files(
-            repo_id=DATASET_REPO_ID,
-            repo_type="dataset",
-            token=hf_api_token
-        )
-    except Exception as e:
-        print(f"Error accessing dataset repository: {e}")
-        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible
-
-    # Filter files in the desired folder
-    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
-
-    for file_path in files_in_folder:
-        try:
-            # Download the JSON file
-            local_file_path = hf_hub_download(
-                repo_id=DATASET_REPO_ID,
-                filename=file_path,
-                repo_type="dataset",
-                token=hf_api_token
-            )
-            # Load JSON data
-            with open(local_file_path, 'r') as f:
-                request = json.load(f)
-            requests_data.append(request)
-        except Exception as e:
-            print(f"Error loading file {file_path}: {e}")
-            continue  # Skip files that can't be loaded
-
-    df = pd.DataFrame(requests_data)
-    return df
-
-def submit_model(model_name, revision, precision, params, license):
-    # Load existing evaluations
-    df_3c3h, df_tasks, _ = load_results()
-    existing_models_results = df_3c3h[['Model Name', 'Revision', 'Precision']]
-
-    # Handle 'Missing' precision
-    if precision == 'Missing':
-        precision = None
-    else:
-        precision = precision.strip().lower()
-
-    # Load pending and finished requests from the dataset repository
-    df_pending = load_requests('pending')
-    df_finished = load_requests('finished')
-
-    # Check if model is already evaluated
-    model_exists_in_results = ((existing_models_results['Model Name'] == model_name) &
-                               (existing_models_results['Revision'] == revision) &
-                               (existing_models_results['Precision'] == precision)).any()
-    if model_exists_in_results:
-        return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
-
-    # Check if model is in pending requests
-    if not df_pending.empty:
-        existing_models_pending = df_pending[['model_name', 'revision', 'precision']]
-        model_exists_in_pending = ((existing_models_pending['model_name'] == model_name) &
-                                   (existing_models_pending['revision'] == revision) &
-                                   (existing_models_pending['precision'] == precision)).any()
-        if model_exists_in_pending:
-            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' is already in the pending evaluations.**"
-
-    # Check if model is in finished requests
-    if not df_finished.empty:
-        existing_models_finished = df_finished[['model_name', 'revision', 'precision']]
-        model_exists_in_finished = ((existing_models_finished['model_name'] == model_name) &
-                                    (existing_models_finished['revision'] == revision) &
-                                    (existing_models_finished['precision'] == precision)).any()
-        if model_exists_in_finished:
-            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
-
-    # Check if model exists on HuggingFace Hub
-    api = HfApi()
-    try:
-        model_info = api.model_info(model_name)
-    except Exception as e:
-        return f"**Error: Could not find model '{model_name}' on HuggingFace Hub. Please ensure the model name is correct and the model is public.**"
-
-    # Proceed with submission
-    status = "PENDING"
-
-    # Prepare the submission data
-    submission = {
-        "model_name": model_name,
-        "license": license,
-        "revision": revision,
-        "precision": precision,
-        "status": status,
-        "params": params
-    }
-
-    # Serialize the submission to JSON
-    submission_json = json.dumps(submission, indent=2)
-
-    # Define the file path in the repository
-    org_model = model_name.split('/')
-    if len(org_model) != 2:
-        return "**Please enter the full model name including the organization or username, e.g., 'inceptionai/jais-family-30b-8k'**"
-    org, model_id = org_model
-    precision_str = precision if precision else 'Missing'
-    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}.json"
-
-    # Upload the submission to the dataset repository
-    try:
-        hf_api_token = os.environ.get('HF_API_TOKEN', None)
-        api.upload_file(
-            path_or_fileobj=submission_json.encode('utf-8'),
-            path_in_repo=file_path_in_repo,
-            repo_id=DATASET_REPO_ID,
-            repo_type="dataset",
-            token=hf_api_token
-        )
-    except Exception as e:
-        return f"**Error: Could not submit the model. {str(e)}**"
-
-    return f"**Model '{model_name}' has been submitted for evaluation.**"
+df = load_retrieval_results()
+print(df)

 def main():
-    df_3c3h, df_tasks, task_columns = load_results()
-
-    # Extract unique Precision and License values for filters
-    precision_options_3c3h = sorted(df_3c3h['Precision'].dropna().unique().tolist())
-    precision_options_3c3h = [p for p in precision_options_3c3h if p != 'UNK']
-    precision_options_3c3h.append('Missing')
-
-    license_options_3c3h = sorted(df_3c3h['License'].dropna().unique().tolist())
-    license_options_3c3h = [l for l in license_options_3c3h if l != 'UNK']
-    license_options_3c3h.append('Missing')
-
-    precision_options_tasks = sorted(df_tasks['Precision'].dropna().unique().tolist())
-    precision_options_tasks = [p for p in precision_options_tasks if p != 'UNK']
-    precision_options_tasks.append('Missing')
-
-    license_options_tasks = sorted(df_tasks['License'].dropna().unique().tolist())
-    license_options_tasks = [l for l in license_options_tasks if l != 'UNK']
-    license_options_tasks.append('Missing')
-
-    # Get min and max model sizes for sliders, handling 'inf' values
-    min_model_size_3c3h = int(df_3c3h['Model Size Filter'].min())
-    max_model_size_3c3h = int(df_3c3h['Model Size Filter'].max())
-
-    min_model_size_tasks = int(df_tasks['Model Size Filter'].min())
-    max_model_size_tasks = int(df_tasks['Model Size Filter'].max())
-
-    # Exclude 'Model Size Filter' from column selectors
-    column_choices_3c3h = [col for col in df_3c3h.columns if col != 'Model Size Filter']
-    column_choices_tasks = [col for col in df_tasks.columns if col != 'Model Size Filter']

     with gr.Blocks() as demo:
         gr.Markdown(HEADER)
@@ -335,176 +69,107 @@ def main():
                             interactive=True
                         )

+                    with gr.Row():
+                        column_selector_tasks = gr.CheckboxGroup(
+                            choices=[],
+                            value=['Rank', 'Model Name'],
+                            label="Select columns to display",
+                        )
+
                     with gr.Row():
                         license_filter_retrieval = gr.CheckboxGroup(
-                            choices=license_options_3c3h,
-                            value=license_options_3c3h.copy(),  # Default all selected
+                            choices=[],
+                            value=[],  # Default all selected
                             label="Filter by License",
                         )
                         precision_filter_retrieval = gr.CheckboxGroup(
-                            choices=precision_options_3c3h,
-                            value=precision_options_3c3h.copy(),  # Default all selected
+                            choices=[],
+                            value=[],  # Default all selected
                             label="Filter by Precision",
                         )
-                    with gr.Row():
-                        model_size_min_filter_3c3h = gr.Slider(
-                            minimum=min_model_size_3c3h,
-                            maximum=max_model_size_3c3h,
-                            value=min_model_size_3c3h,
-                            step=1,
-                            label="Minimum Model Size",
-                            interactive=True
-                        )
-                        model_size_max_filter_3c3h = gr.Slider(
-                            minimum=min_model_size_3c3h,
-                            maximum=max_model_size_3c3h,
-                            value=max_model_size_3c3h,
-                            step=1,
-                            label="Maximum Model Size",
-                            interactive=True
-                        )

-                    leaderboard_3c3h = gr.Dataframe(
-                        df_3c3h[['Rank', 'Model Name', '3C3H Score', 'Correctness', 'Completeness',
-                                 'Conciseness', 'Helpfulness', 'Honesty', 'Harmlessness']],
+                    retrieval_leaderboard = gr.Dataframe(
+                        df,
                         interactive=False
                     )

-                    def filter_df_3c3h(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
-                        filtered_df = df_3c3h.copy()
+                    # def filter_df_3c3h(search_query, selected_cols, precision_filters, license_filters):
+                    #     filtered_df = df_3c3h.copy()

-                        # Ensure min_size <= max_size
-                        if min_size > max_size:
-                            min_size, max_size = max_size, min_size
+                    #     # Ensure min_size <= max_size
+                    #     if min_size > max_size:
+                    #         min_size, max_size = max_size, min_size

-                        # Apply search filter
-                        if search_query:
-                            filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]
+                    #     # Apply search filter
+                    #     if search_query:
+                    #         filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]

-                        # Apply Precision filter
-                        if precision_filters:
-                            include_missing = 'Missing' in precision_filters
-                            selected_precisions = [p for p in precision_filters if p != 'Missing']
-                            if include_missing:
-                                filtered_df = filtered_df[
-                                    (filtered_df['Precision'].isin(selected_precisions)) |
-                                    (filtered_df['Precision'] == 'UNK') |
-                                    (filtered_df['Precision'].isna())
-                                ]
-                            else:
-                                filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]
+                    #     # Apply Precision filter
+                    #     if precision_filters:
+                    #         include_missing = 'Missing' in precision_filters
+                    #         selected_precisions = [p for p in precision_filters if p != 'Missing']
+                    #         if include_missing:
+                    #             filtered_df = filtered_df[
+                    #                 (filtered_df['Precision'].isin(selected_precisions)) |
+                    #                 (filtered_df['Precision'] == 'UNK') |
+                    #                 (filtered_df['Precision'].isna())
+                    #             ]
+                    #         else:
+                    #             filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]

-                        # Apply License filter
-                        if license_filters:
-                            include_missing = 'Missing' in license_filters
-                            selected_licenses = [l for l in license_filters if l != 'Missing']
-                            if include_missing:
-                                filtered_df = filtered_df[
-                                    (filtered_df['License'].isin(selected_licenses)) |
-                                    (filtered_df['License'] == 'UNK') |
-                                    (filtered_df['License'].isna())
-                                ]
-                            else:
-                                filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]
+                    #     # Apply License filter
+                    #     if license_filters:
+                    #         include_missing = 'Missing' in license_filters
+                    #         selected_licenses = [l for l in license_filters if l != 'Missing']
+                    #         if include_missing:
+                    #             filtered_df = filtered_df[
+                    #                 (filtered_df['License'].isin(selected_licenses)) |
+                    #                 (filtered_df['License'] == 'UNK') |
+                    #                 (filtered_df['License'].isna())
+                    #             ]
+                    #         else:
+                    #             filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]

-                        # Apply Model Size filter
-                        filtered_df = filtered_df[
-                            (filtered_df['Model Size Filter'] >= min_size) &
-                            (filtered_df['Model Size Filter'] <= max_size)
-                        ]
+                    #     # Apply Model Size filter
+                    #     filtered_df = filtered_df[
+                    #         (filtered_df['Model Size Filter'] >= min_size) &
+                    #         (filtered_df['Model Size Filter'] <= max_size)
+                    #     ]

-                        # Remove existing 'Rank' column if present
-                        if 'Rank' in filtered_df.columns:
-                            filtered_df = filtered_df.drop(columns=['Rank'])
+                    #     # Remove existing 'Rank' column if present
+                    #     if 'Rank' in filtered_df.columns:
+                    #         filtered_df = filtered_df.drop(columns=['Rank'])

-                        # Recalculate Rank after filtering
-                        filtered_df = filtered_df.reset_index(drop=True)
-                        filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))
+                    #     # Recalculate Rank after filtering
+                    #     filtered_df = filtered_df.reset_index(drop=True)
+                    #     filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))

-                        # Ensure selected columns are present
-                        selected_cols = [col for col in selected_cols if col in filtered_df.columns]
+                    #     # Ensure selected columns are present
+                    #     selected_cols = [col for col in selected_cols if col in filtered_df.columns]

-                        return filtered_df[selected_cols]
+                    #     return filtered_df[selected_cols]

-                    # Bind the filter function to the appropriate events
-                    filter_inputs_3c3h = [
-                        search_box_retrieval,
-                        precision_filter_retrieval,
-                        license_filter_retrieval,
-                        model_size_min_filter_3c3h,
-                        model_size_max_filter_3c3h
-                    ]
-                    search_box_retrieval.submit(
-                        filter_df_3c3h,
-                        inputs=filter_inputs_3c3h,
-                        outputs=leaderboard_3c3h
-                    )
+                    # # Bind the filter function to the appropriate events
+                    # filter_inputs_3c3h = [
+                    #     search_box_retrieval,
+                    #     precision_filter_retrieval,
+                    #     license_filter_retrieval,
+                    # ]
+                    # search_box_retrieval.submit(
+                    #     filter_df_3c3h,
+                    #     inputs=filter_inputs_3c3h,
+                    #     outputs=leaderboard_3c3h
+                    # )

-                    # Bind change events for CheckboxGroups and sliders
-                    for component in filter_inputs_3c3h:
-                        component.change(
-                            filter_df_3c3h,
-                            inputs=filter_inputs_3c3h,
-                            outputs=leaderboard_3c3h
-                        )
+                    # # Bind change events for CheckboxGroups and sliders
+                    # for component in filter_inputs_3c3h:
+                    #     component.change(
+                    #         filter_df_3c3h,
+                    #         inputs=filter_inputs_3c3h,
+                    #         outputs=leaderboard_3c3h
+                    #     )

-                with gr.Tab("Submit Retriever"):
-
-                    model_name_input = gr.Textbox(
-                        label="Model",
-                        placeholder="Enter the full model name from HuggingFace Hub (e.g., inceptionai/jais-family-30b-8k)"
-                    )
-                    revision_input = gr.Textbox(
-                        label="Revision",
-                        placeholder="main",
-                        value="main"
-                    )
-                    precision_input = gr.Dropdown(
-                        choices=["float16", "float32", "bfloat16", "8bit", "4bit"],
-                        label="Precision",
-                        value="float16"
-                    )
-                    params_input = gr.Textbox(
-                        label="Params",
-                        placeholder="Enter the approximate number of parameters as Integer (e.g., 7, 13, 30, 70 ...)"
-                    )
-                    # Changed from Dropdown to Textbox with default value "Open"
-                    license_input = gr.Textbox(
-                        label="License",
-                        placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
-                        value="Open"
-                    )
-                    submit_button = gr.Button("Submit Model")
-                    submission_result = gr.Markdown()
-
-                    submit_button.click(
-                        submit_model,
-                        inputs=[model_name_input, revision_input, precision_input, params_input, license_input],
-                        outputs=submission_result
-                    )
-
-                    # Load pending, finished, and failed requests
-                    df_pending = load_requests('pending')
-                    df_finished = load_requests('finished')
-                    df_failed = load_requests('failed')
-
-                    # Display the tables
-                    gr.Markdown("## Evaluation Status")
-                    with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
-                        if not df_pending.empty:
-                            gr.Dataframe(df_pending)
-                        else:
-                            gr.Markdown("No pending evaluations.")
-                    with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
-                        if not df_finished.empty:
-                            gr.Dataframe(df_finished)
-                        else:
-                            gr.Markdown("No finished evaluations.")
-                    with gr.Accordion(f"Failed Evaluations ({len(df_failed)})", open=False):
-                        if not df_failed.empty:
-                            gr.Dataframe(df_failed)
-                        else:
-                            gr.Markdown("No failed evaluations.")
+                submit_gradio_module()

         with gr.Tab("Reranking"):
             with gr.Tabs():
@@ -518,133 +183,134 @@ def main():
                         )
                     with gr.Row():
                         column_selector_tasks = gr.CheckboxGroup(
-                            choices=column_choices_tasks,
-                            value=['Rank', 'Model Name'] + task_columns,
+                            choices=[],
+                            value=['Rank', 'Model Name'],
                            label="Select columns to display",
                         )
                     with gr.Row():
                         license_filter_tasks = gr.CheckboxGroup(
-                            choices=license_options_tasks,
-                            value=license_options_tasks.copy(),  # Default all selected
+                            choices=[],
+                            value=[],  # Default all selected
                             label="Filter by License",
                         )
                         precision_filter_tasks = gr.CheckboxGroup(
-                            choices=precision_options_tasks,
-                            value=precision_options_tasks.copy(),  # Default all selected
+                            choices=[],
+                            value=[],  # Default all selected
                             label="Filter by Precision",
                         )
-                    with gr.Row():
-                        model_size_min_filter_tasks = gr.Slider(
-                            minimum=min_model_size_tasks,
-                            maximum=max_model_size_tasks,
-                            value=min_model_size_tasks,
-                            step=1,
-                            label="Minimum Model Size",
-                            interactive=True
-                        )
-                        model_size_max_filter_tasks = gr.Slider(
-                            minimum=min_model_size_tasks,
-                            maximum=max_model_size_tasks,
-                            value=max_model_size_tasks,
-                            step=1,
-                            label="Maximum Model Size",
-                            interactive=True
-                        )
+                    # with gr.Row():
+                    #     model_size_min_filter_tasks = gr.Slider(
+                    #         minimum=min_model_size_tasks,
+                    #         maximum=max_model_size_tasks,
+                    #         value=min_model_size_tasks,
+                    #         step=1,
+                    #         label="Minimum Model Size",
+                    #         interactive=True
+                    #     )
+                    #     model_size_max_filter_tasks = gr.Slider(
+                    #         minimum=min_model_size_tasks,
+                    #         maximum=max_model_size_tasks,
+                    #         value=max_model_size_tasks,
+                    #         step=1,
+                    #         label="Maximum Model Size",
+                    #         interactive=True
+                    #     )

                     leaderboard_tasks = gr.Dataframe(
-                        df_tasks[['Rank', 'Model Name'] + task_columns],
-                        interactive=False
+                        df,
+                        # headers="auto",  # Automatically use DataFrame's headers
+                        # label="MultiIndex DataFrame",
+                        # interactive=False,
                     )

-                    def filter_df_tasks(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
-                        filtered_df = df_tasks.copy()
+                    # def filter_df_tasks(search_query, selected_cols, precision_filters, license_filters, min_size, max_size):
+                    #     filtered_df = df_tasks.copy()

-                        # Ensure min_size <= max_size
-                        if min_size > max_size:
-                            min_size, max_size = max_size, min_size
+                    #     # Ensure min_size <= max_size
+                    #     if min_size > max_size:
+                    #         min_size, max_size = max_size, min_size

-                        # Apply search filter
-                        if search_query:
-                            filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]
+                    #     # Apply search filter
+                    #     if search_query:
+                    #         filtered_df = filtered_df[filtered_df['Model Name'].str.contains(search_query, case=False, na=False)]

-                        # Apply Precision filter
-                        if precision_filters:
-                            include_missing = 'Missing' in precision_filters
-                            selected_precisions = [p for p in precision_filters if p != 'Missing']
-                            if include_missing:
-                                filtered_df = filtered_df[
-                                    (filtered_df['Precision'].isin(selected_precisions)) |
-                                    (filtered_df['Precision'] == 'UNK') |
-                                    (filtered_df['Precision'].isna())
-                                ]
-                            else:
-                                filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]
+                    #     # Apply Precision filter
+                    #     if precision_filters:
+                    #         include_missing = 'Missing' in precision_filters
+                    #         selected_precisions = [p for p in precision_filters if p != 'Missing']
+                    #         if include_missing:
+                    #             filtered_df = filtered_df[
+                    #                 (filtered_df['Precision'].isin(selected_precisions)) |
+                    #                 (filtered_df['Precision'] == 'UNK') |
+                    #                 (filtered_df['Precision'].isna())
+                    #             ]
+                    #         else:
+                    #             filtered_df = filtered_df[filtered_df['Precision'].isin(selected_precisions)]

-                        # Apply License filter
-                        if license_filters:
-                            include_missing = 'Missing' in license_filters
-                            selected_licenses = [l for l in license_filters if l != 'Missing']
-                            if include_missing:
-                                filtered_df = filtered_df[
-                                    (filtered_df['License'].isin(selected_licenses)) |
-                                    (filtered_df['License'] == 'UNK') |
-                                    (filtered_df['License'].isna())
-                                ]
-                            else:
-                                filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]
+                    #     # Apply License filter
+                    #     if license_filters:
+                    #         include_missing = 'Missing' in license_filters
+                    #         selected_licenses = [l for l in license_filters if l != 'Missing']
+                    #         if include_missing:
+                    #             filtered_df = filtered_df[
+                    #                 (filtered_df['License'].isin(selected_licenses)) |
+                    #                 (filtered_df['License'] == 'UNK') |
+                    #                 (filtered_df['License'].isna())
+                    #             ]
+                    #         else:
+                    #             filtered_df = filtered_df[filtered_df['License'].isin(selected_licenses)]

-                        # Apply Model Size filter
-                        filtered_df = filtered_df[
-                            (filtered_df['Model Size Filter'] >= min_size) &
-                            (filtered_df['Model Size Filter'] <= max_size)
-                        ]
+                    #     # Apply Model Size filter
+                    #     filtered_df = filtered_df[
+                    #         (filtered_df['Model Size Filter'] >= min_size) &
+                    #         (filtered_df['Model Size Filter'] <= max_size)
+                    #     ]

-                        # Remove existing 'Rank' column if present
-                        if 'Rank' in filtered_df.columns:
-                            filtered_df = filtered_df.drop(columns=['Rank'])
+                    #     # Remove existing 'Rank' column if present
+                    #     if 'Rank' in filtered_df.columns:
+                    #         filtered_df = filtered_df.drop(columns=['Rank'])

-                        # Sort by the first task column if it exists
-                        if task_columns:
-                            first_task = task_columns[0]
-                            filtered_df = filtered_df.sort_values(by=first_task, ascending=False)
-                        else:
-                            filtered_df = filtered_df.sort_values(by='Model Name', ascending=True)
+                    #     # Sort by the first task column if it exists
+                    #     if task_columns:
+                    #         first_task = task_columns[0]
+                    #         filtered_df = filtered_df.sort_values(by=first_task, ascending=False)
+                    #     else:
+                    #         filtered_df = filtered_df.sort_values(by='Model Name', ascending=True)

-                        # Recalculate Rank after filtering
-                        filtered_df = filtered_df.reset_index(drop=True)
-                        filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))
+                    #     # Recalculate Rank after filtering
+                    #     filtered_df = filtered_df.reset_index(drop=True)
+                    #     filtered_df.insert(0, 'Rank', range(1, len(filtered_df) + 1))

-                        # Ensure selected columns are present
-                        selected_cols = [col for col in selected_cols if col in filtered_df.columns]
+                    #     # Ensure selected columns are present
+                    #     selected_cols = [col for col in selected_cols if col in filtered_df.columns]

-                        return filtered_df[selected_cols]
+                    #     return filtered_df[selected_cols]

-                    # Bind the filter function to the appropriate events
-                    filter_inputs_tasks = [
-                        search_box_tasks,
-                        column_selector_tasks,
-                        precision_filter_tasks,
-                        license_filter_tasks,
-                        model_size_min_filter_tasks,
-                        model_size_max_filter_tasks
-                    ]
-                    search_box_tasks.submit(
-                        filter_df_tasks,
-                        inputs=filter_inputs_tasks,
-                        outputs=leaderboard_tasks
-                    )
+                    # # Bind the filter function to the appropriate events
+                    # filter_inputs_tasks = [
+                    #     search_box_tasks,
+                    #     column_selector_tasks,
+                    #     precision_filter_tasks,
+                    #     license_filter_tasks,
+                    #     model_size_min_filter_tasks,
+                    #     model_size_max_filter_tasks
+                    # ]
+                    # search_box_tasks.submit(
+                    #     filter_df_tasks,
+                    #     inputs=filter_inputs_tasks,
+                    #     outputs=leaderboard_tasks
+                    # )

-                    # Bind change events for CheckboxGroups and sliders
-                    for component in filter_inputs_tasks:
-                        component.change(
-                            filter_df_tasks,
-                            inputs=filter_inputs_tasks,
-                            outputs=leaderboard_tasks
-                        )
-
-                with gr.Tab("Submit Reranker"):
-                    pass
-
+                    # # Bind change events for CheckboxGroups and sliders
+                    # for component in filter_inputs_tasks:
+                    #     component.change(
+                    #         filter_df_tasks,
+                    #         inputs=filter_inputs_tasks,
+                    #         outputs=leaderboard_tasks
+                    #     )
+
+                submit_gradio_module()
+
         with gr.Tab("LLM Context Answering"):
             with gr.Tabs():
                 with gr.Tab("Leaderboard"):
@@ -654,7 +320,7 @@ def main():

         with gr.Row():
             with gr.Accordion("📙 Citation", open=False):
-                citation_button = gr.Textbox(
+                gr.Textbox(
                     value=CITATION_BUTTON_TEXT,
                     label=CITATION_BUTTON_LABEL,
                     lines=20,
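Taken together, the app.py side of this commit strips out the AraGen-era helpers (load_results, load_requests, submit_model), loads the leaderboard DataFrame once at module level through load_retrieval_results() (the module-level print(df) reads like a leftover debug statement), and delegates each tab's submission UI to submit_gradio_module() from the new utils.py. For orientation, a minimal sketch of the resulting layout; the loop and tab names here are illustrative assumptions, not the committed file:

```python
import gradio as gr

from utils import load_retrieval_results, submit_gradio_module

# Loaded once at import time and shared by the tabs, as in the committed app.py
df = load_retrieval_results()

def main():
    with gr.Blocks() as demo:
        # Hypothetical loop for brevity; the committed file writes each tab out by hand
        for tab_name in ("Retrieval", "Reranking"):
            with gr.Tab(tab_name):
                with gr.Tabs():
                    with gr.Tab("Leaderboard"):
                        gr.Dataframe(df, interactive=False)
                    submit_gradio_module()  # contributes the "Submit Model" sub-tab
    demo.launch()

if __name__ == "__main__":
    main()
```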
utils.py ADDED
@@ -0,0 +1,251 @@
+import gradio as gr
+import pandas as pd
+import json
+import os
+from pathlib import Path
+from huggingface_hub import HfApi
+
+api = HfApi()
+
+def load_retrieval_results():
+    base_path = Path(__file__).parent
+    results_dir = base_path / "assets" / "results"
+
+    retrieval_dataframe_path = results_dir / "retrieval_results.json"
+    if not retrieval_dataframe_path.exists():
+        df = pd.DataFrame(columns=["Model", "Model Size", "Embedding Dimension", "Max Tokens", "Web Search Dataset (MRR)", "Web Search Dataset (nDCG@k=None)"])
+    else:
+        df = pd.read_json(retrieval_dataframe_path)
+    return df
+
+def get_model_info(model_id, verbose=False):
+    model_info = api.model_info(model_id)
+    num_downloads = model_info.downloads
+    num_likes = model_info.likes
+    license = model_info.card_data["license"]
+    num_parameters = round(model_info.safetensors.total / 1e6)
+    supported_precisions = list(model_info.safetensors.parameters.keys())
+    if verbose:
+        print(f"Model '{model_id}' has {num_downloads} downloads, {num_likes} likes, and is licensed under {license}.")
+        print(f"The model has approximately {num_parameters:.2f} billion parameters.")
+        print(f"The model supports the following precisions: {supported_precisions}")
+    return num_downloads, num_likes, license, num_parameters, supported_precisions
+
+def fetch_model_information(model_name):
+    try:
+        _, _, license, num_parameters, supported_precisions = get_model_info(model_name)
+        if len(supported_precisions) == 0:
+            supported_precisions = [None]
+    except Exception as e:
+        gr.Error(f"**Error: Could not fetch model information. {str(e)}**")
+        return
+    return gr.update(choices=supported_precisions, value=supported_precisions[0]), num_parameters, license
+
+def submit_model(model_name, revision, precision, params, license):
+    # Load existing evaluations
+    df_retrieval = load_retrieval_results()
+    if df_retrieval.empty:
+        return "**Error: Could not load the retrieval results.**"
+
+    existing_models_results = df_retrieval[['Model', 'Revision', 'Precision']]
+
+    # Handle 'Missing' precision
+    if precision == 'Missing':
+        precision = None
+    else:
+        precision = precision.strip().lower()
+
+    # Load pending and finished requests from the dataset repository
+    df_pending = load_requests('pending')
+    df_finished = load_requests('finished')
+
+    # Check if model is already evaluated
+    model_exists_in_results = ((existing_models_results['Model Name'] == model_name) &
+                               (existing_models_results['Revision'] == revision) &
+                               (existing_models_results['Precision'] == precision)).any()
+    if model_exists_in_results:
+        return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
+
+    # Check if model is in pending requests
+    if not df_pending.empty:
+        existing_models_pending = df_pending[['model_name', 'revision', 'precision']]
+        model_exists_in_pending = ((existing_models_pending['model_name'] == model_name) &
+                                   (existing_models_pending['revision'] == revision) &
+                                   (existing_models_pending['precision'] == precision)).any()
+        if model_exists_in_pending:
+            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' is already in the pending evaluations.**"
+
+    # Check if model is in finished requests
+    if not df_finished.empty:
+        existing_models_finished = df_finished[['model_name', 'revision', 'precision']]
+        model_exists_in_finished = ((existing_models_finished['model_name'] == model_name) &
+                                    (existing_models_finished['revision'] == revision) &
+                                    (existing_models_finished['precision'] == precision)).any()
+        if model_exists_in_finished:
+            return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' has already been evaluated.**"
+
+    # Check if model exists on HuggingFace Hub
+    try:
+        api.model_info(model_name)
+    except Exception as e:
+        return f"**Error: Could not find model '{model_name}' on HuggingFace Hub. Please ensure the model name is correct and the model is public.**"
+
+    # Proceed with submission
+    status = "PENDING"
+
+    # Prepare the submission data
+    submission = {
+        "model_name": model_name,
+        "license": license,
+        "revision": revision,
+        "precision": precision,
+        "status": status,
+        "params": params
+    }
+
+    # Serialize the submission to JSON
+    submission_json = json.dumps(submission, indent=2)
+
+    # Define the file path in the repository
+    org_model = model_name.split('/')
+    if len(org_model) != 2:
+        return "**Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'**"
+    org, model_id = org_model
+    precision_str = precision if precision else 'Missing'
+    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}.json"
+
+    # Upload the submission to the dataset repository
+    try:
+        hf_api_token = os.environ.get('HF_API_TOKEN', None)
+        api.upload_file(
+            path_or_fileobj=submission_json.encode('utf-8'),
+            path_in_repo=file_path_in_repo,
+            repo_id=DATASET_REPO_ID,
+            repo_type="dataset",
+            token=hf_api_token
+        )
+    except Exception as e:
+        return f"**Error: Could not submit the model. {str(e)}**"
+
+    return f"**Model '{model_name}' has been submitted for evaluation.**"
+
+def load_requests(status_folder):
+    api = HfApi()
+    requests_data = []
+    folder_path_in_repo = status_folder  # 'pending', 'finished', or 'failed'
+
+    hf_api_token = os.environ.get('HF_API_TOKEN', None)
+
+    try:
+        # List files in the dataset repository
+        files_info = api.list_repo_files(
+            repo_id=DATASET_REPO_ID,
+            repo_type="dataset",
+            token=hf_api_token
+        )
+    except Exception as e:
+        print(f"Error accessing dataset repository: {e}")
+        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible
+
+    # Filter files in the desired folder
+    files_in_folder = [f for f in files_info if f.startswith(f"{folder_path_in_repo}/") and f.endswith('.json')]
+
+    for file_path in files_in_folder:
+        try:
+            # Download the JSON file
+            local_file_path = hf_hub_download(
+                repo_id=DATASET_REPO_ID,
+                filename=file_path,
+                repo_type="dataset",
+                token=hf_api_token
+            )
+            # Load JSON data
+            with open(local_file_path, 'r') as f:
+                request = json.load(f)
+            requests_data.append(request)
+        except Exception as e:
+            print(f"Error loading file {file_path}: {e}")
+            continue  # Skip files that can't be loaded
+
+    df = pd.DataFrame(requests_data)
+    return df
+
+
+def submit_gradio_module():
+    with gr.Tab("Submit Model") as submitter_tab:
+
+        with gr.Row(equal_height=True):
+            model_name_input = gr.Textbox(
+                label="Model",
+                placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
+                scale=4,
+            )
+            fetch_data_button = gr.Button(value="Auto Fetch Model Info", variant="secondary")
+
+        with gr.Row():
+            precision_input = gr.Dropdown(
+                choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
+                label="Precision",
+                value="F16"
+            )
+            params_input = gr.Textbox(
+                label="Params (in Millions)",
+                placeholder="Enter the approximate number of parameters as Integer (e.g., 7, 13, 30, 70 ...)"
+            )
+
+        with gr.Row():
+            license_input = gr.Textbox(
+                label="License",
+                placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
+                value="Open"
+            )
+            revision_input = gr.Textbox(
+                label="Revision",
+                placeholder="main",
+                value="main"
+            )
+
+        submit_button = gr.Button("Submit Model", variant="primary")
+        submission_result = gr.Markdown()
+        fetch_outputs = [precision_input, params_input, license_input]
+
+        fetch_data_button.click(
+            fetch_model_information,
+            inputs=[model_name_input],
+            outputs=fetch_outputs
+        )
+        model_name_input.submit(
+            fetch_model_information,
+            inputs=[model_name_input],
+            outputs=fetch_outputs
+        )
+        submit_button.click(
+            submit_model,
+            inputs=[model_name_input, revision_input, precision_input, params_input, license_input],
+            outputs=submission_result
+        )
+
+        # Load pending, finished, and failed requests
+        df_pending = load_requests('pending')
+        df_finished = load_requests('finished')
+        df_failed = load_requests('failed')
+
+        # Display the tables
+        gr.Markdown("## Evaluation Status")
+        with gr.Accordion(f"Pending Evaluations ({len(df_pending)})", open=False):
+            if not df_pending.empty:
+                gr.Dataframe(df_pending)
+            else:
+                gr.Markdown("No pending evaluations.")
+        with gr.Accordion(f"Finished Evaluations ({len(df_finished)})", open=False):
+            if not df_finished.empty:
+                gr.Dataframe(df_finished)
+            else:
+                gr.Markdown("No finished evaluations.")
+        with gr.Accordion(f"Failed Evaluations ({len(df_failed)})", open=False):
+            if not df_failed.empty:
+                gr.Dataframe(df_failed)
+            else:
+                gr.Markdown("No failed evaluations.")
+
+    return submitter_tab
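A few details in the new module are worth flagging. load_requests() calls hf_hub_download and, like submit_model(), references DATASET_REPO_ID, yet the file only imports HfApi and defines no such constant, so both names must be brought into scope before submissions can work. submit_model() also selects ['Model', 'Revision', 'Precision'] from the results frame but then filters on a 'Model Name' column, and get_model_info() computes parameters in millions (total / 1e6) while its verbose message says "billion". A small sketch of the Hub-metadata fetch that "Auto Fetch Model Info" relies on, assuming the missing import is added; the repo id below is a placeholder, not a value from the commit:

```python
from huggingface_hub import HfApi, hf_hub_download  # hf_hub_download is needed by load_requests

# Assumed constant: the committed utils.py references DATASET_REPO_ID without defining it
DATASET_REPO_ID = "<owner>/requests-dataset"  # placeholder, not from the commit

api = HfApi()

# The same metadata get_model_info() reads for the submission form
info = api.model_info("intfloat/multilingual-e5-large-instruct")
params_in_millions = round(info.safetensors.total / 1e6)         # millions, despite the "billion" wording
supported_precisions = list(info.safetensors.parameters.keys())  # e.g. ['F32']
print(params_in_millions, supported_precisions, info.card_data["license"])
```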