evijit HF staff committed on
Commit
18bb408
·
verified ·
1 Parent(s): e8bb780

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +184 -39
app.py CHANGED
@@ -1,6 +1,5 @@
1
  import gradio as gr
2
  import pandas as pd
3
- import plotly.express as px
4
  from dataclasses import dataclass, field
5
  from typing import List, Dict, Tuple, Union
6
  import json
@@ -8,6 +7,10 @@ import os
8
  from collections import OrderedDict
9
  import re
10
 
 
 
 
 
11
 
12
  def load_css(css_file_path):
13
  """Load CSS from a file."""
@@ -486,66 +489,208 @@ with gr.Column(visible=True) as leaderboard_tab:
486
  datatype=["markdown", "markdown", "markdown"] + ["markdown"] * (len(category_choices)+1) # Support markdown in all columns
487
  )
488
 
 
 
 
 
 
 
 
 
489
  def create_category_chart(selected_systems, selected_categories):
490
  if not selected_systems:
491
- # Create an empty figure with a prompt message
492
- df = pd.DataFrame({'AI System': [], 'Category': [], 'Evaluations Completed': []})
493
- fig = px.bar(df,
494
- x='AI System',
495
- y='Evaluations Completed',
496
- title='Please select at least one AI system for comparison')
497
- fig.update_layout(showlegend=True)
498
  return fig
499
 
500
- # Sort categories before processing
501
  selected_categories = sort_categories(selected_categories)
 
502
 
503
- data = []
 
504
  for system_name in selected_systems:
 
505
  for category in selected_categories:
506
  if category in models[system_name]['scores']:
507
  completed = 0
508
  total = 0
 
509
 
 
510
  for section in models[system_name]['scores'][category].values():
511
  if section['status'] != 'N/A':
 
512
  questions = section.get('questions', {})
513
  completed += sum(1 for q in questions.values() if q)
514
  total += len(questions)
515
 
516
- if total > 0: # Only add if there are evaluations to do
517
- data.append({
518
- 'AI System': system_name,
519
- 'Category': category.split('.')[1].strip(),
520
- 'Evaluations Completed': completed,
521
- 'Total Evaluations': total
522
- })
523
-
524
- df = pd.DataFrame(data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
525
 
526
  if df.empty:
527
- fig = px.bar(title='No data available for the selected AI systems and categories')
528
- else:
529
- fig = px.bar(
530
- df,
531
- x='AI System',
532
- y='Evaluations Completed',
533
- color='Category',
534
- title='Number of Evaluations Completed by Category',
535
- labels={
536
- 'Evaluations Completed': 'Evaluations Completed',
537
- 'AI System': 'AI System Name',
538
- 'Category': 'Evaluation Category'
539
- },
540
- hover_data=['Total Evaluations']
541
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
542
 
543
- fig.update_layout(
544
- showlegend=True,
545
- xaxis_title="AI System Name",
546
- yaxis_title="Number of Evaluations Completed",
547
- # hovermode='x unified'
548
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
549
 
550
  return fig
551
 
 
1
  import gradio as gr
2
  import pandas as pd
 
3
  from dataclasses import dataclass, field
4
  from typing import List, Dict, Tuple, Union
5
  import json
 
7
  from collections import OrderedDict
8
  import re
9
 
10
+ import plotly.graph_objects as go
11
+ import plotly.express as px
12
+ # from plotly.subplots import make_subplots
13
+ # import math
14
 
15
  def load_css(css_file_path):
16
  """Load CSS from a file."""
 
489
  datatype=["markdown", "markdown", "markdown"] + ["markdown"] * (len(category_choices)+1) # Support markdown in all columns
490
  )
491
 
492
def hex_to_rgba(hex_color, alpha):
    """Convert a hex color to a CSS-style ``rgba(r,g,b,a)`` string.

    Args:
        hex_color: Color written as ``RRGGBB`` or the shorthand ``RGB``,
            with or without a leading ``#``.
        alpha: Opacity value, inserted into the result unchanged.

    Returns:
        A string of the form ``'rgba(R,G,B,alpha)'`` with decimal channels.

    Raises:
        ValueError: If the color is not 3 or 6 hex digits long, or contains
            characters that are not hexadecimal digits.
    """
    digits = hex_color.lstrip('#')

    # Expand the shorthand form: 'abc' -> 'aabbcc'.
    if len(digits) == 3:
        digits = ''.join(ch * 2 for ch in digits)

    # Reject other lengths explicitly; slicing "the rest" for the blue channel
    # would silently produce a wrong value for e.g. '#RRGGBBAA'.
    if len(digits) != 6:
        raise ValueError(
            f"Expected a 3- or 6-digit hex color, got {hex_color!r}"
        )

    r, g, b = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
    return f'rgba({r},{g},{b},{alpha})'
499
+
500
  def create_category_chart(selected_systems, selected_categories):
501
  if not selected_systems:
502
+ fig = go.Figure()
503
+ fig.add_annotation(
504
+ text="Please select at least one AI system for comparison",
505
+ xref="paper", yref="paper",
506
+ x=0.5, y=0.5,
507
+ showarrow=False
508
+ )
509
  return fig
510
 
 
511
  selected_categories = sort_categories(selected_categories)
512
+ BASE_SCORE = 5
513
 
514
+ # Prepare all data first
515
+ all_data = []
516
  for system_name in selected_systems:
517
+ system_data = []
518
  for category in selected_categories:
519
  if category in models[system_name]['scores']:
520
  completed = 0
521
  total = 0
522
+ category_name = category.split('.')[1].strip()
523
 
524
+ all_na = True
525
  for section in models[system_name]['scores'][category].values():
526
  if section['status'] != 'N/A':
527
+ all_na = False
528
  questions = section.get('questions', {})
529
  completed += sum(1 for q in questions.values() if q)
530
  total += len(questions)
531
 
532
+ if all_na:
533
+ score = BASE_SCORE
534
+ display_score = 0
535
+ status = 'N/A'
536
+ elif total > 0:
537
+ raw_score = (completed / total) * 100
538
+ score = BASE_SCORE + (90 * raw_score / 100)
539
+ display_score = raw_score
540
+ status = 'Active'
541
+ else:
542
+ score = BASE_SCORE
543
+ display_score = 0
544
+ status = 'Active'
545
+
546
+ system_data.append({
547
+ 'AI System': system_name,
548
+ 'Category': category_name,
549
+ 'Score': score,
550
+ 'Display Score': display_score,
551
+ 'Status': status,
552
+ 'Original Score': f"{display_score:.1f}%",
553
+ 'Completed': completed,
554
+ 'Total': total
555
+ })
556
+ if system_data:
557
+ # Add first point again to close the shape
558
+ system_data.append(system_data[0].copy())
559
+ all_data.extend(system_data)
560
+
561
+ df = pd.DataFrame(all_data)
562
 
563
  if df.empty:
564
+ fig = go.Figure()
565
+ fig.add_annotation(
566
+ text="No data available for the selected AI systems and categories",
567
+ xref="paper", yref="paper",
568
+ x=0.5, y=0.5,
569
+ showarrow=False
 
 
 
 
 
 
 
 
570
  )
571
+ return fig
572
+
573
+ fig = go.Figure()
574
+
575
+ # Define colors
576
+ colors = [
577
+ '#FF4B4B', '#4B7BFF', '#4BFF4B', '#FFD700', '#FF4BFF',
578
+ '#4BFFFF', '#FF884B', '#884BFF', '#4BFF88', '#FFFF4B'
579
+ ]
580
+
581
+ # Calculate average scores for sorting
582
+ system_scores = {
583
+ system: df[df['AI System'] == system]['Score'].mean()
584
+ for system in selected_systems
585
+ }
586
+ sorted_systems = sorted(selected_systems,
587
+ key=lambda x: system_scores[x],
588
+ reverse=True)
589
+
590
+ # Plot each system
591
+ for idx, system_name in enumerate(sorted_systems):
592
+ system_df = df[df['AI System'] == system_name]
593
 
594
+ # Get color for this system
595
+ base_color = colors[idx % len(colors)]
596
+ line_color = hex_to_rgba(base_color, 0.9)
597
+ fill_color = hex_to_rgba(base_color, 0.15)
598
+ hover_color = hex_to_rgba(base_color, 1.0)
599
+
600
+ # First, add the complete shape with all points (including N/A)
601
+ fig.add_trace(go.Scatterpolar(
602
+ r=system_df['Score'].tolist(),
603
+ theta=system_df['Category'].tolist(),
604
+ name=system_name,
605
+ fill='toself',
606
+ line=dict(color=line_color),
607
+ fillcolor=fill_color,
608
+ hoverinfo='skip', # Disable hover for the shape trace
609
+ showlegend=True
610
+ ))
611
+
612
+ # Then add separate trace for hover information on non-N/A points
613
+ non_na_df = system_df[system_df['Status'] != 'N/A']
614
+ if not non_na_df.empty:
615
+ fig.add_trace(go.Scatterpolar(
616
+ r=non_na_df['Score'].tolist(),
617
+ theta=non_na_df['Category'].tolist(),
618
+ mode='markers',
619
+ marker=dict(size=1, color='rgba(0,0,0,0)'), # Nearly invisible markers
620
+ customdata=list(zip(
621
+ non_na_df['Original Score'],
622
+ non_na_df['Status'],
623
+ non_na_df['Completed'],
624
+ non_na_df['Total']
625
+ )),
626
+ hovertemplate=(
627
+ f"<span style='background-color: {hover_color}; color: white; padding: 10px; display: block'>" +
628
+ "<b>%{theta}</b><br>" +
629
+ f"AI System: {system_name}<br>" +
630
+ "Score: %{customdata[0]}<br>" +
631
+ "Status: %{customdata[1]}<br>" +
632
+ "Evaluations completed: %{customdata[2]}/%{customdata[3]}" +
633
+ "</span>" +
634
+ "<extra></extra>"),
635
+ showlegend=False
636
+ ))
637
+
638
+ # Finally add N/A markers
639
+ na_df = system_df[system_df['Status'] == 'N/A']
640
+ if not na_df.empty:
641
+ fig.add_trace(go.Scatterpolar(
642
+ r=na_df['Score'].tolist(),
643
+ theta=na_df['Category'].tolist(),
644
+ mode='markers+lines',
645
+ line=dict(color='rgba(128, 128, 128, 0.3)', dash='dot'),
646
+ marker=dict(color='rgba(128, 128, 128, 0.3)', size=8),
647
+ customdata=list(zip(
648
+ na_df['Original Score'],
649
+ na_df['Status'],
650
+ na_df['Completed'],
651
+ na_df['Total']
652
+ )),
653
+ hovertemplate="<b>%{theta}</b><br>" +
654
+ f"AI System: {system_name}<br>" +
655
+ "Status: N/A<br>" +
656
+ "Evaluations completed: %{customdata[2]}/%{customdata[3]}<br>" +
657
+ "<extra></extra>",
658
+ showlegend=False
659
+ ))
660
+
661
+ # Update layout
662
+ fig.update_layout(
663
+ polar=dict(
664
+ radialaxis=dict(
665
+ visible=True,
666
+ range=[0, 100],
667
+ ticksuffix='%',
668
+ showline=True,
669
+ linewidth=1,
670
+ gridwidth=1,
671
+ gridcolor='rgba(0,0,0,0.1)',
672
+ ticktext=[f'{i}%' for i in range(0, 101, 20)],
673
+ tickvals=list(range(0, 101, 20))
674
+ ),
675
+ angularaxis=dict(
676
+ gridcolor='rgba(0,0,0,0.1)',
677
+ linecolor='rgba(0,0,0,0.1)',
678
+ )
679
+ ),
680
+ showlegend=True,
681
+ title=dict(
682
+ text='Category Completion Rates by AI System',
683
+ x=0.5,
684
+ xanchor='center'
685
+ ),
686
+ legend=dict(
687
+ yanchor="top",
688
+ y=1.2,
689
+ xanchor="left",
690
+ x=1.1
691
+ ),
692
+ margin=dict(t=100, b=100, l=100, r=100)
693
+ )
694
 
695
  return fig
696