import gradio as gr
import pandas as pd
import plotly.graph_objects as go


def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    Reads the language/code table from
    ``data/merged_language_list_with_duplicates.csv`` (columns ``Language``
    and ``Code``) and the covered language names from
    ``data/covered_languages.txt``. A line of the text file may hold several
    comma-separated names.

    Returns:
        A list with one code per distinct covered language name, ordered by
        language name.

    Raises:
        ValueError: If a covered language name has no row in the table.
    """
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')

    # Read as UTF-8 explicitly (the default pandas uses for the CSV) so that
    # non-ASCII language names compare equal regardless of the platform locale.
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split comma-separated entries, de-duplicate, and drop the empty names
    # produced by blank lines or trailing commas. Sorting keeps the result
    # deterministic across runs.
    names = {name.strip() for line in lines for name in line.split(',')}
    covered_languages = sorted(names - {''})

    # The table contains duplicate language names; keep the first code listed
    # for each name and build the lookup once instead of scanning per language.
    code_by_language = (
        all_languages.drop_duplicates(subset='Language')
        .set_index('Language')['Code']
        .to_dict()
    )

    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        # Explicit raise instead of `assert`: asserts are stripped under -O,
        # and the message should say which names need fixing.
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            f"no code found for {missing}"
        )

    return [code_by_language[lang] for lang in covered_languages]


def build_dataframes(covered_language_codes):
    """Split the clean language list into languages with and without a lead.

    Args:
        covered_language_codes: Codes of the languages that have a lead.

    Returns:
        A ``(languages_with_lead, languages_without_lead)`` tuple of
        DataFrames, both taken from ``data/merged_language_list_clean.csv``.
    """
    clean_languages = pd.read_csv('data/merged_language_list_clean.csv')

    # Compute the membership mask once and use it for both halves.
    has_lead = clean_languages['Code'].isin(covered_language_codes)
    return clean_languages[has_lead], clean_languages[~has_lead]


def create_piechart(with_lead=None, without_lead=None):
    """Build the pie chart comparing languages with and without a lead.

    Args:
        with_lead: Sized collection (e.g. DataFrame) of languages that have a
            lead. Defaults to the module-level ``languages_with_lead``.
        without_lead: Sized collection of languages that have no lead.
            Defaults to the module-level ``languages_without_lead``.

    Returns:
        A plotly ``Figure`` holding a single two-slice pie trace.
    """
    # Falling back to the module-level frames keeps the zero-argument call
    # working for any existing caller.
    if with_lead is None:
        with_lead = languages_with_lead
    if without_lead is None:
        without_lead = languages_without_lead

    colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']
    fig = go.Figure(
        go.Pie(
            labels=["With lead", "Without lead"],
            values=[len(with_lead), len(without_lead)],
            marker=dict(colors=colors),
        )
    )
    fig.update_layout(
        title_text="Language Leads",
        height=500,
        margin=dict(l=10, r=10, t=50, b=10),
    )
    fig.update_traces(textposition='inside', textinfo='label+value')
    return fig


with gr.Blocks() as demo:
    gr.Markdown("## Language Leads Dashboard")
    languages_with_lead, languages_without_lead = build_dataframes(
        get_covered_languages()
    )

    with gr.Row():
        piechart = create_piechart(languages_with_lead, languages_without_lead)
        gr.Plot(value=piechart, label="Language Leads")

    with gr.Tab("Looking for leads!"):
        gr.Markdown(
            "These languages don't have a lead yet! "
            "Would you like to lead one of them? "
            "Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9)."
        )
        gr.DataFrame(languages_without_lead)

    with gr.Tab("Languages with leads"):
        gr.Markdown("We found at least one lead for these languages:")
        gr.DataFrame(languages_with_lead)

demo.launch()