|
import gradio as gr |
|
import pandas as pd |
|
import plotly.graph_objects as go |
|
|
|
def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    Language names are read from ``data/covered_languages.txt`` (each line
    may hold several comma-separated names) and translated to codes using
    the ``Language`` and ``Code`` columns of
    ``data/merged_language_list_with_duplicates.csv``. When a language
    appears on several CSV rows, the code of its first row is used.

    Returns:
        list: One code per distinct covered language, in the order the
        languages first appear in the text file.

    Raises:
        ValueError: If a covered language has no matching row in the CSV.
    """
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')
    # Explicit encoding so non-ASCII language names do not depend on the
    # platform's locale default.
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Flatten the comma-separated lines. Blank lines and trailing commas
    # yield empty names, which are skipped rather than reported as unknown.
    names = (name.strip() for line in lines for name in line.split(','))
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # First-match lookup table: setdefault keeps the code of the first row
    # for a language and avoids rescanning the frame for every name.
    code_by_language = {}
    for language, code in zip(all_languages['Language'], all_languages['Code']):
        code_by_language.setdefault(language, code)

    # Explicit check instead of `assert`, which is removed under `python -O`.
    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            f"no code found for {missing}"
        )

    return [code_by_language[lang] for lang in covered_languages]
|
|
|
def build_dataframes(covered_language_codes):
    """Split the clean language list by whether a language has a lead.

    Args:
        covered_language_codes: Codes of the languages that have a lead;
            matched against the ``Code`` column of
            ``data/merged_language_list_clean.csv``.

    Returns:
        tuple: ``(languages_with_lead, languages_without_lead)`` — the rows
        whose ``Code`` is in ``covered_language_codes`` and the remaining
        rows, each keeping its original index and column layout.
    """
    catalogue = pd.read_csv('data/merged_language_list_clean.csv')

    # One boolean mask drives both halves of the partition.
    has_lead = catalogue['Code'].isin(covered_language_codes)

    return catalogue[has_lead], catalogue[~has_lead]
|
|
|
def create_progress_bar(progress):
    """Build a single-row stacked horizontal bar of lead coverage.

    The bar has two segments: the number of languages with a lead and the
    number without one. Each non-empty segment is labelled with its count.

    Args:
        progress: Currently unused. The segment sizes are taken from the
            module-level ``languages_with_lead`` and
            ``languages_without_lead`` objects, which must exist before
            this function is called.

    Returns:
        plotly.graph_objects.Figure: The configured figure.
    """
    top_labels = ['With lead', 'Without lead']
    colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']

    # One inner list per bar row, one entry per stacked segment. This used
    # to be a flat list of two ints, so len(x_data[0]) and xd[i] below
    # raised TypeError on every call.
    x_data = [[len(languages_with_lead), len(languages_without_lead)]]
    y_data = ['Progress']

    fig = go.Figure()

    # One trace per (segment, row); barmode='stack' places them end to end.
    for i, (label, color) in enumerate(zip(top_labels, colors)):
        for xd, yd in zip(x_data, y_data):
            fig.add_trace(go.Bar(
                x=[xd[i]], y=[yd],
                orientation='h',
                marker=dict(
                    color=color,
                    line=dict(color='rgb(248, 248, 249)', width=1)
                ),
                hoverinfo='text',
                hovertext=f"{label} records: {xd[i]}"
            ))

    fig.update_layout(
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 1]
        ),
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 0.5]
        ),
        barmode='stack',
        paper_bgcolor='rgb(248, 248, 255)',
        plot_bgcolor='rgb(248, 248, 255)',
        margin=dict(l=120, r=10, t=140, b=80),
        showlegend=False
    )

    annotations = []

    for yd, xd in zip(y_data, x_data):
        # Row label, placed just left of the x-axis domain (starts at 0.15).
        annotations.append(dict(xref='paper', yref='y',
                                x=0.14, y=yd,
                                xanchor='right',
                                text=str(yd),
                                font=dict(family='Arial', size=14,
                                          color='rgb(67, 67, 67)'),
                                showarrow=False, align='right'))

        # Centre each count inside its segment. `offset` is the x position
        # where the current segment starts; empty segments get no label.
        offset = 0
        for count in xd:
            if count > 0:
                annotations.append(dict(xref='x', yref='y',
                                        x=offset + (count / 2), y=yd,
                                        text=str(count),
                                        font=dict(family='Arial', size=14,
                                                  color='rgb(248, 248, 255)'),
                                        showarrow=False))
            offset += count

    # NOTE(review): height=80 is smaller than the top + bottom margins set
    # above (140 + 80) — confirm the figure renders as intended.
    fig.update_layout(annotations=annotations, height=80)
    return fig
|
|
|
# Dashboard layout: a title, a plot placeholder, and one tab per language group.
with gr.Blocks() as demo:
    gr.Markdown(value="## Language Leads Dashboard")

    # Load both tables once; the tabs below display them as-is.
    languages_with_lead, languages_without_lead = build_dataframes(
        get_covered_languages()
    )

    with gr.Row():
        progress_bar_output = gr.Plot(label="Language Stats")

    with gr.Tab(label="Looking for leads!"):
        gr.Markdown(
            value="These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9)."
        )
        gr.DataFrame(value=languages_without_lead)

    with gr.Tab(label="Languages with leads"):
        gr.Markdown(value="We found at least one lead for these languages:")
        gr.DataFrame(value=languages_with_lead)

# Start the Gradio server for the dashboard.
demo.launch()