# Snapshot of commit 72246a1 (file size: 4,904 bytes).
import gradio as gr
import pandas as pd
import plotly.graph_objects as go
def get_covered_languages():
    """Return the language codes of every language that already has a lead.

    Reads the language/code table from
    ``data/merged_language_list_with_duplicates.csv`` (columns ``Language``
    and ``Code``) and the covered language names from
    ``data/covered_languages.txt``, where each line may hold several
    comma-separated names.

    Returns:
        list: One code per distinct covered language, ordered by the first
        appearance of the language in the text file.

    Raises:
        ValueError: If a covered language name has no row in the CSV.
    """
    all_languages = pd.read_csv('data/merged_language_list_with_duplicates.csv')

    # read_csv decodes as UTF-8 by default; read the names the same way so
    # non-ASCII language names compare equal regardless of the system locale.
    with open("data/covered_languages.txt", encoding="utf-8") as f:
        lines = f.read().splitlines()

    # Split comma-separated lines, trim whitespace, and drop the empty names
    # produced by blank lines or trailing commas.
    names = (name.strip() for line in lines for name in line.split(','))
    # dict.fromkeys de-duplicates while keeping a deterministic order.
    covered_languages = list(dict.fromkeys(name for name in names if name))

    # The CSV may list a language more than once; keep its first code.
    first_rows = all_languages.drop_duplicates(subset='Language', keep='first')
    code_by_language = dict(zip(first_rows['Language'], first_rows['Code']))

    missing = [lang for lang in covered_languages if lang not in code_by_language]
    if missing:
        # Explicit raise instead of assert: asserts vanish under ``python -O``.
        raise ValueError(
            "Mismatch between covered languages and their codes: "
            "no code found for " + ", ".join(missing)
        )
    return [code_by_language[lang] for lang in covered_languages]
def build_dataframes(covered_language_codes):
    """Split the clean language list by whether a language has a lead.

    Args:
        covered_language_codes: Codes of the languages that have a lead;
            matched against the ``Code`` column of
            ``data/merged_language_list_clean.csv``.

    Returns:
        tuple: ``(languages_with_lead, languages_without_lead)``, two
        DataFrames holding the rows whose code is, respectively is not,
        among ``covered_language_codes``.
    """
    language_table = pd.read_csv('data/merged_language_list_clean.csv')

    # A single membership mask drives both halves of the split.
    has_lead = language_table['Code'].isin(covered_language_codes)
    return language_table[has_lead], language_table[~has_lead]
def create_progress_bar(progress):
    """Build a one-row stacked horizontal bar of languages with/without a lead.

    The segment sizes are the row counts of the module-level
    ``languages_with_lead`` and ``languages_without_lead`` DataFrames.

    Args:
        progress: Not read by this function; kept so the signature is
            unchanged for existing callers.

    Returns:
        plotly.graph_objects.Figure: A stacked bar with one segment per
        label, each non-empty segment annotated with its count.
    """
    top_labels = ['With lead', 'Without lead']
    colors = ['rgba(38, 24, 74, 0.8)', 'rgba(190, 192, 213, 1)']
    # One list of segment values per bar row. This must be nested: the loops
    # below index it as x_data[row][segment], and a flat list of ints made
    # len(x_data[0]) raise TypeError.
    x_data = [[len(languages_with_lead), len(languages_without_lead)]]
    y_data = ['Progress']

    fig = go.Figure()
    # One trace per segment; barmode='stack' lays them end to end.
    for i, (label, color) in enumerate(zip(top_labels, colors)):
        for xd, yd in zip(x_data, y_data):
            fig.add_trace(go.Bar(
                x=[xd[i]], y=[yd],
                orientation='h',
                marker=dict(
                    color=color,
                    line=dict(color='rgb(248, 248, 249)', width=1)
                ),
                hoverinfo='text',
                hovertext=f"{label} records: {xd[i]}"
            ))

    fig.update_layout(
        xaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 1]
        ),
        yaxis=dict(
            showgrid=False,
            showline=False,
            showticklabels=False,
            zeroline=False,
            domain=[0.15, 0.5]
        ),
        barmode='stack',
        paper_bgcolor='rgb(248, 248, 255)',
        plot_bgcolor='rgb(248, 248, 255)',
        margin=dict(l=120, r=10, t=140, b=80),
        showlegend=False
    )

    annotations = []
    for yd, xd in zip(y_data, x_data):
        # Row label, placed just left of the x-axis domain (which starts at 0.15).
        annotations.append(dict(xref='paper', yref='y',
                                x=0.14, y=yd,
                                xanchor='right',
                                text=str(yd),
                                font=dict(family='Arial', size=14,
                                          color='rgb(67, 67, 67)'),
                                showarrow=False, align='right'))
        # Count label centred in each non-empty segment. `offset` is the
        # x position where the current segment starts.
        offset = 0
        for value in xd:
            if value > 0:
                annotations.append(dict(xref='x', yref='y',
                                        x=offset + value / 2, y=yd,
                                        text=str(value),
                                        font=dict(family='Arial', size=14,
                                                  color='rgb(248, 248, 255)'),
                                        showarrow=False))
            offset += value

    # NOTE(review): height=80 is smaller than the top + bottom margins
    # (140 + 80) set above; confirm the bar renders at a usable size.
    fig.update_layout(annotations=annotations, height=80)
    return fig
# Dashboard layout: a title, a plot slot, and one tab per lead status.
with gr.Blocks() as demo:
    gr.Markdown("## Language Leads Dashboard")

    # These two frames are module-level names; create_progress_bar reads them.
    languages_with_lead, languages_without_lead = build_dataframes(
        get_covered_languages()
    )

    with gr.Row():
        # NOTE(review): no figure is passed to this plot and
        # create_progress_bar is not called in the code shown here, so the
        # plot presumably renders empty. Confirm whether that is intended.
        progress_bar_output = gr.Plot(label="Language Stats")

    with gr.Tab("Looking for leads!"):
        gr.Markdown("These languages don't have a lead yet! Would you like to lead one of them? Sign up using [this form](https://forms.gle/mFCMXNRjxvyFvW5q9).")
        gr.DataFrame(languages_without_lead)

    with gr.Tab("Languages with leads"):
        gr.Markdown("We found at least one lead for these languages:")
        gr.DataFrame(languages_with_lead)

demo.launch()