File size: 2,289 Bytes
450856d
d5df872
450856d
b41a0ac
 
7ac4c38
6609394
450856d
7ac4c38
 
 
450856d
 
7ac4c38
 
 
 
 
 
 
 
 
3c986cb
450856d
 
 
d5df872
6609394
 
 
 
 
 
d5df872
b41a0ac
 
 
 
 
 
 
 
 
a3bc6dc
b41a0ac
 
 
a3bc6dc
 
 
 
 
 
 
b41a0ac
 
 
90ed9b3
 
ae90632
90ed9b3
 
7ac4c38
 
 
90ed9b3
 
7ac4c38
90ed9b3
7ac4c38
90ed9b3
 
7ac4c38
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
import streamlit as st
import os
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import argilla as rg
from datetime import datetime


# Connection settings and credentials are read from the process environment;
# any variable that is not set resolves to None.
ARGILLA_API_URL = os.getenv("ARGILLA_API_URL")
ARGILLA_API_KEY = os.getenv("ARGILLA_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

# Connect to the Argilla server configured through the environment.
client = rg.Argilla(
    api_url=ARGILLA_API_URL,
    api_key=ARGILLA_API_KEY,
)

# Looking a workspace up by name may yield None when it does not exist or is
# not visible to this API key. Stop with a readable message rather than
# failing with an AttributeError on `workspace.users` below.
workspace = client.workspaces('cohere')
if workspace is None:
    st.error(
        "Argilla workspace 'cohere' was not found. "
        "Check ARGILLA_API_URL and ARGILLA_API_KEY."
    )
    st.stop()

# Lookup table from Argilla user id (stringified) to username, used to label
# per-user statistics further down.
users_map = {str(user.id): user.username for user in workspace.users}

# Load the results dataset from the Hugging Face Hub (HF_TOKEN authenticates
# the request) and convert it to a DataFrame for analysis.
ds = load_dataset("CohereForAI/mmlu-translations-results", split="train", token=HF_TOKEN)

df = ds.to_pandas()

st.title("MMLU Translations Progress")

# Take the current time and round it down to the top of the hour.
# astimezone() with no argument attaches the server's local timezone, so the
# rendered stamp can name its zone instead of being an ambiguous wall-clock
# time for viewers in other regions.
now = datetime.now().astimezone()
top_of_the_hour = now.replace(minute=0, second=0, microsecond=0)

# Display the timestamp (with timezone name) in markdown format.
st.markdown(f"**Last updated:** {top_of_the_hour.strftime('%Y-%m-%d %H:%M %Z')}")

# Copy the 'language' entry of each record's metadata mapping into its own
# column so it can be aggregated.
df['language'] = df['metadata'].apply(lambda x: x.get('language'))

# Number of records per language.
language_counts = df['language'].value_counts()

# Tabular view of the same counts, with explicit column headers.
language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']
st.table(language_counts_df)

# Bar chart of the counts, drawn on an explicit figure/axes pair.
fig, ax = plt.subplots()
language_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Completed Tasks for Each Language')
ax.set_xlabel('Language')
ax.set_ylabel('Count')
st.pyplot(fig)

# pyplot keeps every figure it creates in a global registry. Streamlit reruns
# this script on each interaction, so close the figure once it has been
# rendered; otherwise figures accumulate for the lifetime of the server.
plt.close(fig)


# Collect the user_id of every 'is_edit_required' response. explode() yields
# one row per response, but turns an empty (or missing) response list into
# NaN; drop those rows first so the 'user_id' lookup never hits a non-mapping.
user_ids = (
    df['responses']
    .apply(lambda x: x['is_edit_required'])
    .explode()
    .dropna()
    .apply(lambda x: x['user_id'])
)
user_id_counts = user_ids.value_counts()

# Map user IDs to usernames. Ids that are not in users_map keep their raw id
# as the label, so unknown users stay distinguishable instead of all
# collapsing into NaN rows.
user_id_counts.index = user_id_counts.index.map(
    lambda uid: users_map.get(str(uid), uid)
)

# Convert the counts to a DataFrame with explicit column headers for display.
user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']

# Display the table of username counts in the Streamlit app.
st.table(user_id_counts_df)

# Show the full DataFrame as an interactive table.
st.dataframe(data=df)