Spaces:
Sleeping
Sleeping
File size: 2,289 Bytes
450856d d5df872 450856d b41a0ac 7ac4c38 6609394 450856d 7ac4c38 450856d 7ac4c38 3c986cb 450856d d5df872 6609394 d5df872 b41a0ac a3bc6dc b41a0ac a3bc6dc b41a0ac 90ed9b3 ae90632 90ed9b3 7ac4c38 90ed9b3 7ac4c38 90ed9b3 7ac4c38 90ed9b3 7ac4c38 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
import streamlit as st
import os
from datasets import load_dataset
import pandas as pd
import matplotlib.pyplot as plt
import argilla as rg
from datetime import datetime
# Connection settings and credentials come from the environment; a variable
# that is not set yields None.
ARGILLA_API_URL = os.getenv("ARGILLA_API_URL")
ARGILLA_API_KEY = os.getenv("ARGILLA_API_KEY")
HF_TOKEN = os.getenv("HF_TOKEN")

# Argilla client, used here to look up the users of the 'cohere' workspace.
client = rg.Argilla(api_url=ARGILLA_API_URL, api_key=ARGILLA_API_KEY)
workspace = client.workspaces('cohere')

# Lookup table: stringified user id -> username, one entry per workspace user.
users_map = {
    str(member.id): member.username
    for member in list(workspace.users)
}

# Load the "train" split of the results dataset (authenticated with HF_TOKEN)
# and convert it to a pandas DataFrame for the aggregations further down.
ds = load_dataset(
    "CohereForAI/mmlu-translations-results",
    split="train",
    token=HF_TOKEN,
)
df = ds.to_pandas()
# Page heading.
st.title("MMLU Translations Progress")

# Take the current (naive, local) time and truncate it to the start of the
# hour by zeroing every component below the hour.
now = datetime.now()
top_of_the_hour = now.replace(
    microsecond=0,
    second=0,
    minute=0,
)

# Render the truncated timestamp as a markdown line with a bold label.
st.markdown(f"**Last updated:** {top_of_the_hour.strftime('%Y-%m-%d %H:%M')}")
# Per-language progress: derive a 'language' column from each row's metadata,
# count rows per language, then show the counts as a table and a bar chart.

# Rows whose metadata is missing (None/NaN) get None instead of raising
# AttributeError; value_counts() below ignores those rows by default.
df['language'] = df['metadata'].apply(
    lambda meta: meta.get('language') if hasattr(meta, 'get') else None
)

# Number of rows per language.
language_counts = df['language'].value_counts()

# Tabular view of the same counts, with display-friendly column names.
language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']

# Display the table in the Streamlit app
st.table(language_counts_df)

# Bar chart of the counts, rendered below the table.
fig, ax = plt.subplots()
try:
    language_counts.plot(kind='bar', ax=ax)
    ax.set_title('Number of Completed Tasks for Each Language')
    ax.set_xlabel('Language')
    ax.set_ylabel('Count')
    st.pyplot(fig)
finally:
    # pyplot keeps every figure it creates registered until it is closed.
    # Streamlit re-executes this script on each rerun, so without an explicit
    # close the figures (and their memory) accumulate for the process lifetime.
    plt.close(fig)
# Per-user progress: count how many 'is_edit_required' responses each user
# submitted, show the counts by username, then show the full DataFrame.

# Each row's responses['is_edit_required'] is expected to be a list-like of
# dicts carrying a 'user_id'. explode() gives every response its own row; rows
# with an empty or missing list become NaN/None and are dropped so the
# 'user_id' lookup never runs on a non-dict value.
user_ids = (
    df['responses']
    .apply(lambda resp: resp.get('is_edit_required') if hasattr(resp, 'get') else None)
    .explode()
    .dropna()
    .apply(lambda answer: answer['user_id'])
)
user_id_counts = user_ids.value_counts()

# Map user IDs to usernames. An id that is not in users_map keeps its raw id
# as the label instead of turning into NaN, so its row stays identifiable.
user_id_counts.index = user_id_counts.index.map(
    lambda uid: users_map.get(str(uid), str(uid))
)

# Convert the counts to a DataFrame for display in the table
user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']

# Display the table of username counts in the Streamlit app
st.table(user_id_counts_df)

# Show the full underlying DataFrame for inspection.
st.dataframe(df)