"""Streamlit dashboard showing progress of the MMLU translation review.

The script loads the ``CohereForAI/mmlu-translations-results`` dataset from
the Hugging Face Hub, then renders:

* the number of completed tasks per language (table + bar chart),
* the number of ``is_edit_required`` responses per Argilla user (table),
* the raw dataset as an interactive dataframe.

Configuration is read from the environment variables ``ARGILLA_API_URL``,
``ARGILLA_API_KEY`` and ``HF_TOKEN``.
"""
import os
from datetime import datetime

import argilla as rg
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
from datasets import load_dataset

ARGILLA_API_URL = os.environ.get("ARGILLA_API_URL")
ARGILLA_API_KEY = os.environ.get("ARGILLA_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

client = rg.Argilla(
    api_url=ARGILLA_API_URL,
    api_key=ARGILLA_API_KEY
)

# Build a lookup from (stringified) Argilla user id to username for the
# members of the 'cohere' workspace.
workspace = client.workspaces('cohere')
users_map = {str(user.id): user.username for user in workspace.users}

ds = load_dataset("CohereForAI/mmlu-translations-results", split="train", token=HF_TOKEN)
df = ds.to_pandas()

st.title("MMLU Translations Progress")

# Get the current time and round down to the top of the hour.
# NOTE(review): this is the server's naive local time at render, not the
# dataset's actual modification time — confirm that is the intended meaning
# of "Last updated".
now = datetime.now()
top_of_the_hour = now.replace(minute=0, second=0, microsecond=0)

# Display the timestamp in markdown format
st.markdown(f"**Last updated:** {top_of_the_hour.strftime('%Y-%m-%d %H:%M')}")

# Extract the language from the metadata column and create a new column.
# Assumes every 'metadata' entry is dict-like (has `.get`) — TODO confirm.
df['language'] = df['metadata'].apply(lambda meta: meta.get('language'))

# Count the occurrences of each language
language_counts = df['language'].value_counts()

# Plotting the bar chart using matplotlib
fig, ax = plt.subplots()
language_counts.plot(kind='bar', ax=ax)
ax.set_title('Number of Completed Tasks for Each Language')
ax.set_xlabel('Language')
ax.set_ylabel('Count')

# Convert the language counts to a DataFrame for display in the table.
# Column names are assigned explicitly because the names produced by
# `reset_index()` on a `value_counts()` result differ between pandas versions.
language_counts_df = language_counts.reset_index()
language_counts_df.columns = ['Language', 'Count']

# Display the table in the Streamlit app
st.table(language_counts_df)

# Display the plot in the Streamlit app
st.pyplot(fig)
# Streamlit re-executes this script on every interaction; close the figure so
# pyplot-managed figures do not accumulate in memory across reruns.
plt.close(fig)

# Extract user_id from the is_edit_required field in the response column and
# count occurrences. `explode()` yields NaN/None for records whose
# 'is_edit_required' list is empty or missing, so drop those before indexing
# into each response (otherwise the lookup raises TypeError).
edit_responses = (
    df['responses']
    .apply(lambda responses: responses['is_edit_required'])
    .explode()
    .dropna()
)
user_ids = edit_responses.apply(lambda response: response['user_id'])
user_id_counts = user_ids.value_counts()

# Map user IDs to usernames; keep the raw ID for users that are not (or no
# longer) members of the workspace instead of showing NaN.
user_id_counts.index = user_id_counts.index.map(
    lambda user_id: users_map.get(str(user_id), user_id)
)

# Convert the user ID counts to a DataFrame for display in the table
user_id_counts_df = user_id_counts.reset_index()
user_id_counts_df.columns = ['Username', 'Count']

# Display the table of username counts in the Streamlit app
st.table(user_id_counts_df)

st.dataframe(df)