# Streamlit app for submitting system predictions to the GEM benchmark.
#
# Flow: the participant uploads a `submission.json` file and enters a Hub
# access token; on submit, the file is pushed to a fresh private dataset repo
# under the `GEM-submissions` organisation, an evaluation job is requested
# from the AutoNLP backend, and the backend response is appended to a shared
# `logs.jsonl` file in the logs repo.
#
# NOTE: this header is deliberately a `#` comment block rather than a module
# docstring so that nothing extra can be rendered on the page.
import json
import os
import shutil
from datetime import datetime
from pathlib import Path

import jsonlines
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import HfApi, Repository

from utils import http_post, validate_json

if Path(".env").is_file():
    load_dotenv(".env")

HF_TOKEN = os.getenv("HF_TOKEN")
AUTONLP_USERNAME = os.getenv("AUTONLP_USERNAME")
HF_AUTONLP_BACKEND_API = os.getenv("HF_AUTONLP_BACKEND_API")
LOCAL_REPO = "submission_repo"
LOGS_REPO = "submission-logs"

## TODO ##
# 1. Add check that fields are nested under `tasks` field correctly
# 2. Add check that names of tasks and datasets are valid
# 3. Decide whether we should have 1 dataset repo per participant or 1 repo per submission

# The markdown bodies live at column 0 in module-level constants so that list
# items and the fenced code block keep their line structure regardless of how
# deeply the `st.markdown` call itself is nested.
INTRO_MARKDOWN = """
Welcome to the [GEM benchmark](https://gem-benchmark.com/)! GEM is a benchmark
environment for Natural Language Generation with a focus on its Evaluation, both
through human annotations and automated Metrics.

GEM aims to:

- measure NLG progress across many NLG tasks across languages.
- audit data and models and present results via data cards and model robustness reports.
- develop standards for evaluation of generated text using both automated and human metrics.

Use this page to submit your system's predictions to the benchmark.
"""

SUBMISSION_FORMAT_MARKDOWN = """
Please follow this JSON format for your `submission.json` file:

```json
{
  "submission_name": "An identifying name of your system",
  "param_count": 123, # The number of parameters your system has.
  "description": "An optional brief description of the system that will be shown on the results page",
  "tasks":
    {
      "dataset_identifier": {
        "values": ["output-0", "output-1", "..."], # A list of system outputs.
        "keys": ["gem_id-0", "gem_id-1", ...] # A list of GEM IDs.
        }
    }
}
```

Here, `dataset_identifier` is the identifier of the dataset followed by an identifier of the set the outputs were created from, for example `_validation` or `_test`.
For example, the `mlsum_de` test set has the identifier `mlsum_de_test`.

The `keys` field is needed to avoid accidental shuffling that will impact your metrics.
Simply add a list of IDs from the `gem_id` column of each evaluation dataset in the same order as your values.

Please see the sample submission below:
"""

###########
### APP ###
###########
st.title("GEM Submissions")
st.markdown(INTRO_MARKDOWN)

with st.form(key="form"):
    # Flush local repo
    shutil.rmtree(LOCAL_REPO, ignore_errors=True)
    submission_errors = 0
    # Pre-bind the values the submit step reads so a missing upload or token
    # is reported as a form error instead of surfacing as a NameError.
    json_data = None
    user_info = None

    uploaded_file = st.file_uploader("Upload submission.json file", type=["json"])

    if uploaded_file:
        if uploaded_file.name != "submission.json":
            st.error("⛔ Invalid filename. Please upload a submission.json file.")
            submission_errors += 1
        else:
            try:
                json_data = json.loads(str(uploaded_file.read(), "utf-8"))
            except (UnicodeDecodeError, json.JSONDecodeError) as err:
                # A malformed upload is a user error, not an application crash.
                st.error(f"⛔ Could not parse submission.json: {err}")
                submission_errors += 1
            else:
                is_valid, message = validate_json(json_data)
                if is_valid:
                    st.success(message)
                else:
                    st.error(message)
                    submission_errors += 1

    with st.expander("Submission format"):
        st.markdown(SUBMISSION_FORMAT_MARKDOWN)
        with open("sample-submission.json", "r", encoding="utf-8") as f:
            example_submission = json.load(f)
        st.json(example_submission)

    token = st.text_input(
        "Enter 🤗 Hub access token",
        type="password",
        help="You can generate an access token via your 🤗 Hub settings. See the [docs](https://huggingface.co/docs/hub/security#user-access-tokens) for more details",
    )

    if token:
        try:
            user_info = HfApi().whoami(token)
        except Exception:
            # Any failure to resolve the token is reported to the user as an
            # invalid token; the submission is blocked via the error counter.
            st.error("⛔ Invalid access token")
            submission_errors += 1

    submit_button = st.form_submit_button("Make Submission")

    if submit_button:
        # Both inputs are mandatory; without these checks the counter would
        # stay at 0 and the submit step would run with nothing to submit.
        if uploaded_file is None:
            st.error("⛔ Please upload a submission.json file.")
            submission_errors += 1
        if not token:
            st.error("⛔ Please enter a 🤗 Hub access token.")
            submission_errors += 1

    if submit_button and submission_errors == 0:
        st.write("⏳ Preparing submission for evaluation ...")
        user_name = user_info["name"]
        submission_name = json_data["submission_name"]
        submission_name_formatted = submission_name.lower().replace(" ", "-").replace("/", "-")
        print(submission_name_formatted)
        submission_time = str(int(datetime.now().timestamp()))

        try:
            # Create submission dataset under benchmarks ORG
            dataset_repo_url = (
                "https://huggingface.co/datasets/GEM-submissions/"
                f"{user_name}__{submission_name_formatted}__{submission_time}"
            )
            repo = Repository(
                local_dir=LOCAL_REPO,
                clone_from=dataset_repo_url,
                repo_type="dataset",
                private=True,
                use_auth_token=HF_TOKEN,
            )
            submission_metadata = {"benchmark": "gem", "type": "prediction", "submission_name": submission_name}
            repo.repocard_metadata_save(submission_metadata)

            with open(f"{LOCAL_REPO}/submission.json", "w", encoding="utf-8") as f:
                json.dump(json_data, f)

            # TODO: add informative commit msg
            commit_url = repo.push_to_hub()
            if commit_url is not None:
                commit_sha = commit_url.split("/")[-1]
            else:
                # Fall back to the current HEAD commit URL when the push does
                # not return one.
                commit_sha = repo.git_head_commit_url().split("/")[-1]

            submission_id = submission_name + "__" + commit_sha + "__" + submission_time

            payload = {
                "username": AUTONLP_USERNAME,
                "dataset": "GEM/references",
                "task": 1,
                "model": "gem",
                # NOTE(review): the repo created above is named
                # `{user_name}__{submission_name_formatted}__{submission_time}`,
                # but only `{user_name}` is sent here -- confirm which form the
                # backend expects (see TODO 3 at the top of the file).
                "submission_dataset": f"GEM-submissions/{user_name}",
                "submission_id": submission_id,
                "col_mapping": {},
                "split": "test",
                "config": None,
            }
            json_resp = http_post(
                path="/evaluate/create", payload=payload, token=HF_TOKEN, domain=HF_AUTONLP_BACKEND_API
            ).json()
            # An error response may lack these keys; read them defensively so
            # the user still gets the error message below instead of a KeyError.
            job_id = json_resp.get("id")
            job_status = json_resp.get("status")

            logs_repo_url = f"https://huggingface.co/datasets/GEM-submissions/{LOGS_REPO}"
            logs_repo = Repository(
                local_dir=LOGS_REPO,
                clone_from=logs_repo_url,
                repo_type="dataset",
                private=True,
                use_auth_token=HF_TOKEN,
            )
            json_resp["submission_name"] = submission_name

            # Append the backend response to the shared log by rewriting the
            # whole file; a missing log file is treated as an empty log.
            logs_file = f"{LOGS_REPO}/logs.jsonl"
            if Path(logs_file).is_file():
                with jsonlines.open(logs_file) as reader:
                    lines = list(reader)
            else:
                lines = []
            lines.append(json_resp)
            with jsonlines.open(logs_file, mode="w") as writer:
                writer.write_all(lines)
            logs_repo.push_to_hub(commit_message=f"Submission with job ID {job_id}")

            if job_status == 1:
                st.success(
                    f"✅ Submission {submission_name} was successfully submitted for evaluation with job ID {job_id}"
                )
            else:
                st.error("🙈 Oh noes, there was an error submitting your submission! Please contact the organisers")
        finally:
            # Flush local repo -- also on failure, so a crashed submission does
            # not leave stale clones behind for the next run.
            shutil.rmtree(LOCAL_REPO, ignore_errors=True)
            shutil.rmtree(LOGS_REPO, ignore_errors=True)