# Source: Hugging Face Space "Navid-AI/The-Arabic-Rag-Leaderboard"
# (running on CPU Upgrade hardware; file size: 10,827 bytes)

import gradio as gr
import pandas as pd
import json
import os
from pathlib import Path
from huggingface_hub import HfApi, hf_hub_download

# Module-level Hub client used below for model lookups and request uploads.
api = HfApi()

# Hub namespace under which the requests dataset lives.
OWNER = "Navid-AI"
# Dataset repository holding the evaluation request files
# (read from and written to by the functions below).
DATASET_REPO_ID = f"{OWNER}/requests-dataset"

def load_retrieval_results():
    """Return the retrieval leaderboard as a DataFrame.

    Reads ``results/retrieval_results.json`` located next to this module.
    When that file does not exist, an empty frame carrying the leaderboard
    columns is returned instead.
    """
    results_file = Path(__file__).parent / "results" / "retrieval_results.json"
    if results_file.exists():
        return pd.read_json(results_file)

    # No stored results yet: hand back an empty table with the expected schema.
    leaderboard_columns = [
        "Model",
        "Revision",
        "Precision",
        "Task",
        "Model Size (in Millions)",
        "Embedding Dimension",
        "Max Tokens",
        "Num Likes",
        "Web Search Dataset (Overall Score)",
    ]
    return pd.DataFrame(columns=leaderboard_columns)

def get_model_info(model_id, verbose=False):
    """Fetch popularity, license and size metadata for a model on the Hub.

    Args:
        model_id: Repository id of the model on the Hugging Face Hub.
        verbose: When true, print a short summary of the fetched metadata.

    Returns:
        A tuple ``(num_downloads, num_likes, license, num_parameters,
        supported_precisions)``. ``num_parameters`` is the safetensors
        parameter total expressed in millions (rounded to an integer),
        ``supported_precisions`` lists the keys of the safetensors parameter
        map, and ``license`` is ``None`` when the model card declares none.

    Raises:
        ValueError: If the Hub reports no safetensors metadata for the model,
            so neither the size nor the precisions can be determined.
        Exception: Whatever ``api.model_info`` raises (e.g. unknown model).
    """
    model_info = api.model_info(model_id)
    num_downloads = model_info.downloads
    num_likes = model_info.likes

    # card_data is None for repositories without model-card metadata; treat
    # that (or a card without the key) as "no license declared".
    try:
        model_license = model_info.card_data["license"]
    except (KeyError, TypeError):
        model_license = None

    safetensors = model_info.safetensors
    if safetensors is None:
        # Fail with an explicit message instead of an opaque AttributeError.
        raise ValueError(
            f"Model '{model_id}' exposes no safetensors metadata; "
            "parameter count and precisions are unavailable."
        )
    num_parameters = round(safetensors.total / 1e6)
    supported_precisions = list(safetensors.parameters.keys())

    if verbose:
        print(f"Model '{model_id}' has {num_downloads} downloads, {num_likes} likes, and is licensed under {model_license}.")
        # The count is already rounded to whole millions (not billions).
        print(f"The model has approximately {num_parameters} million parameters.")
        print(f"The model supports the following precisions: {supported_precisions}")
    return num_downloads, num_likes, model_license, num_parameters, supported_precisions

def fetch_model_information(model_name):
    """Gradio handler: look up a model on the Hub to pre-fill the submit form.

    Args:
        model_name: Hub repository id typed by the user.

    Returns:
        A 3-tuple matching the handler's three outputs: an update for the
        precision dropdown (choices set to the precisions reported for the
        model, first one selected), the parameter count in millions, and the
        license.

    Raises:
        gr.Error: If the model information cannot be fetched. The error must
            be raised (not merely constructed) for Gradio to display it, and
            raising also avoids returning too few values for the outputs.
    """
    try:
        _, _, model_license, num_parameters, supported_precisions = get_model_info(model_name)
    except Exception as e:
        raise gr.Error(f"Error: Could not fetch model information. {str(e)}") from e

    # Keep the dropdown usable even when no precision is reported.
    if not supported_precisions:
        supported_precisions = [None]
    return gr.update(choices=supported_precisions, value=supported_precisions[0]), num_parameters, model_license

def _has_matching_row(df, criteria):
    """Return True if any row of ``df`` equals every column/value in ``criteria``.

    A wanted value of ``None`` matches null cells, because ``== None`` never
    matches in pandas.
    """
    if df.empty:
        return False
    # reindex() supplies all-NaN columns for any that are absent, so a table
    # lacking a field simply fails to match instead of raising KeyError.
    rows = df.reindex(columns=list(criteria))
    mask = pd.Series(True, index=rows.index)
    for column, wanted in criteria.items():
        cells = rows[column]
        mask &= cells.isna() if wanted is None else cells == wanted
    return bool(mask.any())


def submit_model(model_name, revision, precision, params, license, task):
    """Gradio handler: queue a model for evaluation in the requests dataset.

    The submission is rejected when the same model/revision/precision/task is
    already in the results table or in the pending/finished request folders.
    Otherwise a JSON request file is uploaded under ``pending/`` in the
    requests dataset repository.

    Args:
        model_name: Full Hub id in the form ``org/model``.
        revision: Model revision to evaluate.
        precision: Precision label; ``'Missing'`` or ``None`` means unknown.
        params: Parameter count as entered in the form (stored as-is).
        license: License string as entered in the form (stored as-is).
        task: ``"Retriever"`` or ``"Reranker"``.

    Returns:
        A Markdown string to show in the result area, or ``None`` when the
        outcome was reported through a ``gr.Info`` notification.

    Raises:
        gr.Error: For an unsupported task, a model that cannot be found on the
            Hub, or a failed upload.
    """
    if task not in ("Retriever", "Reranker"):
        raise gr.Error(f"Error: Task '{task}' is not supported.")

    # Validate the name format before doing any network work.
    org_model = model_name.split('/')
    if len(org_model) != 2:
        return "**Please enter the full model name including the organization or username, e.g., 'intfloat/multilingual-e5-large-instruct'**"
    org, model_id = org_model

    # NOTE(review): both tasks read the retrieval results table, as in the
    # original code — confirm whether rerankers should use a separate table.
    results_df = load_retrieval_results()

    # Handle 'Missing' precision (also tolerate an unset dropdown value).
    if precision is None or precision == 'Missing':
        precision = None
    else:
        precision = precision.strip().lower()

    # Check if model is already evaluated
    if _has_matching_row(results_df, {'Model': model_name, 'Revision': revision,
                                      'Precision': precision, 'Task': task}):
        gr.Info(f"Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' has already been evaluated.")
        return None

    request_key = {'model_name': model_name, 'revision': revision,
                   'precision': precision, 'task': task}

    # Check if model is in pending requests
    if _has_matching_row(load_requests('pending'), request_key):
        gr.Info(f"Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' is already in the evaluation queue.")
        return None

    # Check if model is in finished requests
    if _has_matching_row(load_requests('finished'), request_key):
        gr.Info(f"Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' has already been evaluated.")
        return None

    # Check if model exists on HuggingFace Hub
    try:
        api.model_info(model_name)
    except Exception as e:
        print(f"Error fetching model info: {e}")
        raise gr.Error(f"Error: Model '{model_name}' not found on HuggingFace Hub.") from e

    # Prepare and serialize the submission
    submission = {
        "model_name": model_name,
        "license": license,
        "revision": revision,
        "precision": precision,
        "status": "PENDING",
        "params": params,
        "task": task
    }
    submission_json = json.dumps(submission, indent=2)

    # Define the file path in the repository
    precision_str = precision if precision else 'Missing'
    file_path_in_repo = f"pending/{org}/{model_id}_eval_request_{revision}_{precision_str}_{task.lower()}.json"

    # Upload the submission to the dataset repository
    try:
        hf_api_token = os.environ.get('HF_TOKEN', None)
        api.upload_file(
            path_or_fileobj=submission_json.encode('utf-8'),
            path_in_repo=file_path_in_repo,
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        print(f"Error uploading file: {e}")
        raise gr.Error(f"Error: Could not submit model '{model_name}' for evaluation.") from e

    return f"**Model '{model_name}' with revision '{revision}' and precision '{precision}' for task '{task}' has been submitted successfully.**"

def load_requests(status_folder):
    """Load every evaluation request stored under one status folder.

    Args:
        status_folder: Top-level folder of the requests dataset to read:
            'pending', 'finished', or 'failed'.

    Returns:
        A DataFrame with one row per JSON request file that could be
        downloaded and parsed. The frame is empty when the repository cannot
        be listed or the folder holds no readable JSON files.
    """
    hf_api_token = os.environ.get('HF_TOKEN', None)

    try:
        # List files in the dataset repository
        files_info = api.list_repo_files(
            repo_id=DATASET_REPO_ID,
            repo_type="dataset",
            token=hf_api_token
        )
    except Exception as e:
        print(f"Error accessing dataset repository: {e}")
        return pd.DataFrame()  # Return empty DataFrame if repository not found or inaccessible

    folder_prefix = f"{status_folder}/"
    requests_data = []
    for file_path in files_info:
        # Only JSON files inside the requested folder are request records.
        if not (file_path.startswith(folder_prefix) and file_path.endswith('.json')):
            continue
        try:
            local_file_path = hf_hub_download(
                repo_id=DATASET_REPO_ID,
                filename=file_path,
                repo_type="dataset",
                token=hf_api_token
            )
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(local_file_path, 'r', encoding='utf-8') as f:
                requests_data.append(json.load(f))
        except Exception as e:
            # Best effort: one unreadable file must not hide the others.
            print(f"Error loading file {file_path}: {e}")

    return pd.DataFrame(requests_data)


def submit_gradio_module(task_type):
    """Build the "Submit <task_type>" tab and return it.

    The tab holds the submission form (model name, precision, license,
    revision, parameter count), wires the fetch and submit handlers, and shows
    the pending, finished and failed requests loaded when the tab is built.
    """
    task_state = gr.State(value=task_type)
    with gr.Tab(f"Submit {task_type}") as submitter_tab:
        with gr.Row(equal_height=True):
            model_name_input = gr.Textbox(
                label="Model",
                placeholder="Enter the full model name from HuggingFace Hub (e.g., intfloat/multilingual-e5-large-instruct)",
                scale=4,
            )
            fetch_data_button = gr.Button(value="Auto Fetch Model Info", variant="secondary")

        with gr.Row():
            precision_input = gr.Dropdown(
                choices=["F16", "F32", "BF16", "I8", "U8", "I16"],
                label="Precision",
                value="F16",
            )
            license_input = gr.Textbox(
                label="License",
                placeholder="Enter the license type (Generic one is 'Open' in case no License is provided)",
                value="Open",
            )
            revision_input = gr.Textbox(
                label="Revision",
                placeholder="main",
                value="main",
            )

        with gr.Row():
            params_input = gr.Textbox(
                label="Params (in Millions)",
                placeholder="Enter the approximate number of parameters as Integer (e.g., 7, 13, 30, 70 ...)",
            )

        submit_button = gr.Button("Submit Model", variant="primary")
        submission_result = gr.Markdown()

        # Clicking the fetch button and pressing Enter in the model box both
        # run the same lookup and fill the same three fields.
        for register_fetch in (fetch_data_button.click, model_name_input.submit):
            register_fetch(
                fetch_model_information,
                inputs=[model_name_input],
                outputs=[precision_input, params_input, license_input],
            )

        submit_button.click(
            submit_model,
            inputs=[
                model_name_input,
                revision_input,
                precision_input,
                params_input,
                license_input,
                task_state,
            ],
            outputs=submission_result,
        )

        # Load all three request tables before rendering the status section.
        status_frames = [
            (status, load_requests(status))
            for status in ("pending", "finished", "failed")
        ]

        gr.Markdown("## Evaluation Status")
        for status, frame in status_frames:
            with gr.Accordion(f"{status.capitalize()} Evaluations ({len(frame)})", open=False):
                if frame.empty:
                    gr.Markdown(f"No {status} evaluations.")
                else:
                    gr.Dataframe(frame)

    return submitter_tab