Spaces: Running
Alvaro Romo committed
Commit 77175ac · Parent(s): 8621bb7
Initial commit
Browse files
- assets/html/01_model_info.html +12 -0
- assets/html/02_technical_detail.html +16 -0
- assets/html/03_linceses.html +8 -0
- assets/html/04_model_card.html +9 -0
- assets/html/05_checklist.html +8 -0
- assets/images/hf-logo.png +0 -0
- main.py +242 -0
- requirements.txt +9 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/__pycache__/__init__.cpython-38.pyc +0 -0
- src/__pycache__/check_validity.cpython-310.pyc +0 -0
- src/__pycache__/check_validity.cpython-38.pyc +0 -0
- src/__pycache__/submit.cpython-310.pyc +0 -0
- src/check_validity.py +173 -0
- src/submit.py +27 -0
assets/html/01_model_info.html
ADDED
@@ -0,0 +1,12 @@
+<div style="margin-left: 10px;">
+    <h4 style="margin: 0; color: #007BFF;">Model Information</h4>
+    <p>
+        Your model should be <strong>public</strong> on the Hub and follow the
+        <strong>username/model-id</strong> format (e.g., mistralai/Mistral-7B-v0.1).
+        Specify the <strong>revision</strong> (commit hash or branch) and <strong>model type</strong>.
+    </p>
+    <a href="https://huggingface.co/docs/hub/models-uploading" target="_blank"
+       style="color: #007BFF; text-decoration: underline; font-family: monospace;">
+        Model uploading guide →
+    </a>
+</div>
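These requirements can be checked programmatically before filling in the form; a minimal sketch using huggingface_hub (the repo id is a placeholder, and the call needs network access to the Hub):

import re
from huggingface_hub import HfApi

repo_id = "your-username/your-model"  # placeholder, not a real submission
assert re.fullmatch(r"[\w.-]+/[\w.-]+", repo_id), "expected username/model-id format"

info = HfApi().model_info(repo_id=repo_id, revision="main")
print(info.sha)      # commit hash usable as the submission revision
print(info.private)  # must be False: the model has to be public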
assets/html/02_technical_detail.html
ADDED
@@ -0,0 +1,16 @@
+<div style="margin-left: 10px;">
+    <h4 style="margin: 0; color: #007BFF;">Technical Details</h4>
+    <p style="font-size: 0.875rem; color: #6c757d; margin: 0; line-height: 1.5;">
+        Make sure your model can be <strong>loaded locally</strong> before submitting:
+    </p>
+    <div style="background-color: #f5f5f5; padding: 1rem; border-radius: 5px; font-family: monospace; color: #212529;">
+        <pre style="margin: 0; padding: 0; font-size: 1rem; white-space: pre-wrap; word-wrap: break-word;">
+from transformers import AutoConfig, AutoModel, AutoTokenizer
+
+config = AutoConfig.from_pretrained("your-username/your-model", revision="main")
+model = AutoModel.from_pretrained("your-username/your-model", revision="main")
+tokenizer = AutoTokenizer.from_pretrained("your-username/your-model", revision="main")
+        </pre>
+    </div>
+    <a href="https://huggingface.co/docs/transformers/installation" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Transformers documentation →</a>
+</div>
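For a faster pre-check than downloading full weights, loading only the config and tokenizer catches most misconfigurations; a lighter sketch of the same idea (placeholder repo id):

from transformers import AutoConfig, AutoTokenizer

repo = "your-username/your-model"  # placeholder
config = AutoConfig.from_pretrained(repo, revision="main")        # fails fast on a broken config
tokenizer = AutoTokenizer.from_pretrained(repo, revision="main")  # mirrors the validator's tokenizer test
print(getattr(config, "architectures", None))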
assets/html/03_linceses.html
ADDED
@@ -0,0 +1,8 @@
+<div style="margin-left: 10px;">
+    <h4 style="margin: 0; color: #007BFF;">License Requirements</h4>
+    <p style="font-size: 0.875rem; color: #6c757d;">
+        A <strong>license tag</strong> is required. <strong>Open licenses</strong>
+        (Apache, MIT, etc.) are strongly recommended.
+    </p>
+    <a href="https://huggingface.co/docs/hub/repositories-licenses" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">About model licenses →</a>
+</div>
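The license tag lives in the model card metadata and can be verified the same way the validator in main.py does; a sketch (placeholder repo id):

from huggingface_hub import HfApi

info = HfApi().model_info(repo_id="your-username/your-model")  # placeholder
try:
    print(info.cardData["license"])
except Exception:
    print("no license tag found; add one to the model card metadata")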
assets/html/04_model_card.html
ADDED
@@ -0,0 +1,9 @@
+<div style="margin-left: 10px;">
+    <h4 style="margin: 0; color: #007BFF;">Model Card Requirements</h4>
+    <p style="font-size: 0.875rem; color: #6c757d;">
+        Your model card must include: <strong>architecture</strong>,
+        <strong>training details</strong>, <strong>dataset information</strong>, intended use, limitations, and
+        <strong>performance metrics</strong>.
+    </p>
+    <a href="https://huggingface.co/docs/hub/model-cards" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Model cards guide →</a>
+</div>
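The card itself can be inspected locally before submitting; a sketch mirroring the checks in src/check_validity.py (placeholder repo id):

from huggingface_hub import ModelCard

card = ModelCard.load("your-username/your-model")  # placeholder
print(card.data.license)      # license metadata, required by the validator
print(len(card.text) >= 200)  # cards with fewer than 200 characters of text are rejected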
assets/html/05_checklist.html
ADDED
@@ -0,0 +1,8 @@
+<div style="margin-left: 10px;">
+    <h4 style="margin: 0; color: #007BFF;">Checklist</h4>
+    <p style="font-size: 0.875rem; color: #6c757d;">
+        Ensure your model is <strong>public</strong>, uses <strong>safetensors</strong> format,
+        has a <strong>license tag</strong>, and <strong>loads correctly</strong> with the provided code.
+    </p>
+    <a href="https://huggingface.co/docs/hub/repositories-getting-started" target="_blank" style="color: #007BFF; text-decoration: underline; font-family: monospace;">Sharing best practices →</a>
+</div>
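Whether the weights are stored as safetensors can be read off the repo's file list; a sketch (placeholder repo id):

from huggingface_hub import HfApi

info = HfApi().model_info(repo_id="your-username/your-model")  # placeholder
filenames = [s.rfilename for s in info.siblings]
print(any(name.endswith(".safetensors") for name in filenames))  # True if safetensors weights are present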
assets/images/hf-logo.png
ADDED
Binary image file
main.py
ADDED
@@ -0,0 +1,242 @@
+import streamlit as st
+import pandas as pd
+import re
+from datasets import load_dataset
+import src.check_validity as cv
+from src.submit import ModelSizeChecker
+import os
+from huggingface_hub import HfApi
+
+st.set_page_config(page_title="IVACE Leaderboard", layout="wide")
+
+
+def validate_model(model, precision, base_model, weight_type, use_chat_template):
+    """
+    Validate the model with several checks to ensure it can be evaluated.
+    :param model: HF model name
+    :param precision: model parameters data type
+    :param base_model: base model (if one is needed)
+    :param weight_type: type of weights loaded from the checkpoint (Original, Delta, or Adapter)
+    :param use_chat_template: whether to validate the model's chat template
+    :return: an error message string, or None if all checks pass
+    """
+    API = HfApi()
+
+    try:
+        model_info = API.model_info(repo_id=model, revision="main")
+    except Exception:
+        return "Could not get your model information. Please fill it up properly."
+
+    # TODO: decide if this makes sense. Maybe we need to allow uploading submissions more than once
+    # # Has it been submitted already?
+    # model_key = f"{model}_{model_info.sha}_{precision}"
+    # if model_key in requested_models:
+    #     return st.error(
+    #         f"The model '{model}' with revision '{model_info.sha}' and precision '{precision}' has already been submitted.")
+
+    # Check model size early
+    model_size, error_text = cv.get_model_size(model_info=model_info, precision=precision, base_model=base_model)
+    if model_size is None:
+        return error_text
+
+    # Absolute size limit for float16 and bfloat16
+    if precision in ["float16", "bfloat16"] and model_size > 100:
+        error_message = f"Sadly, models larger than 100B parameters cannot be submitted in {precision} precision at this time. " \
+                        f"Your model size: {model_size:.2f}B parameters."
+        return error_message
+
+    # Precision-adjusted size limit for 8bit, 4bit, and GPTQ
+    if precision in ["8bit", "4bit", "GPTQ"]:
+        size_checker = ModelSizeChecker(model=model, precision=precision, model_size_in_b=model_size)
+
+        if not size_checker.can_evaluate():
+            precision_factor = size_checker.get_precision_factor()
+            max_size = 140 * precision_factor
+            error_message = f"Sadly, models this big ({model_size:.2f}B parameters) cannot be evaluated automatically " \
+                            f"at the moment on our cluster. The maximum size for {precision} precision is {max_size:.2f}B parameters."
+            return error_message
+
+    architecture = "?"
+    # Is the model on the hub?
+    if weight_type in ["Delta", "Adapter"]:
+        base_model_on_hub, error, _ = cv.is_model_on_hub(
+            model_name=base_model, revision="main", token=None, test_tokenizer=True
+        )
+        if not base_model_on_hub:
+            return f'Base model "{base_model}" {error}'
+    if weight_type != "Adapter":
+        model_on_hub, error, model_config = cv.is_model_on_hub(model_name=model, revision=model_info.sha,
+                                                               test_tokenizer=True)
+        if not model_on_hub or model_config is None:
+            return f'Model "{model}" {error}'
+        if model_config is not None:
+            architectures = getattr(model_config, "architectures", None)
+            if architectures:
+                architecture = ";".join(architectures)
+
+    # Were the model card and license filled?
+    try:
+        model_info.cardData["license"]
+    except Exception:
+        return "Please select a license for your model"
+
+    modelcard_OK, error_msg, model_card = cv.check_model_card(model)
+    if not modelcard_OK:
+        return error_msg
+
+    # Check the chat template submission
+    if use_chat_template:
+        chat_template_valid, chat_template_error = cv.check_chat_template(model, "main")
+        if not chat_template_valid:
+            return chat_template_error
+
+    return None
+
+
+# Function to send email
+def log_submission(model_name, description, user_contact):
+    # TODO: send an email or log the submission in a dataset
+    ...
+
+    return True
+
+
+def get_url(html_content):
+    match = re.search(r'href=["\'](https?://[^\s"\']+)', html_content)
+    if match:
+        url = match.group(1)
+        return url
+
+    return None
+
+
+@st.cache_data
+def load_data():
+    try:
+        columns = ["eval_name", "Model", "Type", "Average ⬆️", "IFEval", "MMLU-PRO", "GPQA", "MUSR", "CO₂ cost (kg)"]
+        data = load_dataset("open-llm-leaderboard/contents")["train"].to_pandas().head(10)
+        data = data[columns]
+        # TODO: check whether this is still needed for rows coming from submit
+        data["Model"] = data["Model"].apply(get_url)
+        data.sort_values(by="Average ⬆️", ascending=False, inplace=True)
+        data.reset_index(drop=True, inplace=True)
+        return data
+    except FileNotFoundError:
+        st.error("open-llm-leaderboard/contents was not found in the hub")
+        return pd.DataFrame()
+
+
+leaderboard_data = load_data()
+tabs = st.tabs(["Leaderboard", "Submit model"])  # , "Vote for next model"
+
+with tabs[0]:
+    # logo
+    cols_logo = st.columns(5, vertical_alignment="center")
+    with cols_logo[2]:
+        st.image("assets/images/hf-logo.png", use_container_width=True)
+
+    # title
+    st.markdown(
+        """
+        <div style="text-align: center;">
+            <h1>IVACE LLM Leaderboard</h1>
+            <p style="font-size: 1.2rem;">
+                Comparing Large Language Models in an <span style="font-weight: 600;">open</span>
+                and <span style="font-weight: 600;">reproducible</span> way
+            </p>
+        </div>
+        """,
+        unsafe_allow_html=True,
+    )
+    leaderboard_cols = st.columns([0.1, 0.8, 0.1], vertical_alignment="center")
+    with leaderboard_cols[1]:
+        if not leaderboard_data.empty:
+            st.data_editor(
+                leaderboard_data,
+                column_config={
+                    "Model": st.column_config.LinkColumn("Model")
+                },
+                hide_index=False,
+            )
+        else:
+            st.write("No data found to display on leaderboard.")
+
+with tabs[1]:
+    st.header("Submit model")
+
+    def get_id_number(id_val):
+        html_template = f"""
+        <div style="display: flex; align-items: flex-start; margin-bottom: 1rem;">
+            <div style="
+                width: 32px;
+                height: 32px;
+                border-radius: 50%;
+                display: flex;
+                align-items: center;
+                justify-content: center;
+                border: 1px solid #007BFF;
+                color: #007BFF;
+                font-size: 0.875rem;
+                font-weight: 600;
+                background-color: transparent;">
+                {id_val}
+            </div>"""
+        return html_template
+
+    # create guide info
+    guide_info_list = []
+    html_path = "assets/html"
+    for filename in sorted(os.listdir(html_path)):  # sorted so the numbered guides render in order
+        file_path = os.path.join(html_path, filename)
+        with open(file_path, 'r', encoding='utf-8') as file:
+            guide_info_list.append(file.read())
+
+    # display, adding a number badge to each guide
+    for i, info_div in enumerate(guide_info_list):
+        st.markdown(get_id_number(i + 1) + info_div, unsafe_allow_html=True)
+
+    with st.form("submit_model_form"):
+        model_name = st.text_input("Model Name (format: user_name/model_name)",
+                                   help="Your model should be public on the Hub and follow the username/model-id format (e.g. mistralai/Mistral-7B-v0.1).")
+        description = st.text_area("Description", help="Add a description of the proposed model for the evaluation to help prioritize its evaluation")
+        user_contact = st.text_input("Your Contact Email", help="User e-mail to contact when there are updates")
+        precision_option = st.selectbox(
+            "Choose precision format:",
+            help="Size limits vary by precision: • FP16/BF16: up to 100B parameters • 8-bit: up to 280B parameters (2x) • 4-bit: up to 560B parameters (4x). Choose carefully, as an incorrect precision can cause evaluation errors.",
+            options=["float16", "bfloat16", "8bit", "4bit", "GPTQ"],
+            index=0
+        )
+        weight_type_option = st.selectbox(
+            "Select what type of weights are being loaded from the checkpoint provided:",
+            help="Original: complete model weights in safetensors format. Delta: weight differences from the base model (requires the base model for size calculation). Adapter: lightweight fine-tuning layers (requires the base model for size calculation).",
+            options=["Original", "Adapter", "Delta"],
+            index=0
+        )
+        base_model_name = st.text_input("Base model",
+                                        help="Required for delta weights or adapters. This information is used to identify the original model and calculate the total parameter count by combining base model and adapter/delta parameters.",
+                                        value="")
+        model_type = st.selectbox(
+            "Choose model type:",
+            help="🟢 Pretrained: base models trained on text using masked modeling. 🟩 Continuously Pretrained: extended training on an additional corpus. 🔶 Fine-tuned: domain-specific optimization. 💬 Chat: models using RLHF, DPO, or IFT for conversation. 🤝 Merge: combined weights without additional training.",
+            options=["🟢 Pretrained", "🟩 Continuously Pretrained", "🔶 Fine-tuned", "💬 Chat", "🤝 Merge"],
+        )
+        submit_button = st.form_submit_button("Submit Request")
+
+    if submit_button:
+        # validate model size, license, and chat template
+        use_chat_template = model_type == "💬 Chat"
+        validation_error = validate_model(model_name, precision_option, base_model_name, weight_type_option, use_chat_template)
+        if validation_error is not None:
+            st.error(validation_error)
+        elif not re.match(r"[^@]+@[^@]+\.[^@]+", user_contact):
+            st.error("Invalid email address.")
+        else:
+            if log_submission(model_name, description, user_contact):
+                st.success("Your request has been sent successfully.")
+            else:
+                st.error("Failed to send your request. Please try again later.")
+
+# with tabs[2]:
+#     st.header("Vote for next model")
+#     st.write("This section will be available soon.")
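As a worked example of the size limits validate_model enforces: float16/bfloat16 submissions are capped at a flat 100B parameters, while quantized submissions get 140B scaled by the precision factor from src/submit.py. A minimal sketch (these branches never touch the network, so the placeholder id is safe):

from src.submit import ModelSizeChecker

for precision in ("8bit", "4bit"):
    checker = ModelSizeChecker(model="your-username/your-model", precision=precision, model_size_in_b=0.0)  # placeholder
    print(precision, "limit:", 140 * checker.get_precision_factor(), "B")  # 8bit -> 280B, 4bit -> 560B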
requirements.txt
ADDED
@@ -0,0 +1,9 @@
+datasets==2.14.4
+pandas==2.0.3
+streamlit==1.40.1
+scikit-learn==1.3.2
+matplotlib==3.7.5
+sentence-transformers==2.2.2
+transformers==4.48.0
+huggingface-hub==0.27.1
+fsspec==2023.9.2
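To run the Space locally, the standard Streamlit workflow should apply: install the pinned dependencies, then launch the entry point.

pip install -r requirements.txt
streamlit run main.py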
src/__init__.py
ADDED
File without changes
src/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (156 Bytes)
src/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (154 Bytes)
src/__pycache__/check_validity.cpython-310.pyc
ADDED
Binary file (5.85 kB)
src/__pycache__/check_validity.cpython-38.pyc
ADDED
Binary file (5.9 kB)
src/__pycache__/submit.cpython-310.pyc
ADDED
Binary file (1.22 kB)
src/check_validity.py
ADDED
@@ -0,0 +1,173 @@
+import json
+import os
+import re
+import logging
+from collections import defaultdict
+from datetime import datetime, timedelta, timezone
+
+import huggingface_hub
+from huggingface_hub import ModelCard, hf_hub_download
+from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata, parse_safetensors_file_metadata
+from transformers import AutoConfig, AutoTokenizer
+
+# ht to @Wauplin, thank you for the snippet!
+# See https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard/discussions/317
+def check_model_card(repo_id: str) -> tuple[bool, str, ModelCard | None]:
+    # Returns operation status, an error message, and the card itself
+    try:
+        card = ModelCard.load(repo_id)
+    except huggingface_hub.utils.EntryNotFoundError:
+        return False, "Please add a model card to your model to explain how you trained/fine-tuned it.", None
+
+    # Enforce license metadata
+    if card.data.license is None and not ("license_name" in card.data and "license_link" in card.data):
+        return (
+            False,
+            (
+                "License not found. Please add a license to your model card using the `license` metadata or a"
+                " `license_name`/`license_link` pair."
+            ),
+            None,
+        )
+
+    # Enforce card content
+    if len(card.text) < 200:
+        return False, "Please add a description to your model card, it is too short.", None
+
+    return True, "", card
+
+
+def is_model_on_hub(
+    model_name: str, revision: str, token: str | None = None, trust_remote_code: bool = False, test_tokenizer: bool = False,
+) -> tuple[bool, str, AutoConfig]:
+    try:
+        config = AutoConfig.from_pretrained(
+            model_name, revision=revision, trust_remote_code=trust_remote_code, token=token, force_download=True)
+        if test_tokenizer:
+            try:
+                AutoTokenizer.from_pretrained(
+                    model_name, revision=revision, trust_remote_code=trust_remote_code, token=token,
+                )
+            except ValueError as e:
+                return (False, f"uses a tokenizer which is not in a transformers release: {e}", None)
+            except Exception:
+                return (
+                    False,
+                    "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?",
+                    None,
+                )
+        return True, None, config
+
+    except ValueError:
+        return (
+            False,
+            "needs to be launched with `trust_remote_code=True`. For safety reasons, we do not allow these models to be automatically submitted to the leaderboard.",
+            None,
+        )
+    except Exception as e:
+        if "You are trying to access a gated repo." in str(e):
+            return True, "uses a gated model.", None
+        return False, f"was not found or misconfigured on the hub! Error raised was {e.args[0]}", None
+
+
+def get_model_size(model_info: ModelInfo, precision: str, base_model: str | None) -> tuple[float | None, str]:
+    size_pattern = re.compile(r"(\d+\.)?\d+(b|m)")
+    safetensors = None
+    adapter_safetensors = None
+    # hacky way to check that the model is an adapter
+    is_adapter = "adapter_config.json" in (s.rfilename for s in model_info.siblings)
+
+    try:
+        if is_adapter:
+            if not base_model:
+                return None, "Adapter model submission detected. Please ensure the base model information is provided."
+
+            adapter_safetensors = parse_safetensors_file_metadata(model_info.id, "adapter_model.safetensors")
+            safetensors = get_safetensors_metadata(base_model)
+        else:
+            safetensors = get_safetensors_metadata(model_info.id)
+    except Exception as e:
+        logging.warning(f"Failed to get safetensors metadata for model {model_info.id}: {e!s}")
+
+    if safetensors is not None:
+        model_size = sum(safetensors.parameter_count.values())
+        if adapter_safetensors is not None:
+            model_size += sum(adapter_safetensors.parameter_count.values())
+        model_size = round(model_size / 1e9, 3)
+    else:
+        try:
+            size_match = re.search(size_pattern, model_info.id.lower())
+            if size_match:
+                model_size = size_match.group(0)
+                model_size = round(float(model_size[:-1]) if model_size[-1] == "b" else float(model_size[:-1]) / 1e3, 3)
+            else:
+                return None, "Unknown model size"
+        except AttributeError:
+            logging.warning(f"Unable to parse model size from ID: {model_info.id}")
+            return None, "Unknown model size"
+
+    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
+    model_size = size_factor * model_size
+
+    return model_size, ""
+
+
+def get_model_arch(model_info: ModelInfo):
+    return model_info.config.get("architectures", "Unknown")
+
+
+def check_chat_template(model: str, revision: str) -> tuple[bool, str]:
+    try:
+        # Attempt to download only the tokenizer_config.json file
+        config_file = hf_hub_download(
+            repo_id=model,
+            filename="tokenizer_config.json",
+            revision=revision,
+            repo_type="model"
+        )
+
+        # Read and parse the tokenizer_config.json file
+        with open(config_file, 'r') as f:
+            tokenizer_config = json.load(f)
+
+        # Check if chat_template exists in the tokenizer configuration
+        if 'chat_template' not in tokenizer_config:
+            return False, f"The model {model} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting, or submit without it."
+
+        return True, ""
+    except Exception as e:
+        return False, f"Error checking chat_template for model {model}: {str(e)}"
+
+
+def get_model_tags(model_card, model: str):
+    is_merge_from_metadata = False
+    is_moe_from_metadata = False
+
+    tags = []
+    if model_card is None:
+        return tags
+    if model_card.data.tags:
+        is_merge_from_metadata = any(
+            tag in model_card.data.tags for tag in ["merge", "moerge", "mergekit", "lazymergekit"]
+        )
+        is_moe_from_metadata = any(tag in model_card.data.tags for tag in ["moe", "moerge"])
+
+    is_merge_from_model_card = any(
+        keyword in model_card.text.lower() for keyword in ["merged model", "merge model", "moerge"]
+    )
+    if is_merge_from_model_card or is_merge_from_metadata:
+        tags.append("merge")
+    is_moe_from_model_card = any(keyword in model_card.text.lower() for keyword in ["moe", "mixtral"])
+    # Hardcoded because of a gating problem
+    if "Qwen/Qwen1.5-32B" in model:
+        is_moe_from_model_card = False
+    is_moe_from_name = "moe" in model.lower().replace("/", "-").replace("_", "-").split("-")
+    if is_moe_from_model_card or is_moe_from_name or is_moe_from_metadata:
+        tags.append("moe")
+
+    return tags
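A sketch of how these helpers compose, mirroring the call sites in main.py (placeholder repo id; requires Hub access):

import src.check_validity as cv
from huggingface_hub import HfApi

repo = "your-username/your-model"  # placeholder
info = HfApi().model_info(repo_id=repo, revision="main")

on_hub, error, config = cv.is_model_on_hub(model_name=repo, revision=info.sha, test_tokenizer=True)
size_b, size_error = cv.get_model_size(model_info=info, precision="float16", base_model=None)
card_ok, card_error, card = cv.check_model_card(repo)
print(on_hub, size_b, card_ok, cv.get_model_tags(card, repo))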
src/submit.py
ADDED
@@ -0,0 +1,27 @@
+from transformers import AutoConfig
+from dataclasses import dataclass
+
+
+@dataclass
+class ModelSizeChecker:
+    model: str
+    precision: str
+    model_size_in_b: float
+
+    def get_precision_factor(self):
+        if self.precision in ["float16", "bfloat16"]:
+            return 1
+        elif self.precision == "8bit":
+            return 2
+        elif self.precision == "4bit":
+            return 4
+        elif self.precision == "GPTQ":
+            config = AutoConfig.from_pretrained(self.model)
+            num_bits = int(config.quantization_config["bits"])
+            bits_to_precision_factor = {2: 8, 3: 6, 4: 4, 8: 2}
+            return bits_to_precision_factor.get(num_bits, 1)
+        else:
+            raise Exception(f"Unknown precision {self.precision}.")
+
+    def can_evaluate(self):
+        precision_factor = self.get_precision_factor()
+        return self.model_size_in_b <= 140 * precision_factor
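A usage sketch for the checker (placeholder model id; only the GPTQ branch would fetch a config from the Hub):

from src.submit import ModelSizeChecker

checker = ModelSizeChecker(model="your-username/your-model", precision="4bit", model_size_in_b=300.0)  # placeholder
print(checker.get_precision_factor())  # 4
print(checker.can_evaluate())          # True: 300 <= 140 * 4 = 560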