import gradio as gr
import pandas as pd
import requests

from info.citation import CITATION_TEXT
from info.deployment import DEPLOY_TEXT
from info.programs import PROGRAMS_TEXT
from info.submit import SUBMIT_TEXT
from info.train_a_model import LLM_BENCHMARKS_TEXT
from info.validated_chat_models import VALIDATED_CHAT_MODELS
from src.processing import filter_benchmarks_table


# NOTE(review): the emoji in the UI strings below were reconstructed from
# mis-decoded (mojibake) text. Where the garbled form kept enough bytes the
# repair is mechanical; the icons on the "LLM Leaderboard" tab, the
# "Deployment Tips" tab and the "Citation" accordion had lost their
# distinguishing bytes and are a best guess -- confirm against the deployed app.

# Filter vocabularies. Each list is the single source of truth for a checkbox
# group's choices, its default selection, and the filter applied to the table
# shown on first render, so the three cannot drift apart.
HARDWARE_CHOICES = ["Gaudi", "Xeon", "GPU Max", "Arc GPU", "Core Ultra"]
INFRA_CHOICES = ["Intel Developer Cloud", "AWS", "Azure", "Google Cloud Platform", "Local"]
# NOTE(review): the checkbox choices previously said "Intel Liftoff" while the
# default value and the initial filter said "Intel Software Liftoff"; the
# latter is used everywhere now -- confirm it matches the labels in the CSV.
AFFILIATION_CHOICES = [
    "No Affiliation",
    "Intel Innovator",
    "Intel Student Ambassador",
    "Intel Software Liftoff",
    "Intel Labs",
    "Other",
]
SIZE_CHOICES = [1, 3, 5, 7, 13, 35, 60, 70, 100]
PRECISION_CHOICES = ["fp32", "fp16", "bf16", "int8", "fp8", "int4"]
MODEL_TYPE_CHOICES = ["pretrained", "fine-tuned", "chat-models", "merges/moerges"]

# Benchmark results backing the leaderboard table (path is relative to the
# working directory the app is started from).
initial_df = pd.read_csv("./status/leaderboard_status_030424.csv")


def update_df(hw_selected: list, platform_selected: list, affiliation_selected: list,
              size_selected: list, precision_selected: list, type_selected: list):
    """Return the leaderboard table restricted to the selected filter values.

    Thin wrapper that forwards the six selections, together with the
    module-level ``initial_df``, to ``filter_benchmarks_table``. It is used
    both for the first render and as the ``change`` callback of every filter
    checkbox group.

    Args:
        hw_selected: Selected training-hardware labels.
        platform_selected: Selected training-infrastructure labels.
        affiliation_selected: Selected Intel program affiliation labels.
        size_selected: Selected model sizes.
        precision_selected: Selected precision labels.
        type_selected: Selected model-type labels.

    Returns:
        Whatever ``filter_benchmarks_table`` returns; it is handed to
        ``gr.Dataframe``, so presumably a ``pandas.DataFrame`` (confirm
        against ``src.processing``).
    """
    return filter_benchmarks_table(
        df=initial_df,
        hw_selected=hw_selected,
        platform_selected=platform_selected,
        affiliation_selected=affiliation_selected,
        size_selected=size_selected,
        precision_selected=precision_selected,
        type_selected=type_selected,
    )


# Table shown before the user touches any filter. It is built from the same
# defaults the checkbox groups start with, so the first render agrees with
# the visible filter state.
initial_filtered_df = update_df(
    HARDWARE_CHOICES,
    INFRA_CHOICES,
    AFFILIATION_CHOICES,
    SIZE_CHOICES,
    PRECISION_CHOICES,
    MODEL_TYPE_CHOICES,
)

demo = gr.Blocks()

with demo:
    # ---- Page header -----------------------------------------------------
    gr.HTML("""<h1 align="center" id="space-title">🤗Powered-by-Intel LLM Leaderboard 💻</h1>""")
    gr.Markdown(
        "This leaderboard is designed to evaluate, score, and rank open-source LLMs "
        "that have been pre-trained or fine-tuned on Intel Hardware 🦾. "
        "To submit your model for evaluation, "
        "follow the instructions and complete the form in the 🏎️ Submit tab. "
        "Models submitted to the leaderboard are evaluated "
        "on the Intel Developer Cloud ☁️. The evaluation platform consists of "
        "Gaudi Accelerators and Xeon CPUs running benchmarks from "
        "the [Eleuther AI Language Model Evaluation Harness]"
        "(https://github.com/EleutherAI/lm-evaluation-harness)."
    )
    gr.Markdown(
        "![DevHub-image](assets/DevHub_Logo.png) Join 5000+ developers on the "
        "[Intel DevHub Discord](https://discord.gg/yNYNxK2k) to get support with your submission and "
        "talk about everything from GenAI, HPC, to Quantum Computing."
    )
    gr.Markdown(
        "A special shout-out to the 🤗 "
        "[Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) "
        "team for generously sharing their code and best "
        "practices, ensuring that AI Developers have a valuable and enjoyable tool at their disposal."
    )

    # ---- Chat accordion --------------------------------------------------
    with gr.Accordion("Chat with Top Models on the Leaderboard Here 💬", open=False):
        chat_model_dropdown = gr.Dropdown(
            choices=VALIDATED_CHAT_MODELS,
            label="Select a leaderboard model to chat with. ",
            multiselect=False,
            # Guard against an empty model list instead of raising IndexError
            # while the page is being built.
            value=VALIDATED_CHAT_MODELS[0] if VALIDATED_CHAT_MODELS else None,
            interactive=True,
        )

        # Not read anywhere in the code visible here; kept because it is a
        # module-level name other code may rely on.
        chat_model_selection = 'Intel/neural-chat-7b-v1-1'

    # ---- Tabs ------------------------------------------------------------
    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏆 LLM Leaderboard", elem_id="llm-benchmark-table", id=0):
            with gr.Row():
                with gr.Column():
                    filter_hw = gr.CheckboxGroup(
                        choices=HARDWARE_CHOICES,
                        label="Select Training Platform*",
                        elem_id="compute_platforms",
                        value=list(HARDWARE_CHOICES),
                    )
                    filter_platform = gr.CheckboxGroup(
                        choices=INFRA_CHOICES,
                        label="Training Infrastructure*",
                        elem_id="training_infra",
                        value=list(INFRA_CHOICES),
                    )
                    filter_affiliation = gr.CheckboxGroup(
                        choices=AFFILIATION_CHOICES,
                        label="Intel Program Affiliation",
                        elem_id="program_affiliation",
                        value=list(AFFILIATION_CHOICES),
                    )

                with gr.Column():
                    filter_size = gr.CheckboxGroup(
                        choices=SIZE_CHOICES,
                        label="Model Sizes (Billion of Parameters)",
                        elem_id="parameter_size",
                        value=list(SIZE_CHOICES),
                    )
                    filter_precision = gr.CheckboxGroup(
                        choices=PRECISION_CHOICES,
                        label="Model Precision",
                        elem_id="precision",
                        value=list(PRECISION_CHOICES),
                    )
                    filter_type = gr.CheckboxGroup(
                        choices=MODEL_TYPE_CHOICES,
                        label="Model Types",
                        elem_id="model_types",
                        value=list(MODEL_TYPE_CHOICES),
                    )

            gradio_df_display = gr.Dataframe(value=initial_filtered_df)

            # Every filter re-runs the same callback with the current state
            # of all six groups, so one loop replaces six identical
            # registrations. The list order matches update_df's parameters.
            filter_inputs = [
                filter_hw,
                filter_platform,
                filter_affiliation,
                filter_size,
                filter_precision,
                filter_type,
            ]
            for filter_component in filter_inputs:
                filter_component.change(
                    fn=update_df,
                    inputs=filter_inputs,
                    outputs=[gradio_df_display],
                )

        with gr.TabItem("🧰 Train a Model", elem_id="getting-started", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Deployment Tips", elem_id="deployment-tips", id=2):
            gr.Markdown(DEPLOY_TEXT, elem_classes="markdown-text")

        with gr.TabItem("👩‍💻 Developer Programs", elem_id="hardward-program", id=3):
            gr.Markdown(PROGRAMS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🏎️ Submit", elem_id="submit", id=4):
            gr.Markdown(SUBMIT_TEXT, elem_classes="markdown-text")
            with gr.Row():
                gr.Markdown("# Submit Model for Evaluation 🏎️", elem_classes="markdown-text")
            with gr.Row():
                with gr.Column():
                    model_name_textbox = gr.Textbox(
                        label="Model name",
                        info=""" Name of Model in the Hub. For example: 'Intel/neural-chat-7b-v1-1'""",
                    )
                    revision_name_textbox = gr.Textbox(label="Revision commit (Branch)", placeholder="main")
                    # NOTE(review): "chat models" here vs "chat-models" in the
                    # leaderboard filter -- left as-is because whatever
                    # consumes submissions may expect this label; confirm.
                    model_type = gr.Dropdown(
                        choices=["pretrained", "fine-tuned", "chat models", "merges/moerges"],
                        label="Model type",
                        multiselect=False,
                        value="pretrained",
                        interactive=True,
                    )
                    hw_type = gr.Dropdown(
                        choices=HARDWARE_CHOICES,
                        label="Training Hardware",
                        multiselect=False,
                        value="Gaudi",
                        interactive=True,
                    )
                    terms = gr.Checkbox(
                        # Adjacent literals instead of a backslash continuation,
                        # which would embed the next line's indentation in the
                        # label text.
                        label="Check if you have read and agreed to terms and conditions "
                              "associated with submitting a model to the leaderboard.",
                        value=False,
                        interactive=True,
                    )
                    # NOTE(review): no click handler is registered for this
                    # button in the code visible here.
                    submit_button = gr.Button("🤗 Submit Eval 💻")
                    submission_result = gr.Markdown()

                with gr.Column():
                    precision = gr.Dropdown(
                        choices=PRECISION_CHOICES,
                        label="Precision",
                        multiselect=False,
                        value="fp16",
                        interactive=True,
                    )
                    weight_type = gr.Dropdown(
                        choices=["Original", "Adapter", "Delta"],
                        label="Weights type",
                        multiselect=False,
                        value="Original",
                        interactive=True,
                        info=" Select the appropriate weights. If you have fine-tuned or adapted "
                             "a model with PEFT or Delta-Tuning you likely have "
                             "LoRA Adapters or Delta Weights.",
                    )
                    training_infra = gr.Dropdown(
                        choices=INFRA_CHOICES,
                        label="Training Infrastructure",
                        multiselect=False,
                        value="Intel Developer Cloud",
                        interactive=True,
                        info=" Select the infrastructure that the model was developed on. "
                             "Local is the ideal choice for Core Ultra, ARC GPUs, and "
                             "local data center infrastructure.",
                    )
                    # NOTE(review): these labels differ from the leaderboard
                    # filter ("Intel Innovator", "Intel Student Ambassador",
                    # "Intel Software Liftoff"); left as-is pending
                    # confirmation of what the submission consumer expects.
                    affiliation = gr.Dropdown(
                        choices=["No Affiliation", "Innovator", "Student Ambassador",
                                 "Intel Liftoff", "Intel Labs", "Other"],
                        label="Affiliation with Intel",
                        multiselect=False,
                        value="No Affiliation",
                        interactive=True,
                        info=""" Select "No Affiliation" if not part of any Intel programs.""",
                    )
                    base_model_name_textbox = gr.Textbox(label="Base model (for delta or adapter weights)")

    # ---- Footer ----------------------------------------------------------
    with gr.Accordion("📙 Citation", open=False):
        citation = gr.Textbox(
            value=CITATION_TEXT,
            lines=6,
            label="Use the following to cite this content",
        )

    gr.Markdown(
        """<div style="display: flex; justify-content: center;"> <p> """
        "Intel, the Intel logo and Gaudi are trademarks of Intel Corporation or its subsidiaries. "
        "*Other names and brands may be claimed as the property of others. "
        "</p> </div>"
    )

demo.launch(share=False)