model_explorer4

Paused

File size: 1,741 Bytes

1efd233
cd39699
c1fc3a9
 
68f9e87
1e1efc2
27bcfa0
c1fc3a9
27bcfa0
 
 
 
 
1efd233
2ccc88d
b50be2b
2ccc88d
c1fc3a9
028d122
6bf2756
86fbb40
68f9e87
6bf2756
1efd233
c1fc3a9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0d18b6e
c1fc3a9

import os
import subprocess

import gradio as gr
import spaces
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM

# Install required package.
# The child environment extends os.environ rather than replacing it: passing a
# dict that holds only the flag would strip PATH, HOME and any virtualenv
# variables from the shell that runs pip.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably tells flash-attn's
# build to skip compiling CUDA kernels - confirm against the package's setup.
# The return code is intentionally not checked, so a failed install does not
# stop the app from starting.
subprocess.run(
    "pip install flash-attn --no-build-isolation",
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    shell=True,
)

# Authenticate with the Hugging Face Hub only when HF_TOKEN is configured.
# Calling login() with token=None would fall back to an interactive prompt,
# which cannot be answered on a headless host.
hf_token = os.getenv("HF_TOKEN")
if hf_token:
    login(token=hf_token, add_to_git_credential=True)

# Function to get the model summary
@spaces.GPU
def get_model_summary(model_name):
    """Load a causal-LM checkpoint and return its printed module structure.

    Args:
        model_name: Identifier handed to
            ``AutoModelForCausalLM.from_pretrained``.

    Returns:
        The result of ``str(model)`` for the loaded model after it has been
        moved to the selected device.
    """
    # Use CUDA when torch reports it as available; otherwise fall back to CPU.
    if torch.cuda.is_available():
        target = torch.device("cuda")
    else:
        target = torch.device("cpu")

    # NOTE(review): trust_remote_code=True is applied to whatever name the
    # caller supplies - confirm that running repository-provided code is
    # acceptable for this deployment.
    loaded = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
    )
    loaded = loaded.to(target)
    return str(loaded)

# Create the Gradio Blocks interface
with gr.Blocks() as demo:
    with gr.Row():
        # Left column: model-name input, clickable examples, submit button.
        with gr.Column():
            textbox = gr.Textbox(label="Model Name")
            # Each example row holds a single value that fills `textbox`.
            examples = gr.Examples(
                examples=[
                    [repo_id]
                    for repo_id in (
                        "google/gemma-7b",
                        "microsoft/Phi-3-mini-4k-instruct",
                        "meta-llama/Meta-Llama-3-8B",
                        "mistralai/Mistral-7B-Instruct-v0.3",
                        "vikhyatk/moondream2",
                        "microsoft/Phi-3-vision-128k-instruct",
                        "openbmb/MiniCPM-Llama3-V-2_5",
                        "google/paligemma-3b-mix-224",
                        "HuggingFaceM4/idefics2-8b-chatty",
                        "mistralai/Codestral-22B-v0.1",
                    )
                ],
                inputs=textbox,
            )
            submit_button = gr.Button("Submit")
        # Right column: text area that receives the model summary.
        with gr.Column():
            output = gr.Textbox(label="Output", lines=20)

    # Clicking Submit passes the textbox value to get_model_summary and
    # writes the returned string into the output box.
    submit_button.click(
        fn=get_model_summary,
        inputs=textbox,
        outputs=output,
    )

# Launch the interface
demo.launch()