File size: 3,326 Bytes
ad6330a
1efd233
250a59f
27bcfa0
c1fc3a9
ad6330a
1efd233
c1fc3a9
3d4f4ef
c1fc3a9
 
47e0177
4b29566
 
 
ad6330a
 
4b29566
 
a6d3ba4
4b29566
 
 
a6d3ba4
 
4b29566
 
 
 
 
 
c1fc3a9
b1c8b37
 
fa0d21c
 
ad6330a
 
 
c1fc3a9
 
ad6330a
c1fc3a9
 
 
 
1a4a0d3
5debe34
bf6c5c6
1a4a0d3
32ccac4
1a4a0d3
 
5debe34
1a4a0d3
 
c1fc3a9
7e7282e
 
4b29566
 
 
 
c1fc3a9
4b29566
0d18b6e
c1fc3a9
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
import os
import gradio as gr
from utils import get_model_summary, install_flash_attn#, authenticate_hf

# Install required package.
# `install_flash_attn` is imported from the local `utils` module and is called
# here at module level, before any of the UI below is constructed.
# NOTE(review): presumably this installs the flash-attn package at startup --
# confirm against utils.install_flash_attn.
install_flash_attn()

# Create the Gradio Blocks interface.
# Layout: one row with two columns. The left column holds the model-name
# input, two example pickers, the Submit button and some reference links; the
# right column holds the architecture output and the error output.
with gr.Blocks(theme="sudeepshouche/minimalist") as demo:
    with gr.Row():
        with gr.Column():
            textbox = gr.Textbox(label="Model Name", placeholder="Enter the model name here OR select an example below...", lines=1)
            gr.Markdown("### Vision Models")
            # Both example pickers target the same textbox (inputs=textbox).
            vision_examples = gr.Examples(
                examples=[
                    ["google/paligemma-3b-mix-224"],
                    ["google/paligemma-3b-ft-refcoco-seg-224"],
                    ["llava-hf/llava-v1.6-mistral-7b-hf"],
                    ["xtuner/llava-phi-3-mini-hf"],
                    ["xtuner/llava-llama-3-8b-v1_1-transformers"],
                    ["vikhyatk/moondream2"],
                    ["openbmb/MiniCPM-Llama3-V-2_5"],
                    ["microsoft/Phi-3-vision-128k-instruct"],
                    ["HuggingFaceM4/idefics2-8b-chatty"],
                    ["microsoft/llava-med-v1.5-mistral-7b"],
                ],
                inputs=textbox
            )

            gr.Markdown("### Other Models")
            other_examples = gr.Examples(
                examples=[
                    ["NousResearch/Meta-Llama-3-8B-Instruct"],
                    ["dwb2023/llama38binstruct_summarize"],
                    ["dwb2023/llama38binstruct_summarize_v3"],
                    ["dwb2023/llama38binstruct_summarize_v4"],
                    ["dwb2023/mistral-7b-instruct-quantized"],
                    ["mistralai/Mistral-7B-Instruct-v0.2"],
                    ["mistralai/Mistral-7B-Instruct-v0.3"],
                    ["google/gemma-7b"],
                    ["microsoft/Phi-3-mini-4k-instruct"],
                    ["meta-llama/Meta-Llama-3-8B"],
                ],
                inputs=textbox
            )
            submit_button = gr.Button("Submit")

            gr.Markdown("""
            #### 🧠📖 Where to get started with Vision Language Models!!! 🔧🧩 
            
            - [Hugging Face overview of VLMs](https://huggingface.co/blog/vlms#overview-of-open-source-vision-language-models)
            - [Blog Post on PaliGemma Model Capabilities and Use Cases](https://huggingface.co/blog/paligemma#model-capabilities)
            
            Keep an eye on the evolution of the [Model Explorer from Google](https://ai.google.dev/edge/model-explorer#two_ways_to_use_model_explorer).  It didn't work initially for some of the VLM "fusion" model types I was initially looking at, but certainly a great tool for the right model.
            """)

        with gr.Column():
            output = gr.Textbox(label="Model Architecture", lines=20, placeholder="Model architecture will appear here...", show_copy_button=True)
            error_output = gr.Textbox(label="Error", lines=10, placeholder="Exceptions will appear here...", show_copy_button=True)

    def handle_click(model_name):
        """Produce the texts shown after the Submit button is clicked.

        Args:
            model_name: Raw value of the "Model Name" textbox.

        Returns:
            A ``(model_summary, error_message)`` pair; the click wiring below
            routes the first item to ``output`` and the second to
            ``error_output``.
        """
        # Pasted names often carry stray spaces or a trailing newline; strip
        # them so the lookup receives a clean identifier. `or ""` also covers
        # a None value.
        cleaned_name = (model_name or "").strip()
        if not cleaned_name:
            # Nothing to look up: skip the lookup entirely and tell the user
            # what is missing via the error pane.
            return "", "Please enter a model name or select one of the examples."

        model_summary, error_message = get_model_summary(cleaned_name)
        return model_summary, error_message

    submit_button.click(fn=handle_click, inputs=textbox, outputs=[output, error_output])

# Launch the interface.
# This call sits at module level with no `if __name__ == "__main__":` guard,
# so it runs whenever this file is executed or imported.
demo.launch()