File size: 2,482 Bytes
892d0c8
1f4ce0a
 
 
 
d297ae6
 
5e12f9b
d297ae6
ec1a268
67054df
892d0c8
d297ae6
 
241af22
 
723250f
a5fe88d
 
 
 
7e693ff
 
 
 
 
 
d297ae6
7e693ff
 
 
241af22
d297ae6
 
241af22
bb8bac0
c3cad0a
892d0c8
d297ae6
c3cad0a
d297ae6
 
c3cad0a
d297ae6
c3cad0a
 
 
 
 
 
d297ae6
c3cad0a
d297ae6
c3cad0a
d297ae6
c3cad0a
d297ae6
c3cad0a
 
 
d297ae6
 
c3cad0a
 
d297ae6
c3cad0a
 
354d8fc
a5a67ad
 
 
 
354d8fc
c3cad0a
 
d297ae6
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78

import os
import shutil
import subprocess

# Clone the model repository and install its dependencies.
# check=True makes a failed clone or install raise CalledProcessError
# immediately, instead of silently continuing with a broken setup and
# failing later with a confusing import error.
print("Cloning the repository...")
if not os.path.isdir("edge_vlm"):
    # Skip the clone when the directory already exists (e.g. a re-run);
    # `git clone` into an existing non-empty directory would fail.
    subprocess.run(["git", "clone", "https://huggingface.co/irotem98/edge_vlm"], check=True)
print("Installing dependencies...")
subprocess.run(["pip", "install", "-r", "edge_vlm/requirements.txt"], check=True)
subprocess.run(["pip", "install", "sentencepiece"], check=True)

# Copy all files from edge_vlm to current directory
print("Copying files...")
source_dir = "edge_vlm"
destination_dir = "."

import torch
import gradio as gr


# Mirror every entry of source_dir into destination_dir.
# Directories are replaced wholesale (remove then copytree); plain
# files are simply overwritten by shutil.copy.
for entry in os.listdir(source_dir):
    src_path = os.path.join(source_dir, entry)
    dst_path = os.path.join(destination_dir, entry)

    if not os.path.isdir(src_path):
        shutil.copy(src_path, dst_path)
        continue

    if os.path.exists(dst_path):
        shutil.rmtree(dst_path)
    shutil.copytree(src_path, dst_path)

print("Files copied successfully.")

# Now import the model from the copied files.
# NOTE(review): `model.py` must exist in the current working directory —
# it was copied there from the cloned edge_vlm repo above.
from model import MoondreamModel

# Load the model and tokenizer once at startup so every Gradio request
# reuses them instead of reloading per call.
print("Loading model...")
model = MoondreamModel.load_model()
print("Model loaded.")
print("Loading tokenizer...")
tokenizer = MoondreamModel.load_tokenizer()
print("Tokenizer loaded.")

# Define the default question
# (shown to the user in the interface description below).
default_question = "Describe the image."

# Function to handle image and return generated caption
# Function to handle image and return generated caption
def generate_caption_with_default(image):
    """Preprocess an uploaded image and return the model's caption.

    Relies on the module-level `model` and `tokenizer` loaded at startup.
    The print statements trace progress in the server log, since
    inference on CPU can take a long time.
    """
    print("Preprocessing image...")
    prepared = MoondreamModel.preprocess_image(image)
    print("Image preprocessed.")

    print("Generating caption...")
    generated = MoondreamModel.generate_caption(model, prepared, tokenizer)
    print("Caption generated.")

    return generated

# Build and launch the Gradio UI.
print("Setting up Gradio interface...")

# Markdown badges linking to the model and source repositories.
badges_md = (
    "[![Hugging Face Model](https://img.shields.io/badge/Hugging%20Face-Model-blue)](https://huggingface.co/irotem98/edge_vlm) "
    "[![GitHub Repo](https://img.shields.io/badge/GitHub-Repo-green)](https://github.com/rotem154154/edge_vlm)"
)
description_md = (
    f"The default question is: '{default_question}'.\n\n"
    "Please note that the inference may take up to 200 seconds due to long captions and CPU limitations.\n\n"
    + badges_md
)

interface = gr.Interface(
    fn=generate_caption_with_default,
    inputs=gr.Image(type="pil", label="Upload an Image"),
    outputs="text",
    title="Image Caption Generator",
    description=description_md,
)

# Launch the interface
print("Launching interface...")
interface.launch()