# Gradio Space (ZeroGPU): image description generator built on Molmo-7B (4-bit).
import gradio as gr
import torch
from PIL import Image
from transformers import (
    AutoModelForCausalLM,
    AutoProcessor,
    BitsAndBytesConfig,
    GenerationConfig,
)
# Configuration for 4-bit quantization via bitsandbytes.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
)

# Model repository: a pre-quantized 4-bit Molmo-7B-O checkpoint.
repo_name = "cyan2k/molmo-7B-O-bnb-4bit"

# Load the processor and model. trust_remote_code is required because Molmo
# ships custom modeling/processing code (e.g. generate_from_batch).
processor = AutoProcessor.from_pretrained(repo_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    repo_name,
    torch_dtype=torch.float16,
    device_map="auto",  # accelerate decides weight placement (GPU if available)
    trust_remote_code=True,
    quantization_config=bnb_config,
)

# BUG FIX: the original called model.to(device) here. Moving a bitsandbytes
# 4-bit model that was loaded with device_map="auto" raises a ValueError in
# transformers, and is redundant anyway since accelerate already placed it.
# Instead, derive the device from where the model actually lives, so the
# rest of the script can keep using `device` unchanged.
device = next(model.parameters()).device
def describe_images(images):
    """Generate a detailed text description for each uploaded image.

    Parameters
    ----------
    images : list | None
        Items from a Gradio file input: plain path strings, tempfile-like
        objects exposing a ``.name`` path, or already-opened PIL images.

    Returns
    -------
    str
        One description per image, joined by blank lines; empty string
        when nothing was uploaded.
    """
    # Robustness: the click handler fires even if no file was uploaded.
    if not images:
        return ""

    descriptions = []
    for image in images:
        # BUG FIX: Gradio file components return tempfile-like objects,
        # not strings — resolve their .name path first, then open paths.
        if hasattr(image, "name"):
            image = image.name
        if isinstance(image, str):
            image = Image.open(image)

        # Tokenize/encode the image plus the fixed prompt.
        inputs = processor.process(
            images=[image],
            text="Describe this image in great detail.",
        )

        # BUG FIX: move tensors to the model's device AND add the batch
        # dimension that generate_from_batch expects (per the Molmo model
        # card). The slice below indexes dim 1 of input_ids, which only
        # exists after unsqueeze(0).
        inputs = {k: v.to(device).unsqueeze(0) for k, v in inputs.items()}

        # Generate without tracking gradients (inference only).
        with torch.no_grad():
            output = model.generate_from_batch(
                inputs,
                GenerationConfig(max_new_tokens=200, stop_strings=["<|endoftext|>"]),
                tokenizer=processor.tokenizer,
            )

        # Drop the prompt tokens; decode only the newly generated ones.
        generated_tokens = output[0, inputs["input_ids"].size(1):]
        generated_text = processor.tokenizer.decode(
            generated_tokens, skip_special_tokens=True
        )
        descriptions.append(generated_text.strip())

    return "\n\n".join(descriptions)
# Gradio interface: upload one or more images, get back their descriptions.
with gr.Blocks() as demo:
    gr.Markdown("<h3><center>Image Description Generator</center></h3>")
    with gr.Row():
        image_input = gr.File(
            file_types=["image"],
            label="Upload Image(s)",
            # BUG FIX: gr.File has no `multiple` parameter; multi-file
            # upload is enabled with file_count="multiple".
            file_count="multiple",
        )
    generate_button = gr.Button("Generate Descriptions")
    output_text = gr.Textbox(label="Descriptions", lines=15)
    generate_button.click(describe_images, inputs=image_input, outputs=output_text)

demo.launch()