Spaces:

aekpic877
/

gradio

Sleeping

File size: 1,476 Bytes

934f8a8
 
b2ed79b
8dd40fa
934f8a8
b2ed79b
8dd40fa
b2ed79b
 
8dd40fa
 
 
b2ed79b
8dd40fa
934f8a8
8dd40fa
 
934f8a8
8dd40fa
 
 
b2ed79b
 
8dd40fa
 
 
 
b2ed79b
 
 
8dd40fa
 
934f8a8
8dd40fa
 
 
b2ed79b
 
8dd40fa
 
934f8a8
 
8dd40fa

import torch
from PIL import Image
from transformers import BlipProcessor, BlipForConditionalGeneration
import gradio as gr

# Load the BLIP processor and model once at import time so every request
# reuses the same objects.
# NOTE(review): the checkpoint name says "image-captioning", while the UI in
# this file is titled "Image Question Answering" — confirm this is the
# intended model.
MODEL_ID = "Salesforce/blip-image-captioning-base"

try:
    processor = BlipProcessor.from_pretrained(MODEL_ID)
    model = BlipForConditionalGeneration.from_pretrained(MODEL_ID)
    # Use the GPU when torch reports one, otherwise stay on the CPU.
    model = model.to(device='cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()
except Exception as e:
    print(f"Error loading model or processor: {e}")
    # The app cannot work without the model. Exit with a non-zero status so
    # the failure is visible to whatever launched the process; the bare
    # `exit()` helper comes from the `site` module and reports status 0.
    raise SystemExit(1) from e

def process_image(image, question):
    """Run the BLIP model on an uploaded image together with a text prompt.

    Args:
        image: Image array as delivered by the Gradio image input, or
            ``None`` when no image was provided.
        question: Text passed to the processor alongside the image.

    Returns:
        The decoded model output as a string, or a human-readable error
        message when no image was supplied or processing failed.
    """
    # An empty image input would otherwise crash inside Image.fromarray;
    # report it the same way other failures are reported (as a string).
    if image is None:
        return "Please upload an image."

    try:
        # Convert the Gradio array to an RGB PIL image.
        pil_image = Image.fromarray(image).convert('RGB')

        # Preprocess, then place the tensors on the device the model lives on
        # rather than re-deriving the device independently on every call.
        inputs = processor(pil_image, question, return_tensors="pt").to(model.device)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = model.generate(**inputs)
        return processor.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        # Preprocessing and generation failures are both surfaced to the UI
        # as text instead of an unhandled exception.
        return f"Error during model inference: {e}"

# Build the input and output widgets first, then wire them to the handler.
image_input = gr.Image(type='numpy')
question_input = gr.Textbox(label="Question")
answer_output = gr.Textbox()

interface = gr.Interface(
    fn=process_image,
    inputs=[image_input, question_input],
    outputs=answer_output,
    title="Image Question Answering",
    description="Upload an image and ask a question about it. The model will provide an answer.",
)

# Start serving the app.
interface.launch()