"""Gradio demo that generates text from an uploaded image.

The script authenticates with the Hugging Face Hub (when a token is
available), loads a processor/model pair by repository id, and serves a
single-image-in, text-out Gradio interface.
"""

import os
from typing import Optional

import gradio as gr
from huggingface_hub import login
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor

# Step 1: Authenticate with Hugging Face using your token.
# The token is read from the HF_TOKEN environment variable so that the secret
# never has to be pasted into (and accidentally committed with) the source.
# When the variable is unset or empty the login step is skipped.
hf_token = os.environ.get("HF_TOKEN", "")
if hf_token:
    login(token=hf_token)

# Step 2: Load the processor and the private model.
# NOTE(review): the repository id suggests a PaliGemma checkpoint, yet it is
# loaded with the BLIP processor/model classes -- confirm these match.
model_name = "anushettypsl/paligemma_vqav2"  # Replace with actual model link
processor = BlipProcessor.from_pretrained(model_name)
model = BlipForConditionalGeneration.from_pretrained(model_name)


# Step 3: Define the prediction function.
def predict(image: Optional[Image.Image]) -> str:
    """Generate text for *image* with the module-level model.

    Args:
        image: The PIL image supplied by the Gradio image component, or
            ``None`` when the user submitted without providing one.

    Returns:
        The first generated sequence, decoded with special tokens removed.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Gradio passes None for an empty image input; surface a readable
        # message in the UI instead of an opaque processor failure.
        raise gr.Error("Please upload an image first.")

    inputs = processor(image, return_tensors="pt")
    outputs = model.generate(**inputs)
    return processor.decode(outputs[0], skip_special_tokens=True)


# Step 4: Create the Gradio interface.
interface = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),  # Image input
    outputs="text",  # Text output
    title="Image-to-Text Model",
)

# Step 5: Launch the app (only when executed as a script, not on import).
if __name__ == "__main__":
    interface.launch()