Spaces:

annie08
/

nitish-caption-generator-transformer-for-vision-language

Runtime error

nitishhrms

new space

1ca801f 17 days ago

1.54 kB

	import torch
	from transformers import AutoProcessor, AutoModelForCausalLM
	from PIL import Image
	import gradio as gr

	# Step 1: Load the processor from Hugging Face
	processor = AutoProcessor.from_pretrained("microsoft/git-large-textcaps")

	# Step 2: Load the model architecture (with its published weights) from Hugging Face
	model = AutoModelForCausalLM.from_pretrained("microsoft/git-large-textcaps")

	# Step 3: Overwrite the published weights with the custom PyTorch checkpoint
	custom_weights_path = "model_folder/pytorch_model.bin"  # Path to your custom weights
	# torch.load unpickles the file, and unpickling can execute arbitrary code.
	# A state dict only needs tensors, so weights_only=True restricts the
	# unpickler to tensor/primitive types. The result is passed straight to
	# load_state_dict so no second copy of the weights stays alive at module level.
	model.load_state_dict(
	    torch.load(
	        custom_weights_path,
	        map_location=torch.device("cpu"),  # Load onto CPU regardless of where it was saved
	        weights_only=True,
	    )
	)
	model.eval()  # Set the model to evaluation mode

	# Step 4: Define the caption generation function
	def generate_caption(image):
	    """Generate a text caption for a single image.

	    Uses the module-level ``processor`` to preprocess the image and decode
	    the output, and the module-level ``model`` to generate token ids.

	    Args:
	        image: The picture to caption, either an array accepted by
	            ``PIL.Image.fromarray`` or an existing ``PIL.Image.Image``.
	            ``None`` is tolerated and yields an empty caption.

	    Returns:
	        str: The first decoded caption with special tokens stripped, or
	        ``""`` when ``image`` is ``None``.
	    """
	    # A live Gradio interface also invokes the callback when the image is
	    # cleared, passing None; Image.fromarray(None) would raise, so bail out
	    # early with an empty caption.
	    if image is None:
	        return ""

	    # Convert the input image to PIL format (if necessary)
	    if not isinstance(image, Image.Image):
	        image = Image.fromarray(image)

	    # Preprocess the image using the processor
	    inputs = processor(images=image, return_tensors="pt")
	    pixel_values = inputs.pixel_values

	    # Generate caption token ids (capped at 50 tokens) and decode them to text
	    generated_ids = model.generate(pixel_values=pixel_values, max_length=50)
	    generated_caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]

	    return generated_caption

	# Step 5: Build the Gradio UI -- one image input wired to one text output
	image_input = gr.Image()
	caption_output = gr.Textbox()
	interface = gr.Interface(
	    fn=generate_caption,     # Callback that turns the image into a caption
	    inputs=image_input,      # Image component feeding the callback
	    outputs=caption_output,  # Textbox showing the returned caption
	    live=True,               # Live prediction enabled
	)

	# Step 6: Start serving the app
	interface.launch()