Sample code for running the model:

from transformers import VisionEncoderDecoderModel, ViTImageProcessor, GPT2Tokenizer
import torch
from PIL import Image
# One Hub repository provides the model weights, the image-processor config
# and the tokenizer files, so the identifier is spelled out only once.
_CHECKPOINT = "evlinzxxx/best_model_ViTB16_GPT2"

# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = VisionEncoderDecoderModel.from_pretrained(_CHECKPOINT)
feature_extractor = ViTImageProcessor.from_pretrained(_CHECKPOINT)
tokenizer = GPT2Tokenizer.from_pretrained(_CHECKPOINT)

# Move the model to the selected device.
model.to(device)
def show_image_and_captions(url):
    """Display an image and print the caption generated for it.

    Args:
        url: Location of the image, forwarded unchanged to ``load_image``
            and ``get_caption``. The sample call in this file passes a
            local file path rather than a web URL.

    Note:
        ``display``, ``load_image`` and ``get_caption`` are not defined in
        this snippet; they must already exist in the calling environment
        (``display`` is presumably the IPython/Jupyter built-in -- confirm).
    """
    # Show the input image.
    display(load_image(url))
    # Caption it with the module-level model. The image processor loaded at
    # module level is bound to ``feature_extractor``; the previous code passed
    # an undefined ``image_processor`` name here, which raised NameError.
    our_caption = get_caption(model, feature_extractor, tokenizer, url)
    print(f"Our caption: {our_caption}")
# Example invocation on a local image file. The list in the trailing comment
# is presumably the caption obtained for this image by the original author.
show_image_and_captions("/content/drive/MyDrive/try/test_400/gl_16.jpg") # ['navigate around the obstacle ahead adjusting your route to bypass the parked car.']
Downloads last month: 26
Safetensors
Model size: 239M params
Tensor type: F32
·
Inference Examples
This model does not have enough activity to be deployed to Inference API (serverless) yet. Increase its social visibility and check back later, or deploy to Inference Endpoints (dedicated) instead.