"""Gradio demo: classify an image with the google/vit-base-patch16-224 ViT model."""

from typing import Optional

import gradio as gr
import requests
import torch
from PIL import Image
from transformers import ViTForImageClassification, ViTImageProcessor

# Checkpoint shared by the image processor and the classifier so the two
# can never drift apart.
MODEL_ID = 'google/vit-base-patch16-224'

# Loaded once at import time; every call to predict() reuses these objects.
processor = ViTImageProcessor.from_pretrained(MODEL_ID)
model = ViTForImageClassification.from_pretrained(MODEL_ID)


def predict(image: Optional[Image.Image]) -> str:
    """Return the label the model assigns to *image*.

    Args:
        image: The PIL image supplied by the Gradio input component, or
            ``None`` when no image is currently set (for example after the
            user clears the input while the interface is in live mode).

    Returns:
        The entry of ``model.config.id2label`` for the highest-scoring
        class, or an empty string when *image* is ``None``.
    """
    # With live=True the callback also fires when the input is cleared;
    # there is nothing to classify then, so show an empty result instead of
    # letting the processor raise on None.
    if image is None:
        return ""

    inputs = processor(images=image, return_tensors="pt")

    # Pure inference: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits

    # model predicts one of the 1000 ImageNet classes
    predicted_class_idx = logits.argmax(-1).item()
    return model.config.id2label[predicted_class_idx]


gradio_app = gr.Interface(
    predict,
    inputs=gr.Image(
        label="Select image for classification",
        sources=['upload', 'webcam'],
        type="pil",
    ),
    outputs=gr.Textbox(),
    title="Image Classification",
    live=True,
    allow_flagging="never",
)

gradio_app.launch()