import os
import torch
import joblib
import torch.nn.functional as F
from PIL import Image
import gradio as gr
from transformers import AutoModelForImageClassification
from torch import nn
from torchvision import transforms
from huggingface_hub import hf_hub_download

# Filenames in the Hugging Face model repository
MODEL_PATH = "DeiT_Model_Parameter.pth"
ENCODER_PATH = "label_encoder.pkl"

# Select the compute device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def load_label_encoder():
    # Load the fitted label encoder from the Hugging Face model repository
    label_encoder_path = hf_hub_download(repo_id="bobs24/DeiT-Classification-Apparel", filename=ENCODER_PATH)
    label_encoder = joblib.load(label_encoder_path)
    return label_encoder

# Define the model class
class CustomModel(nn.Module):
    def __init__(self, num_classes):
        super(CustomModel, self).__init__()
        # DeiT backbone with its classification head resized to the target number of classes
        self.base_model = AutoModelForImageClassification.from_pretrained(
            "facebook/deit-base-patch16-224",
            num_labels=num_classes,
            ignore_mismatched_sizes=True,
        )

    def forward(self, x):
        # Return the raw logits from the DeiT classifier
        return self.base_model(x).logits

def load_model():
    # Download the fine-tuned weights from the Hugging Face model repository
    model_path = hf_hub_download(repo_id="bobs24/DeiT-Classification-Apparel", filename=MODEL_PATH)
    label_encoder = load_label_encoder()
    model = CustomModel(num_classes=len(label_encoder.classes_)).to(device)
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    return model, label_encoder

# Load the model and label encoder
model, label_encoder = load_model()

# Preprocessing as used during training
preprocess = transforms.Compose([
    transforms.Resize(256),        # Resize the shorter side to 256 (keeps aspect ratio)
    transforms.CenterCrop(224),    # Crop the central 224x224 region expected by DeiT
    transforms.ToTensor(),         # Convert to a float tensor in [0, 1]
    transforms.Normalize(mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225)),  # ImageNet stats used by DeiT
])
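
# Illustrative sanity check of the pipeline (a sketch, not executed by the app;
# the dummy image size is arbitrary):
#   dummy = Image.new("RGB", (640, 480))
#   preprocess(dummy).shape  # -> torch.Size([3, 224, 224])
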
# Function to perform predictions and show probabilities
def predict(image):
    if image is None:  # No image was provided
        return "Please insert photo"

    # Apply preprocessing to the input image
    image = Image.fromarray(image).convert("RGB")
    input_tensor = preprocess(image).unsqueeze(0).to(device)

    # Perform inference
    with torch.no_grad():
        output = model(input_tensor)

    # Apply softmax to get class probabilities
    probabilities = F.softmax(output, dim=1)

    # Get the predicted label index and its confidence
    predicted_label = torch.argmax(probabilities, dim=1).item()
    confidence = probabilities[0, predicted_label].item()

    # Map the index back to a class name with the label encoder
    class_name = label_encoder.inverse_transform([predicted_label])[0]
    return f"Predicted class: {class_name}, Confidence: {confidence:.4f}"

# Create Gradio interface
iface = gr.Interface(fn=predict, inputs=gr.Image(type="numpy"), outputs="text", live=True)
# Launch the interface
iface.launch()
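
# Optional local smoke test (illustrative sketch; "sample.jpg" is a hypothetical path).
# Uncomment to exercise predict() outside the Gradio UI:
# import numpy as np
# sample = np.array(Image.open("sample.jpg").convert("RGB"))
# print(predict(sample))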