|
import numpy as np |
|
import gradio as gr |
|
import torch |
|
from transformers import Dinov2Config, Dinov2Model, Dinov2ForImageClassification, AutoImageProcessor |
|
import torch.nn as nn |
|
import os |
|
import json |
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
# Name of the fine-tuned checkpoint and its full Hugging Face Hub repo id
# (the repo lives under the "lombardata" namespace).
model_name = "DinoVdeau-large-2024_04_03-with_data_aug_batch-size32_epochs150_freeze"
checkpoint_name = f"lombardata/{model_name}"
|
|
|
|
|
def create_head(num_features, number_classes, dropout_prob=0.5, activation_func=nn.ReLU):
    """Build a small MLP classification head as an ``nn.Sequential``.

    The head has two hidden stages that shrink the width from
    ``num_features`` to ``num_features // 2`` and then ``num_features // 4``.
    Each stage is Linear -> activation -> BatchNorm1d, followed by Dropout
    when ``dropout_prob`` is non-zero. A final Linear layer maps the last
    hidden width to ``number_classes`` outputs.

    Args:
        num_features: Width of the input feature vector.
        number_classes: Number of output units of the last Linear layer.
        dropout_prob: Dropout probability; ``0`` disables the Dropout layers.
        activation_func: Zero-argument callable returning the activation
            module (instantiated once per hidden stage).

    Returns:
        The assembled ``nn.Sequential`` module.
    """
    widths = (num_features, num_features // 2, num_features // 4)
    use_dropout = dropout_prob != 0

    modules = []
    for width_in, width_out in zip(widths, widths[1:]):
        modules.extend(
            (
                nn.Linear(width_in, width_out),
                activation_func(),
                nn.BatchNorm1d(width_out),
            )
        )
        if use_dropout:
            modules.append(nn.Dropout(dropout_prob))

    modules.append(nn.Linear(widths[-1], number_classes))
    return nn.Sequential(*modules)
|
from transformers import Dinov2Config, Dinov2Model |
|
|
|
class NewheadDinov2ForImageClassification(Dinov2ForImageClassification):
    """DINOv2 image classifier whose ``classifier`` is the MLP from ``create_head``.

    After the parent constructor runs, the backbone (``dinov2``) and the
    classification head (``classifier``) are (re)assigned here; the head
    takes ``2 * config.hidden_size`` input features and produces
    ``config.num_labels`` outputs.
    """

    def __init__(self, config: Dinov2Config) -> None:
        super().__init__(config)

        self.num_labels = config.num_labels

        # NOTE(review): the parent constructor presumably already builds a
        # Dinov2Model; this assignment is kept as-is to preserve behavior.
        self.dinov2 = Dinov2Model(config)

        # Replace the default head with the custom MLP head.
        head_in_features = 2 * config.hidden_size
        self.classifier = create_head(head_in_features, self.num_labels)
|
|
|
# Load the fine-tuned model (with its custom head) from the checkpoint repo.
model = NewheadDinov2ForImageClassification.from_pretrained(checkpoint_name)

# Fetch the checkpoint's config.json to read the label maps and image size.
config_path = hf_hub_download(repo_id=checkpoint_name, filename="config.json")

# The context manager closes the file handle as soon as the JSON is parsed
# (the previous bare ``open`` leaked it); JSON is read explicitly as UTF-8.
with open(config_path, encoding="utf-8") as config_file:
    config = json.load(config_file)

id2label = config["id2label"]
label2id = config["label2id"]
image_size = config["image_size"]

# ``predict`` looks labels up by logit position, so order the class names by
# their class id instead of depending on the key order of the JSON file.
classes_names = sorted(label2id, key=lambda name: int(label2id[name]))
|
|
|
|
|
def sigmoid(_outputs):
    """Apply the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Args:
        _outputs: Scalar or array-like of raw scores accepted by ``np.exp``.

    Returns:
        The scores squashed into the open interval (0, 1), same shape as
        the input.
    """
    exp_of_negated = np.exp(-_outputs)
    return 1.0 / (1.0 + exp_of_negated)
|
|
|
# Image processor shared by every call to ``predict``; created on first use.
_image_processor = None


def _get_image_processor():
    """Return the checkpoint's image processor, loading it only once."""
    global _image_processor
    if _image_processor is None:
        _image_processor = AutoImageProcessor.from_pretrained(checkpoint_name)
    return _image_processor


def predict(input_image):
    """Classify an image and return the labels whose score exceeds 0.5.

    Args:
        input_image: Image in any form accepted by the checkpoint's image
            processor (here, whatever the Gradio image input supplies).

    Returns:
        A dict mapping label name to its sigmoid score (a Python float),
        containing only labels with a score strictly above 0.5. The dict is
        empty when no label passes the threshold.
    """
    # Loading the processor on every request was loop-invariant work; reuse
    # the cached instance instead.
    inputs = _get_image_processor()(input_image, return_tensors="pt")

    with torch.no_grad():
        model_outputs = model(**inputs)

    # Logits of the first image in the batch, converted to NumPy explicitly
    # so the NumPy-based ``sigmoid`` receives a native array.
    logits = model_outputs["logits"][0].detach().cpu().numpy()
    scores = sigmoid(logits)

    # Position i in ``scores`` corresponds to ``classes_names[i]``.
    return {
        classes_names[index]: float(score)
        for index, score in enumerate(scores)
        if float(score) > 0.5
    }
|
|
|
|
|
# Texts shown at the top of the demo page.
title = "DinoVd'eau image classification"
model_link = f"https://huggingface.co/{checkpoint_name}"
description = (
    "This application showcases the capability of artificial intelligence-based "
    "systems to identify objects within underwater images. "
    "To utilize it, you can either upload your own image or select one of the "
    "provided examples for analysis.\n"
    f"For predictions, we use this [open-source model]({model_link})"
)

# Sample images offered as one-click examples in the UI.
example_images = [
    "session_GOPR0106.JPG",
    "session_2021_08_30_Mayotte_10_image_00066.jpg",
    "session_2018_11_17_kite_Le_Morne_Manawa_G0065777.JPG",
    "session_2023_06_28_caplahoussaye_plancha_body_v1B_00_GP1_3_1327.jpeg",
]

# Build the web UI around ``predict`` and start serving it.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(shape=(512, 512)),
    outputs="label",
    title=title,
    description=description,
    examples=example_images,
)
demo.launch()