import gradio as gr
from huggingface_hub import hf_hub_download
from fastai.learner import load_learner
import os

# Log the installed Gradio version at startup.
print(gr.__version__)

# Access token read from the "token" environment variable; a missing
# variable raises KeyError here, before anything is downloaded.
TOKEN = os.environ["token"]
REPO_ID = "42digital/deepfashion_classification_vit-large-patch14-clip-336"
FILENAME = "model.pkl"
# Image files offered as clickable examples in the UI.
EXAMPLES = [
    "dress.jpg",
    "hoodie.jpg",
    "joggers.jpg",
    "jumpsuit.jpg",
    "shorts.jpg",
    "tee.jpg",
]

# Fetch the exported model file from the Hub, then load it as a learner.
# NOTE(review): FILENAME is a .pkl, so loading presumably unpickles it --
# only point REPO_ID at a repository whose contents are trusted.
_model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, token=TOKEN)
learner = load_learner(_model_path)

def predict(img):
    """Classify an image and return a probability for every known label.

    Args:
        img: The image to classify, passed straight to ``learner.predict``.

    Returns:
        A dict mapping each entry of ``learner.dls.vocab`` to its
        probability as a plain Python ``float``.
    """
    # learner.predict yields a 3-tuple; only the last item (the per-class
    # probabilities) is needed here.
    _first, _second, class_probs = learner.predict(img)
    # Convert to built-in floats so the result holds no tensor objects.
    scores = list(map(float, class_probs.detach()))
    return dict(zip(learner.dls.vocab, scores))

# Build the web UI: an image upload in, the five most probable labels out.
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="numpy"),
    outputs=gr.Label(num_top_classes=5),
    examples=EXAMPLES,
    cache_examples=False,
    title="Fashion Classification",
    description="Recognize clothes in an image. [ViT-L/14](https://arxiv.org/abs/2010.11929) trained on 46 clothing categories from [DeepFashion](https://openaccess.thecvf.com/content_cvpr_2016/html/Liu_DeepFashion_Powering_Robust_CVPR_2016_paper.html) @ 76% Top-1 Accuracy and 92% [Top-3 Accuracy](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.top_k_accuracy_score.html) (many images show more than one clothing item).",
    analytics_enabled=False,
)

# Start serving the interface.
demo.launch()