Spaces:

IoriU
/

anime_space_predictor

Runtime error

App Files Files Community

hk-bt-rnd committed on Mar 20, 2024

Commit

d47b6b4

1 Parent(s): 1a6c69c

Init spaces

Browse files

Files changed (3) hide show

app.py +98 -0
model/best.pt +3 -0
requirements.txt +76 -0

app.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import gradio as gr
+import numpy as np
+from PIL import Image
+from matplotlib import cm
+import torch
+from transformers import AutoTokenizer, AutoModel
+from model import ImageModel, TextModel
+import torch.nn.functional as F
+import torchvision.transforms.v2 as transforms
+# Load model directly
+MODEL_NAME = "distilbert/distilroberta-base"
+class_names = ['Action', 'Adventure', 'Comedy', 'Drama', 'Fantasy', 'Romance', 'Sci-Fi']
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+cp = torch.load(r"model\best.pt", map_location="cpu")
+model_img = ImageModel(len(class_names))
+model_img.load_state_dict(cp['w_i'])
+model_text = TextModel(MODEL_NAME, len(class_names))
+model_text.load_state_dict(cp['w_t'])
+image_transforms = transforms.Compose([
+    transforms.Resize((224, 224)),
+    transforms.ToTensor(),
+    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
+])
+def text_predictor(title, synopsis):
+    encoded_synopsis = tokenizer(f"{title} </s> {synopsis}", \
+        add_special_tokens = True, \
+        max_length = 128, \
+        padding = "max_length", \
+        truncation = True,
+        return_tensors='pt')
+    with torch.no_grad():
+        score, isAward, genres = model_text((encoded_synopsis['input_ids'], encoded_synopsis['attention_mask']))
+        score, isAward, genres = score.squeeze(0), F.sigmoid(isAward.squeeze(0)) >= 0.5 , F.sigmoid(genres.squeeze(0))
+    preds_name = []
+    for prob, cls in zip(genres, class_names):
+        if prob >= 0.5:
+            preds_name.append(cls)
+    # print(preds_name)
+    return round(score.item(), 2), isAward.item(), {"genres":preds_name}
+def img_predictor(img):
+    # Preprocess the image
+    img = Image.fromarray(img.astype('uint8'), 'RGB')  # Convert NumPy array to PIL Image
+    img = image_transforms(img).unsqueeze(0)  # Apply transforms and add batch dimension
+    # Make predictions
+    with torch.no_grad():
+        output = model_img(img)
+        score, isAward, genres = output[0].squeeze(0), F.sigmoid(output[1].squeeze(0)) >= 0.5, F.sigmoid(output[2].squeeze(0))
+    preds_name = []
+    for prob, cls in zip(genres, class_names):
+        if prob >= 0.5:
+            preds_name.append(cls)
+    return round(score.item(), 2), isAward.item(), {"genres": preds_name}
+def combine_predictor(title, synopsis, img):
+    encoded_synopsis = tokenizer(f"{title} </s> {synopsis}", \
+        add_special_tokens = True, \
+        max_length = 128, \
+        padding = "max_length", \
+        truncation = True,
+        return_tensors='pt')
+    img = Image.fromarray(img.astype('uint8'), 'RGB')  # Convert NumPy array to PIL Image
+    img = image_transforms(img).unsqueeze(0)  # Apply transforms and add batch dimension
+    # Make predictions
+    with torch.no_grad():
+        output_text = model_text((encoded_synopsis['input_ids'], encoded_synopsis['attention_mask']))
+        output_img = model_img(img)
+        score = (output_img[0].squeeze(0) + output_text[0].squeeze(0))/2
+        isAward = F.sigmoid((output_img[1].squeeze(0) + output_text[1].squeeze(0))/2) >= 0.5
+        genres = F.sigmoid((output_img[2].squeeze(0) + output_text[2].squeeze(0))/2)
+    print(score, isAward, genres)
+    preds_name = []
+    for prob, cls in zip(genres, class_names):
+        if prob >= 0.5:
+            preds_name.append(cls)
+    return round(score.item(), 2), isAward.item(), {"genres": preds_name}
+# iface_1 = gr.Interface(age_predictor_image, gr.Image(height=256, width=256), "json", examples=[["young.webp"], ["old.jpg"]])
+iface_1 = gr.Interface(text_predictor, [gr.Text(placeholder="Input title here"), gr.Text(placeholder="Input synopsis here")], ["label", "label", "json"])
+iface_2 = gr.Interface(img_predictor, gr.Image(height=224, width=224), ["label", "label", "json"])
+iface_3 = gr.Interface(combine_predictor, [gr.Text(placeholder="Input title here"), gr.Text(placeholder="Input synopsis here"), gr.Image(height=224, width=224)], ["label", "label", "json"])
+demo = gr.TabbedInterface([iface_1, iface_2, iface_3], ["From Text", "From Image", "From Text and Image"])
+demo.launch()  # Launches the mini app!

model/best.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b4b30a7ffad7969310ec38bcd7f9ef63ce4247e86ab91838a0c61adf0bbba268
+size 696898582

requirements.txt ADDED Viewed

	@@ -0,0 +1,76 @@

+aiofiles==23.2.1
+altair==5.2.0
+annotated-types==0.6.0
+anyio==4.3.0
+attrs==23.2.0
+certifi==2024.2.2
+charset-normalizer==3.3.2
+click==8.1.7
+colorama==0.4.6
+contourpy==1.2.0
+cycler==0.12.1
+exceptiongroup==1.2.0
+fastapi==0.110.0
+ffmpy==0.3.2
+filelock==3.13.1
+fonttools==4.50.0
+fsspec==2024.3.1
+gradio==4.22.0
+gradio_client==0.13.0
+h11==0.14.0
+httpcore==1.0.4
+httpx==0.27.0
+huggingface-hub==0.21.4
+idna==3.6
+importlib_resources==6.3.2
+Jinja2==3.1.3
+jsonschema==4.21.1
+jsonschema-specifications==2023.12.1
+kiwisolver==1.4.5
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib==3.8.3
+mdurl==0.1.2
+mpmath==1.3.0
+networkx==3.2.1
+numpy==1.26.4
+orjson==3.9.15
+packaging==24.0
+pandas==2.2.1
+pillow==10.2.0
+pydantic==2.6.4
+pydantic_core==2.16.3
+pydub==0.25.1
+Pygments==2.17.2
+pyparsing==3.1.2
+python-dateutil==2.9.0.post0
+python-multipart==0.0.9
+pytz==2024.1
+PyYAML==6.0.1
+referencing==0.34.0
+regex==2023.12.25
+requests==2.31.0
+rich==13.7.1
+rpds-py==0.18.0
+ruff==0.3.3
+safetensors==0.4.2
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.16.0
+sniffio==1.3.1
+starlette==0.36.3
+sympy==1.12
+tokenizers==0.15.2
+tomlkit==0.12.0
+toolz==0.12.1
+torch==2.2.1
+torchaudio==2.2.1
+torchvision==0.17.1
+tqdm==4.66.2
+transformers==4.38.2
+typer==0.9.0
+typing_extensions==4.10.0
+tzdata==2024.1
+urllib3==2.2.1
+uvicorn==0.29.0
+websockets==11.0.3