# Source: Hugging Face Space "zhiqiulin/VQAScore" (Running on Zero).
# Page-reported file size: 1,224 Bytes.

import gradio as gr
import spaces

import torch
# Replace torch.jit.script with an identity function so whatever is passed to
# it is returned unchanged. The patch is applied before the t2v_metrics import
# below so it is already in place when that package loads.
# NOTE(review): presumably a workaround for TorchScript compilation problems
# in this environment -- confirm the reason before removing.
torch.jit.script = lambda f: f

from t2v_metrics import VQAScore, list_all_vqascore_models

# Print the model names reported by t2v_metrics at startup.
print(list_all_vqascore_models())

# Module-level slot for the scorer: stays None until initialize_model()
# fills it in on first use.
model_pipe = None

# Name of the model currently held in `model_pipe`; None until the first load.
_loaded_model_name = None

@spaces.GPU
def initialize_model(model_name):
    """Return a VQAScore pipeline for ``model_name``, loading it on demand.

    The pipeline is cached in the module-level ``model_pipe``. Repeated
    requests for the same ``model_name`` reuse the cached instance; a request
    for a different name replaces it, so the returned pipeline always matches
    the name that was asked for.

    Args:
        model_name: Model identifier passed to ``VQAScore(model=...)``.

    Returns:
        The cached ``VQAScore`` instance built for ``model_name``.
    """
    global model_pipe, _loaded_model_name
    if model_pipe is None or _loaded_model_name != model_name:
        # Release the previous pipeline (if nothing else references it)
        # before building the next one, so two models are not held at once.
        model_pipe = None
        model_pipe = VQAScore(model=model_name)
        _loaded_model_name = model_name
        print("Model initialized!")
    return model_pipe

@spaces.GPU
def generate(model_name, image, text):
    """Score how well ``image`` matches ``text`` using the selected model.

    Args:
        model_name: Model identifier chosen in the UI dropdown.
        image: Path to the uploaded image (the UI is configured with
            ``type="filepath"``), or None when nothing was uploaded.
        text: Prompt to score the image against.

    Returns:
        The image-text score as a plain Python float, which is what the
        "number" output component can display.

    Raises:
        gr.Error: If no model was selected or no image was provided.
    """
    print("Model_name:", model_name)
    print("Image:", image)
    print("Text:", text)
    # Surface a readable message in the UI instead of failing deep inside
    # the model code when a required input is missing.
    if not model_name:
        raise gr.Error("Please select a model.")
    if image is None:
        raise gr.Error("Please upload an image.")
    scorer = initialize_model(model_name)
    scores = scorer(images=[image], texts=[text])
    # Exactly one image and one text were passed, so the only score sits at
    # [0][0] (pairwise image x text layout per the t2v_metrics docs).
    return float(scores[0][0])

# Build the UI and keep a handle to it. launch() is called separately because
# it returns server details rather than the Interface, so chaining it onto the
# constructor would leave `iface` bound to the wrong object.
iface = gr.Interface(
    fn=generate,  # called with (model_name, image, text) on submit
    inputs=[
        gr.Dropdown(["clip-flant5-xl", "clip-flant5-xxl"], label="Model Name"),
        gr.Image(type="filepath"),  # hands generate() a path, not pixel data
        gr.Textbox(label="Prompt"),
    ],
    outputs="number",
    title="VQAScore",
    description="This model evaluates the similarity between an image and a text prompt.",
)
iface.launch()