Surveillance_scene_recognition

Runtime error

File size: 3,218 Bytes

23cf698
 
 
 
 
 
 
29a378b
e4fced7
 
 
 
29a378b
ceb927d
 
 
 
e4fced7
 
 
 
 
ceb927d
23cf698
 
ceb927d
494fb65
ceb927d
5119d09
de2e2ab
23cf698
ceb927d
23cf698
520d4cb
 
 
 
 
 
 
b725b48
520d4cb
 
 
23cf698

from turtle import title
import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image


# (display name shown in the UI, Hugging Face model id) for every Chinese CLIP
# variant this demo offers. Order here is the order of the radio buttons.
_CHECKPOINTS = (
    ("ViT/B-16", "OFA-Sys/chinese-clip-vit-base-patch16"),
    ("ViT/L-14", "OFA-Sys/chinese-clip-vit-large-patch14"),
    ("ViT/L-14@336px", "OFA-Sys/chinese-clip-vit-large-patch14-336px"),
    ("ViT/H-14", "OFA-Sys/chinese-clip-vit-huge-patch14"),
)

# One zero-shot image-classification pipeline per variant, keyed by display
# name. All of them are constructed eagerly, when this module is executed.
pipes = {
    display_name: pipeline("zero-shot-image-classification", model=model_id)
    for display_name, model_id in _CHECKPOINTS
}

# Input components, in the positional order expected by the callback that the
# interface wraps: the image (as a PIL image), the free-text label list, and
# the model selector.
inputs = [
    gr.inputs.Image(type='pil'),
    "text",
    gr.inputs.Radio(
        choices=list(pipes),  # same names, same order as the keys above
        type="value",
        default="ViT/B-16",
        label="Model",
    ),
]

# NOTE(review): not referenced anywhere else in the visible file; kept so the
# module namespace is unchanged.
images = "festival.jpg"

def shot(image, labels_text, model_name):
    """Classify ``image`` against a comma-separated list of candidate labels.

    Args:
        image: The image to classify; forwarded unchanged to the pipeline as
            its ``images`` argument.
        labels_text: Candidate labels in a single string, separated by commas.
            Both the ASCII comma ``,`` and the full-width comma ``，`` are
            accepted. Surrounding whitespace is ignored and blank entries
            (e.g. from a trailing comma) are dropped. ``None`` is treated as
            an empty string.
        model_name: Key selecting which entry of the module-level ``pipes``
            mapping to run.

    Returns:
        A dict mapping each label reported by the pipeline to its score.

    Raises:
        ValueError: If ``labels_text`` contains no non-blank label.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    # Treat the full-width comma as a separator too, so labels typed with a
    # Chinese input method are not silently merged into one.
    normalized = (labels_text or "").replace("，", ",")
    labels = [piece.strip() for piece in normalized.split(",")]
    # A blank label would be scored via the meaningless prompt "一张的图片。".
    labels = [label for label in labels if label]
    if not labels:
        raise ValueError(
            "Please enter at least one label, separated by commas "
            "(请至少输入一个分类标签，用逗号分隔)."
        )

    res = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template="一张{}的图片。",
    )
    return {dic["label"]: dic["score"] for dic in res}

# Example rows shown under the form; each row is [image file, label text,
# model name], matching the order of the interface inputs.
_EXAMPLES = [
    ["festival.jpg", "灯笼, 鞭炮, 对联", "ViT/B-16"],
    ["cat-dog-music.png", "音乐表演, 体育运动", "ViT/B-16"],
    ["football-match.jpg", "梅西, C罗, 马奎尔", "ViT/B-16"],
]

# HTML blurb passed as the interface description: project links, usage hint
# and a "Duplicate Space" badge.
_DESCRIPTION = """<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
            Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
            Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
            To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片，并输入多个分类标签，用英文逗号分隔。<br>
            You can duplicating this space and run it privately: <a style='display:inline-block' href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQCAYAAAAf8/9hAAAAAXNSR0IArs4c6QAAAP5JREFUOE+lk7FqAkEURY+ltunEgFXS2sZGIbXfEPdLlnxJyDdYB62sbbUKpLbVNhyYFzbrrA74YJlh9r079973psed0cvUD4A+4HoCjsA85X0Dfn/RBLBgBDxnQPfAEJgBY+A9gALA4tcbamSzS4xq4FOQAJgCDwV2CPKV8tZAJcAjMMkUe1vX+U+SMhfAJEHasQIWmXNN3abzDwHUrgcRGmYcgKe0bxrblHEB4E/pndMazNpSZGcsZdBlYJcEL9Afo75molJyM2FxmPgmgPqlWNLGfwZGG6UiyEvLzHYDmoPkDDiNm9JR9uboiONcBXrpY1qmgs21x1QwyZcpvxt9NS09PlsPAAAAAElFTkSuQmCC&logoWidth=14' alt='Duplicate Space'></a></p>"""

# Wire the classifier callback to the input components and a "label" output.
iface = gr.Interface(
    fn=shot,
    inputs=inputs,
    outputs="label",
    examples=_EXAMPLES,
    description=_DESCRIPTION,
    title="Zero-shot Image Classification (中文零样本图像分类)",
)

# Start serving the demo as soon as the module is executed.
iface.launch()