File size: 4,393 Bytes
23cf698
 
 
 
 
 
 
29a378b
de0bb86
e4fced7
 
 
 
29a378b
ceb927d
33640e7
 
 
 
de0bb86
 
e4fced7
 
 
 
33640e7
 
 
 
ceb927d
23cf698
 
33640e7
494fb65
ceb927d
5119d09
33640e7
23cf698
ceb927d
c02bdc0
 
23cf698
520d4cb
 
b12da12
520d4cb
 
b725b48
33640e7
83dac04
520d4cb
23cf698
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
from turtle import title
import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image


# Zero-shot image-classification pipelines, keyed by the model name shown in
# the UI. Every checkpoint is instantiated eagerly, in the order listed, when
# this module is executed.
pipes = {
    display_name: pipeline("zero-shot-image-classification", model=checkpoint)
    for display_name, checkpoint in (
        ("openAI-ViT/B-16", "openai/clip-vit-base-patch16"),
        ("ViT/B-16", "OFA-Sys/chinese-clip-vit-base-patch16"),
        ("ViT/L-14", "OFA-Sys/chinese-clip-vit-large-patch14"),
        ("ViT/L-14@336px", "OFA-Sys/chinese-clip-vit-large-patch14-336px"),
        ("ViT/H-14", "OFA-Sys/chinese-clip-vit-huge-patch14"),
    )
}
# Input components of the demo, in the positional order of the arguments of
# ``shot(image, labels_text, model_name, hypothesis_template)``.
inputs = [
    gr.inputs.Image(type='pil',
                    label="Image 输入图片"),
    gr.inputs.Textbox(lines=1,
                      label="Candidate Labels 候选分类标签"),
    gr.inputs.Radio(
        # Each choice must be a key of ``pipes``. The comma after the first
        # entry matters: without it Python silently concatenates the two
        # adjacent string literals into "openAI-ViT/B-16ViT/B-16", which is
        # neither a valid key nor the configured default.
        choices=[
            "openAI-ViT/B-16",
            "ViT/B-16",
            "ViT/L-14",
            "ViT/L-14@336px",
            "ViT/H-14",
        ],
        type="value",
        default="ViT/B-16",
        label="Model 模型规模",
    ),
    gr.inputs.Textbox(lines=1,
                      label="Prompt Template Prompt模板 ({}指代候选标签)",
                      default="一张{}的图片。"),
]
# NOTE(review): ``images`` is not referenced anywhere in the visible code (the
# example row uses "street.jpg"); kept so the module's names are unchanged.
images = "festival.jpg"

def shot(image, labels_text, model_name, hypothesis_template):
    """Score ``image`` against user-supplied labels with the selected model.

    Args:
        image: Image forwarded unchanged to the pipeline as ``images``.
        labels_text: Candidate labels as one comma-separated string.
        model_name: Key of the module-level ``pipes`` dict selecting the
            pipeline to run.
        hypothesis_template: Prompt template forwarded to the pipeline.

    Returns:
        A dict mapping each candidate label to the score reported by the
        pipeline.

    Raises:
        ValueError: If ``labels_text`` contains no non-blank label.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    # Strip every kind of surrounding whitespace (tabs, newlines and
    # full-width spaces too, not only ASCII spaces) and drop blank entries
    # left behind by trailing or doubled commas.
    stripped = (part.strip() for part in (labels_text or "").split(","))
    # dict.fromkeys de-duplicates while keeping first-seen order. A repeated
    # label would otherwise be scored twice and then collapse into a single
    # key of the returned dict, silently losing part of the probability mass.
    labels = list(dict.fromkeys(label for label in stripped if label))
    if not labels:
        raise ValueError("Please enter at least one candidate label.")

    res = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template=hypothesis_template,
    )
    return {dic["label"]: dic["score"] for dic in res}

# Comma-separated candidate labels used to pre-fill the example row of the
# demo. Every label appears exactly once: a repeated label (the list used to
# contain "人行闸机" twice) is scored twice by the model and then collapses
# into a single key of the result dict.
lei = "机动车道,非机动车道,人车混行道路,斑马线人行道,主干道路,乡间道路,内部小巷,人行横道,十字路口,丁字路口,岔路口,铁路沿线,铁路路口,高架桥,立交桥,过街天桥,桥梁,天桥上下口,地下隧道,地下人行通道,隧道通行区域,穿山隧道,隧道出入口,水池,河流,湖面,室外停车场,路面划线停车位,城市广场,裸露农田,林区,草坪,树木,公交站台,收费站,检查站,加油站,岗亭,车行道闸,人行闸机,安检机器,铁门,保安亭,门或电动门,人员出入口,车辆出入口,广告牌,横幅,沿街商铺,露天烧烤摊,超市,建筑施工,道路施工,人员卡口,车辆卡口,场所主出入口,安检门,X光安检机,电梯内部,扶梯,楼梯,台阶,室内通道,走廊,前台区域,公共大厅,室内停车场"

# Assemble the demo: ``shot`` is the handler, ``inputs`` supplies its four
# arguments in order, and the result dict is rendered by a "label" output.
iface = gr.Interface(
    fn=shot,
    inputs=inputs,
    outputs="label",
    title="Zero-shot Image Classification (中文零样本图像分类)",
    # One example row; its values follow the same order as ``inputs``.
    examples=[["street.jpg", lei, "ViT/B-16", "一张{}的图片。"]],
    description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
            Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
            Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
            To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片,并输入多个分类标签,用英文逗号分隔。可点击页面最下方示例参考。<br>
            You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14' alt='Duplicate Space'></a></p>""",
)

# Launch the app as soon as this module is executed.
iface.launch()