Shuaizhang7's picture
Update app.py
c02bdc0 verified
raw
history blame
4.46 kB
from turtle import title
import gradio as gr
from transformers import pipeline
import numpy as np
from PIL import Image
# One zero-shot image-classification pipeline per selectable model size.
# Every checkpoint is loaded eagerly when the module runs; the keys are the
# names offered by the model-size radio button and looked up in `shot`.
pipes = {
    size: pipeline("zero-shot-image-classification", model=checkpoint)
    for size, checkpoint in (
        ("ViT/B-16", "OFA-Sys/chinese-clip-vit-base-patch16"),
        ("ViT/L-14", "OFA-Sys/chinese-clip-vit-large-patch14"),
        ("ViT/L-14@336px", "OFA-Sys/chinese-clip-vit-large-patch14-336px"),
        ("ViT/H-14", "OFA-Sys/chinese-clip-vit-huge-patch14"),
    )
}
# Input components, listed in the same order as the parameters of `shot`:
# image, candidate-label text, model size, prompt template.
#
# The top-level component classes are used instead of the `gr.inputs.*`
# namespace, which was deprecated in Gradio 3 and removed in Gradio 4; the
# initial value of a component is given with `value=` (formerly `default=`).
inputs = [
    gr.Image(type="pil", label="Image 输入图片"),
    gr.Textbox(lines=1, label="Candidate Labels 候选分类标签"),
    gr.Radio(
        choices=[
            "ViT/B-16",
            "ViT/L-14",
            "ViT/L-14@336px",
            "ViT/H-14",
        ],
        type="value",
        value="ViT/B-16",
        label="Model 模型规模",
    ),
    gr.Textbox(
        lines=1,
        label="Prompt Template Prompt模板 ({}指代候选标签)",
        value="一张{}的图片。",
    ),
]
# NOTE(review): this module-level name is not referenced by any code visible
# in this file (`shot` passes its own `image` argument as the `images=`
# keyword); it looks like a leftover and can probably be removed -- confirm.
images="festival.jpg"
def shot(image, labels_text, model_name, hypothesis_template):
    """Classify ``image`` against a comma-separated list of candidate labels.

    Args:
        image: Image forwarded unchanged to the pipeline as ``images``.
        labels_text: Candidate labels separated by ASCII commas. Whitespace
            around each label is removed; empty entries (for example from a
            trailing or doubled comma) and repeated labels are dropped, keeping
            first-seen order. ``None`` is treated like an empty string.
        model_name: Key selecting the pipeline in the module-level ``pipes``
            mapping.
        hypothesis_template: Prompt template passed through to the pipeline as
            ``hypothesis_template``.

    Returns:
        A dict mapping each label to the score reported by the pipeline.

    Raises:
        ValueError: If ``labels_text`` contains no non-empty label.
        KeyError: If ``model_name`` is not a key of ``pipes``.
    """
    stripped = (part.strip() for part in (labels_text or "").split(","))
    # dict.fromkeys de-duplicates while preserving order. A repeated label
    # would otherwise be scored twice and then collide as a key of the
    # returned dict, silently discarding one of the two scores.
    labels = list(dict.fromkeys(label for label in stripped if label))
    if not labels:
        raise ValueError(
            "Please provide at least one candidate label. "
            "请至少输入一个候选分类标签。"
        )

    res = pipes[model_name](
        images=image,
        candidate_labels=labels,
        hypothesis_template=hypothesis_template,
    )
    return {dic["label"]: dic["score"] for dic in res}
# Comma-separated candidate labels used by the first UI example
# ("festival.jpg"): Chinese names of scene/place categories such as road
# types, bridges and tunnels, entrances and checkpoints, shops, and indoor
# areas. NOTE(review): "人行闸机" appears twice in this list -- confirm whether
# the repetition is intentional.
lei = "机动车道,非机动车道,人车混行道路,斑马线人行道,主干道路,乡间道路,内部小巷,人行横道,十字路口,丁字路口,岔路口,铁路沿线,铁路路口,高架桥,立交桥,过街天桥,桥梁,天桥上下口,地下隧道,地下人行通道,隧道通行区域,穿山隧道,隧道出入口,水池,河流,湖面,室外停车场,路面划线停车位,城市广场,裸露农田,林区,草坪,树木,公交站台,收费站,检查站,加油站,岗亭,车行道闸,人行闸机,安检机器,铁门,保安亭,门或电动门,人员出入口,车辆出入口,广告牌,横幅,沿街商铺,露天烧烤摊,超市,建筑施工,道路施工,人员卡口,车辆卡口,人行闸机,场所主出入口,安检门,X光安检机,电梯内部,扶梯,楼梯,台阶,室内通道,走廊,前台区域,公共大厅,室内停车场"
# Build the demo page: `shot` handles each request, `inputs` supplies the
# four input components, and the result is rendered by a "label" output.
# Each example row provides, in order: image file, candidate labels, model
# size, prompt template.
iface = gr.Interface(
    fn=shot,
    inputs=inputs,
    outputs="label",
    title="Zero-shot Image Classification (中文零样本图像分类)",
    description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
Paper: <a href='https://arxiv.org/abs/2211.01335'>https://arxiv.org/abs/2211.01335</a> <br>
Github: <a href='https://github.com/OFA-Sys/Chinese-CLIP'>https://github.com/OFA-Sys/Chinese-CLIP</a> (Welcome to star! 🔥🔥) <br><br>
To play with this demo, add a picture and a list of labels in Chinese separated by commas. 上传图片,并输入多个分类标签,用英文逗号分隔。可点击页面最下方示例参考。<br>
You can duplicate this space and run it privately: <a href='https://huggingface.co/spaces/OFA-Sys/chinese-clip-zero-shot-image-classification?duplicate=true'><img src='https://img.shields.io/badge/-Duplicate%20Space-blue?labelColor=white&style=flat&logo=&logoWidth=14' alt='Duplicate Space'></a></p>""",
    examples=[
        ["festival.jpg", lei, "ViT/B-16", "一张{}的图片。"],
        ["cat-dog-music.png", "音乐表演, 体育运动", "ViT/B-16", "一张{}的图片。"],
        ["football-match.jpg", "梅西, C罗, 马奎尔", "ViT/B-16", "一张{}的图片。"],
    ],
)

# Start serving the interface as soon as this module is executed.
iface.launch()