# Spaces: Running
from functools import lru_cache

import gradio as gr
from transformers import T5TokenizerFast, CLIPTokenizer
@lru_cache(maxsize=None)
def _load_tokenizers():
    """Return the (T5, CLIP) tokenizer pair, loading it only on first use.

    ``from_pretrained`` is expensive, and the tokenizers do not depend on
    the text being counted, so the pair is memoized and reused by every
    later call.
    """
    t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
    clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
    return t5_tokenizer, clip_tokenizer


def count_tokens(text):
    """Count the tokens in *text* with the T5 and CLIP tokenizers.

    Args:
        text: The string to tokenize. ``None`` is treated as an empty string.

    Returns:
        A 2-tuple of display strings, ``("T5: <n> tokens", "CLIP: <n> tokens")``.
        Counts are the length of each tokenizer's ``encode`` output, called
        with its default arguments.
    """
    if text is None:
        # An unset input is counted as empty text instead of being passed
        # to the tokenizers as None.
        text = ""

    t5_tokenizer, clip_tokenizer = _load_tokenizers()

    # Get token counts directly using the encode method
    t5_count = len(t5_tokenizer.encode(text))
    clip_count = len(clip_tokenizer.encode(text))
    return f"T5: {t5_count} tokens", f"CLIP: {clip_count} tokens"
# Build the UI components first: a single text input, and one output
# textbox for each tokenizer's result string.
_text_input = gr.Textbox(label="Text", placeholder="Enter text here...")
_t5_output = gr.Textbox(label="T5 Tokenizer")
_clip_output = gr.Textbox(label="CLIP Tokenizer")

# Wire the components to count_tokens; the two outputs receive the two
# strings it returns, in order.
iface = gr.Interface(
    fn=count_tokens,
    inputs=[_text_input],
    outputs=[_t5_output, _clip_output],
    title="Common Diffusion Model Token Counter",
    description="Enter text to count tokens using T5 and CLIP tokenizers, commonly used in diffusion models.",
)

# Start serving the app
iface.launch()