Spaces:

takarajordan
/

DiffusionTokenizer

Running

Jordan Legg

unified the approach to not rely on HF models, just input text.

b39e76c 3 months ago

992 Bytes

	import functools

	import gradio as gr
	from transformers import T5TokenizerFast, CLIPTokenizer

	@functools.lru_cache(maxsize=None)
	def _load_tokenizers():
	    """Load the T5 and CLIP tokenizers once and reuse them on later calls.

	    The result is memoised, so ``from_pretrained`` runs only on the first
	    successful call; a failed load is not cached and is retried next time.

	    Returns:
	        A ``(t5_tokenizer, clip_tokenizer)`` tuple.
	    """
	    t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
	    clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
	    return t5_tokenizer, clip_tokenizer


	def count_tokens(text):
	    """Count the tokens ``text`` produces under the T5 and CLIP tokenizers.

	    Args:
	        text: The prompt to tokenize. ``None`` is treated as an empty string.

	    Returns:
	        A pair of display strings, ``("T5: <n> tokens", "CLIP: <n> tokens")``.
	    """
	    if text is None:
	        text = ""

	    # Loading is delegated to the cached helper so a request does not
	    # re-load both tokenizers every time the function is invoked.
	    t5_tokenizer, clip_tokenizer = _load_tokenizers()

	    # encode() is called with its defaults, so each count includes whatever
	    # special tokens the tokenizer adds by default.
	    t5_count = len(t5_tokenizer.encode(text))
	    clip_count = len(clip_tokenizer.encode(text))

	    return f"T5: {t5_count} tokens", f"CLIP: {clip_count} tokens"

	# Build the UI components first so the Interface call below is plain wiring.
	# They are created in the same order as before: input, T5 output, CLIP output.
	_prompt_box = gr.Textbox(label="Text", placeholder="Enter text here...")
	_t5_box = gr.Textbox(label="T5 Tokenizer")
	_clip_box = gr.Textbox(label="CLIP Tokenizer")

	# One text input feeds count_tokens; the two strings it returns fill the
	# two output boxes in order.
	iface = gr.Interface(
	    fn=count_tokens,
	    inputs=[_prompt_box],
	    outputs=[_t5_box, _clip_box],
	    title="Common Diffusion Model Token Counter",
	    description="Enter text to count tokens using T5 and CLIP tokenizers, commonly used in diffusion models.",
	)

	# Start serving the app
	iface.launch()