Spaces:

DexterSptizu
/

SmolLM2-1.7B-Instruct

Running

App Files Files Community

SmolLM2-1.7B-Instruct / app.py

DexterSptizu

Update app.py

ddd1c57 verified about 13 hours ago

raw

history blame contribute delete

3.9 kB

	import torch
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Registry of selectable checkpoints. Keys are the names offered in the UI
	# dropdown; values are the Hugging Face Hub repo ids handed to from_pretrained.
	MODELS = {
	    display_name: f"HuggingFaceTB/{display_name}"
	    for display_name in (
	        "SmolLM2-135M-Instruct",
	        "SmolLM2-360M-Instruct",
	        "SmolLM2-1.7B-Instruct",
	    )
	}

	class ModelHandler:
	    """Keep a single SmolLM2 checkpoint and its tokenizer loaded at a time.

	    Attributes:
	        current_model: The loaded causal LM, or None until a load succeeds.
	        current_tokenizer: Tokenizer belonging to current_model, or None.
	        device: "cuda" when torch reports a CUDA device, otherwise "cpu".
	    """

	    def __init__(self):
	        self.current_model = None
	        self.current_tokenizer = None
	        # Callers move their input tensors to this device, so it has to name
	        # the GPU when one is present: device_map="auto" in load_model is
	        # expected to place the model there.
	        self.device = "cuda" if torch.cuda.is_available() else "cpu"

	    def load_model(self, model_name):
	        """Load the checkpoint registered under model_name in MODELS.

	        Args:
	            model_name: A key of MODELS.

	        Returns:
	            "Successfully loaded <model_name>" on success, otherwise
	            "Error loading model: <reason>". Exceptions are not propagated.
	        """
	        try:
	            checkpoint = MODELS[model_name]
	            tokenizer = AutoTokenizer.from_pretrained(checkpoint)
	            model = AutoModelForCausalLM.from_pretrained(
	                checkpoint,
	                torch_dtype=torch.bfloat16,
	                device_map="auto",
	            )
	        except Exception as e:
	            return f"Error loading model: {str(e)}"

	        # Swap both in only after both loaded, so a failed load can never
	        # leave a new tokenizer paired with the previously loaded model.
	        self.current_tokenizer = tokenizer
	        self.current_model = model
	        return f"Successfully loaded {model_name}"

	# Single module-level handler used by generate_text; it keeps the most
	# recently loaded model and tokenizer around so repeated requests for the
	# same model do not reload it.
	model_handler = ModelHandler()

	def generate_text(model_name, prompt, max_tokens, temperature, top_p):
	    """Generate a sampled reply to prompt with the selected SmolLM2 model.

	    The model is (re)loaded through the module-level model_handler whenever
	    the requested checkpoint differs from the one currently held.

	    Args:
	        model_name: A key of MODELS selecting the checkpoint.
	        prompt: User message, wrapped as a single-turn chat.
	        max_tokens: Upper bound on newly generated tokens.
	        temperature: Sampling temperature passed to generate().
	        top_p: Nucleus-sampling threshold passed to generate().

	    Returns:
	        The decoded output sequence (special tokens stripped), or a string
	        starting with "Error" if loading or generation failed. Exceptions
	        are not propagated, so the UI always receives text.
	    """
	    try:
	        # Load model if it's different from the current one
	        loaded = model_handler.current_model
	        if loaded is None or MODELS[model_name] != loaded.name_or_path:
	            load_status = model_handler.load_model(model_name)
	            # Match the exact failure prefix instead of any "Error" substring.
	            if load_status.startswith("Error loading model:"):
	                return load_status

	        tokenizer = model_handler.current_tokenizer
	        model = model_handler.current_model

	        # Format input as chat message. add_generation_prompt=True ends the
	        # prompt with the assistant header so the model answers the user
	        # turn rather than possibly continuing it.
	        messages = [{"role": "user", "content": prompt}]
	        input_text = tokenizer.apply_chat_template(
	            messages,
	            tokenize=False,
	            add_generation_prompt=True,
	        )

	        # Tokenize and move the ids to wherever the model actually lives;
	        # device_map="auto" decides that at load time, so the handler's own
	        # device string is not a reliable source.
	        inputs = tokenizer.encode(input_text, return_tensors="pt").to(model.device)

	        # Generate. Slider values may arrive as floats, while
	        # max_new_tokens is a token count.
	        outputs = model.generate(
	            inputs,
	            max_new_tokens=int(max_tokens),
	            temperature=temperature,
	            top_p=top_p,
	            do_sample=True,
	        )

	        # Decode and return
	        return tokenizer.decode(outputs[0], skip_special_tokens=True)

	    except Exception as e:
	        return f"Error during generation: {str(e)}"

	# Assemble the web UI. The five input widgets map, in order, onto the
	# parameters of generate_text: model choice, prompt, token budget,
	# temperature and top-p.
	iface = gr.Interface(
	    title="SmolLM2 Model Comparison",
	    description="""
	Compare different sizes of SmolLM2 models:
	- SmolLM2-135M-Instruct: Smallest and fastest
	- SmolLM2-360M-Instruct: Balanced size and performance
	- SmolLM2-1.7B-Instruct: Largest and most capable
	""",
	    fn=generate_text,
	    inputs=[
	        gr.Dropdown(label="Select Model", value="SmolLM2-360M-Instruct", choices=list(MODELS)),
	        gr.Textbox(label="Enter your prompt", lines=3, placeholder="What would you like to know?"),
	        gr.Slider(label="Maximum Tokens", minimum=10, maximum=500, step=10, value=50),
	        gr.Slider(label="Temperature", minimum=0.1, maximum=1.0, step=0.1, value=0.2),
	        gr.Slider(label="Top P", minimum=0.1, maximum=1.0, step=0.1, value=0.9),
	    ],
	    outputs=gr.Textbox(lines=5, label="Generated Response"),
	    # Every canned example uses the 360M checkpoint with top-p fixed at 0.9;
	    # only the prompt, token budget and temperature vary.
	    examples=[
	        ["SmolLM2-360M-Instruct", question, token_budget, temp, 0.9]
	        for question, token_budget, temp in (
	            ("What is the capital of France?", 50, 0.2),
	            ("Explain quantum computing in simple terms.", 200, 0.3),
	            ("Write a short poem about nature.", 100, 0.7),
	        )
	    ],
	)

	# Start serving only when this file is run as a script, not when imported.
	if __name__ == "__main__":
	    iface.launch(share=True)