# Spaces: Sleeping (page-status residue from the hosting site; not part of the program)
import gradio as gr | |
from transformers import AutoModelForCausalLM, AutoTokenizer | |
import torch | |
# Checkpoint identifier on the Hugging Face Hub; both the tokenizer and the
# model weights are fetched from this repository.
model_name = "waterdrops0/mistral-nouns400"

# Options forwarded to from_pretrained when loading the weights.
_model_load_options = {
    "device_map": "auto",
    "torch_dtype": torch.float16,
}

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, **_model_load_options)
def generate_text(prompt, max_length=50, temperature=0.7, repetition_penalty=1.2):
    """Generate a sampled continuation of ``prompt`` with the module-level model.

    Args:
        prompt: Text the model should continue.
        max_length: Maximum number of NEW tokens to generate; the prompt's own
            tokens are not counted against this budget.
        temperature: Sampling temperature forwarded to ``model.generate``.
        repetition_penalty: Repetition penalty forwarded to ``model.generate``.

    Returns:
        The decoded text of the newly generated tokens only (the prompt is not
        echoed back), with special tokens stripped.
    """
    # Calling the tokenizer (instead of ``encode``) also returns the attention
    # mask, so generate() does not have to infer which positions are real.
    encoded = tokenizer(prompt, return_tensors="pt").to(model.device)
    input_ids = encoded["input_ids"]
    prompt_length = input_ids.shape[1]

    # Some tokenizers define no pad token; fall back to EOS so generate() gets
    # an explicit pad id rather than choosing one implicitly.
    pad_token_id = tokenizer.pad_token_id
    if pad_token_id is None:
        pad_token_id = tokenizer.eos_token_id

    outputs = model.generate(
        input_ids=input_ids,
        attention_mask=encoded["attention_mask"],
        # Same budget as the former ``max_length + prompt_length`` arithmetic,
        # expressed directly; int() guards against a non-integer UI value.
        max_new_tokens=int(max_length),
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        top_p=0.95,
        top_k=60,
        pad_token_id=pad_token_id,
    )

    # The returned sequence starts with the prompt tokens; keep only what
    # follows them.
    generated_tokens = outputs[0, prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True)
# Gradio UI: a prompt box plus one slider per generation setting, in the same
# order as the parameters of generate_text.
_prompt_box = gr.Textbox(
    lines=2,
    placeholder="Enter your prompt here...",
    label="Prompt",
)
_max_length_slider = gr.Slider(
    minimum=10, maximum=200, step=10, value=50, label="Max Length"
)
_temperature_slider = gr.Slider(
    minimum=0.1, maximum=1.0, step=0.1, value=0.7, label="Temperature"
)
_repetition_penalty_slider = gr.Slider(
    minimum=1.0, maximum=2.0, step=0.1, value=1.2, label="Repetition Penalty"
)

iface = gr.Interface(
    fn=generate_text,
    inputs=[
        _prompt_box,
        _max_length_slider,
        _temperature_slider,
        _repetition_penalty_slider,
    ],
    outputs=gr.Textbox(label="Generated Text"),
    title="Mistral 7B Nouns Model",
    description="Generate text using the fine-tuned Mistral 7B model with repetition penalty.",
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()