File size: 2,525 Bytes
5b60a87 5527a29 2b0f34f 9e3433c 4950e4d 9e3433c b7402af 9e3433c b793725 4950e4d b793725 4950e4d 9e3433c 4950e4d ea47071 b793725 14ed4a6 4950e4d 14ed4a6 9e3433c 2f2e7ca 4950e4d 2f2e7ca 9e3433c c2f3e10 4950e4d 14ed4a6 9e3433c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 |
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Model identifier handed to both `from_pretrained` calls below, so the
# tokenizer and the model weights always come from the same checkpoint.
model_name = "CreitinGameplays/bloom-3b-conversational"
# Load the tokenizer and the causal-LM once, at import time; `generate_text`
# reads these module-level objects on every request.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
def generate_text(user_prompt):
    """Return the assistant's reply to ``user_prompt``.

    The user text is wrapped in the chat template
    (``<|system|> ... <|prompter|> ... <|assistant|>``), the whole prompt is
    sent to the module-level ``model`` in a single ``generate`` call, and only
    the newly generated tokens are decoded, so the prompt never has to be
    stripped back out of the output with string replacement.

    Args:
        user_prompt: The text typed by the user.

    Returns:
        The generated reply with surrounding whitespace removed, or an empty
        string when ``user_prompt`` is empty or whitespace-only.
    """
    # Nothing to answer: skip the (expensive) generation call entirely.
    if not user_prompt or not user_prompt.strip():
        return ""

    # Construct the full prompt with system introduction, user prompt, and
    # assistant role marker; the model continues from "<|assistant|>".
    prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
    encoded = tokenizer(prompt, return_tensors="pt")
    prompt_ids = encoded.input_ids

    output = model.generate(
        input_ids=prompt_ids,
        # Pass the mask explicitly so generate() does not have to guess it.
        attention_mask=encoded.attention_mask,
        # Budget applies to the reply only; a `max_length` cap would also
        # count the prompt tokens and leave long prompts with no room.
        max_new_tokens=256,
        num_beams=1,
        num_return_sequences=1,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.2,
        repetition_penalty=1.155,
    )

    # A decoder-only model returns prompt + continuation; keep only the part
    # after the prompt so the reply contains neither the system text nor the
    # user's own words.
    reply_ids = output[0][prompt_ids.shape[-1]:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True).strip()
# Define the Gradio interface: one text prompt in, the model's reply out.
# `generate_text` returns the complete reply in one piece (no streaming).
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        # `type` is left at its default ("text"); Textbox only accepts
        # "text", "password" or "email" for that argument.
        gr.Textbox(label="Text Prompt", value=""),
    ],
    outputs="text",
    description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
)
# Launch the Gradio interface
interface.launch()
|