Spaces:

CreitinGameplays
/

bloom-3b-conversational-gradio

Sleeping

File size: 2,525 Bytes

5b60a87
5527a29
2b0f34f
9e3433c
 
 
 
4950e4d
9e3433c
b7402af
9e3433c
b793725
4950e4d
b793725
 
4950e4d
9e3433c
4950e4d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea47071
 
b793725
14ed4a6
4950e4d
14ed4a6
9e3433c
2f2e7ca
4950e4d
2f2e7ca
9e3433c
c2f3e10
4950e4d
14ed4a6
 
 
9e3433c

import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Define the BLOOM model name
model_name = "CreitinGameplays/bloom-3b-conversational"

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_text(user_prompt):
  """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
  # Construct the full prompt with system introduction, user prompt, and assistant role
  prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
  encoded_prompt = tokenizer(prompt, return_tensors="pt").input_ids

  # Initialize variables for real-time generation
  generated_text = ""
  current_output = torch.tensor([tokenizer.encode("<|assistant|>", return_tensors="pt").input_ids[0]])

  for char in user_prompt:
    # Encode character and concatenate with previous output
    encoded_char = torch.tensor([tokenizer.encode(char, return_tensors="pt").input_ids[0]])
    input_ids = torch.cat((current_output, encoded_char), dim=-1)

    # Generate text with the current prompt and encoded character
    output = model.generate(
        input_ids=input_ids,
        max_length=256,
        num_beams=1,
        num_return_sequences=1,  
        do_sample=True,  
        top_k=50,  
        top_p=0.95,  
        temperature=0.2,  
        repetition_penalty=1.155
    )

    # Decode the generated token sequence back to text
    decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)

    # Extract and update generated text, removing special tokens
    generated_text += decoded_text.split("<|assistant|>")[-1].strip()
    current_output = input_ids

  # Remove prompt and user input from final response
  assistant_response = generated_text.replace(f"{user_prompt}", "").strip()
  assistant_response = assistant_response.replace("You are a helpful AI assistant.", "").strip()
  
  return assistant_response

# Define the Gradio interface with streaming enabled
interface = gr.Interface(
  fn=generate_text,
  inputs=[
      gr.Textbox(label="Text Prompt", value="", type="verbatim"),  # Set type to "verbatim" for character-by-character input
  ],
  outputs="text",
  description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
  **{"allow_user_code": True},  # Enable user code execution for real-time updates
)

# Launch the Gradio interface
interface.launch()