Spaces:

CreitinGameplays
/

bloom-3b-conversational-gradio

Sleeping

CreitinGameplays committed on Apr 12

Commit

b793725

•

1 Parent(s): 33f79d4

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -9,14 +9,17 @@ model_name = "CreitinGameplays/bloom-3b-conversational"
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
-def generate_text(prompt):
-  """Generates text using the BLOOM model from Hugging Face Transformers."""
-  # Encode the prompt into tokens
-  input_ids = tokenizer(prompt, return_tensors="pt").input_ids
-  # Generate text with the prompt and limit the maximum length to 256 tokens
   output = model.generate(
-      input_ids=input_ids,
       max_length=256,
       num_beams=1,
       num_return_sequences=1,  # Generate only 1 sequence
@@ -29,7 +32,11 @@ def generate_text(prompt):
   # Decode the generated token sequence back to text
   generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-  return generated_text
 # Define the Gradio interface
 interface = gr.Interface(

 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
+def generate_text(user_prompt):
+  """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
+  # Construct the full prompt with system introduction, user prompt, and assistant role
+  prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
+  # Encode the entire prompt into tokens
+  prompt_encoded = tokenizer(prompt, return_tensors="pt").input_ids
+  # Generate text with the complete prompt and limit the maximum length to 256 tokens
   output = model.generate(
+      input_ids=prompt_encoded,
       max_length=256,
       num_beams=1,
       num_return_sequences=1,  # Generate only 1 sequence
   # Decode the generated token sequence back to text
   generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+  # Extract the assistant's response (assuming it starts with "<|assistant|>")
+  assistant_response = generated_text.split("<|assistant|>")[-1]
+  return assistant_response
 # Define the Gradio interface
 interface = gr.Interface(