CreitinGameplays committed
Commit 4950e4d
1 Parent(s): 7f42dfb

Update app.py

Files changed (1):
  1. app.py +37 -27
app.py CHANGED
@@ -5,50 +5,60 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 # Define the BLOOM model name
 model_name = "CreitinGameplays/bloom-3b-conversational"
 
-# Load tokenizer and model (outside the function for efficiency)
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
 def generate_text(user_prompt):
-    """Generates text using the pre-loaded BLOOM model and removes the user prompt."""
+    """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
     # Construct the full prompt with system introduction, user prompt, and assistant role
     prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
+    encoded_prompt = tokenizer(prompt, return_tensors="pt").input_ids
 
-    # Encode the entire prompt into tokens
-    prompt_encoded = tokenizer(prompt, return_tensors="pt").input_ids
-
-    # Generate text with the complete prompt and limit the maximum length to 256 tokens
-    output = model.generate(
-        input_ids=prompt_encoded,
-        max_length=256,
-        num_beams=1,
-        num_return_sequences=1,
-        do_sample=True,
-        top_k=50,
-        top_p=0.95,
-        temperature=0.2,
-        repetition_penalty=1.155
-    )
-
-    # Decode the generated token sequence back to text
-    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Extract the assistant's response (assuming it starts with "<|assistant|>")
-    assistant_response = generated_text.split("<|assistant|>")[-1]
-    assistant_response = assistant_response.replace(f"{user_prompt}", "").strip()
+    # Initialize variables for real-time generation
+    generated_text = ""
+    current_output = tokenizer("<|assistant|>", return_tensors="pt").input_ids  # already a tensor; no torch.tensor() wrapper needed
+
+    for char in user_prompt:
+        # Encode character and concatenate with previous output
+        # (torch.cat requires "import torch" alongside the other imports)
+        encoded_char = tokenizer(char, return_tensors="pt").input_ids
+        input_ids = torch.cat((current_output, encoded_char), dim=-1)
+
+        # Generate text with the current prompt and encoded character
+        output = model.generate(
+            input_ids=input_ids,
+            max_length=256,
+            num_beams=1,
+            num_return_sequences=1,
+            do_sample=True,
+            top_k=50,
+            top_p=0.95,
+            temperature=0.2,
+            repetition_penalty=1.155
+        )
+
+        # Decode the generated token sequence back to text
+        decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+        # Extract and update generated text, removing special tokens
+        generated_text += decoded_text.split("<|assistant|>")[-1].strip()
+        current_output = input_ids
+
+    # Remove prompt and user input from final response
+    assistant_response = generated_text.replace(f"{user_prompt}", "").strip()
     assistant_response = assistant_response.replace("You are a helpful AI assistant.", "").strip()
 
     return assistant_response
 
-# Define the Gradio interface with live=True for real-time updates
+# Define the Gradio interface with streaming enabled
 interface = gr.Interface(
     fn=generate_text,
     inputs=[
-        gr.Textbox(label="Text Prompt", value="What's an AI?"),
+        gr.Textbox(label="Text Prompt", value=""),  # gr.Textbox has no "verbatim" type; plain text input
     ],
     outputs="text",
     description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
-    live=True
+    live=True,  # gr.Interface has no "allow_user_code" option; live=True re-runs generation as the input changes
 )
 
 # Launch the Gradio interface
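
Note on the approach: the per-character loop above re-runs model.generate() once for every character of the prompt and only returns when the loop finishes, so the UI still receives a single final string. Token-level streaming is more conventionally built from transformers' TextIteratorStreamer plus a Gradio generator function. A minimal sketch under those assumptions (transformers >= 4.28, Gradio with queueing enabled; not code from this commit):

import gradio as gr
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

model_name = "CreitinGameplays/bloom-3b-conversational"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_text(user_prompt):
    """Streams the assistant's reply to the UI as it is generated."""
    prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True keeps the echoed prompt out of the streamed text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=256,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.2,
        repetition_penalty=1.155,
    )
    # generate() blocks, so run it in a thread while draining the streamer
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio renders each yielded value incrementally

interface = gr.Interface(
    fn=generate_text,
    inputs=[gr.Textbox(label="Text Prompt", value="What's an AI?")],
    outputs="text",
    description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
)
interface.queue().launch()  # generator outputs require the queue

Here generate_text yields a growing string that Gradio streams to the output box once per generated token; live=True, by contrast, re-runs the whole function on every input change.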