CreitinGameplays committed
Commit 4950e4d
1 Parent(s): 7f42dfb

Update app.py

Files changed (1):
  1. app.py +37 -27
app.py CHANGED
@@ -5,50 +5,60 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
 # Define the BLOOM model name
 model_name = "CreitinGameplays/bloom-3b-conversational"
 
-# Load tokenizer and model (outside the function for efficiency)
+# Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(model_name)
 
 def generate_text(user_prompt):
-    """Generates text using the pre-loaded BLOOM model and removes the user prompt."""
+    """Generates text using the BLOOM model from Hugging Face Transformers and removes the user prompt."""
     # Construct the full prompt with system introduction, user prompt, and assistant role
     prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
+    encoded_prompt = tokenizer(prompt, return_tensors="pt").input_ids
 
-    # Encode the entire prompt into tokens
-    prompt_encoded = tokenizer(prompt, return_tensors="pt").input_ids
-
-    # Generate text with the complete prompt and limit the maximum length to 256 tokens
-    output = model.generate(
-        input_ids=prompt_encoded,
-        max_length=256,
-        num_beams=1,
-        num_return_sequences=1,
-        do_sample=True,
-        top_k=50,
-        top_p=0.95,
-        temperature=0.2,
-        repetition_penalty=1.155
-    )
-
-    # Decode the generated token sequence back to text
-    generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
-
-    # Extract the assistant's response (assuming it starts with "<|assistant|>")
-    assistant_response = generated_text.split("<|assistant|>")[-1]
-    assistant_response = assistant_response.replace(f"{user_prompt}", "").strip()
+    # Initialize variables for real-time generation
+    generated_text = ""
+    current_output = tokenizer("<|assistant|>", return_tensors="pt").input_ids  # already a tensor; no torch.tensor() wrapper needed
+
+    for char in user_prompt:
+        # Encode character and concatenate with previous output
+        # (torch.cat requires "import torch" alongside the other imports)
+        encoded_char = tokenizer(char, return_tensors="pt").input_ids
+        input_ids = torch.cat((current_output, encoded_char), dim=-1)
+
+        # Generate text with the current prompt and encoded character
+        output = model.generate(
+            input_ids=input_ids,
+            max_length=256,
+            num_beams=1,
+            num_return_sequences=1,
+            do_sample=True,
+            top_k=50,
+            top_p=0.95,
+            temperature=0.2,
+            repetition_penalty=1.155
+        )
+
+        # Decode the generated token sequence back to text
+        decoded_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+        # Extract and update generated text, removing special tokens
+        generated_text += decoded_text.split("<|assistant|>")[-1].strip()
+        current_output = input_ids
+
+    # Remove prompt and user input from final response
+    assistant_response = generated_text.replace(f"{user_prompt}", "").strip()
     assistant_response = assistant_response.replace("You are a helpful AI assistant.", "").strip()
 
     return assistant_response
 
-# Define the Gradio interface with live=True for real-time updates
+# Define the Gradio interface with streaming enabled
 interface = gr.Interface(
     fn=generate_text,
     inputs=[
-        gr.Textbox(label="Text Prompt", value="What's an AI?"),
+        gr.Textbox(label="Text Prompt", value=""),  # gr.Textbox has no "verbatim" type; plain text input
     ],
     outputs="text",
     description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
-    live=True
+    live=True,  # gr.Interface has no "allow_user_code" option; live=True re-runs generation as the input changes
 )
 
 # Launch the Gradio interface
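
Note on the approach: the per-character loop above re-runs model.generate() once for every character of the prompt and only returns when the loop finishes, so the UI still receives a single final string. Token-level streaming is more conventionally built from transformers' TextIteratorStreamer plus a Gradio generator function. A minimal sketch under those assumptions (transformers >= 4.28, Gradio with queueing enabled; not code from this commit):

import gradio as gr
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer

model_name = "CreitinGameplays/bloom-3b-conversational"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def generate_text(user_prompt):
    """Streams the assistant's reply to the UI as it is generated."""
    prompt = f"<|system|> You are a helpful AI assistant. </s> <|prompter|> {user_prompt} </s> <|assistant|>"
    inputs = tokenizer(prompt, return_tensors="pt")
    # skip_prompt=True keeps the echoed prompt out of the streamed text
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=256,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.2,
        repetition_penalty=1.155,
    )
    # generate() blocks, so run it in a thread while draining the streamer
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial  # Gradio renders each yielded value incrementally

interface = gr.Interface(
    fn=generate_text,
    inputs=[gr.Textbox(label="Text Prompt", value="What's an AI?")],
    outputs="text",
    description="Interact with BLOOM-3b-conversational (Loaded with Hugging Face Transformers)",
)
interface.queue().launch()  # generator outputs require the queue

Here generate_text yields a growing string that Gradio streams to the output box once per generated token; live=True, by contrast, re-runs the whole function on every input change.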