BryanBradfo committed
Commit 0574f0a · Parent: 1960e32

print of response

Files changed (1): app.py (+75 −51)
app.py CHANGED
@@ -23,6 +23,43 @@ This app demonstrates the text generation capabilities of Google's Gemma 2-2B-IT
 Enter a prompt below and see the model generate text in real-time!
 """)
 
+# Function to load model
+@st.cache_resource(show_spinner=False)
+def load_model():
+    try:
+        # Get API Token
+        huggingface_token = os.getenv("HF_TOKEN")
+        if not huggingface_token:
+            return None, None, "No Hugging Face API token found. Please add your token as a secret named 'HF_TOKEN'."
+
+        # Attempt to download model with explicit token
+        tokenizer = AutoTokenizer.from_pretrained(
+            "google/gemma-2-2b-it",
+            token=huggingface_token
+        )
+
+        model = AutoModelForCausalLM.from_pretrained(
+            "google/gemma-2-2b-it",
+            token=huggingface_token,
+            torch_dtype=torch.float16,
+            device_map="auto"
+        )
+        return tokenizer, model, None
+    except Exception as e:
+        return None, None, str(e)
+
+# Try to load the model at startup
+with st.spinner("Initializing the Gemma model... this may take a minute."):
+    tokenizer, model, load_error = load_model()
+
+if load_error:
+    st.error(f"Error loading model: {load_error}")
+else:
+    if tokenizer and model:
+        st.success("✅ Gemma model loaded successfully! Ready to generate text.")
+    else:
+        st.warning("⚠️ Model not loaded. Please check your Hugging Face token.")
+
 # Check for Hugging Face Token
 huggingface_token = os.getenv("HF_TOKEN")
 if not huggingface_token:
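Note on the new loader: st.cache_resource memoizes the function's return value across Streamlit reruns, so the heavy from_pretrained calls run only once per process. A side effect is that the (None, None, error) tuple produced when HF_TOKEN is missing is cached the same way and can stick until the process restarts or the cache is cleared. A minimal sketch of one way to keep the failure out of the cache, checking the token before calling the cached loader (a hypothetical rearrangement, not part of this commit):

import os

# Hypothetical guard: skip the cached loader entirely while no token is set,
# so st.cache_resource never stores the failure tuple.
if os.getenv("HF_TOKEN"):
    tokenizer, model, load_error = load_model()
else:
    tokenizer, model, load_error = None, None, "HF_TOKEN is not set"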
@@ -99,39 +136,12 @@ user_input = st.text_area("Enter your prompt:",
     height=100,
     placeholder="e.g., Write a short story about a robot discovering emotions")
 
-# Function to load model and generate text
-@st.cache_resource(show_spinner=False)
-def load_model():
-    try:
-        # Get API Token
-        huggingface_token = os.getenv("HF_TOKEN")
-        if not huggingface_token:
-            raise ValueError("No Hugging Face API token found. Please add your token as a secret named 'HF_TOKEN'.")
-
-        # Attempt to download model with explicit token
-        tokenizer = AutoTokenizer.from_pretrained(
-            "google/gemma-2-2b-it",
-            token=huggingface_token,
-            use_fast=True
-        )
-
-        model = AutoModelForCausalLM.from_pretrained(
-            "google/gemma-2-2b-it",
-            token=huggingface_token,
-            torch_dtype=torch.float16,
-            device_map="auto"
-        )
-        return tokenizer, model
-    except Exception as e:
-        # Re-raise the exception to be handled in the calling function
-        raise e
-
 def generate_text(prompt, max_new_tokens=300, temperature=0.7):
+    if not tokenizer or not model:
+        st.session_state.error_message = "Model not properly loaded. Please check your Hugging Face token."
+        return None
+
     try:
-        with st.spinner("Loading model... (this may take a minute on first run)"):
-            tokenizer, model = load_model()
-
-        # Simpler approach: use the model's built-in text generation capabilities
         # Format the prompt according to Gemma's expected format
         formatted_prompt = f"<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
 
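The formatted_prompt string above hard-codes Gemma's turn markers (<bos>, <start_of_turn>, <end_of_turn>). The tokenizer ships the same template built in, which is less likely to drift if the model is ever swapped. A sketch of the equivalent call, reusing the diff's variable names (not part of this commit):

# Equivalent prompt construction via the tokenizer's chat template.
messages = [{"role": "user", "content": prompt}]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # appends the model turn header
    return_tensors="pt",
).to(model.device)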
@@ -141,40 +151,40 @@ def generate_text(prompt, max_new_tokens=300, temperature=0.7):
         output_area = st.empty()
         status_text.text("Generating response...")
 
-        # Tokenize the input with attention mask explicitly set
-        encoding = tokenizer(formatted_prompt, return_tensors="pt")
-        input_ids = encoding["input_ids"].to(model.device)
+        # Tokenize the input
+        encoding = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
+        input_ids = encoding["input_ids"]
 
-        # Create an attention mask of ones (attend to all tokens)
+        # Ensure we have a proper attention mask
         attention_mask = torch.ones_like(input_ids)
 
-        # Generate the full text at once (simpler and more reliable)
-        generated_ids = model.generate(
+        # Simple approach - generate all at once
+        output_ids = model.generate(
             input_ids=input_ids,
             attention_mask=attention_mask,
             max_new_tokens=max_new_tokens,
             do_sample=True,
             temperature=temperature,
-            pad_token_id=tokenizer.eos_token_id,
+            pad_token_id=tokenizer.eos_token_id
         )
 
-        # Get only the newly generated tokens (exclude input prompt)
-        generated_text = tokenizer.decode(generated_ids[0][input_ids.shape[1]:], skip_special_tokens=True)
+        st.write("Generation completed, processing output...")
 
-        # Simulate token-by-token generation for visual effect
+        # Get only the generated part (exclude the prompt)
+        new_tokens = output_ids[0][input_ids.shape[1]:]
+        generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
+
+        # Display incrementally for visual effect
+        display_text = ""
         words = generated_text.split()
-        displayed_text = ""
+        total_words = len(words)
 
         for i, word in enumerate(words):
-            displayed_text += word + " "
-
-            # Update progress and display
-            progress = min(1.0, (i + 1) / len(words))
+            display_text += word + " "
+            progress = min(1.0, (i + 1) / total_words)
             progress_bar.progress(progress)
-            output_area.markdown(f"**Generated Response:**\n\n{displayed_text}")
-
-            # Small delay for visual effect
-            time.sleep(0.05)
+            output_area.markdown(f"**Generated Response:**\n\n{display_text}")
+            time.sleep(0.05)  # Brief delay for visual effect
 
         status_text.text("Generation complete!")
         progress_bar.progress(1.0)
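The word-by-word loop above replays text that has already been fully generated, so the streaming is cosmetic (time.sleep(0.05) adds roughly a second per 20 words). transformers supports genuine incremental output via TextIteratorStreamer, with generate running in a background thread and decoded chunks arriving as they are produced. A sketch under the same variable names (not part of this commit):

from threading import Thread
from transformers import TextIteratorStreamer

# Stream tokens as the model emits them instead of replaying the final text.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
generation_kwargs = dict(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=max_new_tokens,
    do_sample=True,
    temperature=temperature,
    pad_token_id=tokenizer.eos_token_id,
    streamer=streamer,
)
Thread(target=model.generate, kwargs=generation_kwargs).start()

generated_text = ""
for chunk in streamer:  # yields decoded text incrementally
    generated_text += chunk
    output_area.markdown(f"**Generated Response:**\n\n{generated_text}")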
@@ -182,7 +192,8 @@ def generate_text(prompt, max_new_tokens=300, temperature=0.7):
         return generated_text
 
     except Exception as e:
-        st.session_state.error_message = str(e)
+        st.session_state.error_message = f"Error during generation: {str(e)}"
+        st.error(f"Error during generation: {str(e)}")
         return None
 
 # Show any existing error
@@ -210,6 +221,17 @@ if st.session_state.error_message:
     4. Add your token to the Space: Settings → Secrets → New Secret (HF_TOKEN)
     """)
 
+# Add a debug section
+with st.expander("Debug Information"):
+    st.write(f"Model loaded: {model is not None}")
+    st.write(f"Tokenizer loaded: {tokenizer is not None}")
+    st.write(f"Device mapping: {model.hf_device_map if model else 'N/A'}")
+    st.write(f"Hugging Face token set: {huggingface_token is not None}")
+    if torch.cuda.is_available():
+        st.write(f"CUDA available: True (Device count: {torch.cuda.device_count()})")
+    else:
+        st.write("CUDA available: False")
+
 # Generate button
 if st.button("Generate Text"):
     # Reset any previous errors
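A caveat in the debug expander: models loaded with device_map="auto" record their placement in the hf_device_map attribute (the diff's original model.device_map would raise AttributeError), and the attribute only exists when a device map was actually used. A guarded read is safer; a sketch (not part of this commit):

# Guarded read: fall back to the model's single device when no map was used.
device_info = getattr(model, "hf_device_map", None) if model else None
st.write(f"Device mapping: {device_info or 'N/A'}")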
@@ -219,7 +241,9 @@ if st.button("Generate Text"):
         st.error("Hugging Face token is required! Please add your token as described above.")
     elif user_input:
         st.session_state.user_prompt = user_input
+        st.write("Starting text generation...")
         result = generate_text(user_input, max_length, temperature)
+        st.write(f"Generation result: {'Success' if result else 'Failed'}")
         if result is not None:  # Only set if no error occurred
             st.session_state.generated_text = result
             st.session_state.generation_complete = True
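A small wrinkle in the new status line: 'Success' if result else 'Failed' reports Failed for a valid but empty generation (result == ""), while the if result is not None check below it still stores that result. A sketch that keeps the two conditions aligned (not part of this commit):

# Distinguish an error (None) from a valid but empty generation ("").
status = "Failed" if result is None else "Success"
st.write(f"Generation result: {status}")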
 
23
  Enter a prompt below and see the model generate text in real-time!
24
  """)
25
 
26
+ # Function to load model
27
+ @st.cache_resource(show_spinner=False)
28
+ def load_model():
29
+ try:
30
+ # Get API Token
31
+ huggingface_token = os.getenv("HF_TOKEN")
32
+ if not huggingface_token:
33
+ return None, None, "No Hugging Face API token found. Please add your token as a secret named 'HF_TOKEN'."
34
+
35
+ # Attempt to download model with explicit token
36
+ tokenizer = AutoTokenizer.from_pretrained(
37
+ "google/gemma-2-2b-it",
38
+ token=huggingface_token
39
+ )
40
+
41
+ model = AutoModelForCausalLM.from_pretrained(
42
+ "google/gemma-2-2b-it",
43
+ token=huggingface_token,
44
+ torch_dtype=torch.float16,
45
+ device_map="auto"
46
+ )
47
+ return tokenizer, model, None
48
+ except Exception as e:
49
+ return None, None, str(e)
50
+
51
+ # Try to load the model at startup
52
+ with st.spinner("Initializing the Gemma model... this may take a minute."):
53
+ tokenizer, model, load_error = load_model()
54
+
55
+ if load_error:
56
+ st.error(f"Error loading model: {load_error}")
57
+ else:
58
+ if tokenizer and model:
59
+ st.success("✅ Gemma model loaded successfully! Ready to generate text.")
60
+ else:
61
+ st.warning("⚠️ Model not loaded. Please check your Hugging Face token.")
62
+
63
  # Check for Hugging Face Token
64
  huggingface_token = os.getenv("HF_TOKEN")
65
  if not huggingface_token:
 
136
  height=100,
137
  placeholder="e.g., Write a short story about a robot discovering emotions")
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  def generate_text(prompt, max_new_tokens=300, temperature=0.7):
140
+ if not tokenizer or not model:
141
+ st.session_state.error_message = "Model not properly loaded. Please check your Hugging Face token."
142
+ return None
143
+
144
  try:
 
 
 
 
145
  # Format the prompt according to Gemma's expected format
146
  formatted_prompt = f"<bos><start_of_turn>user\n{prompt}<end_of_turn>\n<start_of_turn>model\n"
147
 
 
151
  output_area = st.empty()
152
  status_text.text("Generating response...")
153
 
154
+ # Tokenize the input
155
+ encoding = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
156
+ input_ids = encoding["input_ids"]
157
 
158
+ # Ensure we have a proper attention mask
159
  attention_mask = torch.ones_like(input_ids)
160
 
161
+ # Simple approach - generate all at once
162
+ output_ids = model.generate(
163
  input_ids=input_ids,
164
  attention_mask=attention_mask,
165
  max_new_tokens=max_new_tokens,
166
  do_sample=True,
167
  temperature=temperature,
168
+ pad_token_id=tokenizer.eos_token_id
169
  )
170
 
171
+ st.write("Generation completed, processing output...")
 
172
 
173
+ # Get only the generated part (exclude the prompt)
174
+ new_tokens = output_ids[0][input_ids.shape[1]:]
175
+ generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)
176
+
177
+ # Display incrementally for visual effect
178
+ display_text = ""
179
  words = generated_text.split()
180
+ total_words = len(words)
181
 
182
  for i, word in enumerate(words):
183
+ display_text += word + " "
184
+ progress = min(1.0, (i + 1) / total_words)
 
 
185
  progress_bar.progress(progress)
186
+ output_area.markdown(f"**Generated Response:**\n\n{display_text}")
187
+ time.sleep(0.05) # Brief delay for visual effect
 
 
188
 
189
  status_text.text("Generation complete!")
190
  progress_bar.progress(1.0)
 
192
  return generated_text
193
 
194
  except Exception as e:
195
+ st.session_state.error_message = f"Error during generation: {str(e)}"
196
+ st.error(f"Error during generation: {str(e)}")
197
  return None
198
 
199
  # Show any existing error
 
221
  4. Add your token to the Space: Settings → Secrets → New Secret (HF_TOKEN)
222
  """)
223
 
224
+ # Add a debug section
225
+ with st.expander("Debug Information"):
226
+ st.write(f"Model loaded: {model is not None}")
227
+ st.write(f"Tokenizer loaded: {tokenizer is not None}")
228
+ st.write(f"Device mapping: {model.device_map if model else 'N/A'}")
229
+ st.write(f"Hugging Face token set: {huggingface_token is not None}")
230
+ if torch.cuda.is_available():
231
+ st.write(f"CUDA available: True (Device count: {torch.cuda.device_count()})")
232
+ else:
233
+ st.write("CUDA available: False")
234
+
235
  # Generate button
236
  if st.button("Generate Text"):
237
  # Reset any previous errors
 
241
  st.error("Hugging Face token is required! Please add your token as described above.")
242
  elif user_input:
243
  st.session_state.user_prompt = user_input
244
+ st.write("Starting text generation...")
245
  result = generate_text(user_input, max_length, temperature)
246
+ st.write(f"Generation result: {'Success' if result else 'Failed'}")
247
  if result is not None: # Only set if no error occurred
248
  st.session_state.generated_text = result
249
  st.session_state.generation_complete = True