torVik committed
Commit 1d4ee6a · verified · 1 parent: 655cd27

Update app.py

Files changed (1)
  1. app.py +100 -155
app.py CHANGED
@@ -16,6 +16,8 @@ HF_TOKEN = os.environ.get("HF_TOKEN")
 if HF_TOKEN is None:
     print("Warning: HF_TOKEN is not set!")
 
+PASSWORD = os.getenv("APP_PASSWORD", "mysecretpassword")  # Set your desired password here or via environment variable
+
 DESCRIPTION = "# Mistral-7B v0.2"
 
 if not torch.cuda.is_available():
@@ -104,6 +106,12 @@ def generate(
         raise e  # Re-raise the error after logging it
 
 
+def password_auth(password):
+    if password == PASSWORD:
+        return gr.update(visible=True), gr.update(visible=False)
+    else:
+        return gr.update(visible=False), gr.update(visible=True, value="Incorrect password. Try again.")
+
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
@@ -158,12 +166,27 @@ print("Setting up interface...")
 
 with gr.Blocks(css="style.css") as demo:
     gr.Markdown(DESCRIPTION)
-    gr.DuplicateButton(
-        value="Duplicate Space for private use",
-        elem_id="duplicate-button",
-        visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
-    )
-    chat_interface.render()
+
+    # Create login components
+    with gr.Row(visible=True) as login_area:
+        password_input = gr.Textbox(
+            label="Enter Password", type="password", placeholder="Password", show_label=True
+        )
+        login_btn = gr.Button("Submit")
+        incorrect_password_msg = gr.Markdown("Incorrect password. Try again.", visible=False)
+
+    # Main chat interface
+    with gr.Column(visible=False) as chat_area:
+        gr.Markdown(DESCRIPTION)
+        gr.DuplicateButton(
+            value="Duplicate Space for private use",
+            elem_id="duplicate-button",
+            visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+        )
+        chat_interface.render()
+
+    # Bind login button to check password
+    login_btn.click(password_auth, inputs=password_input, outputs=[chat_area, incorrect_password_msg])
 
 # Debugging: Starting queue and launching the demo
 print("Launching demo...")
@@ -173,7 +196,8 @@ if __name__ == "__main__":
 
 
 
-#!/usr/bin/env python
+# WORKING
+# #!/usr/bin/env python
 
 # import os
 # from threading import Thread
@@ -184,21 +208,39 @@ if __name__ == "__main__":
 # import torch
 # from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 
+# # Debugging: Start script
+# print("Starting script...")
+
 # HF_TOKEN = os.environ.get("HF_TOKEN")
+# if HF_TOKEN is None:
+#     print("Warning: HF_TOKEN is not set!")
 
 # DESCRIPTION = "# Mistral-7B v0.2"
 
 # if not torch.cuda.is_available():
 #     DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"
+#     print("Warning: No GPU available. This model cannot run on CPU.")
+# else:
+#     print("GPU is available!")
 
 # MAX_MAX_NEW_TOKENS = 2048
 # DEFAULT_MAX_NEW_TOKENS = 1024
 # MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
+# # Debugging: GPU check passed, loading model
 # if torch.cuda.is_available():
 #     model_id = "mistralai/Mistral-7B-Instruct-v0.2"
-#     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto", token=HF_TOKEN)
-#     tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+#     try:
+#         print("Loading model...")
+#         model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto", token=HF_TOKEN)
+#         print("Model loaded successfully!")
+
+#         print("Loading tokenizer...")
+#         tokenizer = AutoTokenizer.from_pretrained(model_id, token=HF_TOKEN)
+#         print("Tokenizer loaded successfully!")
+#     except Exception as e:
+#         print(f"Error loading model or tokenizer: {e}")
+#         raise e  # Re-raise the error after logging it
 
 
 # @spaces.GPU
@@ -211,36 +253,54 @@ if __name__ == "__main__":
 #     top_k: int = 50,
 #     repetition_penalty: float = 1.2,
 # ) -> Iterator[str]:
+#     print(f"Received message: {message}")
+#     print(f"Chat history: {chat_history}")
+
 #     conversation = []
 #     for user, assistant in chat_history:
 #         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
 #     conversation.append({"role": "user", "content": message})
 
-#     input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
-#     if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
-#         input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-#         gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
-#     input_ids = input_ids.to(model.device)
-
-#     streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
-#     generate_kwargs = dict(
-#         {"input_ids": input_ids},
-#         streamer=streamer,
-#         max_new_tokens=max_new_tokens,
-#         do_sample=True,
-#         top_p=top_p,
-#         top_k=top_k,
-#         temperature=temperature,
-#         num_beams=1,
-#         repetition_penalty=repetition_penalty,
-#     )
-#     t = Thread(target=model.generate, kwargs=generate_kwargs)
-#     t.start()
-
-#     outputs = []
-#     for text in streamer:
-#         outputs.append(text)
-#         yield "".join(outputs)
+#     try:
+#         print("Tokenizing input...")
+#         input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
+#         print(f"Input tokenized: {input_ids.shape}")
+
+#         if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
+#             input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
+#             gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+#             print("Trimmed input tokens due to length.")
+
+#         input_ids = input_ids.to(model.device)
+#         print("Input moved to the model's device.")
+
+#         streamer = TextIteratorStreamer(tokenizer, timeout=20.0, skip_prompt=True, skip_special_tokens=True)
+#         generate_kwargs = dict(
+#             {"input_ids": input_ids},
+#             streamer=streamer,
+#             max_new_tokens=max_new_tokens,
+#             do_sample=True,
+#             top_p=top_p,
+#             top_k=top_k,
+#             temperature=temperature,
+#             num_beams=1,
+#             repetition_penalty=repetition_penalty,
+#         )
+
+#         print("Starting generation...")
+#         t = Thread(target=model.generate, kwargs=generate_kwargs)
+#         t.start()
+#         print("Thread started for model generation.")
+
+#         outputs = []
+#         for text in streamer:
+#             outputs.append(text)
+#             print(f"Generated text so far: {''.join(outputs)}")
+#             yield "".join(outputs)
+
+#     except Exception as e:
+#         print(f"Error during generation: {e}")
+#         raise e  # Re-raise the error after logging it
 
 
 # chat_interface = gr.ChatInterface(
@@ -292,6 +352,9 @@ if __name__ == "__main__":
 #     ],
 # )
 
+# # Debugging: Interface setup
+# print("Setting up interface...")
+
 # with gr.Blocks(css="style.css") as demo:
 #     gr.Markdown(DESCRIPTION)
 #     gr.DuplicateButton(
@@ -301,126 +364,8 @@ if __name__ == "__main__":
 #     )
 #     chat_interface.render()
 
-# if __name__ == "__main__":
-#     demo.queue(max_size=20).launch(share=True)
-
-# gr.ChatInterface(
-#     fn=generate,
-#     additional_inputs=[
-#         gr.Slider(
-#             label="Max new tokens",
-#             minimum=1,
-#             maximum=MAX_MAX_NEW_TOKENS,
-#             step=1,
-#             value=DEFAULT_MAX_NEW_TOKENS,
-#         ),
-#         gr.Slider(
-#             label="Temperature",
-#             minimum=0.1,
-#             maximum=4.0,
-#             step=0.1,
-#             value=0.6,
-#         ),
-#         gr.Slider(
-#             label="Top-p (nucleus sampling)",
-#             minimum=0.05,
-#             maximum=1.0,
-#             step=0.05,
-#             value=0.9,
-#         ),
-#         gr.Slider(
-#             label="Top-k",
-#             minimum=1,
-#             maximum=1000,
-#             step=1,
-#             value=50,
-#         ),
-#         gr.Slider(
-#             label="Repetition penalty",
-#             minimum=1.0,
-#             maximum=2.0,
-#             step=0.05,
-#             value=1.2,
-#         ),
-#     ],
-#     stop_btn=None,
-#     examples=[
-#         ["Hello there! How are you doing?"],
-#         ["Can you explain briefly to me what is the Python programming language?"],
-#         ["Explain the plot of Cinderella in a sentence."],
-#         ["How many hours does it take a man to eat a Helicopter?"],
-#         ["Write a 100-word article on 'Benefits of Open-Source in AI research'"],
-#     ],
-# ).launch(share=True)
-
-
-
-
-
-# import gradio as gr
-# import spaces
-# from huggingface_hub import InferenceClient
-# import gradio as gr
-
-
-
-# """
-# For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-# """
-# client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-
-# @spaces.GPU()
-# def respond(
-#     message,
-#     history: list[tuple[str, str]],
-#     system_message,
-#     max_tokens,
-#     temperature,
-#     top_p,
-# ):
-#     messages = [{"role": "system", "content": system_message}]
-
-#     for val in history:
-#         if val[0]:
-#             messages.append({"role": "user", "content": val[0]})
-#         if val[1]:
-#             messages.append({"role": "assistant", "content": val[1]})
-
-#     messages.append({"role": "user", "content": message})
-
-#     response = ""
-
-#     for message in client.chat_completion(
-#         messages,
-#         max_tokens=max_tokens,
-#         stream=True,
-#         temperature=temperature,
-#         top_p=top_p,
-#     ):
-#         token = message.choices[0].delta.content
-
-#         response += token
-#         yield response
-
-# """
-# For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-# """
-# demo = gr.ChatInterface(
-#     respond,
-#     additional_inputs=[
-#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-#         gr.Slider(
-#             minimum=0.1,
-#             maximum=1.0,
-#             value=0.95,
-#             step=0.05,
-#             label="Top-p (nucleus sampling)",
-#         ),
-#     ],
-# )
-
+# # Debugging: Starting queue and launching the demo
+# print("Launching demo...")
 
 # if __name__ == "__main__":
-#     demo.launch()
+#     demo.queue(max_size=20).launch(share=True)
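
The change above gates the chat UI by toggling component visibility: a correct password reveals the hidden chat column and hides the error message. Below is a minimal, self-contained sketch of that same pattern (assuming a standard Gradio install; the placeholder content and names such as protected_area are illustrative, not taken from app.py):

import os

import gradio as gr

PASSWORD = os.getenv("APP_PASSWORD", "mysecretpassword")  # same env var convention as the commit

def password_auth(password):
    # Return visibility updates for (protected area, error message).
    if password == PASSWORD:
        return gr.update(visible=True), gr.update(visible=False)
    return gr.update(visible=False), gr.update(visible=True, value="Incorrect password. Try again.")

with gr.Blocks() as demo:
    # Login area, visible by default.
    with gr.Row(visible=True) as login_area:
        password_input = gr.Textbox(label="Enter Password", type="password")
        login_btn = gr.Button("Submit")
        error_msg = gr.Markdown(visible=False)

    # Protected area, hidden until the password matches.
    with gr.Column(visible=False) as protected_area:
        gr.Markdown("Protected content goes here.")  # stand-in for chat_interface.render()

    # Clicking the button toggles the protected column and the error message.
    login_btn.click(password_auth, inputs=password_input, outputs=[protected_area, error_msg])

if __name__ == "__main__":
    demo.launch()

Note that this only hides and shows components in the browser; for stricter access control, Gradio's launch(auth=...) option is an alternative.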