salomonsky committed
Commit f600f75 (verified)
1 Parent(s): 02f697f

Update app.py

Files changed (1)
1. app.py (+14 -22)
app.py CHANGED
@@ -1,29 +1,29 @@
-import concurrent.futures
-import gradio as gr
-from dogpile.cache import make_region
 from huggingface_hub import InferenceClient
+import gradio as gr
 
-cache = make_region().configure('dogpile.cache.memory', thread_local=True)
-
-system_prompt = ""
+client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+system_prompt = "Deberás proporcinar código limpio, resumido, investigando cada línea en internet en foros, revisa stackoverflow.com para consultas, elimina comentarios siempre, concatena el funcionamiento de los bloques y bibliotecas, y esquematiza el funcionamiento global del código. Preveé posibles errores y complementa al final con una tabla explicando el funcionamiento, propon alternativas de bibliotecas para solucionar errores, siempre consulta internet para posibles resoluciones."
 system_prompt_sent = False
 
 def format_prompt(message, history):
     global system_prompt_sent
-    prompt = "[INST] "
+    prompt = "<s>"
 
-    if not any(f"[INST] {system_prompt} " in user_prompt for user_prompt, _ in history):
-        prompt += f"{system_prompt} "
+    if not any(f"[INST] {system_prompt} [/INST]" in user_prompt for user_prompt, _ in history):
+        prompt += f"[INST] {system_prompt} [/INST]"
         system_prompt_sent = True
 
     for user_prompt, bot_response in history:
-        prompt += f"[INST] {user_prompt} [{bot_response}]"
-        prompt += " "
+        prompt += f"[INST] {user_prompt} [/INST]"
+        prompt += f" {bot_response}</s> "
 
-    prompt += f"[INST] {message} "
+    prompt += f"[INST] {message} [/INST]"
     return prompt
 
-def generate(prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.95, repetition_penalty=1.0):
+def generate(
+    prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.95, repetition_penalty=1.0,
+):
+    global system_prompt_sent
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
@@ -39,11 +39,7 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.95,
     )
 
     formatted_prompt = format_prompt(prompt, history)
-    cache_key = f"generate:{formatted_prompt}:{temperature}:{max_new_tokens}:{top_p}:{repetition_penalty}"
-    cached_response = cache.get(cache_key)
-    if cached_response is not None:
-        return cached_response
-
+
     stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=True)
     output = ""
 
@@ -51,12 +47,8 @@ def generate(prompt, history, temperature=0.9, max_new_tokens=4096, top_p=0.95,
         output += response.token.text
         yield output
 
-    cache.set(cache_key, output)
-
     return output
 
-client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
-
 chat_interface = gr.ChatInterface(
     fn=generate,
     chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=False, likeable=False, layout="vertical", height=900),
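
For reference, the rewritten format_prompt assembles the Mixtral instruct template: a leading "<s>[INST] system [/INST]" block, one "[INST] user [/INST] answer</s>" pair per history turn, and finally the new user message left open for the model to complete. Below is a minimal standalone sketch of that layout; build_mixtral_prompt and the short English system prompt are illustrative stand-ins, not names or values from app.py (the committed code uses the long Spanish system_prompt shown in the diff).

# Hypothetical, self-contained sketch of the prompt layout used by format_prompt above.
# The system prompt and history here are placeholder example values.
system_prompt = "You are a helpful assistant."

def build_mixtral_prompt(message, history):
    # history is a list of (user, assistant) tuples, as gr.ChatInterface passes it
    prompt = "<s>"
    prompt += f"[INST] {system_prompt} [/INST]"                      # system turn
    for user_turn, bot_turn in history:
        prompt += f"[INST] {user_turn} [/INST] {bot_turn}</s> "      # one closed exchange per turn
    prompt += f"[INST] {message} [/INST]"                            # new user message, left open
    return prompt

print(build_mixtral_prompt("And in Python?", [("Print hello in C.", 'printf("hello");')]))
# -> <s>[INST] You are a helpful assistant. [/INST][INST] Print hello in C. [/INST] printf("hello");</s> [INST] And in Python? [/INST]

The streaming side of generate is the usual progressive-update pattern: client.text_generation(..., stream=True, details=True) yields token chunks, the loop appends each response.token.text to output, and yielding the growing string lets gr.ChatInterface render the reply incrementally.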