wop committed on
Commit
08eb742
·
verified ·
1 Parent(s): d9ab650

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -60
app.py CHANGED
@@ -7,12 +7,12 @@ client = InferenceClient(
7
 
8
 
9
  def format_prompt(message, history):
10
- prompt = "<s>"
11
- for user_prompt, bot_response in history:
12
- prompt += f"[INST] {user_prompt} [/INST]"
13
- prompt += f" {bot_response}</s> "
14
- prompt += f"[INST] {message} [/INST]"
15
- return prompt
16
 
17
  def generate(
18
  prompt, history, temperature=0.9, max_new_tokens=2000, top_p=0.9, repetition_penalty=1.2,
@@ -28,66 +28,19 @@ def generate(
28
  top_p=top_p,
29
  repetition_penalty=repetition_penalty,
30
  do_sample=True,
31
- seed=50,
32
  )
33
 
34
- print(prompt, history)
35
-
36
  formatted_prompt = format_prompt(prompt, history)
37
 
38
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
39
  output = ""
40
-
41
 
42
  for response in stream:
43
- a = response
44
- a = ""
45
- print(a, end='')
46
-
47
  output += response.token.text
48
  yield output
49
  return output
50
 
51
-
52
- additional_inputs=[
53
- gr.Slider(
54
- label="Temperature",
55
- value=0.9,
56
- minimum=0.0,
57
- maximum=1.0,
58
- step=0.05,
59
- interactive=True,
60
- info="Higher values produce more diverse outputs",
61
- ),
62
- gr.Slider(
63
- label="Max new tokens",
64
- value=256,
65
- minimum=0,
66
- maximum=1048,
67
- step=64,
68
- interactive=True,
69
- info="The maximum numbers of new tokens",
70
- ),
71
- gr.Slider(
72
- label="Top-p (nucleus sampling)",
73
- value=0.90,
74
- minimum=0.0,
75
- maximum=1,
76
- step=0.05,
77
- interactive=True,
78
- info="Higher values sample more low-probability tokens",
79
- ),
80
- gr.Slider(
81
- label="Repetition penalty",
82
- value=1.2,
83
- minimum=1.0,
84
- maximum=2.0,
85
- step=0.05,
86
- interactive=True,
87
- info="Penalize repeated tokens",
88
- )
89
- ]
90
-
91
  css = """
92
  #mkd {
93
  height: 500px;
@@ -97,13 +50,12 @@ css = """
97
  """
98
 
99
  with gr.Blocks(css=css) as demo:
100
- gr.HTML("<h1><center>Mistral 7B Instruct<h1><center>")
101
- gr.HTML("<h3><center>In this demo, you can chat with <a href='https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.1'>Mistral-7B-Instruct</a> model. 💬<h3><center>")
102
- gr.HTML("<h3><center>Learn more about the model <a href='https://huggingface.co/docs/transformers/main/model_doc/mistral'>here</a>. 📚<h3><center>")
103
  gr.ChatInterface(
104
  generate,
105
- additional_inputs=additional_inputs,
106
- examples=[["What is the secret to life?"], ["Write me a recipe for pancakes."]]
107
  )
108
 
109
- demo.launch(debug=True)
 
7
 
8
 
9
  def format_prompt(message, history):
10
+ prompt = "<s>"
11
+ for user_prompt, bot_response in history:
12
+ prompt += f"[INST] {user_prompt} [/INST]"
13
+ prompt += f" {bot_response}</s> "
14
+ prompt += f"[INST] {message} [/INST]"
15
+ return prompt
16
 
17
  def generate(
18
  prompt, history, temperature=0.9, max_new_tokens=2000, top_p=0.9, repetition_penalty=1.2,
 
28
  top_p=top_p,
29
  repetition_penalty=repetition_penalty,
30
  do_sample=True,
31
+ seed=42,
32
  )
33
 
 
 
34
  formatted_prompt = format_prompt(prompt, history)
35
 
36
  stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
37
  output = ""
 
38
 
39
  for response in stream:
 
 
 
 
40
  output += response.token.text
41
  yield output
42
  return output
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  css = """
45
  #mkd {
46
  height: 500px;
 
50
  """
51
 
52
  with gr.Blocks(css=css) as demo:
53
+ gr.HTML("<h1><center><h1><center>")
54
+ gr.HTML("<h3><center><h3><center>")
55
+ gr.HTML("<h3><center><h3><center>")
56
  gr.ChatInterface(
57
  generate,
58
+ examples=[["What is the secret to life?"], ["Write me a recipe for pancakes."], ["Write a short story about Paris."]]
 
59
  )
60
 
61
+ demo.launch(debug=True)