SantiagoTesla committed on
Commit
79a677f
·
1 Parent(s): 954464a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -1
app.py CHANGED
@@ -1,2 +1,123 @@
1
  import gradio as gr
2
- gr.Interface.load("models/bigscience/bloom").launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import gradio as gr
from llm_rs import AutoModel, SessionConfig, GenerationConfig, Precision

# Quantized MPT-7B-Instruct weights in GGML/GGJT format, served through
# the llm-rs (rustformers) Python bindings.
repo_name = "rustformers/mpt-7b-ggml"
file_name = "mpt-7b-instruct-q5_1-ggjt.bin"

# Keep the inference session lightweight: two worker threads, batch size two.
session_config = SessionConfig(threads=2, batch_size=2)

# Download (if not cached) and load the model once at import time.
model = AutoModel.from_pretrained(
    repo_name,
    model_file=file_name,
    session_config=session_config,
    verbose=True,
)
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Stream a generated answer for *instruction*.

    Wraps the instruction in the Alpaca-style prompt template, then yields
    the cumulative response text after each new token so the Gradio output
    widget can update live.

    Args:
        instruction: the user's question/instruction text.
        temperature, top_p, top_k, max_new_tokens, seed: sampling settings
            forwarded to ``GenerationConfig``.

    Yields:
        str: the full response accumulated so far.
    """
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    gen_cfg = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    so_far = ""
    for piece in model.stream(prompt=prompt, generation_config=gen_cfg):
        so_far += piece
        yield so_far
# Example prompts for the gr.Examples widget.
# FIX: `examples` was referenced below (gr.Examples(examples=examples, ...))
# but never defined anywhere in the file, so the app crashed with a
# NameError at startup. Define a small set of sample instructions here.
examples = [
    "Write a short poem about the ocean.",
    "Explain the difference between a list and a tuple in Python.",
    "Give three tips for improving sleep quality.",
]

# Build the Gradio UI: an instruction box, sampling controls in an
# accordion, a submit button, and a Markdown area that is live-updated
# by the streaming generator `process_stream`.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )

                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,  # precision=0 forces an integer value
                            )
            with gr.Row():
                submit = gr.Button("Submit")
            with gr.Row():
                with gr.Box():
                    gr.Markdown("**MPT-7B-Instruct**")
                    output_7b = gr.Markdown()

            with gr.Row():
                gr.Examples(
                    examples=examples,
                    inputs=[instruction],
                    cache_examples=False,
                    fn=process_stream,
                    outputs=output_7b,
                )

    # Both the button click and pressing Enter in the textbox trigger
    # the same streaming generation into the Markdown output.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )

# Queue with a small backlog and single worker so the 7B model is never
# asked to serve two generations at once.
demo.queue(max_size=4, concurrency_count=1).launch(debug=True)