Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -11,7 +11,6 @@ description = "You can use [🐋🐳microsoft/Orca-2-13b](https://huggingface.co
|
|
11 |
|
12 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
13 |
model_name = "microsoft/Orca-2-13b"
|
14 |
-
|
15 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
16 |
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
|
17 |
|
@@ -49,7 +48,7 @@ class OrcaChatBot:
|
|
49 |
temperature=temperature,
|
50 |
top_p=top_p,
|
51 |
repetition_penalty=repetition_penalty,
|
52 |
-
|
53 |
do_sample=True
|
54 |
)
|
55 |
|
@@ -70,7 +69,7 @@ iface = gr.Interface(
|
|
70 |
inputs=[
|
71 |
gr.Textbox(label="Your Message", type="text", lines=3),
|
72 |
gr.Textbox(label="Introduce a Character Here or Set a Scene (system prompt)", type="text", lines=2),
|
73 |
-
gr.Slider(label="Max new tokens", value=
|
74 |
gr.Slider(label="Temperature", value=0.1, minimum=0.05, maximum=1.0, step=0.05),
|
75 |
gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05),
|
76 |
gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)
|
@@ -79,4 +78,4 @@ iface = gr.Interface(
|
|
79 |
theme="ParityError/Anime"
|
80 |
)
|
81 |
|
82 |
-
iface.launch()
|
|
|
11 |
|
12 |
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
|
13 |
model_name = "microsoft/Orca-2-13b"
|
|
|
14 |
tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
|
15 |
model = transformers.AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
|
16 |
|
|
|
48 |
temperature=temperature,
|
49 |
top_p=top_p,
|
50 |
repetition_penalty=repetition_penalty,
|
51 |
+
# pad_token_id=self.tokenizer.eos_token_id,
|
52 |
do_sample=True
|
53 |
)
|
54 |
|
|
|
69 |
inputs=[
|
70 |
gr.Textbox(label="Your Message", type="text", lines=3),
|
71 |
gr.Textbox(label="Introduce a Character Here or Set a Scene (system prompt)", type="text", lines=2),
|
72 |
+
gr.Slider(label="Max new tokens", value=550, minimum=360, maximum=600, step=1),
|
73 |
gr.Slider(label="Temperature", value=0.1, minimum=0.05, maximum=1.0, step=0.05),
|
74 |
gr.Slider(label="Top-p (nucleus sampling)", value=0.90, minimum=0.01, maximum=0.99, step=0.05),
|
75 |
gr.Slider(label="Repetition penalty", value=1.9, minimum=1.0, maximum=2.0, step=0.05)
|
|
|
78 |
theme="ParityError/Anime"
|
79 |
)
|
80 |
|
81 |
+
iface.queue(max_size=5).launch()  # cap pending requests at 5; queue() is called on the Interface itself, then launch() is chained
|