Spaces:

Azure99
/

Blossom-9B-Demo

Running on Zero

App Files Community

Azure99 committed on Jul 24

Commit

86adf77

•

1 Parent(s): 99add8e

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -9

app.py CHANGED Viewed

@@ -6,18 +6,12 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MAX_INPUT_LIMIT = 3584
 MODEL_NAME = "Azure99/blossom-v5.1-9b"
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-GENERATE_CONFIG = dict(
-    max_new_tokens=1536,
-    temperature=0.5,
-    top_p=0.85,
-    top_k=50,
-    repetition_penalty=1.05
-)
 def get_input_ids(inst, history):
     prefix = ("A chat between a human and an artificial intelligence bot. "
@@ -38,15 +32,22 @@ def get_input_ids(inst, history):
 @spaces.GPU
-def chat(inst, history):
     with torch.no_grad():
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         input_ids = get_input_ids(inst, history)
         if len(input_ids) > MAX_INPUT_LIMIT:
             yield "The input is too long, please clear the history."
             return
         generation_kwargs = dict(input_ids=torch.tensor([input_ids]).to(model.device), do_sample=True,
-                                 streamer=streamer, **GENERATE_CONFIG)
         Thread(target=model.generate, kwargs=generation_kwargs).start()
         outputs = ""
@@ -55,6 +56,36 @@ def chat(inst, history):
             yield outputs
 gr.ChatInterface(chat,
                  chatbot=gr.Chatbot(show_label=False, height=500, show_copy_button=True, render_markdown=True),
                  textbox=gr.Textbox(placeholder="", container=False, scale=7),
@@ -63,6 +94,8 @@ gr.ChatInterface(chat,
                              '<a href="https://github.com/Azure99/BlossomLM">GitHub</a>',
                  theme="soft",
                  examples=["Hello", "What is MBTI", "用Python实现二分查找", "为switch写一篇小红书种草文案，带上emoji"],
                  clear_btn="🗑️Clear",
                  undo_btn="↩️Undo",
                  retry_btn="🔄Retry",

 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 MAX_INPUT_LIMIT = 3584
+MAX_NEW_TOKENS = 1536
 MODEL_NAME = "Azure99/blossom-v5.1-9b"
 model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
 tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
 def get_input_ids(inst, history):
     prefix = ("A chat between a human and an artificial intelligence bot. "
 @spaces.GPU
+def chat(inst, history, temperature, top_p, repetition_penalty):
     with torch.no_grad():
         streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
         input_ids = get_input_ids(inst, history)
         if len(input_ids) > MAX_INPUT_LIMIT:
             yield "The input is too long, please clear the history."
             return
+        generate_config = dict(
+            max_new_tokens=MAX_NEW_TOKENS,
+            temperature=temperature,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty
+        )
+        print(generate_config)
         generation_kwargs = dict(input_ids=torch.tensor([input_ids]).to(model.device), do_sample=True,
+                                 streamer=streamer, **generate_config)
         Thread(target=model.generate, kwargs=generation_kwargs).start()
         outputs = ""
             yield outputs
+additional_inputs = [
+    gr.Slider(
+        label="Temperature",
+        value=0.5,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Controls randomness in choosing words.",
+    ),
+    gr.Slider(
+        label="Top-P",
+        value=0.85,
+        minimum=0.0,
+        maximum=1.0,
+        step=0.05,
+        interactive=True,
+        info="Picks words until their combined probability is at least top_p.",
+    ),
+    gr.Slider(
+        label="Repetition penalty",
+        value=1.05,
+        minimum=1.0,
+        maximum=1.2,
+        step=0.01,
+        interactive=True,
+        info="Repetition Penalty: Controls how much repetition is penalized.",
+    )
+]
 gr.ChatInterface(chat,
                  chatbot=gr.Chatbot(show_label=False, height=500, show_copy_button=True, render_markdown=True),
                  textbox=gr.Textbox(placeholder="", container=False, scale=7),
                              '<a href="https://github.com/Azure99/BlossomLM">GitHub</a>',
                  theme="soft",
                  examples=["Hello", "What is MBTI", "用Python实现二分查找", "为switch写一篇小红书种草文案，带上emoji"],
+                 additional_inputs=additional_inputs,
+                 additional_inputs_accordion=gr.Accordion(label="Config", open=True),
                  clear_btn="🗑️Clear",
                  undo_btn="↩️Undo",
                  retry_btn="🔄Retry",