Update app.py
app.py CHANGED
@@ -13,7 +13,7 @@ def load_model(repo_name):
     tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
     generator_conf = GenerationConfig.from_pretrained(repo_name)
     model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True, torch_dtype=torch.bfloat16, attn_implementation="eager")
-
+    model.to('cuda')
     return tokenizer, generator_conf, model
 
 tokenizer, generator_conf, model = load_model(REPO_NAME)
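Note: model.to('cuda') assumes a CUDA device is available and will fail at startup on CPU-only hardware. A minimal device-agnostic sketch, not part of this commit (the device variable is illustrative):

import torch

# Fall back to CPU when no GPU is present (assumed variant, not in the commit)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)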
@@ -61,7 +61,8 @@ def respond(
         max_new_tokens=max_tokens,
         do_sample=True,
         top_p=top_p,
-        repetition_penalty=1.2
+        repetition_penalty=1.2,
+        temperature=temperature
     )
 
     generated_text = response_output[0]['generated_text']
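Note: the new temperature keyword only takes effect because do_sample=True is already set; under greedy decoding it would be ignored. A sketch of how the surrounding call presumably looks after this hunk, given that response_output[0]['generated_text'] suggests a transformers text-generation pipeline (the pipe handle and prompt argument are assumptions; only the keyword lines come from the diff):

response_output = pipe(          # pipeline handle name is an assumption
    prompt,                      # prompt construction is not shown in this diff
    max_new_tokens=max_tokens,
    do_sample=True,              # required for temperature/top_p sampling to apply
    top_p=top_p,
    repetition_penalty=1.2,
    temperature=temperature,
)
generated_text = response_output[0]['generated_text']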
@@ -101,7 +102,7 @@ demo = gr.ChatInterface(
     additional_inputs=[
         gr.Textbox(value="" + global_error, label="System message"),
         gr.Slider(minimum=1, maximum=2048, value=64, step=1, label="Max new tokens"),
-
+        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Temperature"),
         gr.Slider(
             minimum=0.1,
             maximum=1.0,
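Note: gr.ChatInterface passes additional_inputs to the callback positionally, after the message and history, in list order. Since the Temperature slider is inserted between "Max new tokens" and the top_p slider, respond must declare temperature in the matching position. Assumed signature (parameter names beyond those visible in the diff are illustrative):

def respond(message, history, system_message, max_tokens, temperature, top_p):
    ...  # temperature now reaches the generation call shown above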