alfredplpl committed on
Commit
26f08ee
·
verified ·
1 Parent(s): f490dc4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -49,7 +49,7 @@ model = AutoModelForCausalLM.from_pretrained("llm-jp/llm-jp-13b-instruct-full-ac
49
  #model=model.eval()
50
 
51
  @spaces.GPU()
52
- def chat_llama3_8b(message: str,
53
  history: list,
54
  temperature: float,
55
  max_new_tokens: int
@@ -70,10 +70,6 @@ def chat_llama3_8b(message: str,
70
  conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
71
  conversation.append({"role": "user", "content": message})
72
 
73
- # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
74
- if temperature == 0:
75
- generate_kwargs['do_sample'] = False
76
-
77
  tokenized_input = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=True, return_tensors="pt").to(model.device)
78
  with torch.no_grad():
79
  output = model.generate(
@@ -95,12 +91,12 @@ with gr.Blocks(fill_height=True, css=css) as demo:
95
  gr.Markdown(DESCRIPTION)
96
  gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
97
  gr.ChatInterface(
98
- fn=chat_llama3_8b,
99
  chatbot=chatbot,
100
  fill_height=True,
101
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
102
  additional_inputs=[
103
- gr.Slider(minimum=0,
104
  maximum=1,
105
  step=0.1,
106
  value=0.7,
 
49
  #model=model.eval()
50
 
51
  @spaces.GPU()
52
+ def chat_llm_jp_v2(message: str,
53
  history: list,
54
  temperature: float,
55
  max_new_tokens: int
 
70
  conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
71
  conversation.append({"role": "user", "content": message})
72
 
 
 
 
 
73
  tokenized_input = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=True, return_tensors="pt").to(model.device)
74
  with torch.no_grad():
75
  output = model.generate(
 
91
  gr.Markdown(DESCRIPTION)
92
  gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
93
  gr.ChatInterface(
94
+ fn=chat_llm_jp_v2,
95
  chatbot=chatbot,
96
  fill_height=True,
97
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
98
  additional_inputs=[
99
+ gr.Slider(minimum=0.1,
100
  maximum=1,
101
  step=0.1,
102
  value=0.7,