TejAndrewsACC committed
Commit b39d091 · verified · 1 Parent(s): d40f491

Update app.py

Files changed (1)
  1. app.py +44 -19
app.py CHANGED
@@ -1,33 +1,58 @@
 
  from transformers import AutoModelForCausalLM, AutoTokenizer, StopStringCriteria, StoppingCriteriaList
  import torch

  # Load the tokenizer and model
  repo_name = "nvidia/Hymba-1.5B-Instruct"
-
  tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
  model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)
- model = model.cuda().to(torch.bfloat16)

- # Chat with Hymba
- prompt = input()

  messages = [
      {"role": "system", "content": "You are a helpful assistant."}
  ]
- messages.append({"role": "user", "content": prompt})
-
- # Apply chat template
- tokenized_chat = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
- stopping_criteria = StoppingCriteriaList([StopStringCriteria(tokenizer=tokenizer, stop_strings="</s>")])
- outputs = model.generate(
-     tokenized_chat,
-     max_new_tokens=256,
-     do_sample=False,
-     temperature=0.7,
-     use_cache=True,
-     stopping_criteria=stopping_criteria
  )
- input_length = tokenized_chat.shape[1]
- response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)

- print(f"Model response: {response}")
 
+ import gradio as gr
  from transformers import AutoModelForCausalLM, AutoTokenizer, StopStringCriteria, StoppingCriteriaList
  import torch

  # Load the tokenizer and model
  repo_name = "nvidia/Hymba-1.5B-Instruct"
  tokenizer = AutoTokenizer.from_pretrained(repo_name, trust_remote_code=True)
  model = AutoModelForCausalLM.from_pretrained(repo_name, trust_remote_code=True)

+ # Move the model to GPU with float16 precision for efficiency
+ model = model.to("cuda").to(torch.float16)

+ # Initialize the conversation history
  messages = [
      {"role": "system", "content": "You are a helpful assistant."}
  ]
+
+ # Define stopping criteria
+ stopping_criteria = StoppingCriteriaList([StopStringCriteria(tokenizer=tokenizer, stop_strings=["</s>"])])
+
+ # Chat function for Gradio interface
+ def chat_function(user_input):
+     # Add user message to the conversation history
+     messages.append({"role": "user", "content": user_input})
+
+     # Tokenize the conversation
+     tokenized_chat = tokenizer(messages, padding=True, truncation=True, return_tensors="pt").to("cuda")
+
+     # Generate a response
+     outputs = model.generate(
+         tokenized_chat["input_ids"],
+         max_new_tokens=256,
+         do_sample=False,
+         temperature=0.7,
+         use_cache=True,
+         stopping_criteria=stopping_criteria
+     )
+
+     # Decode the output response
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     # Add the assistant's response to the conversation history
+     messages.append({"role": "assistant", "content": response})
+
+     return response
+
+ # Set up Gradio interface with the chatbot template
+ iface = gr.Interface(
+     fn=chat_function,
+     inputs=gr.inputs.Textbox(label="Your message", placeholder="Enter your message here..."),
+     outputs=gr.outputs.Chatbot(),
+     live=True,
+     title="Hymba Chatbot",
+     description="Chat with the Hymba-1.5B-Instruct model!"
  )

+ # Launch the Gradio interface
+ iface.launch()
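
Note on the new chat_function: as committed it passes the list of role/content dicts straight to tokenizer(...), which a Hugging Face tokenizer cannot encode, and it decodes outputs[0] in full, so the returned string would include the prompt. A minimal sketch of an alternative, reusing apply_chat_template from the removed version together with the messages, tokenizer, model and stopping_criteria defined in app.py above (not part of this commit):

# Sketch only: chat_function built on apply_chat_template, assuming the
# module-level names from app.py above are in scope.
def chat_function(user_input):
    # Add the user turn to the running conversation
    messages.append({"role": "user", "content": user_input})

    # apply_chat_template renders the role/content dicts into a prompt;
    # calling tokenizer(...) on the list of dicts would fail instead.
    tokenized_chat = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")

    outputs = model.generate(
        tokenized_chat,
        max_new_tokens=256,
        do_sample=False,  # greedy decoding; temperature only matters when do_sample=True
        use_cache=True,
        stopping_criteria=stopping_criteria,
    )

    # Slice off the prompt tokens so only the assistant's reply is returned
    input_length = tokenized_chat.shape[1]
    response = tokenizer.decode(outputs[0][input_length:], skip_special_tokens=True)

    # Keep the assistant turn in the history for the next call
    messages.append({"role": "assistant", "content": response})
    return response

Separately, the gr.inputs / gr.outputs namespaces are the legacy Gradio API and have been removed in Gradio 4.x; gr.Textbox and gr.Chatbot (or gr.ChatInterface) are the current equivalents, and a gr.Chatbot output expects the conversation history rather than a single string.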