Spaces:

Ateeqq
/

Meta-Llama-3-8B-Instruct

Running

File size: 1,071 Bytes

2b6b154
 
 
 
d24a963
 
1b3204d
 
 
 
 
 
d24a963
2b6b154
 
09e1b8b
1b3204d
 
 
 
2b6b154
09e1b8b
 
 
1b3204d
 
09e1b8b

import gradio as gr
import spaces
import torch

import transformers
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face Hub checkpoint used for both the tokenizer and the weights.
model_name = "meta-llama/Meta-Llama-3-8B"

# Loaded once at import time so every chat request reuses the same objects.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,  # load the weights in half precision
    device_map="auto",  # leave device placement to the loader
)


@spaces.GPU
def yes_man(message, history):
    """Generate the model's reply to a single chat message.

    Args:
        message: The latest user message, as plain text.
        history: Earlier conversation turns passed in by the chat UI.
            Not used: every message is answered without prior context.

    Returns:
        The decoded text of the newly generated tokens only, with special
        tokens removed. The prompt itself is not repeated in the reply.
    """
    # Keep the whole encoding (not just input_ids) so the attention mask
    # produced by the tokenizer can be handed to generate() as well.
    encoded = tokenizer(message, return_tensors="pt").to(model.device)
    prompt_length = encoded.input_ids.shape[1]

    output = model.generate(
        input_ids=encoded.input_ids,
        attention_mask=encoded.attention_mask,
        # max_new_tokens budgets the reply alone; max_length also counted
        # the prompt, so a long message left little or no room to answer.
        max_new_tokens=512,
        num_return_sequences=1,
    )

    # For a causal LM the returned sequence starts with the prompt tokens;
    # drop them so the chat reply does not echo the user's own message.
    reply_ids = output[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)

# Build the chat UI around yes_man, then start serving it.
demo = gr.ChatInterface(
    fn=yes_man,
    # Page text and look.
    title="LLAMA 3 8B Chat",
    description="Ask Yes Man any question",
    theme="soft",
    # Conversation widgets.
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(
        placeholder="Enter message here",
        container=False,
        scale=7,
    ),
    # Clickable sample prompts; their outputs are cached.
    examples=["Hello", "Am I cool?", "Are tomatoes vegetables?"],
    cache_examples=True,
    # Button configuration: no retry button, custom labels for the others.
    retry_btn=None,
    undo_btn="Delete Previous",
    clear_btn="Clear",
)
demo.launch()