File size: 1,772 Bytes
2b6b154 d24a963 1b3204d 145ecb9 d24a963 145ecb9 2b6b154 4581f8a 145ecb9 4581f8a 145ecb9 afd19d1 145ecb9 4581f8a 145ecb9 afd19d1 145ecb9 2b6b154 09e1b8b 145ecb9 1a33cc7 1b3204d 4581f8a fda0234 afd19d1 4581f8a 09e1b8b 4581f8a 09e1b8b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import gradio as gr
import spaces
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face model id for the instruction-tuned Llama 3 8B checkpoint.
model_name = "meta-llama/Meta-Llama-3-8B-Instruct"

# Build the text-generation pipeline once at import time so every request
# reuses the already-loaded weights. bfloat16 halves memory versus float32
# on GPUs that support it; device="cuda" places the model on the GPU
# (provisioned on demand by the @spaces.GPU decorator below).
pipeline = transformers.pipeline(
    "text-generation",
    model=model_name,
    model_kwargs={"torch_dtype": torch.bfloat16},
    device="cuda",
)
@spaces.GPU
def chat_function(message, history, system_prompt, max_new_tokens, temperature):
    """Generate one assistant reply with Llama 3, conditioned on the chat so far.

    Args:
        message: The latest user message from the textbox.
        history: Prior turns supplied by gr.ChatInterface as
            (user, assistant) pairs; assistant may be None for a pending turn.
        system_prompt: System instruction prepended to the conversation.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature from the UI slider (0-1).

    Returns:
        The model's completion text with the prompt prefix stripped.
    """
    # Rebuild the full conversation so the model actually sees earlier turns.
    # (Previously `history` was dropped, which broke multi-turn chat: the model
    # only ever saw the system prompt plus the newest message.)
    messages = [{"role": "system", "content": system_prompt}]
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn is not None:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})

    # Render the messages with the model's chat template; keep the raw string
    # so the prompt length can be sliced off the returned text below.
    prompt = pipeline.tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
    )

    # Llama 3 marks end-of-turn with <|eot_id|> in addition to the tokenizer's
    # regular EOS token, so stop on either.
    terminators = [
        pipeline.tokenizer.eos_token_id,
        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
    ]

    # Offset keeps temperature strictly positive: the UI slider starts at 0,
    # and do_sample=True rejects a temperature of 0.
    temp = temperature + 0.1

    outputs = pipeline(
        prompt,
        max_new_tokens=max_new_tokens,
        eos_token_id=terminators,
        do_sample=True,
        temperature=temp,
        top_p=0.9,
    )
    # The pipeline returns prompt + completion; return only the completion.
    return outputs[0]["generated_text"][len(prompt):]
# Assemble the chat UI around chat_function and start the app.
chat_display = gr.Chatbot(height=400)
input_box = gr.Textbox(placeholder="Enter message here", container=False, scale=7)

# Extra controls surfaced under "Additional Inputs"; their values arrive as the
# system_prompt / max_new_tokens / temperature arguments of chat_function.
extra_controls = [
    gr.Textbox("You are helpful AI.", label="System Prompt"),
    gr.Slider(512, 4096, label="Max New Tokens"),
    gr.Slider(0, 1, label="Temperature"),
]

demo = gr.ChatInterface(
    fn=chat_function,
    chatbot=chat_display,
    textbox=input_box,
    title="LLAMA 3 8B Chat",
    description="""
This space is dedicated for chatting with Meta's Latest LLM - Llama 8b Instruct. Find this model here: https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct
Feel free to play with customization in the "Additional Inputs".
""",
    theme="soft",
    additional_inputs=extra_controls,
)
demo.launch()