Spaces:

Moreza009
/

aya23-8b-4bitdq

Runtime error

File size: 1,764 Bytes

feb8c72
ccfca8d
 
feb8c72
 
 
8d0b34d
 
 
019c20a
 
feb8c72
019c20a
 
feb8c72
 
8d0b34d
 
 
feb8c72
 
e018ed2
8d0b34d
 
feb8c72
8d0b34d
 
 
 
feb8c72
8d0b34d
 
feb8c72
8d0b34d
 
feb8c72

import gradio as gr
import accelerate 
import bitsandbytes
"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub repository to load from, and the GGUF file to pick inside that repository.
model_id = "MaziyarPanahi/Mistral-7B-Instruct-Aya-101-GGUF"
filename = "Mistral-7B-Instruct-Aya-101.Q8_0.gguf"

# The tokenizer and the model are both loaded from the same GGUF file;
# the tokenizer is loaded first, then the model.
tokenizer, model = (
    loader.from_pretrained(model_id, gguf_file=filename)
    for loader in (AutoTokenizer, AutoModelForCausalLM)
)
def respond(
    message,
    max_new_tokens=4000,
    temperature=0.3,
    top_p = 0.7,
):
    """Generate the model's reply to a single user message.

    The message is wrapped as a one-turn conversation, rendered through the
    tokenizer's chat template, and passed to ``model.generate`` with sampling
    enabled.

    Args:
        message: The user's text; it is formatted into the prompt as a string.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature forwarded to ``model.generate``.
        top_p: Nucleus-sampling threshold forwarded to ``model.generate``.

    Yields:
        A single string: the decoded reply, without the prompt and without
        special tokens.
    """
    messages = [{"role": "user", "content": f"{message}"}]
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )

    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )

    # For a causal LM the generated sequence starts with the prompt tokens.
    # Decoding it whole would echo the templated user message (and template
    # markers) back into the chat, so keep only what follows the prompt.
    prompt_length = input_ids.shape[-1]
    reply_tokens = gen_tokens[0][prompt_length:]

    # skip_special_tokens drops markers such as BOS/EOS from the visible text.
    gen_text = tokenizer.decode(reply_tokens, skip_special_tokens=True)
    yield gen_text

"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
def _chat_adapter(message, history, system_message, max_new_tokens, temperature, top_p):
    """Bridge the ChatInterface calling convention to ``respond``.

    ``gr.ChatInterface`` calls its function with the user message, the chat
    history, and then one value per ``additional_inputs`` component in order.
    ``respond`` only accepts the message and the three sampling settings, so
    passing it to the interface directly fails with a ``TypeError`` on every
    turn. This adapter accepts the full argument list and forwards what
    ``respond`` understands.

    Args:
        message: Text the user just submitted.
        history: Previous turns supplied by the interface; not forwarded,
            because ``respond`` builds a single-turn prompt.
        system_message: Value of the "System message" textbox; not forwarded,
            because ``respond`` has no parameter for it.
        max_new_tokens: Value of the "Max new tokens" slider.
        temperature: Value of the "Temperature" slider.
        top_p: Value of the "Top-p (nucleus sampling)" slider.

    Yields:
        Whatever ``respond`` yields for this message.
    """
    # This must itself be a generator function (not a lambda returning a
    # generator) so the interface treats the result as a streamed reply.
    yield from respond(
        message,
        # Defensive cast: a token count must be integral even if the UI
        # delivers the slider value as a float.
        max_new_tokens=int(max_new_tokens),
        temperature=temperature,
        top_p=top_p,
    )


demo = gr.ChatInterface(
    _chat_adapter,
    # Order matters: these values are passed positionally after
    # (message, history), matching the adapter's parameter order.
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()