import os

import gradio as gr
from llama_cpp import Llama

# Read the Hugging Face token from the environment (e.g. a Spaces secret);
# it is only required if the model repo is private or gated.
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_PATH = "unsloth.Q4_K_M.gguf"

# Download the GGUF weights from the Hub if they are not present locally.
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download

    MODEL_PATH = hf_hub_download(
        repo_id="RCCsabi/model_q4_k_m",
        filename="unsloth.Q4_K_M.gguf",
        token=HF_TOKEN,  # `use_auth_token` is deprecated; use `token` instead
    )
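
# Note: hf_hub_download caches the file under the local Hugging Face cache
# (~/.cache/huggingface/hub by default) and returns the path to that copy,
# so repeated restarts do not re-download the weights.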

# Load the quantized GGUF model into llama.cpp.
llm = Llama(model_path=MODEL_PATH)
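# The call above relies on llama-cpp-python's defaults. For longer prompts
# or more CPU parallelism you could pass tuning parameters, e.g. (values
# below are illustrative, not taken from the original script):
#
#     llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=4)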


def generate_response(prompt):
    # Ask for a short completion and stop at the first newline.
    output = llm(prompt, max_tokens=50, stop=["\n"])
    response = output["choices"][0]["text"]
    return response.strip()
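
# For example, generate_response("The capital of France is") returns the
# model's first line of completion with surrounding whitespace stripped.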

demo = gr.Interface(
    fn=generate_response,
    inputs=gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    outputs="text",
    title="unsloth.Q4_K_M.gguf Model Inference",
    description="Enter a prompt to generate a response.",
)
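
# When running locally, demo.launch(share=True) would also expose a temporary
# public URL; on Hugging Face Spaces the plain launch() below is sufficient.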

if __name__ == "__main__":
    demo.launch()