# skatriXR / app.py — Hugging Face Space by RCCsabi, commit dae8804 ("Update app.py").
import gradio as gr
from llama_cpp import Llama
import os
# Token for private Hugging Face repos; read from the environment so it is
# never hard-coded. May be None when the repo is public.
HF_TOKEN = os.getenv("HF_TOKEN")

# Local filename of the quantized GGUF model weights.
MODEL_PATH = "unsloth.Q4_K_M.gguf"

# Download the model from the Hub if it is not already present next to this
# script (e.g. on a fresh Space container).
if not os.path.exists(MODEL_PATH):
    from huggingface_hub import hf_hub_download

    MODEL_PATH = hf_hub_download(
        repo_id="RCCsabi/model_q4_k_m",
        filename="unsloth.Q4_K_M.gguf",
        token=HF_TOKEN,  # `use_auth_token` is deprecated; `token` is the current name
    )

# Load the model once at import time so every request reuses the same instance.
llm = Llama(model_path=MODEL_PATH)
def generate_response(prompt, max_tokens=50, stop=("\n",)):
    """Generate a completion for *prompt* using the module-level Llama model.

    Parameters
    ----------
    prompt : str
        Text fed to the model.
    max_tokens : int, optional
        Upper bound on generated tokens (default 50, matching the original
        hard-coded limit).
    stop : sequence of str, optional
        Stop sequences; by default generation halts at the first newline.

    Returns
    -------
    str
        The generated text with surrounding whitespace stripped; empty
        string if the model returns no choices.
    """
    output = llm(prompt, max_tokens=max_tokens, stop=list(stop))
    # llama-cpp-python returns an OpenAI-style dict; text lives in choices[0].
    choices = output.get("choices") or []
    if not choices:
        # Defensive: surface an empty string in the UI instead of IndexError.
        return ""
    return choices[0]["text"].strip()
# Build the Gradio UI: one multi-line prompt box in, plain text out.
prompt_box = gr.Textbox(lines=2, placeholder="Enter your prompt here...")
demo = gr.Interface(
    fn=generate_response,
    inputs=prompt_box,
    outputs="text",
    title="unsloth.Q4_K_M.gguf Model Inference",
    description="Enter a prompt to generate a response.",
)

# Start the server only when run as a script (Spaces executes this module).
if __name__ == "__main__":
    demo.launch()