import os

# Intended to keep inference on CPU; set before llama_cpp is imported.
os.environ["LLAMA_CPP_USE_CUDA"] = "0"

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
title = "Gemma 2 2B - Bulgarian Joke Master - GGUF"
description = """
🔎 [Gemma 2 2B](https://huggingface.co/unsloth/gemma-2-2b-bnb-4bit) fine-tuned for Bulgarian jokes, running on CPU in GGUF format.
This model is fine-tuned for generating humorous content in Bulgarian, utilizing the [Llama.cpp library](https://github.com/ggerganov/llama.cpp).
Running on CPU, it can still produce impressive results, although larger models may require more processing power.
"""

model_dir = "models"
model_name = "unsloth.Q4_K_M.gguf"
model_path = os.path.join(model_dir, model_name)

# Fetch the GGUF weights from the Hub into ./models.
hf_hub_download(
    repo_id="vislupus/bulgarian-joke-master-SmolLM2-135M-Instruct-bnb-4bit-gguf",
    filename=model_name,
    local_dir=model_dir,
)

if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")

print("Loading the model...")
llm = Llama(model_path=model_path)
print("Model loaded successfully!")

def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=256):
    try:
        conversation = ""
        # ChatInterface sends history as (user, assistant) pairs; map them onto Gemma's turn markers.
        for user_msg, bot_msg in history:
            conversation += f"<start_of_turn>user\n{user_msg}<end_of_turn>\n"
            if bot_msg:
                conversation += f"<start_of_turn>model\n{bot_msg}<end_of_turn>\n"
        # Append the new user turn and open the model turn for generation.
        conversation += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
        response = llm(
            conversation,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stop=["<end_of_turn>"],  # stop once the model closes its turn
        )
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {e}"

if __name__ == "__main__":
    gguf_demo = gr.ChatInterface(
        generate_response,
        title=title,
        description=description,
    )
    gguf_demo.launch(share=True)

    # Release the model's resources once the Gradio server stops.
    try:
        if llm:
            llm.close()
    except Exception as e:
        print(f"Error closing model: {e}")