import os

# Intended to keep inference on CPU; set before llama_cpp is imported.
os.environ["LLAMA_CPP_USE_CUDA"] = "0"

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
title = "Gemma 2 2B - Bulgarian Joke Master - GGUF"
description = """
🔎 [Gemma 2 2B](https://huggingface.co/unsloth/gemma-2-2b-bnb-4bit) fine-tuned for Bulgarian jokes, running on CPU in GGUF format.
This model is fine-tuned for generating humorous content in Bulgarian, utilizing the [Llama.cpp library](https://github.com/ggerganov/llama.cpp).
Running on CPU, it can still produce impressive results, although larger models may require more processing power.
"""

model_dir = "models"
model_name = "unsloth.Q4_K_M.gguf"
model_path = os.path.join(model_dir, model_name)

# Fetch the GGUF weights from the Hub into ./models.
hf_hub_download(
    repo_id="vislupus/bulgarian-joke-master-SmolLM2-135M-Instruct-bnb-4bit-gguf",
    filename=model_name,
    local_dir=model_dir,
)

if not os.path.exists(model_path):
    raise FileNotFoundError(f"Model file not found at {model_path}")

print("Loading the model...")
llm = Llama(model_path=model_path)
print("Model loaded successfully!")

def generate_response(message, history, temperature=0.7, top_p=1.0, max_tokens=256):
    try:
        conversation = ""
        # ChatInterface sends history as (user, assistant) pairs; map them onto Gemma's turn markers.
        for user_msg, bot_msg in history:
            conversation += f"<start_of_turn>user\n{user_msg}<end_of_turn>\n"
            if bot_msg:
                conversation += f"<start_of_turn>model\n{bot_msg}<end_of_turn>\n"
        # Append the new user turn and open the model turn for generation.
        conversation += f"<start_of_turn>user\n{message}<end_of_turn>\n<start_of_turn>model\n"
        response = llm(
            conversation,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            stop=["<end_of_turn>"],  # stop once the model closes its turn
        )
        return response["choices"][0]["text"].strip()
    except Exception as e:
        return f"Error generating response: {e}"

if __name__ == "__main__":
    gguf_demo = gr.ChatInterface(
        generate_response,
        title=title,
        description=description,
    )
    gguf_demo.launch(share=True)

    # Release the model's resources once the Gradio server stops.
    try:
        if llm:
            llm.close()
    except Exception as e:
        print(f"Error closing model: {e}")