import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftConfig, PeftModel
import gradio as gr
import spaces
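
# Model configuration: MODEL_PATH points at the trained LoRA adapter and
# BASE_MODEL at the 4-bit-quantized Llama-3 8B checkpoint it was trained on.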
MODEL_PATH = "Ozaii/zephyr-bae"
BASE_MODEL = "unsloth/llama-3-8b-bnb-4bit"
max_seq_length = 2048
print("Zephyr is getting ready to charm! π")
model = None
tokenizer = None
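
# ZeroGPU: the decorator requests a GPU only while this function runs,
# here with a 120-second allowance for the initial download and load.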
@spaces.GPU(duration=120)
def load_model():
    global model, tokenizer
    if model is None:
        try:
            # Load the fp16 base model, then attach the LoRA adapter on top.
            peft_config = PeftConfig.from_pretrained(MODEL_PATH)
            base_model = AutoModelForCausalLM.from_pretrained(
                BASE_MODEL,
                torch_dtype=torch.float16,
                device_map="auto",
                trust_remote_code=True
            )
            model = PeftModel.from_pretrained(base_model, MODEL_PATH)
            tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
            # Llama tokenizers ship without a pad token; reuse EOS for padding.
            tokenizer.pad_token = tokenizer.eos_token
            print("Zephyr loaded successfully! Time to charm!")
        except Exception as e:
            print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
            raise
    return model, tokenizer
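
# Nucleus sampling (top_p) with a mild repetition penalty keeps replies varied
# without rambling; max_new_tokens caps the length of each reply.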
@spaces.GPU
def generate_response(prompt, max_new_tokens=128):
    model, tokenizer = load_model()
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=max_seq_length).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        do_sample=True
    )
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
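
# Builds a plain-text transcript prompt of the form
#   Human: ...
#   Zephyr: ...
# from recent history, then asks the model to continue as "Zephyr".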
def chat_with_zephyr(message, history):
    conversation_history = history[-3:]  # Keep only the last 3 exchanges as context
    full_prompt = "\n".join([f"Human: {h[0]}\nZephyr: {h[1]}" for h in conversation_history])
    full_prompt += f"\nHuman: {message}\nZephyr:"
    response = generate_response(full_prompt)
    # The decoded output echoes the whole prompt, so keep only the text after
    # the final "Zephyr:" marker.
    zephyr_response = response.split("Zephyr:")[-1].strip()
    # Append the new exchange to the history
    history.append((message, zephyr_response))
    return history
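
# Dark "cosmic" color scheme for the page.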
css = """
body {
background-color: #1a1a2e;
color: #e0e0ff;
}
.gradio-container {
background-color: #1a1a2e;
}
"""
with gr.Blocks(css=css) as iface:
    gr.Markdown("# Chat with Zephyr: Your AI Boyfriend")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Tell Zephyr what's on your mind...", label="Your message")
    clear = gr.Button("Clear Chat")

    def respond(message, chat_history):
        # chat_with_zephyr already appends the new (message, reply) pair and
        # returns the updated history, so pass it through directly; appending
        # again here would duplicate every exchange in the chat window.
        chat_history = chat_with_zephyr(message, chat_history)
        return "", chat_history

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: None, None, chatbot, queue=False)
gr.Markdown("""
## Welcome to Zephyr, Your AI Boyfriend!
Zephyr is here to charm you with his wit, humor, and cosmic energy. Feel free to flirt, ask for advice, or just chat about anything under the stars!
**Some conversation starters:**
- "Hey Zephyr, how's the cosmic energy today?"
- "What's your idea of a perfect date in the digital realm?"
- "Tell me something that would make me fall for you even more!"
Remember, Zephyr is an AI and this is for fun and entertainment. Enjoy your chat! π
""")
if __name__ == "__main__":
    iface.launch()
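
# To try this outside a Hugging Face Space, the rough setup would be
# (assuming a CUDA GPU; bitsandbytes is required by the 4-bit base model):
#   pip install torch transformers peft bitsandbytes gradio spaces
#   python app.py
# The @spaces.GPU decorators are no-ops when not running on ZeroGPU hardware.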