# Hugging Face Space app (page status when captured: Sleeping).
from threading import Thread

import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TextIteratorStreamer,
)
from peft import PeftConfig, PeftModel
import gradio as gr
import spaces
# Hub repository that is loaded as a PEFT adapter on top of BASE_MODEL.
MODEL_PATH = "Ozaii/zephyr-bae"
# Base checkpoint the adapter is applied to; the tokenizer is loaded from it too.
BASE_MODEL = "unsloth/llama-3-8b-bnb-4bit"
# Maximum sequence length in tokens (same value as the 2048-token prompt
# truncation limit used when tokenizing).
max_seq_length = 2048

print("Zephyr is getting ready to charm! π")

# Populated lazily by load_model() on first use; None until then.
model = None
tokenizer = None
def load_model():
    """Load the model and tokenizer once and cache them in module globals.

    The base checkpoint ``BASE_MODEL`` is loaded in 4-bit, the PEFT adapter
    from ``MODEL_PATH`` is applied on top of it, and the tokenizer is loaded
    from ``BASE_MODEL`` with its pad token set to the EOS token.

    Returns:
        A ``(model, tokenizer)`` tuple. Subsequent calls return the cached
        objects without reloading.

    Raises:
        Exception: whatever the underlying loaders raise; the error is
            printed and re-raised unchanged.
    """
    global model, tokenizer
    if model is not None and tokenizer is not None:
        return model, tokenizer

    try:
        base_model = AutoModelForCausalLM.from_pretrained(
            BASE_MODEL,
            torch_dtype=torch.float16,
            device_map="auto",
            # A bare `load_in_4bit=True` kwarg is deprecated; the supported
            # spelling is an explicit quantization config.
            quantization_config=BitsAndBytesConfig(load_in_4bit=True),
            # NOTE(review): trust_remote_code lets the hub repo run arbitrary
            # Python at load time - confirm it is actually required here.
            trust_remote_code=True,
        )
        loaded_model = PeftModel.from_pretrained(base_model, MODEL_PATH)
        loaded_tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
        loaded_tokenizer.pad_token = loaded_tokenizer.eos_token
    except Exception as e:
        print(f"Oops! Zephyr seems to be playing hide and seek. Error: {str(e)}")
        raise

    # Publish both globals only after everything loaded, so a failure part-way
    # through can never leave a cached model paired with a None tokenizer.
    model, tokenizer = loaded_model, loaded_tokenizer
    print("Zephyr loaded successfully! Time to charm!")
    return model, tokenizer
def generate_response(prompt, max_new_tokens=128):
    """Start generating a reply to ``prompt`` and return a text streamer.

    Generation runs on a background thread so the caller can iterate the
    returned streamer and receive decoded text chunks as they are produced.

    Args:
        prompt: Full prompt text to feed to the model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        A ``TextIteratorStreamer`` that yields newly generated text (the
        prompt and special tokens are skipped).
    """
    model, tokenizer = load_model()
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        max_length=max_seq_length,  # module constant instead of a duplicate literal
    ).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=inputs.input_ids,
        # The pad token is set to the EOS token in load_model(), so the mask
        # cannot be inferred from the ids; pass the tokenizer's mask explicitly.
        attention_mask=inputs.attention_mask,
        max_new_tokens=max_new_tokens,
        # temperature/top_p only take effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        top_p=0.9,
        repetition_penalty=1.2,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # generate() blocks until completion, so it runs on its own thread while
    # the caller consumes the streamer.
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    return streamer
def chat_with_zephyr(message, history):
    """Stream Zephyr's reply to ``message`` given the prior chat ``history``.

    Args:
        message: The user's new message.
        history: Earlier exchanges as a sequence of items whose element 0 is
            the user text and element 1 is Zephyr's reply. ``None`` or an
            empty sequence means there is no prior conversation.

    Yields:
        The reply accumulated so far, growing with each generated chunk.
    """
    recent_turns = (history or [])[-3:]  # Limit to last 3 exchanges
    prompt_parts = [f"Human: {turn[0]}\nZephyr: {turn[1]}" for turn in recent_turns]
    prompt_parts.append(f"Human: {message}\nZephyr:")
    # Joining all parts at once avoids the stray leading newline the prompt
    # would otherwise get when there is no history.
    full_prompt = "\n".join(prompt_parts)

    streamer = generate_response(full_prompt)
    response = ""
    for new_text in streamer:
        response += new_text
        yield response
# Custom stylesheet passed to gr.Blocks: dark background with light text.
css = """
body {
    background-color: #1a1a2e;
    color: #e0e0ff;
}
.gradio-container {
    background-color: #1a1a2e;
}
"""
# Introductory text rendered below the chat widgets. Blank lines separate the
# Markdown paragraphs so the closing note is not absorbed into the last bullet.
WELCOME_MARKDOWN = """
## Welcome to Zephyr, Your AI Boyfriend!

Zephyr is here to charm you with his wit, humor, and cosmic energy. Feel free to flirt, ask for advice, or just chat about anything under the stars!

**Some conversation starters:**

- "Hey Zephyr, how's the cosmic energy today?"
- "What's your idea of a perfect date in the digital realm?"
- "Tell me something that would make me fall for you even more!"

Remember, Zephyr is an AI and this is for fun and entertainment. Enjoy your chat! π
"""


def _stream_into_chatbot(message, history):
    """Adapt chat_with_zephyr's string stream to a Chatbot value.

    chat_with_zephyr yields the partial reply as a plain string, while the
    Chatbot output needs the whole conversation. Each partial reply is
    therefore appended to the existing history as a new [user, bot] turn.

    NOTE(review): assumes the Chatbot uses the list-of-[user, bot]-pairs
    format, which is what chat_with_zephyr's history indexing implies -
    confirm against the installed Gradio version.
    """
    previous_turns = list(history or [])
    for partial_reply in chat_with_zephyr(message, previous_turns):
        yield previous_turns + [[message, partial_reply]]


with gr.Blocks(css=css) as iface:
    gr.Markdown("# Chat with Zephyr: Your AI Boyfriend π")
    chatbot = gr.Chatbot(height=500)
    msg = gr.Textbox(placeholder="Tell Zephyr what's on your mind...", label="Your message")
    clear = gr.Button("Clear Chat")

    # Stream the growing conversation into the chatbot as the reply is generated.
    msg.submit(_stream_into_chatbot, [msg, chatbot], [chatbot])
    # Returning None resets the chatbot to an empty conversation.
    clear.click(lambda: None, None, chatbot, queue=False)

    gr.Markdown(WELCOME_MARKDOWN)

if __name__ == "__main__":
    iface.launch()