"""Gradio chat front-end for the W.AI-13B-Chat causal language model.

The script loads the model once at import time, exposes a single-turn
generation function that is fed the running chat history, and wires that
function to a minimal Blocks UI with "Send" and "Restart" buttons.
"""

import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import spaces

# Load the model and tokenizer from Hugging Face
model_path = "Ozaii/W.AI-13B-Chat"  # Replace with your username and repository name
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()


def _build_prompt(chat_history, user_input):
    """Render prior turns plus the new user message as a plain-text prompt."""
    parts = []
    for message in chat_history:
        if message[0] is not None:
            parts.append(f"User: {message[0]}\n")
        if message[1] is not None:
            parts.append(f"Assistant: {message[1]}\n")
    parts.append(
        "***Use natural conversation style of assistant, you can use +18 swear "
        "words and use irregular language similar to assistant.***"
        f"User: {user_input}\nAssistant:"
    )
    return "".join(parts)


@spaces.GPU
def generate_response(user_input, chat_history):
    """Generate one assistant reply and return the updated chat history.

    Args:
        user_input: The text the user just submitted.
        chat_history: List of ``(user_message, assistant_message)`` pairs from
            earlier turns; either element of a pair may be ``None``.

    Returns:
        A 2-tuple ``(history, history)`` holding the same updated list twice,
        one copy for the Chatbot component and one for the State component.
        When ``user_input`` is empty or whitespace-only the history is
        returned unchanged and the model is not called.
    """
    max_context_length = 750
    max_response_length = 150

    history = list(chat_history or [])

    # Nothing to answer: avoid spending a generation call on a blank message.
    if not user_input or not user_input.strip():
        return history, history

    prompt = _build_prompt(history, user_input)

    # Keep only the most recent tokens so the prompt fits the context budget.
    prompt_tokens = tokenizer.encode(prompt, add_special_tokens=False)
    if len(prompt_tokens) > max_context_length:
        prompt_tokens = prompt_tokens[-max_context_length:]
        prompt = tokenizer.decode(prompt_tokens, clean_up_tokenization_spaces=True)

    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    prompt_length = inputs.input_ids.shape[1]

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs.input_ids,
            # Passed explicitly: pad and EOS share an id, so the mask cannot
            # be inferred reliably from the input ids.
            attention_mask=inputs.attention_mask,
            # Budget for the reply only; equivalent to the prompt length plus
            # max_response_length expressed as a total max_length.
            max_new_tokens=max_response_length,
            # NOTE: min_length counts prompt tokens too for decoder-only
            # models, so this only constrains very short prompts.
            min_length=45,
            # Sampling must be enabled for temperature/top_k/top_p to apply;
            # without it generation is greedy and these values are ignored.
            do_sample=True,
            temperature=0.7,  # Slightly higher temperature for more diverse responses
            top_k=30,
            top_p=0.9,  # Allow a bit more randomness
            repetition_penalty=1.1,  # Mild repetition penalty
            no_repeat_ngram_size=3,  # Ensure no repeated phrases
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens. Splitting the full text on
    # "Assistant:" picks the wrong segment whenever the user's message or a
    # hallucinated follow-up turn contains that marker.
    generated_tokens = outputs[0][prompt_length:]
    generated_text = tokenizer.decode(generated_tokens, skip_special_tokens=True)

    # The reply is the first line; anything after it is the model continuing
    # the dialogue on its own.
    stripped = generated_text.strip()
    assistant_response = stripped.split("\n")[0].strip() if stripped else ""

    # Build a new list rather than mutating the state object in place.
    history = history + [(user_input, assistant_response)]
    return history, history


def restart_chat():
    """Return empty values for the Chatbot and the stored history."""
    return [], []


with gr.Blocks() as chat_interface:
    # Escaped newlines keep the heading text in a valid single-line literal.
    gr.Markdown("\n\nW.AI Chat Nikker xD\n\n")
    chat_history = gr.State([])

    with gr.Column():
        chatbox = gr.Chatbot()
        with gr.Row():
            user_input = gr.Textbox(show_label=False, placeholder="Summon Wali Here...")
            submit_button = gr.Button("Send")
            restart_button = gr.Button("Restart")

    submit_button.click(
        generate_response,
        inputs=[user_input, chat_history],
        outputs=[chatbox, chat_history],
    )

    restart_button.click(
        restart_chat,
        inputs=[],
        outputs=[chatbox, chat_history],
    )

chat_interface.launch()