import os

# Set Kaggle API credentials (needed to download the Gemma preset).
# The values below are placeholders: export your own credentials in the
# environment, or substitute them here. Never commit real keys.
os.environ.setdefault("KAGGLE_USERNAME", "your_kaggle_username")
os.environ.setdefault("KAGGLE_KEY", "your_kaggle_key")

# Choose the Keras backend. This must happen before keras_nlp (and Keras)
# is imported, because the backend is fixed at import time. The memory
# fraction setting only has an effect under the JAX backend.
os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ["XLA_PYTHON_CLIENT_MEM_FRACTION"] = "1.00"

import gradio as gr
import keras_nlp

# Load the GemmaCausalLM model
gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")

# Attach LoRA adapters and load fine-tuned weights, if you have them
lora_weights_path = "model.lora.h5"
gemma_lm.backbone.enable_lora(rank=4)  # Enable LoRA; the rank must match the one used at fine-tuning time
gemma_lm.preprocessor.sequence_length = 512  # Limit sequence length
gemma_lm.backbone.load_lora_weights(lora_weights_path)  # Load LoRA weights


# Define the response generation function
def generate_response(message, history):
    # Prompt template shared by past turns and the current message
    template = "Instruction:\n{instruction}\n\nResponse:\n{response}"

    # Format the conversation history into the prompt, separating turns
    # with blank lines
    formatted_history = ""
    for user_msg, bot_msg in history:
        formatted_history += template.format(instruction=user_msg, response=bot_msg) + "\n\n"

    # Add the latest message from the user, leaving the response slot
    # empty for the model to complete
    prompt = template.format(instruction=message, response="")

    # Combine history with the latest prompt
    final_prompt = formatted_history + prompt

    # Generate a completion. max_length counts prompt tokens plus generated
    # tokens, so raise it if long histories get truncated.
    response = gemma_lm.generate(final_prompt, max_length=256)

    # generate() returns the prompt followed by the new text; keep only
    # what comes after the final "Response:" marker, i.e. the model's reply
    return response.split("Response:\n")[-1].strip()


# Create the Gradio chat interface. The retry/undo/clear button arguments
# below are supported by Gradio 4.x; they were removed in Gradio 5.
interface = gr.ChatInterface(
    fn=generate_response,  # Function that generates responses
    chatbot=gr.Chatbot(height=300),  # Chatbot UI component
    textbox=gr.Textbox(placeholder="You can ask me anything", container=False, scale=7),
    title="Local Model Chat Bot",
    retry_btn=None,  # Disable the retry button
    undo_btn="Delete Previous",  # Enable the undo button
    clear_btn="Clear",  # Enable the clear button
)

# Launch the Gradio app with a public share link
interface.launch(share=True)
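
# ----------------------------------------------------------------------
# For reference: a minimal sketch of how a "model.lora.h5" file like the
# one loaded above could be produced. This is an assumed fine-tuning
# workflow, not part of the app itself; `train_prompts` is a hypothetical
# list of strings already formatted with the same Instruction/Response
# template, and the hyperparameters are placeholders. `save_lora_weights`
# is the counterpart of the `load_lora_weights` call used above.
#
#   import keras
#   import keras_nlp
#
#   gemma_lm = keras_nlp.models.GemmaCausalLM.from_preset("gemma_2b_en")
#   gemma_lm.backbone.enable_lora(rank=4)  # Must match the rank used at load time
#   gemma_lm.preprocessor.sequence_length = 512
#   gemma_lm.compile(
#       loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
#       optimizer=keras.optimizers.Adam(learning_rate=5e-5),
#       weighted_metrics=[keras.metrics.SparseCategoricalAccuracy()],
#   )
#   gemma_lm.fit(train_prompts, epochs=1, batch_size=1)
#   gemma_lm.backbone.save_lora_weights("model.lora.h5")
# ----------------------------------------------------------------------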